<a href="https://colab.research.google.com/github/SanjayBista1010/DeepLearning/blob/main/PytorchSnake%26SpiderColor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [54]:
# Attach Google Drive to the Colab filesystem so the dataset archive can be read.
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [55]:
import os
import zipfile

# Source archive on the mounted Drive and the local directory it is unpacked into.
zip_path = '/content/drive/MyDrive/dataset.zip'
extract_path = '/content/images'

# The target directory may already exist from a previous run; that is fine.
os.makedirs(extract_path, exist_ok=True)

# Unpack every member of the archive beneath the target directory.
with zipfile.ZipFile(zip_path, mode='r') as archive:
    archive.extractall(path=extract_path)

print(f"Extracted files to {extract_path}")


Extracted files to /content/images


In [56]:
import torch
import torch.nn.functional as F
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch import optim
from torch import nn
from torch.utils.data import Subset, DataLoader
from tqdm import tqdm

class CNN(nn.Module):
    """VGG-16-style convolutional classifier.

    Five blocks of 3x3 convolutions (64, 128, 256, 512 and 512 output
    channels), each closed by a 2x2 max pooling, feed three fully connected
    layers. The final layer returns raw class scores; no softmax is applied.

    Args:
        in_channels: Number of channels of the input images (3 for RGB).
        num_classes: Number of scores produced by the last linear layer.
    """

    def __init__(self, in_channels=3, num_classes=2):
        super().__init__()
        # Block 1 (2 conv layers)
        self.conv1_1 = nn.Conv2d(in_channels, 64, 3, padding=1)
        self.conv1_2 = nn.Conv2d(64, 64, 3, padding=1)
        self.pool1 = nn.MaxPool2d(2, 2)  # 224×224 → 112×112

        # Block 2 (2 conv layers)
        self.conv2_1 = nn.Conv2d(64, 128, 3, padding=1)
        self.conv2_2 = nn.Conv2d(128, 128, 3, padding=1)
        self.pool2 = nn.MaxPool2d(2, 2)  # 112×112 → 56×56

        # Block 3 (3 conv layers)
        self.conv3_1 = nn.Conv2d(128, 256, 3, padding=1)
        self.conv3_2 = nn.Conv2d(256, 256, 3, padding=1)
        self.conv3_3 = nn.Conv2d(256, 256, 3, padding=1)
        self.pool3 = nn.MaxPool2d(2, 2)  # 56×56 → 28×28

        # Block 4 (3 conv layers)
        self.conv4_1 = nn.Conv2d(256, 512, 3, padding=1)
        self.conv4_2 = nn.Conv2d(512, 512, 3, padding=1)
        self.conv4_3 = nn.Conv2d(512, 512, 3, padding=1)
        self.pool4 = nn.MaxPool2d(2, 2)  # 28×28 → 14×14

        # Block 5 (3 conv layers)
        self.conv5_1 = nn.Conv2d(512, 512, 3, padding=1)
        self.conv5_2 = nn.Conv2d(512, 512, 3, padding=1)
        self.conv5_3 = nn.Conv2d(512, 512, 3, padding=1)
        self.pool5 = nn.MaxPool2d(2, 2)  # 14×14 → 7×7

        # Force a 7×7 feature map in front of the classifier. For 224×224
        # inputs the map is already 7×7, so this is an identity; for other
        # input sizes it keeps `fc1` usable instead of raising a shape error.
        # The layer has no parameters, so state-dict keys are unchanged.
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))

        # Fully connected layers
        self.fc1 = nn.Linear(512 * 7 * 7, 4096)
        self.fc2 = nn.Linear(4096, 4096)
        self.fc3 = nn.Linear(4096, num_classes)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Compute class scores for a batch of images.

        Args:
            x: Tensor of shape ``(batch, in_channels, height, width)``. Height
                and width must be at least 32 so that the five 2x2 poolings
                leave a non-empty feature map.

        Returns:
            Tensor of shape ``(batch, num_classes)`` holding unnormalised scores.
        """
        # Block 1
        x = F.relu(self.conv1_1(x))
        x = F.relu(self.conv1_2(x))
        x = self.pool1(x)

        # Block 2
        x = F.relu(self.conv2_1(x))
        x = F.relu(self.conv2_2(x))
        x = self.pool2(x)

        # Block 3
        x = F.relu(self.conv3_1(x))
        x = F.relu(self.conv3_2(x))
        x = F.relu(self.conv3_3(x))
        x = self.pool3(x)

        # Block 4
        x = F.relu(self.conv4_1(x))
        x = F.relu(self.conv4_2(x))
        x = F.relu(self.conv4_3(x))
        x = self.pool4(x)

        # Block 5
        x = F.relu(self.conv5_1(x))
        x = F.relu(self.conv5_2(x))
        x = F.relu(self.conv5_3(x))
        x = self.pool5(x)

        # Normalise the spatial size, then flatten everything but the batch axis.
        x = self.avgpool(x)
        x = torch.flatten(x, 1)

        # FC layers (dropout after each hidden layer, none after the output)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.fc3(x)
        return x

In [57]:
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [58]:
# Hyperparameters shared by the cells below.
# Flattened feature size entering the first fully connected layer: 224x224
# inputs are halved by five 2x2 poolings to 7x7 with 512 channels (25,088).
# The earlier literal `32,768` was parsed by Python as the tuple (32, 768).
input_size = 512 * 7 * 7
num_classes = 2      # snake/spider
learning_rate = 0.001
batch_size = 64      # reduce (e.g. to 32) if memory runs out
num_epochs = 10      # increase if needed

In [59]:
!pip install opencv-python



In [60]:
# Preprocessing for 3-channel colour images: fixed-size resize, random
# flip/rotation augmentation, tensor conversion, then per-channel normalisation.
rgb_mean = [0.485, 0.456, 0.406]  # standard ImageNet channel means
rgb_std = [0.229, 0.224, 0.225]   # standard ImageNet channel standard deviations

pipeline_steps = [
    transforms.Resize((224, 224)),      # every image becomes 224x224
    transforms.RandomHorizontalFlip(),  # augmentation: random mirror
    transforms.RandomRotation(15),      # augmentation: rotate within +/-15 degrees
    transforms.ToTensor(),
    transforms.Normalize(mean=rgb_mean, std=rgb_std),
]
transform = transforms.Compose(pipeline_steps)


In [61]:
# Derive the dataset root from the extraction directory so this cell does not
# depend on the kernel's current working directory being /content.
dataset_path = os.path.join(extract_path, 'dataset')  # contains 'snake' and 'spider' subfolders
full_dataset = datasets.ImageFolder(root=dataset_path, transform=transform)
print(f"Classes: {full_dataset.classes}")  # ['snake', 'spider']

Classes: ['snake', 'spider']


In [62]:
from sklearn.model_selection import train_test_split


# Read each sample's class index from the dataset's stored target list.
# Indexing `full_dataset[i]` would decode and transform every image on disk
# just to obtain its label.
labels = list(full_dataset.targets)
indices = list(range(len(labels)))

# Fixed number of samples drawn per class for each split.
train_per_class = 2500
test_per_class = 100

train_indices, test_indices = [], []

# One pass per class keeps both splits balanced (0=snake, 1=spider).
for class_label in range(len(full_dataset.classes)):
    class_indices = [i for i, l in enumerate(labels) if l == class_label]

    # train_test_split selects the fixed counts; the seed makes the draw repeatable.
    train_idx, test_idx = train_test_split(
        class_indices,
        train_size=train_per_class,
        test_size=test_per_class,
        random_state=42,
        shuffle=True,
    )
    train_indices += train_idx
    test_indices += test_idx

In [63]:
# `transform` contains random flips and rotations, and every Subset of
# `full_dataset` shares it. Evaluating through `full_dataset` would therefore
# augment the test images at random. Build a second view of the same folder
# with deterministic preprocessing for evaluation. ImageFolder lists classes
# and files in sorted order, so the same indices address the same images.
eval_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])
eval_dataset = datasets.ImageFolder(root=dataset_path, transform=eval_transform)

train_dataset = Subset(full_dataset, train_indices)
test_dataset = Subset(eval_dataset, test_indices)

# Shuffle only the training batches; keep the test order fixed.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

print(f"Train samples: {len(train_dataset)}, Test samples: {len(test_dataset)}")


Train samples: 5000, Test samples: 200


In [64]:
# Build the network for 3-channel input, then move its parameters to the chosen device.
model = CNN(in_channels=3, num_classes=num_classes)
model = model.to(device)

In [65]:
# Cross-entropy loss on the model's class scores; Adam with weight decay
# updates every model parameter.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(
    params=model.parameters(),
    lr=learning_rate,
    weight_decay=1e-4,
)

In [None]:
# Train for `num_epochs` passes over the training loader, printing the mean
# batch loss after each pass.
for epoch in range(num_epochs):
    # Set training mode explicitly so dropout is active even when this cell is
    # re-run after the model was left in evaluation mode.
    model.train()
    running_loss = 0.0
    print(f"Epoch [{epoch + 1}/{num_epochs}]")

    for data, targets in tqdm(train_loader):
        data, targets = data.to(device), targets.to(device)

        # Forward
        scores = model(data)
        loss = criterion(scores, targets)

        # Backward: clear old gradients, backpropagate, then update the weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # .item() extracts a Python float so no autograd graph is kept alive
        running_loss += loss.item()

    print(f"Epoch [{epoch + 1}/{num_epochs}] Loss: {running_loss / len(train_loader):.4f}")


Epoch [1/10]


  1%|▏         | 1/79 [02:11<2:51:13, 131.72s/it]

In [None]:
def check_accuracy(loader, model, loader_name="Data"):
    """Print the classification accuracy of ``model`` over ``loader``.

    The model is put in evaluation mode for the pass and afterwards returned
    to whichever mode it was in when the function was called.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` batches of tensors.
        model: ``nn.Module`` producing one row of class scores per input.
        loader_name: Label used in the printed messages.

    Returns:
        None. Results are reported with ``print``.
    """
    print(f"Checking accuracy on {loader_name}")

    # Run the batches on the device the model's parameters live on; fall back
    # to the notebook-level `device` for a model without parameters.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else device

    num_correct = 0
    num_samples = 0
    was_training = model.training  # remember the caller's mode
    model.eval()  # evaluation mode (disables dropout)

    try:
        with torch.no_grad():  # no gradient needed
            for x, y in loader:
                x, y = x.to(target_device), y.to(target_device)
                scores = model(x)
                _, predictions = scores.max(1)  # index of the highest score per row
                num_correct += (predictions == y).sum().item()  # convert to number
                num_samples += predictions.size(0)
    finally:
        # Restore the original mode even if a batch raised.
        model.train(was_training)

    # An empty loader would otherwise divide by zero.
    if num_samples == 0:
        print(f"No samples found in {loader_name}; accuracy is undefined.")
        return

    accuracy = 100 * num_correct / num_samples
    print(f"Got {num_correct}/{num_samples} correct -> Accuracy: {accuracy:.2f}%")

# Report accuracy on the training split first, then on the held-out test split.
for eval_loader, eval_name in ((train_loader, "Training Data"), (test_loader, "Test Data")):
    check_accuracy(eval_loader, model, loader_name=eval_name)