<a href="https://colab.research.google.com/github/SanjayBista1010/DeepLearning/blob/main/PytorchSnake%26SpiderColor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import zipfile
import os

# Path to your ZIP file in Google Drive
zip_path = '/content/drive/MyDrive/dataset.zip'

# Destination folder in Colab
extract_path = '/content/images'
os.makedirs(extract_path, exist_ok=True)

# Extract
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

print(f"Extracted files to {extract_path}")


Extracted files to /content/images


In [4]:
import torch
import torch.nn.functional as F
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch import optim
from torch import nn
from torch.utils.data import Subset, DataLoader
from tqdm import tqdm

import torch
import torch.nn as nn

class CNN(nn.Module):
    def __init__(self, in_channels=3, num_classes=2):
        super(CNN, self).__init__()

        # Feature extraction layers (VGG16-like with BatchNorm)
        self.features = nn.Sequential(
            # Block 1
            nn.Conv2d(in_channels, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64), nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64), nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # Block 2
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128), nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128), nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # Block 3
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256), nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256), nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256), nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # Block 4
            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512), nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512), nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512), nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # Block 5
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512), nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512), nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512), nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        # Classifier (same as VGG16)
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(inplace=True), nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True), nn.Dropout(0.5),
            nn.Linear(4096, num_classes)
        )

    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)  # Flatten before FC layers
        x = self.classifier(x)
        return x

In [5]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [6]:
num_classes = 2      # snake/spider
learning_rate = 0.001
batch_size = 64      # or 64 if memory allows
num_epochs = 10      # increase if needed

In [7]:
!pip install opencv-python



In [8]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),                          # resize to VGG16 input size
    transforms.RandomHorizontalFlip(p=0.5),                 # 50% chance to flip
    transforms.RandomRotation(15),                          # small rotations
    transforms.ColorJitter(brightness=0.2, contrast=0.2,
                           saturation=0.2, hue=0.1),        # light color augmentation
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],        # ImageNet mean
                         std=[0.229, 0.224, 0.225])         # ImageNet std
])


In [9]:
dataset_path = 'images/'  # contains 'snake' and 'spider' subfolders
full_dataset = datasets.ImageFolder(root=dataset_path, transform=transform)
print(f"Classes: {full_dataset.classes}")  # ['snake', 'spider']

Classes: ['snake', 'spider']


In [10]:
from torch.utils.data import random_split, DataLoader

# Split dataset
total_size = len(full_dataset)   # 5300 images
train_size = int(0.8 * total_size)
test_size = total_size - train_size

train_dataset, test_dataset = random_split(
    full_dataset,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(42)  # ensures reproducibility
)




In [11]:
# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2, pin_memory=True)

print(f"Train size: {len(train_dataset)}, Test size: {len(test_dataset)}")

Train size: 4240, Test size: 1060


In [12]:
model = CNN(in_channels=3, num_classes=num_classes).to(device)

In [13]:
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
criterion = nn.CrossEntropyLoss()

In [14]:
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    loop = tqdm(train_loader, desc=f"Epoch [{epoch+1}/{num_epochs}]")
    for data, targets in loop:
        data, targets = data.to(device), targets.to(device)

        # Forward pass
        outputs = model(data)
        loss = criterion(outputs, targets)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Update running stats
        running_loss += loss.item()
        _, preds = outputs.max(1)
        correct += (preds == targets).sum().item()
        total += targets.size(0)

        # Update tqdm description
        loop.set_postfix(loss=loss.item(), acc=100.0 * correct / total)

    avg_loss = running_loss / len(train_loader)
    train_acc = 100.0 * correct / total
    print(f"Epoch [{epoch+1}/{num_epochs}] → Avg Loss: {avg_loss:.4f} | Train Accuracy: {train_acc:.2f}%")


Epoch [1/10]: 100%|██████████| 67/67 [01:19<00:00,  1.18s/it, acc=53.2, loss=0.806]


Epoch [1/10] → Avg Loss: 1.0003 | Train Accuracy: 53.23%


Epoch [2/10]: 100%|██████████| 67/67 [01:13<00:00,  1.09s/it, acc=52.8, loss=0.695]


Epoch [2/10] → Avg Loss: 0.7696 | Train Accuracy: 52.76%


Epoch [3/10]: 100%|██████████| 67/67 [01:11<00:00,  1.07s/it, acc=56.2, loss=0.665]


Epoch [3/10] → Avg Loss: 0.7000 | Train Accuracy: 56.25%


Epoch [4/10]: 100%|██████████| 67/67 [01:12<00:00,  1.09s/it, acc=56.8, loss=0.718]


Epoch [4/10] → Avg Loss: 0.6743 | Train Accuracy: 56.82%


Epoch [5/10]: 100%|██████████| 67/67 [01:15<00:00,  1.12s/it, acc=58.5, loss=0.613]


Epoch [5/10] → Avg Loss: 0.6596 | Train Accuracy: 58.54%


Epoch [6/10]: 100%|██████████| 67/67 [01:12<00:00,  1.08s/it, acc=59.4, loss=0.693]


Epoch [6/10] → Avg Loss: 0.6544 | Train Accuracy: 59.43%


Epoch [7/10]: 100%|██████████| 67/67 [01:13<00:00,  1.10s/it, acc=61.4, loss=0.527]


Epoch [7/10] → Avg Loss: 0.6480 | Train Accuracy: 61.37%


Epoch [8/10]: 100%|██████████| 67/67 [01:12<00:00,  1.08s/it, acc=61.7, loss=0.509]


Epoch [8/10] → Avg Loss: 0.6358 | Train Accuracy: 61.75%


Epoch [9/10]: 100%|██████████| 67/67 [01:13<00:00,  1.10s/it, acc=61.8, loss=0.603]


Epoch [9/10] → Avg Loss: 0.6399 | Train Accuracy: 61.84%


Epoch [10/10]: 100%|██████████| 67/67 [01:13<00:00,  1.10s/it, acc=63.5, loss=0.886]

Epoch [10/10] → Avg Loss: 0.6316 | Train Accuracy: 63.51%





In [15]:
def check_accuracy(loader, model, loader_name="Data"):
    """
    Compute and print accuracy of the model on a given DataLoader.
    """
    model.eval()  # set to evaluation mode
    num_correct = 0
    num_samples = 0

    with torch.no_grad():  # no gradients needed
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, preds = outputs.max(1)
            num_correct += (preds == labels).sum().item()
            num_samples += labels.size(0)

    accuracy = 100.0 * num_correct / num_samples
    print(f"[{loader_name}] Accuracy: {num_correct}/{num_samples} = {accuracy:.2f}%")

    model.train()  # switch back to training mode
    return accuracy

# -------------------
# Usage
# -------------------
check_accuracy(train_loader, model, loader_name="Training Data")
check_accuracy(test_loader, model, loader_name="Test Data")


[Training Data] Accuracy: 2892/4240 = 68.21%
[Test Data] Accuracy: 707/1060 = 66.70%


66.69811320754717

In [18]:
import torchvision.models as models

# =========================
# Device and hyperparameters
# =========================
device = 'cuda' if torch.cuda.is_available() else 'cpu'
num_classes = 2
batch_size = 32
num_epochs = 10
learning_rate = 1e-3

# =========================
# Transformations / Augmentation
# =========================
transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

# =========================
# Dataset
# =========================
DATA_DIR = "images/"  # folder containing 'snake' and 'spider'
full_dataset = datasets.ImageFolder(root=DATA_DIR, transform=transform)

# Split train/test
total_size = len(full_dataset)
train_size = int(0.8 * total_size)
test_size = total_size - train_size
train_dataset, test_dataset = random_split(
    full_dataset, [train_size, test_size], generator=torch.Generator().manual_seed(42)
)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2, pin_memory=True)

# =========================
# Pretrained VGG16-BN
# =========================
model = models.vgg16_bn(pretrained=True)

# Freeze feature extractor (optional at first)
for param in model.features.parameters():
    param.requires_grad = False

# Replace classifier for 2 classes
model.classifier[6] = nn.Linear(4096, num_classes)
model = model.to(device)

# =========================
# Loss and optimizer
# =========================
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

# =========================
# Training loop
# =========================
for epoch in range(num_epochs):
    model.train()
    running_loss, correct, total = 0, 0, 0
    loop = tqdm(train_loader, desc=f"Epoch [{epoch+1}/{num_epochs}]")

    for imgs, labels in loop:
        imgs, labels = imgs.to(device), labels.to(device)

        outputs = model(imgs)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, preds = outputs.max(1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

        loop.set_postfix(loss=loss.item(), acc=100.0*correct/total)

    scheduler.step()
    print(f"Epoch [{epoch+1}/{num_epochs}] → Avg Loss: {running_loss/len(train_loader):.4f} | Train Acc: {100.0*correct/total:.2f}%")

# =========================
# Evaluation function
# =========================
def check_accuracy(loader, model, loader_name="Data"):
    model.eval()
    num_correct, num_samples = 0, 0
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, preds = outputs.max(1)
            num_correct += (preds == labels).sum().item()
            num_samples += labels.size(0)
    accuracy = 100.0 * num_correct / num_samples
    print(f"[{loader_name}] Accuracy: {num_correct}/{num_samples} = {accuracy:.2f}%")
    model.train()
    return accuracy

# =========================
# Evaluate
# =========================
check_accuracy(train_loader, model, loader_name="Training Data")
check_accuracy(test_loader, model, loader_name="Test Data")



Downloading: "https://download.pytorch.org/models/vgg16_bn-6c64b313.pth" to /root/.cache/torch/hub/checkpoints/vgg16_bn-6c64b313.pth


100%|██████████| 528M/528M [00:04<00:00, 124MB/s]
Epoch [1/10]: 100%|██████████| 133/133 [01:01<00:00,  2.18it/s, acc=78, loss=0.566]


Epoch [1/10] → Avg Loss: 0.5129 | Train Acc: 78.00%


Epoch [2/10]: 100%|██████████| 133/133 [00:59<00:00,  2.22it/s, acc=83.6, loss=0.245]


Epoch [2/10] → Avg Loss: 0.3481 | Train Acc: 83.63%


Epoch [3/10]: 100%|██████████| 133/133 [00:59<00:00,  2.23it/s, acc=83.1, loss=0.261]


Epoch [3/10] → Avg Loss: 0.3754 | Train Acc: 83.11%


Epoch [4/10]: 100%|██████████| 133/133 [01:02<00:00,  2.14it/s, acc=83.7, loss=0.465]


Epoch [4/10] → Avg Loss: 0.3555 | Train Acc: 83.70%


Epoch [5/10]: 100%|██████████| 133/133 [01:00<00:00,  2.21it/s, acc=84.3, loss=0.798]


Epoch [5/10] → Avg Loss: 0.3509 | Train Acc: 84.27%


Epoch [6/10]: 100%|██████████| 133/133 [00:59<00:00,  2.22it/s, acc=86.8, loss=0.27]


Epoch [6/10] → Avg Loss: 0.2890 | Train Acc: 86.84%


Epoch [7/10]: 100%|██████████| 133/133 [01:00<00:00,  2.21it/s, acc=87.3, loss=0.614]


Epoch [7/10] → Avg Loss: 0.2595 | Train Acc: 87.29%


Epoch [8/10]: 100%|██████████| 133/133 [01:02<00:00,  2.14it/s, acc=87.5, loss=0.43]


Epoch [8/10] → Avg Loss: 0.2553 | Train Acc: 87.55%


Epoch [9/10]: 100%|██████████| 133/133 [01:00<00:00,  2.18it/s, acc=88.3, loss=0.383]


Epoch [9/10] → Avg Loss: 0.2574 | Train Acc: 88.25%


Epoch [10/10]:  96%|█████████▌| 128/133 [00:58<00:02,  2.20it/s, acc=87.7, loss=0.219]


KeyboardInterrupt: 