<a href="https://colab.research.google.com/github/AnkitaMishra31/Deep-Learning-Experiment-1/blob/main/dl_exp_4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import random
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, random_split
import kagglehub

In [2]:
# Single source of truth for the seed so all random number generators stay in sync.
SEED = 42

random.seed(SEED)        # Python's built-in RNG
np.random.seed(SEED)     # NumPy's global RNG
torch.manual_seed(SEED)  # PyTorch's default generator
if torch.cuda.is_available():
    # Seed every visible GPU rather than only the currently selected one.
    torch.cuda.manual_seed_all(SEED)

In [3]:
# Prefer the GPU whenever PyTorch can see one; otherwise run on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")
if device.type == "cuda":
    # Report the name of GPU index 0.
    print(f"GPU: {torch.cuda.get_device_name(0)}")

Using device: cpu


In [4]:
print("\n=== Loading Datasets ===")

# CIFAR-10: directory that torchvision is later asked to download into.
cifar_root = "./data"

# Dogs vs Cats: kagglehub hands back the local path of the downloaded dataset.
path = kagglehub.dataset_download("bhavikjikadara/dog-and-cat-classification-dataset")
print(f"Dogs vs Cats path: {path}")


=== Loading Datasets ===
Using Colab cache for faster access to the 'dog-and-cat-classification-dataset' dataset.
Dogs vs Cats path: /kaggle/input/dog-and-cat-classification-dataset


In [5]:
# Training pipeline: resize to a fixed 128x128, apply light random augmentation,
# convert to a tensor and normalise each channel as (x - 0.5) / 0.5.
train_transform = transforms.Compose(
    [
        transforms.Resize(size=(128, 128)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomRotation(degrees=15),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ]
)

# Evaluation pipeline: same resize and normalisation, but no random augmentation.
test_transform = transforms.Compose(
    [
        transforms.Resize(size=(128, 128)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ]
)

In [6]:
# Two views of the same image folder: one with training augmentation, one with
# the deterministic evaluation transform. Both scan the same root, so index i
# refers to the same image file in either view.
dogcat_dataset = datasets.ImageFolder(root=path, transform=train_transform)
dogcat_eval_dataset = datasets.ImageFolder(root=path, transform=test_transform)

# ImageFolder takes its class names from the immediate sub-directories of `root`.
# NOTE(review): if the download nests the class folders under a single wrapper
# directory, only one "class" would be found here — the guard below makes that
# visible instead of silently training on a single label.
if len(dogcat_dataset.classes) < 2:
    raise ValueError(
        f"Expected at least 2 class folders directly under {path!r}, "
        f"found {dogcat_dataset.classes}. Point `root` at the directory that "
        "directly contains the class sub-folders."
    )

# 80/20 train/validation split.
train_size = int(0.8 * len(dogcat_dataset))
val_size = len(dogcat_dataset) - train_size

# A dedicated generator keeps the split identical regardless of how much of the
# global RNG state earlier cells have consumed.
dogcat_split_generator = torch.Generator().manual_seed(42)
dogcat_train, dogcat_val_augmented = random_split(
    dogcat_dataset, [train_size, val_size], generator=dogcat_split_generator
)

# Validation must not be augmented: reuse the held-out indices on the
# evaluation view of the dataset.
dogcat_val = torch.utils.data.Subset(dogcat_eval_dataset, dogcat_val_augmented.indices)

dogcat_train_loader = DataLoader(dogcat_train, batch_size=64, shuffle=True, num_workers=2)
dogcat_val_loader = DataLoader(dogcat_val, batch_size=64, shuffle=False, num_workers=2)


In [7]:
# CIFAR-10: the training split uses the augmenting transform, the test split
# uses the deterministic one. Both are downloaded into `cifar_root` if missing.
# NOTE(review): the test loader is later passed as the validation loader for
# hyperparameter search, so the accuracy reported on it is used for model selection.
cifar_train_dataset = datasets.CIFAR10(
    root=cifar_root,
    train=True,
    download=True,
    transform=train_transform,
)
cifar_test_dataset = datasets.CIFAR10(
    root=cifar_root,
    train=False,
    download=True,
    transform=test_transform,
)

# Only the training loader shuffles.
cifar_train_loader = DataLoader(
    cifar_train_dataset,
    batch_size=128,
    shuffle=True,
    num_workers=2,
)
cifar_test_loader = DataLoader(
    cifar_test_dataset,
    batch_size=128,
    shuffle=False,
    num_workers=2,
)

# Summary of split sizes for both datasets.
print(f"Dogs vs Cats - Train: {len(dogcat_train)}, Val: {len(dogcat_val)}")
print(f"CIFAR-10 - Train: {len(cifar_train_dataset)}, Test: {len(cifar_test_dataset)}")


100%|██████████| 170M/170M [00:02<00:00, 79.5MB/s]


Dogs vs Cats - Train: 19998, Val: 5000
CIFAR-10 - Train: 50000, Test: 10000


In [8]:
class ResidualBlock(nn.Module):
    """Residual block: two 3x3 conv + batch-norm layers plus a skip connection.

    The skip path is an identity when input and output shapes match; otherwise
    it is a 1x1 convolution (with the same stride) followed by batch norm so
    that it can be added to the main path.

    Args:
        in_channels: number of channels of the input feature map.
        out_channels: number of channels produced by the block.
        stride: stride of the first convolution and of the projection shortcut.
        activation: name of the non-linearity: 'relu', 'elu' or 'gelu'.

    Raises:
        ValueError: if ``activation`` is not one of the supported names.
    """

    def __init__(self, in_channels, out_channels, stride=1, activation='relu'):
        super().__init__()

        if activation == 'relu':
            self.act = nn.ReLU(inplace=True)
        elif activation == 'elu':
            self.act = nn.ELU(inplace=True)
        elif activation == 'gelu':
            self.act = nn.GELU()
        else:
            # Fail at construction time; otherwise `self.act` would be missing
            # and the error would only surface inside forward().
            raise ValueError(
                f"Unsupported activation {activation!r}; expected 'relu', 'elu' or 'gelu'"
            )

        # Main path. Convolutions carry no bias because batch norm follows each one.
        self.conv1 = nn.Conv2d(in_channels, out_channels, 3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, 3, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)

        # Skip connection: identity by default, projection when the shape changes.
        self.skip = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.skip = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, 1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )

    def forward(self, x):
        """Return ``act(bn2(conv2(act(bn1(conv1(x))))) + skip(x))``."""
        # Compute the shortcut first, before the main path transforms the input.
        identity = self.skip(x)

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.act(out)

        out = self.conv2(out)
        out = self.bn2(out)

        # Residual addition followed by the final non-linearity.
        out += identity
        out = self.act(out)

        return out


class ImprovedCNN(nn.Module):
    """CNN classifier built from a conv stem, three residual stages and an MLP head.

    Layout: 7x7 stride-2 convolution -> batch norm -> activation -> 3x3 stride-2
    max pool -> three stages of two ``ResidualBlock``s (64, 128, 256 channels;
    the last two stages downsample with stride 2) -> global average pooling ->
    dropout / linear / activation / dropout / linear.

    Args:
        num_classes: number of output logits.
        activation: 'relu', 'elu' or 'gelu'. Used in the stem and the classifier
            head, and forwarded by name to every ``ResidualBlock``.
        dropout_rate: dropout probability before the first linear layer; half of
            this value is used before the final linear layer.

    Raises:
        ValueError: if ``activation`` is not one of the supported names.
    """

    def __init__(self, num_classes, activation='relu', dropout_rate=0.3):
        super().__init__()

        if activation == 'relu':
            self.act = nn.ReLU(inplace=True)
        elif activation == 'elu':
            self.act = nn.ELU(inplace=True)
        elif activation == 'gelu':
            self.act = nn.GELU()
        else:
            # Reject typos here; otherwise `self.act` would simply be missing
            # and the failure would appear later with a less helpful message.
            raise ValueError(
                f"Unsupported activation {activation!r}; expected 'relu', 'elu' or 'gelu'"
            )

        # Initial convolution (3 input channels, i.e. RGB images).
        self.conv1 = nn.Conv2d(3, 64, 7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)

        # Residual stages, two blocks each.
        self.layer1 = self._make_layer(64, 64, 2, activation)
        self.layer2 = self._make_layer(64, 128, 2, activation, stride=2)
        self.layer3 = self._make_layer(128, 256, 2, activation, stride=2)

        # Global average pooling collapses each channel to a single value.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))

        # Classifier head. The activation module is the same object as `self.act`.
        self.fc = nn.Sequential(
            nn.Dropout(dropout_rate),
            nn.Linear(256, 128),
            self.act,
            nn.Dropout(dropout_rate / 2),
            nn.Linear(128, num_classes)
        )

    def _make_layer(self, in_channels, out_channels, num_blocks, activation, stride=1):
        """Build one stage of ``num_blocks`` residual blocks.

        Only the first block changes the channel count and applies ``stride``;
        the remaining blocks keep ``out_channels`` and use stride 1.
        """
        layers = []
        layers.append(ResidualBlock(in_channels, out_channels, stride, activation))
        for _ in range(1, num_blocks):
            layers.append(ResidualBlock(out_channels, out_channels, 1, activation))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Map a batch of 3-channel images to ``num_classes`` logits per image."""
        # Stem
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.act(x)
        x = self.maxpool(x)

        # Residual stages
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)

        # Pool to (batch, 256, 1, 1), flatten to (batch, 256), then classify.
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x

In [9]:
def initialize_model(model, init_method='he'):
    """Re-initialise the weights of ``model`` in place.

    * ``nn.Conv2d`` and ``nn.Linear`` weights follow ``init_method``.
    * ``nn.BatchNorm2d`` affine parameters are reset to weight 1 / bias 0.
    * ``nn.Linear`` biases are set to 0. Convolution biases are left untouched.

    Args:
        model: module whose sub-modules are visited via ``model.modules()``.
        init_method: 'he' (Kaiming normal), 'xavier_uniform' or 'xavier_normal'.

    Raises:
        ValueError: if ``init_method`` is not one of the supported names.
    """
    if init_method not in ('he', 'xavier_uniform', 'xavier_normal'):
        # Previously an unknown name silently kept PyTorch's default weights.
        raise ValueError(
            f"Unsupported init_method {init_method!r}; "
            "expected 'he', 'xavier_uniform' or 'xavier_normal'"
        )

    for m in model.modules():
        if isinstance(m, nn.Conv2d):
            if init_method == 'he':
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif init_method == 'xavier_uniform':
                nn.init.xavier_uniform_(m.weight)
            else:
                nn.init.xavier_normal_(m.weight)
        elif isinstance(m, nn.BatchNorm2d):
            # weight/bias are None when the layer was built with affine=False.
            if m.weight is not None:
                nn.init.constant_(m.weight, 1)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.Linear):
            if init_method == 'he':
                nn.init.kaiming_normal_(m.weight)
            elif init_method == 'xavier_uniform':
                # Honour the requested distribution; the normal variant was
                # previously used here for both Xavier options.
                nn.init.xavier_uniform_(m.weight)
            else:
                nn.init.xavier_normal_(m.weight)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)

In [10]:
def train_with_scheduler(model, train_loader, val_loader, optimizer, scheduler, epochs=10):
    """Train ``model`` with cross-entropy loss and return the best validation accuracy.

    Each epoch runs one pass over ``train_loader`` (with parameter updates),
    one pass over ``val_loader`` (no gradients), and then steps ``scheduler``
    once. Progress is printed for the first epoch and every second epoch.

    Args:
        model: classifier returning one row of logits per input sample.
        train_loader: iterable of ``(inputs, labels)`` batches used for training.
        val_loader: iterable of ``(inputs, labels)`` batches used for evaluation.
        optimizer: optimizer already bound to ``model``'s parameters.
        scheduler: learning-rate scheduler stepped once per epoch, or ``None``.
        epochs: number of epochs to run.

    Returns:
        The highest validation accuracy (a fraction in ``[0, 1]``) seen in any
        epoch; ``0.0`` if no epoch ran or no validation samples were seen.
    """
    criterion = nn.CrossEntropyLoss()
    best_acc = 0.0

    # Send batches to wherever the model lives instead of relying on a
    # notebook-level global.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    for epoch in range(epochs):
        # Learning rate in effect for this epoch; read before the scheduler
        # advances so the log line matches the metrics printed next to it.
        epoch_lr = optimizer.param_groups[0]['lr']

        # Training
        model.train()
        train_correct = 0
        train_total = 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(run_device), labels.to(run_device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Predicted class = index of the largest logit.
            _, predicted = outputs.max(1)
            train_total += labels.size(0)
            train_correct += predicted.eq(labels).sum().item()

        # Validation
        model.eval()
        val_correct = 0
        val_total = 0

        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(run_device), labels.to(run_device)
                outputs = model(inputs)
                _, predicted = outputs.max(1)
                val_total += labels.size(0)
                val_correct += predicted.eq(labels).sum().item()

        # An empty loader yields 0.0 rather than a ZeroDivisionError.
        train_acc = train_correct / train_total if train_total else 0.0
        val_acc = val_correct / val_total if val_total else 0.0

        # Step scheduler
        if scheduler is not None:
            scheduler.step()

        if val_acc > best_acc:
            best_acc = val_acc

        if (epoch + 1) % 2 == 0 or epoch == 0:
            print(f"Epoch {epoch+1}/{epochs} - Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}, LR: {epoch_lr:.6f}")

    return best_acc

In [11]:
def random_search(train_loader, val_loader, num_classes, n_trials=15, epochs=5):
    """Random hyperparameter search over activation, init, optimizer and dropout.

    Every trial samples one value per hyperparameter with ``random.choice``,
    builds a fresh ``ImprovedCNN``, trains it for ``epochs`` epochs with a
    cosine-annealed learning rate and records its best validation accuracy.

    Args:
        train_loader: loader used to train each candidate.
        val_loader: loader used to score each candidate.
        num_classes: number of output classes of the model.
        n_trials: number of random configurations to evaluate.
        epochs: training epochs per trial; also the cosine schedule length.

    Returns:
        ``(best_config, best_acc, results)`` where ``results`` is a list of
        ``(config, accuracy)`` tuples in trial order. ``best_config`` is
        ``None`` (and ``best_acc`` is ``0.0``) only when ``n_trials`` is 0.
    """

    # Search space
    activations = ['relu', 'elu', 'gelu']
    init_methods = ['he', 'xavier_uniform', 'xavier_normal']
    optimizers_config = [
        ('adam', {'lr': 0.001, 'weight_decay': 1e-4}),
        ('adam', {'lr': 0.0005, 'weight_decay': 1e-5}),
        ('sgd', {'lr': 0.01, 'momentum': 0.9, 'weight_decay': 1e-4}),
        ('sgd', {'lr': 0.05, 'momentum': 0.9, 'weight_decay': 1e-4}),
        ('adamw', {'lr': 0.001, 'weight_decay': 0.01})
    ]
    dropout_rates = [0.2, 0.3, 0.4, 0.5]

    # Name -> optimizer class; an unknown name now fails with a KeyError
    # instead of leaving `optimizer` unbound.
    optimizer_classes = {
        'adam': optim.Adam,
        'sgd': optim.SGD,
        'adamw': optim.AdamW,
    }

    best_acc = 0.0
    best_config = None
    results = []

    print(f"\n=== Random Search - {n_trials} Trials ===")

    for trial in range(n_trials):
        # Random sampling (the order of these draws determines the RNG stream).
        activation = random.choice(activations)
        init_method = random.choice(init_methods)
        opt_name, opt_params = random.choice(optimizers_config)
        dropout = random.choice(dropout_rates)

        config = {
            'activation': activation,
            'init': init_method,
            'optimizer': opt_name,
            # Copy so the returned config does not alias the search-space dict.
            'opt_params': dict(opt_params),
            'dropout': dropout
        }

        print(f"\nTrial {trial+1}/{n_trials}: {activation}, {init_method}, {opt_name}, dropout={dropout}")

        # Create model
        model = ImprovedCNN(num_classes, activation, dropout).to(device)
        initialize_model(model, init_method)

        # Create optimizer
        optimizer = optimizer_classes[opt_name](model.parameters(), **opt_params)

        # Cosine annealing over exactly the epochs that will be trained
        # (T_max must stay positive).
        scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=max(epochs, 1))

        # Train
        acc = train_with_scheduler(model, train_loader, val_loader, optimizer, scheduler, epochs=epochs)

        results.append((config, acc))

        # The first trial always becomes the incumbent, so a configuration is
        # returned even if every trial scores 0.0.
        if best_config is None or acc > best_acc:
            best_acc = acc
            best_config = config
            print(f"  → New best: {acc:.4f}")

    return best_config, best_acc, results

In [None]:
# Hyperparameter search on Dogs vs Cats (two classes), scored on its validation loader.
print("\n" + "=" * 60, "DOGS vs CATS - Random Search", "=" * 60, sep="\n")
dogcat_config, dogcat_acc, dogcat_results = random_search(
    train_loader=dogcat_train_loader,
    val_loader=dogcat_val_loader,
    num_classes=2,
    n_trials=4,
)

# Same search on CIFAR-10 (ten classes), scored on the CIFAR test loader.
print("\n" + "=" * 60, "CIFAR-10 - Random Search", "=" * 60, sep="\n")
cifar_config, cifar_acc, cifar_results = random_search(
    train_loader=cifar_train_loader,
    val_loader=cifar_test_loader,
    num_classes=10,
    n_trials=4,
)


DOGS vs CATS - Random Search

=== Random Search - 4 Trials ===

Trial 1/4: gelu, xavier_uniform, adam, dropout=0.2


In [None]:
def _train_final_model(config, num_classes, train_loader, val_loader, checkpoint_path, epochs=5):
    """Train a fresh ``ImprovedCNN`` from a search config and save its weights.

    Args:
        config: dict with the keys 'activation', 'dropout', 'init', 'optimizer'
            and 'opt_params', as produced by the random search.
        num_classes: number of output classes.
        train_loader: loader used for training.
        val_loader: loader used for validation.
        checkpoint_path: file that receives the model's ``state_dict``.
        epochs: number of training epochs; also the cosine schedule length.

    Returns:
        ``(model, best_val_acc)``. The saved weights are those after the last
        epoch, not necessarily those of the best epoch.

    Raises:
        ValueError: if ``config`` is ``None`` (the search produced no config).
    """
    if config is None:
        raise ValueError("No configuration available; run the random search first")

    model = ImprovedCNN(num_classes, config['activation'], config['dropout']).to(device)
    initialize_model(model, config['init'])

    # 'adam' and 'sgd' map to their optimizers; any other name uses AdamW.
    optimizer_classes = {'adam': optim.Adam, 'sgd': optim.SGD}
    optimizer_cls = optimizer_classes.get(config['optimizer'], optim.AdamW)
    optimizer = optimizer_cls(model.parameters(), **config['opt_params'])

    # The cosine schedule spans exactly the epochs trained.
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)
    best_val_acc = train_with_scheduler(model, train_loader, val_loader,
                                        optimizer, scheduler, epochs=epochs)

    torch.save(model.state_dict(), checkpoint_path)
    return model, best_val_acc


print("\n" + "="*60)
print("FINAL TRAINING - Dogs vs Cats")
print("="*60)
print(f"Best config: {dogcat_config}")

final_dogcat_model, final_dogcat_acc = _train_final_model(
    dogcat_config, 2, dogcat_train_loader, dogcat_val_loader, "improved_cnn_dogcat.pth"
)

print("\n" + "="*60)
print("FINAL TRAINING - CIFAR-10")
print("="*60)
print(f"Best config: {cifar_config}")

final_cifar_model, final_cifar_acc = _train_final_model(
    cifar_config, 10, cifar_train_loader, cifar_test_loader, "improved_cnn_cifar10.pth"
)

In [None]:
def _finetune_resnet18(num_classes, train_loader, val_loader, checkpoint_path, epochs=3):
    """Fine-tune an ImageNet-pretrained ResNet-18 and save its weights.

    The final fully connected layer is replaced by a new ``num_classes``-way
    linear layer. All parameters are trained with Adam (lr 1e-4, weight decay
    1e-4) and the learning rate is halved after every epoch.

    NOTE(review): the data loaders normalise images with mean 0.5 / std 0.5;
    confirm this matches the preprocessing expected by the pretrained weights.

    Args:
        num_classes: number of output classes for the new head.
        train_loader: loader used for training.
        val_loader: loader used for validation.
        checkpoint_path: file that receives the model's ``state_dict``.
        epochs: number of fine-tuning epochs.

    Returns:
        ``(model, best_val_acc)``.
    """
    model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
    # Swap the pretrained classifier head for one sized to this task.
    model.fc = nn.Linear(model.fc.in_features, num_classes)
    model = model.to(device)

    optimizer = optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-4)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.5)

    best_val_acc = train_with_scheduler(model, train_loader, val_loader,
                                        optimizer, scheduler, epochs=epochs)
    torch.save(model.state_dict(), checkpoint_path)
    return model, best_val_acc


print("\n" + "="*60)
print("TRANSFER LEARNING - ResNet-18")
print("="*60)

# Dogs vs Cats
print("\nTraining ResNet-18 on Dogs vs Cats...")
resnet_dogcat, resnet_dogcat_acc = _finetune_resnet18(
    2, dogcat_train_loader, dogcat_val_loader, "resnet18_dogcat_alt.pth"
)

# CIFAR-10
print("\nTraining ResNet-18 on CIFAR-10...")
resnet_cifar, resnet_cifar_acc = _finetune_resnet18(
    10, cifar_train_loader, cifar_test_loader, "resnet18_cifar10_alt.pth"
)


In [None]:
# Banner
print("\n" + "=" * 60, "FINAL RESULTS COMPARISON", "=" * 60, sep="\n")

# Dogs vs Cats: best search trial, retrained final model, and ResNet-18.
print("\n".join([
    "\nDogs vs Cats:",
    f"  Improved CNN (best search): {dogcat_acc:.4f}",
    f"  Improved CNN (final):       {final_dogcat_acc:.4f}",
    f"  ResNet-18:                  {resnet_dogcat_acc:.4f}",
]))

# CIFAR-10: same three rows.
print("\n".join([
    "\nCIFAR-10:",
    f"  Improved CNN (best search): {cifar_acc:.4f}",
    f"  Improved CNN (final):       {final_cifar_acc:.4f}",
    f"  ResNet-18:                  {resnet_cifar_acc:.4f}",
]))

# Closing rule
print("\n" + "=" * 60)