# **CNN IMPLEMENTATION**

**Objective**

The objective of this lab is to implement Convolutional Neural Networks (CNNs) to classify
images in the Cats vs. Dogs dataset and the CIFAR-10 dataset. You will explore different
configurations by experimenting with:

● 3 Activation Functions


● 3 Weight Initialization Techniques

● 3 Optimizers

Additionally, you will compare your best CNN model for both datasets with a pretrained
ResNet-18 model.

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import torchvision.models as models
import copy
import os
import urllib.request
import zipfile
from PIL import Image
import shutil


# Default dataset identifier.
# NOTE(review): no code visible in this file reads DATASET_NAME (the entry
# point iterates its own dataset list) - confirm before relying on it.
DATASET_NAME = 'CATS_DOGS'
# Mini-batch size used for both the training and the validation DataLoader.
BATCH_SIZE = 32
# Number of passes over the training data for every experiment.
NUM_EPOCHS = 2
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

def _remove_file(path):
    """Delete *path* on a best-effort basis; return True when it was removed."""
    try:
        os.remove(path)
    except OSError:
        return False
    return True


def download_and_extract_catsdogs(root_dir='./data'):
    """Download, unpack and sanitise the Microsoft Cats vs Dogs dataset.

    The archive is stored as ``<root_dir>/cats_dogs.zip`` and its ``PetImages``
    folder is renamed to ``<root_dir>/cats_dogs_extracted``. Download and
    extraction are skipped when their results already exist. Afterwards the
    ``Cat`` and ``Dog`` sub-folders are scanned and every zero-byte file or
    file that PIL cannot open/verify is deleted.

    Args:
        root_dir: Directory that holds the archive and the extracted images.

    Returns:
        Path of the extracted dataset folder, or ``None`` when the archive is
        corrupt or does not contain the expected ``PetImages`` folder.

    Raises:
        RuntimeError: If the archive could not be downloaded.
    """
    # [FIX] Updated URL to the new '5340.zip' version (Old '3367a.zip' is dead)
    url = "https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_5340.zip"
    filename = "cats_dogs.zip"
    download_path = os.path.join(root_dir, filename)
    extract_path = os.path.join(root_dir, 'cats_dogs_extracted')

    # Create data directory
    os.makedirs(root_dir, exist_ok=True)

    # 1. Download
    if not os.path.exists(download_path) and not os.path.exists(extract_path):
        print("Downloading Cats vs Dogs dataset (New Version 5340)...")
        try:
            # Add user-agent to avoid 403 forbidden errors on some networks
            opener = urllib.request.build_opener()
            opener.addheaders = [('User-agent', 'Mozilla/5.0')]
            urllib.request.install_opener(opener)
            urllib.request.urlretrieve(url, download_path)
            print("Download complete.")
        except Exception as e:
            print(f"Download failed: {e}")
            # A partial file would make the next run skip the download and
            # then fail on extraction, so drop it before reporting the error.
            _remove_file(download_path)
            raise RuntimeError("Could not download dataset. Check your internet connection.") from e

    # 2. Extract
    if not os.path.exists(extract_path):
        print("Extracting dataset...")
        try:
            with zipfile.ZipFile(download_path, 'r') as zip_ref:
                zip_ref.extractall(root_dir)
        except zipfile.BadZipFile:
            # Remove the broken archive so that simply re-running downloads it again.
            if _remove_file(download_path):
                print("Error: The downloaded zip file is corrupted. It was deleted; please try again.")
            else:
                print("Error: The downloaded zip file is corrupted. Please delete it and try again.")
            return None

        # The zip contains a 'PetImages' folder. We rename it to avoid conflicts.
        original_folder = os.path.join(root_dir, 'PetImages')
        if os.path.exists(original_folder):
            os.rename(original_folder, extract_path)
        if not os.path.exists(extract_path):
            # Without this check a path that does not exist would be returned.
            print("Error: The archive did not contain the expected 'PetImages' folder.")
            return None
        print("Extraction complete.")
    else:
        print("Dataset already found.")

    # 3. Clean Corrupt Images
    print("Scanning for corrupt images (this is required to prevent crashes)...")
    removed = 0
    for folder in ('Cat', 'Dog'):
        folder_path = os.path.join(extract_path, folder)
        if not os.path.exists(folder_path):
            continue

        for entry in os.listdir(folder_path):
            file_path = os.path.join(folder_path, entry)
            try:
                if os.path.getsize(file_path) == 0:
                    # Check 1: File size
                    is_corrupt = True
                else:
                    # Check 2: Verify Image Header
                    with Image.open(file_path) as img:
                        img.verify()
                    is_corrupt = False
            except (OSError, SyntaxError):
                # PIL's UnidentifiedImageError derives from OSError, so it is
                # covered here without looking the name up on the Image module.
                is_corrupt = True

            # Only count files that were really deleted.
            if is_corrupt and _remove_file(file_path):
                removed += 1

    print(f"Cleanup complete. Removed {removed} corrupt images.")
    return extract_path

def get_dataloaders(dataset_name):
    """Build the training and validation DataLoaders for one dataset.

    Args:
        dataset_name: ``'CIFAR10'`` (torchvision train/test split, 32x32
            images) or ``'CATS_DOGS'`` (downloaded on demand, resized to 64x64
            and randomly split 80/20).

    Returns:
        Tuple ``(train_loader, val_loader, num_classes)``.

    Raises:
        ValueError: If ``dataset_name`` is not one of the supported names.
        RuntimeError: If the Cats vs Dogs data could not be prepared.
    """
    print(f"Preparing data for: {dataset_name}...")

    if dataset_name == 'CIFAR10':
        transform = transforms.Compose([
            transforms.Resize((32, 32)),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
        trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
        valset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
        num_classes = 10

    elif dataset_name == 'CATS_DOGS':
        # 1. Auto-download and get path
        data_path = download_and_extract_catsdogs()

        if data_path is None:
            raise RuntimeError("Failed to prepare CATS_DOGS dataset.")

        # 2. Transforms (Resize is mandatory as images are different sizes)
        transform = transforms.Compose([
            transforms.Resize((64, 64)),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])

        # 3. Create Dataset
        # ImageFolder expects structure: root/class_x/xxx.jpg
        full_dataset = torchvision.datasets.ImageFolder(root=data_path, transform=transform)

        # 4. Split (80% Train, 20% Val)
        train_size = int(0.8 * len(full_dataset))
        val_size = len(full_dataset) - train_size
        trainset, valset = torch.utils.data.random_split(full_dataset, [train_size, val_size])
        num_classes = 2

    else:
        # Fail early with a clear message instead of an UnboundLocalError below.
        raise ValueError(
            f"Unknown dataset_name {dataset_name!r}; expected 'CIFAR10' or 'CATS_DOGS'."
        )

    # Only the training data is shuffled; validation order stays fixed.
    train_loader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = torch.utils.data.DataLoader(valset, batch_size=BATCH_SIZE, shuffle=False)

    return train_loader, val_loader, num_classes

Using device: cpu


# **Steps to Complete the Task**
**1. CNN Implementation**


**● Define a CNN architecture:**

o Experiment with different numbers of convolutional layers and filter sizes.

o Include pooling layers and fully connected layers as needed.

o Add dropout and batch normalization for better regularization and stability.

**● Experiment with configurations:**

o Implement 3 different activation functions:

*  ReLU

*  Tanh

*  Leaky ReLU

o Implement 3 different weight initialization techniques:

*  Xavier Initialization

*  Kaiming Initialization

*  Random Initialization

o Experiment with 3 optimizers:

*  SGD

*  Adam

*  RMSprop

In [2]:
class ModularCNN(nn.Module):
    """Small CNN with a selectable activation and weight initialisation.

    Architecture: two ``Conv3x3 -> BatchNorm -> activation -> MaxPool2x2``
    stages (32 and 64 channels), followed by a 512-unit fully connected layer
    with dropout and a final classification layer.

    Args:
        activation_name: ``'relu'``, ``'tanh'`` or ``'leaky_relu'``.
        init_method: ``'xavier'``, ``'kaiming'`` or ``'random'``; applied to the
            weights of every convolutional and linear layer.
        num_classes: Number of output logits.
        input_size: Height/width of the square input images. When omitted it is
            32 for ``num_classes == 10`` (CIFAR-10) and 64 otherwise
            (Cats vs Dogs), which is the original behaviour.

    Raises:
        ValueError: If ``activation_name`` or ``init_method`` is not supported.
    """

    _ACTIVATIONS = {
        'relu': nn.ReLU,
        'tanh': nn.Tanh,
        'leaky_relu': nn.LeakyReLU,
    }
    _INIT_METHODS = ('xavier', 'kaiming', 'random')

    def __init__(self, activation_name, init_method, num_classes, input_size=None):
        super().__init__()

        # Reject typos up front: an unknown activation used to surface only as
        # an AttributeError in forward(), and an unknown init method silently
        # kept PyTorch's default initialisation.
        if activation_name not in self._ACTIVATIONS:
            raise ValueError(
                f"Unknown activation {activation_name!r}; expected one of {sorted(self._ACTIVATIONS)}."
            )
        if init_method not in self._INIT_METHODS:
            raise ValueError(
                f"Unknown init method {init_method!r}; expected one of {list(self._INIT_METHODS)}."
            )

        # [Step 1.1] Define CNN Architecture
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32) # [Step 1.1] Batch Normalization
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.pool = nn.MaxPool2d(2, 2) # [Step 1.1] Pooling Layers
        self.dropout = nn.Dropout(0.5) # [Step 1.1] Dropout

        # The padded 3x3 convolutions keep the spatial size; each of the two
        # 2x2 poolings halves it (rounding down).
        if input_size is None:
            input_size = 32 if num_classes == 10 else 64
        pooled = (input_size // 2) // 2
        self.flatten_size = 64 * pooled * pooled

        self.fc1 = nn.Linear(self.flatten_size, 512)
        self.fc2 = nn.Linear(512, num_classes)

        # [Step 1.2] Implement 3 different activation functions
        self.activation = self._ACTIVATIONS[activation_name]()

        # [Step 1.3] Implement 3 different weight initialization techniques
        self.init_method = init_method
        self._initialize_weights()

    def forward(self, x):
        """Return the class logits for a batch of images."""
        x = self.pool(self.activation(self.bn1(self.conv1(x))))
        x = self.pool(self.activation(self.bn2(self.conv2(x))))
        x = x.view(x.size(0), -1) # Flatten
        x = self.dropout(self.activation(self.fc1(x)))
        x = self.fc2(x)
        return x

    def _initialize_weights(self):
        """Re-initialise conv/linear weights according to ``self.init_method``.

        Biases and batch-norm parameters are not touched here.
        """
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                if self.init_method == 'xavier':
                    nn.init.xavier_uniform_(m.weight)
                elif self.init_method == 'kaiming':
                    # NOTE(review): the ReLU gain is used for every activation,
                    # including tanh - kept as is so results stay comparable.
                    nn.init.kaiming_uniform_(m.weight, nonlinearity='relu')
                elif self.init_method == 'random':
                    nn.init.normal_(m.weight, mean=0, std=0.01)

**2. Training and Evaluation**

● Train your CNN on each dataset using all combinations of activations, weight
initializations, and optimizers.

● Save the best-performing model for each dataset.

● Save the weights of your best-performing models and upload them to a GitHub
repository along with your code.

● Use accuracy and loss metrics to evaluate performance.

In [3]:
def train_one_epoch(model, train_loader, criterion, optimizer):
    """Train ``model`` for one pass over ``train_loader``.

    Args:
        model: Network to optimise; it is switched to training mode.
        train_loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        optimizer: Optimizer that updates the model's parameters.

    Returns:
        Tuple ``(mean_loss, accuracy)`` averaged over all samples seen.

    Raises:
        ValueError: If ``train_loader`` yields no samples.
    """
    model.train()

    # Move each batch to wherever the model lives; fall back to the module-level
    # device only for a model that has no parameters.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else device

    running_loss = 0.0
    correct = 0
    total = 0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(target_device), labels.to(target_device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # Weight the batch loss by the batch size so the result is a per-sample mean.
        running_loss += loss.item() * inputs.size(0)
        _, predicted = torch.max(outputs.detach(), 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    if total == 0:
        # Clear error instead of a ZeroDivisionError in the averages below.
        raise ValueError("train_loader yielded no samples")
    return running_loss / total, correct / total

def evaluate(model, val_loader, criterion):
    """Compute loss and accuracy of ``model`` on ``val_loader`` without training.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        val_loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss function called as ``criterion(outputs, labels)``.

    Returns:
        Tuple ``(mean_loss, accuracy)`` averaged over all samples seen.

    Raises:
        ValueError: If ``val_loader`` yields no samples.
    """
    model.eval()

    # Move each batch to wherever the model lives; fall back to the module-level
    # device only for a model that has no parameters.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else device

    running_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(target_device), labels.to(target_device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            # Weight the batch loss by the batch size so the result is a per-sample mean.
            running_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    if total == 0:
        # Clear error instead of a ZeroDivisionError in the averages below.
        raise ValueError("val_loader yielded no samples")
    return running_loss / total, correct / total

def run_training_experiments(train_loader, val_loader, num_classes):
    """Train a ModularCNN for every activation/init/optimizer combination.

    Each of the 27 configurations is trained from scratch for ``NUM_EPOCHS``
    epochs. The configuration whose validation accuracy after the last epoch
    is highest is kept, and its weights are written to
    ``best_model_<activation>_<init>_<optimizer>.pth``.

    Args:
        train_loader: DataLoader with the training batches.
        val_loader: DataLoader with the validation batches.
        num_classes: Number of target classes, forwarded to ``ModularCNN``.

    Returns:
        Tuple ``(best_acc, best_config)``; ``best_config`` is an empty string
        when no configuration reached a validation accuracy above zero.
    """
    # Train using all combinations
    activations = ['relu', 'tanh', 'leaky_relu']
    inits = ['xavier', 'kaiming', 'random']
    # Optimizer name -> factory; dict order fixes the order of the experiments.
    optimizer_factories = {
        'sgd': lambda params: optim.SGD(params, lr=0.01),
        'adam': lambda params: optim.Adam(params, lr=0.001),
        'rmsprop': lambda params: optim.RMSprop(params, lr=0.001),
    }

    best_acc = 0.0
    best_config = ""
    best_weights = None

    # The loss holds no per-model state, so a single instance is shared.
    criterion = nn.CrossEntropyLoss()

    for act in activations:
        for init in inits:
            for opt_name, make_optimizer in optimizer_factories.items():
                print(f"Training: Act={act}, Init={init}, Opt={opt_name}")
                model = ModularCNN(act, init, num_classes).to(device)
                optimizer = make_optimizer(model.parameters())

                # Defined before the loop so NUM_EPOCHS == 0 cannot leave it unbound.
                val_acc = 0.0

                # Training Loop
                for epoch in range(NUM_EPOCHS):
                    train_one_epoch(model, train_loader, criterion, optimizer)
                    _, val_acc = evaluate(model, val_loader, criterion)

                # Save best performing model logic
                if val_acc > best_acc:
                    best_acc = val_acc
                    best_config = f"{act}_{init}_{opt_name}"
                    # Copy the weights: the model object is discarded next iteration.
                    best_weights = copy.deepcopy(model.state_dict())
                    print(f"  -> New Best Accuracy: {val_acc:.4f}")

    # [Step 2] Save weights of best-performing model
    if best_weights is not None:
        torch.save(best_weights, f'best_model_{best_config}.pth')
        print(f"\nBest Config Saved: {best_config} with Acc: {best_acc:.4f}")

    return best_acc, best_config

**3. Transfer Learning with ResNet-18**

● Fine-tune ResNet-18 on both datasets.

● Compare its performance with your best CNN model.

In [4]:
def run_resnet_transfer(train_loader, val_loader, num_classes):
    """Train a new classification head on an ImageNet-pretrained ResNet-18.

    All pretrained parameters are frozen; only the replaced final fully
    connected layer is optimised. After every epoch that improves the
    validation accuracy the weights are saved to ``best_resnet.pth``.

    Args:
        train_loader: DataLoader with the training batches.
        val_loader: DataLoader with the validation batches.
        num_classes: Number of outputs of the new final layer.

    Returns:
        Best validation accuracy seen over ``NUM_EPOCHS`` epochs.
    """
    print("\n--- Starting Transfer Learning (Section 3) ---")

    # [Step 3] Fine-tune ResNet-18
    # torchvision >= 0.13 deprecates `pretrained=True` in favour of the weights
    # enum; fall back to the old flag on versions that do not have the enum.
    if hasattr(models, "ResNet18_Weights"):
        resnet = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
    else:
        resnet = models.resnet18(pretrained=True)

    # Freeze initial layers (optional, but standard for fine-tuning)
    for param in resnet.parameters():
        param.requires_grad = False

    # Replace final layer (the new layer's parameters are trainable by default)
    num_ftrs = resnet.fc.in_features
    resnet.fc = nn.Linear(num_ftrs, num_classes)
    resnet = resnet.to(device)

    # Only the new head is handed to the optimizer.
    optimizer = optim.Adam(resnet.fc.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()

    best_resnet_acc = 0.0

    for epoch in range(NUM_EPOCHS):
        train_one_epoch(resnet, train_loader, criterion, optimizer)
        _, val_acc = evaluate(resnet, val_loader, criterion)
        print(f"ResNet Epoch {epoch+1}: Val Acc: {val_acc:.4f}")

        if val_acc > best_resnet_acc:
            best_resnet_acc = val_acc
            torch.save(resnet.state_dict(), 'best_resnet.pth')

    return best_resnet_acc

In [4]:
if __name__ == '__main__':
    # Entry point: for every dataset run the CNN grid search and the ResNet-18
    # transfer learning, then print a side-by-side summary.

    # List of datasets to process
    datasets_to_run = ['CATS_DOGS', 'CIFAR10']

    final_results = {}

    for dataset_name in datasets_to_run:
        print(f"\n{'#'*40}")
        print(f"PROCESSING DATASET: {dataset_name}")
        print(f"{'#'*40}")

        # REMOVED: The manual safety check.
        # Reason: get_dataloaders() now handles the download automatically.

        try:
            # 1. Prepare Data (This will now auto-download CATS_DOGS if missing)
            train_loader, val_loader, num_classes = get_dataloaders(dataset_name)

            # 2. Run User CNN Experiments (Section 2 of PDF)
            best_cnn_acc, best_config = run_training_experiments(train_loader, val_loader, num_classes)

            # The checkpoint name carries no dataset, so the next dataset could
            # overwrite it; tag it with the dataset name right away.
            # best_config is empty when no checkpoint was written.
            cnn_checkpoint = f'best_model_{best_config}.pth'
            if best_config and os.path.exists(cnn_checkpoint):
                os.replace(cnn_checkpoint, f'best_model_{dataset_name}_{best_config}.pth')

            # 3. Run ResNet Experiments (Section 3 of PDF)
            best_resnet_acc = run_resnet_transfer(train_loader, val_loader, num_classes)

            # Same reason as above: 'best_resnet.pth' is shared by all datasets.
            # A checkpoint is only written when some epoch scored above zero.
            if best_resnet_acc > 0 and os.path.exists('best_resnet.pth'):
                os.replace('best_resnet.pth', f'best_resnet_{dataset_name}.pth')

            # Store results for final summary
            final_results[dataset_name] = {
                'best_config': best_config,
                'cnn_acc': best_cnn_acc,
                'resnet_acc': best_resnet_acc
            }

        except Exception as e:
            # Top-level boundary: report the failure and carry on with the next dataset.
            print(f"(!) Error processing {dataset_name}: {e}")
            continue

    # 4. Final Comparison for Deliverables
    print("\n" + "="*50)
    print("FINAL DELIVERABLE SUMMARY (ALL DATASETS)")
    print("="*50)

    for ds, res in final_results.items():
        print(f"\n--- {ds} Results ---")
        print(f"Best Custom CNN ({res['best_config']}): {res['cnn_acc']:.4f}")
        print(f"ResNet-18 Transfer Learning:      {res['resnet_acc']:.4f}")

        if res['resnet_acc'] > res['cnn_acc']:
            print("Conclusion: ResNet-18 outperformed the Custom CNN.")
        elif res['resnet_acc'] < res['cnn_acc']:
            print("Conclusion: Custom CNN outperformed ResNet-18.")
        else:
            # A tie used to be reported as a win for the custom CNN.
            print("Conclusion: Custom CNN and ResNet-18 performed equally.")
    print("="*50)

########################################
PROCESSING DATASET: CATS_DOGS
########################################
Preparing data for: CATS_DOGS...
Download complete.
Extracting dataset...
Extraction complete.
Cleanup complete. Removed 1738 corrupt images.

Training: Act=relu, Init=xavier, Opt=sgd
  -> New Best Accuracy: 0.5824
Training: Act=relu, Init=xavier, Opt=adam
  -> New Best Accuracy: 0.7105
Training: Act=relu, Init=xavier, Opt=rmsprop
Training: Act=relu, Init=kaiming, Opt=sgd
Training: Act=relu, Init=kaiming, Opt=adam
  -> New Best Accuracy: 0.7412
Training: Act=relu, Init=kaiming, Opt=rmsprop
Training: Act=relu, Init=random, Opt=sgd
Training: Act=relu, Init=random, Opt=adam
Training: Act=relu, Init=random, Opt=rmsprop
Training: Act=tanh, Init=xavier, Opt=sgd
Training: Act=tanh, Init=xavier, Opt=adam
Training: Act=tanh, Init=xavier, Opt=rmsprop
Training: Act=tanh, Init=kaiming, Opt=sgd
Training: Act=tanh, Init=kaiming, Opt=adam
Training: Act=tanh, Init=kaiming, Opt=rmsprop
Traini