In [None]:
!pip install torchinfo
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchinfo import summary


# Torchvision for datasets and transforms
import torchvision
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR10

# Data utilities
from torch.utils.data import DataLoader

# Metrics & visualization
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report

# Seed PyTorch's global RNG so weight initialisation, shuffling and random
# augmentations start from a known state on every "Restart & Run All".
# NOTE: some GPU kernels may still be nondeterministic, so runs are only as
# repeatable as the backend allows.
SEED = 42
torch.manual_seed(SEED)

# Prefer the GPU when CUDA is available; otherwise run everything on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

import torchvision.transforms as transforms

# Per-channel mean / std handed to Normalize; both pipelines use the same values,
# so they are defined once and shared.
CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
CIFAR10_STD = (0.2023, 0.1994, 0.2010)
normalize = transforms.Normalize(mean=CIFAR10_MEAN, std=CIFAR10_STD)

# Training pipeline: random crop (after 4-pixel padding) and random horizontal
# flip for augmentation, then tensor conversion and normalization.
train_augmentations = [
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
]
transform_train = transforms.Compose(
    train_augmentations + [transforms.ToTensor(), normalize]
)

# Test pipeline: no augmentation, only tensor conversion and normalization.
transform_test = transforms.Compose([transforms.ToTensor(), normalize])

# Options common to both CIFAR-10 splits (download into ./data if missing).
cifar_kwargs = dict(root='./data', download=True)

# Training split: augmented transforms, reshuffled every epoch.
trainset = CIFAR10(train=True, transform=transform_train, **cifar_kwargs)
trainloader = DataLoader(trainset, batch_size=128, shuffle=True, num_workers=2)

# Test split: deterministic transforms, fixed order.
testset = CIFAR10(train=False, transform=transform_test, **cifar_kwargs)
testloader = DataLoader(testset, batch_size=100, shuffle=False, num_workers=2)

# Human-readable class names, used later for confusion-matrix tick labels.
classes = trainset.classes
print("Classes:", classes)

# Sanity check: pull one training batch and show its shapes and first labels.
images, labels = next(iter(trainloader))
for preview in (images.shape, labels.shape, labels[:10]):
    print(preview)




## Experiment 1: Architecture Comparison

This experiment compares three convolutional neural network architectures on the CIFAR-10 dataset:
1. A custom Baseline CNN designed from scratch
2. ResNet18 adapted for CIFAR-10
3. DenseNet121 adapted for CIFAR-10

All models are trained using the same data preprocessing, optimizer (Adam), learning rate (0.001),
batch size (128), and number of epochs (10) to isolate the effect of architectural design.


In [None]:
import torch

import matplotlib.pyplot as plt
import seaborn as sns
import torch.nn as nn
from sklearn.metrics import confusion_matrix

class BaselineCNN(nn.Module):
    """Small from-scratch CNN: two conv/ReLU/max-pool stages plus a two-layer classifier.

    Each 3x3 convolution uses padding=1 (spatial size preserved) and each 2x2
    max-pool halves height and width, so the first ``Linear`` layer's
    ``64 * 8 * 8`` input size corresponds to 3-channel 32x32 images.

    Args:
        num_classes: Number of output logits. Defaults to 10, the value that
            was previously hard-coded.
    """

    def __init__(self, num_classes=10):
        super().__init__()

        self.network = nn.Sequential(
            # Stage 1: 3 -> 32 channels, 32x32 -> 16x16
            nn.Conv2d(3, 32, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),

            # Stage 2: 32 -> 64 channels, 16x16 -> 8x8
            nn.Conv2d(32, 64, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),

            # Classifier head on the flattened 64x8x8 feature map
            nn.Flatten(),
            nn.Linear(64 * 8 * 8, 256),
            nn.ReLU(),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        """Return raw class logits of shape (batch, num_classes) for image batch ``x``."""
        return self.network(x)


import torch
import torch.nn as nn



# ---- Baseline CNN: train, evaluate once, plot the confusion matrix ----
model1 = BaselineCNN().to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model1.parameters(), lr=0.001)

num_epochs = 10

for epoch in range(num_epochs):
    model1.train()
    running_loss = 0.0

    for images, labels in trainloader:
        images = images.to(device)
        labels = labels.to(device)

        # Forward
        outputs = model1(images)
        loss = criterion(outputs, labels)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Mean of the per-batch losses for this epoch
    avg_loss = running_loss / len(trainloader)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}")

# Run the test set through the model ONCE and keep every prediction; both the
# accuracy and the confusion matrix are derived from these tensors instead of
# performing a second, identical inference pass.
model1.eval()

all_preds = []
all_labels = []

with torch.no_grad():
    for images, labels in testloader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = model1(images)           # [batch, 10]
        _, predicted = torch.max(outputs, 1)

        all_preds.append(predicted.cpu())
        all_labels.append(labels.cpu())

all_preds = torch.cat(all_preds)
all_labels = torch.cat(all_labels)

total = all_labels.size(0)
correct = (all_preds == all_labels).sum().item()

accuracy = 100 * correct / total
print(f"Test Accuracy: {accuracy:.2f}%")
baseline_accuracy = accuracy

# Rows are true classes, columns are predicted classes.
cm = confusion_matrix(all_labels.numpy(), all_preds.numpy())

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=classes,
            yticklabels=classes,
            ax=ax)

ax.set_xlabel("Predicted")
ax.set_ylabel("True")
ax.set_title("Confusion Matrix – Baseline CNN")
plt.show()


In [None]:
from torchvision import models

from sklearn.metrics import confusion_matrix


import matplotlib.pyplot as plt
import seaborn as sns

class ResNet18_CIFAR(nn.Module):
    """torchvision ResNet18 (randomly initialised) with its stem and head replaced.

    Changes relative to the stock model, as done below:
      * ``conv1`` becomes a 3x3, stride-1, padding-1 convolution (3 -> 64 channels).
      * ``maxpool`` becomes an identity, so no pooling follows the first conv.
      * ``fc`` becomes a new linear layer producing ``num_classes`` logits.

    Args:
        num_classes: Number of output logits. Defaults to 10, the value that
            was previously hard-coded.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.model = models.resnet18(weights=None)
        self.model.conv1 = nn.Conv2d(
            in_channels=3,
            out_channels=64,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=False,
        )
        self.model.maxpool = nn.Identity()
        # Read the feature width from the layer being replaced rather than
        # hard-coding it, mirroring how DenseNet_CIFAR builds its classifier.
        self.model.fc = nn.Linear(self.model.fc.in_features, num_classes)

    def forward(self, x):
        """Return the wrapped model's class logits for image batch ``x``."""
        return self.model(x)

# ---- ResNet18: train, evaluate once, plot the confusion matrix ----
model2 = ResNet18_CIFAR().to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model2.parameters(), lr=0.001)

num_epochs = 10

for epoch in range(num_epochs):
    running_loss = 0
    model2.train()

    # trainloader was built with shuffle=True, so batch order differs per epoch
    for images, labels in trainloader:
        images, labels = images.to(device), labels.to(device)

        # Forward pass
        outputs = model2(images)
        loss = criterion(outputs, labels)

        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(trainloader):.4f}")

# Run the test set through the model ONCE and keep every prediction; both the
# accuracy and the confusion matrix are derived from these tensors instead of
# performing a second, identical inference pass.
model2.eval()

all_preds = []
all_labels = []

with torch.no_grad():
    for images, labels in testloader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = model2(images)
        _, predicted = torch.max(outputs, 1)

        all_preds.append(predicted.cpu())
        all_labels.append(labels.cpu())

all_preds = torch.cat(all_preds)
all_labels = torch.cat(all_labels)

total = all_labels.size(0)
correct = (all_preds == all_labels).sum().item()

accuracy = 100 * correct / total
print(f"Test Accuracy: {accuracy:.2f}%")

resnet_accuracy = accuracy

# Rows are true classes, columns are predicted classes.
cm = confusion_matrix(all_labels.numpy(), all_preds.numpy())

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=classes,
            yticklabels=classes,
            ax=ax)

ax.set_xlabel("Predicted")
ax.set_ylabel("True")
ax.set_title("Confusion Matrix – ResNet CNN")
plt.show()




In [None]:
from torchvision import models
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

class DenseNet_CIFAR(nn.Module):
    """torchvision DenseNet121 (randomly initialised) with its stem and head replaced.

    Changes relative to the stock model, as done below:
      * ``features.conv0`` becomes a 3x3, stride-1, padding-1 convolution (3 -> 64 channels).
      * ``features.pool0`` becomes an identity, removing the initial pooling.
      * ``classifier`` becomes a new linear layer producing ``num_classes`` logits.

    Args:
        num_classes: Number of output logits. Defaults to 10, the value that
            was previously hard-coded.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.model = models.densenet121(weights=None)

        # Modify first conv layer for CIFAR-10
        self.model.features.conv0 = nn.Conv2d(
            in_channels=3,
            out_channels=64,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=False
        )

        # Remove initial pooling
        self.model.features.pool0 = nn.Identity()

        # New classifier head; the input width is read from the layer it replaces
        self.model.classifier = nn.Linear(
            self.model.classifier.in_features, num_classes
        )

    def forward(self, x):
        """Return the wrapped model's class logits for image batch ``x``."""
        return self.model(x)



# ---- DenseNet121: train, evaluate once, plot the confusion matrix ----
model3 = DenseNet_CIFAR().to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model3.parameters(), lr=0.001)

num_epochs = 10

# Training loop
for epoch in range(num_epochs):
    running_loss = 0.0
    model3.train()

    for images, labels in trainloader:
        images, labels = images.to(device), labels.to(device)

        outputs = model3(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(trainloader):.4f}")

# Run the test set through the model ONCE and keep every prediction; both the
# accuracy and the confusion matrix are derived from these tensors instead of
# performing a second, identical inference pass.
model3.eval()

all_preds = []
all_labels = []

with torch.no_grad():
    for images, labels in testloader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = model3(images)
        _, predicted = torch.max(outputs, 1)

        all_preds.append(predicted.cpu())
        all_labels.append(labels.cpu())

all_preds = torch.cat(all_preds)
all_labels = torch.cat(all_labels)

total = all_labels.size(0)
correct = (all_preds == all_labels).sum().item()

accuracy = 100 * correct / total
print(f"Test Accuracy: {accuracy:.2f}%")

densenet_accuracy = accuracy

# Rows are true classes, columns are predicted classes.
cm = confusion_matrix(all_labels.numpy(), all_preds.numpy())

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=classes,
            yticklabels=classes,
            ax=ax)

ax.set_xlabel("Predicted")
ax.set_ylabel("True")
ax.set_title("Confusion Matrix – DenseNet CNN")
plt.show()





In [None]:
import pandas as pd

# Experiment 1 summary: one row per architecture with its test accuracy.
exp1_accuracies = {
    "Baseline CNN": baseline_accuracy,
    "ResNet18": resnet_accuracy,
    "DenseNet121": densenet_accuracy,
}

results_exp1 = pd.DataFrame(
    list(exp1_accuracies.items()),
    columns=["Model", "Test Accuracy (%)"],
)

results_exp1


## Experiment 2: Optimizer Comparison

In this experiment, the ResNet18 architecture is fixed while comparing two optimization strategies:
Adam (learning rate 0.001) and Stochastic Gradient Descent (SGD) with momentum 0.9 (learning rate 0.1).

The goal is to analyze how optimizer choice affects training convergence and final performance.


In [None]:
def train_model(model, optimizer, num_epochs=10, loader=None, loss_fn=None,
                target_device=None):
    """Train ``model`` and return the mean training loss of every epoch.

    Args:
        model: Network to optimise; it is switched to training mode.
        optimizer: Optimizer already bound to ``model``'s parameters.
        num_epochs: Number of full passes over ``loader``.
        loader: Iterable of ``(images, labels)`` batches supporting ``len()``.
            Defaults to the notebook-level ``trainloader``.
        loss_fn: Callable ``loss_fn(outputs, labels)`` returning a scalar loss.
            Defaults to the notebook-level ``criterion``.
        target_device: Device the batches are moved to. Defaults to the
            notebook-level ``device``.

    Returns:
        list[float]: One entry per epoch, the average of that epoch's
        per-batch losses.
    """
    # Resolve notebook globals only when no explicit value was supplied, so
    # existing two-argument calls keep working while the dependencies on other
    # cells become visible and overridable.
    if loader is None:
        loader = trainloader
    if loss_fn is None:
        loss_fn = criterion
    if target_device is None:
        target_device = device

    model.train()
    train_losses = []

    for epoch in range(num_epochs):
        running_loss = 0.0

        for images, labels in loader:
            images, labels = images.to(target_device), labels.to(target_device)

            outputs = model(images)
            loss = loss_fn(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        epoch_loss = running_loss / len(loader)
        train_losses.append(epoch_loss)

        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}")

    return train_losses



# Reseed the global RNG identically before each run so both ResNet18 instances
# start from the same initial weights; otherwise differences in the loss curves
# mix optimizer effects with initialisation luck.
# NOTE(review): this should also align the shuffled batch order between runs as
# far as it is driven by the global RNG — confirm if exact pairing matters.
EXP2_SEED = 0

# Run A: Adam, lr=0.001
torch.manual_seed(EXP2_SEED)
model_adam = ResNet18_CIFAR().to(device)
optimizer_adam = optim.Adam(model_adam.parameters(), lr=0.001)

loss_adam = train_model(model_adam, optimizer_adam)

# Run B: SGD with momentum 0.9, lr=0.1
torch.manual_seed(EXP2_SEED)
model_sgd = ResNet18_CIFAR().to(device)
optimizer_sgd = optim.SGD(model_sgd.parameters(), lr=0.1, momentum=0.9)

loss_sgd = train_model(model_sgd, optimizer_sgd)


In [None]:
# Overlay the per-epoch training loss of both optimizers on one set of axes.
fig, ax = plt.subplots(figsize=(8, 5))
for loss_curve, optimizer_name in ((loss_adam, "Adam"), (loss_sgd, "SGD")):
    ax.plot(loss_curve, label=optimizer_name)
ax.set_xlabel("Epoch")
ax.set_ylabel("Training Loss")
ax.set_title("Experiment 2: Optimizer Comparison")
ax.legend()
plt.show()
