# ResNet-50

In [13]:
# Imports & Hyperparameters
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import multilabel_confusion_matrix

# Device and hyperparameters
# Run on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device        = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
seed          = 42     # fixed RNG seed so repeated runs start from the same state
num_classes   = 100    # size of the classifier output (CIFAR-100 is loaded below)
batch_size    = 128    # samples per DataLoader batch
num_epochs    = 25     # full passes over the training set
learning_rate = 0.001  # initial SGD step size
momentum      = 0.9    # SGD momentum
weight_decay  = 1e-4   # weight-decay coefficient passed to SGD

# Seed PyTorch before any model or DataLoader is created so weight
# initialisation, shuffling and random augmentation are repeatable.
# NOTE(review): some GPU kernels may still be nondeterministic.
torch.manual_seed(seed)

# CIFAR-100 normalization
# Per-channel statistics handed to transforms.Normalize.
mean = [0.5071, 0.4867, 0.4408]
std  = [0.2675, 0.2565, 0.2761]


cuda


In [14]:
# Data Transforms & Loaders

def _to_model_input():
    """Return fresh Resize -> ToTensor -> Normalize steps shared by both pipelines."""
    return [
        transforms.Resize(224),
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ]


# Training adds random crop/flip augmentation ahead of the shared steps;
# validation uses the shared steps only.
train_transform = transforms.Compose(
    [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
    ]
    + _to_model_input()
)
val_transform = transforms.Compose(_to_model_input())

# Both splits live under the same root and are downloaded on demand.
_cifar_args = dict(root='./data', download=True)
train_dataset = datasets.CIFAR100(train=True,
                                  transform=train_transform,
                                  **_cifar_args)
val_dataset   = datasets.CIFAR100(train=False,
                                  transform=val_transform,
                                  **_cifar_args)

# Only the training loader shuffles; batch size and worker count are shared.
_loader_args = dict(batch_size=batch_size, num_workers=4)
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_args)
val_loader   = DataLoader(val_dataset, shuffle=False, **_loader_args)


In [15]:
# ResNet-50 Building Blocks

class Bottleneck(nn.Module):
    """Residual bottleneck block: 1x1 reduce -> 3x3 -> 1x1 expand, plus a shortcut.

    Args:
        in_planes: number of channels of the incoming tensor.
        planes: width of the two inner convolutions; the block emits
            ``planes * expansion`` channels.
        stride: stride of the 3x3 convolution.
        downsample: optional module applied to the input before it is added
            back; when ``None`` the input itself is used as the shortcut.
    """

    # Ratio between the block's output channels and ``planes``.
    expansion = 4

    def __init__(self, in_planes, planes, stride=1, downsample=None):
        super().__init__()
        out_planes = planes * self.expansion

        # 1x1 convolution: in_planes -> planes channels.
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1   = nn.BatchNorm2d(planes)

        # 3x3 convolution; this is the layer that carries the stride.
        self.conv2 = nn.Conv2d(
            planes, planes, kernel_size=3, stride=stride, padding=1, bias=False
        )
        self.bn2   = nn.BatchNorm2d(planes)

        # 1x1 convolution: planes -> planes * expansion channels.
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3   = nn.BatchNorm2d(out_planes)

        self.relu  = nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        # Main path: conv -> batch norm -> ReLU, twice.
        y = self.conv1(x)
        y = self.relu(self.bn1(y))
        y = self.conv2(y)
        y = self.relu(self.bn2(y))
        # Last stage has no ReLU before the residual addition.
        y = self.bn3(self.conv3(y))

        # Shortcut: the raw input unless a downsample module was supplied.
        shortcut = x if self.downsample is None else self.downsample(x)

        y += shortcut
        return self.relu(y)


In [16]:
# ResNet-50 Definition

class ResNet50(nn.Module):
    """ResNet-50 classifier assembled from ``Bottleneck`` blocks.

    The network is a stem (7x7 stride-2 convolution, batch norm, ReLU,
    3x3 stride-2 max-pool), four bottleneck stages with 3, 4, 6 and 3 blocks,
    global average pooling, and a linear layer with ``num_classes`` outputs.

    Args:
        num_classes: number of outputs of the final linear layer.
    """

    def __init__(self, num_classes=100):
        super().__init__()
        # Channel count entering the next stage; _make_layer advances it.
        self.in_planes = 64

        # Stem.
        self.conv1 = nn.Conv2d(
            3, 64, kernel_size=7, stride=2, padding=3, bias=False
        )
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)

        # Bottleneck stages, block counts [3, 4, 6, 3]; every stage after the
        # first uses stride 2.
        self.layer1 = self._make_layer(planes=64, blocks=3)
        self.layer2 = self._make_layer(planes=128, blocks=4, stride=2)
        self.layer3 = self._make_layer(planes=256, blocks=6, stride=2)
        self.layer4 = self._make_layer(planes=512, blocks=3, stride=2)

        # Classification head.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * Bottleneck.expansion, num_classes)

    def _make_layer(self, planes, blocks, stride=1):
        """Build one stage of ``blocks`` bottlenecks and update ``self.in_planes``.

        Only the first block receives ``stride`` and, when the stride or the
        channel count changes, a 1x1-conv + batch-norm projection shortcut.
        """
        out_planes = planes * Bottleneck.expansion

        shortcut = None
        shape_changes = stride != 1 or self.in_planes != out_planes
        if shape_changes:
            projection = nn.Conv2d(
                self.in_planes, out_planes, kernel_size=1, stride=stride, bias=False
            )
            shortcut = nn.Sequential(projection, nn.BatchNorm2d(out_planes))

        stage = [Bottleneck(self.in_planes, planes, stride, shortcut)]
        # Every later block in the stage consumes the expanded channel count.
        self.in_planes = out_planes
        stage.extend(Bottleneck(self.in_planes, planes) for _ in range(blocks - 1))
        return nn.Sequential(*stage)

    def forward(self, x):
        """Map an image batch to a ``(batch, num_classes)`` score tensor."""
        x = self.conv1(x)
        x = self.relu(self.bn1(x))
        x = self.maxpool(x)
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)
        # Pool to 1x1 and drop the spatial dimensions before the linear layer.
        x = torch.flatten(self.avgpool(x), 1)
        return self.fc(x)


In [17]:
# Instantiate Model & Save Initial Filters

# Build the network and move its parameters to the selected device.
model = ResNet50(num_classes=num_classes)
model = model.to(device)

# CPU copy of the stem filters as initialised, kept for the
# before/after comparison made once training has finished.
initial_conv1 = model.conv1.weight.detach().clone().cpu()


In [18]:
# Loss, Optimizer & Scheduler

# Cross-entropy over the class scores produced by the model.
criterion = nn.CrossEntropyLoss()

# SGD configured from the notebook-level hyperparameters.
_sgd_settings = dict(lr=learning_rate,
                     momentum=momentum,
                     weight_decay=weight_decay)
optimizer = optim.SGD(model.parameters(), **_sgd_settings)

# Multiply the learning rate by 0.1 after every 7 scheduler steps.
scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)


In [19]:
# tqdm.auto picks the notebook progress bar when running in Jupyter and falls
# back to the console bar elsewhere.
from tqdm.auto import tqdm

# Training & Validation Loop with Progress Bars

def run_epoch(model, loader, criterion, device, desc, optimizer=None):
    """Run one full pass over ``loader`` and return ``(mean_loss, accuracy)``.

    Args:
        model: network to run.
        loader: iterable yielding ``(inputs, targets)`` batches.
        criterion: loss called as ``criterion(outputs, targets)``.
        device: device the batches are moved to before the forward pass.
        desc: label shown on the progress bar.
        optimizer: when given, the model is put in training mode and updated
            after every batch; when ``None`` the model is put in eval mode and
            gradient tracking is disabled.

    Returns:
        Tuple of the per-sample mean loss and the fraction of correct
        top-1 predictions over the whole pass.
    """
    training = optimizer is not None
    model.train(training)  # train(False) is the same as eval()

    loss_sum, n_correct, n_seen = 0.0, 0, 0
    bar = tqdm(loader, desc=desc, leave=False)
    with torch.set_grad_enabled(training):
        for x, y in bar:
            x, y = x.to(device), y.to(device)
            if training:
                optimizer.zero_grad()
            out = model(x)
            loss = criterion(out, y)
            if training:
                loss.backward()
                optimizer.step()

            # The criterion returns a batch mean; weight it by the batch size
            # so a smaller final batch does not skew the epoch average.
            loss_sum  += loss.item() * x.size(0)
            n_correct += (out.argmax(1) == y).sum().item()
            n_seen    += y.size(0)

            bar.set_postfix({
                'loss': f"{loss_sum/n_seen:.4f}",
                'acc':  f"{n_correct/n_seen:.4f}"
            })

    return loss_sum / n_seen, n_correct / n_seen


train_losses, val_losses = [], []
train_accs,   val_accs   = [], []

for epoch in range(1, num_epochs+1):
    train_loss, train_acc = run_epoch(
        model, train_loader, criterion, device,
        desc=f"Epoch {epoch}/{num_epochs} [train]",
        optimizer=optimizer,
    )
    val_loss, val_acc = run_epoch(
        model, val_loader, criterion, device,
        desc=f"Epoch {epoch}/{num_epochs} [val]  ",
    )

    # Record all four metrics together, only after both phases finished, so an
    # interrupted epoch never leaves the histories with different lengths.
    train_losses.append(train_loss)
    train_accs.append(train_acc)
    val_losses.append(val_loss)
    val_accs.append(val_acc)

    # Advance the learning-rate schedule once per epoch.
    scheduler.step()

    # Epoch summary
    print(
        f"Epoch {epoch}/{num_epochs}  "
        f"Train Loss {train_losses[-1]:.4f}  Acc {train_accs[-1]:.4f}  "
        f"Val   Loss {val_losses[-1]:.4f}  Acc {val_accs[-1]:.4f}"
    )


                                                                                                   

Epoch 1/25  Train Loss 4.2029  Acc 0.0635  Val   Loss 3.8743  Acc 0.1052


                                                                                                   

Epoch 2/25  Train Loss 3.6751  Acc 0.1351  Val   Loss 3.5107  Acc 0.1667


                                                                                                   

Epoch 3/25  Train Loss 3.4063  Acc 0.1814  Val   Loss 3.2946  Acc 0.2085


                                                                                                  

Epoch 4/25  Train Loss 3.1817  Acc 0.2231  Val   Loss 3.2313  Acc 0.2169


                                                                                                  

Epoch 5/25  Train Loss 2.9783  Acc 0.2629  Val   Loss 2.9231  Acc 0.2742


                                                                                                

Epoch 6/25  Train Loss 2.8014  Acc 0.2944  Val   Loss 2.8792  Acc 0.2826


                                                                                                

KeyboardInterrupt: 

In [None]:
# Plot Loss & Accuracy

# Build the x-axis from the epochs that were actually recorded rather than
# from num_epochs: if training stopped early (e.g. it was interrupted) the
# histories are shorter than num_epochs and plt.plot would raise on the
# length mismatch. Taking the minimum also covers a stop between the
# training and validation phases of one epoch.
epochs_done = min(len(train_losses), len(val_losses),
                  len(train_accs), len(val_accs))
epochs = np.arange(1, epochs_done + 1)

plt.figure()
plt.plot(epochs, train_losses[:epochs_done], label='Train Loss')
plt.plot(epochs, val_losses[:epochs_done],   label='Val Loss')
plt.xlabel('Epoch'); plt.ylabel('Loss'); plt.legend(); plt.title('Loss')
plt.show()

plt.figure()
plt.plot(epochs, train_accs[:epochs_done], label='Train Acc')
plt.plot(epochs, val_accs[:epochs_done],   label='Val Acc')
plt.xlabel('Epoch'); plt.ylabel('Accuracy'); plt.legend(); plt.title('Accuracy')
plt.show()


In [None]:
# Weight Update Analysis & Filter Visualization

# CPU copy of the stem filters in their current state.
final_conv1 = model.conv1.weight.detach().clone().cpu()

# L2 norm of the element-wise difference from the snapshot taken at
# initialisation.
l2_change = (final_conv1 - initial_conv1).norm()
print(f"Conv1 L2 weight change: {l2_change:.4f}")

def show_filters(filters, title, n=2):
    """Display the first ``n`` filters of a convolution weight tensor side by side.

    Args:
        filters: weight tensor indexed as ``filters[i]`` with each filter laid
            out channels-first (it is permuted to height x width x channels
            before being drawn).
        title: text used as the prefix of every panel title.
        n: how many leading filters to draw (default 2); capped at the number
            of filters available.

    Raises:
        ValueError: if there is nothing to draw (``n`` < 1 or no filters).
    """
    n = min(n, len(filters))
    if n < 1:
        raise ValueError("show_filters needs at least one filter to display")

    # squeeze=False always returns a 2-D axes array, so a single panel can be
    # indexed the same way as several.
    fig, axs = plt.subplots(1, n, figsize=(n*3, 3), squeeze=False)
    for i, ax in enumerate(axs[0]):
        f = filters[i]
        fmin, fmax = f.min(), f.max()
        span = fmax - fmin
        # Min-max scale to [0, 1] for imshow. A constant filter has zero span;
        # keep its all-zero offset instead of dividing by zero.
        img = (f - fmin) / span if span > 0 else f - fmin
        img = img.permute(1,2,0).numpy()
        ax.imshow(img)
        ax.axis('off')
        ax.set_title(f"{title} #{i}")
    plt.show()

# Draw the stem filters as initialised, then as they are now.
for _weights, _label in (
    (initial_conv1, "Before Training"),
    (final_conv1,   "After Training"),
):
    show_filters(_weights, _label)


In [None]:
# Testing & Metrics

# Collect top-1 predictions and ground-truth labels over the validation loader.
model.eval()
all_preds, all_labels = [], []
with torch.no_grad():
    for images, targets in val_loader:
        images, targets = images.to(device), targets.to(device)
        batch_preds = model(images).argmax(1)
        all_preds.extend(batch_preds.cpu().numpy())
        all_labels.extend(targets.cpu().numpy())

# One 2x2 matrix per class, unpacked below in the order tn, fp, fn, tp.
class_ids = list(range(num_classes))
mlcms = multilabel_confusion_matrix(all_labels, all_preds, labels=class_ids)

rows = []
for cls, matrix in zip(class_ids, mlcms):
    tn, fp, fn, tp = matrix.ravel()
    # F1 = 2*TP / (2*TP + FP + FN); defined as 0.0 when the class has no
    # true positives, false positives or false negatives.
    denom = 2*tp + fp + fn
    f1 = (2*tp)/denom if denom > 0 else 0.0
    rows.append([cls, tp, tn, fp, fn, f1])

# Show the per-class table in the notebook and save it as CSV.
df = pd.DataFrame(rows, columns=['class','tp','tn','fp','fn','f1_score'])
display(df)
df.to_csv('cifar100_metrics.csv', index=False)
