In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
import torch.nn.functional as F
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader, TensorDataset
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tabulate import tabulate
from sklearn.metrics import confusion_matrix, f1_score
import seaborn as sns
import wandb

In [2]:
def load_dataset(data_dir="./data"):
    """Load the train/test tensors from disk and wrap them as ``TensorDataset``s.

    Args:
        data_dir: Directory containing ``train_data.pt``, ``train_labels.pt``,
            ``test_data.pt`` and ``test_labels.pt``. Defaults to ``"./data"``,
            the location that used to be hard-coded.

    Returns:
        ``(train_dataset, test_dataset)``. Images are float tensors divided by
        255.0 and labels are ``int64`` tensors.
    """
    def _load_split(split):
        # Divide by 255.0 after casting to float
        # (assumes the stored images are 0-255 encoded — TODO confirm).
        images = torch.load(f"{data_dir}/{split}_data.pt").float() / 255.0
        # .long() gives int64 class indices.
        labels = torch.load(f"{data_dir}/{split}_labels.pt").long()
        return TensorDataset(images, labels)

    return _load_split("train"), _load_split("test")

In [3]:
def plot_confusion_matrix(y_true, y_pred, num_classes):
    """Draw a confusion-matrix heatmap on a new 10x8 figure and return ``plt``.

    ``num_classes`` is accepted but not used by the body. A later cell in this
    notebook defines another ``plot_confusion_matrix`` with a different
    signature, which replaces this one once that cell has run.
    """
    counts = confusion_matrix(y_true, y_pred)
    plt.figure(figsize=(10, 8))
    # sns.heatmap draws on the current axes and returns them.
    heatmap_ax = sns.heatmap(counts, annot=True, fmt='d', cmap='Blues')
    heatmap_ax.set_title('Confusion Matrix')
    heatmap_ax.set_ylabel('True Label')
    heatmap_ax.set_xlabel('Predicted Label')
    return plt


In [4]:
def evaluate_model(model, test_loader, wandb_run=None):
    """Evaluate ``model`` on ``test_loader`` and report accuracy and weighted F1.

    Args:
        model: Network to evaluate. It is switched to eval mode and left there.
        test_loader: Iterable yielding ``(images, labels)`` batches.
        wandb_run: Optional W&B run. When given, a test confusion matrix and
            the ``test_accuracy`` / ``test_f1_score`` metrics are logged to it.

    Returns:
        ``(accuracy, f1)`` where ``accuracy`` is a percentage and ``f1`` is the
        weighted F1 score.

    Raises:
        ValueError: If ``test_loader`` yields no samples.
    """
    model.eval()
    # Run on whatever device the model lives on; a model without parameters
    # falls back to the CPU instead of raising StopIteration.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")
    correct, total = 0, 0
    all_preds, all_labels = [], []

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            # Index of the largest logit along dim 1 is the predicted class.
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    if total == 0:
        # Previously this surfaced as an opaque ZeroDivisionError below.
        raise ValueError("evaluate_model received an empty test_loader; nothing to evaluate.")

    accuracy = 100 * correct / total
    f1 = f1_score(all_labels, all_preds, average='weighted')

    if wandb_run:
        plot_confusion_matrix(all_labels, all_preds, "Test Confusion Matrix", wandb_run)
        wandb_run.log({"test_accuracy": accuracy, "test_f1_score": f1})

    print(f"Test Accuracy: {accuracy:.2f}% | F1 Score: {f1:.4f}")
    return accuracy, f1

def plot_confusion_matrix(true_labels, pred_labels, title, wandb_run):
    """Log a confusion-matrix heatmap to W&B as an image stored under ``title``.

    This definition replaces the earlier three-argument
    ``plot_confusion_matrix`` from a previous cell once this cell has run.

    Args:
        true_labels: Ground-truth class labels.
        pred_labels: Predicted class labels, aligned with ``true_labels``.
        title: Used both as the plot title and as the W&B log key.
        wandb_run: W&B run that receives the image.
    """
    cm = confusion_matrix(true_labels, pred_labels)
    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
        ax.set_title(title)
        ax.set_ylabel('True Label')
        ax.set_xlabel('Predicted Label')
        wandb_run.log({title: wandb.Image(fig)})
    finally:
        # Close this exact figure even if plotting or logging fails, so figures
        # do not pile up when this is called once per epoch.
        plt.close(fig)

def train_model(model, train_loader, test_loader, num_epochs=10, lr=0.001, experiment_name="default"):
    """Train ``model`` with Adam and cross-entropy loss, logging each epoch to W&B.

    Args:
        model: Network to train; moved to CUDA when available, otherwise CPU.
        train_loader: Loader yielding ``(images, labels)`` training batches. Its
            ``batch_size`` is recorded in the run config.
        test_loader: Loader passed to ``evaluate_model`` after every epoch and
            once more after the last epoch.
        num_epochs: Number of passes over ``train_loader``.
        lr: Learning rate given to Adam.
        experiment_name: W&B run name (project ``resnet18-comparison``).

    Returns:
        A ``history`` dict with the per-epoch lists ``losses``, ``train_acc``,
        ``test_acc``, ``train_f1`` and ``test_f1``, plus the scalars
        ``final_test_accuracy`` and ``final_test_f1``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # Using the run as a context manager guarantees it is finished even when
    # training raises, so a failed experiment does not leave a run open.
    with wandb.init(
        project="resnet18-comparison",
        name=experiment_name,
        config={"learning_rate": lr, "epochs": num_epochs, "batch_size": train_loader.batch_size}
    ) as wandb_run:
        wandb_run.watch(model, log="all")

        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=lr)

        history = {"losses": [], "train_acc": [], "test_acc": [], "train_f1": [], "test_f1": []}

        for epoch in range(num_epochs):
            # evaluate_model leaves the model in eval mode, so switch back every epoch.
            model.train()
            total_loss, correct, total = 0, 0, 0
            epoch_preds, epoch_labels = [], []

            for images, labels in train_loader:
                images, labels = images.to(device), labels.to(device)
                optimizer.zero_grad()
                outputs = model(images)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                # NOTE: total_loss is the SUM of the per-batch loss values, so it
                # grows with the number of batches in train_loader.
                total_loss += loss.item()
                _, predicted = outputs.max(1)
                correct += predicted.eq(labels).sum().item()
                total += labels.size(0)

                epoch_preds.extend(predicted.cpu().numpy())
                epoch_labels.extend(labels.cpu().numpy())

            train_accuracy = 100 * correct / total
            train_f1 = f1_score(epoch_labels, epoch_preds, average='weighted')
            # No run is passed here: per-epoch test metrics are logged below
            # together with the training metrics.
            test_accuracy, test_f1 = evaluate_model(model, test_loader)

            history["losses"].append(total_loss)
            history["train_acc"].append(train_accuracy)
            history["test_acc"].append(test_accuracy)
            history["train_f1"].append(train_f1)
            history["test_f1"].append(test_f1)

            plot_confusion_matrix(epoch_labels, epoch_preds, f"Training Confusion Matrix - Epoch {epoch+1}", wandb_run)

            wandb_run.log({
                "epoch": epoch + 1, "train_loss": total_loss,
                "train_accuracy": train_accuracy, "test_accuracy": test_accuracy,
                "train_f1": train_f1, "test_f1": test_f1
            })

            print(f"Epoch {epoch+1}: Loss={total_loss:.4f}, Train Acc={train_accuracy:.2f}%, F1={train_f1:.4f}")

        # Final evaluation with the run attached, which also logs the test
        # confusion matrix and the test_accuracy / test_f1_score metrics.
        history["final_test_accuracy"], history["final_test_f1"] = evaluate_model(model, test_loader, wandb_run)

    return history


In [5]:
def get_standard_resnet(num_classes):
    """Return a ResNet-18 built with ``weights=None`` and a ``num_classes``-way head."""
    net = models.resnet18(weights=None)
    # Swap the final fully connected layer for one sized to this task.
    head_inputs = net.fc.in_features
    net.fc = nn.Linear(head_inputs, num_classes)
    return net

def get_pretrained_resnet(num_classes):
    """Return a ResNet-18 loaded with ``IMAGENET1K_V1`` weights and a new ``num_classes``-way head."""
    net = models.resnet18(weights="IMAGENET1K_V1")
    # The replacement classifier layer is newly created, not pretrained.
    head_inputs = net.fc.in_features
    net.fc = nn.Linear(head_inputs, num_classes)
    return net

In [25]:
# Build the tensor datasets; the class count is the number of distinct values
# in the training label tensor.
train_dataset, test_dataset = load_dataset()
num_classes = torch.unique(train_dataset.tensors[1]).numel()

# Only the training batches are shuffled.
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

In [7]:
# Show the current value of num_classes as the cell output.
num_classes

10

In [8]:
# SECURITY: never commit an API key to the notebook. Called without a key,
# wandb.login() uses the WANDB_API_KEY environment variable or previously
# stored credentials, and otherwise prompts interactively. The key that was
# hard-coded in this cell has been exposed and should be revoked in the W&B
# account settings.
wandb.login()

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home2/poorvi.c/.netrc


[34m[1mwandb[0m: Currently logged in as: [33mpoorvi-c[0m ([33mpoorvi-c-iiit-hyderabad[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [9]:
# Experiment: ResNet-18 without pretrained weights, trained with the current
# train_loader / test_loader.
print("\nTraining Standard ResNet-18 from Scratch...")
model1 = get_standard_resnet(num_classes)
std_results = train_model(
    model=model1,
    train_loader=train_loader,
    test_loader=test_loader,
    num_epochs=10,
    experiment_name="Standard_ResNet18",
)


Training Standard ResNet-18 from Scratch...


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Test Accuracy: 53.23%
F1 Score: 0.5416
Epoch 1; Loss=2127.2241; Train Acc=51.17%, F1=0.5092
Test Accuracy: 68.44%
F1 Score: 0.6887
Epoch 2; Loss=1510.7613; Train Acc=66.16%, F1=0.6607
Test Accuracy: 69.14%
F1 Score: 0.6968
Epoch 3; Loss=1218.4441; Train Acc=72.89%, F1=0.7283
Test Accuracy: 71.50%
F1 Score: 0.7151
Epoch 4; Loss=999.3149; Train Acc=77.77%, F1=0.7773
Test Accuracy: 67.61%
F1 Score: 0.6842
Epoch 5; Loss=819.2302; Train Acc=81.79%, F1=0.8177
Test Accuracy: 75.45%
F1 Score: 0.7528
Epoch 6; Loss=648.3713; Train Acc=85.63%, F1=0.8561
Test Accuracy: 76.87%
F1 Score: 0.7670
Epoch 7; Loss=495.7954; Train Acc=88.97%, F1=0.8896
Test Accuracy: 75.62%
F1 Score: 0.7603
Epoch 8; Loss=372.6156; Train Acc=91.55%, F1=0.9155
Test Accuracy: 75.74%
F1 Score: 0.7584
Epoch 9; Loss=300.2729; Train Acc=93.11%, F1=0.9311
Test Accuracy: 74.54%
F1 Score: 0.7448
Epoch 10; Loss=233.6598; Train Acc=94.75%, F1=0.9475
Test Accuracy: 74.54%
F1 Score: 0.7448


0,1
batch,▁▁▁▂▂▂▂▂▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇████
batch_loss,█▇█▆▅▅▅▆▄▄▄▅▅▅▄▄▃▂▃▃▂▃▂▃▃▂▂▂▂▃▃▂▂▁▂▁▂▁▃▁
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁▆▆▆▅████▇▇
test_f1,▁▆▆▆▅████▇
test_f1_score,▁
train_accuracy,▁▃▄▅▆▇▇▇██
train_f1,▁▃▄▅▆▇▇▇██
train_loss,█▆▅▄▃▃▂▂▁▁

0,1
batch,15627.0
batch_loss,0.20301
epoch,9.0
test_accuracy,74.54
test_f1,0.74479
test_f1_score,0.74479
train_accuracy,94.752
train_f1,0.94751
train_loss,233.65983


In [10]:
# Experiment: ImageNet-pretrained ResNet-18, fine-tuned with the current
# train_loader / test_loader.
print("\nFine-Tuning Pretrained ResNet-18...")
model2 = get_pretrained_resnet(num_classes)
fine_results = train_model(
    model=model2,
    train_loader=train_loader,
    test_loader=test_loader,
    num_epochs=10,
    experiment_name="Pretrained_ResNet18",
)


Fine-Tuning Pretrained ResNet-18...


Test Accuracy: 67.24%
F1 Score: 0.6732
Epoch 1; Loss=1541.1256; Train Acc=66.87%, F1=0.6678
Test Accuracy: 76.45%
F1 Score: 0.7624
Epoch 2; Loss=1061.8984; Train Acc=77.08%, F1=0.7704
Test Accuracy: 76.64%
F1 Score: 0.7688
Epoch 3; Loss=844.6784; Train Acc=81.68%, F1=0.8164
Test Accuracy: 80.03%
F1 Score: 0.8013
Epoch 4; Loss=684.6034; Train Acc=85.14%, F1=0.8512
Test Accuracy: 79.17%
F1 Score: 0.7877
Epoch 5; Loss=543.4623; Train Acc=88.22%, F1=0.8821
Test Accuracy: 80.71%
F1 Score: 0.8059
Epoch 6; Loss=421.0830; Train Acc=90.70%, F1=0.9069
Test Accuracy: 80.56%
F1 Score: 0.8060
Epoch 7; Loss=339.4887; Train Acc=92.76%, F1=0.9276
Test Accuracy: 79.59%
F1 Score: 0.7960
Epoch 8; Loss=271.7261; Train Acc=94.03%, F1=0.9403
Test Accuracy: 78.88%
F1 Score: 0.7902
Epoch 9; Loss=223.6098; Train Acc=95.06%, F1=0.9506
Test Accuracy: 81.18%
F1 Score: 0.8109
Epoch 10; Loss=221.3776; Train Acc=95.24%, F1=0.9524
Test Accuracy: 81.18%
F1 Score: 0.8109


0,1
batch,▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇█████
batch_loss,█▆▆▇▆▆▆▇▅▅▅▄▅▅▃▃▄▃▃▂▃▂▃▃▃▂▁▂▃▁▂▂▁▂▂▃▂▁▂▁
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁▆▆▇▇██▇▇██
test_f1,▁▆▆█▇██▇▇█
test_f1_score,▁
train_accuracy,▁▄▅▆▆▇▇███
train_f1,▁▄▅▆▆▇▇███
train_loss,█▅▄▃▃▂▂▁▁▁

0,1
batch,15627.0
batch_loss,0.3302
epoch,9.0
test_accuracy,81.18
test_f1,0.81089
test_f1_score,0.81089
train_accuracy,95.244
train_f1,0.95244
train_loss,221.37763


In [46]:
# One (layer / block, operation, output size) row per stage, in forward order.
layer_rows = [
    ("Input", "-", "36 × 36"),
    ("Conv1 (7×7, stride=2)", "Conv2D (7×7, stride=2, padding=3)", "18 × 18"),
    ("MaxPool (3×3, stride=2)", "3×3 Max Pooling (stride=2)", "9 × 9"),
    ("Layer1 (ResBlock x2)", "3×3 Conv + Identity Mapping", "9 × 9"),
    ("Layer2 (ResBlock x2, stride=2)", "3×3 Conv (stride=2)", "5 × 5"),
    ("Layer3 (ResBlock x2, stride=2)", "3×3 Conv (stride=2)", "3 × 3"),
    ("Layer4 (ResBlock x2, stride=2)", "3×3 Conv (stride=2)", "2 × 2"),
    ("Before Average Pooling", "-", "2 × 2"),
    ("Global Average Pooling", "Avg Pooling → 1 × 1 feature map", "1 × 1"),
    ("Fully Connected Layer (FC)", "FC layer → num_classes output", "num_classes(10)"),
]
column_names = ["Layer / Block", "Operation", "Output Size (HxW)"]

# Transpose the rows back into the column -> list-of-values mapping.
data = {name: list(values) for name, values in zip(column_names, zip(*layer_rows))}

df = pd.DataFrame(data)

print(tabulate(df, headers='keys', tablefmt='psql'))


+----+--------------------------------+-----------------------------------+---------------------+
|    | Layer / Block                  | Operation                         | Output Size (HxW)   |
|----+--------------------------------+-----------------------------------+---------------------|
|  0 | Input                          | -                                 | 36 × 36             |
|  1 | Conv1 (7×7, stride=2)          | Conv2D (7×7, stride=2, padding=3) | 18 × 18             |
|  2 | MaxPool (3×3, stride=2)        | 3×3 Max Pooling (stride=2)        | 9 × 9               |
|  3 | Layer1 (ResBlock x2)           | 3×3 Conv + Identity Mapping       | 9 × 9               |
|  4 | Layer2 (ResBlock x2, stride=2) | 3×3 Conv (stride=2)               | 5 × 5               |
|  5 | Layer3 (ResBlock x2, stride=2) | 3×3 Conv (stride=2)               | 3 × 3               |
|  6 | Layer4 (ResBlock x2, stride=2) | 3×3 Conv (stride=2)               | 2 × 2               |
|  7 | Before Averag

#### Resized Images

In [13]:
def load_resized_dataset(data_dir="./data", image_size=(224, 224)):
    """Build train/test datasets that resize each image when it is accessed.

    Args:
        data_dir: Directory containing ``train_data.pt``, ``train_labels.pt``,
            ``test_data.pt`` and ``test_labels.pt``. Defaults to ``"./data"``,
            the location that used to be hard-coded.
        image_size: Target size passed to ``transforms.Resize``. Defaults to
            ``(224, 224)``, the size that used to be hard-coded.

    Returns:
        ``(train_dataset, test_dataset)``. Each item is ``(image, label)`` where
        the image is cast to float, resized and divided by 255.0, and the label
        is cast to ``int64``.
    """
    resize_transform = transforms.Compose([
        transforms.Resize(image_size),
        transforms.Lambda(lambda x: x / 255.0)
    ])

    class CustomImageDataset(Dataset):
        """Dataset over tensors loaded from disk, transformed per item."""

        def __init__(self, data_path, labels_path, transform=None):
            self.data = torch.load(data_path)
            self.labels = torch.load(labels_path)
            self.transform = transform

        def __len__(self):
            return len(self.labels)

        def __getitem__(self, idx):
            # The transform runs on every access; nothing is cached.
            image = self.data[idx].float()
            if self.transform:
                image = self.transform(image)
            return image, self.labels[idx].long()

    def _build_split(split):
        return CustomImageDataset(
            f"{data_dir}/{split}_data.pt",
            f"{data_dir}/{split}_labels.pt",
            transform=resize_transform
        )

    return _build_split("train"), _build_split("test")

In [14]:
# Build the resized datasets. From here on train_loader / test_loader /
# num_classes refer to the resized data, replacing the earlier values.
train_resized_dataset, test_resized_dataset = load_resized_dataset()
num_classes = torch.unique(train_resized_dataset.labels).numel()

# Only the training batches are shuffled.
train_loader = DataLoader(dataset=train_resized_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(dataset=test_resized_dataset, batch_size=32, shuffle=False)

In [15]:
# Experiment: ResNet-18 without pretrained weights, trained with the current
# train_loader / test_loader.
print("\nTraining Standard ResNet-18 from Scratch on resized images...")
model3 = get_standard_resnet(num_classes)
std_results_resized = train_model(
    model=model3,
    train_loader=train_loader,
    test_loader=test_loader,
    num_epochs=10,
    experiment_name="Standard_ResNet18_Resized",
)
    


Training Standard ResNet-18 from Scratch on resized images...


Test Accuracy: 64.29%
F1 Score: 0.6438
Epoch 1; Loss=2148.7248; Train Acc=50.17%, F1=0.4987
Test Accuracy: 74.31%
F1 Score: 0.7432
Epoch 2; Loss=1306.1625; Train Acc=70.88%, F1=0.7081
Test Accuracy: 75.64%
F1 Score: 0.7603
Epoch 3; Loss=1006.3156; Train Acc=77.76%, F1=0.7772
Test Accuracy: 80.79%
F1 Score: 0.8103
Epoch 4; Loss=803.0939; Train Acc=82.12%, F1=0.8209
Test Accuracy: 81.95%
F1 Score: 0.8210
Epoch 5; Loss=638.4421; Train Acc=85.75%, F1=0.8573
Test Accuracy: 81.21%
F1 Score: 0.8108
Epoch 6; Loss=493.7011; Train Acc=89.03%, F1=0.8901
Test Accuracy: 82.31%
F1 Score: 0.8252
Epoch 7; Loss=355.6636; Train Acc=92.04%, F1=0.9203
Test Accuracy: 82.82%
F1 Score: 0.8269
Epoch 8; Loss=263.4163; Train Acc=93.94%, F1=0.9394
Test Accuracy: 81.56%
F1 Score: 0.8179
Epoch 9; Loss=183.4760; Train Acc=95.85%, F1=0.9585
Test Accuracy: 83.42%
F1 Score: 0.8346
Epoch 10; Loss=151.8948; Train Acc=96.59%, F1=0.9659
Test Accuracy: 83.42%
F1 Score: 0.8346


0,1
batch,▁▁▁▁▂▂▂▂▂▂▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇█████
batch_loss,█▆▇▇▄▅▃▄▄▄▂▃▂▄▂▂▃▃▃▃▂▁▂▂▃▂▁▁▁▁▁▂▁▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁▅▅▇▇▇██▇██
test_f1,▁▅▅▇█▇██▇█
test_f1_score,▁
train_accuracy,▁▄▅▆▆▇▇███
train_f1,▁▄▅▆▆▇▇███
train_loss,█▅▄▃▃▂▂▁▁▁

0,1
batch,15627.0
batch_loss,0.07048
epoch,9.0
test_accuracy,83.42
test_f1,0.83456
test_f1_score,0.83456
train_accuracy,96.59
train_f1,0.96589
train_loss,151.89481


In [16]:
# Experiment: ImageNet-pretrained ResNet-18, fine-tuned with the current
# train_loader / test_loader.
print("\nFine-Tuning Pretrained ResNet-18...")
model4 = get_pretrained_resnet(num_classes)
fine_results_resized = train_model(
    model=model4,
    train_loader=train_loader,
    test_loader=test_loader,
    num_epochs=10,
    experiment_name="Pretrained_ResNet18_Resized",
)


Fine-Tuning Pretrained ResNet-18...


Test Accuracy: 83.50%
F1 Score: 0.8344
Epoch 1; Loss=1106.0997; Train Acc=75.88%, F1=0.7585
Test Accuracy: 84.06%
F1 Score: 0.8408
Epoch 2; Loss=660.0258; Train Acc=85.50%, F1=0.8549
Test Accuracy: 87.71%
F1 Score: 0.8767
Epoch 3; Loss=478.2064; Train Acc=89.57%, F1=0.8956
Test Accuracy: 88.07%
F1 Score: 0.8800
Epoch 4; Loss=362.1395; Train Acc=92.01%, F1=0.9201
Test Accuracy: 88.62%
F1 Score: 0.8870
Epoch 5; Loss=260.9106; Train Acc=94.13%, F1=0.9413
Test Accuracy: 89.36%
F1 Score: 0.8937
Epoch 6; Loss=203.1388; Train Acc=95.58%, F1=0.9558
Test Accuracy: 89.36%
F1 Score: 0.8938
Epoch 7; Loss=155.1594; Train Acc=96.60%, F1=0.9660
Test Accuracy: 88.62%
F1 Score: 0.8859
Epoch 8; Loss=132.8596; Train Acc=97.02%, F1=0.9702
Test Accuracy: 88.63%
F1 Score: 0.8874
Epoch 9; Loss=114.4891; Train Acc=97.47%, F1=0.9747
Test Accuracy: 89.65%
F1 Score: 0.8966
Epoch 10; Loss=97.6690; Train Acc=97.87%, F1=0.9787
Test Accuracy: 89.65%
F1 Score: 0.8966


0,1
batch,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▆▆▆▆▆▇▇▇▇▇█████
batch_loss,█▆▇▅▄▂▂▂▂▂▂▁▁▂▁▂▁▂▁▁▁▂▂▁▂▂▁▁▂▁▂▁▁▂▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁▂▆▆▇██▇▇██
test_f1,▁▂▆▆▇██▇▇█
test_f1_score,▁
train_accuracy,▁▄▅▆▇▇████
train_f1,▁▄▅▆▇▇████
train_loss,█▅▄▃▂▂▁▁▁▁

0,1
batch,15627.0
batch_loss,0.003
epoch,9.0
test_accuracy,89.65
test_f1,0.89658
test_f1_score,0.89658
train_accuracy,97.874
train_f1,0.97874
train_loss,97.66898


1. Training Accuracy:

    The models are achieving high training accuracy, with Pretrained_ResNet18_Resized leading.
    The accuracy increases steadily, showing that the models are learning well.

2. Test Accuracy:

    Pretrained_ResNet18_Resized has the highest test accuracy (89.65%), but it still trails its training accuracy (97.87%) by about 8 points, which suggests some overfitting.
    The models trained on the original-size images (Standard_ResNet18 and Pretrained_ResNet18) finish with lower test accuracy (74.54% and 81.18%) and larger train–test gaps (about 20 and 14 points), so they overfit more, not less.

3. Training Loss:

    The loss decreases as expected, but the Pretrained_ResNet18_Resized has the lowest training loss, which again suggests it might be overfitting to the training data.


Better accuracy may come at a cost. What changed/degraded from the previous setup?

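- Compared with Pretrained_ResNet18, the Pretrained_ResNet18_Resized model improves both training accuracy (95.24% → 97.87%) and test accuracy (81.18% → 89.65%). The test gain is the larger of the two, so the train–test gap narrows (about 14 → 8 points) rather than widening; the remaining gap shows that the model still overfits to some degree.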

- The per-epoch test accuracy of Pretrained_ResNet18_Resized is comparatively steady (it stays between 87.7% and 89.7% from epoch 3 onward), whereas Standard_ResNet18 shows larger swings (e.g., 71.50% → 67.61% → 75.45% over epochs 4–6).

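- The resizing step adds no new information: the 36 × 36 inputs are interpolated up to 224 × 224, which may introduce interpolation artifacts and increases the number of pixels per image by roughly 39× (224² / 36²), so every epoch requires substantially more computation and memory.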

- A lower training accuracy does not by itself mean better generalization: in these runs the non-resized models reach lower training accuracy and also lower test accuracy, so models should be compared on held-out performance rather than on how closely they fit the training data.

### Modify initial layers of ResNet18

In [5]:
def get_modified_resnet_v1(num_classes):
    """ResNet-18 (``weights=None``) whose first convolution uses stride 1."""
    net = models.resnet18(weights=None)
    # 7x7 first convolution with stride 1 and padding 3.
    net.conv1 = nn.Conv2d(
        in_channels=3, out_channels=64, kernel_size=7, stride=1, padding=3, bias=False
    )
    head_inputs = net.fc.in_features
    net.fc = nn.Linear(head_inputs, num_classes)
    return net

def get_modified_resnet_v2(num_classes):
    """ResNet-18 (``weights=None``) whose first convolution uses a 3x3 kernel."""
    net = models.resnet18(weights=None)
    # 3x3 first convolution with stride 2 and padding 1.
    net.conv1 = nn.Conv2d(
        in_channels=3, out_channels=64, kernel_size=3, stride=2, padding=1, bias=False
    )
    head_inputs = net.fc.in_features
    net.fc = nn.Linear(head_inputs, num_classes)
    return net

def get_modified_resnet_v3(num_classes):
    """ResNet-18 (``weights=None``) with the max-pool layer replaced by an identity."""
    net = models.resnet18(weights=None)
    # nn.Identity passes its input through unchanged, so no pooling happens here.
    net.maxpool = nn.Identity()
    head_inputs = net.fc.in_features
    net.fc = nn.Linear(head_inputs, num_classes)
    return net

In [6]:
def get_modified_pretrained_v1(num_classes):
    """ImageNet-pretrained ResNet-18 whose first convolution is replaced by a stride-1 one.

    The replacement ``conv1`` and the new ``fc`` head are newly created layers;
    the rest of the network keeps its ``IMAGENET1K_V1`` weights.
    """
    net = models.resnet18(weights="IMAGENET1K_V1")
    # 7x7 first convolution with stride 1 and padding 3.
    net.conv1 = nn.Conv2d(
        in_channels=3, out_channels=64, kernel_size=7, stride=1, padding=3, bias=False
    )
    head_inputs = net.fc.in_features
    net.fc = nn.Linear(head_inputs, num_classes)
    return net

def get_modified_pretrained_v2(num_classes):
    """ImageNet-pretrained ResNet-18 with a 3x3 first convolution and no max-pool.

    Unlike ``get_modified_resnet_v2``, this variant also replaces the max-pool
    layer with an identity. The replacement ``conv1`` and the new ``fc`` head
    are newly created layers.
    """
    net = models.resnet18(weights="IMAGENET1K_V1")
    # 3x3 first convolution with stride 2 and padding 1.
    net.conv1 = nn.Conv2d(
        in_channels=3, out_channels=64, kernel_size=3, stride=2, padding=1, bias=False
    )
    net.maxpool = nn.Identity()
    head_inputs = net.fc.in_features
    net.fc = nn.Linear(head_inputs, num_classes)
    return net

def get_modified_pretrained_v3(num_classes):
    """ImageNet-pretrained ResNet-18 whose ``conv1`` is replaced by two 3x3 convolutions.

    The replacement is conv(3->32) -> BatchNorm -> ReLU -> conv(32->64), all
    with stride 1. These layers and the new ``fc`` head are newly created; the
    rest of the network keeps its ``IMAGENET1K_V1`` weights.
    """
    net = models.resnet18(weights="IMAGENET1K_V1")
    first_conv = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)
    first_norm = nn.BatchNorm2d(32)
    first_act = nn.ReLU(inplace=True)
    second_conv = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1, bias=False)
    # The block ends with 64 output channels, like the convolution it replaces.
    net.conv1 = nn.Sequential(first_conv, first_norm, first_act, second_conv)
    head_inputs = net.fc.in_features
    net.fc = nn.Linear(head_inputs, num_classes)
    return net

In [7]:
# Each entry: (W&B run name, model factory, whether the factory loads ImageNet weights).
model_configs = [
    ("ResNet18_Modified_Stride", get_modified_resnet_v1, False),
    ("ResNet18_Modified_Kernel", get_modified_resnet_v2, False),
    ("ResNet18_No_Maxpool", get_modified_resnet_v3, False),
    ("Pretrained_Modified_Stride", get_modified_pretrained_v1, True),
    ("Pretrained_Modified_Kernel", get_modified_pretrained_v2, True),
    ("Pretrained_Custom_First", get_modified_pretrained_v3, True),
]

In [8]:
def train_modifications_model(model_fn, train_loader, test_loader, num_classes=10, num_epochs=10, lr=0.001, model_name="default", is_pretrained=False):
    """Build a model with ``model_fn`` and train it, logging to W&B.

    Args:
        model_fn: Callable taking ``num_classes`` and returning the model.
        train_loader: Loader yielding ``(images, labels)`` training batches. Its
            ``batch_size`` is recorded in the run config.
        test_loader: Loader passed to ``evaluate_model`` after every epoch and
            once more after the last epoch.
        num_classes: Number of output classes handed to ``model_fn``.
        num_epochs: Number of passes over ``train_loader``.
        lr: Learning rate given to Adam.
        model_name: W&B run name (project ``resnet18-modifications``).
        is_pretrained: Whether ``model_fn`` loads pretrained weights. It does
            not affect training; it is only recorded in the run config.

    Returns:
        None. All metrics are sent to W&B and printed.
    """
    model = model_fn(num_classes)

    # Using the run as a context manager guarantees it is finished even when
    # training raises, which matters when several configs are trained in a loop.
    with wandb.init(
        project="resnet18-modifications",
        name=model_name,
        config={
            "model_type": model_name,
            "learning_rate": lr,
            "batch_size": train_loader.batch_size,
            "epochs": num_epochs,
            "optimizer": "Adam",
            "architecture": "ResNet18",
            "pretrained": is_pretrained
        }
    ) as run:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model.to(device)

        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=lr)

        run.watch(model, log="all")

        for epoch in range(num_epochs):
            # evaluate_model leaves the model in eval mode, so switch back every epoch.
            model.train()
            total_loss = 0
            epoch_preds = []
            epoch_labels = []

            for batch_idx, (images, labels) in enumerate(train_loader):
                images, labels = images.to(device), labels.to(device)

                optimizer.zero_grad()
                outputs = model(images)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                # NOTE: total_loss is the SUM of the per-batch loss values.
                total_loss += loss.item()
                _, predicted = outputs.max(1)

                epoch_preds.extend(predicted.cpu().numpy())
                epoch_labels.extend(labels.cpu().numpy())

                # Log every 10th batch; "batch" is a global batch index across epochs.
                if batch_idx % 10 == 0:
                    run.log({
                        "batch_loss": loss.item(),
                        "batch": batch_idx + epoch * len(train_loader)
                    })

            train_accuracy = 100 * np.mean(np.array(epoch_preds) == np.array(epoch_labels))
            train_f1 = f1_score(epoch_labels, epoch_preds, average='weighted')

            test_accuracy, test_f1 = evaluate_model(model, test_loader, None)  # Don't log test metrics here

            train_cm = confusion_matrix(epoch_labels, epoch_preds)
            fig, ax = plt.subplots(figsize=(10, 8))
            try:
                sns.heatmap(train_cm, annot=True, fmt='d', cmap='Blues', ax=ax)
                ax.set_title(f'Training Confusion Matrix - Epoch {epoch+1}')
                ax.set_ylabel('True Label')
                ax.set_xlabel('Predicted Label')

                # NOTE: "epoch" is logged 0-based here, while the printed
                # message below is 1-based.
                run.log({
                    "epoch": epoch,
                    "train_loss": total_loss,
                    "train_accuracy": train_accuracy,
                    "test_accuracy": test_accuracy,
                    "train_f1": train_f1,
                    "test_f1": test_f1,
                    "train_confusion_matrix": wandb.Image(fig),
                })
            finally:
                # Close this exact figure even if plotting or logging fails.
                plt.close(fig)

            print(f"Epoch {epoch+1}; Loss={total_loss:.4f}; Train Acc={train_accuracy:.2f}%, F1={train_f1:.4f}")

        # Final evaluation with the run attached, which also logs the test
        # confusion matrix and the test_accuracy / test_f1_score metrics.
        evaluate_model(model, test_loader, run)

In [9]:
# Rebuild the tensor datasets from load_dataset(); the class count is the
# number of distinct values in the training label tensor.
train_dataset, test_dataset = load_dataset()
num_classes = torch.unique(train_dataset.tensors[1]).numel()

# Only the training batches are shuffled.
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

In [10]:
# Train every configured variant in turn; each call opens and closes its own W&B run.
for model_name, model_fn, is_pretrained in model_configs:
    print(f"\nTraining {model_name}...")

    train_modifications_model(
        model_fn,
        train_loader,
        test_loader,
        # Pass the class count inferred from the data instead of relying on
        # the callee's default of 10.
        num_classes=num_classes,
        num_epochs=10,
        model_name=model_name,
        is_pretrained=is_pretrained
    )


Training ResNet18_Modified_Stride...


[34m[1mwandb[0m: Currently logged in as: [33mpoorvi-c[0m ([33mpoorvi-c-iiit-hyderabad[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Test Accuracy: 61.79%
F1 Score: 0.6087
Epoch 1; Loss=2105.5178; Train Acc=51.40%, F1=0.5115
Test Accuracy: 67.70%
F1 Score: 0.6835
Epoch 2; Loss=1407.2558; Train Acc=68.53%, F1=0.6847
Test Accuracy: 71.69%
F1 Score: 0.7084
Epoch 3; Loss=1091.2341; Train Acc=75.53%, F1=0.7550
Test Accuracy: 76.27%
F1 Score: 0.7636
Epoch 4; Loss=861.3336; Train Acc=80.81%, F1=0.8078
Test Accuracy: 76.05%
F1 Score: 0.7604
Epoch 5; Loss=668.3356; Train Acc=85.05%, F1=0.8504
Test Accuracy: 78.94%
F1 Score: 0.7882
Epoch 6; Loss=488.4661; Train Acc=88.99%, F1=0.8898
Test Accuracy: 79.38%
F1 Score: 0.7913
Epoch 7; Loss=342.3967; Train Acc=92.22%, F1=0.9222
Test Accuracy: 78.21%
F1 Score: 0.7847
Epoch 8; Loss=249.0162; Train Acc=94.36%, F1=0.9436
Test Accuracy: 74.71%
F1 Score: 0.7499
Epoch 9; Loss=184.0450; Train Acc=95.91%, F1=0.9591
Test Accuracy: 77.43%
F1 Score: 0.7748
Epoch 10; Loss=159.1762; Train Acc=96.33%, F1=0.9633
Test Accuracy: 77.43%
F1 Score: 0.7748


0,1
batch,▁▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
batch_loss,█▆▅▅▄▄▃▄▄▃▄▃▄▃▂▃▂▂▂▃▂▁▂▂▂▁▂▁▁▁▂▁▂▁▂▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁▃▅▇▇███▆▇▇
test_f1,▁▄▅▇▇███▆▇
test_f1_score,▁
train_accuracy,▁▄▅▆▆▇▇███
train_f1,▁▄▅▆▆▇▇███
train_loss,█▅▄▄▃▂▂▁▁▁

0,1
batch,15627.0
batch_loss,0.12028
epoch,9.0
test_accuracy,77.43
test_f1,0.77482
test_f1_score,0.77482
train_accuracy,96.334
train_f1,0.96335
train_loss,159.17621



Training ResNet18_Modified_Kernel...


Test Accuracy: 63.12%
F1 Score: 0.6361
Epoch 1; Loss=2021.6472; Train Acc=53.65%, F1=0.5346
Test Accuracy: 69.25%
F1 Score: 0.6875
Epoch 2; Loss=1419.6505; Train Acc=68.58%, F1=0.6850
Test Accuracy: 70.37%
F1 Score: 0.7088
Epoch 3; Loss=1145.1464; Train Acc=74.46%, F1=0.7441
Test Accuracy: 71.16%
F1 Score: 0.7132
Epoch 4; Loss=930.5949; Train Acc=79.48%, F1=0.7945
Test Accuracy: 75.26%
F1 Score: 0.7479
Epoch 5; Loss=754.1536; Train Acc=83.35%, F1=0.8333
Test Accuracy: 75.90%
F1 Score: 0.7564
Epoch 6; Loss=588.4804; Train Acc=87.01%, F1=0.8700
Test Accuracy: 74.77%
F1 Score: 0.7481
Epoch 7; Loss=445.7455; Train Acc=90.09%, F1=0.9009
Test Accuracy: 75.98%
F1 Score: 0.7621
Epoch 8; Loss=343.8973; Train Acc=92.28%, F1=0.9227
Test Accuracy: 76.15%
F1 Score: 0.7619
Epoch 9; Loss=263.8606; Train Acc=94.18%, F1=0.9418
Test Accuracy: 76.03%
F1 Score: 0.7610
Epoch 10; Loss=219.9937; Train Acc=95.09%, F1=0.9509
Test Accuracy: 76.03%
F1 Score: 0.7610


0,1
batch,▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇█████
batch_loss,█▇▅▄▅▄▅▄▆▅▄▅▄▄▃▄▄▄▅▃▃▅▃▄▂▂▃▂▂▂▁▁▂▂▁▁▁▃▂▂
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁▄▅▅██▇████
test_f1,▁▄▅▅▇█▇███
test_f1_score,▁
train_accuracy,▁▄▅▅▆▇▇███
train_f1,▁▄▅▅▆▇▇███
train_loss,█▆▅▄▃▂▂▁▁▁

0,1
batch,15627.0
batch_loss,0.37964
epoch,9.0
test_accuracy,76.03
test_f1,0.76101
test_f1_score,0.76101
train_accuracy,95.092
train_f1,0.95092
train_loss,219.99366



Training ResNet18_No_Maxpool...


Test Accuracy: 61.03%
F1 Score: 0.6089
Epoch 1; Loss=2164.4769; Train Acc=49.87%, F1=0.4956
Test Accuracy: 70.87%
F1 Score: 0.7064
Epoch 2; Loss=1421.8274; Train Acc=67.84%, F1=0.6776
Test Accuracy: 71.79%
F1 Score: 0.7154
Epoch 3; Loss=1089.4894; Train Acc=75.79%, F1=0.7576
Test Accuracy: 74.37%
F1 Score: 0.7380
Epoch 4; Loss=843.2414; Train Acc=81.27%, F1=0.8125
Test Accuracy: 78.17%
F1 Score: 0.7795
Epoch 5; Loss=636.9084; Train Acc=85.81%, F1=0.8580
Test Accuracy: 77.87%
F1 Score: 0.7808
Epoch 6; Loss=453.8191; Train Acc=89.91%, F1=0.8991
Test Accuracy: 78.67%
F1 Score: 0.7881
Epoch 7; Loss=307.3722; Train Acc=93.19%, F1=0.9319
Test Accuracy: 79.22%
F1 Score: 0.7935
Epoch 8; Loss=227.7895; Train Acc=94.87%, F1=0.9487
Test Accuracy: 78.86%
F1 Score: 0.7891
Epoch 9; Loss=173.7152; Train Acc=96.18%, F1=0.9618
Test Accuracy: 78.71%
F1 Score: 0.7902
Epoch 10; Loss=146.7544; Train Acc=96.78%, F1=0.9678
Test Accuracy: 78.71%
F1 Score: 0.7902


0,1
batch,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇██
batch_loss,█▆███▅▅▄▄▃▄▄▃▂▃▂▂▂▃▃▂▃▂▂▁▃▁▂▄▃▂▂▁▁▁▁▁▁▂▂
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁▅▅▆█▇█████
test_f1,▁▅▅▆▇█████
test_f1_score,▁
train_accuracy,▁▄▅▆▆▇▇███
train_f1,▁▄▅▆▆▇▇███
train_loss,█▅▄▃▃▂▂▁▁▁

0,1
batch,15627.0
batch_loss,0.06549
epoch,9.0
test_accuracy,78.71
test_f1,0.7902
test_f1_score,0.7902
train_accuracy,96.78
train_f1,0.96779
train_loss,146.75442



Training Pretrained_Modified_Stride...


Test Accuracy: 65.30%
F1 Score: 0.6419
Epoch 1; Loss=1815.7091; Train Acc=59.70%, F1=0.5959
Test Accuracy: 75.92%
F1 Score: 0.7603
Epoch 2; Loss=1168.3753; Train Acc=74.52%, F1=0.7446
Test Accuracy: 77.02%
F1 Score: 0.7698
Epoch 3; Loss=904.5928; Train Acc=80.38%, F1=0.8035
Test Accuracy: 79.82%
F1 Score: 0.7963
Epoch 4; Loss=687.0563; Train Acc=85.13%, F1=0.8512
Test Accuracy: 78.74%
F1 Score: 0.7897
Epoch 5; Loss=505.0700; Train Acc=88.88%, F1=0.8888
Test Accuracy: 79.90%
F1 Score: 0.7978
Epoch 6; Loss=367.1254; Train Acc=91.84%, F1=0.9183
Test Accuracy: 78.28%
F1 Score: 0.7825
Epoch 7; Loss=273.7776; Train Acc=94.03%, F1=0.9403
Test Accuracy: 80.16%
F1 Score: 0.8034
Epoch 8; Loss=218.1647; Train Acc=95.19%, F1=0.9519
Test Accuracy: 79.36%
F1 Score: 0.7932
Epoch 9; Loss=180.5000; Train Acc=96.00%, F1=0.9600
Test Accuracy: 79.30%
F1 Score: 0.7954
Epoch 10; Loss=155.4323; Train Acc=96.67%, F1=0.9667
Test Accuracy: 79.30%
F1 Score: 0.7954


0,1
batch,▁▁▁▂▂▃▃▃▃▃▄▄▄▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
batch_loss,█▆▇▄▄▄▄▃▄▄▄▄▃▃▃▂▃▃▂▁▁▃▂▁▁▃▁▂▂▁▁▁▁▁▁▂▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁▆▇█▇█▇████
test_f1,▁▆▇█▇█▇███
test_f1_score,▁
train_accuracy,▁▄▅▆▇▇████
train_f1,▁▄▅▆▇▇████
train_loss,█▅▄▃▂▂▁▁▁▁

0,1
batch,15627.0
batch_loss,0.05669
epoch,9.0
test_accuracy,79.3
test_f1,0.79544
test_f1_score,0.79544
train_accuracy,96.668
train_f1,0.96668
train_loss,155.4323



Training Pretrained_Modified_Kernel...


Test Accuracy: 71.42%
F1 Score: 0.7150
Epoch 1; Loss=1711.4744; Train Acc=61.87%, F1=0.6176
Test Accuracy: 76.55%
F1 Score: 0.7666
Epoch 2; Loss=1056.6976; Train Acc=77.13%, F1=0.7709
Test Accuracy: 79.19%
F1 Score: 0.7916
Epoch 3; Loss=787.0344; Train Acc=82.88%, F1=0.8286
Test Accuracy: 79.86%
F1 Score: 0.7979
Epoch 4; Loss=589.2962; Train Acc=87.15%, F1=0.8714
Test Accuracy: 81.12%
F1 Score: 0.8118
Epoch 5; Loss=429.7756; Train Acc=90.63%, F1=0.9063
Test Accuracy: 81.49%
F1 Score: 0.8149
Epoch 6; Loss=317.9325; Train Acc=92.99%, F1=0.9298
Test Accuracy: 81.44%
F1 Score: 0.8155
Epoch 7; Loss=237.5101; Train Acc=94.76%, F1=0.9476
Test Accuracy: 81.15%
F1 Score: 0.8108
Epoch 8; Loss=189.2645; Train Acc=95.97%, F1=0.9597
Test Accuracy: 81.68%
F1 Score: 0.8163
Epoch 9; Loss=164.8763; Train Acc=96.44%, F1=0.9644
Test Accuracy: 82.05%
F1 Score: 0.8190
Epoch 10; Loss=139.4342; Train Acc=97.00%, F1=0.9700
Test Accuracy: 82.05%
F1 Score: 0.8190


0,1
batch,▁▁▁▂▂▂▂▃▃▃▄▄▄▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇█████
batch_loss,█▅▅▅▃▃▅▂▂▂▂▃▂▂▁▂▂▁▂▁▂▂▂▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▂▁
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁▄▆▇▇██▇███
test_f1,▁▄▆▇███▇██
test_f1_score,▁
train_accuracy,▁▄▅▆▇▇████
train_f1,▁▄▅▆▇▇████
train_loss,█▅▄▃▂▂▁▁▁▁

0,1
batch,15627.0
batch_loss,0.00916
epoch,9.0
test_accuracy,82.05
test_f1,0.81898
test_f1_score,0.81898
train_accuracy,96.998
train_f1,0.96998
train_loss,139.43422



Training Pretrained_Custom_First...


Test Accuracy: 74.55%
F1 Score: 0.7427
Epoch 1; Loss=1645.5820; Train Acc=63.65%, F1=0.6357
Test Accuracy: 79.23%
F1 Score: 0.7964
Epoch 2; Loss=997.3183; Train Acc=78.45%, F1=0.7840
Test Accuracy: 81.57%
F1 Score: 0.8163
Epoch 3; Loss=737.0838; Train Acc=84.10%, F1=0.8408
Test Accuracy: 83.05%
F1 Score: 0.8318
Epoch 4; Loss=559.0420; Train Acc=87.83%, F1=0.8782
Test Accuracy: 82.79%
F1 Score: 0.8262
Epoch 5; Loss=399.9726; Train Acc=91.30%, F1=0.9130
Test Accuracy: 82.60%
F1 Score: 0.8258
Epoch 6; Loss=304.9962; Train Acc=93.33%, F1=0.9333
Test Accuracy: 82.55%
F1 Score: 0.8254
Epoch 7; Loss=217.1242; Train Acc=95.30%, F1=0.9530
Test Accuracy: 81.95%
F1 Score: 0.8194
Epoch 8; Loss=184.1495; Train Acc=95.95%, F1=0.9595
Test Accuracy: 82.59%
F1 Score: 0.8240
Epoch 9; Loss=155.8418; Train Acc=96.67%, F1=0.9667
Test Accuracy: 82.37%
F1 Score: 0.8239
Epoch 10; Loss=136.7596; Train Acc=97.11%, F1=0.9711
Test Accuracy: 82.37%
F1 Score: 0.8239


0,1
batch,▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▅▅▅▅▅▆▆▇▇▇▇▇█████
batch_loss,█▅▄▄▃▂▃▂▂▃▃▂▂▂▂▃▂▂▂▃▄▂▂▂▁▁▁▁▁▂▂▁▁▂▁▁▂▂▂▁
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁▅▇████▇█▇▇
test_f1,▁▅▇████▇▇▇
test_f1_score,▁
train_accuracy,▁▄▅▆▇▇████
train_f1,▁▄▅▆▇▇████
train_loss,█▅▄▃▂▂▁▁▁▁

0,1
batch,15627.0
batch_loss,0.07633
epoch,9.0
test_accuracy,82.37
test_f1,0.82386
test_f1_score,0.82386
train_accuracy,97.11
train_f1,0.9711
train_loss,136.75963


- The first layer is initialized randomly, while other layers use pretrained weights, leading to mismatched distributions.

- The randomly initialized first layer may take longer to adapt and align with the pretrained feature maps.

- The pretrained layers expect a certain feature representation from the first layer, which is disrupted by custom initialization.

- With sufficient training, the model can still learn effectively, though it may require more epochs to stabilize.

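- More drastic changes (e.g., replacing conv1 with multiple layers) may worsen the issue compared to smaller kernel/stride changes, although in these runs the two-layer replacement (Pretrained_Custom_First) finished with the highest test accuracy of the three pretrained variants (82.37% vs. 82.05% and 79.30%).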

### Comparison Report

The provided graphs show that pretrained models (e.g., 'Pretrained_Custom_First', 'Pretrained_Modified_Kernel', 'Pretrained_Modified_Stride') generally outperform non-pretrained ResNet18 models ('ResNet18_No_Maxpool', 'ResNet18_Modified_Kernel', 'ResNet18_Modified_Stride') across all metrics (accuracy, loss, F1-score). 

Modifications to kernel size and stride within both pretrained and non-pretrained models show varying impacts; some modifications improve performance while others don't. The 'Pretrained_Custom_First' model achieves the highest test F1-score, suggesting a strong balance between precision and recall. The training graphs show pretrained models converge faster and to higher accuracy. All six modified models were trained with the same data loaders, so image size is not a factor in this comparison.

Regarding F1-score and confusion matrices (not shown), the F1-score is a more robust metric than accuracy alone, as it considers both precision and recall, particularly important when dealing with imbalanced datasets. A confusion matrix would provide a granular view of classification performance, revealing which classes are more accurately predicted than others.

The performance differences are likely due to the pretrained models leveraging learned features from a large dataset. This gives them a significant head start, leading to faster convergence and potentially higher accuracy. Modifications to kernel size and stride can affect the model's ability to learn features at different resolutions and receptive fields. The optimal choice depends on the specific dataset and architecture.