<a href="https://colab.research.google.com/github/Abhishekkk2004/Different-CNN-Architectures/blob/main/Different_CNN_Architectures.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## In this notebook, we look at how various CNN architectures are loaded and used for transfer learning. We also train each of them on a classification dataset and compare how well they perform.

In [3]:
# Different Libraries
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
from torchvision import models
from torchsummary import summary
import wandb
import os

In [4]:
# Device setup: pick the CUDA GPU when PyTorch reports one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [5]:
# Show which device was selected (rendered as the cell output).
device

device(type='cpu')

## Data Preprocessing

In [None]:
import torch
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms

# Data transforms: both pipelines resize to 224x224 and normalise with the
# same per-channel mean/std; only the training pipeline adds a random flip.
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ]),
}

# Path to your dataset
dataset_dir = "/kaggle/input/5-flower-classification/flower_images"

# Seed for the train/val split so every run trains and validates on the same images.
SPLIT_SEED = 42

# Two views of the same image folder, one per transform pipeline.
# random_split returns Subsets that all point at ONE underlying dataset, so
# overwriting `subset.dataset.transform` would change the transform for the
# training subset as well. Keeping a separate validation view avoids that.
full_dataset = datasets.ImageFolder(root=dataset_dir, transform=data_transforms['train'])
val_view = datasets.ImageFolder(root=dataset_dir, transform=data_transforms['val'])
assert val_view.samples == full_dataset.samples, "train/val views must index the same files"

# Split into train (90%) and val (10%)
train_size = int(0.9 * len(full_dataset))
val_size = len(full_dataset) - train_size

split_generator = torch.Generator().manual_seed(SPLIT_SEED)
train_dataset, val_split = random_split(
    full_dataset, [train_size, val_size], generator=split_generator
)

# Validation uses the held-out indices, but reads them through the
# validation view so no augmentation is applied.
val_dataset = torch.utils.data.Subset(val_view, val_split.indices)

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)

# Check classes
print("Classes:", full_dataset.classes)
print("Train samples:", len(train_dataset))
print("Val samples:", len(val_dataset))


## Initialising the various models and their configuration dictionaries

### AlexNet

In [9]:
# Hyper-parameters written down for the AlexNet experiment.
# NOTE(review): no visible cell reads this dict, and "input_size" (128) does
# not match the 224x224 resize used by the data transforms; confirm before use.
config_AlexNet = dict(
    epochs=10,
    batch_size=16,
    learning_rate=0.001,
    architecture="alexnet",
    pretrained=True,
    input_size=128,
)

In [11]:
# Load AlexNet with torchvision's default pretrained weights so its layout can be inspected.
alexnet = models.alexnet(weights=models.AlexNet_Weights.DEFAULT)

Downloading: "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth" to /root/.cache/torch/hub/checkpoints/alexnet-owt-7be5be79.pth
100%|██████████| 233M/233M [00:02<00:00, 103MB/s]


In [12]:
# Print the module tree; get_alexnet swaps out the last entry of `classifier`.
print(alexnet)

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [18]:
# Model setup function for AlexNet
def get_alexnet(num_classes=5, pretrained=True):
    """Build an AlexNet whose only trainable part is a new final linear layer.

    Args:
        num_classes: Number of outputs of the replacement classifier layer.
        pretrained: If True, load torchvision's default AlexNet weights;
            otherwise the network is created without pretrained weights.

    Returns:
        The model with every original parameter frozen and a fresh
        ``nn.Linear`` installed as the last entry of ``model.classifier``.
    """
    model = models.alexnet(weights=models.AlexNet_Weights.DEFAULT if pretrained else None)

    # Freeze all layers
    for param in model.parameters():
        param.requires_grad = False

    # Replace the final classifier layer, reusing the input width of the old one
    new_head = nn.Linear(model.classifier[-1].in_features, num_classes)
    model.classifier[-1] = new_head

    # Only train the new classifier layer. Unfreeze the object that was just
    # installed instead of looking it up again through a hard-coded index.
    for param in new_head.parameters():
        param.requires_grad = True

    return model

### VGG16

In [15]:
# Load VGG16 with torchvision's default pretrained weights so its layout can be inspected.
vgg16 = models.vgg16(weights=models.VGG16_Weights.DEFAULT)

In [16]:
# Display the module tree; get_vgg16 swaps out the last entry of `classifier`.
vgg16

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [19]:
# Model setup function for VGG16
def get_vgg16(num_classes=5, pretrained=True):
    """Return a VGG16 with a frozen backbone and a trainable `num_classes`-way head.

    When `pretrained` is true the default torchvision weights are loaded;
    otherwise no pretrained weights are requested.
    """
    chosen_weights = models.VGG16_Weights.DEFAULT if pretrained else None
    model = models.vgg16(weights=chosen_weights)

    # Turn gradients off for the whole network in one call.
    model.requires_grad_(False)

    # Install a new last classifier layer with the same input width as the old one.
    previous_head = model.classifier[-1]
    model.classifier[-1] = nn.Linear(previous_head.in_features, num_classes)

    # The new layer is the only part left trainable.
    model.classifier[-1].requires_grad_(True)

    return model


### GoogLeNet

In [21]:
# Load GoogLeNet with torchvision's default pretrained weights so its layout can be inspected.
googlenet = models.googlenet(weights=models.GoogLeNet_Weights.DEFAULT)

Downloading: "https://download.pytorch.org/models/googlenet-1378be20.pth" to /root/.cache/torch/hub/checkpoints/googlenet-1378be20.pth
100%|██████████| 49.7M/49.7M [00:00<00:00, 123MB/s]


In [22]:
# Display the module tree; get_googlenet replaces the `fc` layer.
googlenet

GoogLeNet(
  (conv1): BasicConv2d(
    (conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
  (conv2): BasicConv2d(
    (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv3): BasicConv2d(
    (conv): Conv2d(64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
  (inception3a): Inception(
    (branch1): BasicConv2d(
      (conv): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track

Note: in GoogLeNet the final classification layer is `model.fc`, whereas VGG keeps it at `model.classifier[-1]`.

In [23]:
# Model setup function for GoogLeNet
def get_googlenet(num_classes=5, pretrained=True):
    """Build a GoogLeNet whose only trainable part is a new `fc` layer.

    Args:
        num_classes: Number of outputs of the replacement `fc` layer.
        pretrained: If True, load torchvision's default GoogLeNet weights;
            otherwise create the network without pretrained weights.

    Returns:
        The model with every original parameter frozen and a fresh
        ``nn.Linear`` installed as ``model.fc``. In both modes the model has
        no auxiliary classifiers, so its forward pass returns a plain tensor.
    """
    if pretrained:
        # With weights supplied and aux_logits not requested, torchvision
        # drops the auxiliary heads after loading the checkpoint.
        model = models.googlenet(weights=models.GoogLeNet_Weights.DEFAULT)
    else:
        # Without weights the auxiliary heads are built by default. In train
        # mode the model would then return a GoogLeNetOutputs tuple (which the
        # loss and accuracy code cannot consume) and keep two frozen heads
        # sized for the default class count, so they are disabled explicitly.
        model = models.googlenet(weights=None, aux_logits=False)

    # Freeze all layers
    for param in model.parameters():
        param.requires_grad = False

    # Replace the final fully connected (fc) layer
    model.fc = nn.Linear(model.fc.in_features, num_classes)

    # Only train the new fc layer
    for param in model.fc.parameters():
        param.requires_grad = True

    return model

### SqueezeNet

In [24]:
# Load SqueezeNet 1.1 with torchvision's default pretrained weights so its layout can be inspected.
squeezenet = models.squeezenet1_1(weights=models.SqueezeNet1_1_Weights.DEFAULT)

Downloading: "https://download.pytorch.org/models/squeezenet1_1-b8a52dc0.pth" to /root/.cache/torch/hub/checkpoints/squeezenet1_1-b8a52dc0.pth
100%|██████████| 4.73M/4.73M [00:00<00:00, 82.3MB/s]


In [25]:
# Display the module tree; get_squeezenet replaces the conv at `classifier[1]`.
squeezenet

SqueezeNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
    (3): Fire(
      (squeeze): Conv2d(64, 16, kernel_size=(1, 1), stride=(1, 1))
      (squeeze_activation): ReLU(inplace=True)
      (expand1x1): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1))
      (expand1x1_activation): ReLU(inplace=True)
      (expand3x3): Conv2d(16, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (expand3x3_activation): ReLU(inplace=True)
    )
    (4): Fire(
      (squeeze): Conv2d(128, 16, kernel_size=(1, 1), stride=(1, 1))
      (squeeze_activation): ReLU(inplace=True)
      (expand1x1): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1))
      (expand1x1_activation): ReLU(inplace=True)
      (expand3x3): Conv2d(16, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (expand3x3_activation): ReLU(inplace=True)
    )
    (5): MaxPool2d

For SqueezeNet, the last layer is a Conv2d inside model.classifier[1], not a fully connected layer like in VGG or GoogLeNet.

In [26]:
# Model setup function for SqueezeNet 1.1
def get_squeezenet(num_classes=5, pretrained=True):
    """Build a SqueezeNet 1.1 whose only trainable part is a new classifier conv.

    Args:
        num_classes: Number of output channels of the replacement 1x1 conv.
        pretrained: If True, load torchvision's default SqueezeNet 1.1
            weights; otherwise create the network without pretrained weights.

    Returns:
        The model with every original parameter frozen and a fresh 1x1
        ``nn.Conv2d`` installed at ``model.classifier[1]``.
    """
    model = models.squeezenet1_1(weights=models.SqueezeNet1_1_Weights.DEFAULT if pretrained else None)

    # Freeze all layers
    for param in model.parameters():
        param.requires_grad = False

    # Replace the final classifier convolution layer. The input width is read
    # from the layer being replaced (as the other builders do with
    # in_features) rather than hard-coded.
    old_conv = model.classifier[1]
    new_conv = nn.Conv2d(old_conv.in_channels, num_classes, kernel_size=(1, 1), stride=(1, 1))
    model.classifier[1] = new_conv

    # Keep the model's recorded class count in step with the new head.
    model.num_classes = num_classes

    # Only train the new classifier layer
    for param in new_conv.parameters():
        param.requires_grad = True

    return model

### ResNet

In [27]:
# Load ResNet50 with torchvision's default pretrained weights so its layout can be inspected.
resnet = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)

Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /root/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 155MB/s]


In [28]:
# Display the module tree; get_resnet50 replaces the `fc` layer.
resnet

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [29]:
# Model setup function for ResNet50
def get_resnet50(num_classes=5, pretrained=True):
    """Return a ResNet50 with a frozen backbone and a trainable `num_classes`-way `fc`.

    The default torchvision weights are loaded when `pretrained` is true;
    otherwise no pretrained weights are requested.
    """
    checkpoint = models.ResNet50_Weights.DEFAULT if pretrained else None
    model = models.resnet50(weights=checkpoint)

    # Disable gradients for every parameter that came with the network.
    model.requires_grad_(False)

    # Swap in a new output layer that keeps the original input width.
    feature_width = model.fc.in_features
    model.fc = nn.Linear(feature_width, num_classes)

    # Gradients stay enabled for the new layer only.
    model.fc.requires_grad_(True)

    return model

### MobileNet

In [30]:
# Load MobileNetV2 with torchvision's default pretrained weights so its layout can be inspected.
mobilenet = models.mobilenet_v2(weights=models.MobileNet_V2_Weights.DEFAULT)

Downloading: "https://download.pytorch.org/models/mobilenet_v2-7ebf99e0.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v2-7ebf99e0.pth
100%|██████████| 13.6M/13.6M [00:00<00:00, 42.0MB/s]


In [31]:
# Display the module tree; get_mobilenet_v2 replaces the layer at `classifier[1]`.
mobilenet

MobileNetV2(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=

In [32]:
# Model setup function for MobileNetV2
def get_mobilenet_v2(num_classes=5, pretrained=True):
    """Return a MobileNetV2 with a frozen backbone and a trainable final linear layer.

    The default torchvision weights are loaded when `pretrained` is true;
    otherwise no pretrained weights are requested. The layer at
    ``classifier[1]`` is replaced by one with `num_classes` outputs.
    """
    selected_weights = models.MobileNet_V2_Weights.DEFAULT if pretrained else None
    model = models.mobilenet_v2(weights=selected_weights)

    # Stop gradient tracking on each existing tensor.
    for tensor in model.parameters():
        tensor.requires_grad_(False)

    # Build the replacement head from the width of the layer it replaces.
    head_inputs = model.classifier[1].in_features
    model.classifier[1] = nn.Linear(head_inputs, num_classes)

    # Re-enable gradients on the replacement head only.
    for tensor in model.classifier[1].parameters():
        tensor.requires_grad_(True)

    return model

### DenseNet

In [33]:
# Load DenseNet121 with torchvision's default pretrained weights so its layout can be inspected.
densenet = models.densenet121(weights=models.DenseNet121_Weights.DEFAULT)

Downloading: "https://download.pytorch.org/models/densenet121-a639ec97.pth" to /root/.cache/torch/hub/checkpoints/densenet121-a639ec97.pth
100%|██████████| 30.8M/30.8M [00:00<00:00, 133MB/s]


In [34]:
# Display the module tree; get_densenet121 replaces the `classifier` layer.
densenet

DenseNet(
  (features): Sequential(
    (conv0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu0): ReLU(inplace=True)
    (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (denseblock1): _DenseBlock(
      (denselayer1): _DenseLayer(
        (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (denselayer2): _DenseLayer(
        (norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu

In [35]:
# Model setup function for DenseNet121
def get_densenet121(num_classes=5, pretrained=True):
    """Return a DenseNet121 with a frozen backbone and a trainable `classifier`.

    The default torchvision weights are loaded when `pretrained` is true;
    otherwise no pretrained weights are requested. ``model.classifier`` is
    replaced by a linear layer with `num_classes` outputs.
    """
    model = models.densenet121(
        weights=models.DenseNet121_Weights.DEFAULT if pretrained else None
    )

    # Freeze the whole network first ...
    model.requires_grad_(False)

    # ... then attach a new classifier of the same input width ...
    replacement = nn.Linear(model.classifier.in_features, num_classes)
    model.classifier = replacement

    # ... and leave only that classifier trainable.
    model.classifier.requires_grad_(True)

    return model

### SENet

In [37]:
import timm

In [41]:
# Create timm's 'senet154' model with pretrained weights so its layout can be inspected.
senet = timm.create_model('senet154', pretrained=True)

In [42]:
# Display the module tree; in this run the repr starts with `ResNet(`.
senet

ResNet(
  (conv1): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  )
  (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (act1): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (act1): ReLU(inplace=True)
      (conv2

SENet isn’t available in torchvision.models, so we’ll load it from timm instead.

In [43]:
# Model setup function for SENet
def get_senet(num_classes=5, pretrained=True):
    """Build timm's SENet154 with only a new classification head trainable.

    Args:
        num_classes: Number of outputs of the replacement classifier.
        pretrained: Forwarded to ``timm.create_model`` to choose whether
            pretrained weights are loaded.

    Returns:
        The model with every original parameter frozen and a fresh
        classifier head that is left trainable.
    """
    # Load SENet154 from timm
    model = timm.create_model('senet154', pretrained=pretrained)

    # Freeze all layers
    for param in model.parameters():
        param.requires_grad = False

    # Replace the classifier through timm's own API rather than assigning to
    # `model.fc`: this does not depend on the head's attribute name and also
    # updates the class count the model records.
    model.reset_classifier(num_classes)

    # Only train the new classifier layer
    for param in model.get_classifier().parameters():
        param.requires_grad = True

    return model

## Parameter comparison

In [45]:
# Display name -> builder function, in the order the rows of the parameter table are printed
model_funcs = dict(
    AlexNet=get_alexnet,
    VGG=get_vgg16,
    GoogLeNet=get_googlenet,
    SqueezeNet=get_squeezenet,
    ResNet=get_resnet50,
    MobileNet=get_mobilenet_v2,
    DenseNet=get_densenet121,
    SENet=get_senet,
)

# Function to count parameters
def count_params(model):
    """Return ``(total, trainable)`` element counts over ``model.parameters()``.

    A parameter counts as trainable when its ``requires_grad`` flag is set.
    """
    total_params = 0
    trainable_params = 0
    for tensor in model.parameters():
        size = tensor.numel()
        total_params += size
        if tensor.requires_grad:
            trainable_params += size
    return total_params, trainable_params

# Build each model once and print one table row per architecture:
# total parameters, trainable parameters and the trainable share in percent.
num_classes = 5
pretrained = True

print(
    f"{'Model':<12} {'Total Params':>15} "
    f"{'Trainable Params':>20} {'% Trainable':>15}"
)
print(70 * "-")
for name, func in model_funcs.items():
    model = func(num_classes=num_classes, pretrained=pretrained)
    total, trainable = count_params(model)
    # Avoid dividing by zero for a model that reports no parameters.
    if total > 0:
        percent = trainable / total * 100
    else:
        percent = 0
    print("{:<12} {:>15,} {:>20,} {:>14.2f}%".format(name, total, trainable, percent))


Model           Total Params     Trainable Params     % Trainable
----------------------------------------------------------------------
AlexNet           57,024,325               20,485           0.04%
VGG              134,281,029               20,485           0.02%
GoogLeNet          5,605,029                5,125           0.09%
SqueezeNet           725,061                2,565           0.35%
ResNet            23,518,277               10,245           0.04%
MobileNet          2,230,277                6,405           0.29%
DenseNet           6,958,981                5,125           0.07%
SENet            113,050,229               10,245           0.01%


In [46]:
# =============================
# Model Dictionary
# =============================
# One entry per architecture: "fn" is the builder, "config" the training
# settings handed to train_model. Every model gets the same settings, so the
# entries are generated from a list of (name, builder) pairs; each entry
# still owns its own config dict.
# NOTE(review): train_model reads "model_name", "epochs" and "learning_rate"
# only; "batch_size" is not applied anywhere visible (the DataLoaders are
# built with batch_size=16).
model_dict = {
    label: {
        "fn": builder,
        "config": {
            "model_name": label,
            "epochs": 5,
            "batch_size": 32,
            "learning_rate": 1e-4,
        },
    }
    for label, builder in (
        ("AlexNet", get_alexnet),
        ("VGG16", get_vgg16),
        ("GoogLeNet", get_googlenet),
        ("SqueezeNet", get_squeezenet),
        ("ResNet50", get_resnet50),
        ("MobileNetV2", get_mobilenet_v2),
        ("DenseNet121", get_densenet121),
        ("SENet", get_senet),
    )
}


## Training Function

In [49]:
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt

# Dictionary to store results: config["model_name"] -> best validation accuracy in percent
model_results = {}


def _train_one_epoch(model, loader, criterion, optimizer, device):
    """Run one optimisation pass over `loader`; return (mean loss per sample, accuracy)."""
    # NOTE(review): train() also switches the frozen layers (e.g. dropout,
    # batch norm) to training behaviour; confirm that is intended.
    model.train()
    loss_sum = 0.0
    correct = 0
    seen = 0
    for images, labels in loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_size = labels.size(0)
        # The criterion averages within a batch, so weight by batch size to get
        # a per-sample mean even when the last batch is smaller.
        loss_sum += loss.item() * batch_size
        correct += (outputs.argmax(dim=1) == labels).sum().item()
        seen += batch_size

    if seen == 0:
        # An empty loader yields no samples; report zeros instead of dividing by zero.
        return 0.0, 0.0
    return loss_sum / seen, correct / seen


def _evaluate(model, loader, device):
    """Return the accuracy of `model` on `loader` in eval mode (0.0 if it is empty)."""
    model.eval()
    correct = 0
    seen = 0
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            correct += (outputs.argmax(dim=1) == labels).sum().item()
            seen += labels.size(0)
    return correct / seen if seen else 0.0


def train_model(model, config: dict, train_loader, val_loader, device):
    """Train the trainable parameters of `model` and record its best validation accuracy.

    Args:
        model: Network to train; it is moved to `device`. Only parameters with
            ``requires_grad`` set are given to the optimizer.
        config: Must provide "learning_rate", "epochs" and "model_name".
        train_loader: Iterable of ``(images, labels)`` batches used for training.
        val_loader: Iterable of ``(images, labels)`` batches used for validation.
        device: Device the model and every batch are moved to.

    Returns:
        The best validation accuracy over all epochs as a fraction in [0, 1].
        The same value, times 100, is stored in
        ``model_results[config["model_name"]]``.
    """
    model = model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(
        filter(lambda p: p.requires_grad, model.parameters()),
        lr=config["learning_rate"]
    )

    best_val_acc = 0.0  # Track the best validation accuracy

    for epoch in range(config["epochs"]):
        train_loss, train_acc = _train_one_epoch(model, train_loader, criterion, optimizer, device)
        val_acc = _evaluate(model, val_loader, device)

        # Save best val accuracy
        best_val_acc = max(best_val_acc, val_acc)

        print(f"[{config['model_name']}] Epoch {epoch+1}/{config['epochs']} "
              f"| Train Loss: {train_loss:.4f} "
              f"| Train Acc: {train_acc*100:.2f}% "
              f"| Val Acc: {val_acc*100:.2f}%")

    # Store result for later comparison
    model_results[config["model_name"]] = best_val_acc * 100
    return best_val_acc


def plot_model_performance(results_dict):
    """Show a bar chart with one bar per key of `results_dict`.

    Keys label the bars on the x axis; values are the bar heights, drawn on a
    y axis fixed to the range 0-100.
    """
    labels = list(results_dict)
    scores = [results_dict[label] for label in labels]

    plt.figure(figsize=(10, 6))
    plt.bar(labels, scores, color='skyblue')

    # Titles and axis labels
    plt.title("Model Performance Comparison")
    plt.xlabel("Model")
    plt.ylabel("Best Validation Accuracy (%)")

    # Axis cosmetics: fixed percentage scale, slanted names, horizontal guide lines
    plt.ylim(0, 100)
    plt.xticks(rotation=45)
    plt.grid(axis="y", linestyle="--", alpha=0.7)

    plt.show()


## Final Evaluation

In [50]:
import gc

In [48]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Train the architectures one after another with the shared data loaders;
# train_model records each model's best validation accuracy in model_results.
for name, data in model_dict.items():
    model = data["fn"](num_classes=5, pretrained=True)
    train_model(model, data["config"], train_loader, val_loader, device)

    # Free up memory before the next architecture is built: drop the last
    # reference, let the garbage collector reclaim what it can, and only then
    # ask PyTorch to release its cached GPU blocks. Emptying the cache first
    # would miss whatever the collector frees afterwards.
    del model
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()


NameError: name 'train_loader' is not defined

In [None]:
# After training all models: chart the best validation accuracy (in percent)
# that train_model stored for each one in model_results.
plot_model_performance(model_results)