Module 3.0


In [1]:
import os
import random
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models

In [2]:
print("Torch version:", torch.__version__)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

if device.type == "cuda":
    print("GPU:", torch.cuda.get_device_name(0))


Torch version: 2.7.1+cu118
Using device: cuda
GPU: NVIDIA GeForce RTX 3050 6GB Laptop GPU


In [3]:
SEED = 42

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)


In [4]:
IMG_SIZE = 224

basic_transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor() 
])


In [5]:
train_dir = "../dataset/train"
val_dir   = "../dataset/val"
test_dir  = "../dataset/test"

train_dataset = datasets.ImageFolder(train_dir, transform=basic_transform)
val_dataset   = datasets.ImageFolder(val_dir, transform=basic_transform)
test_dataset  = datasets.ImageFolder(test_dir, transform=basic_transform)


In [6]:
num_classes = len(train_dataset.classes)

print("Number of classes:", num_classes)
print("First 10 classes:", train_dataset.classes[:10])


Number of classes: 429
First 10 classes: ['Audi_Audi_A1', 'Audi_Audi_A3_hatchback', 'Audi_Audi_A3_sedan', 'Audi_Audi_A4L', 'Audi_Audi_A4_estate', 'Audi_Audi_A5_convertible', 'Audi_Audi_A5_coupe', 'Audi_Audi_A5_hatchback', 'Audi_Audi_A6L', 'Audi_Audi_A7']


In [None]:
BATCH_SIZE = 32

train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=4,
    pin_memory=True
)

val_loader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4,
    pin_memory=True
)

test_loader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4,
    pin_memory=True
)

In [8]:
images, labels = next(iter(train_loader))

print("Images shape:", images.shape)
print("Labels shape:", labels.shape)
print("First 10 labels:", labels[:10])


Images shape: torch.Size([32, 3, 224, 224])
Labels shape: torch.Size([32])
First 10 labels: tensor([ 77, 307,  56,   5, 394, 357, 333,  26, 178,  40])


Module 3.2


In [9]:
train_transform = transforms.Compose([
    transforms.Resize((256, 256)),              # slightly larger
    transforms.RandomCrop(224),                  # position invariance
    transforms.RandomHorizontalFlip(p=0.5),      # left-right symmetry
    transforms.RandomRotation(degrees=7),        # small camera tilt
    transforms.ColorJitter(
        brightness=0.2,
        contrast=0.2,
        saturation=0.2,
        hue=0.1
    ),
    transforms.ToTensor()
])

In [10]:
eval_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])


In [11]:
train_dataset = datasets.ImageFolder(train_dir, transform=train_transform)
val_dataset   = datasets.ImageFolder(val_dir, transform=eval_transform)
test_dataset  = datasets.ImageFolder(test_dir, transform=eval_transform)


In [12]:
# Re-create DataLoaders after changing transforms
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=4,
    pin_memory=True
)

val_loader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4,
    pin_memory=True
)

test_loader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4,
    pin_memory=True
)


In [13]:
images, labels = next(iter(train_loader))
print(images.shape)


torch.Size([32, 3, 224, 224])


Module 3.3

In [14]:
from torchvision.models import resnet50, ResNet50_Weights

backbone=resnet50(weights=ResNet50_Weights.IMAGENET1K_V2)

In [15]:
print(backbone.fc)

Linear(in_features=2048, out_features=1000, bias=True)


In [16]:
num_features = backbone.fc.in_features

backbone.fc = nn.Identity()  # removes ImageNet head


In [17]:
class CarClassifier(nn.Module):
    def __init__(self, backbone, num_classes):
        super().__init__()
        self.backbone = backbone
        self.classifier = nn.Sequential(
            nn.BatchNorm1d(2048),
            nn.Dropout(p=0.5),
            nn.Linear(2048, num_classes)
        )

    def forward(self, x):
        features = self.backbone(x)
        outputs = self.classifier(features)
        return outputs


In [18]:
model = CarClassifier(backbone, num_classes)
model = model.to(device)


In [19]:
model.eval()  # VERY IMPORTANT

dummy_input = torch.randn(1, 3, 224, 224).to(device)

with torch.no_grad():
    dummy_output = model(dummy_input)

print("Output shape:", dummy_output.shape)


Output shape: torch.Size([1, 429])


Module 3.4

In [20]:
# Freeze ALL backbone parameters
for param in model.backbone.parameters():
    param.requires_grad = False


In [21]:
def count_trainable_params(model):
    trainable = 0
    frozen = 0
    for name, param in model.named_parameters():
        if param.requires_grad:
            trainable += param.numel()
        else:
            frozen += param.numel()
    return trainable, frozen

trainable, frozen = count_trainable_params(model)

print(f"Trainable params: {trainable:,}")
print(f"Frozen params:    {frozen:,}")


Trainable params: 883,117
Frozen params:    23,508,032


In [22]:
model.train()


CarClassifier(
  (backbone): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
   

Module 3.5

In [23]:
criterion = nn.CrossEntropyLoss()


In [24]:
optimizer = optim.AdamW(
    model.classifier.parameters(),  # ONLY head parameters
    lr=1e-3,
    weight_decay=1e-4
)


In [25]:
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode="min",
    factor=0.5,
    patience=3,
)


In [None]:
model.train()

images, labels = next(iter(train_loader))
images = images.to(device)
labels = labels.to(device)

outputs = model(images)
loss = criterion(outputs, labels)

print("Loss value:", loss.item())


Loss value: 6.519368648529053


In [27]:
from tqdm import tqdm

In [28]:
def train_one_epoch(model, loader, criterion, optimizer, device):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in tqdm(loader, desc="Training", leave=True):
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * images.size(0)
        _, preds = torch.max(outputs, 1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

    epoch_loss = running_loss / total
    epoch_acc = correct / total

    return epoch_loss, epoch_acc


In [29]:
def validate(model, loader, criterion, device):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in tqdm(loader, desc="Validation", leave=True):
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            running_loss += loss.item() * images.size(0)
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

    epoch_loss = running_loss / total
    epoch_acc = correct / total

    return epoch_loss, epoch_acc


In [30]:
EPOCHS_PHASE1 = 5

for epoch in range(EPOCHS_PHASE1):
    train_loss, train_acc = train_one_epoch(
        model, train_loader, criterion, optimizer, device
    )

    val_loss, val_acc = validate(
        model, val_loader, criterion, device
    )

    scheduler.step(val_loss)

    current_lr = optimizer.param_groups[0]["lr"]

    print(
        f"Epoch [{epoch+1}/{EPOCHS_PHASE1}] | "
        f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f} | "
        f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f} | "
        f"LR: {current_lr:.6f}"
    )


Training: 100%|██████████| 330/330 [01:04<00:00,  5.13it/s]
Validation: 100%|██████████| 76/76 [00:26<00:00,  2.86it/s]


Epoch [1/5] | Train Loss: 5.6045, Train Acc: 0.0495 | Val Loss: 4.6191, Val Acc: 0.1424 | LR: 0.001000


Training: 100%|██████████| 330/330 [01:03<00:00,  5.20it/s]
Validation: 100%|██████████| 76/76 [00:26<00:00,  2.88it/s]


Epoch [2/5] | Train Loss: 4.1033, Train Acc: 0.1977 | Val Loss: 3.9758, Val Acc: 0.2301 | LR: 0.001000


Training: 100%|██████████| 330/330 [01:04<00:00,  5.15it/s]
Validation: 100%|██████████| 76/76 [00:27<00:00,  2.80it/s]


Epoch [3/5] | Train Loss: 3.4041, Train Acc: 0.2936 | Val Loss: 3.6969, Val Acc: 0.2637 | LR: 0.001000


Training: 100%|██████████| 330/330 [01:04<00:00,  5.12it/s]
Validation: 100%|██████████| 76/76 [00:27<00:00,  2.78it/s]


Epoch [4/5] | Train Loss: 2.9844, Train Acc: 0.3628 | Val Loss: 3.4518, Val Acc: 0.3104 | LR: 0.001000


Training: 100%|██████████| 330/330 [01:06<00:00,  4.96it/s]
Validation: 100%|██████████| 76/76 [00:27<00:00,  2.79it/s]

Epoch [5/5] | Train Loss: 2.7231, Train Acc: 0.4072 | Val Loss: 3.2877, Val Acc: 0.3390 | LR: 0.001000





Phase 2 training (un-freezing)

In [31]:
# Unfreeze only the last ResNet block
for param in model.backbone.layer4.parameters():
    param.requires_grad = True


In [32]:
trainable, frozen = count_trainable_params(model)

print(f"Trainable params: {trainable:,}")
print(f"Frozen params:    {frozen:,}")


Trainable params: 15,847,853
Frozen params:    8,543,296


In [None]:
optimizer = optim.AdamW(
    [
        {"params": model.backbone.layer4.parameters(), "lr": 1e-4},
        {"params": model.classifier.parameters(), "lr": 1e-3},
    ],
    weight_decay=1e-4
)

In [34]:
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode="min",
    factor=0.5,
    patience=3
)


In [35]:
class EarlyStopping:
    def __init__(self, patience=5, min_delta=0.0):
        self.patience = patience
        self.min_delta = min_delta
        self.best_loss = None
        self.counter = 0
        self.best_state = None

    def step(self, val_loss, model):
        if self.best_loss is None or val_loss < self.best_loss - self.min_delta:
            self.best_loss = val_loss
            self.counter = 0
            self.best_state = {
                k: v.detach().cpu().clone()
                for k, v in model.state_dict().items()
            }
            return False  # do not stop
        else:
            self.counter += 1
            return self.counter >= self.patience


In [None]:
EPOCHS_PHASE2 = 15
early_stopper = EarlyStopping(patience=4)


for epoch in range(EPOCHS_PHASE2):
    train_loss, train_acc = train_one_epoch(
        model, train_loader, criterion, optimizer, device
    )

    val_loss, val_acc = validate(
        model, val_loader, criterion, device
    )
    stop = early_stopper.step(val_loss, model)
    if stop:
        print(f"\nEarly stopping triggered at epoch {epoch+1}")
        model.load_state_dict(early_stopper.best_state)
        break


    scheduler.step(val_loss)

    current_lr_head = optimizer.param_groups[1]["lr"]
    current_lr_backbone = optimizer.param_groups[0]["lr"]

    print(
        f"Epoch [{epoch+1}/{EPOCHS_PHASE2}] | "
        f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f} | "
        f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f} | "
        f"LR(backbone): {current_lr_backbone:.6f}, "
        f"LR(head): {current_lr_head:.6f}"
    )

Training: 100%|██████████| 330/330 [01:17<00:00,  4.25it/s]
Validation: 100%|██████████| 76/76 [00:30<00:00,  2.47it/s]


Epoch [1/15] | Train Loss: 2.2714, Train Acc: 0.4786 | Val Loss: 2.5020, Val Acc: 0.4636 | LR(backbone): 0.000100, LR(head): 0.001000


Training: 100%|██████████| 330/330 [01:20<00:00,  4.11it/s]
Validation: 100%|██████████| 76/76 [00:27<00:00,  2.75it/s]


Epoch [2/15] | Train Loss: 1.6805, Train Acc: 0.5863 | Val Loss: 2.1672, Val Acc: 0.5224 | LR(backbone): 0.000100, LR(head): 0.001000


Training: 100%|██████████| 330/330 [01:16<00:00,  4.33it/s]
Validation: 100%|██████████| 76/76 [00:32<00:00,  2.32it/s]


Epoch [3/15] | Train Loss: 1.2848, Train Acc: 0.6726 | Val Loss: 1.9054, Val Acc: 0.5741 | LR(backbone): 0.000100, LR(head): 0.001000


Training: 100%|██████████| 330/330 [01:17<00:00,  4.26it/s]
Validation: 100%|██████████| 76/76 [00:26<00:00,  2.84it/s]


Epoch [4/15] | Train Loss: 0.9648, Train Acc: 0.7373 | Val Loss: 1.7821, Val Acc: 0.6014 | LR(backbone): 0.000100, LR(head): 0.001000


Training: 100%|██████████| 330/330 [01:16<00:00,  4.31it/s]
Validation: 100%|██████████| 76/76 [00:28<00:00,  2.66it/s]


Epoch [5/15] | Train Loss: 0.7890, Train Acc: 0.7866 | Val Loss: 1.6816, Val Acc: 0.6196 | LR(backbone): 0.000100, LR(head): 0.001000


Training: 100%|██████████| 330/330 [01:17<00:00,  4.25it/s]
Validation: 100%|██████████| 76/76 [00:28<00:00,  2.67it/s]


Epoch [6/15] | Train Loss: 0.6778, Train Acc: 0.8072 | Val Loss: 1.6268, Val Acc: 0.6407 | LR(backbone): 0.000100, LR(head): 0.001000


Training: 100%|██████████| 330/330 [01:16<00:00,  4.31it/s]
Validation: 100%|██████████| 76/76 [00:27<00:00,  2.79it/s]


Epoch [7/15] | Train Loss: 0.5733, Train Acc: 0.8390 | Val Loss: 1.5425, Val Acc: 0.6623 | LR(backbone): 0.000100, LR(head): 0.001000


Training: 100%|██████████| 330/330 [01:15<00:00,  4.36it/s]
Validation: 100%|██████████| 76/76 [00:27<00:00,  2.78it/s]


Epoch [8/15] | Train Loss: 0.5065, Train Acc: 0.8552 | Val Loss: 1.5014, Val Acc: 0.6701 | LR(backbone): 0.000100, LR(head): 0.001000


Training: 100%|██████████| 330/330 [01:14<00:00,  4.42it/s]
Validation: 100%|██████████| 76/76 [00:26<00:00,  2.86it/s]


Epoch [9/15] | Train Loss: 0.4539, Train Acc: 0.8680 | Val Loss: 1.5180, Val Acc: 0.6755 | LR(backbone): 0.000100, LR(head): 0.001000


Training: 100%|██████████| 330/330 [01:16<00:00,  4.29it/s]
Validation: 100%|██████████| 76/76 [00:27<00:00,  2.73it/s]


Epoch [10/15] | Train Loss: 0.4159, Train Acc: 0.8789 | Val Loss: 1.4570, Val Acc: 0.6941 | LR(backbone): 0.000100, LR(head): 0.001000


Training: 100%|██████████| 330/330 [01:17<00:00,  4.28it/s]
Validation: 100%|██████████| 76/76 [00:27<00:00,  2.72it/s]


Epoch [11/15] | Train Loss: 0.3769, Train Acc: 0.8916 | Val Loss: 1.4701, Val Acc: 0.7003 | LR(backbone): 0.000100, LR(head): 0.001000


Training: 100%|██████████| 330/330 [01:17<00:00,  4.24it/s]
Validation: 100%|██████████| 76/76 [00:28<00:00,  2.66it/s]


Epoch [12/15] | Train Loss: 0.3586, Train Acc: 0.8921 | Val Loss: 1.5451, Val Acc: 0.6945 | LR(backbone): 0.000100, LR(head): 0.001000


Training: 100%|██████████| 330/330 [01:17<00:00,  4.23it/s]
Validation: 100%|██████████| 76/76 [00:27<00:00,  2.75it/s]


Epoch [13/15] | Train Loss: 0.3401, Train Acc: 0.9011 | Val Loss: 1.5017, Val Acc: 0.6941 | LR(backbone): 0.000100, LR(head): 0.001000


Training: 100%|██████████| 330/330 [01:17<00:00,  4.24it/s]
Validation: 100%|██████████| 76/76 [00:28<00:00,  2.63it/s]


Early stopping triggered at epoch 14





In [None]:
model.eval()
torch.set_grad_enabled(False)

<torch.autograd.grad_mode.set_grad_enabled at 0x21591b1bcd0>

In [38]:
def evaluate_top1(model, loader, device):
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in loader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            preds = outputs.argmax(dim=1)

            correct += (preds == labels).sum().item()
            total += labels.size(0)

    return correct / total


In [39]:
test_top1 = evaluate_top1(model, test_loader, device)
print(f"Test Top-1 Accuracy: {test_top1:.4f}")

Test Top-1 Accuracy: 0.6982


In [40]:
def evaluate_top5(model, loader, device):
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in loader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            _, top5 = outputs.topk(5, dim=1)

            correct += sum(
                labels[i].item() in top5[i]
                for i in range(labels.size(0))
            )
            total += labels.size(0)

    return correct / total


In [41]:
test_top5 = evaluate_top5(model, test_loader, device)
print(f"Test Top-5 Accuracy: {test_top5:.4f}")


Test Top-5 Accuracy: 0.8798


In [42]:
idx_to_class = {v: k for k, v in train_dataset.class_to_idx.items()}


In [43]:
import torch.nn.functional as F

def predict_top5(model, image_tensor, device):
    model.eval()
    with torch.no_grad():
        image_tensor = image_tensor.unsqueeze(0).to(device)  # [1,3,224,224]
        outputs = model(image_tensor)

        probs = F.softmax(outputs, dim=1)

        top5_probs, top5_indices = probs.topk(5)

        results = []
        for i in range(5):
            class_name = idx_to_class[top5_indices[0][i].item()]
            confidence = top5_probs[0][i].item()
            results.append((class_name, confidence))

    return results


In [46]:
from PIL import Image
from torchvision import transforms

img = Image.open("test_car.jpg").convert("RGB")

img_tensor = eval_transform(img)

preds = predict_top5(model, img_tensor, device)

for name, conf in preds:
    print(f"{name}: {conf*100:.2f}%")

Audi_Audi_A1: 98.65%
Audi_Audi_A3_hatchback: 1.23%
Chevy_Cruze_sedan: 0.03%
Citroen_Quatre_hatchback: 0.02%
Mitsubishi_ASX_abroad_version: 0.02%


In [47]:
import torch
import json
import os
from datetime import datetime

# Create a folder for saved models
os.makedirs("saved_models", exist_ok=True)

timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

# 1. Save model weights
model_path = f"saved_models/resnet50_compcars_{timestamp}.pth"
torch.save(model.state_dict(), model_path)

# 2. Save class mapping
class_map_path = f"saved_models/class_to_idx_{timestamp}.json"
with open(class_map_path, "w") as f:
    json.dump(train_dataset.class_to_idx, f, indent=2)

# 3. Save training metadata
meta = {
    "architecture": "resnet50",
    "num_classes": 429,
    "backbone": "ImageNet pretrained",
    "phase2_unfrozen": "layer4",
    "top1_test": 0.60,
    "top5_test": 0.8001,
    "dataset": "CompCars",
    "split": "80/20 train/val per class, official test",
}

meta_path = f"saved_models/training_meta_{timestamp}.json"
with open(meta_path, "w") as f:
    json.dump(meta, f, indent=2)

print("Model saved to:", model_path)
print("Class mapping saved to:", class_map_path)
print("Metadata saved to:", meta_path)


Model saved to: saved_models/resnet50_compcars_20260111_154612.pth
Class mapping saved to: saved_models/class_to_idx_20260111_154612.json
Metadata saved to: saved_models/training_meta_20260111_154612.json


In [None]:
from scipy.io import loadmat

data = loadmat("D:\CompCars\misc\car_type.mat")
print(data.keys())


dict_keys(['__header__', '__version__', '__globals__', 'types'])


In [None]:
from scipy.io import loadmat

data = loadmat("D:\CompCars\misc\make_model_name.mat")
print(data.keys())

dict_keys(['__header__', '__version__', '__globals__', 'make_names', 'model_names'])
