## Import Required Packages

In [79]:
!pip install torch torchvision pandas matplotlib numpy albumentations scikit-learn


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.2[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [80]:
from matplotlib import pyplot as plt
import numpy as np
from collections import Counter
import cv2
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import datasets
from torchvision import models
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torchvision.utils import make_grid
import pandas as pd
from sklearn.model_selection import train_test_split
from PIL import Image
import torchvision.transforms as transforms


In this notebook, we define the following constants.

In [81]:
# Per-channel mean / standard deviation handed to `transforms.Normalize` below.
# NOTE(review): these look like the usual ImageNet statistics (RGB order) — confirm.
MEAN = (0.485, 0.456, 0.406)
STD = (0.229, 0.224, 0.225)
# NOTE(review): the transforms visible in this notebook hard-code a 224x224 resize
# and do not read HEIGHT/WIDTH — confirm whether these are used elsewhere.
HEIGHT = 32
WIDTH = 32
# Number of samples per batch for the train / validation / test DataLoaders.
BATCH_SIZE = 32
# Number of passes over the training set performed by `training`.
NUM_EPOCHS = 10
# File that `training` writes the best model (lowest validation loss) to.
BEST_MODEL_PATH = 'best_model.pt'

## Dataset

In [82]:
# declare class
class CarDataset(Dataset):
    """Image dataset driven by a DataFrame of relative image paths.

    Each row of ``df`` must have an ``id`` column holding the image path
    relative to ``data_dir`` (e.g. ``'train/1.jpg'``). An optional ``label``
    column holds the integer class; rows without it yield the label ``-1``.

    Args:
        df (pandas.DataFrame): Index of the images (and optionally labels).
        data_dir (str): Directory that the ``id`` paths are relative to.
        transform (callable, optional): Applied to the loaded RGB PIL image.
    """

    def __init__(self, df, data_dir='data', transform=None):
        # Reset the index so positional lookups stay simple after a split/shuffle.
        self.df = df.reset_index(drop=True)
        self.data_dir = data_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows (images) in the dataset."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``."""
        row = self.df.iloc[idx]

        # CSV has 'id' column (e.g., 'train/1.jpg' or 'test/4136.jpg')
        img_path = f"{self.data_dir}/{row['id']}"

        # train.csv has 'label', test.csv doesn't -> use -1 as the "no label" marker
        label = int(row["label"]) if "label" in row else -1

        # Open inside a context manager so the file handle is released
        # deterministically; convert() returns an independent in-memory copy
        # that stays valid after the file is closed.
        with Image.open(img_path) as img:
            image = img.convert("RGB")

        if self.transform:
            image = self.transform(image)

        return image, label

## Transformations

In [83]:
# Training pipeline: resize, light augmentation (flip, then small rotation),
# tensor conversion, and per-channel normalisation with MEAN / STD.
train_transform = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=10),
    transforms.ToTensor(),
    transforms.Normalize(mean=MEAN, std=STD),
])

# Validation / test pipeline: the same preprocessing without random augmentation.
test_and_val_transform = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=MEAN, std=STD),
])

In [None]:
# Read the image indexes: train.csv is split into train/validation below,
# test.csv is kept as-is.
full_dataset = pd.read_csv('data/train.csv')
test_df = pd.read_csv('data/test.csv')

# Hold out 20% of the training rows for validation (fixed seed for repeatability).
train_df, valid_df = train_test_split(
    full_dataset,
    test_size=0.2,
    shuffle=True,
    random_state=42,
)

# Only the training split gets the augmenting transform.
train_dataset = CarDataset(df=train_df, transform=train_transform)
val_dataset = CarDataset(df=valid_df, transform=test_and_val_transform)
test_dataset = CarDataset(df=test_df, transform=test_and_val_transform)

## Method 1: Transfer Learning
Here we use a pretrained ResNet18 from `torchvision.models`: the pretrained layers are frozen and only a new 100-class final layer is trained.

In [85]:
# Load ResNet18 with its pretrained ImageNet weights. The `weights=` enum replaces
# the deprecated `pretrained=True` flag; IMAGENET1K_V1 is the checkpoint that
# `pretrained=True` used to select.
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

# Freeze all pretrained layers so they are not updated during training.
for param in model.parameters():
    param.requires_grad = False

# Replace the final FC layer with a fresh 100-class head. The input width is read
# from the layer being replaced instead of hard-coding 512.
model.fc = nn.Linear(model.fc.in_features, 100)  # 100 classes

# Make sure ONLY the new fc layer is trainable (new layers default to
# requires_grad=True; this loop keeps that intent explicit).
for param in model.fc.parameters():
    param.requires_grad = True



## Define DataLoader for training, validation and test datasets

In [None]:
# Only the training loader reshuffles each epoch; validation and test keep file order.
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
valid_loader = DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)

### Select Device for model training and evaluation

In [None]:
# Prefer the GPU when CUDA is available, otherwise fall back to the CPU,
# then move the model's parameters onto that device.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = model.to(device)

## Loss Function

In [88]:
# Cross-entropy over the class logits. The (default) mean reduction is spelled out
# because the training/evaluation loops multiply the batch loss by the batch size.
criterion = nn.CrossEntropyLoss(reduction="mean")


## Optimizer


In [89]:
# SGD with momentum over the model's parameters.
optimizer = optim.SGD(
    params=model.parameters(),
    lr=0.01,
    momentum=0.9,
)

In [90]:
def evaluation(model, loader, criterion, device, phase='Valid'):
  """Run the model over a data loader and report loss and accuracy.

  The model is switched to eval mode and stays in eval mode when the
  function returns. Gradients are disabled for the whole pass. A one-line
  summary prefixed with ``phase`` is printed.

  Args:
    model (torch.nn.Module): Model to evaluate.
    loader (iterable): Yields ``(images, labels)`` batches of tensors.
    criterion (callable): Loss taking ``(outputs, labels)``; its value is
      weighted by batch size, so it is treated as a per-batch mean.
    device (torch.device): Device the batches are moved to.
    phase (str, optional): Label used in the printed summary. Defaults to 'Valid'.

  Returns:
    dict: ``'loss'`` (sample-weighted average loss), ``'accuracy'``
      (fraction of correct predictions), ``'ground_truth'`` and
      ``'predictions'`` (lists with one entry per sample, in loader order).
  """
  model.eval()
  all_preds, all_targets = [], []
  loss_sum = 0
  n_seen = 0
  n_correct = 0
  with torch.no_grad():
    for images, labels in loader:
      images, labels = images.to(device), labels.to(device)
      outputs = model(images)
      n_batch = images.size(0)
      # Undo the per-batch averaging so that a smaller final batch is weighted correctly.
      loss_sum += criterion(outputs, labels).item() * n_batch
      n_seen += n_batch
      # Predicted class = index of the largest output along the class dimension.
      preds = torch.max(outputs, 1)[1]
      all_preds.extend(preds.cpu().numpy())
      all_targets.extend(labels.cpu().numpy())
      n_correct += (preds == labels).sum().item()
  accuracy = n_correct / n_seen
  loss = loss_sum / n_seen
  print(f'     {phase} Accuracy={accuracy:<10.4f}  Loss= {loss:<10.4f}')
  return {'loss': loss,
          'accuracy': accuracy,
          'ground_truth': all_targets,
          'predictions': all_preds}

In [None]:
def training(model, train_loader, valid_loader, criterion, optimizer, device,
             epochs, best_model_path):
  """Train a model and evaluate it on a validation set after every epoch.

  For each epoch the model is put in training mode, optimised over
  ``train_loader``, and then scored with ``evaluation`` on ``valid_loader``.
  Whenever the validation loss improves on the best seen so far, the whole
  model object is written to ``best_model_path`` with ``torch.save``.

  Args:
    model (torch.nn.Module): The model to be trained.
    train_loader (torch.utils.data.DataLoader): The data loader for the training set.
    valid_loader (torch.utils.data.DataLoader): The data loader for the validation set.
    criterion (torch.nn.modules.loss._Loss): The loss function.
    optimizer (torch.optim.Optimizer): The optimization algorithm.
    device (torch.device): The device (CPU or GPU) where computations will be performed.
    epochs (int): The number of passes over the training set.
    best_model_path (str): The path where the best model will be saved.

  Returns:
    dict or None: The ``evaluation`` result ('loss', 'accuracy',
      'ground_truth', 'predictions') of the epoch with the lowest validation
      loss, or ``None`` if no epoch produced a loss below infinity
      (e.g. ``epochs == 0``).
  """
  best_loss = float('inf')
  best_results = None
  for epoch in range(epochs):
    # evaluation() leaves the model in eval mode, so training mode must be
    # re-enabled at the start of EVERY epoch. Calling train() only once before
    # the loop would run all epochs after the first in eval mode.
    model.train()
    total_loss = 0
    total = 0
    correct = 0
    for images, labels in train_loader:
      optimizer.zero_grad()
      images = images.to(device)
      labels = labels.to(device)
      outputs = model(images)
      loss = criterion(outputs, labels)
      loss.backward()
      optimizer.step()
      # Weight the batch-mean loss by batch size so the epoch average is per-sample.
      total_loss += loss.item() * images.size(0)
      total += images.size(0)
      _, preds = torch.max(outputs, 1)
      correct += (preds == labels).sum().item()
    accuracy = correct / total
    loss = total_loss / total
    print(f'{epoch:<4} Train Accuracy={accuracy:<10.4f}  Loss= {loss:<10.4f}')
    results = evaluation(model, valid_loader, criterion, device)
    if results['loss'] < best_loss:
      # Checkpoint the full model object (not just its state_dict) on improvement.
      torch.save(model, best_model_path)
      best_loss = results['loss']
      best_results = results
    print()
  return best_results

In [92]:
# Train for NUM_EPOCHS and keep the validation metrics of the best epoch.
best_restults = training(
    model=model,
    train_loader=train_loader,
    valid_loader=valid_loader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    epochs=NUM_EPOCHS,
    best_model_path=BEST_MODEL_PATH,
)

0    Train Accuracy=0.0450      Loss= 4.5293    
     Valid Accuracy=0.0979      Loss= 4.0140    



KeyboardInterrupt: 