# **Model Training & Testing Notebook**

The first step is to set up our notebook for PyTorch.

In [20]:
# Prepare notebook for PyTorch:
import torch

# Choose the compute backend in priority order: CUDA first, then MPS, then
# plain CPU. The MPS check is only evaluated when CUDA is not available.
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)

print(device)

mps


Next, let's load in our dataset. Recall that we resized our images to be 224 by 224 pixels in the data preprocessing notebook. Because MLP models and CNN models require different types of input, the next code cell will just generically load in the data. We also need to make sure that the labels are presented correctly to the models, so we'll format them as make_model (e.g., "Toyota_Camry"). This step probably should have been done in the data preprocessing notebook, but it was a slight oversight.

In [21]:
# Imports for combining labels:
import os
from PIL import Image
from torch.utils.data import Dataset

# Class that combines make & model names (like a custom ImageFolder):
class MakeModelCombinedDataset(Dataset):
    """Image dataset over a ``root/<make>/<model>/<image>`` directory tree.

    Each ``<make>/<model>`` directory pair becomes one class named
    ``"<make>_<model>"`` (similar in spirit to a custom ImageFolder).

    Attributes set by ``__init__``:
        samples: list of ``(image_path, class_name)`` tuples, in sorted
            make / model / file-name order.
        classes: sorted list of the class names that have at least one image.
        class_to_idx: mapping from class name to its index in ``classes``.
        transform: optional callable applied to each loaded image.
    """

    # File extensions (compared case-insensitively) that count as images.
    IMG_EXTS = {".jpg", ".jpeg"}

    def __init__(self, root, transform=None):
        self.transform = transform
        self.samples = list(self._collect_samples(root))

        class_names = sorted({name for _, name in self.samples})
        self.classes = class_names
        self.class_to_idx = {name: position for position, name in enumerate(class_names)}

    @staticmethod
    def _sorted_subdirs(parent):
        """Yield ``(name, full_path)`` for each directory in ``parent``, sorted by name."""
        for entry in sorted(os.listdir(parent)):
            full_path = os.path.join(parent, entry)
            if os.path.isdir(full_path):
                yield entry, full_path

    def _collect_samples(self, root):
        """Yield ``(image_path, class_name)`` for every image under ``root``."""
        for make, make_dir in self._sorted_subdirs(root):
            for model, model_dir in self._sorted_subdirs(make_dir):
                class_name = f"{make}_{model}"
                for fname in sorted(os.listdir(model_dir)):
                    extension = os.path.splitext(fname)[1]
                    if extension.lower() not in self.IMG_EXTS:
                        continue
                    yield os.path.join(model_dir, fname), class_name

    def __len__(self):
        """Return the number of images found."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Return ``(image, label_index)`` for the sample at position ``idx``."""
        image_path, class_name = self.samples[idx]
        image = Image.open(image_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, self.class_to_idx[class_name]

In [18]:
# Imports for loading images:
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# Common transformations (since we already did resizing, we just need to load the images onto Tensors and normalize them):
# The images were already resized during preprocessing, so the shared pipeline
# only converts them to tensors and normalizes every channel with mean 0.5 / std 0.5:
common_transforms = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3),
    ]
)

# Build one make_model-labelled dataset per split:
train_dataset = MakeModelCombinedDataset(root="Final_Dataset/train", transform=common_transforms)
val_dataset = MakeModelCombinedDataset(root="Final_Dataset/val", transform=common_transforms)
test_dataset = MakeModelCombinedDataset(root="Final_Dataset/test", transform=common_transforms)

# Wrap each split in a DataLoader. Only the training split is shuffled (to
# prevent memorization of sample order); validation and test keep a fixed
# order so evaluation is consistent.
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

# Sanity-check the combined make_model labels:
print("Number of classes:", len(train_dataset.classes))
print("Sample class labels:", train_dataset.classes[:10])

Number of classes: 102
Sample class labels: ['Audi_A3', 'Audi_A4', 'Audi_A6', 'BMW_1', 'BMW_3', 'BMW_5', 'Chevrolet_Aveo', 'Chevrolet_Cruze', 'Citroen_Berlingo', 'Citroen_C-Elysee']


Looks good :)

We can write a generic training function for both models to simplify our code.

In [None]:
# Generic model training function:
def train_model(model, train_loader, criterion, optimizer, num_epochs):
    """Train ``model`` for ``num_epochs`` passes over ``train_loader``.

    Every batch is moved to the device that holds the model's parameters
    before the forward pass, so a model that has been placed on an
    accelerator (e.g. with ``model.to(device)``) does not hit a device
    mismatch against batches that the loader yields on the CPU.

    Args:
        model: The ``nn.Module`` to train; it is updated in place.
        train_loader: Iterable yielding ``(images, labels)`` tensor batches.
            It is iterated once per epoch and does not need ``__len__``.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer that updates the model's parameters.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        ``(train_losses, train_accuracies)``: two lists with one entry per
        epoch. The loss is the mean of the per-batch losses; the accuracy is
        a percentage in ``[0, 100]``.
    """
    # Follow the model's own device instead of assuming CPU; a model without
    # parameters leaves the batches where they are.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else None

    # Keep track of our model's performance:
    train_losses = []
    train_accuracies = []

    for epoch in range(num_epochs):
        model.train()  # Set model to training mode.

        running_loss = 0.0
        correct = 0
        total = 0
        num_batches = 0  # Counted by hand so loaders without __len__ also work.

        for images, labels in train_loader:
            if model_device is not None:
                images = images.to(model_device)
                labels = labels.to(model_device)

            # Clear previous gradients:
            optimizer.zero_grad()

            # Forward pass:
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward pass:
            loss.backward()
            optimizer.step()

            # Update loss:
            running_loss += loss.item()
            num_batches += 1

            # Calculate accuracy (predicted class = index of the largest logit):
            predicted = outputs.argmax(dim=1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

        # Calculate average training loss and accuracy for the epoch:
        avg_epoch_loss = running_loss / num_batches
        epoch_accuracy = 100 * correct / total

        # Store metrics for plotting:
        train_losses.append(avg_epoch_loss)
        train_accuracies.append(epoch_accuracy)

        # Print progress:
        print(f"Epoch [{epoch+1}/{num_epochs}], "
              f'Train Loss: {avg_epoch_loss:.4f}, Train Accuracy: {epoch_accuracy:.2f}%')

    return train_losses, train_accuracies

Similarly, we can create a generic function that will evaluate the models' performance. This function will be used for both the validation and test sets!

In [29]:
# Generic model evaluation function:
def evaluate_model(model, data_loader, criterion):
    """Evaluate ``model`` on one pass over ``data_loader`` without updating it.

    Every batch is moved to the device that holds the model's parameters
    before the forward pass, so a model that has been placed on an
    accelerator does not hit a device mismatch against CPU batches.

    Args:
        model: The ``nn.Module`` to evaluate; it is switched to eval mode.
        data_loader: Iterable yielding ``(images, labels)`` tensor batches
            (validation or test split). It does not need ``__len__``.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.

    Returns:
        ``(evaluation_losses, evaluation_accuracies)``: two single-element
        lists holding the mean per-batch loss and the accuracy as a
        percentage in ``[0, 100]``.
    """
    # Follow the model's own device instead of assuming CPU; a model without
    # parameters leaves the batches where they are.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else None

    # Keep track of evaluation metrics:
    evaluation_losses = []
    evaluation_accuracies = []

    model.eval()  # Set model to evaluation mode.

    running_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0  # Counted by hand so loaders without __len__ also work.

    with torch.no_grad():  # No gradients needed: weights and biases are not updated.
        for images, labels in data_loader:
            if model_device is not None:
                images = images.to(model_device)
                labels = labels.to(model_device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            # Update loss:
            running_loss += loss.item()
            num_batches += 1

            # Calculate accuracy (predicted class = index of the largest logit):
            predicted = outputs.argmax(dim=1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    # Calculate average evaluation loss and accuracy:
    average_loss = running_loss / num_batches
    accuracy = 100 * correct / total

    # Store metrics for plotting:
    evaluation_losses.append(average_loss)
    evaluation_accuracies.append(accuracy)

    # Print results:
    print(f"Evaluation Loss: {average_loss:.4f}, Evaluation Accuracy: {accuracy:.2f}%")

    return evaluation_losses, evaluation_accuracies

And finally, generic plotters for the losses and accuracies of the training and evaluation sets.

In [30]:
# Imports for plotting:
import matplotlib.pyplot as plt

def plot_metrics(train_losses, train_accuracies, evaluation_losses, evaluation_accuracies):
    """Plot training and evaluation loss/accuracy side by side.

    Args:
        train_losses: Per-epoch training losses; its length sets the epoch axis.
        train_accuracies: Per-epoch training accuracies (percentages).
        evaluation_losses: Evaluation losses. May be shorter than the training
            series (e.g. a single value measured after training finished).
        evaluation_accuracies: Evaluation accuracies (percentages), with the
            same length convention as ``evaluation_losses``.

    An evaluation series shorter than the training series is right-aligned to
    the final epochs, so a single post-training measurement is drawn as one
    marker at the last epoch instead of raising a shape-mismatch error.
    """
    num_epochs = len(train_losses)
    epochs = range(1, num_epochs + 1)

    def _x_positions(values):
        # Give every series an x-axis of its own length; shorter series are
        # aligned so that they end at the last training epoch.
        count = len(values)
        if count >= num_epochs:
            return range(1, count + 1)
        return range(num_epochs - count + 1, num_epochs + 1)

    plt.figure(figsize=(12, 6))

    # Plot for loss:
    plt.subplot(1, 2, 1)
    plt.plot(epochs, train_losses, label='Train Loss', color='blue', linestyle='-', marker='o')
    plt.plot(_x_positions(evaluation_losses), evaluation_losses,
             label='Evaluation Loss', color='red', linestyle='-', marker='x')
    plt.title('Model Loss Over Epochs')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()

    # Plot for accuracy:
    plt.subplot(1, 2, 2)
    plt.plot(epochs, train_accuracies, label='Train Accuracy', color='blue', linestyle='-', marker='o')
    plt.plot(_x_positions(evaluation_accuracies), evaluation_accuracies,
             label='Evaluation Accuracy', color='red', linestyle='-', marker='x')
    plt.title('Model Accuracy Over Epochs')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy (%)')  # Was mislabelled as 'Loss'.
    plt.legend()

    plt.tight_layout()
    plt.show()

## **Multi-Layer Perceptron (Baseline Model):**

For our baseline model, we will use a shallow multilayer perceptron. The general structure of a MLP model is as follows:

- *Input Layer*: The first layer that receives the input data. Each node in this layer represents a feature of the input data.

- *Hidden Layer(s)*: One or more layers between the input and output layers. Each node in these layers performs a weighted sum of inputs, applies an activation function, and passes the result to the next layer.

- *Output Layer*: The final layer that produces the prediction or output of the model. The number of neurons in this layer corresponds to the number of classes for classification problems or a single value for regression problems.

- *Weights & Biases*: Connections between neurons have associated weights, and each neuron has a bias. These parameters are learned during training to minimize the loss function.

- *Activation Functions*: Nonlinear functions such as ReLU, Sigmoid, and Tanh are applied to the output of each neuron to introduce non-linearity, which allows the network to model complex relationships.

An MLP is also fully-connected, meaning that each neuron in one layer is connected to every neuron in the next layer. 

**Note:** Multilayer perceptrons require each image to be flattened into a 1-D vector.

In [24]:
# Imports for MLP:
import torch.nn as nn
import torch.optim as optim

# Create MLP model class:
class MLP(nn.Module):
    """Shallow fully-connected classifier used as the baseline model.

    Args:
        input_dims: Number of features per sample after flattening.
        num_classes: Number of output logits.
    """

    def __init__(self, input_dims, num_classes):
        super().__init__()

        # Two hidden layers (512 and 256 units) followed by the logit layer:
        self.fc1 = nn.Linear(in_features=input_dims, out_features=512)
        self.fc2 = nn.Linear(in_features=512, out_features=256)
        self.fc3 = nn.Linear(in_features=256, out_features=num_classes)

    def forward(self, x):
        """Return the raw class logits for a batch ``x``."""
        # Collapse everything except the batch dimension into one vector per sample.
        flat = x.flatten(start_dim=1)
        # ReLU follows each hidden layer; the final layer is left linear.
        hidden = self.fc1(flat).relu()
        hidden = self.fc2(hidden).relu()
        return self.fc3(hidden)

In [25]:
# Instantiate the baseline MLP; a 224x224 RGB image flattens to 3 * 224 * 224 inputs:
MLP_model = MLP(
    input_dims=3 * 224 * 224,
    num_classes=len(train_dataset.classes),
)

In [26]:
# Cross-entropy loss for the multi-class labels, with Adam updating the MLP's parameters:
criterion = nn.CrossEntropyLoss()
MLP_optimizer = optim.Adam(params=MLP_model.parameters(), lr=1e-3)

In [32]:
# Train the MLP for 10 epochs and keep the per-epoch metrics for plotting:
MLP_train_losses, MLP_train_accuracies = train_model(
    model=MLP_model,
    train_loader=train_loader,
    criterion=criterion,
    optimizer=MLP_optimizer,
    num_epochs=10,
)

KeyboardInterrupt: 

In [None]:
# Measure the trained MLP's loss and accuracy on the validation split:
MLP_evaluation_losses, MLP_evaluation_accuracies = evaluate_model(
    model=MLP_model,
    data_loader=val_loader,
    criterion=criterion,
)

In [None]:
# Plot the MLP's training metrics together with its validation metrics:
plot_metrics(
    train_losses=MLP_train_losses,
    train_accuracies=MLP_train_accuracies,
    evaluation_losses=MLP_evaluation_losses,
    evaluation_accuracies=MLP_evaluation_accuracies,
)


In [9]:
# Evaluate MLP on test set: 
# DO NOT FILL IN THIS CODE CELL UNTIL AFTER HYPERPARAMETER TUNING!!!

## **Convolutional Neural Network:**

We expect that a CNN will perform much better overall than the MLP due to its ability to preserve spatial features. So, we will focus on tuning the CNN more. The general structure of a CNN is as follows:

- *Input Layer*: The input layer takes in the raw data (usually images), represented as a grid of pixels (width * height * color channels; for our resized images, 224 * 224 * 3).

- *Convolutional Layer(s)*: These layers apply convolutional filters (A.K.A. kernels) to the input or previous layer. Each filter detects specific features, such as edges or textures. These features are learned during training. The result is a set of feature maps, which highlight areas of the input that match the learned features.

- *Activation Functions*: After convolution, an activation function (usually ReLU) is applied to introduce non-linearity. This helps the network learn more complex patterns.

- *Pooling Layers*: Pooling layers (usually Max Pooling) reduce the spatial dimensions (width and height) of the feature maps. This helps reduce computation and makes the network more invariant to small translations of the input (i.e., shifting or zooming in an image). Essentially, pooling retains the most important features of the input data.

- *Fully-Connected Layers*: After several convolutional and pooling layers, the data is flattened into a 1-D vector and passed through one or more fully-connected layers. These layers are similar to those in a traditional MLP and are used to make the final classification or regression prediction.

- *Output Layer*: The final layer produces the output of the network. For classification tasks, this typically uses a softmax activation function (assuming multi-class classification) or sigmoid (assuming binary classification) to output probabilities.

- *Weights and Biases*: Like an MLP, CNNs have weights and biases that are learned during training through backpropagation.

**Note:** Convolutional neural networks do not require images to be flattened beforehand.

In [11]:
# Imports for CNN:

# Create CNN model class:
class CNN(nn.Module):
    """Shallow convolutional classifier: two conv + max-pool stages, then two linear layers.

    Args:
        num_classes: Number of output logits.
        input_size: Height/width in pixels of the square input images.
            Defaults to 224, the size the dataset images were resized to.
            The first fully-connected layer is sized from this value, so it
            must match the images that are actually fed to the network.

    Note: with the default 224-pixel input, ``fc1`` has 64 * 56 * 56 = 200,704
    input features.
    """

    def __init__(self, num_classes, input_size=224):
        super().__init__()

        # Keep the network shallow for now. Both convolutions use a 3x3 kernel
        # with stride 1 and padding 1, so they preserve height and width.
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)

        # Each of the two 2x2 max-pools in forward() halves height and width
        # (rounding down), so the flattened feature count follows from the
        # input size instead of being hard-coded for 32x32 images.
        pooled_size = input_size // 2 // 2
        self.fc1 = nn.Linear(64 * pooled_size * pooled_size, 512)
        self.fc2 = nn.Linear(512, num_classes)

    def forward(self, x):
        """Return the raw class logits for an image batch ``x``."""
        x = torch.relu(self.conv1(x))
        x = torch.max_pool2d(x, 2)
        x = torch.relu(self.conv2(x))
        x = torch.max_pool2d(x, 2)
        x = torch.flatten(x, 1)  # Flatten the feature maps for the linear layers.
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)  # Output logits.
        return x

In [12]:
# Instantiate the CNN with one output logit per make_model class:
CNN_model = CNN(
    num_classes=len(train_dataset.classes),
)

In [13]:
# Cross-entropy loss for the multi-class labels, with Adam updating the CNN's parameters:
criterion = nn.CrossEntropyLoss()
CNN_optimizer = optim.Adam(params=CNN_model.parameters(), lr=1e-3)

In [None]:
# Train the CNN for 10 epochs and keep the per-epoch metrics for plotting:
CNN_train_losses, CNN_train_accuracies = train_model(
    model=CNN_model,
    train_loader=train_loader,
    criterion=criterion,
    optimizer=CNN_optimizer,
    num_epochs=10,
)

In [None]:
# Measure the trained CNN's loss and accuracy on the validation split:
CNN_evaluation_losses, CNN_evaluation_accuracies = evaluate_model(
    model=CNN_model,
    data_loader=val_loader,
    criterion=criterion,
)

In [None]:
# Graph the losses and accuracies:
# plot_metrics requires four series: the training metrics first, then the
# evaluation metrics. The previous call passed only the two evaluation lists.
plot_metrics(CNN_train_losses, CNN_train_accuracies, CNN_evaluation_losses, CNN_evaluation_accuracies)

In [14]:
# Evaluate CNN on test set: 
# DO NOT FILL IN THIS CODE CELL UNTIL AFTER HYPERPARAMETER TUNING!!!