# A [simple] CNN for Image Classification

As described in the `README`, we will begin with a simple CNN that classifies a single image as belong to a cheap, average, or expensive house. The model architecture is reproduced below:

| Layer | Input Dimensions | Output Dimensions | Objective | Activation Function | In Channels | Out Channels | Kernel | Stride | Padding |
|----------|----------|----------|----------|----------|----------|----------|----------|----------|----------| 
| Convolutional Layer 1 | 224, 224, 3 | 224, 224, 32 | Learn spatial features | ReLU | 3 | 32 | 3 | 1 | 1
| Pooling Layer 1 | 224, 224, 32 | 112, 112, 32 | Down-sampling | | 32 | 32 | 2 | 2 | 0
| Convolutional Layer 2 | 112, 112, 32 | 112, 112, 64 | Learn spatial features | ReLU | 32 | 64 | 3 | 1 | 1
| Pooling Layer 2 | 112, 112, 64 | 56, 56, 64 | Down-sampling | | 64 | 64 | 2 | 2 | 0
| Dense Layer 1 | 200704 | 512 | Learn spatial features | ReLU | 200704 | 512 | | | |
| Dense Layer 2 | 512 | 3 | Classification | Softmax | 512 | 3 | | | |

In [1]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Subset
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, accuracy_score
from src.preprocessing import get_housing_dataset
from src.logging import get_logger

logger = get_logger(__name__)

%load_ext autoreload
%autoreload 2

In [5]:
# Define the CNN Model
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()

        self.conv1 = nn.Conv2d(
            3, 32, kernel_size=3, stride=1, padding=1
        )
        self.conv2 = nn.Conv2d(
            32, 64, kernel_size=3, stride=1, padding=1
        )
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout1 = nn.Dropout(p=0.2)
        self.fc1 = nn.Linear(64 * 56 * 56, 512)
        self.dropout2 = nn.Dropout(p=0.2)
        self.fc2 = nn.Linear(512, 3)  # output layer

    def forward(self, x):
        x = self.pool(nn.functional.relu(self.conv1(x)))
        x = self.pool(nn.functional.relu(self.conv2(x)))
        x = x.view(-1, 64 * 56 * 56)  # Flatten the tensor for the dense layer
        x = self.dropout1(x)
        x = nn.functional.relu(self.fc1(x))
        x = self.dropout2(x)
        x = self.fc2(x)  # No activation function here as CrossEntropyLoss will be used
        return x

In [6]:
dataset = get_housing_dataset()

# Create indices for the full dataset and split them
indices = list(range(len(dataset)))
train_indices, val_indices, _, _ = train_test_split(
    indices, indices, test_size=0.2, random_state=42
)

train_subset = Subset(dataset, train_indices)
val_subset = Subset(dataset, val_indices)

train_loader = DataLoader(train_subset, batch_size=4, shuffle=True, num_workers=0)
val_loader = DataLoader(val_subset, batch_size=4, shuffle=True, num_workers=0)

In [7]:
model = SimpleCNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)  # TODO try a higher weight decay

# TODO apply learning rate decay


def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=10):
    for epoch in range(num_epochs):
        # Run a training epoch
        model.train()
        running_loss = 0.0
        running_training_accuracy = 0.0
        running_validation_accuracy = 0.0

        for images, labels in train_loader:
            optimizer.zero_grad()  # Reset gradients
            outputs = model(images)  # Make prediction
            loss = criterion(outputs, labels)  # Compute loss
            loss.backward()  # Compute gradients
            optimizer.step()  # Update weights

            # Compute loss and accuracy
            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            running_training_accuracy += accuracy_score(
                labels.numpy(), predicted.numpy()
            )

        # Compare to validation accuracy,
        # this slows things down but is useful for understanding
        model.eval()
        with torch.no_grad():
            for images, labels in val_loader:
                outputs = model(images)
                _, predicted = torch.max(outputs, 1)
                running_validation_accuracy += accuracy_score(
                    labels.numpy(), predicted.numpy()
                )

        logger.info(
            f"Epoch {epoch+1}, Loss: {running_loss/len(train_loader):.2f}, "
            f"Training Accuracy: {running_training_accuracy/len(train_loader):.2f}, "
            f"Validation Accuracy: {running_validation_accuracy/len(val_loader):.2f}"
        )


# TODO make this work on a GPU and use Google colab?

# Example training call (assuming train_loader is defined)
train_model(model, train_loader, val_loader, criterion, optimizer)

2024-02-19 22:32:56,026 [INFO]: Epoch 1, Loss: 1.29, Training Accuracy: 0.33, Validation Accuracy: 0.32
2024-02-19 22:36:37,639 [INFO]: Epoch 2, Loss: 1.10, Training Accuracy: 0.34, Validation Accuracy: 0.32
2024-02-19 22:40:36,196 [INFO]: Epoch 3, Loss: 1.10, Training Accuracy: 0.34, Validation Accuracy: 0.32
2024-02-19 22:43:51,318 [INFO]: Epoch 4, Loss: 1.10, Training Accuracy: 0.34, Validation Accuracy: 0.34
2024-02-19 22:47:09,898 [INFO]: Epoch 5, Loss: 1.08, Training Accuracy: 0.39, Validation Accuracy: 0.36
2024-02-19 22:50:35,477 [INFO]: Epoch 6, Loss: 1.07, Training Accuracy: 0.43, Validation Accuracy: 0.38
2024-02-19 22:54:07,066 [INFO]: Epoch 7, Loss: 0.96, Training Accuracy: 0.53, Validation Accuracy: 0.38
2024-02-19 22:57:25,070 [INFO]: Epoch 8, Loss: 0.73, Training Accuracy: 0.69, Validation Accuracy: 0.41


# Overfitting

We interrupted the previous training block because the model has clearly just started to massively overfit to the training data, with marginal (or even negative) returns on the validation accuracy. So what can we do to reduce overfitting and ideally attain better validation accuracies?

- Dropout regularization: we will add dropout layers to 
- 

*Notice that we are using accuracy as our score because our labels are perfectly balanced, so we don't need to worry about precision, recall, f1 scores, and all that jazz.*

In [None]:
# TODO plotting function to visualize predictions
# TODO check the shape of the NN's outputs. Are they converging to [0.33, 0.33, 0.33] for most predictions?
# If so then the model is not learning anything meaningful, and we need a different architecture on the output
# layer or a different loss function.
# TODO data augmentation!

In [None]:
def predict(model, val_loader):
    model.eval()
    predictions = []
    with torch.no_grad():  # Don't waste resources on gradients
        for images, _ in val_loader:
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            predictions.extend(predicted.cpu().numpy())
    return predictions


predictions = predict(model, val_loader)

In [None]:
# TODO: modify the dataset to treat each image as its own sample
# thereby increasing our dataset size by 33x

In [None]:
# Check for overfitting, apply learning rate decay, and use early stopping
# Try doing some hyperparameter tuning!

In [None]:
# extract labels from test_loader
labels = []
for _, label in val_loader:
    labels.extend(label.numpy())

In [None]:
fscore = f1_score(labels, predictions, average=None)
accuracy = accuracy_score(labels, predictions)
logger.info(f"Accuracy: {accuracy}")
logger.info(f"F1 Scores by class: {fscore}")

0.31900826446280994

In [None]:
predictions = predict(model, train_loader)

In [None]:
# extract labels from test_loader
labels = []
for _, label in train_loader:
    labels.extend(label.numpy())

In [None]:
fscore = f1_score(labels, predictions, average=None)
accuracy = accuracy_score(labels, predictions)
logger.info(f"Accuracy: {accuracy}")
logger.info(f"F1 Scores by class: {fscore}")

2024-02-19 15:31:30,228 [INFO]: Accuracy: 0.3406456953642384
2024-02-19 15:31:30,228 [INFO]: Accuracy: 0.3406456953642384
2024-02-19 15:31:30,229 [INFO]: F1 Scores by class: [0.33925291 0.34868017 0.33375796]
2024-02-19 15:31:30,229 [INFO]: F1 Scores by class: [0.33925291 0.34868017 0.33375796]


In [None]:
model_small = SimpleCNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# TODO make this work on a GPU and use Google colab?

# Example training call (assuming train_loader is defined)
train_model(model_small, train_loader, criterion, optimizer, num_epochs=1)

Epoch 1, Loss: 1.0992163652615832


In [None]:
predictions_small = predict(model_small, train_loader)

In [None]:
fscore_small = f1_score(labels, predictions_small, average=None)
accuracy_small = accuracy_score(labels, predictions_small)
logger.info(f"Accuracy: {accuracy_small}")
logger.info(f"F1 Scores by class: {fscore_small}")

2024-02-19 15:45:22,281 [INFO]: Accuracy: 0.3543046357615894
2024-02-19 15:45:22,281 [INFO]: Accuracy: 0.3543046357615894
2024-02-19 15:45:22,282 [INFO]: F1 Scores by class: [0.27328244 0.24938474 0.45592705]
2024-02-19 15:45:22,282 [INFO]: F1 Scores by class: [0.27328244 0.24938474 0.45592705]
