# A [simple] CNN for Image Classification

As described in the `README`, we will begin with a simple CNN that classifies a single image as belonging to a cheap, average, or expensive house. The model architecture is reproduced below:

| Layer | Input Dimensions | Output Dimensions | Objective | Activation Function | In Channels | Out Channels | Kernel | Stride | Padding |
|----------|----------|----------|----------|----------|----------|----------|----------|----------|----------| 
| Convolutional Layer 1 | 224, 224, 3 | 224, 224, 32 | Learn spatial features | ReLU | 3 | 32 | 3 | 1 | 1
| Pooling Layer 1 | 224, 224, 32 | 112, 112, 32 | Down-sampling | | 32 | 32 | 2 | 2 | 0
| Convolutional Layer 2 | 112, 112, 32 | 112, 112, 64 | Learn spatial features | ReLU | 32 | 64 | 3 | 1 | 1
| Pooling Layer 2 | 112, 112, 64 | 56, 56, 64 | Down-sampling | | 64 | 64 | 2 | 2 | 0
| Dense Layer 1 | 200704 | 512 | Learn spatial features | ReLU | 200704 | 512 | | | |
| Dense Layer 2 | 512 | 3 | Classification | Softmax | 512 | 3 | | | |

In [None]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Subset
from sklearn.model_selection import train_test_split
from src.preprocessing import HousingDataset, CLEANED_DIR, get_transform
from src.logging import get_logger

# Module-level logger obtained from the project's `src.logging.get_logger`
# helper, keyed by this module's name.
logger = get_logger(__name__)

In [None]:
# Define the CNN Model
class SimpleCNN(nn.Module):
    """Small CNN: two conv/ReLU/max-pool stages followed by two dense layers.

    Both convolutions use a 3x3 kernel with stride 1 and padding 1, so they
    keep the spatial size; each 2x2/stride-2 max-pool halves it (rounding
    down). A square input of side ``input_size`` therefore reaches the dense
    layers as ``64 * (input_size // 4) ** 2`` features.

    Args:
        num_classes: Number of output logits. Defaults to 3, the original
            hard-coded value.
        input_size: Side length of the square input images. Defaults to 224,
            which reproduces the original ``64 * 56 * 56`` dense-layer input.

    Raises:
        ValueError: If ``num_classes`` is smaller than 1, or ``input_size`` is
            smaller than 4 (two poolings would leave no spatial positions).
    """

    def __init__(self, num_classes: int = 3, input_size: int = 224) -> None:
        super().__init__()
        if num_classes < 1:
            raise ValueError(f"num_classes must be >= 1, got {num_classes}")
        if input_size < 4:
            raise ValueError(f"input_size must be >= 4, got {input_size}")

        # Layers are created in the same order and under the same attribute
        # names as before, so seeded initialisation and saved state dicts
        # stay compatible for the default arguments.
        self.conv1 = nn.Conv2d(
            3, 32, kernel_size=3, stride=1, padding=1
        )  # First convolutional layer
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)  # Pooling layer
        self.conv2 = nn.Conv2d(
            32, 64, kernel_size=3, stride=1, padding=1
        )  # Second convolutional layer

        # Two stride-2 poolings, each rounding down: side -> side // 4.
        pooled_size = input_size // 4
        self.fc1 = nn.Linear(64 * pooled_size * pooled_size, 512)  # First dense layer
        self.fc2 = nn.Linear(512, num_classes)  # Second dense layer (output layer)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return raw class logits for a batch of images.

        No softmax is applied here; the training code pairs this model with
        ``nn.CrossEntropyLoss``, which consumes unnormalised logits.
        """
        x = self.pool(nn.functional.relu(self.conv1(x)))
        x = self.pool(nn.functional.relu(self.conv2(x)))
        # Flatten for the dense layer. The width is read from fc1 so it cannot
        # drift from the constructor, and reshape (unlike view) also accepts
        # non-contiguous activations such as channels_last tensors.
        x = x.reshape(-1, self.fc1.in_features)
        x = nn.functional.relu(self.fc1(x))
        return self.fc2(x)

In [None]:
# Build the dataset from the cleaned prices CSV and image directory.
dataset = HousingDataset(
    prices_path=os.path.join(CLEANED_DIR, "prices.csv"),
    img_dir=os.path.join(CLEANED_DIR, "images"),
    transform=get_transform(),
)

# Create indices for the full dataset and split them 80/20 with a fixed seed.
# Passing the index list once yields the same split as passing it twice and
# discarding the duplicate outputs.
indices = list(range(len(dataset)))
train_indices, test_indices = train_test_split(
    indices, test_size=0.2, random_state=42
)

train_subset = Subset(dataset, train_indices)
test_subset = Subset(dataset, test_indices)

train_loader = DataLoader(train_subset, batch_size=4, shuffle=True, num_workers=0)
# Evaluation data is NOT shuffled so that the i-th prediction corresponds to
# test_indices[i]; shuffling here would make predictions impossible to align
# with their samples.
test_loader = DataLoader(test_subset, batch_size=4, shuffle=False, num_workers=0)

In [None]:
# Instantiate the network, the cross-entropy classification loss, and an Adam
# optimiser (learning rate 1e-3) over all of the model's parameters.
model = SimpleCNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)


def train_model(model, train_loader, criterion, optimizer, num_epochs=10):
    """Train ``model`` in place and print the mean batch loss of every epoch.

    Args:
        model: Module to optimise; it is switched to training mode.
        train_loader: Iterable yielding ``(images, labels)`` batches. It is
            iterated once per epoch and does not need to support ``len()``.
        criterion: Callable mapping ``(outputs, labels)`` to a scalar loss.
        optimizer: Optimiser that updates the model's parameters.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        None. Progress is reported with ``print``.
    """
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        # Count batches while iterating instead of calling len(train_loader):
        # this supports loaders without __len__ and avoids a
        # ZeroDivisionError when the loader yields nothing.
        num_batches = 0
        for images, labels in train_loader:
            optimizer.zero_grad()  # Reset gradients
            outputs = model(images)  # Make prediction
            loss = criterion(outputs, labels)  # Compute loss
            loss.backward()  # Compute gradients
            optimizer.step()  # Update weights
            running_loss += loss.item()
            num_batches += 1
        # An epoch without batches has no meaningful average; report NaN.
        mean_loss = running_loss / num_batches if num_batches else float("nan")
        print(f"Epoch {epoch+1}, Loss: {mean_loss}")


# TODO make this work on a GPU and use Google colab?

# Run training with the default number of epochs; relies on the model, data
# loader, loss, and optimiser having been created in the earlier cells.
train_model(
    model=model,
    train_loader=train_loader,
    criterion=criterion,
    optimizer=optimizer,
)

In [None]:
def predict(model, val_loader):
    """Return the predicted class index for every sample in ``val_loader``.

    The model is put into evaluation mode and run without gradient tracking.
    ``val_loader`` must yield ``(images, labels)`` pairs; the labels are
    ignored. The result is a flat list with one entry per sample, in the
    order the loader produced them.
    """
    model.eval()
    predicted_classes = []
    with torch.no_grad():  # Inference only, so skip gradient bookkeeping
        for batch_images, _ignored_labels in val_loader:
            logits = model(batch_images)
            # The predicted class is the index of the largest logit per row.
            top_classes = logits.argmax(dim=1)
            predicted_classes += list(top_classes.cpu().numpy())
    return predicted_classes


# Class predictions for the held-out test split.
predictions = predict(model=model, val_loader=test_loader)