## Load the data

In [1]:
import os
import torch
import pandas as pd
import numpy as np
from torchvision.io import read_image
from torch.utils.data import Dataset
from torch.nn.functional import one_hot


class CustomImageDataset(Dataset):
    def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
        self.img_labels = pd.read_csv(annotations_file)
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
        image = torch.as_tensor(np.load(img_path))
        image = image.unsqueeze(0)
        label = one_hot(torch.as_tensor(self.img_labels.iloc[idx, 1]-1),5).unsqueeze(0).to(torch.float)
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label

  from pandas.core import (


In [2]:
val_loader = CustomImageDataset("preprocessed/val/labels.csv", "preprocessed/val/specs")
train_loader = CustomImageDataset("preprocessed/train/labels.csv", "preprocessed/train/specs")

In [3]:
my_iter_beloved = iter(val_loader)

In [4]:
next(my_iter_beloved)

(tensor([[[1.0419e-06, 2.5456e-05, 2.9621e-02,  ..., 3.1865e-03,
           1.3618e-01, 1.1408e+00],
          [3.9951e-07, 2.3275e-05, 7.9069e-03,  ..., 2.9012e-02,
           2.5431e-01, 1.8286e+00],
          [7.2498e-07, 5.6006e-05, 8.3671e-03,  ..., 1.0260e-01,
           9.6600e-01, 4.7671e+00],
          ...,
          [1.0570e-07, 9.6049e-08, 1.3035e-04,  ..., 4.3229e-02,
           5.8280e-03, 2.1222e-03],
          [9.1120e-08, 9.0763e-08, 1.2359e-04,  ..., 4.0160e-02,
           2.8678e-02, 8.5541e-03],
          [8.7483e-08, 9.5734e-08, 1.4638e-04,  ..., 4.5134e-02,
           1.5607e-02, 6.8483e-03]]]),
 tensor([[0., 0., 0., 0., 1.]]))

## Simple CNN Model Definition

### Model Architecture
1. **Convolutional Layers:**
   - `self.conv1`: The first convolutional layer takes the input (RGB images) and produces feature maps with 16 output channels. The kernel size is set to 3 and padding needs to be 1 to keep the input dimension.
   - `self.conv2`: The second convolutional layer takes the output of the first after the activation funtion and pooling are applied and produces a feature map with 32 channels, and has the the same kernel size and padding (3 and 1).
   - `self.conv3`: The third convolutional layer further increases the number of output channels to 64.

2. **Activation and Pooling:**
   - `self.relu`: Rectified Linear Unit (ReLU) activation function is applied after each convolutional layer to introduce non-linearity.
   - `self.pool`: Max-pooling layer with a kernel size of 2 and a stride of 2 is used to downsample the spatial dimensions.

3. **Fully Connected Layers:**
   - `self.fc1`: The first fully connected layer takes the flattened output from the last convolutional layer and maps it to 64 units.
   - `self.fc2`: The final fully connected layer maps the 64 units to the output space with 10 units, corresponding to the number of classes in CIFAR-10.

### Forward Pass
The `forward` method defines the forward pass of the model. It specifies how input data flows through the layers to produce the final output. Convolutional and pooling layers are followed by activation functions, and the fully connected layers provide the classification logits. Note that the same relu and pooling layers are used in several parts. That is ok as these layers do not have parameters and are only applying the same function to any input, so no separate layers are needed.

This simple CNN serves as a starting point for image classification tasks and can be further customized or extended for more complex problems.


Channel = 1 

Padding = 2 (because dimension is 2) #only use if moving your filter would move you outside the convolution, so it's not mandatory

filter size = width, height and channels (depends on the input size)

Stride = how much you´d have overlap in your steps

Input first layer = 3 

In [5]:
import numpy as np
import os

In [6]:
# Set random seed for reproducibility
torch.manual_seed(42)

# Check if GPU is available and set device accordingly
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device set to: {device}")

Device set to: cpu


In [7]:
np.load(f"preprocessed/train/specs/{os.listdir('preprocessed/train/specs')[1]}")

array([[7.75734079e-05, 8.10303973e-05, 8.59528227e-05, ...,
        2.56379990e-05, 3.38812315e-05, 1.03176928e-04],
       [4.83323311e-05, 1.34489252e-04, 6.52731978e-05, ...,
        2.19504233e-04, 3.46366811e-04, 2.22038609e-04],
       [1.51195025e-04, 2.51585559e-04, 4.53107874e-04, ...,
        2.88863026e-04, 5.89006813e-04, 3.79063509e-04],
       ...,
       [4.35408722e-08, 9.69061347e-08, 1.73314902e-07, ...,
        1.13678993e-07, 1.68862272e-07, 1.28463967e-07],
       [5.15285379e-08, 1.03113848e-07, 1.11438155e-07, ...,
        1.39387083e-07, 2.19962430e-07, 1.03649207e-07],
       [6.13314484e-08, 1.19523691e-07, 1.11718748e-07, ...,
        1.14359693e-07, 1.10422150e-07, 5.54369919e-08]], dtype=float32)

In [8]:
# Import necesary libraries
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Subset
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, f1_score
import seaborn as sns
from torch.utils.data import DataLoader

## CNN specification

In [9]:
# Define a simple CNN model
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        
        # First Convolutional Layer
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, padding=1)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        
        # Second Convolutional Layer
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
        
        # Third Convolutional Layer
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        
        # Fourth Convolutional Layer
        self.conv4 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)

        # Fifth Convolutional Layer
        self.conv5 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1)


        # Fully Connected Layers
        fc1_in_features = 256 * 8 * 25
        self.fc1 = nn.Linear(in_features=fc1_in_features, out_features=256)
        self.fc2 = nn.Linear(in_features=256, out_features=128)
        self.fc3 = nn.Linear(in_features=128, out_features=64)
        self.fc4 = nn.Linear(in_features=64, out_features=32)
        self.fc5 = nn.Linear(in_features=32, out_features=5)

    def forward(self, x):
        # First Convolutional Block
        x = self.conv1(x)
        x = self.relu(x)
        x = self.pool(x)
        
        # Second Convolutional Block
        x = self.conv2(x)
        x = self.relu(x)
        x = self.pool(x)
        
        # Third Convolutional Block
        x = self.conv3(x)
        x = self.relu(x)
        x = self.pool(x)

         # Fourth Convolutional Block
        x = self.conv4(x)
        x = self.relu(x)
        x = self.pool(x)

        # Fifth Convolutional Block
        x = self.conv5(x)
        x = self.relu(x)
        
        # Flatten for Fully Connected Layers
        x = x.view(-1, self.fc1.in_features)
        
        # Fully Connected Layers
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3(x)
        x = self.relu(x)
        x = self.fc4(x)
        x = self.relu(x)
        x = self.fc5(x)
        
        return x

## Training Loop

In [10]:
# Training Loop with Validation
def train_model(model, train_loader, val_loader, epochs, criterion, optimizer):
    # Lists to store training and validation losses, and accuracies
    train_losses = []
    val_losses = []
    train_accuracies = []
    val_accuracies = []

    # Loop over epochs
    for epoch in range(epochs):
        # Set the model to training mode
        model.train()
        total_train_loss = 0.0
        correct_train = 0
        total_train = 0

        # Training loop
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            # print(outputs)
            # print(labels)
            
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_train_loss += loss.item()

            _, predicted = torch.max(outputs.data, 1)
            total_train += labels.size(0)
            correct_train += (predicted == labels).sum().item()

        # Calculate average training loss
        avg_train_loss = total_train_loss / len(train_loader)
        train_losses.append(avg_train_loss)

        # Calculate training accuracy
        train_accuracy = correct_train / total_train
        train_accuracies.append(train_accuracy)

        # Validation loop
        model.eval()
        total_val_loss = 0.0
        correct_val = 0
        total_val = 0

        # Validation without gradient computation
        with torch.no_grad():
            for val_images, val_labels in val_loader:
                val_images, val_labels = val_images.to(device), val_labels.to(device)
                val_outputs = model(val_images)
                val_loss = criterion(val_outputs, val_labels)
                total_val_loss += val_loss.item()

                _, predicted_val = torch.max(val_outputs.data, 1)
                total_val += val_labels.size(0)
                correct_val += (predicted_val == val_labels).sum().item()

        # Calculate average validation loss
        avg_val_loss = total_val_loss / len(val_loader)
        val_losses.append(avg_val_loss)

        # Calculate validation accuracy
        val_accuracy = correct_val / total_val
        val_accuracies.append(val_accuracy)

        # Print progress every 10 epochs
        # if (epoch + 1) % 10 == 0:
        if True:
            print(f'Epoch [{epoch+1}/{epochs}], '
                f'Training Loss: {avg_train_loss:.4f}, Training Accuracy: {train_accuracy * 100:.2f}%, '
                f'Validation Loss: {avg_val_loss:.4f}, Validation Accuracy: {val_accuracy * 100:.2f}%')

    # Plotting the loss and accuracy over epochs
    plt.figure(figsize=(12, 4))

    plt.subplot(1, 2, 1)
    plt.plot(train_losses, label='Training Loss')
    plt.plot(val_losses, label='Validation Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(train_accuracies, label='Training Accuracy')
    plt.plot(val_accuracies, label='Validation Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy (%)')
    plt.legend()

    plt.tight_layout()
    plt.show()

    return val_losses

## Train CNN

In [11]:
# Setting Hyperparameters and Training the Model

# Number of training epochs
epochs = 20

# Create an instance of the SimpleCNN model and move it to the specified device (GPU if available)
model = SimpleCNN().to(device)

# Define the loss criterion (CrossEntropyLoss) and the optimizer (Adam) for training the model
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model using the defined training function
val_losses_simple = train_model(model, train_loader, val_loader, epochs, criterion, optimizer)

Epoch [1/20], Training Loss: 1.6092, Training Accuracy: 243.03%, Validation Loss: 1.5805, Validation Accuracy: 0.00%


KeyboardInterrupt: 

## Regularization techniques

one widely used regularization technique is called Drouppout. Iirc, it systematically deactivates some neurons during training to make the model more robust for when the test data doesn't have some parts of the expected pattern. (I've implmented that below, but if we decide to use something else, then that's also fine.)

More possible techniques: 
- early stopping (would be nice if model overfits)
- L1 or L2 regularization (makes weights smaller/less of them) (affects the los function)

In [12]:
##NN with dropout

class SimpleCNN_with_dropout(nn.Module):
    def __init__(self, dropout_prob=0.5):
        super(SimpleCNN_with_dropout, self).__init__()

        # First Convolutional Layer
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, padding=1)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        
        # Second Convolutional Layer
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
        
        # Third Convolutional Layer
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        
        # Fourth Convolutional Layer
        self.conv4 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)

        # Fifth Convolutional Layer
        self.conv5 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1)

        #Dropouts
        self.dropout1 = nn.Dropout(dropout_prob)
        self.dropout2 = nn.Dropout(dropout_prob)
        self.dropout3 = nn.Dropout(dropout_prob)
        self.dropout4 = nn.Dropout(dropout_prob)
        self.dropout5 = nn.Dropout(dropout_prob)

        # Fully Connected Layers
        fc1_in_features = 256 * 8 * 25
        self.fc1 = nn.Linear(in_features=fc1_in_features, out_features=256)
        self.fc2 = nn.Linear(in_features=256, out_features=128)
        self.fc3 = nn.Linear(in_features=128, out_features=64)
        self.fc4 = nn.Linear(in_features=64, out_features=32)
        self.fc5 = nn.Linear(in_features=32, out_features=5)

    def forward(self, x):
        # First Convolutional Block
        x = self.conv1(x)
        x = self.relu(x)
        x = self.pool(x)
        # Dropout applied after the first convolutional layer
        x = self.dropout1(x)
        
        # Second Convolutional Block
        x = self.conv2(x)
        x = self.relu(x)
        x = self.pool(x)

        # Dropout applied after the second convolutional layer
        x = self.dropout2(x)
        
        # Third Convolutional Block
        x = self.conv3(x)
        x = self.relu(x)
        x = self.pool(x)

        # Dropout applied after the third convolutional layer
        x = self.dropout3(x)

         # Fourth Convolutional Block
        x = self.conv4(x)
        x = self.relu(x)
        x = self.pool(x)

        # Dropout applied after the fourth convolutional layer
        x = self.dropout4(x)

        # Fifth Convolutional Block
        x = self.conv5(x)
        x = self.relu(x)

        # Dropout applied after the fifth convolutional layer
        x = self.dropout5(x)
        
        # Flatten for Fully Connected Layers
        x = x.view(-1, self.fc1.in_features)
        
        # Fully Connected Layers
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3(x)
        x = self.relu(x)
        x = self.fc4(x)
        x = self.relu(x)
        x = self.fc5(x)
        
        return x


In [13]:
#Train CNN with drouput
epochs = 100
model_d = SimpleCNN_with_dropout(dropout_prob=0.5).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_d.parameters(), lr=0.001)
val_losses_dropout = train_model(model_d, train_loader, val_loader, epochs, criterion, optimizer)

Epoch [1/100], Training Loss: 1.6091, Training Accuracy: 300.40%, Validation Loss: 1.5952, Validation Accuracy: 0.00%


KeyboardInterrupt: 

Why use the parameters we use?

Adam optimizer
- generally concidered the best optimizer
- implements both momentum and adaptive learning rate
    - adaptive learning rate: each parameter gets it's own learning rate which helps with finding the minimum for the cost function connected to that secific parameter
    - momentum: accelerates convergence (finding the minimum faster) because it accumulates information about past gradients (ie: it makes the process faster) and also allows the model not to get stuck in local minima