In [None]:
import torch
import torch.nn as nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
import torch.nn.functional as F
# from sklearn.model_selection import GridSearchCV
# from skorch import NeuralNetClassifier
import itertools
import torch.optim as optim
# from torchinfo import summary
# import numpy as np
import matplotlib.pyplot as plt

import warnings
# Suppress every warning for the rest of the session (no category filter is given).
warnings.filterwarnings(action="ignore")

# Run on the CUDA device when PyTorch reports one, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("NVIDIA GPU available, running stuff on GPU" if DEVICE.type == "cuda" else "No NVIDIA GPU available, running stuff on CPU")

In [None]:
# NOTE(review): machine-specific absolute path; point this at your own dataset folder.
ROOT_DIR = r"C:\Users\delga\Documents\programming\datasets"

# Per-channel normalization statistics (known metrics from CIFAR10), shared by every split.
CIFAR10_MEAN = [0.4914, 0.4822, 0.4465]
CIFAR10_STD = [0.2470, 0.2435, 0.2616]

# Training pipeline: random crop + horizontal flip augmentation, then normalization.
train_transforms = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=CIFAR10_MEAN, std=CIFAR10_STD),
])

# Evaluation pipeline: no augmentation, only tensor conversion and normalization.
val_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=CIFAR10_MEAN, std=CIFAR10_STD),
])

# Two views over the same CIFAR-10 training images, one per transform pipeline.
# A single dataset object cannot serve both splits: the subsets returned by
# random_split share their underlying dataset, so overwriting `.dataset.transform`
# for validation would also strip the augmentations from the training subset.
# The cost is a second in-memory copy of the training images.
full_train_dataset = datasets.CIFAR10(
    root=ROOT_DIR,
    download=False,
    train=True,
    transform=train_transforms,
)
full_val_dataset = datasets.CIFAR10(
    root=ROOT_DIR,
    download=False,
    train=True,
    transform=val_transforms,
)

# 90% / 10% train / validation split.
train_size = int(0.9 * len(full_train_dataset))
val_size = len(full_train_dataset) - train_size

# Seeded generator so the split is the same on every run.
train_dataset, val_split = random_split(
    dataset=full_train_dataset,
    lengths=[train_size, val_size],
    generator=torch.Generator().manual_seed(22),
)

# Re-use the held-out indices on the non-augmented view, so validation sees
# exactly the samples excluded from training, without augmentations.
val_dataset = torch.utils.data.Subset(full_val_dataset, val_split.indices)

test_dataset = datasets.CIFAR10(
    root=ROOT_DIR,
    download=False,
    train=False,
    transform=val_transforms,
)

print(f"\nCompleted data loading and splits:\n * Train dataset: {len(train_dataset)} samples\n * Validation dataset: {len(val_dataset)} samples\n * Test dataset: {len(test_dataset)} samples")

# Only the training loader is shuffled; the evaluation metrics do not depend on sample order.
train_loader = DataLoader(dataset=train_dataset, batch_size=128, shuffle=True, num_workers=1, pin_memory=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=128, shuffle=False, num_workers=1, pin_memory=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=128, shuffle=False, num_workers=1, pin_memory=True)

In [None]:
# indices = torch.arange(100)
# tiny_train_dataset = Subset(dataset=train_dataset, indices=indices)
# tiny_loader = DataLoader(dataset=tiny_train_dataset, batch_size=16, shuffle=True)
# images, labels = next(iter(tiny_loader))
# print(images.shape, labels.shape)

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class CNN(nn.Module):
    """
    Small convolutional classifier for CIFAR-10.

    Three conv -> batch-norm -> ReLU -> max-pool stages feed a two-layer
    fully connected head with dropout in between.

    Input: 3x32x32 images
    Output: 10 class logits

    Args:
        dropout: Drop probability applied between the two fully connected layers.
    """
    def __init__(self, dropout=0.25):
        super().__init__()

        # 3x3 convolutions with padding 1 (spatial size preserved); channels 3 -> 32 -> 64 -> 128.
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)

        # One batch-norm per convolution, plus a shared 2x2 / stride-2 max-pool.
        self.bn1 = nn.BatchNorm2d(32)
        self.bn2 = nn.BatchNorm2d(64)
        self.bn3 = nn.BatchNorm2d(128)
        self.pool = nn.MaxPool2d(2, 2)

        # Classifier head: flattened 128x4x4 feature map -> 256 hidden units -> 10 logits.
        self.fc1 = nn.Linear(128 * 4 * 4, 256)
        self.fc2 = nn.Linear(256, 10)

        # Regularization applied to the hidden representation.
        self.dropout = nn.Dropout(dropout)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Each stage halves the spatial size: 32 -> 16 -> 8 -> 4 for 32x32 inputs.
        stages = (
            (self.conv1, self.bn1),
            (self.conv2, self.bn2),
            (self.conv3, self.bn3),
        )
        for conv, norm in stages:
            x = self.pool(F.relu(norm(conv(x))))

        # Collapse everything except the batch dimension before the linear head.
        features = x.flatten(start_dim=1)
        hidden = self.dropout(F.relu(self.fc1(features)))
        return self.fc2(hidden)


In [None]:
# Build the network with its default dropout and move it to the selected device.
# NOTE(review): the hyperparameter-search cell rebinds `model` for every configuration,
# so this instance is only useful for inspecting the architecture.
model = CNN().to(DEVICE)
# summary(model, input_size=(16, 3, 32, 32))

In [None]:
def _plot_training_curves(train_losses, val_losses, val_accuracies):
    """Draw the train/validation loss curves and the validation accuracy curve side by side."""
    epochs_range = range(1, len(train_losses) + 1)

    fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(10, 5))

    loss_ax.plot(epochs_range, train_losses, label="Train Loss")
    loss_ax.plot(epochs_range, val_losses, label="Validation Loss")
    loss_ax.set_title("Loss Curves")
    loss_ax.set_xlabel("Epochs")
    loss_ax.set_ylabel("Loss")
    loss_ax.legend()

    acc_ax.plot(epochs_range, val_accuracies, label="Validation Accuracy", color="green")
    acc_ax.set_title("Validation Accuracy")
    acc_ax.set_xlabel("Epochs")
    acc_ax.set_ylabel("Accuracy (%)")
    acc_ax.legend()

    fig.tight_layout()
    plt.show()


def train_and_validate(model, train_loader, val_loader, epochs, criterion, optimizer):
    """
    Train the model for `epochs` epochs, evaluating on the validation loader
    after every epoch, then plot the resulting curves.

    Each epoch makes one full pass over `train_loader` (with gradient updates)
    followed by one full pass over `val_loader` (without gradients). Batches are
    moved to the module-level `DEVICE` before use.

    Epoch losses are averaged per sample: each batch loss is weighted by its
    batch size, so a short final batch does not count as much as a full one.
    This assumes `criterion` returns the mean loss over the batch, which is the
    default reduction of `nn.CrossEntropyLoss`.

    Args:
        model: Network to optimize; switched between train and eval mode here.
        train_loader: Iterable of (images, labels) batches used for training.
        val_loader: Iterable of (images, labels) batches used for validation.
        epochs: Number of passes over the training data.
        criterion: Loss callable applied as `criterion(outputs, labels)`.
        optimizer: Optimizer stepped once per training batch.

    Returns:
        Tuple `(train_losses, val_losses, val_accuracies)` of per-epoch lists;
        accuracies are percentages in [0, 100].

    Raises:
        ValueError: If either loader yields no samples during an epoch.
    """
    train_losses, val_losses, val_accuracies = [], [], []

    for epoch in range(epochs):
        # --- Training phase ---
        model.train()
        train_loss_sum = 0.0
        train_samples = 0

        for images, labels in train_loader:
            images, labels = images.to(DEVICE), labels.to(DEVICE)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Weight the batch-mean loss by the batch size (see docstring).
            batch_size = labels.size(0)
            train_loss_sum += loss.item() * batch_size
            train_samples += batch_size

        if train_samples == 0:
            raise ValueError("train_loader yielded no samples")
        avg_train_loss = train_loss_sum / train_samples
        train_losses.append(avg_train_loss)

        # --- Evaluation phase ---
        model.eval()
        val_loss_sum = 0.0
        correct = 0
        total = 0

        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(DEVICE), labels.to(DEVICE)
                outputs = model(images)
                loss = criterion(outputs, labels)

                batch_size = labels.size(0)
                val_loss_sum += loss.item() * batch_size

                # Predicted class = index of the largest logit.
                _, preds = torch.max(outputs, 1)
                correct += (preds == labels).sum().item()
                total += batch_size

        if total == 0:
            raise ValueError("val_loader yielded no samples")
        avg_val_loss = val_loss_sum / total
        val_losses.append(avg_val_loss)

        val_accuracy = 100 * correct / total
        val_accuracies.append(val_accuracy)

        print(f"Epoch [{epoch+1}/{epochs}] | "
              f"Train Loss: {avg_train_loss:.4f} | "
              f"Val Loss: {avg_val_loss:.4f} | "
              f"Val Acc: {val_accuracy:.2f}%")

    # --- Plotting the curves ---
    _plot_training_curves(train_losses, val_losses, val_accuracies)

    return train_losses, val_losses, val_accuracies
          
        
def test_loop(model, data_loader):
    """
    Measure classification accuracy of `model` over `data_loader`.

    The model is put in eval mode and run without gradient tracking; each
    batch is moved to the module-level `DEVICE`. The accuracy is printed and
    returned as a percentage.
    """
    model.eval()
    n_correct, n_seen = 0, 0

    with torch.no_grad():
        for batch_images, batch_labels in data_loader:
            batch_images = batch_images.to(DEVICE)
            batch_labels = batch_labels.to(DEVICE)

            # Predicted class = index of the largest logit per sample.
            predicted = torch.max(model(batch_images), dim=1).indices
            n_correct += (predicted == batch_labels).sum().item()
            n_seen += batch_labels.size(0)

    accuracy = 100 * n_correct / n_seen
    print(f"\nTest Accuracy: {accuracy:.2f}%")
    return accuracy

In [None]:
# --- Hyperparameter grid: every combination of the three lists is trained ---
learning_rates = [1e-2, 5e-3, 1e-3]
weight_decays = [1e-3, 1e-4, 1e-5]
dropouts = [0.25, 0.3, 0.4]

SEARCH_EPOCHS = 5   # Short training budget per configuration
SEARCH_SEED = 22    # Re-applied before every configuration so runs are comparable

criterion = nn.CrossEntropyLoss()
# Start below any reachable accuracy so the first configuration always becomes the incumbent.
best_validation_accuracy = float("-inf")
best_params = None
best_model_state = None

for lr, wd, drop in itertools.product(learning_rates, weight_decays, dropouts):
    print(f"\n---Testing configuration: learning rate={lr}, weight decay={wd}, dropout={drop}")

    # Give every configuration the same initial weights and RNG stream, so that
    # differences in accuracy come from the hyperparameters rather than the seed.
    torch.manual_seed(SEARCH_SEED)

    model = CNN(dropout=drop).to(DEVICE)
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=wd)
    _, _, val_accuracies = train_and_validate(
        model, train_loader, val_loader,
        epochs=SEARCH_EPOCHS, criterion=criterion, optimizer=optimizer,
    )
    # Configurations are ranked by the validation accuracy of their final epoch.
    val_acc = val_accuracies[-1]

    if val_acc > best_validation_accuracy:
        best_validation_accuracy = val_acc
        best_params = (lr, wd, drop)
        # state_dict() hands back references to the live tensors; clone them so the
        # stored snapshot is independent of the model object.
        best_model_state = {
            name: tensor.detach().clone()
            for name, tensor in model.state_dict().items()
        }

print("\nBest hyperparameters found:")
print(f"   Learning rate: {best_params[0]}")
print(f"   Weight decay:  {best_params[1]}")
print(f"   Dropout:       {best_params[2]}")
print(f"   Validation accuracy: {best_validation_accuracy:.2f}%")

# --- Using the best found parameters for the testing phase ---
best_model = CNN(dropout=best_params[2]).to(DEVICE)
best_model.load_state_dict(best_model_state)
test_loop(best_model, test_loader)

In [None]:
"""
Improve the current CNN
Tune the training hyperparameters
Use a pre-trained model (transfer learning)
Train on CIFAR-100
"""