# Deep Learning - Exercise 3
## Emanuele Fontana

In this notebook we'll try to reach 90% accuracy on CIFAR-10 with 2 approaches:
1. Using a simple Convolutional Neural Network (CNN) built with PyTorch
2. Using Transfer Learning with a pre-trained model

### Imports and Data Loading

In [20]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
from torchmetrics import Accuracy
from PIL import Image
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models

device=torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")


# Define transformations for CNN training (32x32)
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Define transformations for effnet18 training (224x224 with ImageNet normalization)
transform_effnet = transforms.Compose([
    transforms.Resize((224, 224)),  # Ridimensiona a 224x224 per effnet18
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406),  # ImageNet normalization (mean)
                         (0.229, 0.224, 0.225))   # ImageNet normalization (std)
])



# Custom dataset with multiple augmented versions
class AugmentedCIFAR10(torchvision.datasets.CIFAR10):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.num_augmentations = 3
    
    def __len__(self):
        return len(self.data) * self.num_augmentations
    
    def __getitem__(self, idx):
        # Get the original image and label
        original_idx = idx // self.num_augmentations
        image_array = self.data[original_idx]
        label = self.targets[original_idx]
        
        # Convert numpy array to PIL Image
        image = Image.fromarray(image_array)
        
        if idx % self.num_augmentations == 0:
            #original image
            newImage = image
        elif idx % self.num_augmentations == 1:
            #Vertical Flip
            newImage = image.transpose(Image.FLIP_TOP_BOTTOM)
        elif idx % self.num_augmentations == 2:
            # Horizontal Flip
            newImage = image.transpose(Image.FLIP_LEFT_RIGHT)

        
        newImage = self.transform(newImage)
        return newImage, label

# Load dataset with augmentation for CNN
trainset = AugmentedCIFAR10(root='./data', train=True,
                            download=True, transform=transform) 
trainloader = torch.utils.data.DataLoader(trainset, batch_size=512,
                                          shuffle=True)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=512,
                                         shuffle=False)


# Load dataset with augmentation for effnet18
trainset_effnet = torchvision.datasets.CIFAR10(root='./data', train=True,
                                   download=True, transform=transform_effnet) 
trainloader_effnet = torch.utils.data.DataLoader(trainset_effnet, batch_size=64,
                                                 shuffle=True)

testset_effnet = torchvision.datasets.CIFAR10(root='./data', train=False,
                                              download=True, transform=transform_effnet)
testloader_effnet = torch.utils.data.DataLoader(testset_effnet, batch_size=64,
                                                shuffle=False)

classes = ['airplane', 'automobile', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck']


Using device: cuda


### 1 - Simple CNN

In [21]:


class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        # Block 1
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.rel1=nn.ReLU()
        self.bn1 = nn.BatchNorm2d(16)  
        self.dropout1 = nn.Dropout2d(p=0.5)
        self.pool = nn.MaxPool2d(2, 2)
        
        # Block 2
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.leakyrelu=nn.LeakyReLU()
        self.bn2 = nn.BatchNorm2d(32)  
        self.dropout2 = nn.Dropout2d(p=0.5)
        
        # Block 3
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.gelu=nn.GELU()
        self.bn3 = nn.BatchNorm2d(64) 
        self.dropout3 = nn.Dropout2d(p=0.5)
        
        # MLP head for classification
        self.fc1 = nn.Linear(64 * 4 * 4, 128)
        self.bc1 = nn.BatchNorm1d(128)
        self.dropout_fc = nn.Dropout(p=0.5)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        # First convolutional block
        x = self.pool(self.dropout1(self.bn1(self.rel1(self.conv1(x)))))
        # Second convolutional block
        x = self.pool(self.dropout2(self.bn2(self.leakyrelu(self.conv2(x)))))
        
        # Third convolutional block
        x = self.pool(self.dropout3(self.bn3(self.gelu(self.conv3(x)))))
        
        # Flatten for MLP
        x = x.view(-1, 64 * 4 * 4)
        
        # MLP
        x = F.relu(self.bc1(self.fc1(x)))
        x = self.dropout_fc(x)
        x = self.fc2(x)
        
        return x


In [24]:
model = CNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=0.1)

# Learning Rate Scheduler
scheduler = optim.lr_scheduler.ConstantLR(optimizer, factor=0.01, total_iters=5)
# Define torchmetrics objects
metrics = {
    "train_acc": Accuracy(task="multiclass", num_classes=10).to(device),
    "test_acc": Accuracy(task="multiclass", num_classes=10).to(device),
}

patience = 10
not_improved_epochs = 0
best_test_acc = 0.0

# Training
epochs = 30
for epoch in range(epochs):
    model.train()
    running_loss = 0.0

    # Reset train metrics each epoch
    metrics["train_acc"].reset()

    for images, labels in trainloader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

        # Update training metrics
        metrics["train_acc"].update(outputs, labels)

    # Compute train metrics
    train_acc = metrics["train_acc"].compute().item()

    # Reset test metrics
    metrics["test_acc"].reset()

    # Evaluate on test set
    model.eval()
    with torch.no_grad():
        for images, labels in testloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            metrics["test_acc"].update(outputs, labels)

    # Compute test metrics
    test_acc = metrics["test_acc"].compute().item()

    # Early stopping logic
    if test_acc > best_test_acc:
        best_test_acc = test_acc
        not_improved_epochs = 0
    else:
        not_improved_epochs += 1
    
    if not_improved_epochs >= patience:
        print("Early stopping triggered")
        break

    # Step the scheduler
    scheduler.step()

    # Log results
    print(f"Epoch [{epoch+1}/{epochs}] | Loss: {running_loss/len(trainloader):.4f} | "
            f"Train Acc: {train_acc:.4f} | Test Acc: {test_acc:.4f}")


Epoch [1/30] | Loss: 1.8906 | Train Acc: 0.3024 | Test Acc: 0.4389
Epoch [2/30] | Loss: 1.6753 | Train Acc: 0.3833 | Test Acc: 0.4959
Epoch [3/30] | Loss: 1.6105 | Train Acc: 0.4084 | Test Acc: 0.5263
Epoch [4/30] | Loss: 1.5640 | Train Acc: 0.4269 | Test Acc: 0.5530
Epoch [5/30] | Loss: 1.5358 | Train Acc: 0.4405 | Test Acc: 0.5732
Epoch [6/30] | Loss: 2.0136 | Train Acc: 0.2270 | Test Acc: 0.3014
Epoch [7/30] | Loss: 1.9141 | Train Acc: 0.2629 | Test Acc: 0.3476
Epoch [8/30] | Loss: 1.8739 | Train Acc: 0.2801 | Test Acc: 0.3795
Epoch [9/30] | Loss: 1.8514 | Train Acc: 0.2903 | Test Acc: 0.3737
Epoch [10/30] | Loss: 1.8399 | Train Acc: 0.2947 | Test Acc: 0.4183
Epoch [11/30] | Loss: 1.8320 | Train Acc: 0.2988 | Test Acc: 0.4242
Epoch [12/30] | Loss: 1.8147 | Train Acc: 0.3069 | Test Acc: 0.4523
Epoch [13/30] | Loss: 1.8105 | Train Acc: 0.3111 | Test Acc: 0.4492
Epoch [14/30] | Loss: 1.8111 | Train Acc: 0.3098 | Test Acc: 0.4265
Early stopping triggered


### 2 - Transfer Learning with Pre-trained Model

In [None]:

num_classes = 10
# Load pre-trained EfficientNet-B0
effnet = models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.DEFAULT)

# Replace the final classification layer for 10 classes
effnet.classifier[1] = nn.Linear(effnet.classifier[1].in_features, num_classes)
effnet = effnet.to(device)

# Define optimizer and loss function for effnet
criterion_effnet = nn.CrossEntropyLoss()
optimizer_effnet = optim.AdamW(effnet.parameters(), lr=0.1)

# Start fine-tuning effnet model by learning only the classification head
# Freeze all backbone parameters
for param in effnet.parameters():
    param.requires_grad = False
# Unfreeze only the classifier head
for param in effnet.classifier.parameters():
    param.requires_grad = True

epochs_effnet = 30
patience_effnet = 5
not_improved_epochs_effnet = 0
best_test_acc_effnet = 0.0


for epoch in range(epochs_effnet):
    effnet.train()
    running_loss = 0.0

    # Reset train metrics each epoch
    metrics["train_acc"].reset()

    for images, labels in trainloader_effnet:
        images, labels = images.to(device), labels.to(device)
        optimizer_effnet.zero_grad()
        outputs = effnet(images)
        loss = criterion_effnet(outputs, labels)
        loss.backward()
        optimizer_effnet.step()
        running_loss += loss.item()

        # Update training metrics
        metrics["train_acc"].update(outputs, labels)

    # Compute train metrics
    train_acc = metrics["train_acc"].compute().item()

    # Reset test metrics
    metrics["test_acc"].reset()

    # Evaluate on test set
    effnet.eval()
    with torch.no_grad():
        for images, labels in testloader_effnet:
            images, labels = images.to(device), labels.to(device)
            outputs = effnet(images)
            metrics["test_acc"].update(outputs, labels)

    # Compute test metrics
    test_acc = metrics["test_acc"].compute().item()

    # Early stopping logic
    if test_acc > best_test_acc_effnet:
        best_test_acc_effnet = test_acc
        not_improved_epochs_effnet = 0
    else:
        not_improved_epochs_effnet += 1
    
    if not_improved_epochs_effnet >= patience_effnet:
        print("Early stopping triggered for EfficientNet-B0")
        break

    # Log results
    print(f"EfficientNet-B0 Epoch [{epoch+1}/{epochs_effnet}] | Loss: {running_loss/len(trainloader_effnet):.4f} | "
            f"Train Acc: {train_acc:.4f} | Test Acc: {test_acc:.4f}")


EfficientNet-B0 Epoch [1/30] | Loss: 5.0685 | Train Acc: 0.6516 | Test Acc: 0.7293
EfficientNet-B0 Epoch [2/30] | Loss: 5.5770 | Train Acc: 0.6681 | Test Acc: 0.7264
EfficientNet-B0 Epoch [3/30] | Loss: 5.6775 | Train Acc: 0.6715 | Test Acc: 0.7319
EfficientNet-B0 Epoch [4/30] | Loss: 5.7203 | Train Acc: 0.6673 | Test Acc: 0.6858
EfficientNet-B0 Epoch [5/30] | Loss: 5.8351 | Train Acc: 0.6681 | Test Acc: 0.7274
EfficientNet-B0 Epoch [6/30] | Loss: 5.8288 | Train Acc: 0.6692 | Test Acc: 0.7212
EfficientNet-B0 Epoch [7/30] | Loss: 5.7136 | Train Acc: 0.6698 | Test Acc: 0.7237
EfficientNet-B0 Epoch [8/30] | Loss: 5.6361 | Train Acc: 0.6708 | Test Acc: 0.7397
EfficientNet-B0 Epoch [9/30] | Loss: 5.7923 | Train Acc: 0.6696 | Test Acc: 0.7342
EfficientNet-B0 Epoch [10/30] | Loss: 5.8284 | Train Acc: 0.6684 | Test Acc: 0.7405
EfficientNet-B0 Epoch [11/30] | Loss: 5.7387 | Train Acc: 0.6698 | Test Acc: 0.7169
