# Deep Learning - Exercise 3
## Emanuele Fontana

In this notebook we try to reach 90% test accuracy on CIFAR-10 with two approaches:
1. Using a simple Convolutional Neural Network (CNN) built with PyTorch
2. Using Transfer Learning with a pre-trained model

### Imports and Data Loading

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
from torchmetrics import Accuracy
from PIL import Image
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")


# Per-channel statistics used by the two preprocessing pipelines below.
_CNN_MEAN, _CNN_STD = (0.5, 0.5, 0.5), (0.5, 0.5, 0.5)
_IMAGENET_MEAN, _IMAGENET_STD = (0.485, 0.456, 0.406), (0.229, 0.224, 0.225)

# Preprocessing for the custom CNN: images keep their native 32x32 size.
_cnn_steps = [
    transforms.ToTensor(),
    transforms.Normalize(_CNN_MEAN, _CNN_STD),
]
transform = transforms.Compose(_cnn_steps)

# Preprocessing for ResNet18: resize to 224x224, then apply ImageNet normalization.
_resnet_steps = [
    transforms.ToTensor(),
    transforms.Resize((224, 224)),
    transforms.Normalize(_IMAGENET_MEAN, _IMAGENET_STD),
]
transform_resnet = transforms.Compose(_resnet_steps)



# Custom dataset with multiple augmented versions
class AugmentedCIFAR10(torchvision.datasets.CIFAR10):
    """CIFAR-10 dataset that exposes every stored image three times.

    Index ``i`` maps to source image ``i // num_augmentations``; the remainder
    ``i % num_augmentations`` selects the variant:

    * 0 - the image as stored
    * 1 - flipped top-to-bottom
    * 2 - flipped left-to-right

    Constructor arguments are forwarded unchanged to
    ``torchvision.datasets.CIFAR10``.
    """

    # PIL transpose operation per variant, indexed by ``idx % num_augmentations``.
    # ``None`` means "return the image unmodified".
    # NOTE(review): top-to-bottom flips produce upside-down objects; confirm this
    # augmentation is actually wanted for CIFAR-10.
    _VARIANT_OPS = (None, Image.FLIP_TOP_BOTTOM, Image.FLIP_LEFT_RIGHT)

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.num_augmentations = len(self._VARIANT_OPS)

    def __len__(self):
        """Return the number of (image, variant) pairs."""
        return len(self.data) * self.num_augmentations

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the variant addressed by ``idx``.

        The image is passed through ``self.transform`` and the label through
        ``self.target_transform`` when those are set; otherwise the PIL image
        and the raw label are returned as they are.
        """
        original_idx, variant = divmod(idx, self.num_augmentations)
        label = self.targets[original_idx]

        # The stored samples are numpy arrays; PIL is needed for transpose().
        image = Image.fromarray(self.data[original_idx])

        flip = self._VARIANT_OPS[variant]
        if flip is not None:
            image = image.transpose(flip)

        # Both transforms are optional in the base class (default None), so
        # guard the calls instead of assuming a callable is present.
        if self.transform is not None:
            image = self.transform(image)
        if self.target_transform is not None:
            label = self.target_transform(label)

        return image, label

# Load dataset with augmentation for CNN
# Arguments shared by every dataset / loader built in this cell.
_dataset_kwargs = dict(root='./data', download=True)
_loader_kwargs = dict(batch_size=1024)

# --- 32x32 pipeline for the custom CNN ---------------------------------------
# Training split goes through the augmenting wrapper and is shuffled.
trainset = AugmentedCIFAR10(train=True, transform=transform, **_dataset_kwargs)
trainloader = torch.utils.data.DataLoader(trainset, shuffle=True, **_loader_kwargs)

# Test split uses the plain torchvision dataset and keeps its original order.
testset = torchvision.datasets.CIFAR10(train=False, transform=transform, **_dataset_kwargs)
testloader = torch.utils.data.DataLoader(testset, shuffle=False, **_loader_kwargs)

# --- 224x224 pipeline for ResNet18 -------------------------------------------
trainset_resnet = AugmentedCIFAR10(train=True, transform=transform_resnet, **_dataset_kwargs)
trainloader_resnet = torch.utils.data.DataLoader(trainset_resnet, shuffle=True, **_loader_kwargs)

testset_resnet = torchvision.datasets.CIFAR10(train=False, transform=transform_resnet, **_dataset_kwargs)
testloader_resnet = torch.utils.data.DataLoader(testset_resnet, shuffle=False, **_loader_kwargs)

# Human-readable class names, in label-index order.
classes = [
    'airplane',
    'automobile',
    'bird',
    'cat',
    'deer',
    'dog',
    'frog',
    'horse',
    'ship',
    'truck',
]


Using device: cuda


### 1 - Simple CNN

In [None]:


class CNN(nn.Module):
    """Small three-block convolutional classifier for 3x32x32 images.

    Every convolutional block applies, in order: 3x3 convolution (padding 1),
    ReLU, batch normalization, channel dropout (p=0.5) and 2x2 max pooling.
    Channel widths are 3 -> 8 -> 16 -> 32. The flattened 32x4x4 feature map is
    fed to a two-layer MLP head (512 -> 256 -> 10) that returns raw logits.
    """

    def __init__(self):
        super().__init__()
        # Block 1: 3 -> 8 feature maps
        self.conv1 = nn.Conv2d(3, 8, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(8)  # must match conv1's output channels
        self.dropout1 = nn.Dropout2d(p=0.5)
        self.pool = nn.MaxPool2d(2, 2)  # shared by all three blocks (stateless)

        # Block 2: 8 -> 16 feature maps
        self.conv2 = nn.Conv2d(8, 16, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(16)  # must match conv2's output channels
        self.dropout2 = nn.Dropout2d(p=0.5)

        # Block 3: 16 -> 32 feature maps
        self.conv3 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(32)  # must match conv3's output channels
        self.dropout3 = nn.Dropout2d(p=0.5)

        # MLP head for classification.
        # Three poolings shrink the spatial size: 32x32 -> 16x16 -> 8x8 -> 4x4,
        # with 32 final feature maps, so the head input is 32 * 4 * 4 = 512.
        self.fc1 = nn.Linear(32 * 4 * 4, 256)
        self.bc1 = nn.BatchNorm1d(256)
        self.dropout_fc = nn.Dropout(p=0.5)
        self.fc2 = nn.Linear(256, 10)

    def forward(self, x):
        """Return class logits of shape ``(batch, 10)`` for a ``(batch, 3, 32, 32)`` input.

        Raises:
            RuntimeError: from ``fc1`` when the flattened feature size is not
                512, i.e. when the input is not 32x32.
        """
        # First convolutional block
        x = self.pool(self.dropout1(self.bn1(F.relu(self.conv1(x)))))

        # Second convolutional block
        x = self.pool(self.dropout2(self.bn2(F.relu(self.conv2(x)))))

        # Third convolutional block
        x = self.pool(self.dropout3(self.bn3(F.relu(self.conv3(x)))))

        # Flatten everything except the batch dimension. Unlike view(-1, 512),
        # this never folds extra spatial positions into the batch axis, so a
        # wrongly sized input fails loudly in fc1 instead of silently producing
        # more output rows than input samples.
        x = torch.flatten(x, start_dim=1)

        # MLP head
        x = F.relu(self.bc1(self.fc1(x)))
        x = self.dropout_fc(x)
        x = self.fc2(x)

        return x


In [None]:
# Train the custom CNN with AdamW, plateau-based learning-rate decay and
# early stopping on test accuracy.
model = CNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=0.001)

# Learning-rate scheduler: halve the learning rate when test accuracy stops
# improving. The loop below calls scheduler.step(test_acc), which is the
# ReduceLROnPlateau API; ConstantLR.step() would interpret that value as an
# epoch index and keep the learning rate pinned at half its base value.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode="max", factor=0.5, patience=2
)

# Define torchmetrics objects
metrics = {
    "train_acc": Accuracy(task="multiclass", num_classes=10).to(device),
    "test_acc": Accuracy(task="multiclass", num_classes=10).to(device),
}

# Early stopping: give up after `patience` consecutive epochs without a new
# best test accuracy.
# NOTE(review): the test split doubles as the validation split here, so the
# reported best accuracy is optimistically biased.
patience = 5
not_improved_epochs = 0
best_test_acc = 0.0

# Training
epochs = 30
for epoch in range(epochs):
    model.train()
    running_loss = 0.0

    # Reset train metrics each epoch
    metrics["train_acc"].reset()

    for images, labels in trainloader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

        # Update training metrics (computed with dropout active)
        metrics["train_acc"].update(outputs, labels)

    # Compute train metrics
    train_acc = metrics["train_acc"].compute().item()

    # Reset test metrics
    metrics["test_acc"].reset()

    # Evaluate on test set
    model.eval()
    with torch.no_grad():
        for images, labels in testloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            metrics["test_acc"].update(outputs, labels)

    # Compute test metrics
    test_acc = metrics["test_acc"].compute().item()

    # Step the scheduler based on test accuracy
    scheduler.step(test_acc)

    # Log results before the early-stopping check so the final epoch is
    # reported as well.
    print(f"Epoch [{epoch+1}/{epochs}] | Loss: {running_loss/len(trainloader):.4f} | "
            f"Train Acc: {train_acc:.4f} | Test Acc: {test_acc:.4f}")

    # Early stopping logic
    if test_acc > best_test_acc:
        best_test_acc = test_acc
        not_improved_epochs = 0
    else:
        not_improved_epochs += 1

    if not_improved_epochs >= patience:
        print("Early stopping triggered")
        break

RuntimeError: running_mean should contain 8 elements not 32

### 2 - Transfer Learning with Pre-trained Model

In [None]:

# Transfer learning: ImageNet-pretrained ResNet18 used as a frozen feature
# extractor, with a freshly initialised 10-class linear head trained on top.
num_classes = 10

# Load pre-trained ResNet18. The `weights=` argument replaces the deprecated
# `pretrained=True`; IMAGENET1K_V1 is the checkpoint that flag used to select.
resnet = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

# Freeze the whole backbone first ...
for param in resnet.parameters():
    param.requires_grad = False

# ... then replace the final classification layer. A newly created layer has
# requires_grad=True, so the head is the only trainable part of the network.
resnet.fc = nn.Linear(resnet.fc.in_features, num_classes)
resnet = resnet.to(device)

# Define optimizer, scheduler and loss function for ResNet. Only the head's
# parameters are optimised, and the scheduler is bound to *this* optimizer
# rather than to the one created for the CNN in another cell.
criterion_resnet = nn.CrossEntropyLoss()
optimizer_resnet = optim.AdamW(resnet.fc.parameters(), lr=0.001)
scheduler_resnet = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer_resnet, mode="max", factor=0.5, patience=2
)

# Dedicated metric objects so this cell does not depend on the CNN cell
# having been executed first.
metrics_resnet = {
    "train_acc": Accuracy(task="multiclass", num_classes=num_classes).to(device),
    "test_acc": Accuracy(task="multiclass", num_classes=num_classes).to(device),
}

# NOTE(review): the test split doubles as the validation split for early
# stopping, so the reported best accuracy is optimistically biased.
epochs_resnet = 30
patience_resnet = 5
not_improved_epochs_resnet = 0
best_test_acc_resnet = 0.0


for epoch in range(epochs_resnet):
    # Keep the frozen backbone in eval mode: in train mode its BatchNorm layers
    # would keep updating their running statistics even though their weights
    # are frozen. Only the head is switched to training mode.
    resnet.eval()
    resnet.fc.train()
    running_loss = 0.0

    # Reset train metrics each epoch
    metrics_resnet["train_acc"].reset()

    for images, labels in trainloader_resnet:
        images, labels = images.to(device), labels.to(device)
        optimizer_resnet.zero_grad()
        outputs = resnet(images)
        loss = criterion_resnet(outputs, labels)
        loss.backward()
        optimizer_resnet.step()
        running_loss += loss.item()

        # Update training metrics
        metrics_resnet["train_acc"].update(outputs, labels)

    # Compute train metrics
    train_acc = metrics_resnet["train_acc"].compute().item()

    # Reset test metrics
    metrics_resnet["test_acc"].reset()

    # Evaluate on test set
    resnet.eval()
    with torch.no_grad():
        for images, labels in testloader_resnet:
            images, labels = images.to(device), labels.to(device)
            outputs = resnet(images)
            metrics_resnet["test_acc"].update(outputs, labels)

    # Compute test metrics
    test_acc = metrics_resnet["test_acc"].compute().item()

    # Step the scheduler based on test accuracy
    scheduler_resnet.step(test_acc)

    # Log results before the early-stopping check so the final epoch is
    # reported as well.
    print(f"ResNet18 Epoch [{epoch+1}/{epochs_resnet}] | Loss: {running_loss/len(trainloader_resnet):.4f} | "
            f"Train Acc: {train_acc:.4f} | Test Acc: {test_acc:.4f}")

    # Early stopping logic
    if test_acc > best_test_acc_resnet:
        best_test_acc_resnet = test_acc
        not_improved_epochs_resnet = 0
    else:
        not_improved_epochs_resnet += 1

    if not_improved_epochs_resnet >= patience_resnet:
        print("Early stopping triggered for ResNet18")
        break




Downloading: "https://download.pytorch.org/models/vit_b_16-c867db91.pth" to C:\Users\fonta/.cache\torch\hub\checkpoints\vit_b_16-c867db91.pth


100.0%

