In [None]:
!pip install torchsummary

import random

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from PIL import Image
from torch.utils.data import DataLoader, Dataset, random_split, WeightedRandomSampler, SubsetRandomSampler
from torchsummary import summary
from torchvision.transforms import Compose, ToTensor, Normalize, ToPILImage, RandomHorizontalFlip, Resize
from tqdm import tqdm

%matplotlib inline

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(device)

### Define the Loss function

In [None]:
# Cross-entropy loss for multi-class classification (takes raw logits and class indices).
criterion_multi_class = nn.CrossEntropyLoss()


### Define your train, validation, and test datasets

In [None]:
from torchvision import datasets, transforms
from torch.utils.data.sampler import  SubsetRandomSampler  #for validation test

# Convert images to tensors and normalize them with mean 0.5 / std 0.5.
# mean and std have to be sequences (e.g., tuples), hence the trailing comma after each value.
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5,), (0.5,))])

# Transform for the training split. It is kept separate so that augmentation
# (e.g. transforms.RandomRotation(degrees=(0, 180))) can be added here without
# touching validation/test data; at the moment it applies no augmentation.
transform_train = transforms.Compose([transforms.ToTensor(),
                                      transforms.Normalize((0.5,), (0.5,))])

# Load the data. trainset and validset both read the CIFAR-10 training images and
# differ only in their transform; the samplers below keep their index sets disjoint.
trainset = datasets.CIFAR10('~/Datasets/CF_data', download=True, train=True, transform=transform_train)
validset = datasets.CIFAR10('~/Datasets/CF_data', download=False, train=True, transform=transform)
testset = datasets.CIFAR10('~/Datasets/CF_data', download=True, train=False, transform=transform)

# Hold out a fixed fraction of the training images for validation.
# A dedicated, seeded generator makes the split identical on every run, so results
# from different experiments (optimizers, schedulers, architectures) stay comparable.
SPLIT_SEED = 42
VALID_FRACTION = 0.2
indices = np.random.default_rng(SPLIT_SEED).permutation(len(trainset)).tolist()
split = int(np.floor(VALID_FRACTION * len(trainset)))
print(len(indices[split:]))  # number of samples left for training
train_sample = SubsetRandomSampler(indices[split:])
valid_sample = SubsetRandomSampler(indices[:split])

# Data loaders: train/validation draw from their own index subsets of the training images.
trainloader = torch.utils.data.DataLoader(trainset, sampler=train_sample, batch_size=16)
validloader = torch.utils.data.DataLoader(validset, sampler=valid_sample, batch_size=16)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=True)




In [None]:
 #  defining accuracy function
def accuracy(network, dataloader):
    """Return the fraction of samples in ``dataloader`` that ``network`` classifies correctly.

    The network is switched to evaluation mode and is left in that mode afterwards.
    Batches are moved to the notebook-level ``device`` before the forward pass.

    Args:
        network: model whose output has one score per class along dim 1.
        dataloader: iterable yielding ``(images, labels)`` batches.

    Returns:
        Accuracy rounded to three decimals, or ``0.0`` when the loader yields no samples.
    """
    network.eval()
    total_correct = 0
    total_instances = 0
    # Inference only: skip autograd bookkeeping even if the caller did not use no_grad().
    with torch.no_grad():
        for images, labels in tqdm(dataloader):
            images, labels = images.to(device), labels.to(device)
            predictions = torch.argmax(network(images), dim=1)
            # Reduce on the tensor; the builtin sum() would walk the batch element by element.
            total_correct += (predictions == labels).sum().item()
            total_instances += len(images)
    if total_instances == 0:
        # Nothing to score; avoid dividing by zero.
        return 0.0
    return round(total_correct/total_instances, 3)

### Simple convolutional neural network

In [None]:
class SimpleCNN(nn.Module):
    """Plain CNN: three convolutional blocks followed by a three-layer classifier.

    Every block is two 3x3 convolutions (padding 1), each followed by ReLU, and a
    2x2 max-pool. The classifier takes 4096 flattened features, which matches
    256 channels at 4x4 resolution, i.e. 3x32x32 inputs. The output has 10 logits.
    """

    def __init__(self):
        super().__init__()

        # (input channels, channels after first conv, channels after second conv) per block
        block_widths = ((3, 32, 64), (64, 128, 128), (128, 256, 256))
        feature_layers = []
        for c_in, c_mid, c_out in block_widths:
            feature_layers.extend([
                nn.Conv2d(c_in, c_mid, kernel_size=3, padding=1),
                nn.ReLU(inplace=True),
                nn.Conv2d(c_mid, c_out, kernel_size=3, padding=1),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=2, stride=2),
            ])
        self.conv_layer = nn.Sequential(*feature_layers)

        # Classifier head: 4096 -> 1024 -> 512 -> 10
        self.fc_layer = nn.Sequential(
            nn.Linear(in_features=4096, out_features=1024),
            nn.ReLU(inplace=True),
            nn.Linear(in_features=1024, out_features=512),
            nn.ReLU(inplace=True),
            nn.Linear(in_features=512, out_features=10),
        )

    def forward(self, x):
        """Map a batch of images to a batch of 10 class logits."""
        features = self.conv_layer(x)
        # Collapse everything except the batch dimension.
        flat = features.view(features.size(0), -1)
        return self.fc_layer(flat)

# Build the baseline network on the selected device and print its layer summary
# for a single 3x32x32 input.
cnn_model = SimpleCNN().to(device)
summary(cnn_model, (3, 32, 32))

## Task 1: 
* Experiment with different Optimizers and learning rate schedulers
* Plot the learning curves and analyze your results

In [None]:
from torch.autograd import Variable
from tqdm import tqdm
import os
from datetime import datetime

from torch.utils.tensorboard import SummaryWriter

def train_model(model, optimizer, scheduler, epochs):
    """Train ``model`` for ``epochs`` epochs, validate after each one, then save the weights.

    Relies on the notebook-level ``trainloader``, ``validloader``,
    ``criterion_multi_class``, ``device`` and ``accuracy``. Per-epoch mean losses and
    accuracies are written to TensorBoard, and the final ``state_dict`` is stored under
    ``models/`` with a timestamped file name.

    Args:
        model: network to train; it is moved to ``device``.
        optimizer: optimizer bound to ``model``'s parameters.
        scheduler: learning-rate scheduler, stepped once at the end of every epoch.
        epochs: number of passes over ``trainloader``.

    Returns:
        None.
    """
    writer = SummaryWriter()
    try:
        model.to(device)

        print(model)

        # Per-batch losses and per-epoch accuracies gathered during the run.
        # NOTE(review): this log is filled but not returned or saved; return it (and
        # assign the result at the call site) if the curves should be plotted from it.
        log_dict = {
            'training_loss_per_batch': [],
            'validation_loss_per_batch': [],
            'training_accuracy_per_epoch': [],
            'validation_accuracy_per_epoch': []
        }

        for epoch in range(epochs):
            print(f'Epoch {epoch+1}/{epochs}')
            train_losses = []

            #  training
            print('training...')
            model.train()
            for images, labels in tqdm(trainloader):
                images, labels = images.to(device), labels.to(device)
                optimizer.zero_grad()
                predictions = model(images)
                loss = criterion_multi_class(predictions, labels)
                log_dict['training_loss_per_batch'].append(loss.item())
                train_losses.append(loss.item())
                loss.backward()
                optimizer.step()

            with torch.no_grad():
                print('deriving training accuracy...')
                # Second pass over the training data, with the updated weights in eval mode.
                train_accuracy = accuracy(model, trainloader)
                log_dict['training_accuracy_per_epoch'].append(train_accuracy)

            #  validation
            print('validating...')
            val_losses = []
            model.eval()

            with torch.no_grad():
                for images, labels in tqdm(validloader):
                    images, labels = images.to(device), labels.to(device)
                    predictions = model(images)
                    val_loss = criterion_multi_class(predictions, labels)
                    log_dict['validation_loss_per_batch'].append(val_loss.item())
                    val_losses.append(val_loss.item())
                print('deriving validation accuracy...')
                val_accuracy = accuracy(model, validloader)
                log_dict['validation_accuracy_per_epoch'].append(val_accuracy)

            # Epoch-level means; all four curves go to TensorBoard so training and
            # validation behaviour can be compared side by side.
            train_losses = np.array(train_losses).mean()
            val_losses = np.array(val_losses).mean()
            writer.add_scalar("Loss/train", train_losses, epoch)
            writer.add_scalar("Loss/val", val_losses, epoch)
            writer.add_scalar("Accuracy/train", train_accuracy, epoch)
            writer.add_scalar("Accuracy/val", val_accuracy, epoch)

            print(f'training_loss: {round(train_losses, 4)}  training_accuracy: '+
                  f'{train_accuracy}  validation_loss: {round(val_losses, 4)} '+
                  f'validation_accuracy: {val_accuracy}\n')

            # Advance the learning-rate schedule once per epoch.
            scheduler.step()

        #  saving model under a timestamped name
        datetime_string = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        print(datetime_string)

        # Create the target directory first; otherwise a finished training run is
        # lost when torch.save cannot open the file.
        os.makedirs('models', exist_ok=True)
        torch.save(model.state_dict(), 'models/model_Shallow_FMNIST'+datetime_string+'.pth')
        print('model saved')
    finally:
        # Flush and release the TensorBoard writer even if training was interrupted.
        writer.close()

In [None]:
from torch.optim import lr_scheduler

# Baseline run: plain SGD; StepLR multiplies the learning rate by 0.1 every 2 epochs.
learning_rate = 0.01
model = SimpleCNN()
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)
scheduler = lr_scheduler.StepLR(optimizer=optimizer, step_size=2, gamma=0.1)

train_model(model=model, optimizer=optimizer, scheduler=scheduler, epochs=20)

## Task 2: Batch Normalization
* Add Batch Normalization layers to the network architecture
* Train the updated network, try to increase the learning rate
* Analyze the convergence and the final results

In [None]:
#######################################
# TODO: Insert your code below
# Insert BatchNorm2d layers in the CNN model
###########################################
class SimpleCNNwBN(nn.Module):
    """CNN with batch normalization after every convolution and hidden linear layer.

    Three blocks of (conv 3x3 -> BatchNorm2d -> ReLU) x 2 followed by a 2x2 max-pool,
    then a 4096 -> 1024 -> 512 -> 10 classifier whose hidden layers use BatchNorm1d
    before ReLU. The 4096 input features match 256 channels at 4x4, i.e. 3x32x32 inputs.
    """

    def __init__(self):
        super().__init__()

        # (input channels, channels after first conv, channels after second conv) per block
        conv_stack = []
        for c_in, c_mid, c_out in ((3, 32, 64), (64, 128, 128), (128, 256, 256)):
            conv_stack.extend([
                nn.Conv2d(c_in, c_mid, kernel_size=3, padding=1),
                nn.BatchNorm2d(c_mid),
                nn.ReLU(inplace=True),
                nn.Conv2d(c_mid, c_out, kernel_size=3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=2, stride=2),
            ])
        self.conv_layer = nn.Sequential(*conv_stack)

        # Hidden classifier layers share the Linear -> BatchNorm1d -> ReLU pattern.
        head = []
        for n_in, n_out in ((4096, 1024), (1024, 512)):
            head.extend([
                nn.Linear(n_in, n_out),
                nn.BatchNorm1d(n_out),
                nn.ReLU(inplace=True),
            ])
        head.append(nn.Linear(512, 10))
        self.fc_layer = nn.Sequential(*head)

    def forward(self, x):
        """Map a batch of images to a batch of 10 class logits."""
        features = self.conv_layer(x)
        # Collapse everything except the batch dimension.
        flat = features.view(features.size(0), -1)
        return self.fc_layer(flat)

# Build the batch-norm variant on the selected device and print its layer summary
# for a single 3x32x32 input.
cnn_model1 = SimpleCNNwBN().to(device)
summary(cnn_model1, (3, 32, 32))

In [None]:
from torch.optim import lr_scheduler

# Batch-norm run: same SGD / StepLR setup as the baseline (lr x0.1 every 2 epochs).
learning_rate = 0.01
model = SimpleCNNwBN()
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)
scheduler = lr_scheduler.StepLR(optimizer=optimizer, step_size=2, gamma=0.1)

train_model(model=model, optimizer=optimizer, scheduler=scheduler, epochs=20)

## Task 3: Complexity of the model
* Simplify your model and observe the underfitting behavior
* Check whether your original model overfits.
* Experiment with different approaches (Regularization, Dropout, Augmentation) to prevent overfitting 

In [None]:
class SimplerCNNModel(nn.Module):
    """Reduced CNN: one conv block and a single hidden linear layer with dropout.

    The conv block is two 3x3 convolutions (each followed by ReLU, then BatchNorm2d)
    and one 2x2 max-pool. The classifier takes 64 * 16 * 16 flattened features, i.e.
    64 channels at 16x16, which corresponds to 3x32x32 inputs. The output has 10 logits.
    """

    def __init__(self):
        super().__init__()

        self.conv_layer = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(num_features=32),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(num_features=64),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        flat_features = 64 * 16 * 16  # channels x height x width after the single pool
        hidden_units = 512
        self.fc_layer = nn.Sequential(
            nn.Linear(flat_features, hidden_units),
            nn.ReLU(inplace=True),
            nn.BatchNorm1d(hidden_units),
            nn.Dropout(p=0.5),  # drop half of the hidden activations during training
            nn.Linear(hidden_units, 10),
        )

    def forward(self, x):
        """Map a batch of images to a batch of 10 class logits."""
        features = self.conv_layer(x)
        # Collapse everything except the batch dimension.
        flat = features.view(features.size(0), -1)
        return self.fc_layer(flat)

In [None]:
from torch.optim import lr_scheduler

# Reduced-model run: same SGD / StepLR setup as the other experiments (lr x0.1 every 2 epochs).
learning_rate = 0.01
model = SimplerCNNModel()
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)
scheduler = lr_scheduler.StepLR(optimizer=optimizer, step_size=2, gamma=0.1)

train_model(model=model, optimizer=optimizer, scheduler=scheduler, epochs=20)