# ML Assignment <br>
* To ensure all the code in this notebook works without issues, please make sure all libraries are installed and run the common code shared between the tasks.
* To avoid issues while running each task, it is advisable to restart the runtime / kernel after each task

* All PyTorch models have been retrieved from: https://pytorch.org/vision/0.8/models.html (Torchvision library)

#### Common Code Shared Between Tasks 1, 2 & 3
* Please run all the blocks (Might not need the first block if all libraries are installed already) to be able to run this notebook without issues

In [None]:
# All necessary libraries that are not installed by default on Google Colab.
# Only d2l is pinned to a specific version; skorch and torchsummary install
# whichever release is current at the time the cell is run.
!pip install d2l==0.17.2
!pip install skorch
!pip install torchsummary

In [None]:
""" Every import required for this notebook for all tasks"""
%matplotlib inline
import torch
import torchvision
import torchvision.transforms as transforms
from torch import nn
from torchsummary import summary
from d2l import torch as d2l
import numpy
from keras.datasets import cifar10
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.multiclass import OneVsOneClassifier
from sklearn.svm import SVC
import torch.nn.functional as F
from sklearn.model_selection import train_test_split

In [None]:
""" The following code is for data loading and setup has been retrieved from https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html"""
# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Transformations have been retrieved from https://pytorch.org/hub/pytorch_vision_resnet/ which applies to all of the models (AlexNet, ResNet & VGG)
transformations = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])

# Data set for training and validation
dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transformations)

# Data split 80/20 for training/validation.
# The sizes are derived from the dataset length and the split is seeded, so that
# every task (and every kernel restart) trains and validates on the same images.
num_val = len(dataset) // 5
num_train = len(dataset) - num_val
train_set, val_set = torch.utils.data.random_split(
    dataset, [num_train, num_val], generator=torch.Generator().manual_seed(42))

# Only the training loader is shuffled; evaluation accuracy does not depend on batch order
train_iter = torch.utils.data.DataLoader(train_set, batch_size=128, shuffle=True, num_workers=2)
val_iter = torch.utils.data.DataLoader(val_set, batch_size=128, shuffle=False, num_workers=2)

# Test Set loading
test_set = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transformations)
test_iter = torch.utils.data.DataLoader(test_set, batch_size=128, shuffle=False, num_workers=2)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [None]:
"""The following code has been retrived from https://d2l.ai/_modules/d2l/torch.html and has been slightly modified"""

# The purpose of the following method is to train the model
def train_ch13(net, train_iter, val_iter, test_iter, loss, trainer, num_epochs,
               devices=d2l.try_all_gpus()):
    """Train ``net`` and report train/validation/test metrics after every epoch.

    Adapted from ``d2l.train_ch13``; in addition to the training curve it
    evaluates and plots accuracy on a validation and a test iterator.

    Args:
        net: model to train; it is wrapped in ``nn.DataParallel`` over ``devices``.
        train_iter: iterable of ``(features, labels)`` training batches.
        val_iter: iterator evaluated after each epoch for validation accuracy.
        test_iter: iterator evaluated after each epoch for test accuracy.
        loss: loss function forwarded to ``d2l.train_batch_ch13``.
        trainer: optimizer forwarded to ``d2l.train_batch_ch13``.
        num_epochs: number of passes over ``train_iter``.
        devices: devices to train on. The default is computed once, when the
            function is defined.
    """
    timer, num_batches = d2l.Timer(), len(train_iter)
    # Plot the running training metrics about five times per epoch. Clamp the
    # interval to 1 so that loaders with fewer than five batches do not trigger
    # a modulo-by-zero.
    plot_every = max(1, num_batches // 5)
    animator = d2l.Animator(xlabel='epoch', xlim=[1, num_epochs], ylim=[0, 1],
                            legend=['train loss', 'train acc', 'val acc', 'test acc'])
    # Sets up the network to work with the input data in parallel
    net = nn.DataParallel(net, device_ids=devices).to(devices[0])
    for epoch in range(num_epochs):
        # Sum of training loss, sum of training accuracy, no. of examples,
        # no. of predictions
        metric = d2l.Accumulator(4)
        for i, (features, labels) in enumerate(train_iter):
            timer.start()
            l, acc = d2l.train_batch_ch13(
                net, features, labels, loss, trainer, devices)
            metric.add(l, acc, labels.shape[0], labels.numel())
            timer.stop()
            if (i + 1) % plot_every == 0 or i == num_batches - 1:
                animator.add(epoch + (i + 1) / num_batches,
                             (metric[0] / metric[2], metric[1] / metric[3],
                              None, None))
        # Evaluate performance on the validation dataset first
        val_acc = d2l.evaluate_accuracy_gpu(net, val_iter)
        animator.add(epoch + 1, (None, None, val_acc, None))

        # Evaluate performance on the test dataset after validation
        test_acc = d2l.evaluate_accuracy_gpu(net, test_iter)
        animator.add(epoch + 1, (None, None, None, test_acc))

    # Final output: metrics of the last epoch plus the training throughput
    print(f'loss {metric[0] / metric[2]:.3f}, train acc '
          f'{metric[1] / metric[3]:.3f}, val acc {val_acc:.3f}, test acc {test_acc:.3f}')
    print(f'{metric[2] * num_epochs / timer.sum():.1f} examples/sec on '
          f'{str(devices)}')

# The purpose of the following method is to setup the training of the model with the optimizer and loss functions
# Despite its name, it is used for training from scratch and also fine tuning depending on the final bool value
def train_fine_tuning(netType, net, learning_rate, num_epochs=10, param_group=True):
    """Build an SGD optimizer for ``net`` and train it with ``train_ch13``.

    Used both for fine-tuning and for training from scratch. Training runs on
    the module-level ``train_iter``, ``val_iter`` and ``test_iter`` loaders.

    Args:
        netType: ``"ResNet"`` selects ``net.fc`` as the final layer; any other
            value selects ``net.classifier[6]`` (VGG / AlexNet layout).
        net: the model to train.
        learning_rate: base learning rate for SGD.
        num_epochs: number of training epochs.
        param_group: when true, the final layer is trained with ten times the
            base learning rate; when false, every parameter uses the base rate.
    """
    # Gets all available GPUs
    devices = d2l.try_all_gpus()
    # Cross entropy loss, as this is a classification problem
    criterion = nn.CrossEntropyLoss(reduction='mean')

    if not param_group:
        # One learning rate for every parameter, whatever the architecture
        trainer = torch.optim.SGD(net.parameters(), lr=learning_rate,
                                  weight_decay=0.001)
    else:
        # VGG and AlexNet have no `fc` attribute; their last layer is classifier[6]
        is_resnet = netType == "ResNet"
        if is_resnet:
            head_param_names = ["fc.weight", "fc.bias"]
        else:
            head_param_names = ["classifier.6.weight", "classifier.6.bias"]
        backbone_params = [weights for label, weights in net.named_parameters()
                           if label not in head_param_names]
        head = net.fc if is_resnet else net.classifier[6]
        param_groups = [
            {'params': backbone_params},
            {'params': head.parameters(), 'lr': learning_rate * 10},
        ]
        trainer = torch.optim.SGD(param_groups, lr=learning_rate, weight_decay=0.001)

    train_ch13(net, train_iter, val_iter, test_iter, criterion, trainer, num_epochs, devices)

## Task 1
* ResNet-18 on CIFAR-10 (Fine-tune VS From scratch)

In [None]:
# Model Creation - Pretrained
# Load ResNet-18 with pretrained weights, then replace its final fully connected
# layer with a new 10-output layer whose weights are Xavier-initialised.
finetune_network = torchvision.models.resnet18(pretrained=True)
finetune_network.fc = nn.Linear(finetune_network.fc.in_features, 10)
nn.init.xavier_uniform_(finetune_network.fc.weight);

""" Train Pretrained Model """
# param_group=True: the new fc layer is trained with 10x the base learning rate
train_fine_tuning("ResNet", finetune_network, learning_rate=4e-3, num_epochs=5, param_group=True)

In [None]:
# Model Creation - Scratch
# Build ResNet-18 without pretrained weights and give it a 10-output final
# layer whose weights are Xavier-initialised.
scratch_network = torchvision.models.resnet18(pretrained=False)
scratch_network.fc = nn.Linear(scratch_network.fc.in_features, 10)
nn.init.xavier_uniform_(scratch_network.fc.weight);

""" Train Model From Scratch """
# param_group=False: every parameter uses the same learning rate
train_fine_tuning("ResNet", scratch_network, learning_rate=3e-3, num_epochs=15, param_group=False)

## Task 2


#### ResNet18 (Same models with learning rates from Task 1)

In [None]:
# Model Creation - Pretrained (Resnet18)
# Load ResNet-18 with pretrained weights, then replace its final fully connected
# layer with a new 10-output layer whose weights are Xavier-initialised.
finetune_network = torchvision.models.resnet18(pretrained=True)
finetune_network.fc = nn.Linear(finetune_network.fc.in_features, 10)
nn.init.xavier_uniform_(finetune_network.fc.weight);

""" Train Pretrained Model """
# param_group=True: the new fc layer is trained with 10x the base learning rate
train_fine_tuning("ResNet", finetune_network, learning_rate=4e-3, num_epochs=5, param_group=True)

In [None]:
# Model Creation - Scratch (Resnet18)
# Build ResNet-18 without pretrained weights and give it a 10-output final
# layer whose weights are Xavier-initialised.
scratch_network = torchvision.models.resnet18(pretrained=False)
scratch_network.fc = nn.Linear(scratch_network.fc.in_features, 10)
nn.init.xavier_uniform_(scratch_network.fc.weight);

""" Train Model From Scratch """
# param_group=False: every parameter uses the same learning rate
train_fine_tuning("ResNet", scratch_network, learning_rate=3e-3, num_epochs=15, param_group=False)

#### VGG-16
* Please note that training this model will take a couple of hours

In [None]:
# Model Creation - Pretrained (VGG16)
# Load VGG-16 with pretrained weights, then replace the last classifier layer
# (classifier[6]) with a new 10-output layer whose weights are Xavier-initialised.
finetune_network = torchvision.models.vgg16(pretrained=True)
finetune_network.classifier[6] = nn.Linear(finetune_network.classifier[6].in_features, 10)
nn.init.xavier_uniform_(finetune_network.classifier[6].weight);

""" Train Pretrained Model """
# param_group=True: classifier[6] is trained with 10x the base learning rate
train_fine_tuning("VGG", finetune_network, learning_rate=5e-3, num_epochs=5, param_group=True)

In [None]:
# Model Creation - Scratch (VGG16)
# Build VGG-16 without pretrained weights and give it a 10-output last
# classifier layer whose weights are Xavier-initialised.
scratch_network = torchvision.models.vgg16(pretrained=False)
scratch_network.classifier[6] = nn.Linear(scratch_network.classifier[6].in_features, 10)
nn.init.xavier_uniform_(scratch_network.classifier[6].weight);

""" Train Model From Scratch """
# param_group=False: every parameter uses the same learning rate
train_fine_tuning("VGG", scratch_network, learning_rate=2e-2, num_epochs=10, param_group=False)

#### AlexNet

In [None]:
# Model Creation - Pretrained (AlexNet)
# Load AlexNet with pretrained weights, then replace the last classifier layer
# (classifier[6]) with a new 10-output layer whose weights are Xavier-initialised.
finetune_network = torchvision.models.alexnet(pretrained=True)
finetune_network.classifier[6] = nn.Linear(finetune_network.classifier[6].in_features, 10)
nn.init.xavier_uniform_(finetune_network.classifier[6].weight);

""" Train Pretrained Model """
# param_group=True: classifier[6] is trained with 10x the base learning rate
train_fine_tuning("AlexNet", finetune_network, learning_rate=6e-4, num_epochs=5, param_group=True)

In [None]:
# Model Creation - Scratch (AlexNet)
# Build AlexNet without pretrained weights and give it a 10-output last
# classifier layer whose weights are Xavier-initialised.
scratch_network = torchvision.models.alexnet(pretrained=False)
scratch_network.classifier[6] = nn.Linear(scratch_network.classifier[6].in_features, 10)
nn.init.xavier_uniform_(scratch_network.classifier[6].weight);


""" Train Model From Scratch """
# param_group=False: every parameter uses the same learning rate
train_fine_tuning("AlexNet", scratch_network, learning_rate=6e-2, num_epochs=10, param_group=False)

## Task 3

#### Softmax

* Please do not forget to run the first 3 code blocks of this notebook at the very top to have the libraries, data loading and training methods all setup as it is shared and used throughout the entire notebook, including this
* Please run the next code block in order to be able to run the softmax feature extraction tasks / models

In [None]:
""" The following softmax code has been retrieved from Harrison's Tutorial 7 Video: https://www.youtube.com/watch?v=NVcJhMtRErE&list=PLyd8fQKbk5ej_lNdyAKI5aS_4hm-FhrrF&index=8 """
""" The notebook for tutorial 7 was retrieved from: https://drive.google.com/drive/folders/15w_pTty4nMtFBwc-1iDP4n4q9ydUTahh """
""" The code on how to get features from ResNet has been retrieved from: https://d2l.ai/chapter_computer-vision/fcn.html """

# Class definition of Softmax Regression
class SoftmaxRegression(nn.Module):
  """Single linear layer used as a softmax-regression classifier head.

  ``forward`` returns the raw class scores (logits). The softmax itself is
  deliberately not applied here: this notebook trains the head with
  ``nn.CrossEntropyLoss``, which already applies log-softmax to its input, so
  normalising in ``forward`` as well would apply softmax twice and flatten the
  gradients. The predicted class (argmax) is the same for logits and
  probabilities; apply ``F.softmax(logits, dim=-1)`` to the output when
  probabilities are needed.

  Args:
    input_dim: number of input features per example.
    output_dim: number of classes.
    *args, **kwargs: accepted and ignored.
  """
  def __init__(self, input_dim, output_dim, *args, **kwargs):
    super(SoftmaxRegression, self).__init__()
    self.layer = nn.Linear(input_dim, output_dim)

  def forward(self, X, *args, **kwargs):
    """Return the unnormalised class scores for ``X``."""
    return self.layer(X)

# Class definition of a new model that extracts features specifically from ResNet and applies a classifier after the features
class NewModel(nn.Module):
  """Truncated pretrained network followed by global pooling and a classifier.

  Args:
    pretrained_model: module whose first ``num_layer`` children are reused as
      the feature extractor.
    output_model: module applied to the flattened, pooled features.
    num_layer: number of leading children of ``pretrained_model`` to keep.
  """
  def __init__(self, pretrained_model, output_model, num_layer):
    super().__init__()

    self.flatten = nn.Flatten()
    self.num_layer = num_layer
    # Keep only the first `num_layer` children of the pretrained model
    kept_children = list(pretrained_model.children())[:num_layer]
    self.new_features = nn.Sequential(*kept_children)
    # Adaptive pooling yields a 1x1 map for whichever layer the features come from
    self.pooling = nn.AdaptiveAvgPool2d((1,1))
    self.output = output_model

  def forward(self, x):
    """Truncated network -> pooling -> flatten -> classifier."""
    pooled = self.pooling(self.new_features(x))
    return self.output(self.flatten(pooled))

In [None]:
""" Softmax: Original Input """
# Linear classifier applied directly to the flattened, transformed image.
# 150528 = 3 * 224 * 224: three colour channels at the 224x224 size produced by CenterCrop(224).
softmaxNet = nn.Sequential(nn.Flatten(), nn.Linear(150528, 10)).to(device)

def init_weights(m):
    """Draw the weights of ``nn.Linear`` modules from a normal distribution with std 0.01; other modules are left untouched."""
    if type(m) == nn.Linear:
        nn.init.normal_(m.weight, std=0.01)

# Module.apply calls init_weights on every submodule of softmaxNet
softmaxNet.apply(init_weights);

loss = nn.CrossEntropyLoss() # Has built in softmax
trainer = torch.optim.SGD(softmaxNet.parameters(), lr=9e-4)
num_epochs = 10
train_ch13(softmaxNet, train_iter, val_iter, test_iter, loss, trainer, num_epochs, devices = d2l.try_all_gpus())

In [None]:
""" Softmax: Last CNN Layer """
# Softmax-regression head taking 512 input features and producing 10 class outputs
softmax = SoftmaxRegression(512, 10) 
res_net = torchvision.models.resnet18(pretrained=True)
# Keep the first 8 children of ResNet-18 and attach the softmax head after pooling
new_net = NewModel(res_net, softmax, 8).to(device) # Removes classifier layer (Last CNN)

loss = nn.CrossEntropyLoss() 
# All parameters of new_net (truncated ResNet and head) are passed to the optimizer
trainer = torch.optim.SGD(new_net.parameters(), lr=3e-3)
num_epochs = 10
train_ch13(new_net, train_iter, val_iter, test_iter, loss, trainer, num_epochs, devices=d2l.try_all_gpus())

In [None]:
""" Softmax: Selected Intermediate CNN Layer """
# Softmax-regression head taking 256 input features and producing 10 class outputs
softmax = SoftmaxRegression(256, 10) 
res_net = torchvision.models.resnet18(pretrained=True)
# Keep only the first 7 children of ResNet-18 and attach the softmax head after pooling
new_net = NewModel(res_net, softmax, 7).to(device) # Removes last major features block (Intermediate CNN Layer)

loss = nn.CrossEntropyLoss() 
# All parameters of new_net (truncated ResNet and head) are passed to the optimizer
trainer = torch.optim.SGD(new_net.parameters(), lr=1e-1)
num_epochs = 10
train_ch13(new_net, train_iter, val_iter, test_iter, loss, trainer, num_epochs, devices=d2l.try_all_gpus())

#### Common code for KNN & SVM
 1. Please run the 1st code block to run KNN and SVM on original input correctly as it is for data loading
 2. Please run the 2nd code block to run KNN and SVM with feature extraction
 3. If you would like to run ResNet-18 till its last CNN layer, use the 3rd block in this section
 4. If you would like to run ResNet-18 till its selected intermediate CNN layer (2nd last), use the 4th block in this section
 * After each run of any of the feature extraction, please restart the runtime /kernel and re-run the code blocks needed to avoid issues 

In [None]:
# Data loading and setup from the keras library
(X_train, y_train), (X_test, y_test) = cifar10.load_data()
# Further splits the training dataset 80/20 to obtain a validation dataset.
# The split is seeded so that KNN and SVM are compared on the same partition.
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.20, random_state=42)

# Normalizing data as shown in tutorial 6 (https://lms.curtin.edu.au/bbcswebdav/pid-10051001-dt-content-rid-62217054_1/xid-62217054_1)
# Images become float32 scaled to [0, 1]; labels become int64.
# astype returns a new array, so its result has to be assigned back.
X_train = X_train.astype('float32')
y_train = y_train.astype('int64')
X_val = X_val.astype('float32')
y_val = y_val.astype('int64')
X_test = X_test.astype('float32')
y_test = y_test.astype('int64')
X_train /= 255.0
X_val /= 255.0
X_test /= 255.0

In [None]:
""" The following class code for the new model has been retrieved from Harrison's Tutorial 7 Video: https://www.youtube.com/watch?v=NVcJhMtRErE&list=PLyd8fQKbk5ej_lNdyAKI5aS_4hm-FhrrF&index=8 """
""" The code on how to get features from ResNet has been retrieved from: https://d2l.ai/chapter_computer-vision/fcn.html """

class NewModel_SVMKNN(nn.Module):
  """Feature extractor built from the leading layers of a pretrained network.

  The forward pass returns one flat feature vector per input image, meant to
  be fed to a KNN or SVM classifier.

  Args:
    pretrained_model: module whose first ``num_layer`` children are reused.
    num_layer: number of leading children of ``pretrained_model`` to keep.
  """
  def __init__(self, pretrained_model, num_layer):
    super(NewModel_SVMKNN, self).__init__()
    # The full pretrained model is kept as an attribute; forward() only uses new_features
    self.pretrained = pretrained_model
    self.flatten = nn.Flatten()
    self.num_layer = num_layer
    # Cuts the pretrained model down to the number of layers selected
    self.new_features = nn.Sequential(*list(pretrained_model.children())[:num_layer])
    # Adaptive pooling reduces every feature map to 1x1 to reduce RAM usage
    self.pooling = nn.AdaptiveAvgPool2d((1,1))

  def forward(self, x):
    """Return the pooled, flattened features of ``x``."""
    x = self.new_features(x)
    pool_x = self.pooling(x)
    # Call the module itself rather than .forward so that registered hooks still run
    return self.flatten(pool_x)


In [None]:
""" Best Task 2 Model for KNN / SVM with last CNN layer """
# Best Task 2 pre-trained model setup
# NOTE(review): this loads the stock pretrained ResNet-18 weights, not weights saved
# from the Task 2 fine-tuning run — confirm that this is what is intended.
res_net_trained = torchvision.models.resnet18(pretrained=True)
new_net = NewModel_SVMKNN(res_net_trained, 8) #Creates ResNet without classifier (Last CNN Layer)

# NOTE(review): new_net returns pooled features rather than class scores, so the
# loss below treats the feature vector as class scores — confirm this is intended.
loss = nn.CrossEntropyLoss()
trainer = torch.optim.SGD(new_net.parameters(), lr=3e-3) # Use same learning rate as from Task 2
num_epochs = 5 # 5 Epochs as task 2

# Training of pretrained model without classifier
train_ch13(new_net, train_iter, val_iter, test_iter, loss, trainer, num_epochs, devices=d2l.try_all_gpus())

In [None]:
""" Best Task 2 Model for KNN / SVM with intermediate CNN layer (2nd Last) """
# Best Task 2 pre-trained model setup
# NOTE(review): this loads the stock pretrained ResNet-18 weights, not weights saved
# from the Task 2 fine-tuning run — confirm that this is what is intended.
res_net_trained = torchvision.models.resnet18(pretrained=True)
new_net = NewModel_SVMKNN(res_net_trained, 7) #Creates ResNet without last major features block (Intermediate CNN Layer)

# NOTE(review): new_net returns pooled features rather than class scores, so the
# loss below treats the feature vector as class scores — confirm this is intended.
loss = nn.CrossEntropyLoss()
trainer = torch.optim.SGD(new_net.parameters(), lr=3e-3) # Use same learning rate as from Task 2 (Didn't modify to keep it as the same model)
num_epochs = 5 # 5 Epochs as task 2

# Training of pretrained model without classifier
train_ch13(new_net, train_iter, val_iter, test_iter, loss, trainer, num_epochs, devices=d2l.try_all_gpus())

#### KNN

In [None]:
""" KNN on original input """

# Data reshape for KNN classifier: one flat row of pixel values per image.
# The flattened copies get their own names so that X_train / X_val / X_test keep
# their image layout for the feature-extraction cells, and re-running this cell is safe.
X_train_flat = numpy.reshape(X_train, (X_train.shape[0], -1))
X_val_flat = numpy.reshape(X_val, (X_val.shape[0], -1))
X_test_flat = numpy.reshape(X_test, (X_test.shape[0], -1))

# Creating and training KNN classifier
knn_classifier = KNeighborsClassifier(n_neighbors=8) #Neighbours of 8 appears to be the best performer
knn_classifier.fit(X_train_flat, y_train.ravel()) #Ravel is used to convert array shape to be 1d (Basically flattens)

# Accuracy on Validation
val_predictions = knn_classifier.predict(X_val_flat)
print("Validation Accuracy:", accuracy_score(y_val.ravel(), val_predictions)) # Compares validation predictions with validation labels

# Accuracy on Test
test_predictions = knn_classifier.predict(X_test_flat)
print("Test Accuracy:", accuracy_score(y_test.ravel(), test_predictions)) # Compares test predictions with test labels

In [None]:
""" KNN on last CNN / selected intermediate features """
# Please run this code block once one of the ResNet-18 models without a classifier has been trained in the shared section above

def _extract_features(model, images, batch_size=512):
  """Run channel-last ``images`` through ``model`` and return the features as a NumPy array.

  The images are moved to the channel-first layout the PyTorch model expects and
  processed in batches, so the whole dataset never has to pass through the
  network in a single forward call.
  """
  batches = torch.as_tensor(images).permute(0, 3, 1, 2).split(batch_size)
  with torch.no_grad(): # No graph or gradient buffers are kept while extracting features
    return torch.cat([model(batch) for batch in batches]).numpy()

# NOTE(review): the images used here are the keras arrays scaled to [0, 1]; they do not go
# through the Resize/CenterCrop/Normalize transforms used while training new_net — confirm
# this preprocessing mismatch is acceptable.

# Create Classifier
knn_classifier = KNeighborsClassifier(n_neighbors=10)
new_net = new_net.cpu() # Puts model onto CPU since after training the model, there is not enough GPU resources for this step
new_net.eval() # Inference mode, so the features of an image do not depend on the rest of its batch

# X_train / X_val / X_test are left untouched, so this cell can be re-run safely
train_features = _extract_features(new_net, X_train)
knn_classifier.fit(train_features, y_train.ravel()) # Ravel gives the 1d label array the classifier expects
del train_features # Deletes the variable to free memory space for the next dataset as it is no longer needed

val_predictions = knn_classifier.predict(_extract_features(new_net, X_val))
test_predictions = knn_classifier.predict(_extract_features(new_net, X_test))

# Accuracy on Validation
print("Validation Accuracy:", accuracy_score(y_val.ravel(), val_predictions)) # Compares validation predictions with validation labels

# Accuracy on Test
print("Test Accuracy:", accuracy_score(y_test.ravel(), test_predictions)) # Compares test predictions with test labels

#### SVM

In [None]:
""" SVM on original input """

# Data reshape for SVM classifier: one flat row of pixel values per image.
# The flattened copies get their own names so that X_train / X_val / X_test keep
# their image layout for the feature-extraction cells, and re-running this cell is safe.
X_train_flat = numpy.reshape(X_train, (X_train.shape[0], -1))
X_val_flat = numpy.reshape(X_val, (X_val.shape[0], -1))
X_test_flat = numpy.reshape(X_test, (X_test.shape[0], -1))

# Creating and training SVM classifier
svm_classifier = SVC(decision_function_shape='ovo')
svm_classifier.fit(X_train_flat, y_train.ravel()) # Ravel gives the 1d label array the classifier expects

# Accuracy on Validation
val_predictions = svm_classifier.predict(X_val_flat)
print("Validation Accuracy:", accuracy_score(y_val.ravel(), val_predictions)) # Compares validation predictions with validation labels

# Accuracy on Test
test_predictions = svm_classifier.predict(X_test_flat)
print("Test Accuracy:", accuracy_score(y_test.ravel(), test_predictions)) # Compares test predictions with test labels

In [None]:
""" SVM on last CNN / selected intermediate features """
# Please run this code block once one of the ResNet-18 models without a classifier has been trained in the shared section above

def _extract_features(model, images, batch_size=512):
  """Run channel-last ``images`` through ``model`` and return the features as a NumPy array.

  The images are moved to the channel-first layout the PyTorch model expects and
  processed in batches, so the whole dataset never has to pass through the
  network in a single forward call.
  """
  batches = torch.as_tensor(images).permute(0, 3, 1, 2).split(batch_size)
  with torch.no_grad(): # No graph or gradient buffers are kept while extracting features
    return torch.cat([model(batch) for batch in batches]).numpy()

# NOTE(review): the images used here are the keras arrays scaled to [0, 1]; they do not go
# through the Resize/CenterCrop/Normalize transforms used while training new_net — confirm
# this preprocessing mismatch is acceptable.

# Create Classifier
svm_classifier = SVC(decision_function_shape='ovo')
new_net = new_net.cpu() # Puts model onto CPU since after training the model, there is not enough GPU resources for this step
new_net.eval() # Inference mode, so the features of an image do not depend on the rest of its batch

# X_train / X_val / X_test are left untouched, so this cell can be re-run safely
train_features = _extract_features(new_net, X_train)
svm_classifier.fit(train_features, y_train.ravel()) # Ravel gives the 1d label array the classifier expects
del train_features # Deletes the variable to free memory space for the next dataset as it is no longer needed

val_predictions = svm_classifier.predict(_extract_features(new_net, X_val))
test_predictions = svm_classifier.predict(_extract_features(new_net, X_test))

# Accuracy on Validation
print("Validation Accuracy:", accuracy_score(y_val.ravel(), val_predictions)) # Compares validation predictions with validation labels

# Accuracy on Test
print("Test Accuracy:", accuracy_score(y_test.ravel(), test_predictions)) # Compares test predictions with test labels

#### Extras

In [None]:
""" This code has been taken from Senjian An's code snippet off a blackboard announcement """
# Google Colab only: mounts Google Drive so the saved checkpoint can be read
from google.colab import drive
drive.mount('/content/gdrive')

# Saving model to G Drive
#model_save_name = 'ResNet18_FineTune.pt'
#path = F"/content/gdrive/MyDrive/ResNet18_FineTune.pt" 
#torch.save(model.state_dict(), path)


# Loading model from G Drive
model_save_name = 'ResNet18_FineTune.pt'
path = f"/content/gdrive/MyDrive/{model_save_name}"

# Creating a ResNet18 with the same architecture (10-output fc layer) as the model being loaded
finetune_network = torchvision.models.resnet18(pretrained=True)
finetune_network.fc = nn.Linear(finetune_network.fc.in_features, 10)
nn.init.xavier_uniform_(finetune_network.fc.weight);

# Loads the parameter values into the model. map_location='cpu' lets a checkpoint
# that was saved on a GPU be loaded on a runtime without one; the model can be
# moved to the GPU afterwards with .to(device).
finetune_network.load_state_dict(torch.load(path, map_location='cpu'))

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


<All keys matched successfully>

In [None]:
# If you would like to just evaluate the accuracy of a model, run the following code.
# The model is moved to `device` (the GPU when one is available) and scored on the test set loader.
print("Model accuracy is: {}".format(d2l.evaluate_accuracy_gpu(finetune_network.to(device), test_iter, device)))

Model accuracy is: 0.9179
