<a href="https://colab.research.google.com/github/IIF0403/Thesis/blob/main/Finetuning_SimSiam.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd 
import torch
import torch.nn as nn
import torch.nn.functional as F
from google.colab import files
from google.colab import output
from google.colab import drive
from torch.nn.utils.rnn import pack_sequence
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
from sklearn.model_selection import train_test_split
from copy import deepcopy
import random
from datetime import datetime
from sklearn.metrics import accuracy_score

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
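#Import everything (star import) from the other notebook "SimSiam_training"; presumably this provides the Dataset class and the helpers used below (e.g. load_checkpoint, to_Excel)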
!pip install import_ipynb
drive.mount('/content/drive')
%cd '/content/drive/MyDrive/Colab Notebooks'

#!ls
import import_ipynb
from SimSiam_training import *


In [None]:
### Some helping functions ###

#Function to calculate Accuracy score
def Accuracy_score(labels, preds):
  """Return the fraction of positions at which `labels` and `preds` agree.

  Args:
    labels: Sized iterable of ground-truth labels.
    preds: Sized iterable of predicted labels, same length as `labels`.

  Returns:
    correct / total as a Python float.

  Raises:
    ValueError: If the inputs differ in length or are empty. Raising here
      (instead of printing and returning None) keeps a bad call from silently
      propagating None into later averaging code.
  """
  total = len(labels)
  if total != len(preds):
    raise ValueError(
        f"sizes does not match: {total} labels vs {len(preds)} predictions")
  if total == 0:
    raise ValueError("cannot compute accuracy of empty input")

  # Element-wise comparison; works for lists as well as 1-D tensors/arrays.
  correct = sum(1 for label, pred in zip(labels, preds) if label == pred)
  return correct / total

## Function to calculate classification_accuracy of a classifier given a frozen backbone model and the test set
def evaluate_classifier(test_loader, backbone_model, classifier):
  """Compute classification accuracy of `classifier` on top of `backbone_model`.

  Args:
    test_loader: Iterable of batches; each batch is a mapping with the keys
      'time_series' (model input) and 'label' (integer class labels).
    backbone_model: Callable mapping a time-series batch to features. Its
      train/eval mode is NOT changed here; the caller is responsible for it.
    classifier: Module mapping features to per-class scores; switched to
      eval mode by this function.

  Returns:
    Overall accuracy (correct samples / all samples) as a float, or NaN when
    the loader yields no samples.
  """
  classifier.eval()
  correct = 0
  total = 0

  with torch.no_grad():
    for batch in test_loader:
      time_series_batch = batch['time_series']
      label_batch = batch['label']

      feature = backbone_model(time_series_batch)
      y_hat = classifier(feature)
      pred = torch.max(y_hat, 1)[1]  # index of the highest score per sample

      # Compare on the prediction's device so CPU labels work with GPU output.
      labels = torch.as_tensor(label_batch, device=pred.device)
      correct += (pred == labels).sum().item()
      total += pred.shape[0]

  # Count samples rather than averaging per-batch accuracies: a smaller last
  # batch would otherwise be over-weighted.
  if total == 0:
    return float("nan")
  return correct / total

## Function to calculate classification_accuracy of a model (complete model) given the test set
def evaluate_model(test_loader, model):
  """Compute classification accuracy of a complete model on a test loader.

  Args:
    test_loader: Iterable of batches; each batch is a mapping with the keys
      'time_series' (model input) and 'label' (integer class labels).
    model: Module mapping a time-series batch to per-class scores; switched
      to eval mode by this function.

  Returns:
    Overall accuracy (correct samples / all samples) as a float, or NaN when
    the loader yields no samples.
  """
  model.eval()
  correct = 0
  total = 0

  with torch.no_grad():
    for batch in test_loader:
      time_series_batch = batch['time_series']
      label_batch = batch['label']

      y_hat = model(time_series_batch)
      pred = torch.max(y_hat, 1)[1]  # index of the highest score per sample

      # Compare on the prediction's device so CPU labels work with GPU output.
      labels = torch.as_tensor(label_batch, device=pred.device)
      correct += (pred == labels).sum().item()
      total += pred.shape[0]

  # Count samples rather than averaging per-batch accuracies: a smaller last
  # batch would otherwise be over-weighted.
  if total == 0:
    return float("nan")
  return correct / total


## function to save checkpoint of linear classifier
def save_checkpoint_classifier(SaveName, model, epoch, optimizer, loss_list, accuracy_list, lr, train_bs):
  """Write a training checkpoint to the mounted Google Drive.

  The checkpoint is a dict holding the epoch, the model and optimizer state
  dicts, the loss and accuracy histories, the learning rate and the training
  batch size. It is stored as
  /content/drive/MyDrive/checkpoints/classifier_<SaveName>.pth.
  """
  drive.mount('/content/drive')

  state = dict(
      epoch=epoch,
      model_state_dict=model.state_dict(),
      optimizer_state_dict=optimizer.state_dict(),
      loss_list=loss_list,
      accuracy_list=accuracy_list,
      lr=lr,
      train_bs=train_bs,
  )
  torch.save(state, f"/content/drive/MyDrive/checkpoints/classifier_{SaveName}.pth")


## function to load checkpoint of linear classifier
def load_checkpoint_classifier(name):
  """Load a classifier checkpoint from the mounted Google Drive.

  Reads /content/drive/MyDrive/checkpoints/classifier_<name>.pth, prints the
  best accuracy and the last training loss stored in it, and returns the
  checkpoint dict unchanged.

  Args:
    name: The name the checkpoint was saved under.

  Returns:
    The checkpoint dict as produced by torch.load.
  """
  drive.mount('/content/drive')
  PATH = f"/content/drive/MyDrive/checkpoints/classifier_{name}.pth"
  # Remap stored tensors onto the active device so a checkpoint written on a
  # GPU runtime can still be opened on a CPU-only runtime.
  checkpoint = torch.load(PATH, map_location=device)

  train_loss = checkpoint['loss_list'][-1]
  max_accuracy = max(checkpoint['accuracy_list'])

  print("checkpoint loaded:"," Accuracy: ",max_accuracy," Loss: ", train_loss)
  return checkpoint

def load_trained_classifier(checkpoint, classes):
  """Rebuild a linear classifier and its Adam optimizer from a checkpoint.

  A fresh nn.Linear(128, classes) and an Adam optimizer (learning rate taken
  from checkpoint['lr']) are created, then both are restored from the state
  dicts stored in the checkpoint. The classifier is returned in eval mode on
  the active device, together with the optimizer.
  """
  stored_lr = checkpoint['lr']

  classifier = nn.Linear(128, classes).to(device)
  adam = torch.optim.Adam(classifier.parameters(), lr=stored_lr)

  # Restore the weights first, then the optimizer state.
  classifier.load_state_dict(checkpoint['model_state_dict'])
  adam.load_state_dict(checkpoint['optimizer_state_dict'])

  classifier.eval()
  return classifier.to(device), adam

#Function to round a value to 4 decimals
def rounded(value):
  """Return `value` rounded to four decimals, as a float."""
  # Round through fixed-point string formatting, then parse back to float.
  return float(f"{value:.4f}")

import matplotlib.pyplot as plt

#Function to plot Accuracy or Loss over epochs
def plot_progress(res, title, save=True):
  """Plot a per-epoch series (e.g. accuracy or loss) against its epoch index.

  Args:
    res: Sequence of values, one per epoch.
    title: Plot title; also used as the file stem when saving.
    save: When equal to True, the figure is written to "<title>.png" and
      offered for download through google.colab before it is shown.
  """
  epoch_axis = list(range(len(res)))

  plt.figure()
  plt.plot(epoch_axis, res)
  plt.xlabel('epoch')
  plt.title(title)

  if save == True:
    png_name = title+".png"
    plt.savefig(png_name)
    files.download(png_name)
  plt.show()

#### FineTune network model ####
#Combines the Backbone model and the Linear to a complete model for finetuning
class FineTuneModel(nn.Module):
  """Backbone followed by a linear head, exposed as one trainable module."""

  def __init__(self, Backbone, Linear):
    super().__init__()
    # Registered as submodules so their parameters are part of this model.
    self.Backbone = Backbone
    self.Linear = Linear

  def forward(self, x):
    """Run `x` through the backbone and feed the features to the head."""
    return self.Linear(self.Backbone(x))


In [None]:

## Function to do FineTuning 
def FineTuning(trained_model, train_set, test_set, Linear = None, SaveName=None, epochs_Linear = 10, epochs_finetune=10, train_bs = 40, lr_linear=0.001, lr_finetune=0.001, backbone = "FCN"):
  """Linear-evaluate and then fine-tune a SimSiam-pretrained backbone.

  Stage 1 (skipped when `Linear` is given): a linear classifier is trained on
  top of the frozen backbone and evaluated after every epoch.
  Stage 2: a copy of the backbone plus a copy of the linear classifier are
  trained end-to-end and evaluated after every epoch.

  Args:
    trained_model: Model previously trained with SimSiam; its `.backbone`
      attribute is used as feature extractor.
    train_set: Labeled training dataset; `train_set.Datasets[0]` is used as
      the dataset name in the results.
    test_set: Evaluation dataset; `test_set.classes` gives the number of
      output classes.
    Linear: Optional already-trained linear classifier. When None, one is
      trained in stage 1.
    SaveName: When not None, the fine-tuned model is checkpointed under this
      name via save_checkpoint_classifier.
    epochs_Linear: Number of epochs for stage 1.
    epochs_finetune: Number of epochs for stage 2.
    train_bs: Batch size for both the train and the test loader.
    lr_linear: Adam learning rate for stage 1.
    lr_finetune: Adam learning rate for stage 2.
    backbone: Unused here; kept for interface compatibility.

  Returns:
    List of result rows (one for "LinearEval" when stage 1 ran, one for
    "FineTuning"), each
    [dataset, type, last epoch index, batch size, lr, last accuracy,
     best accuracy, last loss, -1, -1].
  """
  dataset = train_set.Datasets[0]
  classes = test_set.classes

  Results = []

  train_loader = DataLoader(train_set, batch_size = train_bs, shuffle=True)
  test_loader = DataLoader(test_set, batch_size = train_bs, shuffle=True)

  #Get the frozen backbone of the trained model (wrapped once for multi-GPU)
  Backbone_model = nn.DataParallel(trained_model.backbone.to(device))

  if Linear is None:
    #############################################################
    #If trained Linear layer is not given
    #Train Linear layer on frozen Backbone
    # NOTE: 128 is the hard-coded feature size expected from the backbone.
    Linear = nn.Linear(128, classes).to(device)
    optimizer_linear = torch.optim.Adam(Linear.parameters(), lr=lr_linear)

    accuracies_linear = []
    losses_linear = []

    print("Training Linear Layer")
    for e in range(epochs_Linear):
      # evaluate_classifier leaves Linear in eval mode, so reset every epoch.
      Backbone_model.eval()
      Linear.train()
      loss_list_linear = []

      for batch in train_loader:
        time_series_batch = batch['time_series'].to(device)
        # Labels must live on the same device as the predictions.
        label_batch = batch['label'].to(device)

        #Train linear classifier on top of backbone model
        Linear.zero_grad()
        with torch.no_grad():  # backbone stays frozen
          feature = Backbone_model(time_series_batch)
        pred = Linear(feature)
        loss_linear = F.cross_entropy(pred, label_batch)
        loss_linear.backward()
        optimizer_linear.step()
        loss_list_linear.append(loss_linear.item())

      Loss_linear = np.mean(loss_list_linear)
      losses_linear.append(Loss_linear)
      accuracy_linear = evaluate_classifier(test_loader, Backbone_model, Linear)
      accuracies_linear.append(accuracy_linear)

      if (e in [50,100,200, 299]):
        print("e:  ", e, " dataset: ", dataset,"  SimSiam:   accuracy; ", rounded(accuracy_linear)," max Acc: ",max(accuracies_linear) ," loss; ",rounded(Loss_linear) )

    results_linear = [dataset, "LinearEval", e , train_bs, lr_linear, accuracies_linear[-1], max(accuracies_linear), losses_linear[-1], -1, -1]
    Results.append(results_linear)
    #############################################################

  ############### Unfreeze and train the whole network ##################
  print("Fine tuning the whole network")

  # Work on real copies: fine-tuning updates the backbone weights, and the
  # caller reuses `trained_model` (and possibly `Linear`) for further runs.
  Backbone_finetune = deepcopy(Backbone_model)
  Linear_finetune = nn.DataParallel(deepcopy(Linear)).to(device)

  FineTune_model = FineTuneModel(Backbone_finetune, Linear_finetune) #Combine Backbone and Linear to a single model for finetuning

  lr_FineTune = lr_finetune
  optimizer_FineTune = torch.optim.Adam(FineTune_model.parameters(), lr=lr_FineTune)

  losses_FineTune = []
  accuracies_FineTune = []

  for e in range(epochs_finetune):
    # evaluate_model leaves the model in eval mode, so reset every epoch.
    FineTune_model.train()
    loss_list_Finetune = []

    for batch in train_loader:
      time_series_batch = batch['time_series'].to(device)
      label_batch = batch['label'].to(device)

      #Train the FineTune model
      FineTune_model.zero_grad()
      pred = FineTune_model(time_series_batch)

      loss_FineTune = F.cross_entropy(pred, label_batch)
      loss_FineTune.backward()
      optimizer_FineTune.step()
      loss_list_Finetune.append(loss_FineTune.item())

    Loss_FineTune = np.mean(loss_list_Finetune)
    losses_FineTune.append(Loss_FineTune)
    accuracy_FineTune = evaluate_model(test_loader, FineTune_model)
    accuracies_FineTune.append(accuracy_FineTune)

    print("e:  ", e, " dataset: ", dataset,"  SimSiam:   accuracy; ",rounded(accuracy_FineTune)," max Acc: ",max(accuracies_FineTune), " loss; ",rounded(Loss_FineTune))

  # The last two entries are placeholders (-1), as in the LinearEval row.
  results_finetune = [dataset, "FineTuning", e , train_bs, lr_FineTune, accuracies_FineTune[-1], max(accuracies_FineTune), losses_FineTune[-1], -1, -1]
  Results.append(results_finetune)

  #Plotting Accuracy and Loss over epochs
  plot_progress(accuracies_FineTune, "Accuracy_FineTune_"+dataset, save=False)
  plot_progress(losses_FineTune, "Loss_FineTune_"+dataset, save=False)

  if SaveName is not None:
    # Pass the fine-tuned model instance (not the FineTuneModel class).
    save_checkpoint_classifier(SaveName, FineTune_model, e, optimizer_FineTune, losses_FineTune, accuracies_FineTune, lr_FineTune, train_bs) #Save checkpoint

  return Results



In [None]:
##### FineTuning on each dataset of the big dataset #####

### The datasets
Datasets = ["ChlorineConcentration", "ECG5000", "ElectricDevices", "FordA", "FordB","Two_Patterns", "wafer", "yoga"]

### Learning rates found using Optuna
### NOTE: every learning-rate list must be index-aligned with Datasets.
lr_FCN_linear = [ 0.0097,0.0278 ,0.0623 ,0.0689 , 0.0222, 0.0854, 0.0623 ,0.0544]
lr_ResNet_linear = [ 0.0072, 0.0191, 0.0046, 0.0233, 0.0002,0.0370, 0.0265,0.0234]

Lr_FCN_finetune = [0.0263,0.0461,0.0059,0.0376,0.0032,0.0062,0.0305,0.0057]
Lr_ResNet_finetune = [0.0095, 0.0027,0.0037,0.0011,0.0052,0.0004,0.0153,0.0094]


### For finetuning of the new datasets
#Datasets = ["TwoLeadECG", "SmallKitchenAppliances", "MoteStrain", "CinC_ECG_torso"]
#Lr_FCN_finetune = [0.0536, 0.0105, 0.0032,0.00176] #Found with Optuna
#Lr_ResNet_finetune = [0.00446, 0.04377,0.00412,0.00344]


Backbones = ["FCN", "ResNet"]

batch_size = 512

labeled_sizes = [0.2, 0.1, 0.05]

rounds = 1

epochs_Linear = 300
epochs_FineTune = 300

# Column layout of every row in Results: the ten values returned by
# FineTuning followed by SaveName, date and labeled_size appended below.
column_names = ["Dataset", "Type", "epochs", "batch_size", "lr", "last_Acc", "Model Accuracy", "Loss Model", "Random model accuracy", "Loss Random", "trained model used", "date model", "labeled size"]

Results = []

for backbone in Backbones:
  if backbone == "FCN":
    #Information to load the saved SimSiam_FCN model from Google drive
    SaveName = "Window38"
    date = 2704
    epoch = 99

    #Set the Learning rate lists as the optimal learning rates previously found by Optuna
    #(linear evaluation uses its own list, not the fine-tuning one)
    Lr_linear = lr_FCN_linear
    Lr_Finetune = Lr_FCN_finetune

  else:
    #Information to load the saved SimSiam_ResNet model from Google drive
    SaveName = "ResNet_W38"
    date = 2904
    epoch = 99

    #Set the Learning rate lists as the optimal learning rates previously found by Optuna
    #(linear evaluation uses its own list, not the fine-tuning one)
    Lr_linear = lr_ResNet_linear
    Lr_Finetune = Lr_ResNet_finetune


  ##Loading the pre-trained SimSiam model
  checkpoint = load_checkpoint(SaveName, date, epoch)
  trained_model, optimizer_model, epoch_model, lr_model = load_trained_model(checkpoint, backbone=backbone)

  for dataset, lr_linear, lr_finetune in zip(Datasets, Lr_linear, Lr_Finetune):

    for labeled_size in labeled_sizes:
      Train_set = Timeseries_Dataset([dataset], train=True, Save=None, transform = transforms.Compose( [ ToTensor()] )) #the training set
      N_train = len(Train_set)

      Train_set.shuffle()
      Train_set_labeled = Train_set[:int(N_train*labeled_size)] #the labeled set

      Test_set = Timeseries_Dataset([dataset], train=False, Save=None, transform = transforms.Compose( [ ToTensor()] )) #the test set

      # `round_idx` rather than `round`, to avoid shadowing the builtin.
      for round_idx in range(rounds):
        Train_set_labeled.shuffle()
        Test_set.shuffle()

        Save_checkpoint_name = None

        Linear_trained = None

        Results_1 = FineTuning(trained_model, Train_set_labeled, Test_set, Linear = Linear_trained, SaveName=Save_checkpoint_name, epochs_Linear = epochs_Linear, epochs_finetune=epochs_FineTune, train_bs = batch_size, lr_linear=lr_linear, lr_finetune=lr_finetune, backbone = backbone)

        # Tag every row with the pretrained model it came from and the
        # labeled fraction used.
        for result in Results_1:
          result.extend([SaveName, date, labeled_size])
          Results.append(result)

    # Intermediate export after each dataset (contains all results so far).
    to_Excel(Results, column_names, "FineTune_"+SaveName+"_"+dataset+".xlsx")


name = "_FineTune_"
to_Excel(Results, column_names, name+".xlsx")


