<a href="https://colab.research.google.com/github/IIF0403/Thesis/blob/main/MTL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
 %matplotlib inline
import numpy as np
import pandas as pd 
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from google.colab import files
from google.colab import output
from google.colab import drive

In [None]:
# Fixed seeds passed as random_state to train_test_split in prepare_data:
# seed1 for the train/test split, seed2 for the labeled/unlabeled split.
seed1 = 14
seed2 = 15
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

#Function to load UCR time_series data
def load_data(Dataset):
  """Download one dataset and return TRAIN and TEST stacked in a single DataFrame.

  The files `<Dataset>_TRAIN` and `<Dataset>_TEST` are read as header-less CSV
  from the raw GitHub URL below and concatenated (TRAIN rows first).
  Column 0 is treated as the class label and is re-encoded so that the
  classes are the integers 0 .. C-1, ordered by the original label value.

  Args:
    Dataset: name of the dataset folder / file prefix in the repository.

  Returns:
    pandas.DataFrame whose first column holds the re-encoded integer labels
    and whose remaining columns hold the time series values.
  """
  url_raw = 'https://raw.githubusercontent.com/IIF0403/Thesis/main/data/'
  url_train = url_raw + Dataset+'/'+Dataset+'_TRAIN'
  url_test = url_raw + Dataset+'/'+Dataset+'_TEST'

  data_train = pd.read_csv(url_train,header=None)
  data_test = pd.read_csv(url_test, header=None)

  data = pd.concat((data_train, data_test))

  #Want all datasets to have classes as integers starting from 0.
  #return_inverse gives, for every row, the rank of its label among the sorted
  #distinct labels. Unlike a min-max rescaling this stays exact for labels that
  #are not evenly spaced, never produces values such as 1.9999999 that a later
  #integer cast would truncate, and does not divide by zero for a single class.
  Y = data.values[:,0]
  _, Y_transformed = np.unique(Y, return_inverse=True)
  data[data.columns[0]] = Y_transformed

  return data

def explore_data(data):
  """Print and return the basic dimensions of a dataset.

  Args:
    data: DataFrame with the class label in column 0 and one time series
      per row in the remaining columns.

  Returns:
    Tuple (number of time series, number of distinct labels,
    number of data points per time series).
  """
  n_rows, n_columns = data.shape
  labels = data.iloc[:, 0]

  # The label column is not part of the series, hence n_columns - 1.
  summary = (n_rows, len(np.unique(labels)), n_columns - 1)

  for tag, value in zip(("#: ", "C: ", "T: "), summary):
    print(tag, value)

  return summary

#Function prepares data and removes labels from y_train
def prepare_data(data, test_data_size=0.2, unlabeled_data_size = 0.9 ):
  """Split a dataset into a partly unlabeled training set and a test set.

  The rows are shuffled and split into train/test. The training part is then
  split into a labeled and an unlabeled subset; the split is redrawn until the
  labeled subset contains every class that occurs in the training part.
  Labels of the unlabeled subset are replaced by -1.

  Args:
    data: DataFrame with the class label in column 0 and the series values
      in the remaining columns.
    test_data_size: `test_size` passed to train_test_split for the
      train/test split.
    unlabeled_data_size: `test_size` passed to train_test_split for the
      labeled/unlabeled split of the training part.

  Returns:
    (x_train, y_train, x_test, y_test) as torch tensors on `device`.
    x_* have shape (rows, 1, T); y_train contains -1 for unlabeled rows.

  Raises:
    ValueError: if the labeled subset has fewer rows than there are classes,
      so it can never contain all of them.
  """
  data = data.sample(frac = 1)  #shuffle the data

  #Split data into train data and test data
  data_train_before, data_test = train_test_split(data, test_size=test_data_size, random_state=seed1)

  label_column = data_train_before.columns[0]
  #Count the classes from the training rows themselves instead of relying on a
  #global: a class that only occurs in the test rows can never be drawn here.
  n_train_classes = len(np.unique(data_train_before.iloc[:,0]))

  #Split the train data into unlabeled and labeled data
  while True:
    data_train_before = data_train_before.sample(frac=1) #Shuffle so the seeded split picks different rows
    train_labeled, train_unlabeled = train_test_split(data_train_before, test_size=unlabeled_data_size, random_state=seed2)
    if len(train_labeled) < n_train_classes:
      raise ValueError(
        "labeled subset has %d rows but the training data has %d classes; "
        "decrease unlabeled_data_size" % (len(train_labeled), n_train_classes))
    #Make sure that all labels/classes are included in the labeled train data
    if len(np.unique(train_labeled.iloc[:,0])) == n_train_classes:
      break

  train_unlabeled = train_unlabeled.copy() #own copy, so the assignment below is unambiguous
  train_unlabeled[label_column] = -1 #Set labels to -1
  train = pd.concat((train_labeled,train_unlabeled))
  data_train = train.sample(frac = 1)  #shuffle the train data

  x_train = data_train.iloc[:,1:].to_numpy()
  y_train = data_train.iloc[:,0].to_numpy()
  x_test = data_test.iloc[:,1:].to_numpy()
  y_test = data_test.iloc[:,0].to_numpy()

  #Insert a channel axis: (rows, T) -> (rows, 1, T)
  x_train=x_train[:,np.newaxis,:]
  x_test=x_test[:,np.newaxis,:]

  #to PyTorch
  x_train = torch.from_numpy(x_train).to(device)
  y_train = torch.from_numpy(y_train).to(device)
  x_test = torch.from_numpy(x_test).to(device)
  y_test = torch.from_numpy(y_test).to(device)

  return x_train, y_train, x_test, y_test



In [None]:
#The FCN model with only classification
class class_model(nn.Module):
  """Fully convolutional network with a single classification head.

  Three Conv1d + BatchNorm1d + ReLU stages (1 -> 128 -> 256 -> 128 channels,
  kernel sizes 9, 5, 3 with length-preserving padding), a mean over the time
  axis, and a linear layer producing one score per class.
  """

  def __init__(self, classes):
    """Build the layers; `classes` is the number of output scores."""
    super().__init__()
    self.conv1 = nn.Conv1d(in_channels=1, out_channels=128, kernel_size=9, padding=4)
    self.bnorm1 = nn.BatchNorm1d(num_features=128)

    self.conv2 = nn.Conv1d(in_channels=128, out_channels=256, kernel_size=5, padding=2)
    self.bnorm2 = nn.BatchNorm1d(num_features=256)

    self.conv3 = nn.Conv1d(in_channels=256, out_channels=128, kernel_size=3, padding=1)
    self.bnorm3 = nn.BatchNorm1d(num_features=128)

    self.classification_head = nn.Linear(in_features=128, out_features=classes)

  def forward(self, x_class):
    """Return class scores of shape (batch, classes) for input (batch, 1, T)."""
    stages = (
      (self.conv1, self.bnorm1),
      (self.conv2, self.bnorm2),
      (self.conv3, self.bnorm3),
    )
    hidden = x_class
    for conv, bnorm in stages:
      hidden = F.relu(bnorm(conv(hidden)))

    # Average over the time axis to obtain one feature vector per series.
    pooled = hidden.mean(dim=2)
    return self.classification_head(pooled)


In [None]:
#The Multi task learning model with classification and forecasting
class MTL(nn.Module):
  """Multi-task network: one shared convolutional backbone, two linear heads.

  The backbone (three Conv1d + BatchNorm1d + ReLU stages followed by a mean
  over the time axis) is applied to both inputs; the classification head maps
  the 128 features to `classes` scores and the forecasting head maps them to
  `horizon` values.
  """

  def __init__(self, horizon, classes):
    """Build the layers.

    Args:
      horizon: number of values produced by the forecasting head.
      classes: number of scores produced by the classification head.
    """
    super().__init__()
    self.conv1 = nn.Conv1d(in_channels=1, out_channels=128, kernel_size=9, padding=4)
    self.bnorm1 = nn.BatchNorm1d(num_features=128)
    self.conv2 = nn.Conv1d(in_channels=128, out_channels=256, kernel_size=5, padding=2)
    self.bnorm2 = nn.BatchNorm1d(num_features=256)
    self.conv3 = nn.Conv1d(in_channels=256, out_channels=128, kernel_size=3, padding=1)
    self.bnorm3 = nn.BatchNorm1d(num_features=128)
    self.classification_head = nn.Linear(in_features=128, out_features=classes)
    self.forecasting_head = nn.Linear(in_features=128, out_features=horizon)

  def _shared_features(self, series):
    """Run the shared backbone and average the result over the time axis."""
    hidden = series
    for conv, bnorm in (
      (self.conv1, self.bnorm1),
      (self.conv2, self.bnorm2),
      (self.conv3, self.bnorm3),
    ):
      hidden = F.relu(bnorm(conv(hidden)))
    return hidden.mean(dim=2)

  def forward(self, x_class, x_forecast):
    """Return (class scores for x_class, forecast for x_forecast)."""
    # The classification input goes through the backbone first, then the
    # forecasting input, so the batch-norm statistics see them in that order.
    features_class = self._shared_features(x_class)
    features_forecast = self._shared_features(x_forecast)

    out_class = self.classification_head(features_class)
    out_forecast = self.forecasting_head(features_forecast)
    return out_class, out_forecast

  def forward_test(self, x_class):
    """Return only the class scores for x_class (forecasting head unused)."""
    return self.classification_head(self._shared_features(x_class))


In [None]:
#Importing Dataset class from other ipynb file "SimSiam_training"
!pip install import_ipynb
drive.mount('/content/drive')
%cd '/content/drive/MyDrive/Colab Notebooks'

#!ls
import import_ipynb
from SimSiam_training import *

In [None]:
#Some necessary functions

#Function to split training set into sliding window sets
def sliding_window(X, stride, horizon):
  """Cut every series into (input window, following window) forecasting pairs.

  Args:
    X: tensor indexed as (series, channel, time).
    stride: step between window starts, as a fraction of the series length.
      A fraction that amounts to less than one time step is treated as one
      time step.
    horizon: length of each window, as a fraction of the series length.

  Returns:
    (X_F, Y_F): the windows X[:, :, i:i+h] and the windows that follow them,
    X[:, :, i+h:i+2h], for every start i, concatenated along the first axis
    (all series for the first start, then all series for the second, ...).

  Raises:
    ValueError: if the horizon is shorter than one time step or if no
      input/target pair fits into the series.
  """
  T = X.shape[2] #number of datapoints in each time series

  #A zero step would make range() raise, so advance at least one time step
  s = max(1, int(stride* T)) #stride
  h = int(horizon* T) #horizon

  if h < 1:
    raise ValueError("horizon=%r is shorter than one time step for series of length %d" % (horizon, T))
  if 2*h > T:
    raise ValueError("horizon=%r leaves no room for an input and a target window in series of length %d" % (horizon, T))

  #A start i is valid while the target window still ends inside the series: i+2*h <= T
  starts = range(0, T - 2*h + 1, s)

  X_F = [X[:,:, i:i+h] for i in starts]
  Y_F = [X[:,:, i+h:i+2*h] for i in starts]

  return torch.cat(X_F), torch.cat(Y_F)

#Function to shuffle data
def shuffle(X,Y):
  """Return X and Y reordered along their first axis by one shared random permutation.

  The permutation is drawn with np.random.shuffle, so it follows the global
  NumPy random state. Row i of the returned X still corresponds to row i of
  the returned Y.
  """
  #np.arange always yields an integer index, also for zero rows (an empty
  #Python list would become a float array, which cannot be used as an index)
  index = np.arange(X.shape[0])
  np.random.shuffle(index)
  return X[index], Y[index]

#Function to export resulting data in Excel file
def to_Excel(Results, output_name):
  """Write the result rows to an Excel file and offer it for download.

  Args:
    Results: list of rows; each row has one value per column listed below.
    output_name: path of the Excel file to create.
  """
  columns = ["Dataset","dataload","Model","Epochs", "alpha", "stride", "horizon", "batch size", "Accuracy", "Classification loss", "Forecasting loss", "learning rate", "labeled size"]
  dataframe = pd.DataFrame(Results, columns = columns)
  # The context manager saves and closes the workbook on exit;
  # ExcelWriter.save() no longer exists in pandas 2.x.
  with pd.ExcelWriter(output_name) as writer:
    dataframe.to_excel(writer)
  files.download(output_name)
  print('DataFrame is written successfully to Excel File.')

#Function to play sound when code is finished
def sound():
  """Ask the notebook output to evaluate a JavaScript snippet that plays a WAV file."""
  script = 'new Audio("https://upload.wikimedia.org/wikipedia/commons/9/91/Sound4.wav").play()'
  output.eval_js(script)

def Accuracy_score(labels, preds):
  """Return the fraction of positions at which labels and preds are equal.

  Args:
    labels: sequence (list, array or tensor) of true labels.
    preds: sequence of predicted labels, same length as `labels`.

  Returns:
    correct / total as a float, 0.0 when both inputs are empty, or None
    (after printing a message) when the lengths differ.
  """
  if len(labels)!=len(preds):
    print("sizes does not match")
    return None

  total = len(labels)
  if total == 0:
    #Nothing to score; avoid dividing by zero
    return 0.0

  both_1d_tensors = (
    torch.is_tensor(labels) and torch.is_tensor(preds)
    and labels.dim() == 1 and preds.dim() == 1
    and labels.device == preds.device
  )
  if both_1d_tensors:
    #One vectorised comparison instead of one tensor comparison per element
    correct = int((labels == preds).sum().item())
  else:
    correct = sum(1 for label, pred in zip(labels, preds) if label == pred)

  return (correct/total)



In [None]:

#Experiment driver: for every dataset and every unlabeled fraction, train the
#MTL network (classification + forecasting), evaluate it on the test set after
#each epoch and collect one result row per configuration.
Datasets = ["ChlorineConcentration", "ECG5000", "ElectricDevices", "FordA", "FordB", "Two_Patterns", "wafer", "yoga"]

sizes  =[0.8, 0.9, 0.95] #sizes of the unlabeled set

Results = []
for Dataset in Datasets:
  #Loading the data once per dataset; only the labeled/unlabeled split depends on the size
  Data = load_data(Dataset)
  time_series, classes, T = explore_data(Data)

  for unlabeled_size in sizes:
    dataload = 1

    #Preparing data
    x_train, y_train, x_test, y_test = prepare_data(Data, unlabeled_data_size = unlabeled_size)

    lr = 0.1
    max_epochs = 100

    ##### MTL: Forecasting + classification  #####

    Horizon = [0.2] #The horizon value to train with
    Stride =  [0.2] #The stride value to train with
    Alpha = [0.1] #The alpha value to train with

    batch_size = 512 #The batch sizes to train with

    loss_func_class = nn.CrossEntropyLoss()
    loss_func_forecast = nn.MSELoss()

    for horizon in Horizon:
      for stride in Stride:
        for alpha in Alpha:
          #The model has to live on the same device as the data tensors
          MTL_net = MTL(int(T*horizon),classes).to(device)

          optimizer = torch.optim.Adam(MTL_net.parameters(), lr=lr)

          X_F, Y_F = sliding_window(x_train, stride, horizon)

          accuracies = []
          losses_class = []
          losses_forecast = []

          for e in range(max_epochs):
            MTL_net.train()
            #Shuffle data
            x_train, y_train = shuffle(x_train,y_train)
            X_F, Y_F = shuffle(X_F, Y_F)

            labeled = y_train!=-1
            x_train_labeled = x_train[labeled].float() #the x part of the training set that has a label
            y_train_labeled = y_train[labeled].long() #the y part of the training set that has a label

            for i in range(0, X_F.shape[0], batch_size):
              #Slicing clamps at the end, so the last (smaller) batch needs no special case
              x_forecast = X_F[i:i+batch_size].float()
              y_forecast = Y_F[i:i+batch_size].float()

              #Every step uses the whole labeled set together with one forecasting batch
              y_hat_class, y_hat_forecast = MTL_net(x_train_labeled, x_forecast)

              loss_c = loss_func_class(y_hat_class, y_train_labeled)
              #Drop only the channel axis; a bare squeeze() would also drop a batch axis of size 1
              loss_f = loss_func_forecast(y_hat_forecast, y_forecast.squeeze(1))

              loss_MTL = loss_c + alpha*loss_f

              optimizer.zero_grad()
              loss_MTL.backward()
              optimizer.step()

              losses_class.append(loss_c.item())
              losses_forecast.append(loss_f.item())

            MTL_net.eval()
            #No gradients are needed for evaluation; this avoids building the autograd graph
            with torch.no_grad():
              y_hat_class = MTL_net.forward_test(x_test.float())
            predicted = torch.max(y_hat_class,1)[1]

            accuracy = Accuracy_score(y_test, predicted)
            accuracies.append(accuracy)
            #The averages run over all steps of all epochs so far
            avg_loss_class = np.mean(losses_class)
            avg_loss_forecast = np.mean(losses_forecast)

            print("----------------------")
            print("E: ", e, " Acc: ", accuracy,"MaxAcc: ",max(accuracies), "loss: ",avg_loss_class ,"bs: ", batch_size)

            if accuracy==1.0:
              break
            #NOTE(review): with max_epochs = 100, e stops at 99, so this intermediate
            #checkpoint never fires - confirm the intended epochs
            if(e==100 or e==150):
              result = [Dataset, dataload, "MTL", e+1, alpha, stride, horizon, batch_size, max(accuracies), avg_loss_class, avg_loss_forecast, lr, 1-unlabeled_size]
              Results.append(result)

          #Second entry fills the "dataload" column of the Excel sheet
          result = [Dataset, dataload, "MTL", e+1, alpha, stride, horizon, batch_size, max(accuracies), avg_loss_class, avg_loss_forecast, lr, 1-unlabeled_size]

          print(result)
          Results.append(result)


#Export results to Excel
output_name = 'MTL2_2.xlsx'
to_Excel(Results, output_name)

