<a href="https://colab.research.google.com/github/IIF0403/Project/blob/master/Experiment1_MTL%2BClassification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%matplotlib inline
import numpy as np
import pandas as pd 
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from google.colab import files
from google.colab import output

In [None]:
# Fixed random_state values for the two train_test_split calls in prepare_data
seed1, seed2 = 14, 15
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

def load_data(Dataset):
  """Download a UCR time-series dataset and return it as a single DataFrame.

  The files `<Dataset>_TRAIN` and `<Dataset>_TEST` are read as header-less CSV
  from the GitHub mirror below and concatenated (train rows first). Column 0
  holds the class label, the remaining columns hold the time series values.

  The labels are re-encoded as the integers 0..C-1 in the sort order of the
  original label values. The encoding is rank based, so it also works when the
  raw labels are not evenly spaced (e.g. {1, 2, 4}) or when there is only a
  single class; a min-max rescaling would produce fractional labels or divide
  by zero in those cases.

  Args:
    Dataset: name of the dataset folder/file prefix, e.g. "CBF".

  Returns:
    pandas DataFrame with the integer class label in the first column.
  """
  url_raw = 'https://raw.githubusercontent.com/IIF0403/timeseries/master/data/'
  url_train = url_raw + Dataset+'/'+Dataset+'_TRAIN'
  url_test = url_raw + Dataset+'/'+Dataset+'_TEST'

  data_train = pd.read_csv(url_train,header=None)
  data_test = pd.read_csv(url_test, header=None)

  data = pd.concat((data_train, data_test))

  #Want all datasets to have classes as integers starting from 0
  Y = data.iloc[:,0].to_numpy()
  #return_inverse gives, for every row, the position of its label among the sorted unique labels
  _, Y_transformed = np.unique(Y, return_inverse=True)
  data[data.columns[0]] = Y_transformed.reshape(-1)

  return data


def explore_data(data):
  """Summarise a dataset whose first column is the class label.

  Args:
    data: DataFrame with one time series per row; column 0 is the label and
      every other column is one datapoint of the series.

  Returns:
    Tuple (time_series, classes, T): number of rows, number of distinct
    labels in column 0, and number of datapoints per series.
  """
  n_rows, n_cols = data.shape
  distinct_labels = np.unique(data.iloc[:, 0])

  #The label column is not part of the series, hence the -1
  return n_rows, len(distinct_labels), n_cols - 1

def prepare_data(data, test_data_size=0.2, unlabeled_data_size = 0.9 ):
  """Split a dataset into a partially labeled training set and a test set.

  The rows are shuffled and split into train/test. The training part is then
  split again into a labeled and an unlabeled subset; the split is redrawn
  until the labeled subset contains every class. The labels of the unlabeled
  rows are replaced by -1.

  The number of classes is derived from `data` itself, so the function does
  not depend on a notebook-level `classes` variable being up to date.

  Args:
    data: DataFrame with the class label in column 0 and the series values in
      the remaining columns.
    test_data_size: fraction of all rows held out as test data.
    unlabeled_data_size: fraction of the training rows whose label is hidden.

  Returns:
    Tuple (x_train, y_train, x_test, y_test) of torch tensors on `device`.
    The x tensors have shape (rows, 1, T); y_train contains -1 for unlabeled rows.

  Raises:
    ValueError: if the training split, or the number of labeled rows, is too
      small to contain every class (the redraw loop could never terminate).
  """
  n_classes = len(np.unique(data.iloc[:,0]))

  data = data.sample(frac = 1)  #shuffle the data

  #Split data into train data and test data
  data_train_before, data_test = train_test_split(data, test_size=test_data_size, random_state=seed1)

  if len(np.unique(data_train_before.iloc[:,0])) < n_classes:
    raise ValueError("The training split does not contain every class; "
                     "use a smaller test_data_size or more data.")

  #Split the train data into unlabeled and labeled data
  while True:
    data_train_before = data_train_before.sample(frac=1) #Reshuffle so the fixed seed yields a new split
    train_labeled, train_unlabeled = train_test_split(data_train_before, test_size=unlabeled_data_size, random_state=seed2)
    if len(train_labeled) < n_classes:
      raise ValueError("Only %d labeled rows for %d classes; use a smaller "
                       "unlabeled_data_size." % (len(train_labeled), n_classes))
    #Make sure that all labels/classes are included in the labeled train data
    if len(np.unique(train_labeled.iloc[:,0])) == n_classes:
      break

  #Work on a copy so the assignment does not write into a slice of the original frame
  train_unlabeled = train_unlabeled.copy()
  train_unlabeled[train_unlabeled.columns[0]] = -1 #Set labels to -1
  data_train = pd.concat((train_labeled,train_unlabeled)).sample(frac = 1)  #shuffle the train data

  x_train = data_train.iloc[:,1:].to_numpy()
  y_train = data_train.iloc[:,0].to_numpy()
  x_test = data_test.iloc[:,1:].to_numpy()
  y_test = data_test.iloc[:,0].to_numpy()

  #Add the channel axis expected by Conv1d: (rows, T) -> (rows, 1, T)
  x_train=x_train[:,np.newaxis,:]
  x_test=x_test[:,np.newaxis,:]

  #to PyTorch
  x_train = torch.from_numpy(x_train).to(device)
  y_train = torch.from_numpy(y_train).to(device)
  x_test = torch.from_numpy(x_test).to(device)
  y_test = torch.from_numpy(y_test).to(device)

  return x_train, y_train, x_test, y_test



In [None]:
class class_model(nn.Module):
  """The FCN model with only classification.

  Three Conv1d + BatchNorm1d + ReLU blocks (1 -> 128 -> 256 -> 128 channels,
  kernel sizes 9, 5, 3 with padding that keeps the series length), followed by
  an average over the time axis and a linear layer producing one score per class.
  """

  def __init__(self, classes):
    super().__init__()
    self.conv1 = nn.Conv1d(1, 128, kernel_size=9, padding=4)
    self.bnorm1 = nn.BatchNorm1d(128)
    self.conv2 = nn.Conv1d(128, 256, kernel_size=5, padding=2)
    self.bnorm2 = nn.BatchNorm1d(256)
    self.conv3 = nn.Conv1d(256, 128, kernel_size=3, padding=1)
    self.bnorm3 = nn.BatchNorm1d(128)
    self.classification_head = nn.Linear(128, classes)

  def forward(self, x_class):
    """Return the class scores for a batch of series shaped (batch, 1, T)."""
    stages = (
      (self.conv1, self.bnorm1),
      (self.conv2, self.bnorm2),
      (self.conv3, self.bnorm3),
    )
    hidden = x_class
    for conv, bnorm in stages:
      hidden = F.relu(bnorm(conv(hidden)))

    #Average over the time axis so the head sees one 128-d vector per series
    pooled = hidden.mean(dim=2)
    return self.classification_head(pooled)


In [None]:
class MTL(nn.Module):
  """The Multi task learning model with classification and forecasting.

  Both tasks share the same three Conv1d + BatchNorm1d + ReLU blocks
  (1 -> 128 -> 256 -> 128 channels). The time-averaged 128-d features feed a
  linear classification head (`classes` outputs) and a linear forecasting
  head (`horizon` outputs).
  """

  def __init__(self, horizon, classes):
    super().__init__()
    self.conv1 = nn.Conv1d(1, 128, kernel_size=9, padding=4)
    self.bnorm1 = nn.BatchNorm1d(128)
    self.conv2 = nn.Conv1d(128, 256, kernel_size=5, padding=2)
    self.bnorm2 = nn.BatchNorm1d(256)
    self.conv3 = nn.Conv1d(256, 128, kernel_size=3, padding=1)
    self.bnorm3 = nn.BatchNorm1d(128)
    self.classification_head = nn.Linear(128, classes)
    self.forecasting_head = nn.Linear(128, horizon)

  def _encode(self, series):
    """Run the shared convolutional blocks and average over the time axis."""
    hidden = F.relu(self.bnorm1(self.conv1(series)))
    hidden = F.relu(self.bnorm2(self.conv2(hidden)))
    hidden = F.relu(self.bnorm3(self.conv3(hidden)))
    return hidden.mean(dim=2)

  def forward(self, x_class, x_forecast):
    """Return (class scores for x_class, forecast for x_forecast).

    The classification batch is encoded before the forecasting batch; both
    pass through the same layers.
    """
    features_class = self._encode(x_class)
    features_forecast = self._encode(x_forecast)

    out_class = self.classification_head(features_class)
    out_forecast = self.forecasting_head(features_forecast)
    return out_class, out_forecast

  def forward_test(self, x_class):
    """Return only the class scores for x_class (no forecasting branch)."""
    return self.classification_head(self._encode(x_class))


In [None]:
#Some necessary functions

def sliding_window(X, stride, horizon):
  """Split a batch of series into (input window, following window) pairs.

  A window of h = int(horizon*T) datapoints is moved over the time axis in
  steps of s = int(stride*T). For every start position i with i+2*h <= T the
  slice [i, i+h) becomes a forecasting input and the slice [i+h, i+2*h) its
  target. The pairs of all positions are concatenated along the first axis,
  position by position.

  Args:
    X: tensor of shape (N, C, T) with T datapoints per series.
    stride: step between windows, as a fraction of T. A step that truncates
      to zero is raised to one datapoint.
    horizon: window length, as a fraction of T.

  Returns:
    Tuple (X_F, Y_F), each of shape (N * number_of_windows, C, h).

  Raises:
    ValueError: if the horizon truncates to zero datapoints, or if two
      consecutive windows do not fit into a series (2*h > T).
  """
  T = X.shape[2] #number of datapoints in each time series

  #A stride below one datapoint would make range() fail; fall back to the densest possible stride
  s = max(1, int(stride* T)) #stride
  h = int(horizon* T) #horizon

  if h < 1:
    raise ValueError("horizon=%r gives an empty window for series of length %d" % (horizon, T))
  if 2*h > T:
    raise ValueError("horizon=%r needs %d datapoints but the series only have %d" % (horizon, 2*h, T))

  X_F = []
  Y_F = []

  #Last valid start is T-2*h, so that the target window still fits
  for i in range(0, T - 2*h + 1, s):
    X_F.append(X[:,:, i:i+h])
    Y_F.append(X[:,:, i+h:i+2*h ])

  return torch.cat(X_F), torch.cat(Y_F)

def shuffle(X,Y):
  """Shuffle X and Y along their first axis with the same random order.

  The order is drawn from NumPy's global random state, so rows of X and Y
  that belonged together before the call still belong together afterwards.

  Returns:
    Tuple (X, Y) indexed by the shuffled row order.
  """
  order = np.array(list(range(X.shape[0])))
  np.random.shuffle(order)  #in-place permutation of the row positions
  X_shuffled = X[order]
  Y_shuffled = Y[order]
  return X_shuffled, Y_shuffled

def to_Excel(Results, output_name):
  """Write the experiment results to an Excel file and download it from Colab.

  Args:
    Results: list of result rows; every row holds one value for each of the
      twelve columns listed below, in that order.
    output_name: file name of the Excel workbook to create.
  """
  columns = ["Dataset","dataload","Model","Epochs", "alpha", "stride", "horizon", "batch size", "Accuracy", "Classification loss", "Forecasting loss", "learning rate"]
  dataframe = pd.DataFrame(Results, columns = columns)
  # The context manager saves and closes the workbook, also when writing fails;
  # ExcelWriter.save() no longer exists in pandas 2.x.
  with pd.ExcelWriter(output_name) as writer:
    # write dataframe to excel
    dataframe.to_excel(writer)
  files.download(output_name)
  print('DataFrame is written successfully to Excel File.')

def sound():
  """Play a short audio clip in the notebook front end, e.g. when a long run has finished."""
  clip_url = "https://upload.wikimedia.org/wikipedia/commons/9/91/Sound4.wav"
  script = 'new Audio("' + clip_url + '").play()'
  output.eval_js(script)



In [None]:
#Smoke test: load one dataset and run it through the preparation pipeline
Datasets = [
  "CBF", "Coffee", "ECG200", "FaceFour",
  "ItalyPowerDemand", "Lighting7", "OliveOil",
]

#Only the first dataset in the list is used here
Dataset = Datasets[0]
Data = load_data(Dataset)

(time_series, classes, T) = explore_data(Data)
(x_train, y_train, x_test, y_test) = prepare_data(Data)



In [None]:
#Experiments:
#For one Dataset:
#    Each load of the dataset results in a different train/test split + unlabeled/labeled split
#    For one data-load:
#        - Test the accuracy of the MTL with different stride and horizon values
#        - Also test the accuracy of the model with only classification for the same data-load
#    Testing this for different data loads of the same Dataset
#    Export the result to an Excel sheet


def _test_accuracy(net, predict, x, y):
  """Return the accuracy of `predict` (a forward function of `net`) on (x, y).

  The network is put into eval mode so that BatchNorm normalises with its
  running statistics instead of the statistics of the test batch, and autograd
  is disabled because nothing is trained here. The previous train/eval mode
  is restored before returning.
  """
  was_training = net.training
  net.eval()
  with torch.no_grad():
    predicted = torch.max(predict(x.float()), 1)[1]
  net.train(was_training)
  #sklearn needs host memory, so the tensors are moved off the GPU first
  return accuracy_score(y.cpu().numpy(), predicted.cpu().numpy())


Datasets = ["CBF", "Coffee", "ECG200", "FaceFour","ItalyPowerDemand","Lighting7", "OliveOil"] #The datasets

Dataload = [1,2,3,4,5,6,7,8,9,10] #How many data-loads to run experiment for

Results = []
for Dataset in Datasets:
  for dataload in Dataload:

    #Loading and preparing data
    Data = load_data(Dataset)
    time_series, classes, T = explore_data(Data)
    x_train, y_train, x_test, y_test = prepare_data(Data)

    lr = 1e-4
    max_epochs = 60

    ##### MTL: Forecasting + classification  #####

    Horizon = [0.1, 0.2] #The horizon values to train with
    Stride =  [0.1, 0.2] #The stride values to train with
    Alpha = [0.1] #The alpha values to train with

    batch_size = 100 #The batch size of the forecasting mini-batches

    #The loss functions hold no state, so one instance of each serves every configuration
    loss_func_class = nn.CrossEntropyLoss()
    loss_func_forecast = nn.MSELoss()

    for horizon in Horizon:
      for stride in Stride:
        for alpha in Alpha:
          MTL_net = MTL(int(T*horizon),classes).to(device)

          optimizer = torch.optim.Adam(MTL_net.parameters(), lr=lr)

          X_F, Y_F = sliding_window(x_train, stride, horizon)

          accuracies = []
          losses_class = []
          losses_forecast = []

          for e in range(max_epochs):
            #Shuffle data
            x_train, y_train = shuffle(x_train,y_train)
            X_F, Y_F = shuffle(X_F, Y_F)

            labeled = y_train!=-1
            x_train_labeled = x_train[labeled] #the x part of the training set that has a label
            y_train_labeled = y_train[labeled] #the y part of the training set that has a label

            for i in range(0, X_F.shape[0], batch_size):
              #Slicing stops at the end of the tensor, so the last batch may be smaller
              x_forecast = X_F[i:i+batch_size]
              y_forecast = Y_F[i:i+batch_size]

              #Every step classifies all labeled series and forecasts one mini-batch of windows
              y_hat_class, y_hat_forecast = MTL_net(x_train_labeled.float(), x_forecast.float())

              loss_c = loss_func_class(y_hat_class, y_train_labeled.long())
              #Drop only the channel axis: (batch, 1, h) -> (batch, h). A bare squeeze would also
              #drop the batch or horizon axis when it has size 1 and make MSELoss broadcast.
              loss_f = loss_func_forecast(y_hat_forecast, y_forecast.squeeze(1).float() )

              loss_MTL = loss_c + alpha*loss_f

              optimizer.zero_grad()
              loss_MTL.backward()
              optimizer.step()
              loss_class, loss_forecast = loss_c.item(), loss_f.item()

              losses_class.append(loss_class)
              losses_forecast.append(loss_forecast)

            accuracy = _test_accuracy(MTL_net, MTL_net.forward_test, x_test, y_test)
            accuracies.append(accuracy)
            #Averages run over all steps since the start of training, not only this epoch
            avg_loss_class = np.mean(losses_class)
            avg_loss_forecast = np.mean(losses_forecast)

            print("----------------------")
            print("E: ", e, " Acc: ", accuracy,"MaxAcc: ",max(accuracies), " s:", stride, " h: ", horizon, "bs: ", batch_size)

            if accuracy==1.0:
              break

          result = [Dataset, dataload, "MTL", e+1, alpha, stride, horizon, batch_size, max(accuracies),avg_loss_class, avg_loss_forecast, lr]

          print(result)
          Results.append(result)


    ##### Only Classification (FCN) #####

    Batch_size_c = [4]

    for batch_size in Batch_size_c:
      class_net = class_model(classes).to(device)

      optimizer_classification = torch.optim.Adam(class_net.parameters(), lr=lr)
      loss_func_classification = nn.CrossEntropyLoss()

      accuracies_classification = []
      losses_classification = []

      for e in range(max_epochs):
        x_train, y_train = shuffle(x_train,y_train) #Shuffle training data

        labeled = y_train!=-1
        x_train_labeled = x_train[labeled] #the x part of the training set that has a label
        y_train_labeled = y_train[labeled] #the y part of the training set that has a label

        N_L = x_train_labeled.shape[0]

        for i in range(0, N_L, batch_size):
          x_batch = x_train_labeled[i:i+batch_size]
          y_batch = y_train_labeled[i:i+batch_size]

          #Train on the mini-batch, not on the whole labeled set
          y_hat = class_net(x_batch.float())

          loss_cl = loss_func_classification(y_hat, y_batch.long())

          optimizer_classification.zero_grad()
          loss_cl.backward()
          optimizer_classification.step()

          loss_classification = loss_cl.item()
          losses_classification.append(loss_classification)

        accuracy_classification = _test_accuracy(class_net, class_net, x_test, y_test)
        accuracies_classification.append(accuracy_classification)

        avg_loss_classification = np.mean(losses_classification)

        print("----------------------")
        print("E: ", e, " Acc: ", accuracy_classification, "bs: ", batch_size)

        if accuracy_classification==1.0:
          break

      #Columns that only apply to the MTL model are filled with -1
      result = [Dataset, dataload, "Classification", e+1, -1, -1, -1, batch_size, max(accuracies_classification),avg_loss_classification, -1, lr]
      print(result)
      Results.append(result)

#Export results to Excel (the file name carries the name of the last dataset in the list)
output_name = 'exp_'+Dataset+'2.xlsx'
to_Excel(Results, output_name)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


----------------------
E:  0  Acc:  0.9590909090909091 MaxAcc:  0.9590909090909091  s: 0.1  h:  0.4 bs:  100
----------------------
E:  1  Acc:  0.95 MaxAcc:  0.9590909090909091  s: 0.1  h:  0.4 bs:  100
----------------------
E:  2  Acc:  0.95 MaxAcc:  0.9590909090909091  s: 0.1  h:  0.4 bs:  100
----------------------
E:  3  Acc:  0.9454545454545454 MaxAcc:  0.9590909090909091  s: 0.1  h:  0.4 bs:  100
----------------------
E:  4  Acc:  0.95 MaxAcc:  0.9590909090909091  s: 0.1  h:  0.4 bs:  100
----------------------
E:  5  Acc:  0.95 MaxAcc:  0.9590909090909091  s: 0.1  h:  0.4 bs:  100
----------------------
E:  6  Acc:  0.95 MaxAcc:  0.9590909090909091  s: 0.1  h:  0.4 bs:  100
----------------------
E:  7  Acc:  0.9454545454545454 MaxAcc:  0.9590909090909091  s: 0.1  h:  0.4 bs:  100
----------------------
E:  8  Acc:  0.9454545454545454 MaxAcc:  0.9590909090909091  s: 0.1  h:  0.4 bs:  100
----------------------
E:  9  Acc:  0.9409090909090909 MaxAcc:  0.9590909090909091  s: 0.

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

DataFrame is written successfully to Excel File.


In [None]:


#Print every collected result row, then export all of them to Excel once more
for result_row in Results:
  print(result_row)

output_name = 'Experiments' + Dataset + '_.xlsx'
to_Excel(Results, output_name)
