<a href="https://colab.research.google.com/github/IIF0403/Project/blob/master/MTL_classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
%matplotlib inline
import numpy as np
import pandas as pd 
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from google.colab import files

In [4]:
seed1 = 14
seed2 = 15
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

#Function to load UCR time_series data
def load_data(Dataset):
  url_raw = 'https://raw.githubusercontent.com/IIF0403/timeseries/master/data/'
  url_train = url_raw + Dataset+'/'+Dataset+'_TRAIN'
  url_test = url_raw + Dataset+'/'+Dataset+'_TEST'

  data_train = pd.read_csv(url_train,header=None)
  data_test = pd.read_csv(url_test, header=None)

  data = pd.concat((data_train, data_test))

  #Want all datasets to have classes starting from 0
  Y = data.values[:,0]
  classes = len(np.unique(Y))
  Y_transformed = ( (Y-Y.min())/(Y.max()-Y.min()) )*(classes-1)
  data[data.columns[0]] = Y_transformed

  return data

def explore_data(data):
  #data = load_data(Dataset)

  time_series = data.shape[0]
  classes = len( np.unique(data.iloc[:,0]))
  T = data.shape[1]-1

  #print("#: ", time_series)
  #print("C: ", classes)
  #print("T: ", T)

  return time_series, classes, T

#Function prepares data and removes labels from y_train
def prepare_data(data, test_data_size=0.2, unlabeled_data_size = 0.9 ):
  data = data.sample(frac = 1)  #shuffle the data

  #Split data into 80% train data and 20% test data
  data_train_before, data_test = train_test_split(data, test_size=test_data_size, random_state=seed1)
  
  #Split the train data into 90% unlabeled data and 10% labeled data
  data_train = data_train_before
  while (  len(np.unique(data_train.iloc[:,0])) != (classes+1)  ): #Make sure that all labels are included in the labeled train data
    data_train_before = data_train_before.sample(frac=1) #Shuffle the original train data
    train_labeled, train_unlabeled = train_test_split(data_train_before, test_size=unlabeled_data_size, random_state=seed2)  
    train_unlabeled[train_unlabeled.columns[0]] = -1 #Set labels to -1
    train = pd.concat((train_labeled,train_unlabeled))
    data_train = train.sample(frac = 1)  #shuffle the train data
    #print("np unique:", len(np.unique(data_train.iloc[:,0])))


  x_train = data_train.iloc[:,1:].to_numpy()
  y_train = data_train.iloc[:,0].to_numpy()
  x_test = data_test.iloc[:,1:].to_numpy()
  y_test = data_test.iloc[:,0].to_numpy() 

  x_train=x_train[:,np.newaxis,:] 
  x_test=x_test[:,np.newaxis,:]


  #Torch
  x_train = torch.from_numpy(x_train).to(device)
  y_train = torch.from_numpy(y_train).to(device)
  x_test = torch.from_numpy(x_test).to(device)
  y_test = torch.from_numpy(y_test).to(device)

  return x_train, y_train, x_test, y_test



In [5]:
#Multi task learning model with classification and forecasting
class MTL(nn.Module):
  def __init__(self, horizon, classes):
    super(MTL, self).__init__()
    self.conv1 = nn.Conv1d(1, 128, 9, padding=(9 // 2))
    self.bnorm1 = nn.BatchNorm1d(128)        
    self.conv2 = nn.Conv1d(128, 256, 5, padding=(5 // 2))
    self.bnorm2 = nn.BatchNorm1d(256)
    self.conv3 = nn.Conv1d(256, 128, 3, padding=(3 // 2))
    self.bnorm3 = nn.BatchNorm1d(128)        
    self.classification_head = nn.Linear(128, classes)
    self.forecasting_head = nn.Linear(128, horizon)

  def forward(self, x_class, x_forecast):
    b1_class = F.relu(self.bnorm1(self.conv1(x_class)))
    b2_class = F.relu(self.bnorm2(self.conv2(b1_class)))
    b3_class = F.relu(self.bnorm3(self.conv3(b2_class)))

    b1_forecast = F.relu(self.bnorm1(self.conv1(x_forecast)))
    b2_forecast = F.relu(self.bnorm2(self.conv2(b1_forecast)))
    b3_forecast = F.relu(self.bnorm3(self.conv3(b2_forecast)))
        
    features_class = torch.mean(b3_class, 2)
    features_forecast = torch.mean(b3_forecast, 2)
        
    out_class = self.classification_head(features_class)
    out_forecast = self.forecasting_head(features_forecast)

    return out_class, out_forecast

  def forward_test(self, x_class):
    b1_class = F.relu(self.bnorm1(self.conv1(x_class)))
    b2_class = F.relu(self.bnorm2(self.conv2(b1_class)))
    b3_class = F.relu(self.bnorm3(self.conv3(b2_class)))

    features_class = torch.mean(b3_class, 2)
    out_class =self.classification_head(features_class)
    return out_class


In [6]:
#Some necessary functions

#Function to split training set inro sliding window sets
def sliding_window(X, stride, horizon):
  T = X.shape[2] #number of datapoints in each time series

  s = int(stride* T) #stride
  h = int(horizon* T) #horizon

  X_F = [] 
  Y_F = []

  for i in range(0, T, s):
    if (i+2*h <= T):
      xf_i = X[:,:, i:i+h]
      yf_i = X[:,:, i+h:i+2*h ]

      X_F.append(xf_i)
      Y_F.append(yf_i)
  
  return torch.cat(X_F), torch.cat(Y_F)

#Function to shuffle data
def shuffle(X,Y):
  index = np.array( [int(i) for i in range(X.shape[0]) ] )
  np.random.shuffle(index)
  return X[index], Y[index]
  
#Function to export resulting data in Excel file
def to_Excel(Results, output_name):
  columns = ["Dataset","dataload","Model","Epochs", "alpha", "stride", "horizon", "batch size", "Accuracy", "Classification loss", "Forecasting loss"]
  dataframe = pd.DataFrame(Results, columns = columns)
  # create excel writer object
  writer = pd.ExcelWriter(output_name)
  # write dataframe to excel
  dataframe.to_excel(writer)
  # save the excel
  writer.save()
  files.download(output_name)
  print('DataFrame is written successfully to Excel File.') 


In [7]:
dataload = 1

Datasets = ["Coffee", "CBF", "ECG200", "FaceFour","ItalyPowerDemand","Lighting7", "OliveOil"]
Dataset = Datasets[-1]

Data = load_data(Dataset)
time_series, classes, T = explore_data(Data)
x_train, y_train, x_test, y_test = prepare_data(Data)

print("x_train shape: ", x_train.shape, "  y_train shape: ",x_train.shape)
print("x_test shape: ", x_test.shape, "  y_test shape: ",x_test.shape)


x_train shape:  torch.Size([48, 1, 570])   y_train shape:  torch.Size([48, 1, 570])
x_test shape:  torch.Size([12, 1, 570])   y_test shape:  torch.Size([12, 1, 570])


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [None]:
Datasets = ["Coffee", "CBF", "ECG200", "FaceFour","ItalyPowerDemand","Lighting7","OliveOil"]
Dataset = Datasets[0]

Data = load_data(Dataset)
time_series, classes, T = explore_data(Data)
x_train, y_train, x_test, y_test = prepare_data(Data)

print("Dataset: ", Dataset)

alpha = 0.1
stride = 0.2
horizon = 0.2

batch_size = 34

MTL_net = MTL(int(T*horizon), classes).to(device)

optimizer = torch.optim.Adam(MTL_net.parameters(), lr=1e-4)

loss_func_class = nn.CrossEntropyLoss()
loss_func_forecast = nn.MSELoss()

X_F, Y_F = sliding_window(x_train, stride, horizon)

max_epochs = 20

avg_losses_class = []
avg_losses_forecast = []
epochs = []

accuracies = []
losses_class = []
losses_forecast = []

Results = []

for e in range(max_epochs):
  #Shuffle data
  x_train, y_train = shuffle(x_train,y_train)
  X_F, Y_F = shuffle(X_F, Y_F)

  x_train_labeled = x_train[y_train!=-1] #the x part of the training set that has a label
  y_train_labeled = y_train[y_train!=-1] #the y part of the training set that has a label


  for i in range(0, X_F.shape[0], batch_size):
    if (i+batch_size <= X_F.shape[0]):
      x_forecast = X_F[i:i+batch_size]
      y_forecast = Y_F[i:i+batch_size]
    else:
      x_forecast = X_F[i:]
      y_forecast = Y_F[i:]
              
    y_hat_class, y_hat_forecast = MTL_net(x_train_labeled.float(), x_forecast.float())
    loss_c = loss_func_class(y_hat_class, y_train_labeled.long())
    loss_f = loss_func_forecast(y_hat_forecast, torch.squeeze(y_forecast).float() )
    
    loss_MTL = loss_c + alpha*loss_f
    
    optimizer.zero_grad()
    loss_MTL.backward()
    optimizer.step()

    loss_class, loss_forecast = loss_c.item(), loss_f.item()
      
    losses_class.append(loss_class)
    losses_forecast.append(loss_forecast)

  y_hat_class = MTL_net.forward_test(x_test.float())
  predicted = torch.max(y_hat_class,1)[1]

  accuracy = accuracy_score(y_test, predicted) 
  accuracies.append(accuracy)
  avg_loss_class = np.mean(losses_class)
  avg_loss_forecast = np.mean(losses_forecast)

  avg_losses_class.append(avg_loss_class)
  avg_losses_forecast.append(avg_loss_forecast)
  epochs.append(e+1)

  print("----------------------")
  print("E: ", e, " Acc: ", accuracy, " s:", stride, " h: ", horizon, "bs: ", batch_size)

  if accuracy==1.0:
    break;

result = [Dataset, dataload, "MTL", e+1, alpha, stride, horizon, batch_size, max(accuracies), avg_loss_class, avg_loss_forecast]
print(result)

Results.append(result)

#output_name = "MTL_"+Dataset+' lr:'+str(lr)+'.xlsx'
#to_Excel(Results, output_name)



In [None]:
#Plotting loss against epochs
plt.figure()
plt.title(Dataset+' lr:'+str(lr)+": Loss  ")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.plot(epochs, avg_losses_class, label ="loss classification")

plt.plot(epochs, avg_losses_forecast, label ="loss forecast")
plt.legend()

plt.savefig(Dataset+"_loss"+".png")
files.download(Dataset+"_loss"+".png")