<a href="https://colab.research.google.com/github/IIF0403/Project/blob/master/MTL_classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [67]:
%matplotlib inline
import numpy as np
import pandas as pd 
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import copy
import sys

In [68]:
seed1 = 14
seed2 = 15
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

#Function loads data from csv and removes labels from y_train
def prepare_data(Dataset, test_data_size=0.2, unlabeled_data_size = 0.9 ):
  url_raw = 'https://raw.githubusercontent.com/IIF0403/timeseries/master/data/' 
  
  url_train = url_raw + Dataset+'/'+Dataset+'_TRAIN'
  url_test = url_raw + Dataset+'/'+Dataset+'_TEST'

  data_train = pd.read_csv(url_train,header=None)
  data_test = pd.read_csv(url_test, header=None)

  #Split data into 80% train data and 20% test data
  data = pd.concat((data_train, data_test)) 
  data_train, data_test = train_test_split(data, test_size=test_data_size, random_state=seed1)

  #Some of the datasets has classes starting from 1, need them to start from 0
  if ( ((data_train.iloc[:,0]==0).sum())==0 ): #If there is no class 0
    data_train.iloc[:,0] = data_train.iloc[:,0]-1
    data_test.iloc[:,0] = data_test.iloc[:,0]-1
  
  #Split the train data into 90% unlabeled data and 10% labeled data
  train_labeled, train_unlabeled = train_test_split(data_train, test_size=unlabeled_data_size, random_state=seed2)  
  train_unlabeled[train_unlabeled.columns[0]]=-1 #Set labels to -1
  train = pd.concat((train_labeled,train_unlabeled))
  data_train = train.sample(frac = 1)  #shuffle the data

  x_train = data_train.iloc[:,1:].to_numpy()
  y_train = data_train.iloc[:,0].to_numpy()
  x_test = data_test.iloc[:,1:].to_numpy()
  y_test = data_test.iloc[:,0].to_numpy() 

  x_train=x_train[:,np.newaxis,:] 
  x_test=x_test[:,np.newaxis,:]


  #Torch
  x_train = torch.from_numpy(x_train).to(device)
  y_train = torch.from_numpy(y_train).to(device)
  x_test = torch.from_numpy(x_test).to(device)
  y_test = torch.from_numpy(y_test).to(device)

  return x_train, y_train, x_test, y_test

x_train, y_train, x_test, y_test = prepare_data(Dataset)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [69]:
#Dataset = "Coffee"
Dataset = "CBF"
#Dataset = "FaceFour"

x_train, y_train, x_test, y_test = prepare_data(Dataset)

classes = len(np.unique(y_test))
T = x_train.shape[2]

print("x_train shape: ", x_train.shape, "  y_train shape: ",x_train.shape)
print("x_test shape: ", x_test.shape, "  y_test shape: ",x_test.shape)
print("#classes: ",classes)
print("#data points in each time series: ", T)

x_train shape:  torch.Size([744, 1, 128])   y_train shape:  torch.Size([744, 1, 128])
x_test shape:  torch.Size([186, 1, 128])   y_test shape:  torch.Size([186, 1, 128])
#classes:  3
#data points in each time series:  128


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [70]:
class MTL(nn.Module):
  def __init__(self, horizon):
    super(MTL, self).__init__()
    self.conv1 = nn.Conv1d(x_train.shape[1], 128, 9, padding=(9 // 2))
    self.bnorm1 = nn.BatchNorm1d(128)        
    self.conv2 = nn.Conv1d(128, 256, 5, padding=(5 // 2))
    self.bnorm2 = nn.BatchNorm1d(256)
    self.conv3 = nn.Conv1d(256, 128, 3, padding=(3 // 2))
    self.bnorm3 = nn.BatchNorm1d(128)        
    self.classification_head = nn.Linear(128, classes)
    self.forecasting_head = nn.Linear(128, horizon)

  def forward(self, x_class, x_forecast):
    b1_class = F.relu(self.bnorm1(self.conv1(x_class)))
    b2_class = F.relu(self.bnorm2(self.conv2(b1_class)))
    b3_class = F.relu(self.bnorm3(self.conv3(b2_class)))

    b1_forecast = F.relu(self.bnorm1(self.conv1(x_forecast)))
    b2_forecast = F.relu(self.bnorm2(self.conv2(b1_forecast)))
    b3_forecast = F.relu(self.bnorm3(self.conv3(b2_forecast)))
        
    features_class = torch.mean(b3_class, 2)
    features_forecast = torch.mean(b3_forecast, 2)
        
    out_class = self.classification_head(features_class)
    out_forecast = self.forecasting_head(features_forecast)

    return out_class, out_forecast

  def forward_test(self, x_class):
    b1_class = F.relu(self.bnorm1(self.conv1(x_class)))
    b2_class = F.relu(self.bnorm2(self.conv2(b1_class)))
    b3_class = F.relu(self.bnorm3(self.conv3(b2_class)))

    features_class = torch.mean(b3_class, 2)
    out_class =self.classification_head(features_class)
    return out_class


In [71]:
stride = 0.2
horizon = 0.2
alpha = 0.1



MTL_net = MTL(int(T*horizon)).to(device)


#Loss functions and optimizer
loss_func_class = nn.CrossEntropyLoss()
loss_func_forecast = nn.MSELoss()
optimizer = torch.optim.Adam(MTL_net.parameters(), lr=1e-4)

def optimize(x_labeled, y_labeled, x_forecast, y_forecast):
  y_hat_class, y_hat_forecast = MTL_net(x_labeled.float(), x_forecast.float())
  
  loss_class = loss_func_class(y_hat_class, y_labeled)
  loss_forecast = loss_func_forecast(y_hat_forecast, torch.squeeze(y_forecast).float() )
    
  loss_MTL = loss_class + alpha*loss_forecast
  optimizer.zero_grad()
  loss_MTL.backward()
  optimizer.step()

  return loss_class.item(), loss_forecast.item()




In [72]:
#Function to shuffle data
def shuffle(X,Y):
  index = np.array( [int(i) for i in range(x_train.shape[0]) ] )
  np.random.shuffle(index)
  return X[index], Y[index]


In [73]:
def sliding_window(X, stride, horizon):
  T = X.shape[2] #number of datapoints in each time series

  s = int(stride* T) #stride
  h = int(horizon* T) #horizon

  X_F = [] 
  Y_F = []

  for i in range(0, T, s):
    if (i+2*h <= T):
      xf_i = X[:,:, i:i+h]
      yf_i = X[:,:, i+h:i+2*h ]

      X_F.append(xf_i)
      Y_F.append(yf_i)
  
  return torch.cat(X_F), torch.cat(Y_F)


In [None]:
X_F, Y_F = sliding_window(x_train, stride, horizon)

batch_size = 35
max_epochs = 5000

accuracies = []
losses_class = []
losses_forecast = []

for e in range(max_epochs):
  #Shuffle data
  x_train, y_train = shuffle(x_train,y_train)
  X_F, Y_F = shuffle(X_F, Y_F)

  x_train_labeled = x_train[y_train!=-1] #the x part of the training set that has a label
  y_train_labeled = y_train[y_train!=-1] #the y part of the training set that has a label

  for i in range(0, X_F.shape[0], batch_size):
    if (i+batch_size <= X_F.shape[0]):
      x_forecast = X_F[i:i+batch_size]
      y_forecast = Y_F[i:i+batch_size]
    else:
      x_forecast = X_F[i:]
      y_forecast = Y_F[i:]
    
    loss_class, loss_forecast = optimize(x_train_labeled, y_train_labeled, x_forecast, y_forecast)
    losses_class.append(loss_class)
    losses_forecast.append(loss_forecast)

  y_hat_class = MTL_net.forward_test(x_test.float())
  predicted = torch.max(y_hat_class,1)[1]
  #print("Predicted: ", predicted)

  accuracy = accuracy_score(y_test, predicted) 
  accuracies.append(accuracy)

  avg_loss_class = np.mean(losses_class)
  avg_loss_forecast = np.mean(losses_forecast)

  print("Epoch: ",e, "  Accuracy: ",accuracy," Avg loss classification: ", avg_loss_class , "  Avg loss forecast: ", avg_loss_forecast)

  if accuracy==1.0:
    break;

