In [1]:
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torch.optim import lr_scheduler
import torchvision
import torchvision.transforms as transforms

from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import StratifiedKFold, KFold
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from random import shuffle
import warnings

warnings.filterwarnings('ignore')

print(torch.cuda.is_available())
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

  from .autonotebook import tqdm as notebook_tqdm


True
cuda


In [2]:
learning_rate = 0.001
epochs = 40

hidden_size = 128
num_layers = 4

In [3]:
user = pd.read_csv('postprocessed_id_200-100.csv')

user = user.set_index(pd.DatetimeIndex(user['Time']))

user = user.filter(items=['Inactivity Duration(s)', 'Speed(ms)', 'Prev_PAM_Val', 'PAM_Val', 'example_id'])


print(user.head(5))
print(user.shape)

                     Inactivity Duration(s)  Speed(ms)  Prev_PAM_Val  PAM_Val  \
Time                                                                            
2021-09-10 13:30:00             5157.560547  12.659453           0.0        3   
2021-09-10 14:00:00             4467.798340  12.544724           3.0        1   
2021-09-10 14:30:00             5052.699219  11.190955           1.0        3   
2021-09-10 15:00:00             4072.605957  12.184985           3.0        3   
2021-09-10 15:30:00             3371.592285  11.939060           3.0        3   

                     example_id  
Time                             
2021-09-10 13:30:00           0  
2021-09-10 14:00:00           0  
2021-09-10 14:30:00           0  
2021-09-10 15:00:00           0  
2021-09-10 15:30:00           0  
(9600, 5)


In [5]:
print(user.shape)
print(user.PAM_Val.value_counts().sort_index())
print(user.PAM_Val.value_counts(normalize=True).sort_index())

(9600, 5)
1     737
2    6971
3    1881
4      11
Name: PAM_Val, dtype: int64
1    0.076771
2    0.726146
3    0.195937
4    0.001146
Name: PAM_Val, dtype: float64


In [6]:
user['PAM_Val'] = user['PAM_Val'] - 1.0

user['Prev_PAM_Val'] = user['Prev_PAM_Val'] - 1.0

print(user.head(5))

                     Inactivity Duration(s)  Speed(ms)  Prev_PAM_Val  PAM_Val  \
Time                                                                            
2021-09-10 13:30:00             5157.560547  12.659453          -1.0      2.0   
2021-09-10 14:00:00             4467.798340  12.544724           2.0      0.0   
2021-09-10 14:30:00             5052.699219  11.190955           0.0      2.0   
2021-09-10 15:00:00             4072.605957  12.184985           2.0      2.0   
2021-09-10 15:30:00             3371.592285  11.939060           2.0      2.0   

                     example_id  
Time                             
2021-09-10 13:30:00           0  
2021-09-10 14:00:00           0  
2021-09-10 14:30:00           0  
2021-09-10 15:00:00           0  
2021-09-10 15:30:00           0  


In [7]:
user_train_group = user.groupby(by='example_id')

In [8]:
user_train_X = []
user_train_y = []
for group, df in user_train_group:
    user_train_X.append(df.drop(columns=['PAM_Val', 'example_id']))
    user_train_y.append(df.filter(['PAM_Val']))


print(len(user_train_X))
print(user_train_X[0])
print(user_train_y[0])

1200
                     Inactivity Duration(s)  Speed(ms)  Prev_PAM_Val
Time                                                                
2021-09-10 13:30:00             5157.560547  12.659453          -1.0
2021-09-10 14:00:00             4467.798340  12.544724           2.0
2021-09-10 14:30:00             5052.699219  11.190955           0.0
2021-09-10 15:00:00             4072.605957  12.184985           2.0
2021-09-10 15:30:00             3371.592285  11.939060           2.0
2021-09-10 16:00:00             1631.225342   8.710327           2.0
2021-09-10 16:30:00             1460.209229   5.946492           1.0
2021-09-10 17:00:00              648.464111   5.012145           1.0
                     PAM_Val
Time                        
2021-09-10 13:30:00      2.0
2021-09-10 14:00:00      0.0
2021-09-10 14:30:00      2.0
2021-09-10 15:00:00      2.0
2021-09-10 15:30:00      2.0
2021-09-10 16:00:00      1.0
2021-09-10 16:30:00      1.0
2021-09-10 17:00:00      1.0


In [9]:
X = user_train_X
y = user_train_y

In [10]:
class RNN_Model(nn.Module):
  def __init__(self, input_size, hidden_size, output_size, num_layers):
    super(RNN_Model, self).__init__()

    self.input_size = input_size
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.output_size = output_size

    self.rnn = nn.LSTM(self.input_size, self.hidden_size, self.num_layers, batch_first=True)
    self.fc = nn.Linear(self.hidden_size, self.output_size)
    #self.sm = nn.Softmax(dim=0)    # not needed if using Cross Entropy loss
  
  def forward(self, x):
    batch_size = x.size(0)

    h0 = torch.zeros(self.num_layers, batch_size, self.hidden_size).to(device)
    c0 = torch.zeros(self.num_layers, batch_size, self.hidden_size).to(device)

    out, hidden = self.rnn(x, (h0, c0))
    out = self.fc(out.contiguous().view(-1, self.hidden_size))

    return out, hidden

In [11]:
loss_fn = nn.CrossEntropyLoss()

In [12]:
kfold = KFold(n_splits=5, shuffle=True)

In [13]:
comb_accuracy = []
comb_precision = []
comb_recall = []
comb_f1 = []
comb_accuracy_train = []
comb_precision_train = []
comb_recall_train = []
comb_f1_train = []
for i, (train_index, test_index) in enumerate(kfold.split(X)):
    print(f"======================== Fold {i} ========================")

    X_train = np.array(X)[train_index].tolist()
    y_train = np.array(y)[train_index].tolist()

    model = RNN_Model(len(X_train[0][0]), hidden_size, 4, num_layers)
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

    for epoch in range(epochs):
      losses_in_epoch = []
      for sample, label in zip(X_train, y_train):

        sample = torch.Tensor([sample]).to(device)
        label = torch.Tensor(label).flatten().type(torch.cuda.LongTensor).to(device)

        outputs, hidden = model(sample)

        loss = loss_fn(outputs, label)
        losses_in_epoch.append(loss)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

      average_loss = sum(losses_in_epoch) / len(losses_in_epoch)
      print(f'epoch {epoch+1}/{epochs}: loss = {average_loss:.4f}')
      scheduler.step()

    conf_matrix_train = []
    accuracy_train = []
    precision_train = []
    recall_train = []
    f1_train = []
    with torch.no_grad():
      for sample, label in zip(X_train, y_train):
        sample = torch.Tensor([sample]).to(device)
        label = torch.Tensor(label).flatten().type(torch.cuda.LongTensor).to(device)
        label = label.to('cpu')

        outputs, hidden = model(sample)
        _, y_pred_train = torch.max(outputs, 1)  # returns value, index

        y_pred_train_cpu = y_pred_train.to('cpu')

        conf_matrix_train.append(confusion_matrix(label, y_pred_train_cpu, labels=[0,1,2,3]))
        accuracy_train.append(accuracy_score(label, y_pred_train_cpu))
        precision_train.append(precision_score(label, y_pred_train_cpu, labels=[0,1,2,3], average='macro'))
        recall_train.append(recall_score(label, y_pred_train_cpu, labels=[0,1,2,3], average='macro'))
        f1_train.append(f1_score(label, y_pred_train_cpu, labels=[0,1,2,3], average='macro'))
    
    conf_matrix_train = np.array(conf_matrix_train).sum(axis=0).tolist()
    accuracy_train = np.array(accuracy_train).mean(axis=0).tolist()
    precision_train = np.array(precision_train).mean(axis=0).tolist()
    recall_train = np.array(recall_train).mean(axis=0).tolist()
    f1_train = np.array(f1_train).mean(axis=0).tolist()

    X_test = np.array(X)[test_index].tolist()
    y_test = np.array(y)[test_index].tolist()
    conf_matrix = []
    accuracy = []
    precision = []
    recall = []
    f1 = []
    with torch.no_grad():
      for sample, label in zip(X_test, y_test):
        sample = torch.Tensor([sample]).to(device)
        label = torch.Tensor(label).flatten().type(torch.cuda.LongTensor).to(device)
        label = label.to('cpu')

        outputs, hidden = model(sample)
        _, y_pred = torch.max(outputs, 1)  # returns value, index

        y_pred_cpu = y_pred.to('cpu')

        conf_matrix.append(confusion_matrix(label, y_pred_cpu, labels=[0,1,2,3]))
        accuracy.append(accuracy_score(label, y_pred_cpu))
        precision.append(precision_score(label, y_pred_cpu, labels=[0,1,2,3], average='macro'))
        recall.append(recall_score(label, y_pred_cpu, labels=[0,1,2,3], average='macro'))
        f1.append(f1_score(label, y_pred_cpu, labels=[0,1,2,3], average='macro'))
    
    conf_matrix = np.array(conf_matrix).sum(axis=0).tolist()
    accuracy = np.array(accuracy).mean(axis=0).tolist()
    precision = np.array(precision).mean(axis=0).tolist()
    recall = np.array(recall).mean(axis=0).tolist()
    f1 = np.array(f1).mean(axis=0).tolist()



    comb_accuracy_train.append(accuracy_train)
    comb_precision_train.append(precision_train)
    comb_recall_train.append(recall_train)
    comb_f1_train.append(f1_train)

    comb_accuracy.append(accuracy)
    comb_precision.append(precision)
    comb_recall.append(recall)
    comb_f1.append(f1)

    print('Confusion Matrix:')
    print(np.array(conf_matrix_train))
    print('Accuracy:')
    print(accuracy_train)
    print('Precision:')
    print(precision_train)
    print('Recall:')
    print(recall_train)
    print('F1:')
    print(f1_train)
    print()

epoch 1/40: loss = 0.7341
epoch 2/40: loss = 0.7040
epoch 3/40: loss = 0.7004
epoch 4/40: loss = 0.6989
epoch 5/40: loss = 0.6981
epoch 6/40: loss = 0.6945
epoch 7/40: loss = 0.6921
epoch 8/40: loss = 0.6919
epoch 9/40: loss = 0.6919
epoch 10/40: loss = 0.6918
epoch 11/40: loss = 0.6907
epoch 12/40: loss = 0.6907
epoch 13/40: loss = 0.6907
epoch 14/40: loss = 0.6906
epoch 15/40: loss = 0.6906
epoch 16/40: loss = 0.6905
epoch 17/40: loss = 0.6905
epoch 18/40: loss = 0.6905
epoch 19/40: loss = 0.6905
epoch 20/40: loss = 0.6905
epoch 21/40: loss = 0.6905
epoch 22/40: loss = 0.6905
epoch 23/40: loss = 0.6905
epoch 24/40: loss = 0.6905
epoch 25/40: loss = 0.6905
epoch 26/40: loss = 0.6905
epoch 27/40: loss = 0.6905
epoch 28/40: loss = 0.6905
epoch 29/40: loss = 0.6905
epoch 30/40: loss = 0.6905
epoch 31/40: loss = 0.6905
epoch 32/40: loss = 0.6905
epoch 33/40: loss = 0.6905
epoch 34/40: loss = 0.6905
epoch 35/40: loss = 0.6905
epoch 36/40: loss = 0.6905
epoch 37/40: loss = 0.6905
epoch 38/4

In [14]:
comb_accuracy_train = np.array(comb_accuracy_train)
comb_precision_train = np.array(comb_precision_train)
comb_recall_train = np.array(comb_recall_train)
comb_f1_train = np.array(comb_f1_train)

comb_accuracy = np.array(comb_accuracy)
comb_precision = np.array(comb_precision)
comb_recall = np.array(comb_recall)
comb_f1 = np.array(comb_f1)

print('Average Accuracy:')
print(comb_accuracy_train.mean(axis=0))
print(comb_accuracy.mean(axis=0))
print('Average Precision:')
print(comb_precision_train.mean(axis=0))
print(comb_precision.mean(axis=0))
print('Average Recall:')
print(comb_recall_train.mean(axis=0))
print(comb_recall.mean(axis=0))
print('Average F1:')
print(comb_f1_train.mean(axis=0))
print(comb_f1.mean(axis=0))

Average Accuracy:
0.7261458333333334
0.7261458333333333
Average Precision:
0.18153645833333334
0.18153645833333332
Average Recall:
0.24562499999999998
0.24562499999999998
Average F1:
0.1998947163947164
0.1998947163947164
