In [1]:
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torch.optim import lr_scheduler
import torchvision
import torchvision.transforms as transforms
from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import StratifiedKFold, KFold
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from random import shuffle
import warnings

# Suppress every Python warning for the rest of the session.
warnings.filterwarnings('ignore')

# Query CUDA once, report it, then build the matching torch device.
has_cuda = torch.cuda.is_available()
print(has_cuda)
device = torch.device('cuda') if has_cuda else torch.device('cpu')
print(device)

  from .autonotebook import tqdm as notebook_tqdm


True
cuda


In [2]:
# Not referenced by any cell shown in this notebook.
# NOTE(review): presumably left over from a torch training loop — confirm before removing.
learning_rate = 0.001
epochs = 20

# Random-forest settings, passed to RandomForestClassifier in the cross-validation cell.
n_estimators = 100
max_depth = 7
# Given as a float; per the scikit-learn docs this is read as a fraction of the feature count.
max_features = 0.4

In [3]:
# Read the three source CSVs, stack them row-wise, index the rows by their
# 'Time' column and keep only the two features plus the PAM_Val label.
csv_frames = [
    pd.read_csv(csv_name)
    for csv_name in (
        'postprocessed_gretel.csv',
        'user1_preprocessed_2.csv',
        'user2_preprocessed_2.csv',
    )
]

user = (
    pd.concat(csv_frames, axis=0)
    .pipe(lambda frame: frame.set_index(pd.DatetimeIndex(frame['Time'])))
    .filter(items=['Inactivity Duration(s)', 'Speed(ms)', 'PAM_Val'])
)

print(user.head(5))
print(user.shape)

                     Inactivity Duration(s)  Speed(ms)  PAM_Val
Time                                                           
2021-09-10 13:30:00             4069.192139   6.602493        2
2021-09-10 14:00:00             3073.023438   5.796617        2
2021-09-10 14:30:00             2250.151123   3.741216        2
2021-09-10 15:00:00             1751.982178   2.708780        2
2021-09-10 15:30:00             2818.204834   3.040749        2
(7176, 3)


In [5]:
# Dataset size, then the PAM_Val distribution: absolute counts first,
# relative frequencies second, both ordered by label value.
print(user.shape)
pam_values = user['PAM_Val']
for as_fraction in (False, True):
    print(pam_values.value_counts(normalize=as_fraction).sort_index())

(7176, 3)
1     608
2    5170
3    1396
4       2
Name: PAM_Val, dtype: int64
1    0.084727
2    0.720457
3    0.194537
4    0.000279
Name: PAM_Val, dtype: float64


In [6]:
# Shift PAM_Val from the 1-based coding (1..4) to 0-based class ids (0..3),
# matching the labels=[0,1,2,3] used when scoring.
# The shift overwrites the column in `user`, so it is guarded: running this
# cell again after the shift has been applied must not subtract a second time.
# NOTE(review): the guard assumes the raw labels start at 1 and that class 1
# is present in the data — confirm if the input files change.
if user['PAM_Val'].min() >= 1:
    user['PAM_Val'] = user['PAM_Val'] - 1.0

print(user.head(5))

                     Inactivity Duration(s)  Speed(ms)  PAM_Val
Time                                                           
2021-09-10 13:30:00             4069.192139   6.602493      1.0
2021-09-10 14:00:00             3073.023438   5.796617      1.0
2021-09-10 14:30:00             2250.151123   3.741216      1.0
2021-09-10 15:00:00             1751.982178   2.708780      1.0
2021-09-10 15:30:00             2818.204834   3.040749      1.0


In [7]:
# Separate the label column from the feature columns.
target_column = 'PAM_Val'
y = user[target_column]
X = user.drop(columns=target_column)

print(X)
print(y)

                     Inactivity Duration(s)  Speed(ms)
Time                                                  
2021-09-10 13:30:00             4069.192139   6.602493
2021-09-10 14:00:00             3073.023438   5.796617
2021-09-10 14:30:00             2250.151123   3.741216
2021-09-10 15:00:00             1751.982178   2.708780
2021-09-10 15:30:00             2818.204834   3.040749
...                                     ...        ...
2021-09-10 15:00:00             1793.473999   2.572861
2021-09-10 15:30:00             2023.965454   2.534229
2021-09-10 16:00:00             2068.907471   2.476279
2021-09-10 16:30:00             5908.447266   2.622682
2021-09-10 17:00:00             4763.646973   2.835002

[7176 rows x 2 columns]
Time
2021-09-10 13:30:00    1.0
2021-09-10 14:00:00    1.0
2021-09-10 14:30:00    1.0
2021-09-10 15:00:00    1.0
2021-09-10 15:30:00    1.0
                      ... 
2021-09-10 15:00:00    1.0
2021-09-10 15:30:00    1.0
2021-09-10 16:00:00    1.0
2021-09-10 1

In [8]:
# Cross-entropy criterion; no other cell shown in this notebook reads it.
loss_fn = torch.nn.CrossEntropyLoss()

In [9]:
# 5-fold splitter with shuffling. The seed is pinned so the same folds (and
# therefore the same reported metrics) are produced on every run.
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

In [10]:
# Per-fold metric accumulators: held-out folds first, then the matching
# training folds. They are consumed by the averaging cell further down.
comb_accuracy = []
comb_precision = []
comb_recall = []
comb_f1 = []
comb_accuracy_train = []
comb_precision_train = []
comb_recall_train = []
comb_f1_train = []

# Fixed label set so every confusion matrix has the same 4x4 layout and the
# macro averages always cover the same classes.
# NOTE(review): a class with no true and no predicted samples in a fold still
# counts in the macro average (as 0), which lowers precision/recall/F1.
CLASS_LABELS = [0, 1, 2, 3]

# Seed for the forest's bootstrap and feature sampling, so repeated runs on
# the same folds give the same model.
RF_RANDOM_STATE = 42


def _evaluate(y_true, y_hat):
    """Score one set of predictions against the true labels.

    Returns a tuple ``(confusion_matrix, accuracy, precision, recall, f1)``
    where precision, recall and F1 are macro-averaged over ``CLASS_LABELS``.
    """
    return (
        confusion_matrix(y_true, y_hat, labels=CLASS_LABELS),
        accuracy_score(y_true, y_hat),
        precision_score(y_true, y_hat, labels=CLASS_LABELS, average='macro'),
        recall_score(y_true, y_hat, labels=CLASS_LABELS, average='macro'),
        f1_score(y_true, y_hat, labels=CLASS_LABELS, average='macro'),
    )


# Convert once, outside the loop: the arrays do not change between folds.
X_values = np.array(X)
y_values = np.array(y).flatten()

for i, (train_index, test_index) in enumerate(kfold.split(X)):
    print(f"======================== Fold {i} ========================")

    X_train, X_test = X_values[train_index], X_values[test_index]
    y_train, y_test = y_values[train_index], y_values[test_index]

    classifier = RandomForestClassifier(
        n_estimators=n_estimators,
        max_depth=max_depth,
        max_features=max_features,
        criterion='entropy',
        random_state=RF_RANDOM_STATE,
    )
    classifier.fit(X_train, y_train)

    # Training-fold scores are kept to compare against the held-out scores.
    y_pred_train = classifier.predict(X_train)
    (conf_matrix_train, accuracy_train, precision_train,
     recall_train, f1_train) = _evaluate(y_train, y_pred_train)

    y_pred = classifier.predict(X_test)
    conf_matrix, accuracy, precision, recall, f1 = _evaluate(y_test, y_pred)

    comb_accuracy_train.append(accuracy_train)
    comb_precision_train.append(precision_train)
    comb_recall_train.append(recall_train)
    comb_f1_train.append(f1_train)

    comb_accuracy.append(accuracy)
    comb_precision.append(precision)
    comb_recall.append(recall)
    comb_f1.append(f1)

    # Only the held-out fold is reported per fold.
    print('Confusion Matrix:')
    print(np.array(conf_matrix))
    print('Accuracy:')
    print(accuracy)
    print('Precision:')
    print(precision)
    print('Recall:')
    print(recall)
    print('F1:')
    print(f1)
    print()

Confusion Matrix:
[[  4  84  32   0]
 [  4 976  72   0]
 [  1  48 215   0]
 [  0   0   0   0]]
Accuracy:
0.8321727019498607
Precision:
0.49982301541494867
Recall:
0.44387098167991706
F1:
0.42582088251241385

Confusion Matrix:
[[  2  74  36   0]
 [  4 973  69   0]
 [  5  37 235   0]
 [  0   0   0   0]]
Accuracy:
0.8432055749128919
Precision:
0.4426490321052943
Recall:
0.44911072979212047
F1:
0.426971438465954

Confusion Matrix:
[[  5  96  42   0]
 [  7 949  49   0]
 [  2  59 225   0]
 [  0   0   1   0]]
Accuracy:
0.821602787456446
Precision:
0.4816308715572363
Recall:
0.441489232160874
F1:
0.427478877098863

Confusion Matrix:
[[  4  95  24   0]
 [  2 943  72   0]
 [  3  61 230   0]
 [  0   0   1   0]]
Accuracy:
0.8202090592334494
Precision:
0.5014652835169402
Recall:
0.43551755546451976
F1:
0.4231627872932221

Confusion Matrix:
[[  7  74  29   0]
 [  1 969  80   0]
 [  1  58 216   0]
 [  0   0   0   0]]
Accuracy:
0.8306620209059233
Precision:
0.5806255385546939
Recall:
0.442987012987012

In [11]:
# Turn the per-fold score lists into arrays so they can be averaged.
(comb_accuracy_train, comb_precision_train,
 comb_recall_train, comb_f1_train) = (
    np.array(fold_scores)
    for fold_scores in (comb_accuracy_train, comb_precision_train,
                        comb_recall_train, comb_f1_train)
)

comb_accuracy, comb_precision, comb_recall, comb_f1 = (
    np.array(fold_scores)
    for fold_scores in (comb_accuracy, comb_precision, comb_recall, comb_f1)
)

# One (header, train scores, test scores) row per metric, printed in order.
summary_rows = (
    ('Average Accuracy:', comb_accuracy_train, comb_accuracy),
    ('Average Precision:', comb_precision_train, comb_precision),
    ('Average Recall:', comb_recall_train, comb_recall),
    ('Average F1:', comb_f1_train, comb_f1),
)
for header, train_scores, test_scores in summary_rows:
    print(header)
    print('\tTrain:', train_scores.mean(axis=0))
    print('\tTest:', test_scores.mean(axis=0))

Average Accuracy:
	Train: 0.8461539194509571
	Test: 0.8295704288917143
Average Precision:
	Train: 0.5996951195935941
	Test: 0.5012387482298226
Average Recall:
	Train: 0.4635180731392058
	Test: 0.44259510241688876
Average F1:
	Train: 0.4574259313817475
	Test: 0.4276179645201486
