In [1]:
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torch.optim import lr_scheduler
import torchvision
import torchvision.transforms as transforms
from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import StratifiedKFold, KFold
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from random import shuffle
import warnings

warnings.filterwarnings('ignore')

print(torch.cuda.is_available())
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

  from .autonotebook import tqdm as notebook_tqdm


True
cuda


In [2]:
learning_rate = 0.001
epochs = 20

hidden_size = 8
num_layers = 2

In [3]:
user1 = pd.read_csv('user1_preprocessed.csv')
user2 = pd.read_csv('user2_preprocessed.csv')
user = user1.append(user2)

user = user.set_index(pd.DatetimeIndex(user['Time']))

user = user.filter(items=['Inactivity Duration(s)', 'Speed(ms)', 'PAM_Val'])

print(user.head(5))
print(user.shape)

                     Inactivity Duration(s)  Speed(ms)  PAM_Val
Time                                                           
2021-09-10 11:30:00              784.401936   2.290322      2.0
2021-09-10 12:00:00              784.401936   6.713206      2.0
2021-09-10 12:30:00              618.809407   7.769313      1.0
2021-09-10 13:00:00             1048.829218  10.621018      1.0
2021-09-10 13:30:00             1219.861964   5.499976      2.0
(131, 3)


In [5]:
print(user.shape)
print(user.PAM_Val.value_counts().sort_index())
print(user.PAM_Val.value_counts(normalize=True).sort_index())

(131, 3)
1.0    32
2.0    73
3.0    17
4.0     9
Name: PAM_Val, dtype: int64
1.0    0.244275
2.0    0.557252
3.0    0.129771
4.0    0.068702
Name: PAM_Val, dtype: float64


In [6]:
user['PAM_Val'] = user['PAM_Val'] - 1.0

print(user.head(5))

                     Inactivity Duration(s)  Speed(ms)  PAM_Val
Time                                                           
2021-09-10 11:30:00              784.401936   2.290322      1.0
2021-09-10 12:00:00              784.401936   6.713206      1.0
2021-09-10 12:30:00              618.809407   7.769313      0.0
2021-09-10 13:00:00             1048.829218  10.621018      0.0
2021-09-10 13:30:00             1219.861964   5.499976      1.0


In [7]:
X = user.drop(columns='PAM_Val')
y = user['PAM_Val']

print(X)
print(y)

                     Inactivity Duration(s)  Speed(ms)
Time                                                  
2021-09-10 11:30:00              784.401936   2.290322
2021-09-10 12:00:00              784.401936   6.713206
2021-09-10 12:30:00              618.809407   7.769313
2021-09-10 13:00:00             1048.829218  10.621018
2021-09-10 13:30:00             1219.861964   5.499976
...                                     ...        ...
2021-09-14 14:00:00             2688.232416   5.285862
2021-09-14 14:30:00             3336.329708   8.408446
2021-09-14 15:00:00             1974.323680   4.339882
2021-09-14 15:30:00             1748.367357   3.852752
2021-09-14 16:00:00             1204.302734   4.095116

[131 rows x 2 columns]
Time
2021-09-10 11:30:00    1.0
2021-09-10 12:00:00    1.0
2021-09-10 12:30:00    0.0
2021-09-10 13:00:00    0.0
2021-09-10 13:30:00    1.0
                      ... 
2021-09-14 14:00:00    1.0
2021-09-14 14:30:00    1.0
2021-09-14 15:00:00    1.0
2021-09-14 15

In [8]:
loss_fn = nn.CrossEntropyLoss()

In [9]:
kfold = KFold(n_splits=5, shuffle=True)

In [10]:
comb_accuracy = []
comb_precision = []
comb_recall = []
comb_f1 = []
comb_accuracy_train = []
comb_precision_train = []
comb_recall_train = []
comb_f1_train = []
for i, (train_index, test_index) in enumerate(kfold.split(X)):
    print(f"======================== Fold {i} ========================")

    X_train = np.array(X)[train_index]
    y_train = np.array(y)[train_index]
    X_test = np.array(X)[test_index]
    y_test = np.array(y)[test_index]

    classifier = RandomForestClassifier(n_estimators = 20, max_depth=2, max_features=0.5, criterion = 'entropy')
    classifier.fit(X_train, y_train.flatten())

    y_pred_train = classifier.predict(X_train)

    conf_matrix_train = confusion_matrix(y_train.flatten(), y_pred_train, labels=[0,1,2,3])
    accuracy_train = accuracy_score(y_train.flatten(), y_pred_train)
    precision_train = precision_score(y_train.flatten(), y_pred_train, labels=[0,1,2,3], average='macro')
    recall_train = recall_score(y_train.flatten(), y_pred_train, labels=[0,1,2,3], average='macro')
    f1_train = f1_score(y_train.flatten(), y_pred_train, labels=[0,1,2,3], average='macro')

    y_pred = classifier.predict(X_test)

    conf_matrix = confusion_matrix(y_test.flatten(), y_pred, labels=[0,1,2,3])
    accuracy = accuracy_score(y_test.flatten(), y_pred)
    precision = precision_score(y_test.flatten(), y_pred, labels=[0,1,2,3], average='macro')
    recall = recall_score(y_test.flatten(), y_pred, labels=[0,1,2,3], average='macro')
    f1 = f1_score(y_test.flatten(), y_pred, labels=[0,1,2,3], average='macro')

    comb_accuracy_train.append(accuracy_train)
    comb_precision_train.append(precision_train)
    comb_recall_train.append(recall_train)
    comb_f1_train.append(f1_train)

    comb_accuracy.append(accuracy)
    comb_precision.append(precision)
    comb_recall.append(recall)
    comb_f1.append(f1)

    print('Confusion Matrix:')
    print(np.array(conf_matrix_train))
    print('Accuracy:')
    print(accuracy_train)
    print('Precision:')
    print(precision_train)
    print('Recall:')
    print(recall_train)
    print('F1:')
    print(f1_train)
    print()

Confusion Matrix:
[[10 14  0  0]
 [ 2 57  0  0]
 [ 0 13  0  0]
 [ 3  5  0  0]]
Accuracy:
0.6442307692307693
Precision:
0.32677902621722843
Recall:
0.3456920903954802
F1:
0.3207726957726958

Confusion Matrix:
[[ 9 13  0  0]
 [ 2 58  0  0]
 [ 0 16  0  0]
 [ 4  3  0  0]]
Accuracy:
0.638095238095238
Precision:
0.3111111111111111
Recall:
0.34393939393939393
F1:
0.31495495495495496

Confusion Matrix:
[[12 18  0  0]
 [ 3 54  0  0]
 [ 1 11  0  0]
 [ 2  4  0  0]]
Accuracy:
0.6285714285714286
Precision:
0.3218390804597701
Recall:
0.3368421052631579
F1:
0.3125

Confusion Matrix:
[[11 13  0  0]
 [ 3 57  0  0]
 [ 3 10  0  0]
 [ 4  4  0  0]]
Accuracy:
0.6476190476190476
Precision:
0.30059523809523814
Recall:
0.3520833333333333
F1:
0.3201388888888889

Confusion Matrix:
[[10 18  0  0]
 [ 2 54  0  0]
 [ 1 13  0  0]
 [ 3  4  0  0]]
Accuracy:
0.6095238095238096
Precision:
0.307935393258427
Recall:
0.33035714285714285
F1:
0.29984326018808777



In [11]:
comb_accuracy_train = np.array(comb_accuracy_train)
comb_precision_train = np.array(comb_precision_train)
comb_recall_train = np.array(comb_recall_train)
comb_f1_train = np.array(comb_f1_train)

comb_accuracy = np.array(comb_accuracy)
comb_precision = np.array(comb_precision)
comb_recall = np.array(comb_recall)
comb_f1 = np.array(comb_f1)

print('Average Accuracy:')
print('\tTrain:', comb_accuracy_train.mean(axis=0))
print('\tTest:', comb_accuracy.mean(axis=0))
print('Average Precision:')
print('\tTrain:', comb_precision_train.mean(axis=0))
print('\tTest:', comb_precision.mean(axis=0))
print('Average Recall:')
print('\tTrain:', comb_recall_train.mean(axis=0))
print('\tTest:', comb_recall.mean(axis=0))
print('Average F1:')
print('\tTrain:', comb_f1_train.mean(axis=0))
print('\tTest:', comb_f1.mean(axis=0))

Average Accuracy:
	Train: 0.6336080586080586
	Test: 0.6031339031339031
Average Precision:
	Train: 0.31365196982835497
	Test: 0.26866104242820493
Average Recall:
	Train: 0.3417828131577016
	Test: 0.3017651098901099
Average F1:
	Train: 0.31364195996092553
	Test: 0.2720551064390735
