In [2]:
import pandas as pd
import numpy as np
import os
import tensorflow as tf
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.preprocessing import normalize
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.utils import shuffle

os.chdir(r'F:\\KARTIK\\2021\\Freezing of Gait')

In [20]:
data_path = os.path.join(os.getcwd(), 'dataset_fog_release', 'dataset', 'FINAL FEATURES', 'final_ftrs_size_1.5.csv')
if os.path.exists(data_path):
    print(True)

True


In [21]:
df = pd.read_csv(data_path)
df.head()

Unnamed: 0.1,Unnamed: 0,0,Variance_A_F,STD_T_L,FI_A_F,FI_A_V,FI_A_L,EN_A_F,EN_A_V,EN_A_L
0,5827,0,129.1931,10.626735,0.208812,0.197859,0.22838,564225,66177982,2900396
1,1010,0,1218303.0,167.212717,2.211117,0.569717,3.58649,95100630,92777583,35465995
2,9345,0,76434.04,32.947694,0.977541,0.240672,0.251569,9383529,62431234,7084634
3,8576,0,294386.2,97.367236,0.570907,0.459756,1.136647,21979375,75595449,2519586
4,7598,0,137049.1,49.887843,0.594945,0.155911,1.087455,12419721,76300428,5632880


In [22]:
df['Energy_Mean'] = (df['EN_A_F'] + df['EN_A_V'] + df['EN_A_L'])/3
df = df.drop(['EN_A_F','EN_A_V','EN_A_L','Unnamed: 0'], axis = 1)

In [23]:
df.head()

Unnamed: 0,0,Variance_A_F,STD_T_L,FI_A_F,FI_A_V,FI_A_L,Energy_Mean
0,0,129.1931,10.626735,0.208812,0.197859,0.22838,23214200.0
1,0,1218303.0,167.212717,2.211117,0.569717,3.58649,74448070.0
2,0,76434.04,32.947694,0.977541,0.240672,0.251569,26299800.0
3,0,294386.2,97.367236,0.570907,0.459756,1.136647,33364800.0
4,0,137049.1,49.887843,0.594945,0.155911,1.087455,31451010.0


In [34]:
# Split the data for the classifier
y = df['0'].values
X = df.iloc[:,1:]
X, y = shuffle(X, y, random_state = 2)

In [36]:
# Initiate the 10 - fold split 
kf = StratifiedKFold(n_splits = 10, shuffle = True, random_state = 2)

In [71]:
# Initiate a random forest model
clf = RandomForestClassifier(
    n_estimators=500,
    criterion='gini',
    max_depth=50,
    min_samples_split=5,
    min_samples_leaf=15,
    max_features='auto',
    max_leaf_nodes=None,
    min_impurity_split=None,
    bootstrap=True,
    n_jobs=-1,
    random_state=2,
    class_weight='balanced'
)

In [98]:
# split()  method generate indices to split data into training and test set.
for (train_index, test_index), i in zip(kf.split(X, y), range(10)):
    print(f'Fold:{i}, Train set: {len(train_index)}, Test set:{len(test_index)}')


Fold:0, Train set: 3851, Test set:428
Fold:1, Train set: 3851, Test set:428
Fold:2, Train set: 3851, Test set:428
Fold:3, Train set: 3851, Test set:428
Fold:4, Train set: 3851, Test set:428
Fold:5, Train set: 3851, Test set:428
Fold:6, Train set: 3851, Test set:428
Fold:7, Train set: 3851, Test set:428
Fold:8, Train set: 3851, Test set:428
Fold:9, Train set: 3852, Test set:427


In [100]:
score = cross_val_score(clf, X, y, cv= kf, scoring="accuracy")
print(f'Scores for each fold are: {score}')
print(f'Average score: {"{:.2f}".format(score.mean())}')

Scores for each fold are: [0.68224299 0.70794393 0.73831776 0.71495327 0.70327103 0.71728972
 0.70093458 0.71495327 0.70794393 0.72131148]
Average score: 0.71


In [107]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 2, stratify = y)

In [108]:
m = clf.fit(X_train, y_train)
y_pred = m.predict(X_test)
accuracy_score(y_test, y_pred)

0.7252336448598131

In [109]:
cnf = confusion_matrix(y_test, y_pred)
print(cnf)

[[445 134  28]
 [ 47 324  33]
 [  7  45   7]]


In [110]:
# For class pre-FoG
tp = cnf[1][1]
tn = cnf[0][0] + cnf[0][2] + cnf[2][0] + cnf[2][2]
fp = cnf[1][0] + cnf[1][2]
fn = cnf[0][1] + cnf[2][1]

In [111]:
precision = tp/(tp + fp)
recall =  tp / (tp + fn)    #sensitivity
F1_score = (2*tp) / (2*tp + fp + fn)

In [112]:
print("The evaluation metrics are:\nprecision = {},\nrecall = {}, \nF1-score = {}".format(precision, recall, F1_score) )

The evaluation metrics are:
precision = 0.801980198019802,
recall = 0.6441351888667992, 
F1-score = 0.7144432194046306
