In [1]:
import pandas as pd
import numpy as np
import os
import tensorflow as tf
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.preprocessing import normalize
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.utils import shuffle

os.chdir(r"F:\\KARTIK\\2021\\Freezing of Gait")

In [2]:
data_path = os.path.join(os.getcwd(), 'dataset_fog_release', 'dataset', 'FINAL FEATURES', 'final_ftrs_size_1.5_2.csv')
if os.path.exists(data_path):
    print(True)

True


In [3]:
df = pd.read_csv(data_path)
df.head()

Unnamed: 0.1,Unnamed: 0,0,RMS_A_F,Variance_A_F,RangeA_F,Minimum_A_F,STD_A_F,RMS_A_V,Variance_A_V,RangeA_V,EN_A_F,EN_A_V,EN_A_L
0,7413,0,455.891246,144639.5818,2302,-848,380.315109,1251.760454,92842.65991,1187,13301557,100281871,7930989
1,7086,0,231.283646,102.246094,40,212,10.111681,988.759972,81.514648,39,3423496,62569362,3413816
2,9439,0,540.666631,240819.7773,2786,-1090,490.733917,1231.822462,114531.4001,1510,18708506,97112741,6079041
3,2852,0,511.45213,209428.1406,2888,-2171,457.633194,1018.50227,48689.52734,1010,16741330,66390200,7123326
4,10419,0,327.939019,62.475586,30,313,7.90415,950.64714,71.800537,39,6882816,57838719,4991550


In [4]:
df['Energy_Mean'] = (df['EN_A_F'] + df['EN_A_V'] + df['EN_A_L'])/3
df = df.drop(['EN_A_F','EN_A_V','EN_A_L','Unnamed: 0'], axis = 1)

In [5]:
df.head()

Unnamed: 0,0,RMS_A_F,Variance_A_F,RangeA_F,Minimum_A_F,STD_A_F,RMS_A_V,Variance_A_V,RangeA_V,Energy_Mean
0,0,455.891246,144639.5818,2302,-848,380.315109,1251.760454,92842.65991,1187,40504810.0
1,0,231.283646,102.246094,40,212,10.111681,988.759972,81.514648,39,23135560.0
2,0,540.666631,240819.7773,2786,-1090,490.733917,1231.822462,114531.4001,1510,40633430.0
3,0,511.45213,209428.1406,2888,-2171,457.633194,1018.50227,48689.52734,1010,30084950.0
4,0,327.939019,62.475586,30,313,7.90415,950.64714,71.800537,39,23237700.0


In [15]:
# Split the data for the classifier
y = df['0'].values
X = df.iloc[:,1:]
X = normalize(X)
X, y = shuffle(X, y, random_state = 2)

In [16]:
# Initiate the 10 - fold split 
kf = StratifiedKFold(n_splits = 10, shuffle = True, random_state = 2)


# Initiate a decision tree model
clf = DecisionTreeClassifier(class_weight=None, 
                            criterion='entropy',
                            max_depth=10,
                            max_features=3, 
                            max_leaf_nodes=None,
                            min_impurity_decrease=0.0, 
                            min_samples_leaf=18, 
                            min_samples_split=10,
                            min_weight_fraction_leaf=0.0, 
                            #presort=False,
                            random_state=2, 
                            splitter='best'
                            )



# split()  method generate indices to split data into training and test set.
for (train_index, test_index), i in zip(kf.split(X, y), range(10)):
    print(f'Fold:{i}, Train set: {len(train_index)}, Test set:{len(test_index)}')

Fold:0, Train set: 1988, Test set:221
Fold:1, Train set: 1988, Test set:221
Fold:2, Train set: 1988, Test set:221
Fold:3, Train set: 1988, Test set:221
Fold:4, Train set: 1988, Test set:221
Fold:5, Train set: 1988, Test set:221
Fold:6, Train set: 1988, Test set:221
Fold:7, Train set: 1988, Test set:221
Fold:8, Train set: 1988, Test set:221
Fold:9, Train set: 1989, Test set:220


In [17]:
score = cross_val_score(clf, X, y, cv= kf, scoring="accuracy")
print(f'Scores for each fold are: {score}')
print(f'Average score: {"{:.2f}".format(score.mean())}')

Scores for each fold are: [0.77828054 0.7918552  0.76923077 0.76923077 0.77828054 0.7918552
 0.74208145 0.78280543 0.7918552  0.77727273]
Average score: 0.78


In [18]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 2, stratify = y, test_size = 0.40)

In [19]:
m = clf.fit(X_train, y_train)
y_pred = m.predict(X_test)
accuracy_score(y_test, y_pred)

0.747737556561086

In [20]:
cnf = confusion_matrix(y_test, y_pred)
print(cnf)

[[ 57  75  10]
 [ 28 599  20]
 [  6  84   5]]


In [21]:
# For class pre-FoG
tp = cnf[1][1]
tn = cnf[0][0] + cnf[0][2] + cnf[2][0] + cnf[2][2]
fp = cnf[1][0] + cnf[1][2]
fn = cnf[0][1] + cnf[2][1]

In [22]:
precision = tp/(tp + fp)
recall =  tp / (tp + fn)    #sensitivity
F1_score = (2*tp) / (2*tp + fp + fn)

In [23]:
print("The evaluation metrics are:\nprecision = {},\nrecall = {}, \nF1-score = {}".format(precision, recall, F1_score) )

The evaluation metrics are:
precision = 0.9258114374034003,
recall = 0.7902374670184696, 
F1-score = 0.8526690391459075
