# BioBot_FDS_02: KNN_Model
## Deliverable_02: Implementing a K-Nearest Neightbors (KNN) Classifier model
Author/code developer: Yan Bello. 14/11/2018. As part of the Master in Artificial Intelligence (UNIR). 
This file/code is part of the development and exploration/experimentation on a Fall Detection System (FDS). 

---


In the following sections, we used this dataset: 
SisFall: A Fall and Movement Dataset. 
Created by: A. Sucerquia, J.D. López, J.F. Vargas-Bonilla
SISTEMIC, Faculty of Engineering, Universidad de Antiquia UDEA.
Detailed information about this dataset can be found in this website: http://sistemic.udea.edu.co/en/investigacion/proyectos/english-falls/.
Reference paper: Sucerquia A, López JD, Vargas-Bonilla JF. SisFall: A Fall and Movement Dataset. Sensors (Basel). 2017;17(1):198. Published 2017 Jan 20. doi:10.3390/s17010198

---



In [1]:
# Preliminary step 0. We need to establish/select our working folders. First, ensure  the previous dataset files are available.
# The code below is prepared to work with two options: local drive or mounting a Google Drive for Colab
# Select the appropriate configuration for your environment by commenting/un-commenting the following lines:

# To work with Google Colab and Google Drive: 
from google.colab import drive 
drive.mount('/content/gdrive')
FILE_DIRECTORY =  "gdrive/My Drive/Colab Notebooks/"
SisFall_ALL_DIRECTORY =  FILE_DIRECTORY + "SisFall_dataset_ALL/"

# To work with a local drive, uncomment these line:
# FILE_DIRECTORY =  os.getcwd() + "\\"
# SisFall_ALL_DIRECTORY =  FILE_DIRECTORY + "SisFall_dataset_ALL\\"

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


## 2.1 Load a dataframe with prepared info from ADL/Falls dataset

In [3]:
# We work with the prepared file Unified_ADL_Falls, which is based on the previous dataset
my_data_file_name = FILE_DIRECTORY + "Unified_ADL_Falls.txt"

import pandas as pd

# Creamos un data frame y cargamos los datos del fichero
df_ADL_Falls = pd.DataFrame(pd.read_csv(my_data_file_name, sep = ','))

df_ADL_Falls.drop('0', axis=1, inplace=True)

df_only_ADLs = df_ADL_Falls[df_ADL_Falls.Fall_ADL == "D"]
df_only_Falls = df_ADL_Falls[df_ADL_Falls.Fall_ADL == "F"]

# mostramos el data frame
print(df_only_ADLs.tail())
print(df_only_Falls.tail())

     Act_Type Age_Cat Fall_ADL              File  kurtosis_S1_X  max_S1_X  \
2697      D19      SE        D  D19_SE06_R01.txt       8.727956       190   
2698      D19      SE        D  D19_SE06_R02.txt      10.096698        86   
2699      D19      SE        D  D19_SE06_R03.txt       9.540330       259   
2700      D19      SE        D  D19_SE06_R04.txt      20.191198       393   
2701      D19      SE        D  D19_SE06_R05.txt       9.022231       230   

      mean_S1_X  min_S1_X  range_S1_X  skewness_S1_X    ...     \
2697  20.204659      -195         385      -1.745292    ...      
2698 -33.031614      -324         410      -1.976282    ...      
2699   8.276206      -154         413       0.398760    ...      
2700   9.514143      -255         648       0.993127    ...      
2701   9.554077      -164         394      -0.149056    ...      

      range_S1_N_VER  skewness_S1_N_VER  std_S1_N_VER  var_S1_N_VER   corr_HV  \
2697        1.531165           2.009740      0.189131      

### Shuffle and set up training and test samples for ADL/Falls

In [0]:
import random
import math
from numpy.random import permutation

# Randomly shuffle the index of each set (ADLs and Falls)
# -------------------------------------------------------
# First we prepare the sets of ADLs
random_indices = permutation(df_only_ADLs.index)
# Use a test-split (of 30% of the items)
test_split = math.floor(len(df_only_ADLs)*0.3)
# Test set with 30% of items
df_only_ADLs_test = df_only_ADLs.loc[random_indices[0:test_split]]
# Train set with 70% of the items.
df_only_ADLs_train = df_only_ADLs.loc[random_indices[test_split:]]


# -------------------------------------------------------
# Now we prepare the sets of Falls
random_indices = permutation(df_only_Falls.index)
# Use a test-split (of 30% of the items)
test_split = math.floor(len(df_only_Falls)*0.3)
# Test set with 30% of items
df_only_Falls_test = df_only_Falls.loc[random_indices[0:test_split]]
# Train set with 70% of the items.
df_only_Falls_train = df_only_Falls.loc[random_indices[test_split:]]



print("Total ADL: " + str(len(df_only_ADLs)))
print("Total Falls: " + str(len(df_only_Falls)))
print("GRAND Total: " + str(len(df_only_Falls)+len(df_only_ADLs)))
print("---------------------------------------")
print("Train Falls: "+ str(len(df_only_Falls_train)))
print("Train ADL: "+ str(len(df_only_ADLs_train)))
print("Train TOTAL: "+ str(len(df_only_ADLs_train)+len(df_only_Falls_train)))
print("---------------------------------------")
print("Test Falls: "+ str(len(df_only_Falls_test)))
print("Test ADL: "+ str(len(df_only_ADLs_test)))
print("Test TOTAL: "+ str(len(df_only_ADLs_test)+len(df_only_Falls_test)))

Total ADL: 2702
Total Falls: 1798
GRAND Total: 4500
---------------------------------------
Train Falls: 1259
Train ADL: 1892
Train TOTAL: 3151
---------------------------------------
Test Falls: 539
Test ADL: 810
Test TOTAL: 1349


In [0]:
# Prepare dataset with Test examplars

frames = [df_only_Falls_test, df_only_ADLs_test]
df_ADL_Falls_test = pd.concat(frames)
print("Test ADLs: "+ str(len(df_only_ADLs_test)))
print("Test Falls: "+ str(len(df_only_Falls_test)))
print("Test ALL: "+ str(len(df_ADL_Falls_test)))

print(df_ADL_Falls_test.head())
print(df_ADL_Falls_test.tail())


Test ADLs: 810
Test Falls: 539
Test ALL: 1349
     Act_Type Age_Cat Fall_ADL              File  kurtosis_S1_X  max_S1_X  \
3477      F07      SA        F  F07_SA12_R02.txt      28.158868      1730   
4496      F15      SE        F  F15_SE06_R02.txt      14.164169       128   
3031      F03      SA        F  F03_SA19_R01.txt      88.093106       191   
4384      F15      SA        F  F15_SA01_R05.txt      64.593266      2619   
3719      F09      SA        F  F09_SA12_R04.txt       2.035594        19   

       mean_S1_X  min_S1_X  range_S1_X  skewness_S1_X    ...     \
3477   63.324459      -510        2240       3.887497    ...      
4496 -155.198003      -911        1039      -3.036554    ...      
3031 -146.509151     -4096        4287      -8.141281    ...      
4384 -159.004992     -4053        6672      -5.225071    ...      
3719 -147.504160      -835         854      -1.118020    ...      

      range_S1_N_VER  skewness_S1_N_VER  std_S1_N_VER  var_S1_N_VER   corr_HV  \
3477   

In [0]:
# Prepare dataset with Train examplars

frames = [df_only_Falls_train, df_only_ADLs_train]
df_ADL_Falls_train = pd.concat(frames)
print("train ADLs: "+ str(len(df_only_ADLs_train)))
print("train Falls: "+ str(len(df_only_Falls_train)))
print("train ALL: "+ str(len(df_ADL_Falls_train)))

print(df_ADL_Falls_train.head())
print(df_ADL_Falls_train.tail())


train ADLs: 1892
train Falls: 1259
train ALL: 3151
     Act_Type Age_Cat Fall_ADL              File  kurtosis_S1_X  max_S1_X  \
4149      F13      SA        F  F13_SA02_R05.txt       4.165379        53   
3575      F08      SA        F  F08_SA07_R05.txt       4.422467       205   
4172      F13      SA        F  F13_SA07_R03.txt       2.065487       221   
3303      F06      SA        F  F06_SA01_R03.txt      12.473522       893   
3400      F06      SA        F  F06_SA20_R05.txt      10.054013      1396   

       mean_S1_X  min_S1_X  range_S1_X  skewness_S1_X    ...     \
4149 -147.003328      -918         971      -1.356286    ...      
3575 -143.532446      -989        1194      -1.547840    ...      
4172  -81.241265      -463         684      -0.420039    ...      
3303  129.995008       -88         981       2.952118    ...      
3400  123.337770      -565        1961       2.090652    ...      

      range_S1_N_VER  skewness_S1_N_VER  std_S1_N_VER  var_S1_N_VER   corr_HV  \
41

## 2.2 Define and train a K-Neighbors Classifiers
Below we use KNeighborsClassifier from sklearn.neighbors, experimenting with various parameter settings. For clarity and simplicity here only two model configuration are included.

### 2.2-A) K-Neighbors Classifier with default parameters
#### The KNN model

In [0]:
# The columns that we will be making predictions with.
x_columns = ['kurtosis_S1_X','max_S1_X','mean_S1_X','min_S1_X','range_S1_X','skewness_S1_X','std_S1_X','var_S1_X',
             'kurtosis_S1_Y','max_S1_Y','mean_S1_Y','min_S1_Y','range_S1_Y','skewness_S1_Y','std_S1_Y','var_S1_Y',
             'kurtosis_S1_Z','max_S1_Z','mean_S1_Z','min_S1_Z','range_S1_Z','skewness_S1_Z','std_S1_Z','var_S1_Z',
             'kurtosis_S1_N_XYZ','max_S1_N_XYZ','mean_S1_N_XYZ','min_S1_N_XYZ','range_S1_N_XYZ','skewness_S1_N_XYZ','std_S1_N_XYZ','var_S1_N_XYZ',
             'kurtosis_S1_N_HOR','max_S1_N_HOR','mean_S1_N_HOR','min_S1_N_HOR','range_S1_N_HOR','skewness_S1_N_HOR','std_S1_N_HOR','var_S1_N_HOR',
             'kurtosis_S1_N_VER','max_S1_N_VER','mean_S1_N_VER','min_S1_N_VER','range_S1_N_VER','skewness_S1_N_VER','std_S1_N_VER','var_S1_N_VER',
             'corr_HV','corr_NH','corr_NV','corr_XY','corr_XZ','corr_YZ']
# The column that we want to predict.
y_column = ["Fall_ADL"]

from sklearn.neighbors import KNeighborsClassifier
# Create the knn model.
# Look at the five closest neighbors.
knn = KNeighborsClassifier(n_neighbors=5)
# Fit the model on the training data.
y = df_ADL_Falls_train.loc[:,['Fall_ADL']]
train_y = np.array(y)

knn.fit(df_ADL_Falls_train[x_columns], train_y.ravel())
# Make point predictions on the test set using the fit model.
predictions = knn.predict(df_ADL_Falls_test[x_columns])

print(predictions)
print(knn.score(df_ADL_Falls_test[x_columns], df_ADL_Falls_test[y_column]))

['F' 'D' 'F' ... 'D' 'D' 'D']
0.9636767976278725


#### Confusion matrix for K-Neighbors Classifier with default parameters

In [0]:
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(df_ADL_Falls_test[y_column], predictions, labels=["D", "F"])
print("Confusion Matrix:")
print("-----------------")
print(cm)
print("-----------------")
cm_norm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
print("Confusion Matrix (Normalized):")
print("-----------------------------")
print(cm_norm)
print("-----------------------------")

Confusion Matrix:
-----------------
[[794  16]
 [ 33 506]]
-----------------
Confusion Matrix (Normalized):
-----------------------------
[[0.98024691 0.01975309]
 [0.06122449 0.93877551]]
-----------------------------


#### Sensitivity, Specificity, Precision and Accuracy

In [0]:
# calculations of measurements of performance

knn_TP = cm[1,1]
knn_FP = cm[1,0]
knn_TN = cm[0,0]
knn_FN = cm[0,1]

# SENSITIVITY = TP / (TP + FN)
knn_Sensitivity = knn_TP / (knn_TP + knn_FN)
print("knn_Sensitivity = "+ str(knn_Sensitivity))

# SPECIFICITY = TN / (FP + TN)
knn_Specificity = knn_TN / (knn_FP + knn_TN)
print("knn_Specificity = "+ str(knn_Specificity))

# Precision = TP / (TP + FP)
knn_Precision = knn_TP / (knn_TP + knn_FP)
print("knn_Precision = "+ str(knn_Precision))

# Accuracy = (TP + TN) / (TP + FP + TN + FN)
knn_Accuracy = (knn_TP + knn_TN) / (knn_TP + knn_FP + knn_TN + knn_FN)
print("knn_Accuracy = "+ str(knn_Accuracy))

knn_Sensitivity = 0.9693486590038314
knn_Specificity = 0.9600967351874244
knn_Precision = 0.9387755102040817
knn_Accuracy = 0.9636767976278725


### 2.2-B) K-Neighbors Classifier with K = 10
#### The KNN model

In [0]:
"""
After experimenting and trying out various parameter configurations, K=10 gave the best performance
KNN MODEL WITH K = 10

"""

# The columns that we will be making predictions with.
x_columns = ['kurtosis_S1_X','max_S1_X','mean_S1_X','min_S1_X','range_S1_X','skewness_S1_X','std_S1_X','var_S1_X',
             'kurtosis_S1_Y','max_S1_Y','mean_S1_Y','min_S1_Y','range_S1_Y','skewness_S1_Y','std_S1_Y','var_S1_Y',
             'kurtosis_S1_Z','max_S1_Z','mean_S1_Z','min_S1_Z','range_S1_Z','skewness_S1_Z','std_S1_Z','var_S1_Z',
             'kurtosis_S1_N_XYZ','max_S1_N_XYZ','mean_S1_N_XYZ','min_S1_N_XYZ','range_S1_N_XYZ','skewness_S1_N_XYZ','std_S1_N_XYZ','var_S1_N_XYZ',
             'kurtosis_S1_N_HOR','max_S1_N_HOR','mean_S1_N_HOR','min_S1_N_HOR','range_S1_N_HOR','skewness_S1_N_HOR','std_S1_N_HOR','var_S1_N_HOR',
             'kurtosis_S1_N_VER','max_S1_N_VER','mean_S1_N_VER','min_S1_N_VER','range_S1_N_VER','skewness_S1_N_VER','std_S1_N_VER','var_S1_N_VER',
             'corr_HV','corr_NH','corr_NV','corr_XY','corr_XZ','corr_YZ']
# The column that we want to predict.
y_column = ["Fall_ADL"]

from sklearn.neighbors import KNeighborsClassifier
# Create the knn model.
# Look at the five closest neighbors.
knn = KNeighborsClassifier(n_neighbors=15)
# Fit the model on the training data.
y = df_ADL_Falls_train.loc[:,['Fall_ADL']]
train_y = np.array(y)

knn.fit(df_ADL_Falls_train[x_columns], train_y.ravel())
# Make point predictions on the test set using the fit model.
predictions = knn.predict(df_ADL_Falls_test[x_columns])

print(predictions)
print(knn.score(df_ADL_Falls_test[x_columns], df_ADL_Falls_test[y_column]))

#### Confusion matrix for K-Neighbors Classifier with K=10

In [0]:
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(df_ADL_Falls_test[y_column], predictions, labels=["D", "F"])
print("Confusion Matrix:")
print("-----------------")
print(cm)
print("-----------------")
cm_norm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
print("Confusion Matrix (Normalized):")
print("-----------------------------")
print(cm_norm)
print("-----------------------------")

Confusion Matrix:
-----------------
[[794  16]
 [ 33 506]]
-----------------
Confusion Matrix (Normalized):
-----------------------------
[[0.98024691 0.01975309]
 [0.06122449 0.93877551]]
-----------------------------


#### Sensitivity, Specificity, Precision and Accuracy

In [0]:
# calculations of measurements of performance

knn_TP = cm[1,1]
knn_FP = cm[1,0]
knn_TN = cm[0,0]
knn_FN = cm[0,1]

# SENSITIVITY = TP / (TP + FN)
knn_Sensitivity = knn_TP / (knn_TP + knn_FN)
print("knn_Sensitivity = "+ str(knn_Sensitivity))

# SPECIFICITY = TN / (FP + TN)
knn_Specificity = knn_TN / (knn_FP + knn_TN)
print("knn_Specificity = "+ str(knn_Specificity))

# Precision = TP / (TP + FP)
knn_Precision = knn_TP / (knn_TP + knn_FP)
print("knn_Precision = "+ str(knn_Precision))

# Accuracy = (TP + TN) / (TP + FP + TN + FN)
knn_Accuracy = (knn_TP + knn_TN) / (knn_TP + knn_FP + knn_TN + knn_FN)
print("knn_Accuracy = "+ str(knn_Accuracy))

knn_Sensitivity = 0.9693486590038314
knn_Specificity = 0.9600967351874244
knn_Precision = 0.9387755102040817
knn_Accuracy = 0.9636767976278725
