# UP-Fall Bi-Classifier

### Libraries

In [1]:
#Main libraries
import pandas as pd
import numpy as np

#Utilities
from time import time
from statistics import mean 
from statistics import stdev

#Models
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier as RndFC
from sklearn.neural_network import MLPClassifier as ffp
from sklearn.neighbors import KNeighborsClassifier as KNN
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

#Dataset Preparation
from sklearn.model_selection import train_test_split

#Metrics
from sklearn.metrics import precision_recall_fscore_support as score
from sklearn import metrics as met

#Cross validation
from sklearn.model_selection import KFold

import warnings
warnings.filterwarnings('ignore')

### Parameters

In [6]:
# Fractions of the dataset handed to train_test_split as train_size and
# test_size respectively; the two values are expected to add up to 1.0.
train_percentage = 0.7
test_percentage = 0.3

In [7]:
# Input CSV files: one with the feature columns, one with the 'label' column.
features_inputfile = '../Databases (preparation)/features_no_repeated.csv'
labels_inputfile = '../Databases (preparation)/labels_no_repeated.csv'
# Output CSV to which the KPI rows of every method / fold are appended.
metrics_outfile = "metrics_no_repeated.csv"
# Suffix of the per-method confusion-matrix CSV (file name = method + suffix).
confusion_matrix_outfile = "_confusion_matrix_no_repeated.csv"

### Functions

In [4]:
def create_df(method, counter, total_time, metric, values):
    """Build a one-row DataFrame describing a single KPI measurement.

    The row starts with the fixed columns 'Method', 'Counter', 'Time' and
    'Metric'; each element of ``values`` then gets its own column, named
    after its position ('0', '1', ...).

    Parameters:
        method: name of the classifier the KPI belongs to.
        counter: evaluation index stored in the 'Counter' column.
        total_time: elapsed time stored in the 'Time' column.
        metric: name of the KPI (e.g. 'Precision').
        values: sequence of KPI values, one per output column.

    Returns:
        A pandas DataFrame with exactly one row.
    """
    row = {
        'Method': [method],
        'Counter': [counter],
        'Time': [total_time],
        'Metric': [metric],
    }
    # One extra single-cell column per KPI value, keyed by its position.
    row.update({str(position): [value] for position, value in enumerate(values)})
    return pd.DataFrame(row)

In [5]:
def change_labels(l):
    """Collapse the numeric activity codes in ``l['label']`` into two classes.

    Codes 1, 2, 3, 4, 5 and 11 become 'Fall'; codes 6, 7, 8, 9, 10 and 20
    become 'Not Fall'. Any other value is left untouched.

    Parameters:
        l: DataFrame with a 'label' column; it is modified in place.

    Returns:
        None.
    """
    fall_codes = [1.0, 2.0, 3.0, 4.0, 5.0, 11.0]
    not_fall_codes = [6.0, 7.0, 8.0, 9.0, 10.0, 20.0]

    codes = l['label']
    # Both masks are computed from the original codes, before any relabelling.
    is_fall = codes.isin(fall_codes)
    is_not_fall = codes.isin(not_fall_codes)

    # Work on an explicit object-dtype copy: assigning strings straight into
    # a float column relies on implicit upcasting, which pandas deprecates.
    relabelled = codes.astype(object)
    relabelled[is_fall] = 'Fall'
    relabelled[is_not_fall] = 'Not Fall'
    l['label'] = relabelled

### Dataset Preparation

In [9]:
# Load the feature columns from the CSV at features_inputfile.
features = pd.read_csv(features_inputfile)

In [10]:
# Load the labels from the CSV at labels_inputfile (provides the 'label' column).
labels = pd.read_csv(labels_inputfile)

In [11]:
# Show how many samples carry each distinct value of the 'label' column.
print(labels['label'].value_counts())

7.0     56676
8.0     47500
11.0    42394
6.0     40471
10.0    23361
9.0      1662
5.0      1617
3.0      1566
20.0     1446
1.0      1343
2.0      1326
4.0      1298
Name: label, dtype: int64


In [13]:
# Rewrite the 'label' column in place as 'Fall' / 'Not Fall'.
change_labels(labels)

In [15]:
# Show the per-class sample counts of the 'label' column again.
print(labels['label'].value_counts())

Not Fall    171116
Fall         49544
Name: label, dtype: int64


In [16]:
# Hold-out split of features and labels. random_state is fixed (the same
# seed the K-Fold splitter in this notebook uses) so that re-running the
# notebook reproduces the same partition and therefore comparable KPIs.
X_train, X_test, y_train, y_test = train_test_split(features,
                                                    labels,
                                                    train_size=train_percentage,
                                                    test_size=test_percentage,
                                                    random_state=42)

In [17]:
# Sanity check: shapes of the training features and training labels.
print (X_train.shape, y_train.shape)

(154462, 51) (154462, 1)


In [18]:
# Sanity check: shapes of the test features and test labels.
print (X_test.shape, y_test.shape)

(66198, 51) (66198, 1)


## Classifiers

In [19]:
# Keys of the classifiers to evaluate, in execution order; each key is mapped
# to a scikit-learn estimator inside the evaluation loop.
methods = ['RF','KNN','AdaBoost','MLP','SVM']

In [20]:
def _save_kpis(method, counter, elapsed, precision, recall, fscore, support):
    """Append the four per-class KPI rows of one evaluation to metrics_outfile."""
    kpis = (('Precision', precision), ('Recall', recall),
            ('Fscore', fscore), ('Support', support))
    for metric, values in kpis:
        # header must be the boolean False: the string 'False' is truthy, so
        # pandas used to emit a header line before every appended row.
        create_df(method, counter, elapsed, metric, values).to_csv(
            metrics_outfile, mode='a', header=False, index=False, sep=';')


# For every method: fit and score on the hold-out split (saved with
# Counter 0), then run a 10-fold cross-validation over the full dataset
# (saved with Counter 1..10). KPIs are appended to metrics_outfile and the
# confusion matrices to "<method>" + confusion_matrix_outfile.
for method in methods:
    counter = 0

    print('\n\nMethod: {}'.format(method))

    if method == 'RF':
        classifier = RndFC(n_estimators=100)
    elif method == 'SVM':
        classifier = svm.SVC(C=1000, gamma=0.0001)
    elif method == 'MLP':
        classifier = ffp()
    elif method == 'KNN':
        classifier = KNN()
    elif method == 'AdaBoost':
        classifier = AdaBoostClassifier(DecisionTreeClassifier(max_depth=15, min_samples_split=20),
                                        n_estimators=10,
                                        learning_rate=1)
    else:
        # Fail loudly instead of silently training AdaBoost for a typo.
        raise ValueError('Unknown method: {}'.format(method))

    #Train
    start_time = time()
    classifier.fit(X_train, np.array(y_train).flatten())

    #Test
    estimates = classifier.predict(X_test)
    # Measured once so that all four KPI rows carry the same value; it
    # covers training plus prediction on the hold-out split.
    elapsed = time() - start_time
    print("Test time: %0.10f seconds." % elapsed)

    #KPIs (one value per class)
    precision, recall, fscore, support = score(y_test, estimates)
    print('Precision: {}'.format(precision))
    print('Recall: {}'.format(recall))
    print('Fscore: {}'.format(fscore))
    print('Support: {}'.format(support))

    print('Confusion Matrix')
    confusion = pd.crosstab(np.array(y_test).flatten(), estimates)
    print(confusion)

    #Save KPIs
    _save_kpis(method, counter, elapsed, precision, recall, fscore, support)
    confusion.to_csv(method + confusion_matrix_outfile, mode='a', header=True, index=True, sep=';')

    #K-fold Cross-Validation
    print('K-Fold Cross Validation')
    cv = KFold(n_splits=10, random_state=42, shuffle=True)
    for counter, (train_index, test_index) in enumerate(cv.split(features), start=1):
        start_time = time()
        print('K-Fold - Counter: %d' % (counter))

        X_train_kf = features.iloc[train_index]
        X_test_kf = features.iloc[test_index]
        # Flatten the one-column label frames to 1-D arrays, as is done for
        # the hold-out fit, so the estimators receive y in the expected shape.
        y_train_kf = np.array(labels.iloc[train_index]).flatten()
        y_test_kf = np.array(labels.iloc[test_index]).flatten()

        classifier.fit(X_train_kf, y_train_kf)
        estimates_kf = classifier.predict(X_test_kf)
        elapsed_kf = time() - start_time

        # K-Fold KPI
        precision_kf, recall_kf, fscore_kf, support_kf = score(y_test_kf, estimates_kf)
        print('K-Fold - Precision: {}'.format(precision_kf))
        print('K-Fold - Recall: {}'.format(recall_kf))
        print('K-Fold - Fscore: {}'.format(fscore_kf))
        print('K-Fold - Support: {}'.format(support_kf))

        #Save KPIs
        _save_kpis(method, counter, elapsed_kf, precision_kf, recall_kf, fscore_kf, support_kf)
        pd.crosstab(y_test_kf, estimates_kf).to_csv(
            method + confusion_matrix_outfile, mode='a', header=False, index=False, sep=';')

    



Method: RF
Test time: 132.9252035618 seconds.
Precision: [0.98189563 0.99607222]
Recall: [0.98655788 0.99469217]
Fscore: [0.98422124 0.99538172]
Support: [14953 51245]
Confusion Matrix
col_0      Fall  Not Fall
row_0                    
Fall      14752       201
Not Fall    272     50973
K-Fold Cross Validation
K-Fold - Counter: 1
K-Fold - Precision: [0.98380324 0.99677703]
K-Fold - Recall: [0.98894472 0.99526066]
K-Fold - Fscore: [0.98636728 0.99601827]
K-Fold - Support: [ 4975 17091]
K-Fold - Counter: 2


KeyboardInterrupt: 