# UR-Fall

## 1. Data Preparation

### Libraries

In [None]:
import json
import pandas

import warnings
warnings.filterwarnings('ignore')

### Download files

Download the dataset from http://fenix.univ.rzeszow.pl/~mkepski/ds/uf.html

Save the image files to 'IMG_UR_FALL'.

Save the CSV files to the root path.

### Alphapose Execution

command: python demo.py --indir 'IMG_UR_FALL'

The output file is alphapose-results.json; this file is included in the root path.

### Output processing

#### Process Labels

In [None]:
# Map each image file name to the label found in the adls and falls
# annotation CSVs.
labels_dict = {}

files = ["urfall-cam0-adls.csv","urfall-cam0-falls.csv"]

for file in files:
    # The context manager closes the file even when a row fails to parse.
    with open(file, "r") as opened_file:
        for line in opened_file:
            fields = line.rstrip('\n').split(',')
            if len(fields) < 3:
                # Blank or truncated row: there is nothing to label.
                continue
            # fields[0] and fields[1] build the image name (presumably the
            # sequence name and the frame number); fields[2] is the label.
            filename = "{}-cam0-rgb-{}.png".format(fields[0], fields[1].zfill(3))
            labels_dict[filename] = fields[2]

print("Total labels: {}".format(len(labels_dict)))

#### Process JSON

In [None]:
# Column layout of the labelled table: X, Y and confidence columns for each
# keypoint name, in this order, followed by the Label, Score and Image columns.
_keypoint_names = [
    'nose', 'Leye', 'Reye', 'Lear', 'Rear',
    'LShoulder', 'RShoulder', 'LElbow', 'RElbow', 'LWrist', 'RWrist',
    'LHip', 'RHip', 'LKnee', 'RKnee', 'LAnkle', 'RAnkle',
]

dict_columns = [
    '{}_{}'.format(prefix, name)
    for name in _keypoint_names
    for prefix in ('X', 'Y', 'confidence')
]
dict_columns += ['Label', 'Score', 'Image']

In [None]:
# Build one table row per entry of the AlphaPose result file whose image has
# a label, then save the table.
with open('alphapose-results.json') as json_file:
    d = json.load(json_file)

data = pandas.DataFrame(columns = dict_columns)
skipped = 0
for s in d:
    try:
        label_value = labels_dict[s['image_id']]
        # Same order as dict_columns: keypoints, Label, Score, Image.
        row = s['keypoints'] + [label_value] + [s['score']] + [s['image_id']]
        if label_value:
            data.loc[len(data)] = row
    except (KeyError, TypeError, ValueError):
        # Unlabelled image, missing field or row of the wrong length: the
        # entry is left out, but counted so the loss is visible.
        skipped += 1

if skipped:
    print("Skipped detections: {}".format(skipped))

# Written after the JSON handle has been released.
data.to_csv('UR_Fall_data_labeled.csv',index=True,decimal=",")

### Data filtering

In [None]:
# Reload the labelled table from the CSV file.
data = pandas.read_csv('UR_Fall_data_labeled.csv')

In [None]:
# Order the rows by the Score column (ascending, the sort_values default).
# NOTE(review): with this order, a later drop_duplicates(keep='first') keeps
# the lowest-scored row of each image; Score may also have been read back as
# text if the CSV stored it with a decimal comma. Confirm both are intended.
data = data.sort_values(by=['Score'])

In [None]:
# Show the current number of rows.
len(data)

In [None]:
# Keep one row per Image value: the first one in the current row order.
data = data.drop_duplicates(subset="Image", keep='first')

In [None]:
# Show the number of rows that remain.
len(data)

In [None]:
# Save the Image column on its own, with a header row and without the index.
data['Image'].to_csv('UR_Fall_images_no_repeated.csv',index=None,header=True)

In [None]:
# Save the Label column on its own, with a header row and without the index.
data['Label'].to_csv('UR_Fall_labels_no_repeated.csv',index=None,header=True)

In [None]:
# List the column names currently in the table.
data.columns

In [None]:
# Remove the four columns that are not keypoint values: the unnamed column
# that holds the saved index, plus Label, Score and Image.
non_feature_columns = ['Unnamed: 0', 'Label', 'Score', 'Image']
data = data.drop(columns=non_feature_columns)

In [None]:
# Convert every remaining column to float, accepting a decimal comma.
for col in data.columns:
    # astype(str) first: the .str accessor raises on a column that pandas
    # already parsed as numeric, whereas text columns are unaffected by it.
    data[col] = data[col].astype(str).str.replace(',', '.', regex=False).astype(float)

In [None]:
# Show the dtype of each column.
data.dtypes

In [None]:
# Save the remaining columns, with a header row and without the index.
data.to_csv('UR_Fall_features_no_repeated.csv',index=None,header=True)

## 2. Classifier Execution

### Libraries

In [None]:
#Main libraries
import pandas as pd
import numpy as np

#Utilities
from time import time
from statistics import mean 
from statistics import stdev

#Models
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier as RndFC
from sklearn.neural_network import MLPClassifier as ffp
from sklearn.neighbors import KNeighborsClassifier as KNN
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

#Dataset Preparation
from sklearn.model_selection import train_test_split

#Metrics
from sklearn.metrics import precision_recall_fscore_support as score
from sklearn import metrics as met

#Cross validation
from sklearn.model_selection import KFold

### Parameters

In [None]:
# Fractions of the data used for the training and the test partition.
train_percentage, test_percentage = 0.7, 0.3

In [None]:
# Input files: the feature table and the label column.
features_inputfile = "UR_Fall_features_no_repeated.csv"
labels_inputfile = "UR_Fall_labels_no_repeated.csv"

# Output files. The confusion-matrix name is a suffix: the method code is
# prepended to it when the file is written.
metrics_outfile = 'UR_Fall_metrics_no_repeated.csv'
confusion_matrix_outfile = '_UR_Fall_Confusion_matrix_no_repeated.csv'

### Functions

In [None]:
def create_df(method, counter, total_time, metric, values):
    """Build a one-row DataFrame describing a single metric measurement.

    The row holds the fixed columns Method, Counter, Time and Metric,
    followed by one column per entry of ``values``, named after the entry's
    position ('0', '1', ...).
    """
    row = {
        'Method': [method],
        'Counter': [counter],
        'Time': [total_time],
        'Metric': [metric],
    }
    for position, value in enumerate(values):
        row[str(position)] = [value]

    return pd.DataFrame(row)

In [None]:
def change_labels(l):
    """Reduce the labels to two classes by rewriting label 0.0 as 1.0.

    The ``Label`` column of ``l`` is modified in place; nothing is returned.
    """
    is_zero = l['Label'] == 0.0
    l.loc[is_zero, 'Label'] = 1.0

### Dataset Preparation

In [None]:
# Load the feature table from the file named by features_inputfile.
features = pd.read_csv(features_inputfile)

In [None]:
# Preview the first five feature rows.
features.head(5)

In [None]:
# Load the labels from the file named by labels_inputfile.
labels = pd.read_csv(labels_inputfile)

In [None]:
# Preview the first five label rows.
labels.head(5)

In [None]:
# Print how many rows carry each label value.
print(labels['Label'].value_counts())

In [None]:
# Rewrite label 0.0 as 1.0 in place, leaving two classes to predict.
change_labels(labels)

In [None]:
# Print the label counts again, to check the effect of the relabelling.
print(labels['Label'].value_counts())

In [None]:
# Hold-out split. The seed is pinned (the same value the K-fold splitter
# uses) so that every rerun trains and scores on the same partition.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    train_size=train_percentage,
    test_size=test_percentage,
    random_state=42,
)

In [None]:
# Print the shapes of the training features and training labels.
print (X_train.shape, y_train.shape)

In [None]:
# Print the shapes of the test features and test labels.
print (X_test.shape, y_test.shape)

### Classifier and Metrics

In [None]:
# Codes of the classifiers to evaluate, in execution order. The evaluation
# loop recognises 'RF', 'SVM', 'MLP' and 'KNN'; any other code (here
# 'AdaBoost') selects the AdaBoost classifier.
methods = ['RF','KNN','AdaBoost','MLP','SVM']

In [None]:
import os  # used only here, to tell whether the metrics file already has content


def _build_classifier(method):
    """Return a new, unfitted classifier for the given method code."""
    if method == 'RF':
        return RndFC(n_estimators=100)
    if method == 'SVM':
        return svm.SVC(C=1000, gamma=0.0001)
    if method == 'MLP':
        return ffp()
    if method == 'KNN':
        return KNN()
    # 'AdaBoost' is the fallback for every other code.
    return AdaBoostClassifier(DecisionTreeClassifier(max_depth=15, min_samples_split=20), n_estimators=10,
                     learning_rate=1)


def _append_metrics(method, counter, elapsed, named_values, with_header=False):
    """Append one ';'-separated row per (metric name, values) pair to metrics_outfile."""
    for position, (metric, values) in enumerate(named_values):
        create_df(method, counter, elapsed, metric, values).to_csv(
            metrics_outfile, mode='a', index=False, sep=';',
            header=(with_header and position == 0))


# The column header is written once, and only into a new or empty metrics
# file; testing the per-method counter would repeat it for every method.
header_pending = (not os.path.exists(metrics_outfile)
                  or os.path.getsize(metrics_outfile) == 0)

for method in methods:
    # 0 identifies the hold-out run; the K-fold runs are numbered from 1.
    counter = 0

    print('\n\nMethod: {}'.format(method))
    classifier = _build_classifier(method)

    #Train
    start_time = time()
    classifier.fit(X_train, np.array(y_train).flatten())

    #Test
    estimates = classifier.predict(X_test)
    # Measured once so that every row saved for this run carries the same
    # duration; note that it spans training as well as prediction.
    elapsed = time() - start_time
    print("Test time: %0.10f seconds." % elapsed)

    #KPIs
    precision, recall, fscore, support = score(y_test, estimates)
    print('Precision: {}'.format(precision))
    print('Recall: {}'.format(recall))
    print('Fscore: {}'.format(fscore))
    print('Support: {}'.format(support))

    print('Confusion Matrix')
    confusion = pd.crosstab(np.array(y_test).flatten(), estimates)
    print(confusion)

    #Save KPIs
    _append_metrics(method, counter, elapsed,
                    [('Precision', precision), ('Recall', recall),
                     ('Fscore', fscore), ('Support', support)],
                    with_header=header_pending)
    header_pending = False
    confusion.to_csv(method+confusion_matrix_outfile, mode='a',header=True,index=True,sep=';')

    #K-fold Cross-Validation
    print('K-Fold Cross Validation')
    cv = KFold(n_splits=10, random_state=42, shuffle=True)
    for train_index, test_index in cv.split(features):
        start_time = time()
        counter += 1
        print('K-Fold - Counter: %d' % (counter))

        X_train_kf, X_test_kf = features.iloc[train_index], features.iloc[test_index]
        y_train_kf, y_test_kf = labels.iloc[train_index], labels.iloc[test_index]
        # Labels are flattened to 1-D, exactly as for the hold-out fit.
        classifier.fit(X_train_kf, np.array(y_train_kf).flatten())
        estimates_kf = classifier.predict(X_test_kf)
        elapsed_kf = time() - start_time

        # K-Fold Metrics
        precision_kf, recall_kf, fscore_kf, support_kf = score(y_test_kf, estimates_kf)
        print('K-Fold - Precision: {}'.format(precision_kf))
        print('K-Fold - Recall: {}'.format(recall_kf))
        print('K-Fold - Fscore: {}'.format(fscore_kf))
        print('K-Fold - Support: {}'.format(support_kf))

        #Save Metrics
        _append_metrics(method, counter, elapsed_kf,
                        [('Precision', precision_kf), ('Recall', recall_kf),
                         ('Fscore', fscore_kf), ('Support', support_kf)])
        # Fold matrices are appended without row or column labels.
        pd.crosstab(np.array(y_test_kf).flatten(), estimates_kf).to_csv(method+confusion_matrix_outfile, mode='a',header=False,index=False,sep=';')


### The metrics file (shared by all classifiers) and one confusion-matrix file per classifier will be found in the root path