<a href="https://colab.research.google.com/github/Aniket-tempest/HAR-Employee-Identification/blob/main/HAR_MachineLearning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive

In [None]:
# Mount Google Drive at /content/drive; the dataset CSVs are read later from
# its MyDrive/data folder.
drive.mount('/content/drive')

Mounted at /content/drive


# Importing Libraries and Different Classifiers

In [None]:
import os
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
import sklearn
from sklearn import metrics

In [None]:
# Import different classifiers
from sklearn import svm
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier, AdaBoostClassifier

# Functions

In [None]:
def read_data(file, random_state=None):
    """
    Load a feature CSV and return row-shuffled features and activity labels.

    file: path or buffer accepted by ``pd.read_csv``. The table must contain
        the columns 'subject', 'Activity' and 'ActivityName'.

    random_state: optional seed forwarded to the shuffle. ``None`` (default)
        gives a different row order on every call; pass an int to make the
        order reproducible across runs.

    return: ``(X, y)`` as numpy arrays. ``X`` holds every column except the
        three listed above, ``y`` holds 'ActivityName'. Row ``i`` of ``X``
        belongs to ``y[i]``.
    """
    data = pd.read_csv(file)

    # Shuffle whole rows (features and label together) so X and y stay
    # aligned; frac=1 keeps every row exactly once.
    data = data.sample(frac=1, random_state=random_state)

    X_data = data.drop(columns=['subject', 'Activity', 'ActivityName'])
    y_data = data['ActivityName']

    return np.array(X_data), np.array(y_data)

In [None]:
def train_model(train_x, train_y, model_name='NB', validation=None):
    """
    Fit one of the supported classifiers and optionally print validation metrics.

    Possible model names: ['NB', 'SVM', 'XGB', 'MLP', 'ADA', 'BAG', 'RF', 'KNN']
    default = 'NB'

    Any other name also falls back to GaussianNB (kept for backward
    compatibility), but a warning is emitted so a typo such as 'svm' is not
    silently trained as Naive Bayes.

    train_x, train_y: training features and labels, passed to ``model.fit``.

    validation: (val_x, val_y) tuple for validation scores. When given, the
        accuracy, the confusion matrix and the macro-averaged recall,
        precision and F1 over all classes are printed.

    return: trained model
    """
    model = None
    if model_name == 'SVM':
        model = svm.SVC(gamma='scale', probability=True)
    elif model_name == 'XGB':
        # NOTE(review): some xgboost releases are reported to reject
        # non-integer class labels; confirm string labels work with the
        # installed version, otherwise encode them before fitting.
        model = XGBClassifier(n_estimators=200, max_depth=5, n_jobs=2)
    elif model_name == 'MLP':
        model = MLPClassifier(hidden_layer_sizes=(100,100,100), max_iter=800, alpha=0.0001,
                     solver='sgd', verbose=10, tol=0.000000001)
    elif model_name == 'ADA':
        model = AdaBoostClassifier(n_estimators=50)
    elif model_name == 'BAG':
        model = BaggingClassifier(n_jobs=2, n_estimators=50)
    elif model_name == 'RF':
        model = RandomForestClassifier(n_estimators=200, max_depth=10)
    elif model_name == 'KNN':
        model = KNeighborsClassifier(n_neighbors=5, weights='distance', algorithm='auto', leaf_size=30, p=2, metric='minkowski', metric_params=None, n_jobs=None)
    else:
        if model_name != 'NB':
            import warnings
            warnings.warn(
                f"Unknown model_name {model_name!r}; falling back to GaussianNB ('NB')."
            )
        model = GaussianNB()

    model.fit(train_x, train_y)

    if validation is not None:
        val_x, val_y = validation[0], validation[1]
        y_hat = model.predict(val_x)
        acc = metrics.accuracy_score(val_y, y_hat)
        print(f"Validation Accuracy in '{model_name}' = {acc}")
        cm = metrics.confusion_matrix(val_y, y_hat)
        print(cm)
        # Macro-average over every class. Reading only the top-left 2x2 corner
        # of the confusion matrix is valid for binary problems only and
        # ignores all other classes. zero_division=0 scores a class that is
        # never predicted as 0 instead of producing NaN.
        precision, recall, f1, _ = metrics.precision_recall_fscore_support(
            val_y, y_hat, average='macro', zero_division=0
        )
        print(f"Recall in '{model_name}' = {recall}")
        print(f"Precision in '{model_name}' = {precision}")
        print(f"F1 Score in '{model_name}' = {f1}")

    return model

# Load and Inspect the Data

In [None]:
# Build the paths from the absolute Drive mount point ('/content/drive') so
# loading does not depend on the notebook's current working directory.
DATA_DIR = '/content/drive/MyDrive/data'
train_X, train_y = read_data(os.path.join(DATA_DIR, 'train.csv'))
test_X, test_y = read_data(os.path.join(DATA_DIR, 'test.csv'))

In [None]:
# Sanity check: feature-matrix and label-vector shapes for each split.
print("Train  : ", *(arr.shape for arr in (train_X, train_y)))
print("Test   : ", *(arr.shape for arr in (test_X, test_y)))

Train  :  (7352, 561) (7352,)
Test   :  (2947, 561) (2947,)


In [None]:
# Display the label array to confirm the targets are activity-name strings.
train_y

array(['LAYING', 'LAYING', 'SITTING', ..., 'WALKING_UPSTAIRS', 'STANDING',
       'WALKING'], dtype=object)

# Classification

In [None]:
# Random forest ('RF'), scored against the test split.
model1 = train_model(
    train_X,
    train_y,
    model_name='RF',
    validation=(test_X, test_y),
)

Validation Accuracy in 'RF' = 0.9226331862911435
[[537   0   0   0   0   0]
 [  0 435  56   0   0   0]
 [  0  46 486   0   0   0]
 [  0   0   0 477  11   8]
 [  0   0   0  26 354  40]
 [  0   0   0  34   7 430]]
Recall in 'RF' = 1.0
Precision in 'RF' = 1.0
F1 Score in 'RF' = 1.0


In [None]:
# Bagging ensemble ('BAG'), scored against the test split.
model2 = train_model(
    train_X,
    train_y,
    model_name='BAG',
    validation=(test_X, test_y),
)

Validation Accuracy in 'BAG' = 0.9039701391245334
[[537   0   0   0   0   0]
 [  0 403  88   0   0   0]
 [  0  52 480   0   0   0]
 [  0   0   0 477  13   6]
 [  0   0   0   8 367  45]
 [  0   0   0  58  13 400]]
Recall in 'BAG' = 1.0
Precision in 'BAG' = 1.0
F1 Score in 'BAG' = 1.0


In [None]:
# AdaBoost ('ADA'), scored against the test split.
model3 = train_model(
    train_X,
    train_y,
    model_name='ADA',
    validation=(test_X, test_y),
)

Validation Accuracy in 'ADA' = 0.5310485239226331
[[537   0   0   0   0   0]
 [  0   0 491   0   0   0]
 [  0   0 532   0   0   0]
 [  0   0   0 496   0   0]
 [  0   0   0 420   0   0]
 [  0   0   0 471   0   0]]
Recall in 'ADA' = 1.0
Precision in 'ADA' = 1.0
F1 Score in 'ADA' = 1.0


In [None]:
# Gaussian naive Bayes ('NB'), scored against the test split.
model4 = train_model(
    train_X,
    train_y,
    model_name='NB',
    validation=(test_X, test_y),
)

Validation Accuracy in 'NB' = 0.7702748557855447
[[323 211   0   0   0   3]
 [  5 368 111   0   0   7]
 [  8  54 455   0   0  15]
 [  0   0   0 416  42  38]
 [  0   0   0  80 257  83]
 [  0   0   0   9  11 451]]
Recall in 'NB' = 0.6048689138576779
Precision in 'NB' = 0.9847560975609756
F1 Score in 'NB' = 0.7494199535962879


In [None]:
# Support vector classifier ('SVM'), scored against the test split.
model5 = train_model(
    train_X,
    train_y,
    model_name='SVM',
    validation=(test_X, test_y),
)

Validation Accuracy in 'SVM' = 0.9504580929759077
[[537   0   0   0   0   0]
 [  0 438  51   0   0   2]
 [  0  29 503   0   0   0]
 [  0   0   0 488   3   5]
 [  0   0   0  10 384  26]
 [  0   0   0  20   0 451]]
Recall in 'SVM' = 1.0
Precision in 'SVM' = 1.0
F1 Score in 'SVM' = 1.0


In [None]:
# XGBoost ('XGB'), scored against the test split.
model6 = train_model(
    train_X,
    train_y,
    model_name='XGB',
    validation=(test_X, test_y),
)

Validation Accuracy in 'XGB' = 0.9392602646759416
[[537   0   0   0   0   0]
 [  0 426  63   0   0   2]
 [  0  30 502   0   0   0]
 [  0   0   0 487   5   4]
 [  0   0   0   9 383  28]
 [  0   0   0  32   6 433]]
Recall in 'XGB' = 1.0
Precision in 'XGB' = 1.0
F1 Score in 'XGB' = 1.0


In [None]:
# k-nearest neighbours ('KNN'), scored against the test split.
model7 = train_model(
    train_X,
    train_y,
    model_name='KNN',
    validation=(test_X, test_y),
)

Validation Accuracy in 'KNN' = 0.9002375296912114
[[534   2   1   0   0   0]
 [  0 388 100   0   0   3]
 [  0  37 495   0   0   0]
 [  0   0   0 484  10   2]
 [  0   0   0  44 331  45]
 [  0   0   0  38  12 421]]
Recall in 'KNN' = 0.996268656716418
Precision in 'KNN' = 1.0
F1 Score in 'KNN' = 0.9981308411214954


In [None]:
# Multi-layer perceptron ('MLP'), scored against the test split.
# train_model builds it with verbose=10, so per-iteration loss is printed.
model8 = train_model(
    train_X,
    train_y,
    model_name='MLP',
    validation=(test_X, test_y),
)

Iteration 1, loss = 1.62746714
Iteration 2, loss = 1.19476348
Iteration 3, loss = 0.90534199
Iteration 4, loss = 0.74026797
Iteration 5, loss = 0.64002235
Iteration 6, loss = 0.57059450
Iteration 7, loss = 0.51745949
Iteration 8, loss = 0.47126123
Iteration 9, loss = 0.43067241
Iteration 10, loss = 0.39658304
Iteration 11, loss = 0.36767495
Iteration 12, loss = 0.34079071
Iteration 13, loss = 0.31958282
Iteration 14, loss = 0.30046327
Iteration 15, loss = 0.28397574
Iteration 16, loss = 0.26819889
Iteration 17, loss = 0.25469793
Iteration 18, loss = 0.24222361
Iteration 19, loss = 0.23157087
Iteration 20, loss = 0.22005752
Iteration 21, loss = 0.21231349
Iteration 22, loss = 0.20306080
Iteration 23, loss = 0.19541170
Iteration 24, loss = 0.18810904
Iteration 25, loss = 0.18146156
Iteration 26, loss = 0.17505310
Iteration 27, loss = 0.16923577
Iteration 28, loss = 0.16281571
Iteration 29, loss = 0.15716938
Iteration 30, loss = 0.15196712
Iteration 31, loss = 0.14695784
Iteration 32, los