<a href="https://colab.research.google.com/github/Ivan-Nebogatikov/HumanActivityRecognition/blob/master/HierarchyClassifiers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import json
from datetime import datetime
from datetime import date
from math import sqrt

def getValue(x):
    l = list(json.loads(x.replace('\'', '"')))
    return l

def getDiff(x):
    res = list()
    for i, v in enumerate(x):
        res.append(v)
        #if i > 0:
        #    res.append(v - x[i - 1])
        #else:
        #    res.append(0)
    return res

features = pd.read_csv('https://raw.githubusercontent.com/Ivan-Nebogatikov/HumanActivityRecognition/master/datasets/WinterDataset/dataset.csv', sep=r'\s*,\s*')
print(features)
activities = list(sorted(set(features['ACT'])))
print("Activities:", activities)


states = np.array(features['STATE'])
labels = np.array(features['ACT'])

passive_features = features[features['STATE'] == 'passive']
passive_labels = np.array(passive_features['ACT'])

active_features = features[features['STATE'] == 'active']
active_labels = np.array(active_features['ACT'])

passive_features = passive_features.drop(['ACT', 'STATE'], axis = 1)
active_features = active_features.drop(['ACT', 'STATE'], axis = 1)



        HR  SC  WF  BT    STATE        ACT
0        0   0 -73 -54  passive        eat
1        0   0 -76 -39  passive        eat
2        0   0 -73 -27  passive        eat
3        0   0 -71 -27  passive        eat
4        0   0 -73 -27  passive        eat
...    ...  ..  ..  ..      ...        ...
26704  110   0 -53 -45   active  household
26705  112   0 -53 -40   active  household
26706  113   0 -53 -40   active  household
26707  113   0 -53 -40   active  household
26708  113   0 -53 -37   active  household

[26709 rows x 6 columns]
Activities: ['eat', 'household', 'inactive', 'preparing_food', 'shower', 'sport', 'videogames', 'walk', 'work']


In [2]:
from sklearn import svm
from sklearn.naive_bayes import GaussianNB
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
import pandas as pd

methods = {
    "Random Forest" : lambda:RandomForestClassifier(n_estimators = 400, random_state = 3, class_weight='balanced'),
    # "MLP" : lambda:MLPClassifier(random_state=1, max_iter=300),
    "K-neigh" : lambda:KNeighborsClassifier(), # default k = 5
    # "Bayes" : lambda:GaussianNB(),
    # "AdaBoost" : lambda:AdaBoostClassifier(),
    # "SVM" : lambda:svm.SVC(probability=True, class_weight='balanced')
}

In [3]:

def PredictWithHierarcy(base_classifier, passive_classifier, active_classifier):
    train_features, test_features, train_labels, test_labels = train_test_split(features, states, test_size = 0.25, random_state = 242)
    base_classifier = base_classifier()
    passive_classifier = passive_classifier()
    active_classifier = active_classifier()
    # print('Training Features Shape:', train_features.shape)
    # print('Testing Features Shape:', test_features.shape)
    # print("\n")

    base_classifier.fit(train_features.drop(['ACT', 'STATE'], axis = 1), train_labels);

    passive_train_features, passive_test_features, passive_train_labels, passive_test_labels = train_test_split(passive_features, passive_labels, test_size = 0.25, random_state = 242)
    passive_classifier.fit(passive_train_features, passive_train_labels);

    active_train_features, active_test_features, active_train_labels, active_test_labels = train_test_split(active_features, active_labels, test_size = 0.25, random_state = 242)
    active_classifier.fit(active_train_features, active_train_labels);

    t_f = test_features.drop(['ACT', 'STATE'], axis = 1)
    pr_states = base_classifier.predict(t_f)
    p_ind = list()
    a_ind = list()
    for i, x in enumerate(pr_states):
      if x == 'passive':
        p_ind.append(i)
      else:
        a_ind.append(i)
    p_s = test_features.iloc[p_ind, :]
    a_s = test_features.iloc[a_ind, :]

    result = 0
    acts = list(set(test_features['ACT']))
    presicionTp = { acts[i] : 0 for i in range(0, len(acts) ) }
    presicionFp = { acts[i] : 0 for i in range(0, len(acts) ) }
    p_a = passive_classifier.predict(p_s.drop(['ACT', 'STATE'], axis = 1))
    for i in range(0, len(p_a)):
      v = p_s.iloc[i]
      if (p_a[i] == v['ACT']):
        result = result + 1
        presicionTp[p_a[i]] = presicionTp[p_a[i]] + 1
      else:
        presicionFp[p_a[i]] = presicionFp[p_a[i]] + 1

    a_a = active_classifier.predict(a_s.drop(['ACT', 'STATE'], axis = 1))
    for i in range(0, len(a_a)):
      v = a_s.iloc[i]
      if (a_a[i] == v['ACT']):
        result = result + 1
        presicionTp[a_a[i]] = presicionTp[a_a[i]] + 1
      else:
        presicionFp[a_a[i]] = presicionFp[a_a[i]] + 1

    presicion = { key : presicionTp[key] / (presicionFp[key] + presicionTp[key]) for key in presicionTp.keys() }

    return (result / len(test_features), presicion) 
  
accuracies = {}
for base_name, base_value in methods.items():
  for passive_name, passive_value in methods.items():
    for active_name, active_value in methods.items():
      print(base_name + " " + passive_name + " " + active_name + "")
      accuracy, presicion = PredictWithHierarcy(base_value, passive_value, active_value)
      accuracies[base_name + " " + passive_name + " " + active_name] = accuracy
      print(accuracy)
      print(pd.DataFrame.from_dict(presicion, orient='index'))
      print("\n")
df = pd.DataFrame(accuracies.items(), columns=["Method", "Accuracy"])
print(df)

Random Forest Random Forest Random Forest
0.8388739143456124
                       0
work            0.949971
videogames      0.785592
preparing_food  1.000000
household       1.000000
eat             0.597633
sport           0.474490
walk            0.714697
inactive        0.878812
shower          0.804097


Random Forest Random Forest K-neigh
0.8357292602575621
                       0
work            0.949971
videogames      0.785592
preparing_food  0.996183
household       1.000000
eat             0.597633
sport           0.710843
walk            0.657623
inactive        0.878812
shower          0.773284


Random Forest K-neigh Random Forest
0.8321353698712189
                       0
work            0.930738
videogames      0.752679
preparing_food  1.000000
household       1.000000
eat             0.685039
sport           0.474490
walk            0.714697
inactive        0.865587
shower          0.804097


Random Forest K-neigh K-neigh
0.8289907157831686
                       0

In [4]:
acc = df.sort_values(by=["Accuracy"])
with pd.option_context('display.max_rows', None, 'display.max_columns', None):  # more options can be specified also
    print(acc)
  

                                      Method  Accuracy
7                    K-neigh K-neigh K-neigh  0.821953
6              K-neigh K-neigh Random Forest  0.824798
3              Random Forest K-neigh K-neigh  0.828991
5              K-neigh Random Forest K-neigh  0.829739
2        Random Forest K-neigh Random Forest  0.832135
4        K-neigh Random Forest Random Forest  0.832585
1        Random Forest Random Forest K-neigh  0.835729
0  Random Forest Random Forest Random Forest  0.838874
