## Import libraries

In [1]:
import numpy as np
import pandas as pd
import scipy as sc
import matplotlib.pyplot as plt
%matplotlib inline

In [9]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score

## Read object-feature matrix

In [51]:
# Raw accelerometer recordings (comma-separated text files); both frames are
# relabelled to the same six column names.
# NOTE(review): with pandas' default header handling the first line of each
# file is consumed as a header and then overwritten below - confirm that both
# files really start with a header row.
data_wisdm = pd.read_csv("../data/WISDM/WISDM_ar_v1.1_raw_cleared.txt", sep=',')
data_wisdm.columns = ['id_user', 'activity', 'timestamp', 'x', 'y', 'z']

data_uschad = pd.read_csv("../data/USC-HAD/USC-HAD_cleared.txt", sep=',')
data_uschad.columns = ['id_user', 'activity', 'timestamp', 'x', 'y', 'z']

In [3]:
# Pre-computed WISDM feature tables, one per feature-extraction method.
df_expert_wisdm, df_ar_wisdm, df_ssa_wisdm = (
    pd.read_csv("../data/features/expert_wisdm.csv"),
    pd.read_csv("../data/features/ar_wisdm.csv"),
    pd.read_csv("../data/features/ssa_wisdm.csv"),
)

In [4]:
# Pre-computed USC-HAD feature tables, one per feature-extraction method.
df_expert_uschad, df_ar_uschad, df_ssa_uschad = (
    pd.read_csv("../data/features/expert_uschad.csv"),
    pd.read_csv("../data/features/ar_uschad.csv"),
    pd.read_csv("../data/features/ssa_uschad.csv"),
)

# Classification

In [39]:
def get_internal_score(clf, X, y, max_iter=25, random_state=None):
    """Estimate overall and per-class accuracy by repeated random hold-out.

    In each of ``max_iter`` rounds the data are split 70/30 with
    ``train_test_split``, ``clf`` is refit on the training part and its
    predictions on the held-out part are scored. Scores are averaged over
    all rounds.

    Parameters
    ----------
    clf : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``. It is refit in
        place on every round.
    X : array-like
        Feature matrix, one row per sample.
    y : array-like
        Class label of every sample. Labels may be of any type that
        ``np.unique`` can sort; they do not have to be ``0..n_classes-1``.
    max_iter : int, default 25
        Number of random splits to average over.
    random_state : int or None, default None
        Seed for the sequence of splits. ``None`` leaves the splits to
        NumPy's global random state, as before this parameter existed.
        Any randomness inside ``clf`` itself is not affected.

    Returns
    -------
    numpy.ndarray of shape (n_classes + 1,)
        Element 0 is the mean multiclass accuracy. Element ``k + 1`` is the
        mean one-vs-rest accuracy for the ``k``-th label of
        ``np.unique(y)`` (sorted order).
    """
    # Use the actual label values instead of assuming they equal 0..nb-1.
    labels = np.unique(y)
    scores = np.zeros(labels.shape[0] + 1)

    # A single RandomState shared by all rounds gives a reproducible sequence
    # of *different* splits; None falls back to the global RNG.
    rng = None if random_state is None else np.random.RandomState(random_state)

    for _ in range(max_iter):
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.3, random_state=rng)
        clf.fit(X_train, y_train)

        # Convert once per round rather than once per class.
        y_true = np.asarray(y_test)
        y_pred = np.asarray(clf.predict(X_test))

        scores[0] += accuracy_score(y_true, y_pred)
        for k, label in enumerate(labels):
            # One-vs-rest accuracy: "is this label" against "predicted this label".
            scores[k + 1] += accuracy_score((y_true == label).astype(int),
                                            (y_pred == label).astype(int))

    return scores / max_iter

In [40]:
def get_score(df, estimator, params_grid, test_size=0.3, max_iter=25):
    """Tune ``estimator`` by grid search, then score the tuned model.

    The label is read from the ``'activity'`` column and every other column
    is used as a feature. Hyper-parameters are selected with
    ``GridSearchCV`` on a random ``1 - test_size`` fraction of the rows;
    the best estimator is then evaluated with ``get_internal_score`` on
    repeated random splits of the *whole* table.

    NOTE(review): rows used for hyper-parameter selection also end up in
    the evaluation splits, so the reported accuracies may be optimistic.

    Parameters
    ----------
    df : pandas.DataFrame
        Feature table containing an ``'activity'`` label column.
    estimator : estimator
        Unfitted scikit-learn style classifier passed to ``GridSearchCV``.
    params_grid : dict
        Parameter grid forwarded to ``GridSearchCV``.
    test_size : float, default 0.3
        Fraction of rows left out of the grid search.
    max_iter : int, default 25
        Number of random evaluation splits, forwarded to
        ``get_internal_score``.

    Returns
    -------
    numpy.ndarray
        Whatever ``get_internal_score`` returns: overall accuracy followed
        by one entry per class.
    """
    y = df['activity'].values
    # Select features by name rather than by position so the label can never
    # slip into X if 'activity' is not the first column.
    X = df.drop('activity', axis=1).values

    # Only the training part is needed here; the held-out part is discarded
    # because evaluation re-splits the full data set below.
    X_train, _, y_train, _ = train_test_split(X, y, test_size=test_size)

    search = GridSearchCV(estimator, params_grid)
    search.fit(X_train, list(y_train))
    best_estimator = search.best_estimator_

    return get_internal_score(best_estimator, X, list(y), max_iter=max_iter)

## Testing part 

In [27]:
# Hyper-parameter grids explored by GridSearchCV, one per classifier family.

# Logistic regression. The solver is pinned to 'liblinear' because the grid
# includes the L1 penalty: 'liblinear' was the library default when this grid
# was written, whereas the newer default solver ('lbfgs') rejects
# penalty='l1', which makes half of the grid fail to fit.
# NOTE(review): recent scikit-learn releases deprecate 'liblinear' for
# multiclass targets - check against the installed version.
parameters_lr = {'penalty': ['l1', 'l2'],
                 'solver': ['liblinear'],
                 'class_weight': ['balanced', None],
                 'C': 10. ** np.arange(-1, 4, 1)}

# RBF-kernel SVM: C in 1e-1..1e3 and gamma in 1e-3..1e1, decade steps.
parameters_svm = {'kernel': ['rbf'],
                  'C': 10. ** np.arange(-1, 4, 1),
                  'gamma': 10. ** np.arange(-3, 2, 1),
                  'class_weight': ['balanced', None]}

# Random forest: fixed ensemble size, varying tree depth.
parameters_rf = {'n_estimators': [200],
                 'class_weight': ['balanced', None],
                 'max_depth': [None, 3, 5, 11]}

# Result stores: '<model>_<feature set>' -> score vector from get_score.
scores_wisdm = {}
scores_uschad = {}

**Expert** features:

In [41]:
# Tune and score each classifier family on the WISDM expert features.
# Every call runs a full grid search followed by repeated random splits,
# so this cell is slow and its numbers change between runs (no seed is set).
scores_wisdm['lr_expert'] = get_score(df_expert_wisdm, LogisticRegression(), parameters_lr)
scores_wisdm['svm_expert'] = get_score(df_expert_wisdm, SVC(), parameters_svm)
scores_wisdm['rf_expert'] = get_score(df_expert_wisdm, RFC(), parameters_rf)

In [42]:
# Same three classifier families, scored on the USC-HAD expert features.
scores_uschad['lr_expert'] = get_score(df_expert_uschad, LogisticRegression(), parameters_lr)
scores_uschad['svm_expert'] = get_score(df_expert_uschad, SVC(), parameters_svm)
scores_uschad['rf_expert'] = get_score(df_expert_uschad, RFC(), parameters_rf)

Features from the **autoregressive model**:

In [43]:
# Suffix used in the result keys below ('lr_ar_20', ...). It does not affect
# the computation in this notebook.
# NOTE(review): presumably the model order used when the AR feature files
# were generated - confirm.
n = 20

In [44]:
# Tune and score each classifier family on the WISDM autoregressive features;
# results are keyed '<model>_ar_<n>'.
scores_wisdm['lr_ar_' + str(n)] = get_score(df_ar_wisdm, LogisticRegression(), parameters_lr)
scores_wisdm['svm_ar_' + str(n)] = get_score(df_ar_wisdm, SVC(), parameters_svm)
scores_wisdm['rf_ar_' + str(n)] = get_score(df_ar_wisdm, RFC(), parameters_rf)

In [45]:
# Same three classifier families, scored on the USC-HAD autoregressive features.
scores_uschad['lr_ar_' + str(n)] = get_score(df_ar_uschad, LogisticRegression(), parameters_lr)
scores_uschad['svm_ar_' + str(n)] = get_score(df_ar_uschad, SVC(), parameters_svm)
scores_uschad['rf_ar_' + str(n)] = get_score(df_ar_uschad, RFC(), parameters_rf)

Features from **spectrum analysis**:

In [46]:
# Suffix used in the result keys below ('lr_ssa_20', ...). It does not affect
# the computation in this notebook.
# NOTE(review): presumably the number of components used when the SSA
# feature files were generated - confirm.
n = 20

In [47]:
# Tune and score each classifier family on the WISDM SSA features;
# results are keyed '<model>_ssa_<n>'.
scores_wisdm['lr_ssa_' + str(n)] = get_score(df_ssa_wisdm, LogisticRegression(), parameters_lr)
scores_wisdm['svm_ssa_' + str(n)] = get_score(df_ssa_wisdm, SVC(), parameters_svm)
scores_wisdm['rf_ssa_' + str(n)] = get_score(df_ssa_wisdm, RFC(), parameters_rf)

In [48]:
# Same three classifier families, scored on the USC-HAD SSA features.
scores_uschad['lr_ssa_' + str(n)] = get_score(df_ssa_uschad, LogisticRegression(), parameters_lr)
scores_uschad['svm_ssa_' + str(n)] = get_score(df_ssa_uschad, SVC(), parameters_svm)
scores_uschad['rf_ssa_' + str(n)] = get_score(df_ssa_uschad, RFC(), parameters_rf)

## Results 

In [55]:
# Build one table per data set: a row per '<model>_<feature set>' key, with
# the overall accuracy first and then one accuracy column per class.
# NOTE(review): the per-class scores are ordered by integer class code
# (get_internal_score loops i = 0..nb-1), while the column names below come
# from iterating a set, whose order is not guaranteed to be stable between
# interpreter runs. Confirm the names line up with the class codes used in
# the feature files before trusting the per-class columns.
results_wisdm = pd.DataFrame.from_dict(scores_wisdm, orient='index')
results_wisdm.columns = ['all'] + list(set(data_wisdm['activity']))
# Written to the notebook's working directory.
results_wisdm.to_csv("results_wisdm.csv")

results_uschad = pd.DataFrame.from_dict(scores_uschad, orient='index')
results_uschad.columns = ['all'] + list(set(data_uschad['activity']))
results_uschad.to_csv("results_uschad.csv")

In [53]:
# Show the WISDM accuracy table ('all' = overall accuracy, then per class).
results_wisdm

Unnamed: 0,all,Standing,Walking,Upstairs,Sitting,Jogging,Downstairs
lr_ssa_20,0.84111,0.99596,0.865752,0.903315,0.995898,0.985505,0.93579
lr_expert,0.855513,0.991981,0.893816,0.910625,0.990902,0.984456,0.939244
lr_ar_20,0.910224,0.98313,0.960524,0.951056,0.980817,0.987633,0.957286
svm_ar_20,0.938072,0.987016,0.973724,0.968173,0.989453,0.99081,0.96697
svm_ssa_20,0.443423,0.946214,0.443423,0.889746,0.93613,0.754356,0.916978
svm_expert,0.972737,0.999136,0.98714,0.982113,0.998365,0.993246,0.985474
rf_ar_20,0.925705,0.988342,0.972799,0.958828,0.984765,0.987078,0.959599
rf_expert,0.933138,0.99744,0.958643,0.960894,0.996669,0.988281,0.964348
rf_ssa_20,0.925181,0.9967,0.94825,0.959692,0.996453,0.986769,0.962498


In [54]:
# Show the USC-HAD accuracy table ('all' = overall accuracy, then per class).
results_uschad

Unnamed: 0,all,Standing,Elevator-up,Walking-forward,Sitting,Walking-downstairs,Sleeping,Elevator-down,Walking-upstairs,Jumping,Walking-right,Walking-left,Running
lr_ssa_20,0.641713,0.934929,0.927048,0.885394,0.941743,0.931973,0.994195,0.941282,0.934518,0.996632,0.906177,0.89488,0.994655
lr_expert,0.665698,0.93582,0.939697,0.866256,0.978062,0.950876,1.0,0.937474,0.945903,0.992717,0.902643,0.894596,0.987352
lr_ar_20,0.648164,0.934968,0.945512,0.864621,0.949946,0.933647,0.978443,0.943231,0.950866,0.990152,0.905267,0.906549,0.993128
svm_ar_20,0.707871,0.949829,0.943387,0.900685,0.966236,0.940627,0.982398,0.941185,0.956319,0.990338,0.923603,0.924572,0.996564
svm_ssa_20,0.626657,0.97816,0.946442,0.948595,0.979971,0.941811,0.982252,0.946951,0.955859,0.96374,0.726089,0.945727,0.937719
svm_expert,0.835458,0.957132,0.937327,0.961703,0.983573,0.984444,0.999883,0.936466,0.982477,0.998522,0.969006,0.96232,0.998062
rf_ar_20,0.703935,0.942927,0.946579,0.893989,0.962203,0.958013,0.981654,0.945688,0.947264,0.986676,0.923505,0.928057,0.991317
rf_expert,0.871698,0.975869,0.950034,0.9693,0.992188,0.985844,0.999951,0.949692,0.980538,0.998277,0.972795,0.972217,0.996691
rf_ssa_20,0.842467,0.966216,0.948057,0.963299,0.976221,0.979677,0.995193,0.948243,0.977944,0.997954,0.96512,0.969662,0.997347
