## Import libraries

In [80]:
import numpy as np
import pandas as pd
import scipy as sc
import time
import matplotlib.pyplot as plt
%matplotlib inline

In [18]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score

## Read object-feature matrix

In [6]:
def _read_raw(path):
    """Read a comma-separated raw recording file and relabel its six columns."""
    # NOTE(review): read_table infers a header from the first line, and that header is
    # then overwritten below. If the files carry no header row, the first record is
    # consumed as column names — confirm against the data files.
    frame = pd.read_table(path, delimiter=',')
    frame.columns = ['id_user', 'activity', 'timestamp', 'x', 'y', 'z']
    return frame


data_wisdm = _read_raw("../data/WISDM/WISDM_ar_v1.1_raw_cleared.txt")
data_uschad = _read_raw("../data/USC-HAD/USC-HAD_cleared.txt")

In [7]:
# Pre-computed WISDM feature tables, one CSV per feature-extraction method.
df_expert_wisdm, df_ar_wisdm, df_ssa_wisdm, df_spl_wisdm = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../data/features/expert_wisdm.csv",
        "../data/features/ar_wisdm.csv",
        "../data/features/ssa_wisdm.csv",
        "../data/features/spl_wisdm_11.csv",
    )
)

In [8]:
# Pre-computed USC-HAD feature tables, one CSV per feature-extraction method.
df_expert_uschad, df_ar_uschad, df_ssa_uschad, df_spl_uschad = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../data/features/expert_uschad.csv",
        "../data/features/ar_uschad.csv",
        "../data/features/ssa_uschad.csv",
        "../data/features/spl_uschad_11.csv",
    )
)

# Classification

In [9]:
def get_internal_score(clf, X, y, max_iter=25):
    """Estimate overall and per-class accuracy by repeated random hold-out.

    In each of ``max_iter`` rounds the data is split 70/30 at random, ``clf`` is
    refitted on the larger part and its predictions on the smaller part are scored.

    Parameters
    ----------
    clf : estimator exposing ``fit`` and ``predict``; it is refitted every round.
    X : feature matrix accepted by ``train_test_split`` and ``clf.fit``.
    y : labels aligned with the rows of ``X``.
    max_iter : number of random splits to average over.

    Returns
    -------
    numpy.ndarray of length ``n_classes + 1``. Element 0 is the mean multi-class
    accuracy. Element ``k + 1`` is the mean one-vs-rest accuracy for the k-th
    label of ``np.unique(y)`` (i.e. labels in sorted order).
    """
    # Iterate over the labels that actually occur instead of assuming they are the
    # integers 0..n_classes-1; for such integer labels the result is unchanged.
    classes = np.unique(y)
    scores = np.zeros(classes.shape[0] + 1)
    for _ in range(max_iter):
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
        clf.fit(X_train, y_train)
        y_true = np.asarray(y_test)
        y_pred = np.asarray(clf.predict(X_test))
        scores[0] += accuracy_score(y_true, y_pred)
        for k, label in enumerate(classes):
            # Binarise both vectors against the current label (one-vs-rest).
            scores[k + 1] += accuracy_score((y_true == label).astype(int),
                                            (y_pred == label).astype(int))

    return scores / max_iter

In [10]:
def get_score(df, estimator, params_grid, test_size=0.3):
    """Tune ``estimator`` by grid search, then score the best configuration.

    Parameters
    ----------
    df : DataFrame with an 'activity' label column; every other column is used
        as a feature.
    estimator : scikit-learn estimator or pipeline to tune.
    params_grid : parameter grid passed to ``GridSearchCV``.
    test_size : fraction of rows withheld from the grid search.

    Returns
    -------
    The score vector produced by ``get_internal_score`` for the best estimator.

    Notes
    -----
    The final scoring re-splits the full data set, so its test parts overlap the
    rows used for hyper-parameter selection.
    """
    y = df['activity'].values
    # Select features by name rather than by position, so the label can never end
    # up inside X when 'activity' is not the first column.
    X = df.drop('activity', axis=1).values
    # Only the training part is needed here; the rest is just withheld from the search.
    X_train, _, y_train, _ = train_test_split(X, y, test_size=test_size)

    search = GridSearchCV(estimator, params_grid)
    search.fit(X_train, list(y_train))

    return get_internal_score(search.best_estimator_, X, list(y))

## Testing part 

In [32]:
# Hyper-parameter grids. The 'lr__' / 'svc__' prefixes address the pipeline steps
# named 'lr' and 'svc'; the random-forest grid uses plain parameter names.
parameters_lr = dict(
    lr__penalty=['l1', 'l2'],
    lr__class_weight=['balanced', None],
    lr__C=np.power(10., np.arange(-1, 4, 1)),
)
parameters_svm = dict(
    svc__kernel=['rbf'],
    svc__C=np.power(10., np.arange(-1, 4, 1)),
    svc__gamma=np.power(10., np.arange(-3, 2, 1)),
    svc__class_weight=['balanced', None],
)

parameters_rf = dict(
    n_estimators=[200],
    class_weight=['balanced', None],
    max_depth=[None, 3, 5, 11],
)

# Result stores: key -> score vector, one dict per data set.
scores_wisdm = dict()
scores_uschad = dict()

In [36]:
# Estimators shared by all experiments. The linear and kernel models are wrapped in a
# pipeline so features are standardised before fitting.
# The logistic-regression grid searches penalty='l1', which needs an L1-capable solver.
# 'liblinear' (the library default in older scikit-learn releases) is pinned explicitly
# so the grid stays valid where the default solver is 'lbfgs', which rejects 'l1'.
lr_pipeline = Pipeline([('scaler', StandardScaler()),
                        ('lr', LogisticRegression(solver='liblinear'))])
svc_pipeline = Pipeline([('scaler', StandardScaler()), ('svc', SVC())])
rfc = RFC()

**Expert** features:

In [37]:
# Grid-search and score each classifier on the WISDM expert features.
for key, model, grid in (('lr_expert', lr_pipeline, parameters_lr),
                         ('svm_expert', svc_pipeline, parameters_svm),
                         ('rf_expert', rfc, parameters_rf)):
    scores_wisdm[key] = get_score(df_expert_wisdm, model, grid)

In [38]:
# Grid-search and score each classifier on the USC-HAD expert features.
for key, model, grid in (('lr_expert', lr_pipeline, parameters_lr),
                         ('svm_expert', svc_pipeline, parameters_svm),
                         ('rf_expert', rfc, parameters_rf)):
    scores_uschad[key] = get_score(df_expert_uschad, model, grid)

**Autoregression model** features:

In [39]:
# Suffix appended to the result keys in the following cells (e.g. 'lr_ar_20').
# Presumably the number of autoregression coefficients used when the AR feature
# files were generated — TODO confirm; it does not affect the computation here.
n = 20

In [40]:
# Grid-search and score each classifier on the WISDM autoregression features.
for key_prefix, model, grid in (('lr_ar_', lr_pipeline, parameters_lr),
                                ('svm_ar_', svc_pipeline, parameters_svm),
                                ('rf_ar_', rfc, parameters_rf)):
    scores_wisdm[key_prefix + str(n)] = get_score(df_ar_wisdm, model, grid)

In [41]:
# Grid-search and score each classifier on the USC-HAD autoregression features.
for key_prefix, model, grid in (('lr_ar_', lr_pipeline, parameters_lr),
                                ('svm_ar_', svc_pipeline, parameters_svm),
                                ('rf_ar_', rfc, parameters_rf)):
    scores_uschad[key_prefix + str(n)] = get_score(df_ar_uschad, model, grid)

**Spectrum analysis** (SSA) features:

In [42]:
# Suffix appended to the result keys in the following cells (e.g. 'lr_ssa_20').
# Presumably the number of SSA components used when the feature files were
# generated — TODO confirm; it does not affect the computation here.
n = 20

In [43]:
# Grid-search and score each classifier on the WISDM SSA features.
for key_prefix, model, grid in (('lr_ssa_', lr_pipeline, parameters_lr),
                                ('svm_ssa_', svc_pipeline, parameters_svm),
                                ('rf_ssa_', rfc, parameters_rf)):
    scores_wisdm[key_prefix + str(n)] = get_score(df_ssa_wisdm, model, grid)

In [44]:
# Grid-search and score each classifier on the USC-HAD SSA features.
for key_prefix, model, grid in (('lr_ssa_', lr_pipeline, parameters_lr),
                                ('svm_ssa_', svc_pipeline, parameters_svm),
                                ('rf_ssa_', rfc, parameters_rf)):
    scores_uschad[key_prefix + str(n)] = get_score(df_ssa_uschad, model, grid)

**Spline** features:

In [None]:
# Suffix appended to the result keys in the following cells (e.g. 'lr_spl_11').
# Presumably matches the '_11' in the spline feature file names — TODO confirm;
# it does not affect the computation here.
n = 11

In [None]:
# The LR and SVM grids use pipeline-prefixed keys ('lr__*', 'svc__*'), so they must be
# searched over the scaler+classifier pipelines; a bare LogisticRegression() or SVC()
# rejects those parameter names. The shared estimators are used, as in the other cells.
scores_wisdm['lr_spl_' + str(n)] = get_score(df_spl_wisdm, lr_pipeline, parameters_lr)
scores_wisdm['svm_spl_' + str(n)] = get_score(df_spl_wisdm, svc_pipeline, parameters_svm)
scores_wisdm['rf_spl_' + str(n)] = get_score(df_spl_wisdm, rfc, parameters_rf)

In [None]:
# The LR and SVM grids use pipeline-prefixed keys ('lr__*', 'svc__*'), so they must be
# searched over the scaler+classifier pipelines; a bare LogisticRegression() or SVC()
# rejects those parameter names. The shared estimators are used, as in the other cells.
scores_uschad['lr_spl_' + str(n)] = get_score(df_spl_uschad, lr_pipeline, parameters_lr)
scores_uschad['svm_spl_' + str(n)] = get_score(df_spl_uschad, svc_pipeline, parameters_svm)
scores_uschad['rf_spl_' + str(n)] = get_score(df_spl_uschad, rfc, parameters_rf)

**Features union** approach:

In [77]:
# Column-wise union of the expert, AR and SSA feature tables (joined on the row index).
# The first column of the AR and SSA tables is left out; presumably it repeats the
# 'activity' label already present in the expert table — confirm against the CSVs.
df_all_wisdm = (
    df_expert_wisdm
    .join(df_ar_wisdm.iloc[:, 1:])
    .join(df_ssa_wisdm.iloc[:, 1:])
)

df_all_uschad = (
    df_expert_uschad
    .join(df_ar_uschad.iloc[:, 1:])
    .join(df_ssa_uschad.iloc[:, 1:])
)

print(df_all_wisdm.shape, df_all_uschad.shape)

((4321, 164), (13620, 164))


In [None]:
# Grid-search and score every classifier on the combined feature tables, printing the
# wall-clock seconds each run took. Runs execute in the listed order.
timed_runs = (
    (scores_wisdm, 'lr_all', df_all_wisdm, lr_pipeline, parameters_lr,
     'Logistic regression, all features, WISDM: {:.4f}'),
    (scores_wisdm, 'svm_all', df_all_wisdm, svc_pipeline, parameters_svm,
     'Support Vector Machine, all features, WISDM: {:.4f}'),
    (scores_wisdm, 'rf_all', df_all_wisdm, rfc, parameters_rf,
     'Random Forest, all features, WISDM: {:.4f}'),
    (scores_uschad, 'lr_all', df_all_uschad, lr_pipeline, parameters_lr,
     'Logistic regression, all features, USCHAD: {:.4f}'),
    (scores_uschad, 'svm_all', df_all_uschad, svc_pipeline, parameters_svm,
     'Support Vector Machine, all features, USCHAD: {:.4f}'),
    (scores_uschad, 'rf_all', df_all_uschad, rfc, parameters_rf,
     'Random Forest, all features, USCHAD: {:.4f}'),
)
for score_table, key, features, model, grid, report in timed_runs:
    start_time = time.time()
    score_table[key] = get_score(features, model, grid)
    print(report.format(time.time() - start_time))

Logistic regression, all features, WISDM: 303.3561
Support Vector Machine, all features, WISDM: 386.9076
Random Forest, all features, WISDM: 156.0494


## Results 

In [74]:
def _as_result_table(scores, raw_data):
    """Turn a {key: score vector} dict into a DataFrame sorted by key."""
    table = pd.DataFrame.from_dict(scores, orient='index').sort_index()
    # NOTE(review): the per-class column labels come from set iteration order, which
    # is not tied to the class order used when the scores were computed — verify that
    # the labels line up with the label encoding of the feature files.
    table.columns = ['all'] + list(set(raw_data['activity']))
    return table


results_wisdm = _as_result_table(scores_wisdm, data_wisdm)
results_wisdm.to_csv("results_wisdm_.csv")

results_uschad = _as_result_table(scores_uschad, data_uschad)
results_uschad.to_csv("results_uschad_.csv")

In [96]:
# Display the WISDM score table: one row per classifier/feature-set key; the first
# column ('all') is the overall accuracy, the remaining columns are per-class scores.
results_wisdm

Unnamed: 0,all,Standing,Walking,Upstairs,Sitting,Jogging,Downstairs
lr_all,0.952814,0.995713,0.975605,0.970455,0.996176,0.992968,0.974711
lr_ar_20,0.908373,0.983161,0.960247,0.950254,0.980015,0.987417,0.955652
lr_expert,0.854433,0.991149,0.907448,0.908867,0.990069,0.983963,0.927371
lr_ssa_20,0.842005,0.996268,0.86128,0.907016,0.995929,0.987695,0.935821
rf_all,0.934217,0.996577,0.965767,0.953863,0.996207,0.990686,0.965335
rf_ar_20,0.928697,0.988065,0.968697,0.956207,0.988157,0.989915,0.966353
rf_expert,0.932953,0.998088,0.958304,0.959322,0.997348,0.987849,0.964996
rf_ssa_20,0.924842,0.996638,0.94788,0.959753,0.99633,0.986307,0.962776
svm_all,0.974372,0.995127,0.98788,0.98751,0.994757,0.995497,0.987972
svm_ar_20,0.950594,0.983315,0.981311,0.979368,0.983932,0.993924,0.979337


In [97]:
# Display the USC-HAD score table: one row per classifier/feature-set key; the first
# column ('all') is the overall accuracy, the remaining columns are per-class scores.
results_uschad

Unnamed: 0,all,Standing,Elevator-up,Walking-forward,Sitting,Walking-downstairs,Sleeping,Elevator-down,Walking-upstairs,Jumping,Walking-right,Walking-left,Running
lr_all,0.775585,0.961145,0.949183,0.915634,0.984121,0.968458,0.999971,0.946383,0.971336,0.997768,0.934185,0.926246,0.99674
lr_ar_20,0.650876,0.936495,0.944395,0.865972,0.952501,0.934998,0.979041,0.943446,0.950171,0.990631,0.904425,0.906667,0.99301
lr_expert,0.668135,0.936711,0.940773,0.866197,0.978708,0.950739,1.0,0.939608,0.943857,0.992648,0.905335,0.894195,0.987499
lr_ssa_20,0.63675,0.923749,0.927332,0.885198,0.939726,0.931229,0.994459,0.941674,0.934635,0.996505,0.908057,0.896407,0.994528
rf_all,0.830602,0.970387,0.945707,0.948536,0.989819,0.979285,0.999745,0.946158,0.974547,0.997543,0.956417,0.955526,0.997533
rf_ar_20,0.703025,0.943651,0.946559,0.892785,0.961654,0.957709,0.981654,0.947636,0.948791,0.986314,0.921811,0.925688,0.991796
rf_expert,0.871229,0.97533,0.950426,0.967744,0.991826,0.986177,0.999902,0.949995,0.980822,0.998257,0.97301,0.972227,0.99674
rf_ssa_20,0.840147,0.966618,0.946432,0.962986,0.97723,0.98,0.995467,0.947068,0.977592,0.997866,0.964023,0.967998,0.997014
svm_all,0.750387,0.95397,0.946236,0.903798,0.983505,0.965991,0.999354,0.943955,0.965198,0.995154,0.927734,0.921664,0.994214
svm_ar_20,0.654606,0.94277,0.943054,0.87163,0.960666,0.930269,0.976662,0.943045,0.947234,0.989271,0.901419,0.906853,0.996339
