In [14]:
import pandas as pd
import numpy as np
from sklearn.externals import joblib
from utils.classifications_utils import *
from utils.data_processing_utils import *
from utils.data_visualization_utils import *
from utils.metrics_utils import *
from utils.grid_search_utils import *
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier, AdaBoostClassifier
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import warnings
# Ignore every warning for the rest of the session; note this also hides
# deprecation notices raised by the libraries imported above.
warnings.filterwarnings("ignore")
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [15]:
import os

# Directory (with trailing slash) that every joblib.dump / joblib.load call
# below prefixes to its file name.
FOLDER_PATH = "pickled_models/adaBoost/"

# joblib.dump does not create missing directories, so make sure the target
# exists before any model is persisted (keeps Restart & Run All working on a
# fresh checkout). Written without exist_ok so it also runs on Python 2.
if not os.path.isdir(FOLDER_PATH):
    os.makedirs(FOLDER_PATH)

# Accelerometer


In [68]:
# Load the accelerometer feature table; the first CSV column is used as index.
df_acc = pd.read_csv("acc_transformed_3_min.csv", index_col=0)
# Every column except the last one is a feature; the last column is "label".
X_acc = df_acc.iloc[:, :-1]
y_acc = df_acc["label"]
# Show the first rows so the table recorded under this cell is reproduced on
# re-run (previously the cell ended in an assignment and displayed nothing).
df_acc.head()

Unnamed: 0,mu_x_acc,mu_y_acc,mu_z_acc,std_x_acc,std_y_acc,std_z_acc,avg_resultant_acc_acc,bins_0_acc,bins_1_acc,bins_2_acc,...,bins_24_acc,bins_25_acc,bins_26_acc,bins_27_acc,bins_28_acc,bins_29_acc,mean_average_difference_x_acc,mean_average_difference_y_acc,mean_average_difference_z_acc,label
0,-0.509557,-0.279534,1.018882,0.346928,0.273213,0.079338,1.239601,1.0,4.0,7.0,...,17.0,22.0,16.0,15.0,5.0,2.0,0.287634,0.210639,0.063305,0
1,-0.501934,-0.278313,1.018911,0.343203,0.273325,0.079296,1.235087,1.0,4.0,6.0,...,17.0,22.0,16.0,15.0,5.0,2.0,0.283445,0.211025,0.063272,0
2,-0.498901,-0.276406,1.018525,0.343154,0.272539,0.079937,1.23271,1.0,4.0,6.0,...,17.0,22.0,16.0,15.0,5.0,2.0,0.282941,0.208936,0.063698,0
3,-0.491748,-0.267881,1.018433,0.343136,0.252441,0.080053,1.224862,1.0,4.0,6.0,...,17.0,22.0,16.0,15.0,5.0,2.0,0.281943,0.199781,0.063799,0
4,-0.485814,-0.267094,1.017896,0.344372,0.251208,0.080409,1.222093,1.0,4.0,6.0,...,16.0,22.0,16.0,15.0,5.0,2.0,0.282471,0.198947,0.06439,0


In [17]:
# Hold out 20% of the accelerometer rows for testing; fixed seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X_acc,
    y_acc,
    test_size=0.2,
    random_state=42,
)

In [18]:
# Base estimator and the hyper-parameter candidates explored for the
# accelerometer features.
clf = AdaBoostClassifier(random_state=0)
param_grid = dict(
    n_estimators=[10, 300],
    learning_rate=[1, 3, 5],
)

# model_selection is a project helper; later cells read grid_scores_ from and
# call predict/score on its result (presumably a fitted grid search -- confirm
# in utils.grid_search_utils).
best_model_rf = model_selection(clf, param_grid, X_train, y_train)

In [19]:
# Write the search result to disk, then read it back so the following cells
# evaluate the persisted copy.
model_path = FOLDER_PATH + 'adaBoost_acc.pkl'
joblib.dump(best_model_rf, model_path)
pickled_clf = joblib.load(model_path)

In [20]:
# Print the two top-ranked parameter settings of the reloaded search object.
# NOTE(review): in the recorded output the "Standard Deviation" equals the mean
# score for every model -- the `report` helper probably prints the wrong field.
report(pickled_clf.grid_scores_, 2)

Model with rank: 1
Mean validation score: 0.6372, Standard Deviation: 0.6372
Parameters: {'learning_rate': 1, 'n_estimators': 300}

Model with rank: 2
Mean validation score: 0.5674, Standard Deviation: 0.5674
Parameters: {'learning_rate': 1, 'n_estimators': 10}



In [21]:
# Predict the held-out accelerometer rows; y_pred feeds the confusion matrix.
y_pred = pickled_clf.predict(X_test)
# Last expression: the test-set score is displayed as the cell output.
pickled_clf.score(X_test, y_test)

0.6308739814711464

In [22]:
# Project helper; note the argument order is predictions first, then true
# labels. The recorded table has predicted_* rows and actual_* columns.
get_confusion_matrix(y_pred, y_test)

Unnamed: 0,actual_0,actual_1,actual_2,actual_3,actual_4
predicted_0,2653,536,194,167,586
predicted_1,517,1959,486,115,257
predicted_2,167,336,1623,790,91
predicted_3,152,219,1076,2488,18
predicted_4,79,529,268,31,2581


# Stabilizer

In [26]:
# Read the stabilizer feature table (first CSV column is the index).
df_sta = pd.read_csv("sta_transformed_3_min.csv", index_col=0)
# Target is the "label" column; features are all columns but the last.
y_sta = df_sta["label"]
X_sta = df_sta.iloc[:, :-1]
# Report the number of rows loaded.
print("Stabilizer: " + str(len(df_sta)))

Stabilizer: 89586


In [27]:
# 80/20 train/test split of the stabilizer features with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X_sta,
    y_sta,
    test_size=0.2,
    random_state=42,
)

In [28]:
# Base estimator and hyper-parameter candidates for the stabilizer features.
clf = AdaBoostClassifier(random_state=0)
param_grid = dict(
    n_estimators=[10, 100],
    learning_rate=[0.1, 1],
)

# Project helper; its result is persisted and evaluated in the next cells.
best_model_rf = model_selection(clf, param_grid, X_train, y_train)

In [29]:
# Save the stabilizer search result and reload it from disk for evaluation.
model_path = FOLDER_PATH + 'adaBoost_sta.pkl'
joblib.dump(best_model_rf, model_path)
pickled_clf = joblib.load(model_path)

In [30]:
# Print the two top-ranked parameter settings for the stabilizer model.
# NOTE(review): the recorded "Standard Deviation" equals the mean score --
# verify the `report` helper.
report(pickled_clf.grid_scores_, 2)

Model with rank: 1
Mean validation score: 0.5139, Standard Deviation: 0.5139
Parameters: {'learning_rate': 0.1, 'n_estimators': 100}

Model with rank: 2
Mean validation score: 0.5031, Standard Deviation: 0.5031
Parameters: {'learning_rate': 1, 'n_estimators': 100}



In [31]:
# Predict the held-out stabilizer rows; y_pred is reused by the next cell.
y_pred = pickled_clf.predict(X_test)
# Displayed as the cell output: score on the test split.
pickled_clf.score(X_test, y_test)

0.5201473378725304

In [32]:
# Confusion table for the stabilizer model (helper takes predictions first).
get_confusion_matrix(y_pred, y_test)

Unnamed: 0,actual_0,actual_1,actual_2,actual_3,actual_4
predicted_0,3304,1,7,0,0
predicted_1,81,842,609,265,379
predicted_2,159,1038,1687,553,674
predicted_3,0,1301,779,2228,1221
predicted_4,24,397,565,545,1259


# Gyro 

In [33]:
# Load the gyroscope feature table; the first CSV column is used as index.
df_gyro = pd.read_csv("gyro_transformed_3_min.csv", index_col=0)
# Features are all columns except the last; the target is the "label" column.
X_gyro = df_gyro.iloc[:, :-1]
y_gyro = df_gyro["label"]
# Row count of the gyro frame. The message previously said "Accelerometer",
# a copy-paste leftover that mislabelled this dataset.
print("Gyro: " + str(df_gyro.shape[0]))

Accelerometer: 89586


In [34]:
# 80/20 train/test split of the gyro features with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X_gyro,
    y_gyro,
    test_size=0.2,
    random_state=42,
)

In [35]:
# Base estimator and hyper-parameter candidates for the gyro features.
clf = AdaBoostClassifier(random_state=0)
param_grid = dict(
    n_estimators=[10, 100],
    learning_rate=[0.001, 1],
)

# Project helper; its result is persisted and evaluated in the next cells.
best_model_rf = model_selection(clf, param_grid, X_train, y_train)

In [36]:
# Save the gyro search result and reload it from disk for evaluation.
model_path = FOLDER_PATH + 'adaBoost_gyro.pkl'
joblib.dump(best_model_rf, model_path)
pickled_clf = joblib.load(model_path)

In [37]:
# Print the two top-ranked parameter settings for the gyro model.
# NOTE(review): the recorded "Standard Deviation" equals the mean score --
# verify the `report` helper.
report(pickled_clf.grid_scores_, 2)

Model with rank: 1
Mean validation score: 0.5448, Standard Deviation: 0.5448
Parameters: {'learning_rate': 1, 'n_estimators': 10}

Model with rank: 2
Mean validation score: 0.5212, Standard Deviation: 0.5212
Parameters: {'learning_rate': 0.001, 'n_estimators': 100}



In [38]:
# Predict the held-out gyro rows; y_pred is reused by the next cell.
y_pred = pickled_clf.predict(X_test)
# Displayed as the cell output: score on the test split.
pickled_clf.score(X_test, y_test)

0.5818171670945418

In [39]:
# Confusion table for the gyro model (helper takes predictions first).
get_confusion_matrix(y_pred, y_test)

Unnamed: 0,actual_0,actual_1,actual_2,actual_3,actual_4
predicted_0,3568,0,0,0,0
predicted_1,0,1444,565,297,72
predicted_2,0,864,1231,365,522
predicted_3,0,900,864,1855,612
predicted_4,0,371,987,1074,2327


# Gyro + Accelerometer

In [40]:
# Labels come from the gyro frame.
# NOTE(review): assumes both sensor tables share the same index and labels -- confirm.
y_gyro_acc = y_gyro
# Place gyro and accelerometer feature columns side by side (aligned on index).
X_gyro_acc = pd.concat(
    [X_gyro, X_acc],
    axis=1,
)
X_gyro_acc.head()

Unnamed: 0,mu_x_gyro,mu_y_gyro,mu_z_gyro,std_x_gyro,std_y_gyro,std_z_gyro,avg_resultant_acc_gyro,bins_0_gyro,bins_1_gyro,bins_2_gyro,...,bins_23_acc,bins_24_acc,bins_25_acc,bins_26_acc,bins_27_acc,bins_28_acc,bins_29_acc,mean_average_difference_x_acc,mean_average_difference_y_acc,mean_average_difference_z_acc
0,0.423966,0.689068,0.143795,5.258876,5.135687,1.60727,6.642985,2.0,3.0,5.0,...,7.0,17.0,22.0,16.0,15.0,5.0,2.0,0.287634,0.210639,0.063305
1,0.409638,0.708761,0.134244,5.258213,5.137686,1.603247,6.646277,2.0,3.0,5.0,...,7.0,17.0,22.0,16.0,15.0,5.0,2.0,0.283445,0.211025,0.063272
2,0.338286,0.776274,0.12026,5.244151,5.22275,1.613862,6.692912,2.0,3.0,5.0,...,7.0,17.0,22.0,16.0,15.0,5.0,2.0,0.282941,0.208936,0.063698
3,0.238611,0.802374,0.093104,5.234158,5.215794,1.602169,6.671035,2.0,3.0,5.0,...,6.0,17.0,22.0,16.0,15.0,5.0,2.0,0.281943,0.199781,0.063799
4,0.141042,0.821388,0.069367,5.263072,5.233489,1.610829,6.704195,2.0,3.0,6.0,...,7.0,16.0,22.0,16.0,15.0,5.0,2.0,0.282471,0.198947,0.06439


In [41]:
# 80/20 train/test split of the combined gyro + accelerometer features.
X_train, X_test, y_train, y_test = train_test_split(
    X_gyro_acc,
    y_gyro_acc,
    test_size=0.2,
    random_state=42,
)

In [42]:
# Base estimator and hyper-parameter candidates for gyro + accelerometer.
clf = AdaBoostClassifier(random_state=0)
param_grid = dict(
    n_estimators=[300],
    learning_rate=[0.001, 1],
)

# Project helper; its result is persisted and evaluated in the next cells.
best_model_rf = model_selection(clf, param_grid, X_train, y_train)

In [43]:
# Save the gyro + accelerometer search result and reload it for evaluation.
model_path = FOLDER_PATH + 'adaBoost_gyro_acc.pkl'
joblib.dump(best_model_rf, model_path)
pickled_clf = joblib.load(model_path)

In [44]:
# Print the two top-ranked parameter settings for the gyro + accelerometer model.
# NOTE(review): the recorded "Standard Deviation" equals the mean score --
# verify the `report` helper.
report(pickled_clf.grid_scores_, 2)

Model with rank: 1
Mean validation score: 0.6895, Standard Deviation: 0.6895
Parameters: {'learning_rate': 0.001, 'n_estimators': 300}

Model with rank: 2
Mean validation score: 0.6260, Standard Deviation: 0.6260
Parameters: {'learning_rate': 1, 'n_estimators': 300}



In [45]:
# Predict the held-out gyro + accelerometer rows; y_pred is reused below.
y_pred = pickled_clf.predict(X_test)
# Displayed as the cell output: score on the test split.
pickled_clf.score(X_test, y_test)

0.6892510324813037

In [46]:
# Confusion table for the gyro + accelerometer model (predictions first).
get_confusion_matrix(y_pred, y_test)

Unnamed: 0,actual_0,actual_1,actual_2,actual_3,actual_4
predicted_0,3568,0,0,0,0
predicted_1,0,2678,1348,393,533
predicted_2,0,0,1,0,0
predicted_3,0,720,2037,3185,82
predicted_4,0,181,261,13,2918


# Gyro + Stabilizer

In [47]:
# Labels come from the gyro frame.
# NOTE(review): assumes both sensor tables share the same index and labels -- confirm.
y_gyro_sta = y_gyro
# Place gyro and stabilizer feature columns side by side (aligned on index).
X_gyro_sta = pd.concat(
    [X_gyro, X_sta],
    axis=1,
)
X_gyro_sta.head()

Unnamed: 0,mu_x_gyro,mu_y_gyro,mu_z_gyro,std_x_gyro,std_y_gyro,std_z_gyro,avg_resultant_acc_gyro,bins_0_gyro,bins_1_gyro,bins_2_gyro,...,bins_23_stabilizer,bins_24_stabilizer,bins_25_stabilizer,bins_26_stabilizer,bins_27_stabilizer,bins_28_stabilizer,bins_29_stabilizer,mean_average_difference_x_stabilizer,mean_average_difference_y_stabilizer,mean_average_difference_z_stabilizer
0,0.423966,0.689068,0.143795,5.258876,5.135687,1.60727,6.642985,2.0,3.0,5.0,...,16.0,10.0,6.0,5.0,4.0,10.0,13.0,0.1634,0.310911,0.158776
1,0.409638,0.708761,0.134244,5.258213,5.137686,1.603247,6.646277,2.0,3.0,5.0,...,18.0,8.0,6.0,5.0,4.0,10.0,13.0,0.164636,0.311551,0.15803
2,0.338286,0.776274,0.12026,5.244151,5.22275,1.613862,6.692912,2.0,3.0,5.0,...,16.0,8.0,6.0,6.0,3.0,10.0,13.0,0.167053,0.312735,0.157362
3,0.238611,0.802374,0.093104,5.234158,5.215794,1.602169,6.671035,2.0,3.0,5.0,...,16.0,8.0,7.0,4.0,4.0,9.0,13.0,0.168144,0.313757,0.156745
4,0.141042,0.821388,0.069367,5.263072,5.233489,1.610829,6.704195,2.0,3.0,6.0,...,17.0,7.0,7.0,4.0,4.0,9.0,13.0,0.16895,0.314334,0.156337


In [48]:
# 80/20 train/test split of the combined gyro + stabilizer features.
X_train, X_test, y_train, y_test = train_test_split(
    X_gyro_sta,
    y_gyro_sta,
    test_size=0.2,
    random_state=42,
)

In [49]:
# Base estimator and hyper-parameter candidates for gyro + stabilizer.
clf = AdaBoostClassifier(random_state=0)
param_grid = dict(
    n_estimators=[10, 200],
    learning_rate=[0.001, 1],
)

# Project helper; its result is persisted and evaluated in the next cells.
best_model_rf = model_selection(clf, param_grid, X_train, y_train)

In [50]:
# Save the gyro + stabilizer search result and reload it for evaluation.
model_path = FOLDER_PATH + 'adaBoost_gyro_stb.pkl'
joblib.dump(best_model_rf, model_path)
pickled_clf = joblib.load(model_path)

In [51]:
# Print the two top-ranked parameter settings for the gyro + stabilizer model.
# NOTE(review): the recorded "Standard Deviation" equals the mean score --
# verify the `report` helper.
report(pickled_clf.grid_scores_, 2)

Model with rank: 1
Mean validation score: 0.5631, Standard Deviation: 0.5631
Parameters: {'learning_rate': 1, 'n_estimators': 10}

Model with rank: 2
Mean validation score: 0.5274, Standard Deviation: 0.5274
Parameters: {'learning_rate': 0.001, 'n_estimators': 200}



In [52]:
# Predict the held-out gyro + stabilizer rows; y_pred is reused below.
y_pred = pickled_clf.predict(X_test)
# Displayed as the cell output: score on the test split.
pickled_clf.score(X_test, y_test)

0.5189195222681103

In [53]:
# Confusion table for the gyro + stabilizer model (predictions first).
get_confusion_matrix(y_pred, y_test)

Unnamed: 0,actual_0,actual_1,actual_2,actual_3,actual_4
predicted_0,3568,0,0,0,0
predicted_1,0,1680,690,364,80
predicted_2,0,733,1181,342,477
predicted_3,0,678,1245,2005,2112
predicted_4,0,488,531,880,864


# Accelerometer + Stabilizer

In [54]:
# Place accelerometer and stabilizer feature columns side by side (aligned on index).
X_acc_sta = pd.concat([X_acc, X_sta], axis=1)
# Take the labels from one of the frames that actually contributes features.
# This cell previously used y_gyro, which made it silently depend on the gyro
# section even though no gyro feature is part of this matrix.
# NOTE(review): assumes accelerometer and stabilizer rows carry identical labels -- confirm.
y_acc_sta = y_acc
# Preview the combined feature table.
X_acc_sta.head()

Unnamed: 0,mu_x_acc,mu_y_acc,mu_z_acc,std_x_acc,std_y_acc,std_z_acc,avg_resultant_acc_acc,bins_0_acc,bins_1_acc,bins_2_acc,...,bins_23_stabilizer,bins_24_stabilizer,bins_25_stabilizer,bins_26_stabilizer,bins_27_stabilizer,bins_28_stabilizer,bins_29_stabilizer,mean_average_difference_x_stabilizer,mean_average_difference_y_stabilizer,mean_average_difference_z_stabilizer
0,-0.509557,-0.279534,1.018882,0.346928,0.273213,0.079338,1.239601,1.0,4.0,7.0,...,16.0,10.0,6.0,5.0,4.0,10.0,13.0,0.1634,0.310911,0.158776
1,-0.501934,-0.278313,1.018911,0.343203,0.273325,0.079296,1.235087,1.0,4.0,6.0,...,18.0,8.0,6.0,5.0,4.0,10.0,13.0,0.164636,0.311551,0.15803
2,-0.498901,-0.276406,1.018525,0.343154,0.272539,0.079937,1.23271,1.0,4.0,6.0,...,16.0,8.0,6.0,6.0,3.0,10.0,13.0,0.167053,0.312735,0.157362
3,-0.491748,-0.267881,1.018433,0.343136,0.252441,0.080053,1.224862,1.0,4.0,6.0,...,16.0,8.0,7.0,4.0,4.0,9.0,13.0,0.168144,0.313757,0.156745
4,-0.485814,-0.267094,1.017896,0.344372,0.251208,0.080409,1.222093,1.0,4.0,6.0,...,17.0,7.0,7.0,4.0,4.0,9.0,13.0,0.16895,0.314334,0.156337


In [55]:
# 80/20 train/test split of the combined accelerometer + stabilizer features.
X_train, X_test, y_train, y_test = train_test_split(
    X_acc_sta,
    y_acc_sta,
    test_size=0.2,
    random_state=42,
)

In [56]:
# Base estimator and hyper-parameter candidates for accelerometer + stabilizer.
clf = AdaBoostClassifier(random_state=0)
param_grid = dict(
    n_estimators=[10, 200],
    learning_rate=[1, 3, 5],
)

# Project helper; its result is persisted and evaluated in the next cells.
best_model_rf = model_selection(clf, param_grid, X_train, y_train)

In [57]:
# Save the accelerometer + stabilizer search result and reload it for evaluation.
model_path = FOLDER_PATH + 'adaBoost_acc_stb.pkl'
joblib.dump(best_model_rf, model_path)
pickled_clf = joblib.load(model_path)

In [58]:
# Print the two top-ranked parameter settings for the accelerometer + stabilizer model.
# NOTE(review): the recorded "Standard Deviation" equals the mean score --
# verify the `report` helper.
report(pickled_clf.grid_scores_, 2)

Model with rank: 1
Mean validation score: 0.7193, Standard Deviation: 0.7193
Parameters: {'learning_rate': 1, 'n_estimators': 200}

Model with rank: 2
Mean validation score: 0.6743, Standard Deviation: 0.6743
Parameters: {'learning_rate': 1, 'n_estimators': 10}



In [59]:
# Predict the held-out accelerometer + stabilizer rows; y_pred is reused below.
y_pred = pickled_clf.predict(X_test)
# Displayed as the cell output: score on the test split.
pickled_clf.score(X_test, y_test)

0.7177140305837705

In [60]:
# Confusion table for the accelerometer + stabilizer model (predictions first).
get_confusion_matrix(y_pred, y_test)

Unnamed: 0,actual_0,actual_1,actual_2,actual_3,actual_4
predicted_0,3531,4,29,0,0
predicted_1,25,1517,544,87,124
predicted_2,12,354,1247,177,58
predicted_3,0,1036,1398,3296,82
predicted_4,0,668,429,31,3269


# All sensors (Gyro + Accelerometer + Stabilizer)

In [61]:
# Labels come from the gyro frame.
# NOTE(review): assumes all three sensor tables share the same index and labels -- confirm.
y_all = y_gyro
# Place gyro, accelerometer and stabilizer feature columns side by side.
X_all = pd.concat(
    [X_gyro, X_acc, X_sta],
    axis=1,
)
X_all.head()

Unnamed: 0,mu_x_gyro,mu_y_gyro,mu_z_gyro,std_x_gyro,std_y_gyro,std_z_gyro,avg_resultant_acc_gyro,bins_0_gyro,bins_1_gyro,bins_2_gyro,...,bins_23_stabilizer,bins_24_stabilizer,bins_25_stabilizer,bins_26_stabilizer,bins_27_stabilizer,bins_28_stabilizer,bins_29_stabilizer,mean_average_difference_x_stabilizer,mean_average_difference_y_stabilizer,mean_average_difference_z_stabilizer
0,0.423966,0.689068,0.143795,5.258876,5.135687,1.60727,6.642985,2.0,3.0,5.0,...,16.0,10.0,6.0,5.0,4.0,10.0,13.0,0.1634,0.310911,0.158776
1,0.409638,0.708761,0.134244,5.258213,5.137686,1.603247,6.646277,2.0,3.0,5.0,...,18.0,8.0,6.0,5.0,4.0,10.0,13.0,0.164636,0.311551,0.15803
2,0.338286,0.776274,0.12026,5.244151,5.22275,1.613862,6.692912,2.0,3.0,5.0,...,16.0,8.0,6.0,6.0,3.0,10.0,13.0,0.167053,0.312735,0.157362
3,0.238611,0.802374,0.093104,5.234158,5.215794,1.602169,6.671035,2.0,3.0,5.0,...,16.0,8.0,7.0,4.0,4.0,9.0,13.0,0.168144,0.313757,0.156745
4,0.141042,0.821388,0.069367,5.263072,5.233489,1.610829,6.704195,2.0,3.0,6.0,...,17.0,7.0,7.0,4.0,4.0,9.0,13.0,0.16895,0.314334,0.156337


In [62]:
# 80/20 train/test split of the features from all three sensors.
X_train, X_test, y_train, y_test = train_test_split(
    X_all,
    y_all,
    test_size=0.2,
    random_state=42,
)

In [63]:
# Base estimator and hyper-parameter candidates for the all-sensor features.
clf = AdaBoostClassifier(random_state=0)
param_grid = dict(
    n_estimators=[10, 100],
    learning_rate=[0.001, 1, 3],
)

# Project helper; its result is persisted and evaluated in the next cells.
best_model_rf = model_selection(clf, param_grid, X_train, y_train)

In [64]:
# Save the all-sensor search result and reload it from disk for evaluation.
model_path = FOLDER_PATH + 'adaBoost_all.pkl'
joblib.dump(best_model_rf, model_path)
pickled_clf = joblib.load(model_path)

In [65]:
# Print the two top-ranked parameter settings for the all-sensor model.
# Reads from the reloaded object (pickled_clf) like every other section, so the
# report describes the same persisted artifact that is scored below, instead of
# the in-memory best_model_rf.
# NOTE(review): the recorded "Standard Deviation" equals the mean score --
# verify the `report` helper.
report(pickled_clf.grid_scores_, 2)

Model with rank: 1
Mean validation score: 0.7119, Standard Deviation: 0.7119
Parameters: {'learning_rate': 1, 'n_estimators': 10}

Model with rank: 2
Mean validation score: 0.6650, Standard Deviation: 0.6650
Parameters: {'learning_rate': 1, 'n_estimators': 100}



In [66]:
# Predict the held-out all-sensor rows; y_pred is reused by the next cell.
y_pred = pickled_clf.predict(X_test)
# Displayed as the cell output: score on the test split.
pickled_clf.score(X_test, y_test)

0.6988503181158612

In [67]:
# Confusion table for the all-sensor model (helper takes predictions first).
get_confusion_matrix(y_pred, y_test)

Unnamed: 0,actual_0,actual_1,actual_2,actual_3,actual_4
predicted_0,3568,0,0,0,0
predicted_1,0,1918,682,109,47
predicted_2,0,737,1017,398,384
predicted_3,0,689,1652,3066,149
predicted_4,0,235,296,18,2953
