In [1]:
import pandas as pd
import numpy as np
from sklearn.externals import joblib
from utils.classifications_utils import *
from utils.data_processing_utils import *
from utils.data_visualization_utils import *
from utils.metrics_utils import *
from utils.grid_search_utils import *
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier, AdaBoostClassifier
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides library deprecation warnings, so removed or
# renamed APIs can go unnoticed -- consider narrowing the filter.
warnings.filterwarnings("ignore")
%matplotlib inline

In [2]:
# Directory prefix for the pickled SVM search results saved/loaded below.
# It is joined to file names by plain string concatenation, hence the trailing slash.
FOLDER_PATH = "pickled_models/svm/"

# Accelerometer

In [3]:
# Load the accelerometer feature table; the first CSV column becomes the index.
df_acc = pd.read_csv("acc_transformed_3_min.csv", index_col=0)

# The feature slice below is positional while the target is selected by name.
# Make the "label is the last column" assumption explicit so a changed CSV
# layout fails loudly instead of leaking the target into X.
assert df_acc.columns[-1] == "label", "expected 'label' to be the last column"

X_acc = df_acc.iloc[:, :-1]  # every column except the last one -> features
y_acc = df_acc["label"]      # target

In [4]:
# 80/20 hold-out split of the accelerometer data; the fixed seed keeps the
# partition reproducible between runs.
# NOTE(review): consecutive rows in the head() previews further down are
# nearly identical, which suggests overlapping windows -- a shuffled split may
# then put near-duplicates of training rows into the test set. Confirm before
# trusting the ~100% scores.
X_train, X_test, y_train, y_test = train_test_split(
    X_acc,
    y_acc,
    random_state=42,
    test_size=0.2,
)

In [5]:
# Candidate hyper-parameters for the RBF-kernel SVM search.
Cs = [10, 12, 15]
gammas = [1, 3, 5]
param_grid = dict(C=Cs, gamma=gammas)
clf = SVC(kernel='rbf')

# Learn the min/max scaling from the training rows only, then apply that same
# scaling to both partitions.
scaler = MinMaxScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)
print("Passed")
# The search itself is currently disabled; uncomment to retrain:
#best_model_svm = model_selection(clf, param_grid, X_train, y_train)

Passed


In [6]:
# Load the cached accelerometer search result, or run the search and cache it
# when no pickle exists yet. `best_model_svm` is only written to disk right
# after it has been fitted on this section's data, so the cell works on a
# fresh kernel and a model left over from another section can never overwrite
# this file.
acc_model_path = FOLDER_PATH + 'svm_acc.pkl'
try:
    # NOTE: joblib.load unpickles arbitrary objects -- only load trusted files.
    pickled_clf = joblib.load(acc_model_path)
except FileNotFoundError:
    best_model_svm = model_selection(clf, param_grid, X_train, y_train)
    joblib.dump(best_model_svm, acc_model_path)
    # Read it back so the evaluation below runs against the persisted artifact.
    pickled_clf = joblib.load(acc_model_path)

In [7]:
# Show the top-ranked parameter settings of the loaded search object (the
# rendered output lists ranks 1 and 2 for the argument 2).
# NOTE(review): `grid_scores_` looks like the pre-0.20 scikit-learn
# GridSearchCV attribute (later replaced by `cv_results_`) -- confirm what
# `model_selection` returns. The rendered "Standard Deviation" equals the mean
# score, which suggests `report` prints the wrong field -- verify in utils.
report(pickled_clf.grid_scores_, 2)

Model with rank: 1
Mean validation score: 0.9991, Standard Deviation: 0.9991
Parameters: {'C': 10, 'gamma': 5}

Model with rank: 2
Mean validation score: 0.9991, Standard Deviation: 0.9991
Parameters: {'C': 12, 'gamma': 5}



In [8]:
# Predict the held-out rows (y_pred feeds the confusion-matrix cell) and
# display the score the loaded model reports on the same test partition.
y_pred = pickled_clf.predict(X_test)
pickled_clf.score(X_test, y_test)

0.9996093313985936

In [9]:
# Cross-tabulate predictions against the held-out labels; in the rendered
# table rows are predicted classes and columns are actual classes.
get_confusion_matrix(y_pred, y_test)

Unnamed: 0,actual_0,actual_1,actual_2,actual_3,actual_4
predicted_0,3568,0,0,0,0
predicted_1,0,3575,0,0,0
predicted_2,0,3,3647,3,0
predicted_3,0,0,0,3588,0
predicted_4,0,1,0,0,3533


# Stabilizer

In [4]:
# Load the stabilizer feature table; the first CSV column becomes the index.
df_sta = pd.read_csv("sta_transformed_3_min.csv", index_col=0)

# The feature slice below is positional while the target is selected by name.
# Make the "label is the last column" assumption explicit so a changed CSV
# layout fails loudly instead of leaking the target into X.
assert df_sta.columns[-1] == "label", "expected 'label' to be the last column"

X_sta = df_sta.iloc[:, :-1]  # every column except the last one -> features
y_sta = df_sta["label"]      # target
print("Stabilizer: " + str(df_sta.shape[0]))  # number of rows loaded

Stabilizer: 89586


In [11]:
# 80/20 hold-out split of the stabilizer data; the fixed seed keeps the
# partition reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_sta,
    y_sta,
    random_state=42,
    test_size=0.2,
)

In [12]:
# Candidate hyper-parameters for the RBF-kernel SVM search.
Cs = [10, 12, 15]
gammas = [1, 3, 5]
param_grid = dict(C=Cs, gamma=gammas)
clf = SVC(kernel='rbf')

# Learn the min/max scaling from the training rows only, then apply that same
# scaling to both partitions.
scaler = MinMaxScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# The search itself is currently disabled; uncomment to retrain:
#best_model_svm = model_selection(clf, param_grid, X_train, y_train)

In [13]:
# Load the cached stabilizer search result, or run the search and cache it
# when no pickle exists yet. `best_model_svm` is only written to disk right
# after it has been fitted on this section's data, so the cell works on a
# fresh kernel and a model left over from another section can never overwrite
# this file.
sta_model_path = FOLDER_PATH + 'svm_sta.pkl'
try:
    # NOTE: joblib.load unpickles arbitrary objects -- only load trusted files.
    pickled_clf = joblib.load(sta_model_path)
except FileNotFoundError:
    best_model_svm = model_selection(clf, param_grid, X_train, y_train)
    joblib.dump(best_model_svm, sta_model_path)
    # Read it back so the evaluation below runs against the persisted artifact.
    pickled_clf = joblib.load(sta_model_path)

In [14]:
# Show the top-ranked parameter settings of the loaded stabilizer search.
# NOTE(review): the rendered "Standard Deviation" equals the mean score, which
# suggests `report` prints the wrong field -- verify in utils.
report(pickled_clf.grid_scores_, 2)

Model with rank: 1
Mean validation score: 0.9983, Standard Deviation: 0.9983
Parameters: {'C': 10, 'gamma': 5}

Model with rank: 2
Mean validation score: 0.9983, Standard Deviation: 0.9983
Parameters: {'C': 12, 'gamma': 5}



In [15]:
# Predict the held-out stabilizer rows (y_pred feeds the confusion-matrix
# cell) and display the loaded model's score on the same test partition.
y_pred = pickled_clf.predict(X_test)
pickled_clf.score(X_test, y_test)

0.9994977117981918

In [16]:
# Cross-tabulate stabilizer predictions against the held-out labels
# (rendered as predicted-class rows by actual-class columns).
get_confusion_matrix(y_pred, y_test)

Unnamed: 0,actual_0,actual_1,actual_2,actual_3,actual_4
predicted_0,3568,0,0,0,0
predicted_1,0,3577,2,0,2
predicted_2,0,1,3645,1,1
predicted_3,0,0,0,3589,0
predicted_4,0,1,0,1,3530


# Gyro

In [5]:
# Load the gyroscope feature table; the first CSV column becomes the index.
df_gyro = pd.read_csv("gyro_transformed_3_min.csv", index_col=0)

# The feature slice below is positional while the target is selected by name.
# Make the "label is the last column" assumption explicit so a changed CSV
# layout fails loudly instead of leaking the target into X.
assert df_gyro.columns[-1] == "label", "expected 'label' to be the last column"

X_gyro = df_gyro.iloc[:, :-1]  # every column except the last one -> features
y_gyro = df_gyro["label"]      # target
# Row count of the gyro frame; the message is labelled "Gyro" to match the
# data actually being counted.
print("Gyro: " + str(df_gyro.shape[0]))

Accelerometer: 89586


In [18]:
# 80/20 hold-out split of the gyroscope data; the fixed seed keeps the
# partition reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_gyro,
    y_gyro,
    random_state=42,
    test_size=0.2,
)

In [19]:
# Candidate hyper-parameters for the RBF-kernel SVM search.
Cs = [10, 12, 15]
gammas = [1, 3, 5]
param_grid = dict(C=Cs, gamma=gammas)
clf = SVC(kernel='rbf')

# Learn the min/max scaling from the training rows only, then apply that same
# scaling to both partitions.
scaler = MinMaxScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# The search itself is currently disabled; uncomment to retrain:
#best_model_svm = model_selection(clf, param_grid, X_train, y_train)

In [20]:
# Load the cached gyroscope search result, or run the search and cache it
# when no pickle exists yet. `best_model_svm` is only written to disk right
# after it has been fitted on this section's data, so the cell works on a
# fresh kernel and a model left over from another section can never overwrite
# this file.
gyro_model_path = FOLDER_PATH + 'svm_gyro.pkl'
try:
    # NOTE: joblib.load unpickles arbitrary objects -- only load trusted files.
    pickled_clf = joblib.load(gyro_model_path)
except FileNotFoundError:
    best_model_svm = model_selection(clf, param_grid, X_train, y_train)
    joblib.dump(best_model_svm, gyro_model_path)
    # Read it back so the evaluation below runs against the persisted artifact.
    pickled_clf = joblib.load(gyro_model_path)

In [21]:
# Show the top-ranked parameter settings of the loaded gyroscope search.
# NOTE(review): the rendered "Standard Deviation" equals the mean score, which
# suggests `report` prints the wrong field -- verify in utils.
report(pickled_clf.grid_scores_, 2)

Model with rank: 1
Mean validation score: 0.9988, Standard Deviation: 0.9988
Parameters: {'C': 10, 'gamma': 5}

Model with rank: 2
Mean validation score: 0.9988, Standard Deviation: 0.9988
Parameters: {'C': 12, 'gamma': 5}



In [22]:
# Predict the held-out gyroscope rows (y_pred feeds the confusion-matrix
# cell) and display the loaded model's score on the same test partition.
y_pred = pickled_clf.predict(X_test)
pickled_clf.score(X_test, y_test)

0.9996651411987945

In [23]:
# Cross-tabulate gyroscope predictions against the held-out labels
# (rendered as predicted-class rows by actual-class columns).
get_confusion_matrix(y_pred, y_test)

Unnamed: 0,actual_0,actual_1,actual_2,actual_3,actual_4
predicted_0,3568,0,0,0,0
predicted_1,0,3576,0,0,1
predicted_2,0,0,3647,1,1
predicted_3,0,2,0,3590,0
predicted_4,0,1,0,0,3531


# Gyro + Accelerometer

In [18]:
# Gyro and accelerometer features side by side (pandas aligns the rows on the
# index); the labels are taken from the gyro frame.
y_gyro_acc = y_gyro
X_gyro_acc = pd.concat((X_gyro, X_acc), axis="columns")
X_gyro_acc.head()

Unnamed: 0,mu_x_gyro,mu_y_gyro,mu_z_gyro,std_x_gyro,std_y_gyro,std_z_gyro,avg_resultant_acc_gyro,bins_0_gyro,bins_1_gyro,bins_2_gyro,...,bins_23_acc,bins_24_acc,bins_25_acc,bins_26_acc,bins_27_acc,bins_28_acc,bins_29_acc,mean_average_difference_x_acc,mean_average_difference_y_acc,mean_average_difference_z_acc
0,0.423966,0.689068,0.143795,5.258876,5.135687,1.60727,6.642985,2.0,3.0,5.0,...,7.0,17.0,22.0,16.0,15.0,5.0,2.0,0.287634,0.210639,0.063305
1,0.409638,0.708761,0.134244,5.258213,5.137686,1.603247,6.646277,2.0,3.0,5.0,...,7.0,17.0,22.0,16.0,15.0,5.0,2.0,0.283445,0.211025,0.063272
2,0.338286,0.776274,0.12026,5.244151,5.22275,1.613862,6.692912,2.0,3.0,5.0,...,7.0,17.0,22.0,16.0,15.0,5.0,2.0,0.282941,0.208936,0.063698
3,0.238611,0.802374,0.093104,5.234158,5.215794,1.602169,6.671035,2.0,3.0,5.0,...,6.0,17.0,22.0,16.0,15.0,5.0,2.0,0.281943,0.199781,0.063799
4,0.141042,0.821388,0.069367,5.263072,5.233489,1.610829,6.704195,2.0,3.0,6.0,...,7.0,16.0,22.0,16.0,15.0,5.0,2.0,0.282471,0.198947,0.06439


In [19]:
# 80/20 hold-out split of the combined gyro + accelerometer features; the
# fixed seed keeps the partition reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_gyro_acc,
    y_gyro_acc,
    random_state=42,
    test_size=0.2,
)

In [20]:
# Candidate hyper-parameters for the RBF-kernel SVM search.
Cs = [12, 15]
gammas = [1, 3, 5]
param_grid = dict(C=Cs, gamma=gammas)
clf = SVC(kernel='rbf')

# Learn the min/max scaling from the training rows only, then apply that same
# scaling to both partitions.
scaler = MinMaxScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# The search itself is currently disabled; uncomment to retrain:
#best_model_svm = model_selection(clf, param_grid, X_train, y_train)

In [21]:
# Load the previously saved gyro + accelerometer search result. Nothing in this
# section writes this file, so it must already exist on disk.
# NOTE: joblib.load unpickles arbitrary objects -- only load trusted files.
pickled_clf = joblib.load(FOLDER_PATH + 'svm_gyro_acc.pkl')

In [22]:
# Show the top-ranked parameter settings of the loaded gyro + accelerometer search.
# NOTE(review): the rendered "Standard Deviation" equals the mean score, which
# suggests `report` prints the wrong field -- verify in utils.
report(pickled_clf.grid_scores_, 2)

Model with rank: 1
Mean validation score: 1.0000, Standard Deviation: 1.0000
Parameters: {'C': 12, 'gamma': 1}

Model with rank: 2
Mean validation score: 1.0000, Standard Deviation: 1.0000
Parameters: {'C': 15, 'gamma': 1}



In [23]:
# Predict the held-out gyro + accelerometer rows (y_pred feeds the
# confusion-matrix cell) and display the loaded model's score on that partition.
y_pred = pickled_clf.predict(X_test)
pickled_clf.score(X_test, y_test)

1.0

In [24]:
# Cross-tabulate gyro + accelerometer predictions against the held-out labels
# (rendered as predicted-class rows by actual-class columns).
get_confusion_matrix(y_pred, y_test)

Unnamed: 0,actual_0,actual_1,actual_2,actual_3,actual_4
predicted_0,3568,0,0,0,0
predicted_1,0,3579,0,0,0
predicted_2,0,0,3647,0,0
predicted_3,0,0,0,3591,0
predicted_4,0,0,0,0,3533


# Gyro + Stabilizer

In [44]:
# Gyro and stabilizer features side by side (pandas aligns the rows on the
# index); the labels are taken from the gyro frame.
y_gyro_sta = y_gyro
X_gyro_sta = pd.concat((X_gyro, X_sta), axis="columns")
X_gyro_sta.head()

Unnamed: 0,mu_x_gyro,mu_y_gyro,mu_z_gyro,std_x_gyro,std_y_gyro,std_z_gyro,avg_resultant_acc_gyro,bins_0_gyro,bins_1_gyro,bins_2_gyro,...,bins_23_stabilizer,bins_24_stabilizer,bins_25_stabilizer,bins_26_stabilizer,bins_27_stabilizer,bins_28_stabilizer,bins_29_stabilizer,mean_average_difference_x_stabilizer,mean_average_difference_y_stabilizer,mean_average_difference_z_stabilizer
0,0.423966,0.689068,0.143795,5.258876,5.135687,1.60727,6.642985,2.0,3.0,5.0,...,16.0,10.0,6.0,5.0,4.0,10.0,13.0,0.1634,0.310911,0.158776
1,0.409638,0.708761,0.134244,5.258213,5.137686,1.603247,6.646277,2.0,3.0,5.0,...,18.0,8.0,6.0,5.0,4.0,10.0,13.0,0.164636,0.311551,0.15803
2,0.338286,0.776274,0.12026,5.244151,5.22275,1.613862,6.692912,2.0,3.0,5.0,...,16.0,8.0,6.0,6.0,3.0,10.0,13.0,0.167053,0.312735,0.157362
3,0.238611,0.802374,0.093104,5.234158,5.215794,1.602169,6.671035,2.0,3.0,5.0,...,16.0,8.0,7.0,4.0,4.0,9.0,13.0,0.168144,0.313757,0.156745
4,0.141042,0.821388,0.069367,5.263072,5.233489,1.610829,6.704195,2.0,3.0,6.0,...,17.0,7.0,7.0,4.0,4.0,9.0,13.0,0.16895,0.314334,0.156337


In [45]:
# 80/20 hold-out split of the combined gyro + stabilizer features; the fixed
# seed keeps the partition reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_gyro_sta,
    y_gyro_sta,
    random_state=42,
    test_size=0.2,
)

In [46]:
# Candidate hyper-parameters for the RBF-kernel SVM search.
Cs = [3, 10]
gammas = [0.1, 1]
param_grid = dict(C=Cs, gamma=gammas)
clf = SVC(kernel='rbf')

# Learn the min/max scaling from the training rows only, then apply that same
# scaling to both partitions.
scaler = MinMaxScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# The search itself is currently disabled; uncomment to retrain:
#best_model_svm = model_selection(clf, param_grid, X_train, y_train)

In [55]:
# Load the cached gyro + stabilizer search result, or run the search and cache
# it when no pickle exists yet. `best_model_svm` is only written to disk right
# after it has been fitted on this section's data, so the cell works on a
# fresh kernel and a model left over from another section can never overwrite
# this file.
gyro_stb_model_path = FOLDER_PATH + 'svm_gyro_stb.pkl'
try:
    # NOTE: joblib.load unpickles arbitrary objects -- only load trusted files.
    pickled_clf = joblib.load(gyro_stb_model_path)
except FileNotFoundError:
    best_model_svm = model_selection(clf, param_grid, X_train, y_train)
    joblib.dump(best_model_svm, gyro_stb_model_path)
    # Read it back so the evaluation below runs against the persisted artifact.
    pickled_clf = joblib.load(gyro_stb_model_path)

In [56]:
# Show the top-ranked parameter settings of the loaded gyro + stabilizer search.
# NOTE(review): the rendered "Standard Deviation" equals the mean score, which
# suggests `report` prints the wrong field -- verify in utils.
report(pickled_clf.grid_scores_, 2)

Model with rank: 1
Mean validation score: 0.9997, Standard Deviation: 0.9997
Parameters: {'C': 10, 'gamma': 1}

Model with rank: 2
Mean validation score: 0.9996, Standard Deviation: 0.9996
Parameters: {'C': 3, 'gamma': 1}



In [57]:
# Predict the held-out gyro + stabilizer rows (y_pred feeds the
# confusion-matrix cell) and display the loaded model's score on that partition.
y_pred = pickled_clf.predict(X_test)
pickled_clf.score(X_test, y_test)

1.0

In [58]:
# Cross-tabulate gyro + stabilizer predictions against the held-out labels
# (rendered as predicted-class rows by actual-class columns).
get_confusion_matrix(y_pred, y_test)

Unnamed: 0,actual_0,actual_1,actual_2,actual_3,actual_4
predicted_0,3568,0,0,0,0
predicted_1,0,3579,0,0,0
predicted_2,0,0,3647,0,0
predicted_3,0,0,0,3591,0
predicted_4,0,0,0,0,3533


# Accelerometer + Stabilizer

In [59]:
# Accelerometer and stabilizer features side by side (pandas aligns the rows
# on the index).
X_acc_sta = pd.concat([X_acc, X_sta], axis=1)
# Take the labels from a frame that is actually part of this feature set, so
# the section does not silently depend on the gyro data having been loaded.
y_acc_sta = y_acc
X_acc_sta.head()

Unnamed: 0,mu_x_acc,mu_y_acc,mu_z_acc,std_x_acc,std_y_acc,std_z_acc,avg_resultant_acc_acc,bins_0_acc,bins_1_acc,bins_2_acc,...,bins_23_stabilizer,bins_24_stabilizer,bins_25_stabilizer,bins_26_stabilizer,bins_27_stabilizer,bins_28_stabilizer,bins_29_stabilizer,mean_average_difference_x_stabilizer,mean_average_difference_y_stabilizer,mean_average_difference_z_stabilizer
0,-0.509557,-0.279534,1.018882,0.346928,0.273213,0.079338,1.239601,1.0,4.0,7.0,...,16.0,10.0,6.0,5.0,4.0,10.0,13.0,0.1634,0.310911,0.158776
1,-0.501934,-0.278313,1.018911,0.343203,0.273325,0.079296,1.235087,1.0,4.0,6.0,...,18.0,8.0,6.0,5.0,4.0,10.0,13.0,0.164636,0.311551,0.15803
2,-0.498901,-0.276406,1.018525,0.343154,0.272539,0.079937,1.23271,1.0,4.0,6.0,...,16.0,8.0,6.0,6.0,3.0,10.0,13.0,0.167053,0.312735,0.157362
3,-0.491748,-0.267881,1.018433,0.343136,0.252441,0.080053,1.224862,1.0,4.0,6.0,...,16.0,8.0,7.0,4.0,4.0,9.0,13.0,0.168144,0.313757,0.156745
4,-0.485814,-0.267094,1.017896,0.344372,0.251208,0.080409,1.222093,1.0,4.0,6.0,...,17.0,7.0,7.0,4.0,4.0,9.0,13.0,0.16895,0.314334,0.156337


In [60]:
# 80/20 hold-out split of the combined accelerometer + stabilizer features;
# the fixed seed keeps the partition reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_acc_sta,
    y_acc_sta,
    random_state=42,
    test_size=0.2,
)

In [61]:
# Candidate hyper-parameters for the RBF-kernel SVM search.
Cs = [3, 10]
gammas = [0.1, 1]
param_grid = dict(C=Cs, gamma=gammas)
clf = SVC(kernel='rbf')

# Learn the min/max scaling from the training rows only, then apply that same
# scaling to both partitions.
scaler = MinMaxScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# Run the project's search helper on the scaled training partition.
best_model_svm = model_selection(clf, param_grid, X_train, y_train)

In [62]:
# Persist the search result fitted in the previous cell, then read it back so
# the evaluation below runs against the pickled artifact.
acc_stb_model_path = FOLDER_PATH + 'svm_acc_stb.pkl'
joblib.dump(best_model_svm, acc_stb_model_path)
pickled_clf = joblib.load(acc_stb_model_path)

In [63]:
# Show the top-ranked parameter settings of the accelerometer + stabilizer search.
# NOTE(review): the rendered "Standard Deviation" equals the mean score, which
# suggests `report` prints the wrong field -- verify in utils.
report(pickled_clf.grid_scores_, 2)

Model with rank: 1
Mean validation score: 1.0000, Standard Deviation: 1.0000
Parameters: {'C': 3, 'gamma': 1}

Model with rank: 2
Mean validation score: 1.0000, Standard Deviation: 1.0000
Parameters: {'C': 10, 'gamma': 1}



In [64]:
# Predict the held-out accelerometer + stabilizer rows (y_pred feeds the
# confusion-matrix cell) and display the loaded model's score on that partition.
y_pred = pickled_clf.predict(X_test)
pickled_clf.score(X_test, y_test)

1.0

In [65]:
# Cross-tabulate accelerometer + stabilizer predictions against the held-out
# labels (rendered as predicted-class rows by actual-class columns).
get_confusion_matrix(y_pred, y_test)

Unnamed: 0,actual_0,actual_1,actual_2,actual_3,actual_4
predicted_0,3568,0,0,0,0
predicted_1,0,3579,0,0,0
predicted_2,0,0,3647,0,0
predicted_3,0,0,0,3591,0
predicted_4,0,0,0,0,3533


# All (Gyro + Accelerometer + Stabilizer)

In [66]:
# Features of all three sensors side by side (pandas aligns the rows on the
# index); the labels are taken from the gyro frame.
y_all = y_gyro
X_all = pd.concat((X_gyro, X_acc, X_sta), axis="columns")
X_all.head()

Unnamed: 0,mu_x_gyro,mu_y_gyro,mu_z_gyro,std_x_gyro,std_y_gyro,std_z_gyro,avg_resultant_acc_gyro,bins_0_gyro,bins_1_gyro,bins_2_gyro,...,bins_23_stabilizer,bins_24_stabilizer,bins_25_stabilizer,bins_26_stabilizer,bins_27_stabilizer,bins_28_stabilizer,bins_29_stabilizer,mean_average_difference_x_stabilizer,mean_average_difference_y_stabilizer,mean_average_difference_z_stabilizer
0,0.423966,0.689068,0.143795,5.258876,5.135687,1.60727,6.642985,2.0,3.0,5.0,...,16.0,10.0,6.0,5.0,4.0,10.0,13.0,0.1634,0.310911,0.158776
1,0.409638,0.708761,0.134244,5.258213,5.137686,1.603247,6.646277,2.0,3.0,5.0,...,18.0,8.0,6.0,5.0,4.0,10.0,13.0,0.164636,0.311551,0.15803
2,0.338286,0.776274,0.12026,5.244151,5.22275,1.613862,6.692912,2.0,3.0,5.0,...,16.0,8.0,6.0,6.0,3.0,10.0,13.0,0.167053,0.312735,0.157362
3,0.238611,0.802374,0.093104,5.234158,5.215794,1.602169,6.671035,2.0,3.0,5.0,...,16.0,8.0,7.0,4.0,4.0,9.0,13.0,0.168144,0.313757,0.156745
4,0.141042,0.821388,0.069367,5.263072,5.233489,1.610829,6.704195,2.0,3.0,6.0,...,17.0,7.0,7.0,4.0,4.0,9.0,13.0,0.16895,0.314334,0.156337


In [67]:
# 80/20 hold-out split of the combined three-sensor features; the fixed seed
# keeps the partition reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_all,
    y_all,
    random_state=42,
    test_size=0.2,
)

In [68]:
# Candidate hyper-parameters for the RBF-kernel SVM search.
Cs = [3, 10]
gammas = [0.1, 1]
param_grid = dict(C=Cs, gamma=gammas)
clf = SVC(kernel='rbf')

# Learn the min/max scaling from the training rows only, then apply that same
# scaling to both partitions.
scaler = MinMaxScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# Run the project's search helper on the scaled training partition.
best_model_svm = model_selection(clf, param_grid, X_train, y_train)

In [69]:
# Persist the search result fitted in the previous cell, then read it back so
# the evaluation below runs against the pickled artifact.
all_model_path = FOLDER_PATH + 'svm_all.pkl'
joblib.dump(best_model_svm, all_model_path)
pickled_clf = joblib.load(all_model_path)

In [71]:
# Show the top-ranked parameter settings from the reloaded pickle, i.e. the
# same object that is scored in the following cells and the one every other
# section reports on, rather than from the in-memory `best_model_svm`.
# NOTE(review): the rendered "Standard Deviation" equals the mean score, which
# suggests `report` prints the wrong field -- verify in utils.
report(pickled_clf.grid_scores_, 2)

Model with rank: 1
Mean validation score: 1.0000, Standard Deviation: 1.0000
Parameters: {'C': 3, 'gamma': 1}

Model with rank: 2
Mean validation score: 1.0000, Standard Deviation: 1.0000
Parameters: {'C': 10, 'gamma': 1}



In [72]:
# Predict the held-out three-sensor rows (y_pred feeds the confusion-matrix
# cell) and display the loaded model's score on that partition.
y_pred = pickled_clf.predict(X_test)
pickled_clf.score(X_test, y_test)

1.0

In [73]:
# Cross-tabulate three-sensor predictions against the held-out labels
# (rendered as predicted-class rows by actual-class columns).
get_confusion_matrix(y_pred, y_test)

Unnamed: 0,actual_0,actual_1,actual_2,actual_3,actual_4
predicted_0,3568,0,0,0,0
predicted_1,0,3579,0,0,0
predicted_2,0,0,3647,0,0
predicted_3,0,0,0,3591,0
predicted_4,0,0,0,0,3533
