# Machine Learning

In [48]:
import pandas as pd
import numpy as np 


df = pd.read_csv("out.csv")

In [49]:
df.head()

Unnamed: 0,Time(s),acc_x(g),acc_y(g),acc_z(g),gyr_x(°/s),gyr_y(°/s),gyr_z(°/s),mag_x(G),mag_y(G),mag_z(G),SVM(g),yaw(°),pitch(°),roll(°),Class
0,0.0,-0.86,0.06,0.41,-18.57,31.65,1.32,320.73,-179.24,-376.0,0.95,-102.51,-26.65,0.0,0
1,0.03,-0.84,0.09,0.45,-18.3,24.9,-12.8,319.73,-176.37,-378.0,0.96,-102.7,-27.9,-0.42,0
2,0.06,-0.81,0.16,0.45,-13.98,10.3,-28.03,312.74,-170.61,-379.0,0.94,-102.46,-28.84,-0.99,0
3,0.09,-0.75,0.3,0.37,-14.05,-6.61,-29.77,308.74,-170.61,-376.0,0.89,-102.05,-29.21,-1.49,0
4,0.12,-0.7,0.37,0.3,-18.64,-12.45,-26.78,301.75,-166.78,-378.0,0.85,-101.76,-29.26,-1.97,0


In [50]:
df.columns

Index(['Time(s)', 'acc_x(g)', 'acc_y(g)', 'acc_z(g)', 'gyr_x(°/s)',
       'gyr_y(°/s)', 'gyr_z(°/s)', 'mag_x(G)', 'mag_y(G)', 'mag_z(G)',
       'SVM(g)', 'yaw(°)', 'pitch(°)', 'roll(°)', 'Class'],
      dtype='object')

In [51]:
variables = ['Time(s)', 'acc_x(g)', 'acc_y(g)', 'acc_z(g)', 'gyr_x(°/s)',
       'gyr_y(°/s)', 'gyr_z(°/s)', 'mag_x(G)', 'mag_y(G)', 'mag_z(G)',
       'SVM(g)', 'yaw(°)', 'pitch(°)', 'roll(°)']
target = 'Class'

In [52]:
X = df[variables]
y = df[target]

print(X.shape)
print(y.shape)

(115368, 14)
(115368,)


## Train and Test Split

In [53]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, stratify = y)

In [54]:
# Handling Imbalanced Dataset

from imblearn.over_sampling import SMOTE
smote = SMOTE(random_state=42)

X_resampled, y_resampled = smote.fit_resample(X_train, y_train)

In [55]:
from imblearn.over_sampling import RandomOverSampler 
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier

# Logistic Regression

In [56]:
model = LogisticRegression(solver ='liblinear',random_state=42)

In [57]:
model.fit(X_resampled, y_resampled)
y_pred = model.predict(X_test)


cm = confusion_matrix(y_test, y_pred)
cr = classification_report(y_test, y_pred)
auc = roc_auc_score(y_test, y_pred)

In [58]:
print(auc)

0.7255636393161825


In [59]:
print(cm)

[[ 3904   835]
 [ 6833 11502]]


In [60]:
print(cr)

              precision    recall  f1-score   support

           0       0.36      0.82      0.50      4739
           1       0.93      0.63      0.75     18335

    accuracy                           0.67     23074
   macro avg       0.65      0.73      0.63     23074
weighted avg       0.82      0.67      0.70     23074



# Decision Tree

In [61]:
model = DecisionTreeClassifier()

In [62]:
model.fit(X_resampled, y_resampled)
y_pred = model.predict(X_test)


cm = confusion_matrix(y_test, y_pred)
cr = classification_report(y_test, y_pred)
auc = roc_auc_score(y_test, y_pred)

In [63]:
print(auc)

0.9760240254396487


In [64]:
print(cm)

[[ 4565   174]
 [  206 18129]]


In [65]:
print(cr)

              precision    recall  f1-score   support

           0       0.96      0.96      0.96      4739
           1       0.99      0.99      0.99     18335

    accuracy                           0.98     23074
   macro avg       0.97      0.98      0.97     23074
weighted avg       0.98      0.98      0.98     23074



# SVM 

In [66]:
model = SVC()

In [67]:
model.fit(X_resampled, y_resampled)
y_pred = model.predict(X_test)


cm = confusion_matrix(y_test, y_pred)
cr = classification_report(y_test, y_pred)
auc = roc_auc_score(y_test, y_pred)

In [68]:
print(auc)

0.7325024529700431


In [69]:
print(cm)

[[ 4244   495]
 [ 7894 10441]]


In [70]:
print(cr)

              precision    recall  f1-score   support

           0       0.35      0.90      0.50      4739
           1       0.95      0.57      0.71     18335

    accuracy                           0.64     23074
   macro avg       0.65      0.73      0.61     23074
weighted avg       0.83      0.64      0.67     23074



# KNN

In [71]:
model = KNeighborsClassifier(n_neighbors = 3)

In [72]:
model.fit(X_resampled, y_resampled)
y_pred = model.predict(X_test)

cm = confusion_matrix(y_test, y_pred)
cr = classification_report(y_test, y_pred)
auc = roc_auc_score(y_test, y_pred)

In [73]:
print(auc)

0.9959500890584503


In [74]:
print(cm)

[[ 4720    19]
 [   75 18260]]


In [75]:
print(cr)

              precision    recall  f1-score   support

           0       0.98      1.00      0.99      4739
           1       1.00      1.00      1.00     18335

    accuracy                           1.00     23074
   macro avg       0.99      1.00      0.99     23074
weighted avg       1.00      1.00      1.00     23074



# Random Forest

In [30]:
model = RandomForestClassifier()

In [31]:
model.fit(X_resampled, y_resampled)
y_pred = model.predict(X_test)


cm = confusion_matrix(y_test, y_pred)
cr = classification_report(y_test, y_pred)
auc = roc_auc_score(y_test, y_pred)

In [32]:
print(auc)

0.997353847956311


In [33]:
print(cm)

[[ 4724    15]
 [   39 18296]]


In [34]:
print(cr)

              precision    recall  f1-score   support

           0       0.99      1.00      0.99      4739
           1       1.00      1.00      1.00     18335

    accuracy                           1.00     23074
   macro avg       1.00      1.00      1.00     23074
weighted avg       1.00      1.00      1.00     23074



# XGBoost

In [77]:
import warnings
warnings.filterwarnings('ignore')

In [79]:
import xgboost as xgb

In [80]:
clf_gbt = xgb.XGBClassifier(objective ='binary:logistic', colsample_bytree = 0.1,learning_rate = 0.1,
                max_depth= 2, alpha= 5, eval_metric = 'error').fit(X_resampled, y_resampled)

# Predict with a model
gbt_preds = clf_gbt.predict(X_test)
auc = roc_auc_score(y_test, y_pred)

In [81]:
print(auc)

0.9959500890584503


In [82]:
cm = confusion_matrix(y_test, gbt_preds)
print(cm)

[[ 4458   281]
 [ 5409 12926]]


In [83]:
cr = classification_report(y_test, gbt_preds)
print(cr)

              precision    recall  f1-score   support

           0       0.45      0.94      0.61      4739
           1       0.98      0.70      0.82     18335

    accuracy                           0.75     23074
   macro avg       0.72      0.82      0.72     23074
weighted avg       0.87      0.75      0.78     23074

