In [None]:
import os
import matplotlib.pyplot as plt
import pandas as pd
import sklearn

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

from sklearn.model_selection import train_test_split
import sklearn.metrics as metrics

In [None]:
import plotly.express as px

In [None]:
%matplotlib widget

In [None]:
# ONLY FOR COLAB
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


## Load data

In [None]:
DATA_PATH = "/content/gdrive/My Drive/DA/WESAD/"

In [None]:
subjects = [s.split(".")[0] for s in os.listdir(DATA_PATH) if s.startswith("S")]
subjects.sort()
subjects

['S10',
 'S11',
 'S13',
 'S14',
 'S15',
 'S16',
 'S17',
 'S2',
 'S3',
 'S4',
 'S5',
 'S6',
 'S7',
 'S8',
 'S9']

In [None]:
selected_subject = subjects[10]
selected_subject

'S5'

In [None]:
%cp "{os.path.join(DATA_PATH, selected_subject + ".csv")}" .

In [None]:
data = pd.read_csv(f"{selected_subject}.csv")

In [None]:
data.head()

Unnamed: 0.1,Unnamed: 0,chest_ECG,chest_EMG,chest_EDA,chest_Temp,chest_Resp,chest_ACC1,chest_ACC2,chest_ACC3,wrist_BVP,wrist_EDA,wrist_TEMP,wrist_ACC1,wrist_ACC2,wrist_ACC3,label,subject
0,0,-0.275803,0.0168,3.888321,34.119934,0.04425,0.8606,0.0742,0.857,-7.25,0.547723,34.09,-16.0,-45.0,127.0,0,S5
1,1,-0.229752,0.014969,3.913116,34.142822,0.032043,0.8456,0.078,0.8574,-7.25,0.547723,34.09,-16.0,-45.0,127.0,0,S5
2,2,-0.195282,0.015335,3.873062,34.135193,0.015259,0.841,0.0874,0.757,-7.25,0.547723,34.09,-16.0,-45.0,127.0,0,S5
3,3,-0.164474,-0.003708,3.883362,34.13675,-0.062561,0.8418,0.0986,0.5782,-7.25,0.547723,34.09,-16.0,-45.0,127.0,0,S5
4,4,-0.134811,0.017624,3.886795,34.11844,0.016785,0.8554,0.0984,0.3332,-7.25,0.547723,34.09,-16.0,-45.0,127.0,0,S5


## Data Pre-process

In [None]:
data.columns

Index(['Unnamed: 0', 'chest_ECG', 'chest_EMG', 'chest_EDA', 'chest_Temp',
       'chest_Resp', 'chest_ACC1', 'chest_ACC2', 'chest_ACC3', 'wrist_BVP',
       'wrist_EDA', 'wrist_TEMP', 'wrist_ACC1', 'wrist_ACC2', 'wrist_ACC3',
       'label', 'subject'],
      dtype='object')

In [None]:
data_columns = [
    'chest_ECG', 'chest_EMG', 'chest_EDA', 'chest_Temp',
    'chest_Resp', 'chest_ACC1', 'chest_ACC2', 'chest_ACC3', 'wrist_BVP',
    'wrist_EDA', 'wrist_TEMP', 'wrist_ACC1', 'wrist_ACC2', 'wrist_ACC3'
]

In [None]:
target_column = "label"

In [None]:
# new_data = data[data["label"].isin((1, 2, 3, 4))]

In [None]:
# new_data = new_data.reset_index()

In [None]:
# new_data.drop(["index"], axis=1)

In [None]:
data.drop(data.loc[~data["label"].isin((1, 2, 3, 4))].index, inplace=True)

In [None]:
data["label"].unique

<bound method Series.unique of 195560     1
195561     1
195562     1
195563     1
195564     1
          ..
4112055    4
4112056    4
4112057    4
4112058    4
4112059    4
Name: label, Length: 2107700, dtype: int64>

In [None]:
data.reset_index(drop=True, inplace=True)

In [None]:
data

Unnamed: 0.1,Unnamed: 0,chest_ECG,chest_EMG,chest_EDA,chest_Temp,chest_Resp,chest_ACC1,chest_ACC2,chest_ACC3,wrist_BVP,wrist_EDA,wrist_TEMP,wrist_ACC1,wrist_ACC2,wrist_ACC3,label,subject
0,195560,-0.028976,-0.007828,6.387711,34.251160,2.789307,0.8408,-0.0390,-0.3862,25.52,1.363788,34.34,63.0,4.0,9.0,1,S5
1,195561,-0.029114,-0.003296,6.384277,34.252716,2.891541,0.8418,-0.0422,-0.3862,25.52,1.363788,34.34,63.0,4.0,9.0,1,S5
2,195562,-0.030212,-0.005264,6.398392,34.249634,2.861023,0.8386,-0.0390,-0.3906,25.52,1.363788,34.34,63.0,4.0,9.0,1,S5
3,195563,-0.030991,-0.003159,6.392288,34.298553,2.833557,0.8406,-0.0394,-0.3914,25.52,1.363788,34.34,63.0,4.0,9.0,1,S5
4,195564,-0.031906,-0.001968,6.385803,34.326050,2.857971,0.8424,-0.0434,-0.3914,25.52,1.363788,34.34,63.0,4.0,9.0,1,S5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2107695,4112055,-0.132889,-0.005768,6.108475,34.826170,-3.486633,0.8318,-0.0254,-0.4156,-5.70,0.873854,30.41,61.0,4.0,17.0,4,S5
2107696,4112056,-0.133118,-0.002747,6.103134,34.830780,-3.498840,0.8318,-0.0252,-0.4134,-5.70,0.873854,30.41,61.0,4.0,17.0,4,S5
2107697,4112057,-0.132614,-0.006363,6.110001,34.826170,-3.504944,0.8330,-0.0242,-0.4114,-5.70,0.873854,30.41,61.0,4.0,17.0,4,S5
2107698,4112058,-0.131607,-0.007278,6.103897,34.850800,-3.480530,0.8334,-0.0238,-0.4130,-5.70,0.873854,30.41,61.0,4.0,17.0,4,S5


In [None]:
new_data = data.sample(50000)

In [None]:
X = new_data[data_columns]

In [None]:
y = new_data[target_column]

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30)

In [None]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((35000, 14), (15000, 14), (35000,), (15000,))

## All Models

In [None]:
all_metrics = {}

In [None]:
def classify_and_show(name, classifier, X_train, X_test, y_train, y_test):
    all_metrics[name] = {}
    classifier.fit(X_train, y_train)
    
    train_predicts = classifier.predict(X_train)
    train_acc = (sum(train_predicts == y_train)/len(y_train))*100
    all_metrics[name]["train_acc"] = train_acc
    print("\nTrain accuracy:", train_acc, "%\n")

    predicts = classifier.predict(X_test)
    test_acc = (sum(predicts == y_test)/len(y_test))*100
    all_metrics[name]["test_acc"] = test_acc
    print("\nTest accuracy:", test_acc, "%\n")
    
    print("\n\nPrecision")
    prec = metrics.precision_score(y_test, predicts, average="macro")
    all_metrics[name]["precision"] = prec
    print(prec)
    
    print("\n\nRecall")
    recall = metrics.recall_score(y_test, predicts, average="macro")
    all_metrics[name]["recall"] = recall
    print(recall)
    
    print("\n\nF1 Score")
    f1_score = metrics.f1_score(y_test, predicts, average="macro")
    all_metrics[name]["f1_score"] = f1_score
    print(f1_score)
    
    print("\n\nClassification Report")
    print(metrics.classification_report(y_test, predicts, target_names=["Baseline", "Stress", "Amusement", "Meditation"]))
    
    print("\n\nConfusion matrix")
    print(metrics.confusion_matrix(y_test, predicts, labels=[1, 2, 3, 4]))
    
    print("\n\nCohen Kappa score")
    # near 1 is better, 0 is bad
    cohen = metrics.cohen_kappa_score(y_test, predicts)
    all_metrics[name]["cohen"] = cohen
    print(cohen)
    
    print("\n\nMatthews Correlation Coefficient")
    # near 1 or -1 is better, 0 is bad
    matthews = metrics.matthews_corrcoef(y_test, predicts)
    all_metrics[name]["matthews"] = matthews
    print(matthews)
    
    return train_predicts, predicts

In [None]:
classifier = KNeighborsClassifier(n_neighbors=3)

In [None]:
classify_and_show("KNN", classifier, X_train, X_test, y_train, y_test)


Train accuracy: 99.27428571428571 %


Test accuracy: 98.6 %



Precision
0.983554216412079


Recall
0.98695548200297


F1 Score
0.9851814975913616


Classification Report
              precision    recall  f1-score   support

    Baseline       0.99      0.99      0.99      6052
      Stress       1.00      1.00      1.00      3137
   Amusement       0.96      0.99      0.98      1869
  Meditation       0.99      0.97      0.98      3942

    accuracy                           0.99     15000
   macro avg       0.98      0.99      0.99     15000
weighted avg       0.99      0.99      0.99     15000



Confusion matrix
[[5976    1   42   33]
 [   0 3137    0    0]
 [  16    1 1849    3]
 [  73   10   31 3828]]


Cohen Kappa score
0.9802621810211021


Matthews Correlation Coefficient
0.9802906021666866


(array([4, 3, 4, ..., 1, 1, 4]), array([4, 1, 4, ..., 2, 3, 1]))

In [None]:
classifier = SVC(kernel="linear")
classify_and_show("SVM Linear", classifier, X_train, X_test, y_train, y_test)


Train accuracy: 99.99714285714286 %


Test accuracy: 99.98666666666666 %



Precision
0.999895278582231


Recall
0.9998028190463161


F1 Score
0.9998490292113993


Classification Report
              precision    recall  f1-score   support

    Baseline       1.00      1.00      1.00      6052
      Stress       1.00      1.00      1.00      3137
   Amusement       1.00      1.00      1.00      1869
  Meditation       1.00      1.00      1.00      3942

    accuracy                           1.00     15000
   macro avg       1.00      1.00      1.00     15000
weighted avg       1.00      1.00      1.00     15000



Confusion matrix
[[6052    0    0    0]
 [   0 3137    0    0]
 [   0    0 1868    1]
 [   1    0    0 3941]]


Cohen Kappa score
0.9998119072952


Matthews Correlation Coefficient
0.9998119139077892


(array([4, 3, 4, ..., 1, 1, 4]), array([4, 1, 4, ..., 2, 3, 1]))

In [None]:
classifier = SVC()
classify_and_show("SVM RBF", classifier, X_train, X_test, y_train, y_test)


Train accuracy: 98.09428571428572 %


Test accuracy: 98.06666666666666 %



Precision
0.979649616512791


Recall
0.9800530288560965


F1 Score
0.979712121316088


Classification Report
              precision    recall  f1-score   support

    Baseline       0.97      0.99      0.98      6052
      Stress       1.00      1.00      1.00      3137
   Amusement       0.96      0.98      0.97      1869
  Meditation       1.00      0.96      0.98      3942

    accuracy                           0.98     15000
   macro avg       0.98      0.98      0.98     15000
weighted avg       0.98      0.98      0.98     15000



Confusion matrix
[[5976    2   60   14]
 [   0 3137    0    0]
 [  44    1 1823    1]
 [ 133   10   25 3774]]


Cohen Kappa score
0.9727064310500073


Matthews Correlation Coefficient
0.9728145357298751


(array([4, 3, 4, ..., 1, 1, 4]), array([4, 1, 4, ..., 2, 3, 1]))

In [None]:
classifier = DecisionTreeClassifier(max_depth=5)
classify_and_show("DT", classifier, X_train, X_test, y_train, y_test)


Train accuracy: 99.83714285714285 %


Test accuracy: 99.80666666666667 %



Precision
0.9980283170178641


Recall
0.9963982784124886


F1 Score
0.9972000189641022


Classification Report
              precision    recall  f1-score   support

    Baseline       1.00      1.00      1.00      6052
      Stress       1.00      1.00      1.00      3137
   Amusement       1.00      0.99      0.99      1869
  Meditation       0.99      1.00      1.00      3942

    accuracy                           1.00     15000
   macro avg       1.00      1.00      1.00     15000
weighted avg       1.00      1.00      1.00     15000



Confusion matrix
[[6049    0    2    1]
 [   0 3137    0    0]
 [   0    0 1843   26]
 [   0    0    0 3942]]


Cohen Kappa score
0.9972719846368209


Matthews Correlation Coefficient
0.9972761317612946


(array([4, 3, 4, ..., 1, 1, 4]), array([4, 1, 4, ..., 2, 3, 1]))

In [None]:
classifier = RandomForestClassifier(max_depth=5, n_estimators=10)
classify_and_show("Random Forest", classifier, X_train, X_test, y_train, y_test)


Train accuracy: 99.98 %


Test accuracy: 99.95333333333333 %



Precision
0.9995568498354014


Recall
0.9991177378065659


F1 Score
0.9993364457612643


Classification Report
              precision    recall  f1-score   support

    Baseline       1.00      1.00      1.00      6052
      Stress       1.00      1.00      1.00      3137
   Amusement       1.00      1.00      1.00      1869
  Meditation       1.00      1.00      1.00      3942

    accuracy                           1.00     15000
   macro avg       1.00      1.00      1.00     15000
weighted avg       1.00      1.00      1.00     15000



Confusion matrix
[[6052    0    0    0]
 [   0 3136    0    1]
 [   0    0 1863    6]
 [   0    0    0 3942]]


Cohen Kappa score
0.9993416381356147


Matthews Correlation Coefficient
0.9993419110405988


(array([4, 3, 4, ..., 1, 1, 4]), array([4, 1, 4, ..., 2, 3, 1]))

In [None]:
classifier = MLPClassifier(alpha=1, max_iter=1000)
classify_and_show("MLP", classifier, X_train, X_test, y_train, y_test)


Train accuracy: 99.55428571428571 %


Test accuracy: 99.61333333333333 %



Precision
0.9951263324921766


Recall
0.996152917497271


F1 Score
0.9956333158147693


Classification Report
              precision    recall  f1-score   support

    Baseline       1.00      1.00      1.00      6052
      Stress       1.00      1.00      1.00      3137
   Amusement       0.99      1.00      0.99      1869
  Meditation       1.00      0.99      1.00      3942

    accuracy                           1.00     15000
   macro avg       1.00      1.00      1.00     15000
weighted avg       1.00      1.00      1.00     15000



Confusion matrix
[[6031    0   16    5]
 [   0 3137    0    0]
 [   7    0 1860    2]
 [  18    1    9 3914]]


Cohen Kappa score
0.9945463276459158


Matthews Correlation Coefficient
0.9945485657178672


(array([4, 3, 4, ..., 1, 1, 4]), array([4, 1, 4, ..., 2, 3, 1]))

In [None]:
classifier = AdaBoostClassifier()
classify_and_show("AdaBoost", classifier, X_train, X_test, y_train, y_test)


Train accuracy: 87.52857142857144 %


Test accuracy: 87.32666666666667 %



Precision
0.6679593571949298


Recall
0.7480811279414028


F1 Score
0.7005014978799744


Classification Report
              precision    recall  f1-score   support

    Baseline       1.00      1.00      1.00      6052
      Stress       1.00      1.00      1.00      3137
   Amusement       0.00      0.00      0.00      1869
  Meditation       0.68      0.99      0.80      3942

    accuracy                           0.87     15000
   macro avg       0.67      0.75      0.70     15000
weighted avg       0.79      0.87      0.82     15000



Confusion matrix
[[6047    0    0    5]
 [   0 3137    0    0]
 [   0    0    0 1869]
 [  27    0    0 3915]]


Cohen Kappa score
0.8167170799145348


Matthews Correlation Coefficient
0.8362353730791847



Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



(array([4, 4, 4, ..., 1, 1, 4]), array([4, 1, 4, ..., 2, 4, 1]))

## Compare classifiers

In [None]:
all_metrics

{'AdaBoost': {'cohen': 0.8167170799145348,
  'f1_score': 0.7005014978799744,
  'matthews': 0.8362353730791847,
  'precision': 0.6679593571949298,
  'recall': 0.7480811279414028,
  'test_acc': 87.32666666666667,
  'train_acc': 87.52857142857144},
 'DT': {'cohen': 0.9972719846368209,
  'f1_score': 0.9972000189641022,
  'matthews': 0.9972761317612946,
  'precision': 0.9980283170178641,
  'recall': 0.9963982784124886,
  'test_acc': 99.80666666666667,
  'train_acc': 99.83714285714285},
 'KNN': {'cohen': 0.9802621810211021,
  'f1_score': 0.9851814975913616,
  'matthews': 0.9802906021666866,
  'precision': 0.983554216412079,
  'recall': 0.98695548200297,
  'test_acc': 98.6,
  'train_acc': 99.27428571428571},
 'MLP': {'cohen': 0.9945463276459158,
  'f1_score': 0.9956333158147693,
  'matthews': 0.9945485657178672,
  'precision': 0.9951263324921766,
  'recall': 0.996152917497271,
  'test_acc': 99.61333333333333,
  'train_acc': 99.55428571428571},
 'Random Forest': {'cohen': 0.9993416381356147,
 

In [None]:
models = list(all_metrics.keys())

In [None]:
fig = px.bar(x=models,
             y=[all_metrics[model]["train_acc"] for model in models],
             labels={
                 "x": "Model Name",
                 "y": "Train Accuracy"
             })
fig.update_yaxes(range=[80, 100])
fig.show()

In [None]:
fig = px.bar(x=models,
             y=[all_metrics[model]["test_acc"] for model in models],
             labels={
                 "x": "Model Name",
                 "y": "Test Accuracy"
             })
fig.update_yaxes(range=[80, 100])
fig.show()

In [None]:
fig = px.bar(x=models,
             y=[all_metrics[model]["f1_score"] for model in models],
             labels={
                 "x": "Model Name",
                 "y": "Average F1 Score"
             })
fig.update_yaxes(range=[0.6, 1])
fig.show()

In [None]:
fig = px.bar(x=models,
             y=[all_metrics[model]["precision"] for model in models],
             labels={
                 "x": "Model Name",
                 "y": "Precision"
             })
fig.update_yaxes(range=[0.6, 1])
fig.show()

In [None]:
fig = px.bar(x=models,
             y=[all_metrics[model]["recall"] for model in models],
             labels={
                 "x": "Model Name",
                 "y": "Recall"
             })
fig.update_yaxes(range=[0.6, 1])
fig.show()