## Traditional ML classification models under wavelet analysis structures

In [1]:
import numpy as np
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
import pandas as pd
from sklearn.metrics import accuracy_score

### SVM
#### Read Data

In [2]:
# Configurations
# Illuminance-condition keywords; the labeling cell looks for them as substrings
# of each recording's file name.
LOWLUX = 'lowlux'
MIDLUX = 'middlelux'
HIGHLUX = 'highlux'
# Task-difficulty keywords, also matched as substrings of the file name.
# NOBACK ('nothing') is the condition that the labeling cell encodes as 0.
NOBACK = 'nothing'
ONEBACK = 'ONEBACK'
TWOBACK = 'TWOBACK'
THREEBACK = 'THREEBACK'
# Column names of the results table that the notebook works with.
FILENAME = 'Filename'
IPA2 = 'IPA 2'
# Row labels (after the index reset in the labeling cell) that are held out of
# training and used as the validation split.
VALIDATIONSET = [1, 7, 14, 21, 24, 38, 40, 47]

In [3]:
# Read the results table from CSV into a DataFrame for further processing.
# The path is relative to the notebook's working directory.
filepath = '../Data/Results/07-11-12-50/results.csv'
df_raw = pd.read_csv(filepath)
# df_raw  (uncomment to inspect the full raw table)

In [4]:
# Keep only the file name and the IPA 2 feature; work on a copy so that later
# column additions never touch df_raw.
df_labels_IPA2 = df_raw.loc[:, [FILENAME, IPA2]].copy()
df_labels_IPA2

Unnamed: 0,Filename,IPA 2
0,05-10-13-15-lowlux-nothing,0.120845
1,05-10-13-17-lowlux-ONEBACK,0.29444
2,05-10-13-19-lowlux-TWOBACK,0.135787
3,05-10-13-21-lowlux-THREEBACK,0.372591
4,05-10-13-28-highlux-nothing,0.196893
5,05-10-13-30-highlux-ONEBACK,0.229213
6,05-10-13-33-highlux-TWOBACK,0.314535
7,05-10-13-36-highlux-THREEBACK,0.25498
8,06-10-13-59-middlelux-nothing,0.321147
9,06-10-14-01-middlelux-ONEBACK,0.228015


In [5]:
# Labeling
# Derive the illuminance level (LUX) and the n-back task difficulty of every
# recording from the keywords embedded in its file name.
# Each table holds (keyword, label) pairs; they are checked in order and the
# first keyword found in the file name wins.
LUX_LABELS = ((LOWLUX, 10), (MIDLUX, 20), (HIGHLUX, 30))
NBACK_LABELS = ((NOBACK, 0), (ONEBACK, 1), (TWOBACK, 2), (THREEBACK, 3))


def _label_from_filename(filename, keyword_labels):
    """Return the label of the first keyword that occurs in ``filename``.

    Parameters
    ----------
    filename : str
        File name of one recording.
    keyword_labels : iterable of (str, int)
        Ordered (keyword, label) pairs; earlier pairs take precedence.

    Returns
    -------
    int
        Label paired with the first matching keyword.

    Raises
    ------
    ValueError
        If no keyword occurs in ``filename``. Failing here names the offending
        file instead of leaving the label list one entry short.
    """
    for keyword, label in keyword_labels:
        if keyword in filename:
            return label
    raise ValueError(
        f"Cannot label {filename!r}: none of "
        f"{[keyword for keyword, _ in keyword_labels]} occurs in the file name."
    )


# Renumber the rows 0..n-1 so that the row labels in VALIDATIONSET line up with
# row positions. drop=True plus the guarded insert keeps the 'index' column this
# cell has always produced, while making the cell safe to re-run: a bare
# reset_index() adds one more column on every run and eventually raises.
df_labels_IPA2 = df_labels_IPA2.reset_index(drop=True)
if 'index' not in df_labels_IPA2.columns:
    df_labels_IPA2.insert(0, 'index', df_labels_IPA2.index.to_numpy())

# One label per row, in row order.
luxes = [_label_from_filename(name, LUX_LABELS) for name in df_labels_IPA2[FILENAME]]
nbacks = [_label_from_filename(name, NBACK_LABELS) for name in df_labels_IPA2[FILENAME]]
df_labels_IPA2['LUX'] = luxes
df_labels_IPA2['nback_labels'] = nbacks

In [6]:
# Display the labelled table to check the LUX and nback_labels columns against the file names.
df_labels_IPA2

Unnamed: 0,index,Filename,IPA 2,LUX,nback_labels
0,0,05-10-13-15-lowlux-nothing,0.120845,10,0
1,1,05-10-13-17-lowlux-ONEBACK,0.29444,10,1
2,2,05-10-13-19-lowlux-TWOBACK,0.135787,10,2
3,3,05-10-13-21-lowlux-THREEBACK,0.372591,10,3
4,4,05-10-13-28-highlux-nothing,0.196893,30,0
5,5,05-10-13-30-highlux-ONEBACK,0.229213,30,1
6,6,05-10-13-33-highlux-TWOBACK,0.314535,30,2
7,7,05-10-13-36-highlux-THREEBACK,0.25498,30,3
8,8,06-10-13-59-middlelux-nothing,0.321147,20,0
9,9,06-10-14-01-middlelux-ONEBACK,0.228015,20,1


#### Model

In [7]:
# Hold out the rows whose labels are listed in VALIDATIONSET for validation and
# train on all remaining rows.
feature_cols = ['IPA 2', 'LUX']
target_col = 'nback_labels'

# Training data: every row that is not part of the validation set.
df_training = df_labels_IPA2.drop(index=VALIDATIONSET)
X_train = df_training[feature_cols].to_numpy()
y_train = df_training[target_col].to_numpy()  # 1-D vector of n-back labels

# Validation data: exactly the held-out rows, in VALIDATIONSET order.
df_validation = df_labels_IPA2.loc[VALIDATIONSET]
X_test = df_validation[feature_cols].to_numpy()
y_test = df_validation[target_col].to_numpy()  # 1-D vector of n-back labels

In [8]:
# Try an SVM, because it is one of the most widely used models in cognitive workload classification.
# Toy data for a quick smoke test (unused):
# X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
# y = np.array([1, 1, 2, 2])
# The pipeline standardizes the two features (IPA 2 and LUX) before they reach the RBF-kernel SVC.
# NOTE(review): gamma='auto' should mean 1 / n_features per the scikit-learn docs; confirm for the installed version.
clf = make_pipeline(StandardScaler(), SVC(kernel='rbf', gamma='auto'))
clf.fit(X_train, y_train)

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('svc', SVC(gamma='auto'))])

In [9]:
# Predict the n-back label of every held-out row and print the predictions
# next to the true labels for a side-by-side comparison.
y_pred = clf.predict(X_test)
print(y_pred, y_test)

[2 0 0 2 2 0 2 2] [1 3 2 1 0 2 0 3]


In [10]:
# Accuracy score calculation: fraction of held-out rows whose predicted label equals the true label.
# NOTE(review): the recorded run scores 0.0 on the 8 validation rows, which is below what
# random guessing among four classes would be expected to reach; the features or the
# split deserve a closer look before drawing conclusions.
accuracy_score(y_test, y_pred)

0.0