Stress identification from physiological data.

In [17]:
import time
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

In [18]:
# Load the datasets
X = pd.read_csv(r"all_physiological_features.csv")      # Feature dataset
y = pd.read_csv(r"binary_stress_affect3_class_phy.csv").iloc[:, 1] 

In [19]:
X = X.iloc[:, 1:]


In [20]:
X

Unnamed: 0,meanHR,minHR,maxHR,sdHR,modeHR,nNN,meanNN,SDSD,CVNN,SDNN,...,min_scl,mean_scl,sd_scl,nSCR,aucSCR,meanAmpSCR,maxAmpSCR,meanRespSCR,sumAmpSCR,sumRespSCR
0,63.430940,57.034221,79.575597,4.686314,22.541376,62.000000,950.677419,42.033401,64.843179,0.068207,...,-1.132743,0.022668,0.891539,10.000000,-302.978934,0.666899,1.475682,1.160000,6.668994,11.600000
1,61.712465,45.871560,84.269663,11.008515,38.398103,59.666667,1002.893855,106.659486,173.316531,0.172816,...,-0.968230,-0.006435,0.951185,7.333333,120.038332,0.651699,1.748524,1.126095,4.779126,7.882667
2,70.973331,58.365759,82.872928,5.449003,24.507169,69.000000,850.550725,42.532503,67.609179,0.079489,...,-2.159093,0.012778,0.827482,7.000000,-92.589522,0.860715,2.091642,2.433429,6.025006,17.034000
3,64.301154,56.285178,79.787234,5.497478,23.502056,63.000000,939.587302,58.359502,76.110328,0.081004,...,-1.167276,-0.011765,0.937448,6.000000,315.591734,0.543249,1.502306,1.810000,3.259493,10.860000
4,66.253133,55.762082,80.645161,5.443261,24.883080,65.000000,911.661538,42.853573,74.014193,0.081186,...,-1.494708,-0.015553,0.904099,12.000000,558.708830,0.721123,1.743292,1.282167,8.653481,15.386000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
768,58.768442,50.933786,75.000000,4.025131,24.066214,58.000000,1025.475862,73.281653,66.410643,0.064761,...,-1.017887,-0.000668,0.990734,2.400000,122.464218,0.197116,0.600114,13.550667,0.473079,32.521600
769,75.767263,61.601643,173.410405,22.408291,111.808762,71.000000,833.521127,138.003990,143.843994,0.172574,...,-2.730086,0.022081,0.680286,5.000000,205.591733,0.939958,1.721442,2.791200,4.699788,13.956000
770,64.712474,58.027079,73.710074,3.843868,15.682994,63.000000,930.412698,45.572140,54.571530,0.058653,...,-1.291715,-0.029019,0.932817,5.000000,608.317194,0.320072,0.772637,1.639200,1.600359,8.196000
771,60.061609,53.191489,75.757576,3.684374,22.566086,59.310345,1002.581395,58.998370,59.073755,0.058922,...,-1.833577,0.002666,0.974272,3.103448,5.575997,0.310237,1.061803,3.314667,0.962805,10.286897


In [21]:
y

0      0
1      0
2      0
3      0
4      1
      ..
768    0
769    0
770    0
771    1
772    1
Name: binary-stress, Length: 773, dtype: int64

In [22]:
y.value_counts()


binary-stress
0    391
1    382
Name: count, dtype: int64

In [23]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


binary classification of stress

In [24]:
list_classif = [RandomForestClassifier(max_depth=5, random_state=0), 
                KNeighborsClassifier(n_neighbors=3), 
                SVC(gamma='auto', kernel='rbf', random_state=0), 
                MLPClassifier(max_iter=5000, random_state=0, hidden_layer_sizes=[])
]

In [25]:
results = {}


In [26]:
# Loop through each classifier
for clf in list_classif:
    name = clf.__class__.__name__  # Get the classifier name for display
    
    start_time = time.time()  # Start time for training
    
    # Train the classifier
    clf.fit(X_train, y_train)
    
    # Calculate training time
    training_time = time.time() - start_time
    
    # Make predictions
    y_pred = clf.predict(X_test)
    
    # Calculate accuracy and F1 score
    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    
    # Store results
    results[name] = {
        "Training Time (s)": training_time,
        "Accuracy": accuracy,
        "F1 Score": f1
    }

In [27]:
results_df = pd.DataFrame(results).T
print(results_df)

                        Training Time (s)  Accuracy  F1 Score
RandomForestClassifier           0.691219  0.438710  0.400000
KNeighborsClassifier             0.008094  0.438710  0.423841
SVC                              0.066396  0.509677  0.000000
MLPClassifier                    0.033462  0.503226  0.000000


3 class classification of stress

In [28]:
y = pd.read_csv(r"binary_stress_affect3_class_phy.csv").iloc[:, 2] 

In [29]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [30]:
results_3class = {}


In [31]:
for clf in list_classif:
    name = clf.__class__.__name__  # Get the classifier name for display
    
    start_time = time.time()  # Start time for training
    
    # Train the classifier
    clf.fit(X_train, y_train)
    
    # Calculate training time
    training_time = time.time() - start_time
    
    # Make predictions
    y_pred = clf.predict(X_test)
    
    # Calculate accuracy and F1 score (for 3 classes, we use the macro average)
    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='macro')
    
    # Store results
    results_3class[name] = {
        "Training Time (s)": training_time,
        "Accuracy": accuracy,
        "F1 Score (macro)": f1
    }

In [32]:
results_3class_df = pd.DataFrame(results_3class).T
print(results_3class_df)

                        Training Time (s)  Accuracy  F1 Score (macro)
RandomForestClassifier           0.674387  0.322581          0.311141
KNeighborsClassifier             0.002444  0.322581          0.320759
SVC                              0.088079  0.322581          0.162602
MLPClassifier                    0.029590  0.341935          0.322055
