In [1]:
import pandas as pd
import numpy as np
import datetime
from datetime import datetime as dt
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
ACC, BVP, EDA, HR, IBI, tags, TEMP = [], [], [], [], [], [], []

## Loading The CSV files

In [3]:
# This Code block will load the csv files for every student.
# The index of every list actually contains the data of that perticular student 
# e.g: ACC[0] contains ACC values of Student # 1, ACC[1] contains ACC Values for Student # 2
# All the other lists follow the same pattern

for i in range(1,36):
    ACC.append(pd.read_csv('Raw_data/S' + '%02d'%i + '/ACC.csv', header=None, index_col=None))
    BVP.append(pd.read_csv('Raw_data/S' + '%02d'%i + '/BVP.csv', header=None, index_col=None))
    EDA.append(pd.read_csv('Raw_data/S' + '%02d'%i + '/EDA.csv', header=None, index_col=None))
    HR.append(pd.read_csv('Raw_data/S' + '%02d'%i + '/HR.csv', header=None, index_col=None))
    IBI.append(pd.read_csv('Raw_data/S' + '%02d'%i + '/IBI.csv', header=None, index_col=None))
    tags.append(pd.read_csv('Raw_data/S' + '%02d'%i + '/tags_S' + '%02d'%i + '.csv', header=None))
    TEMP.append(pd.read_csv('Raw_data/S' + '%02d'%i + '/TEMP.csv', header=None, index_col=None))

## Combining the BVP, EDA, HR and TEMP of for each student.

In [4]:
# This Code block actually combines the BVP, EDA, HR and TEMP values of each studeint in one dictonary.

# Also, it checks for the start time of each signal and considers only those values which come after the start time of signal which start last.

S = []
first_start, last_start, last_start_simple, diff = [], [], [], []
for i in range(0, 35):
    S.append({"BVP": [], "EDA": [], "HR": [], "TEMP": []})
    last_start.append(max([BVP[i][0][0], EDA[i][0][0], HR[i][0][0], TEMP[i][0][0]]))
    first_start.append(min([BVP[i][0][0], EDA[i][0][0], HR[i][0][0], TEMP[i][0][0]]))
    diff.append(int(last_start[i] - first_start[i]))
    S[i]["BVP"].append(BVP[i][0].drop(BVP[i][0].index[0:((diff[i] * int(BVP[i][0][1])) - 1)]).reset_index(drop=True))
    S[i]["EDA"].append(EDA[i][0].drop(EDA[i][0].index[0:((diff[i] * int(EDA[i][0][1])) - 1)]).reset_index(drop=True))
    S[i]["HR"].append(HR[i][0].drop(HR[i][0].index[0:((diff[i] * int(HR[i][0][1])) - 1)]).reset_index(drop=True))
    S[i]["TEMP"].append(TEMP[i][0].drop(TEMP[i][0].index[0:((diff[i] * int(TEMP[i][0][1])) - 1)]).reset_index(drop=True))
    last_start_simple.append(datetime.datetime.fromtimestamp(last_start[i]).strftime('%I:%M:%S'))
    first_start[i] = datetime.datetime.fromtimestamp(first_start[i]).strftime('%H:%M:%S')
    S[i]["BVP"] = S[i]["BVP"][0].tolist()
    S[i]["EDA"] = S[i]["EDA"][0].tolist()
    S[i]["HR"] = S[i]["HR"][0].tolist()
    S[i]["TEMP"] = S[i]["TEMP"][0].tolist()

## Dealing with the different Sampling Rate of Signals

In [5]:
# This block is actually taking the sample from all the signals ater one second because, the lowest samppling rate is of HR (1 sample per Second)

for i in range(0,35):
    for j in range(int(len(S[i]["BVP"])/BVP[i][0][1])):
        Y = 0
        del S[i]["BVP"][1+Y : Y + int(BVP[i][0][1])]
        Y = Y + int(BVP[i][0][1])

    for j in range(int(len(S[i]["EDA"])/EDA[i][0][1])):
        Y = 0
        del S[i]["EDA"][1+Y : Y + int(EDA[i][0][1])]
        Y = Y + int(EDA[i][0][1])

    for j in range(int(len(S[i]["HR"])/HR[i][0][1])):
        Y = 0
        del S[i]["HR"][1+Y : Y + int(HR[i][0][1])]
        Y = Y + int(HR[i][0][1])

    for j in range(int(len(S[i]["TEMP"])/TEMP[i][0][1])):
        Y = 0
        del S[i]["TEMP"][1+Y : Y + int(TEMP[i][0][1])]
        Y = Y + int(TEMP[i][0][1])

## Reading the Time Log for Students

In [6]:
time_log = pd.read_excel('Processed_data/Time_logs.xlsx')

## Defining stressed and non-Stressed periods

In [7]:
# This block of code actually markes the time period of Stroop Test, Interview and Hyperventilation test as the stressfull periods.

# Label column represents the stress level as 0 or 1.
# 0 = No stress
# 1 = Stress

for i in range(0, 35):
    minimun = min(len(lst) for lst in S[i].values())
    S[i]["Label"] = [0] * minimun
    for j in [8, 10, 12]:
        if time_log.iloc[i+1,8] > dt.time(dt.strptime(last_start_simple[i], "%H:%M:%S")):
            differ1 = (time_log.iloc[i+1,j].hour * 3600 + time_log.iloc[i+1,j].minute * 60) - (dt.time(dt.strptime(last_start_simple[i], "%H:%M:%S")).hour *3600 + dt.time(dt.strptime(last_start_simple[i], "%H:%M:%S")).minute *60 + dt.time(dt.strptime(last_start_simple[i], "%H:%M:%S")).second)
            differ2 = (time_log.iloc[i+1,j+1].hour * 3600 + time_log.iloc[i+1,j+1].minute * 60) - (dt.time(dt.strptime(last_start_simple[i], "%H:%M:%S")).hour *3600 + dt.time(dt.strptime(last_start_simple[i], "%H:%M:%S")).minute *60 + dt.time(dt.strptime(last_start_simple[i], "%H:%M:%S")).second)
            S[i]["Label"][differ1:differ2] = [1]*(differ2-differ1)

    del S[i]["BVP"][minimun:]
    del S[i]["EDA"][minimun:]
    del S[i]["HR"][minimun:]
    del S[i]["TEMP"][minimun:]
    del S[i]["Label"][minimun:]

## Converting the Dictionaries of every students into separate dataframes.

In [8]:
S_df = []
for i in range(0, 35):
    S_df.append(pd.DataFrame(S[i]))

In [9]:
S_df[0]

Unnamed: 0,BVP,EDA,HR,TEMP,Label
0,-27.92,0.170407,82.38,28.39,0
1,-32.25,0.477908,82.67,28.13,0
2,-36.93,0.479189,83.10,28.13,0
3,-40.81,0.480470,83.27,28.13,0
4,-41.42,0.479189,83.25,28.13,0
...,...,...,...,...,...
3238,-14.82,0.080719,83.35,28.41,0
3239,-21.09,0.029469,83.33,28.41,0
3240,-25.67,0.039719,83.28,28.41,0
3241,-28.75,0.019219,83.22,28.49,0


## Combining all the dataframes into single dataframe.

In [10]:
Students = pd.concat([S_df[k] for k in range(0,35)])
Students.reset_index(drop=True)

Unnamed: 0,BVP,EDA,HR,TEMP,Label
0,-27.92,0.170407,82.38,28.39,0
1,-32.25,0.477908,82.67,28.13,0
2,-36.93,0.479189,83.10,28.13,0
3,-40.81,0.480470,83.27,28.13,0
4,-41.42,0.479189,83.25,28.13,0
...,...,...,...,...,...
115470,23.84,0.238594,65.37,32.18,0
115471,27.86,0.236032,65.42,32.18,0
115472,33.12,0.218093,65.45,32.18,0
115473,39.34,0.219374,65.47,32.18,0


## Exporting the Last DataFrame to cvs file

In [11]:
Students.to_csv("Combined.csv")

## Classification

## Decision Tree Classification

In [12]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split, cross_val_score

X = Students.iloc[:,:4]
y = Students["Label"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42) # 70% training and 30% test

tree = DecisionTreeClassifier()
tree.fit(X_train, y_train)

In [13]:
preds = tree.predict(X_test)
print(classification_report(preds, y_test))
print("Confusion Metrix", confusion_matrix(preds, y_test))

              precision    recall  f1-score   support

           0       0.98      0.98      0.98     22342
           1       0.96      0.96      0.96     12301

    accuracy                           0.97     34643
   macro avg       0.97      0.97      0.97     34643
weighted avg       0.97      0.97      0.97     34643

Confusion Metrix [[21903   439]
 [  451 11850]]


In [14]:
# Cross Validation on Decision Tree Classifier

scores = cross_val_score(tree, X, y, cv=4)

print(scores)
print("Cross-Validation average Score: ", np.mean(scores))

[0.56926115 0.49063009 0.54113409 0.53602605]
Cross-Validation average Score:  0.5342628427209949


## Defining MLP

In [15]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.losses import BinaryCrossentropy

model = Sequential()
model.add(Dense(512, activation='relu'))
model.add(Dense(256, activation='relu'))
model.add(Dense(128, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', metrics='acc', loss=BinaryCrossentropy())

2023-04-24 07:44:55.237441: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-04-24 07:44:58.129634: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [16]:
model.fit(X_train, y_train, epochs=500, batch_size=512, validation_split=0.15, verbose=False)

  return t[start:end]


<keras.callbacks.History at 0x7f8832715390>

In [17]:
model.evaluate(X_test, y_test)



[0.48956725001335144, 0.7515226602554321]

## References

[1] Measurement Device for Stress Level and Vital Sign Based on Sensor Fusion.
    https://doi.org/10.4258/hir.2021.27.1.11

[2] Efficient methods for acute stress detection using heart rate variability data from Ambient Assisted Living sensors.
    https://doi.org/10.1186/s12938-021-00911-6

[3] Continuous Stress Detection Using Wearable Sensors in Real Life: Algorithmic Programming Contest Case Study.
    https://doi.org/10.3390/s19081849