# Import libraries 

In [1]:
import pandas as pd 
import numpy as np 
import seaborn as sns
import matplotlib.pyplot as plt 
from scipy.fft import fft, fftfreq

# Import Mental workload sheet

In [2]:
import os 

os.getcwd()

'C:\\Users\\Mohamed Mellouky\\cerv_internship\\MentalWorkload-PhysiologicalSignals-ML'

In [3]:
nasa_tlx_df = pd.read_excel('./src/notebooks/data/All_Participant_NASATLX.xlsx')

In [4]:
nasa_tlx_df['ID'] = nasa_tlx_df['ID'].str.replace('ID', 'S')
nasa_tlx_df['ESSAI'] = nasa_tlx_df['ESSAI'].str.replace('Essai', 'trial')


In [5]:
session1_nasatlx = nasa_tlx_df[nasa_tlx_df['SESSION'] == 'Session1']

In [6]:
session1_nasatlx.rename(columns={
    'ESSAI' :  'trial', 
    'ID' : 'subject' 
    }, 
    inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  session1_nasatlx.rename(columns={


In [7]:
session1_nasatlx.to_csv('./src/notebooks/data/session1_nasatlx_scores.csv', sep=';', index=False)

# Read physiological signals

In [8]:

bvp_df = pd.read_csv("./src/notebooks/data/BVP.csv", sep=';')
acc_df = pd.read_csv("./src/notebooks/data/ACC.csv", sep=';')
temp_df = pd.read_csv("./src/notebooks/data/TEMP.csv", sep=';')
eda_df = pd.read_csv("./src/notebooks/data/EDA.csv", sep=';')

In [9]:
acc_df['magnitude'] = np.sqrt(acc_df['x_axis']**2 + acc_df['y_axis']**2 + acc_df['z_axis']**2)

# Feature Extraction 

In [40]:

def non_overlapping_rolling_stats(data, window_size, signal_name, sampling_rate):
    
    peak_freq_signals = ['EMG', 'BVP']
    slope_signals = ["TEMP"]
    
    mean_vals = []
    std_vals = []
    min_vals = []
    max_vals = []
    trial_lst = []
    peak_freqs = []
    slopes = []
    
    for start in range(0, len(data), window_size):
        window_data = data[start:start + window_size][signal_name]
        print(f'subject_data trial : {data[start:start + window_size].trial.unique()}')
        trial_lst.append(data[start:start + window_size].trial.unique()[0])
        if len(window_data) == window_size:
            mean_vals.append(np.round(window_data.mean(), 2))
            std_vals.append(np.round(window_data.std(), 2))
            min_vals.append(np.round(window_data.min(), 2))
            max_vals.append(np.round(window_data.max(), 2))
            
            if signal_name in peak_freq_signals : 
                # FFT to calculate peak frequency
                yf = fft(window_data.values)
                xf = fftfreq(window_size, 1 / sampling_rate)
                peak_freq = xf[np.argmax(np.abs(yf))]
                peak_freqs.append(peak_freq)
                
            if signal_name in slope_signals : 
                x = np.arange(window_size)
                y = window_data
                p = np.polyfit(x, y, 1)
                slopes.append(p[0])
    
    if signal_name in peak_freq_signals : 
        result = pd.DataFrame({
        f'{signal_name}mean': mean_vals,
        f'{signal_name}std': std_vals,
        f'{signal_name}min': min_vals,
        f'{signal_name}max': max_vals, 
        f'{signal_name}peak_freq': peak_freqs, 
        f'trial' : trial_lst
        })
    elif signal_name in slope_signals : 
        result = pd.DataFrame({
        f'{signal_name}mean': mean_vals,
        f'{signal_name}std': std_vals,
        f'{signal_name}min': min_vals,
        f'{signal_name}max': max_vals, 
        f'{signal_name}slope': slopes, 
        f'trial' : trial_lst
        })
    else : 
        result = pd.DataFrame({
        f'{signal_name}mean': mean_vals,
        f'{signal_name}std': std_vals,
        f'{signal_name}min': min_vals,
        f'{signal_name}max': max_vals, 
        f'trial' : trial_lst
    })
    return result

In [11]:
eda_df[eda_df['subject'] == 'S10'].trial.values

array(['trial1', 'trial1', 'trial1', ..., 'trial5', 'trial5', 'trial5'],
      dtype=object)

In [43]:
def feature_extraction(data, window_size, signal_name, sampling_rate) : 
    subjects_lst = data.subject.unique() 

    temp_subject_lst = []
    temp_feats_lst = []
    for subject in subjects_lst : 
        subject_data = data[data['subject'] == subject]
        features = non_overlapping_rolling_stats(data=subject_data, window_size=window_size, signal_name=signal_name, sampling_rate=sampling_rate)
        temp_feats_lst.append(features)
        for _ in range(0, int(features.shape[0])) : 
            temp_subject_lst.append(subject)


    features = pd.DataFrame()
    for feat_df in temp_feats_lst : 
        features = pd.concat([features, feat_df], axis=0)


    features = features.reset_index()
    features = pd.concat([features, pd.DataFrame(temp_subject_lst)], axis=1)
    # features.columns = ['index', f'{signal_name}mean', f'{signal_name}std', f'{signal_name}min', f'{signal_name}max', 'subject']
    features = features.rename(columns={
        0 : 'subject'
    })
    
    print(f'{signal_name} feature extraction : feature set shape : {features.shape}')
    
    return features


In [22]:
eda_df

Unnamed: 0,subject,trial,EDA
0,S10,trial1,4.218009
1,S10,trial1,4.216728
2,S10,trial1,4.202634
3,S10,trial1,4.219291
4,S10,trial1,4.235948
...,...,...,...
147667,S9,trial7,0.362884
147668,S9,trial7,0.380822
147669,S9,trial7,0.394917
147670,S9,trial7,0.391073


In [44]:
temp = feature_extraction(eda_df, 4, 'EDA', 4)
temp

subject_data trial : ['trial1']
subject_data trial : ['trial1']
subject_data trial : ['trial1']
subject_data trial : ['trial1']
subject_data trial : ['trial1']
subject_data trial : ['trial1']
subject_data trial : ['trial1']
subject_data trial : ['trial1']
subject_data trial : ['trial1']
subject_data trial : ['trial1']
subject_data trial : ['trial1']
subject_data trial : ['trial1']
subject_data trial : ['trial1']
subject_data trial : ['trial1']
subject_data trial : ['trial1']
subject_data trial : ['trial1']
subject_data trial : ['trial1']
subject_data trial : ['trial1']
subject_data trial : ['trial1']
subject_data trial : ['trial1']
subject_data trial : ['trial1']
subject_data trial : ['trial1']
subject_data trial : ['trial1']
subject_data trial : ['trial1']
subject_data trial : ['trial1']
subject_data trial : ['trial1']
subject_data trial : ['trial1']
subject_data trial : ['trial1']
subject_data trial : ['trial1']
subject_data trial : ['trial1']
subject_data trial : ['trial1']
subject_

Unnamed: 0,index,EDAmean,EDAstd,EDAmin,EDAmax,trial,subject
0,0,4.21,0.01,4.20,4.22,trial1,S10
1,1,4.25,0.01,4.24,4.25,trial1,S10
2,2,4.24,0.01,4.22,4.25,trial1,S10
3,3,4.23,0.01,4.22,4.24,trial1,S10
4,4,4.21,0.01,4.21,4.22,trial1,S10
...,...,...,...,...,...,...,...
36913,523,0.28,0.00,0.28,0.28,trial7,S9
36914,524,0.28,0.00,0.27,0.28,trial7,S9
36915,525,0.29,0.01,0.28,0.31,trial7,S9
36916,526,0.34,0.02,0.32,0.36,trial7,S9


In [18]:
sampling_rate = 4
window_size = 4
eda_features = feature_extraction(eda_df, window_size=window_size, signal_name='EDA', sampling_rate=sampling_rate)
temp_features = feature_extraction(temp_df, window_size=window_size, signal_name='TEMP', sampling_rate=sampling_rate)

window_size = 32
x_acc_features = feature_extraction(acc_df, window_size=window_size, signal_name='x_axis',sampling_rate=sampling_rate)
y_acc_features = feature_extraction(acc_df, window_size=window_size, signal_name='y_axis', sampling_rate=sampling_rate)
z_acc_features = feature_extraction(acc_df, window_size=window_size, signal_name='z_axis', sampling_rate=sampling_rate)
magnitude_acc_features = feature_extraction(acc_df, window_size=window_size, signal_name='magnitude', sampling_rate=sampling_rate)

window_size = 64
sampling_rate = 4
bvp_features = feature_extraction(bvp_df, window_size=window_size, signal_name='BVP', sampling_rate=sampling_rate)

EDA feature extraction : feature set shape : (36918, 6)
TEMP feature extraction : feature set shape : (36918, 7)
x_axis feature extraction : feature set shape : (36918, 6)
y_axis feature extraction : feature set shape : (36918, 6)
z_axis feature extraction : feature set shape : (36918, 6)
magnitude feature extraction : feature set shape : (36918, 6)
BVP feature extraction : feature set shape : (36918, 7)


In [19]:
features = pd.concat([eda_features, temp_features, x_acc_features, y_acc_features, z_acc_features, magnitude_acc_features, bvp_features], axis=1)
features.drop(labels=['index'], inplace=True, axis=1)

In [20]:
session1_nasatlx[session1_nasatlx['subject'] == 'S10']

Unnamed: 0,subject,SESSION,trial,Exigence Mentale,Exigence Physique,Exigence Temporelle,Effort,Performance,Frustration,UNWEIGHTED SUM (R-TLX)
0,S10,Session1,trial1,19,0,0,16,65,0,16.666667
1,S10,Session1,trial2,0,0,80,0,50,0,21.666667
2,S10,Session1,trial3,0,0,50,0,50,0,16.666667
3,S10,Session1,trial4,0,0,60,0,34,0,15.666667
4,S10,Session1,trial5,0,0,82,0,22,0,17.333333


In [45]:
features

Unnamed: 0,EDAmean,EDAstd,EDAmin,EDAmax,0,TEMPmean,TEMPstd,TEMPmin,TEMPmax,TEMPslope,...,magnitudestd,magnitudemin,magnitudemax,0.1,BVPmean,BVPstd,BVPmin,BVPmax,BVPpeak_freq,0.2
0,4.21,0.01,4.20,4.22,S10,32.99,0.0,32.99,32.99,-3.303073e-15,...,8.34,41.30,82.28,S10,28.77,58.53,-92.77,148.96,0.0000,S10
1,4.25,0.01,4.24,4.25,S10,33.00,0.0,33.00,33.00,-2.067289e-15,...,6.02,53.71,76.40,S10,-87.36,51.50,-163.29,47.23,0.0000,S10
2,4.24,0.01,4.22,4.25,S10,33.03,0.0,33.03,33.03,4.322210e-16,...,2.33,60.82,69.25,S10,37.30,36.45,-39.66,118.71,0.0000,S10
3,4.23,0.01,4.22,4.24,S10,32.99,0.0,32.99,32.99,-3.303073e-15,...,5.98,51.49,74.63,S10,27.65,118.70,-101.89,234.32,0.0625,S10
4,4.21,0.01,4.21,4.22,S10,33.00,0.0,33.00,33.00,-2.067289e-15,...,1.93,59.67,67.61,S10,-9.94,97.66,-165.46,156.80,0.0625,S10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
36913,0.28,0.00,0.28,0.28,S9,33.49,0.0,33.49,33.49,-1.123330e-15,...,1.24,61.60,66.96,S9,-0.34,48.03,-93.23,82.03,0.1250,S9
36914,0.28,0.00,0.27,0.28,S9,33.49,0.0,33.49,33.49,-1.123330e-15,...,0.57,62.23,64.41,S9,12.17,71.64,-120.19,114.68,0.1250,S9
36915,0.29,0.01,0.28,0.31,S9,33.49,0.0,33.49,33.49,-1.123330e-15,...,4.64,60.22,84.59,S9,15.98,44.72,-61.89,103.38,0.1875,S9
36916,0.34,0.02,0.32,0.36,S9,33.47,0.0,33.47,33.47,-3.269080e-15,...,5.01,59.18,84.08,S9,-20.41,68.71,-163.68,98.08,0.0625,S9
