In [1]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
from scipy.stats import kurtosis, skew
from scipy.signal import find_peaks
from multiprocessing import Pool
from sklearn.preprocessing import MinMaxScaler
import warnings

In [2]:
def getFeatures(a):
    """Summarise an array-like with four descriptive statistics.

    Parameters
    ----------
    a : array-like
        Values to summarise; anything NumPy's reductions accept.

    Returns
    -------
    tuple
        ``(minimum, maximum, mean, standard deviation)`` in that order.
        The standard deviation uses NumPy's default (population, ``ddof=0``).
    """
    # Apply each reduction in the order callers unpack the result.
    reducers = (np.min, np.max, np.mean, np.std)
    return tuple(reduce(a) for reduce in reducers)

def getEDAStats(a):
    """Return the shape statistics ``(skewness, kurtosis)`` of ``a``.

    Both values come from ``scipy.stats`` with its default settings, so the
    kurtosis is the excess (Fisher) kurtosis, which is 0 for a normal
    distribution.

    Parameters
    ----------
    a : array-like
        Sample values.

    Returns
    -------
    tuple
        ``(skewness, kurtosis)`` in that order.
    """
    return skew(a), kurtosis(a)

In [3]:
# Two-letter subject codes whose recordings are processed by this notebook.
userList = ['HT','IT','JT','KT','LT','MT','NT','PT','QT','RT','ST']

# Load the raw export, keep the emotion scores, the physiological signals and
# the stress rating, then replace the record id with the subject code taken
# from its first two characters.
df = (
    pd.read_csv('Emotions.csv')
    .loc[:, ['Angry','Disgust','Scared','Happy','Sad','Surprised','Neutral',
             'hr','eda','temp','stress','id']]
    .assign(subject=lambda frame: frame['id'].str[:2])
    .drop(columns=['id'])
)

# Distribution of the raw stress ratings across all samples.
df['stress'].value_counts()

1.0     102749
2.0      14406
3.0      13442
7.0       6962
13.0      6720
5.0       6243
10.0      6002
11.0      5042
16.0      4803
14.0      2881
9.0       2881
4.0       2880
6.0       2640
20.0      1921
17.0      1921
15.0      1680
8.0       1440
18.0      1440
12.0       960
Name: stress, dtype: int64

In [20]:
# --- Windowed feature extraction -------------------------------------------
# For every subject the signals are min-max scaled, cut into overlapping
# windows, and each window is reduced to one row of summary features plus a
# three-level stress label. All rows are written to 'em_feat_0510.csv'.

# Each window holds WINDOW_SIZE consecutive samples; consecutive windows start
# WINDOW_STEP samples apart, so neighbouring windows overlap by half.
WINDOW_SIZE = 40
WINDOW_STEP = 20
# Minimum peak width (in samples) required by scipy.signal.find_peaks.
PEAK_MIN_WIDTH = 5
# Inclusive upper bounds on a window's mean stress rating for labels '0' and
# '1'; anything above STRESS_MID_MAX is labelled '2'.
# NOTE(review): presumably thirds of a 1-20 rating scale -- confirm.
STRESS_LOW_MAX = 6.7
STRESS_MID_MAX = 13.4

EMOTION_COLUMNS = ['Angry','Disgust','Scared','Happy','Sad','Surprised','Neutral']
# NOTE(review): 'temp_mtd' receives the temperature standard deviation and
# '*_amphitude' the summed peak prominences. The spellings look like typos for
# 'temp_std' / 'amplitude' but are kept so existing readers of the CSV keep
# working.
FEATURE_COLUMNS = EMOTION_COLUMNS + [
    'eda_mean','eda_min','eda_max','eda_std',
    'eda_kurtosis','eda_skew','eda_num_peaks','eda_amphitude','eda_duration',
    'hr_mean','hr_min','hr_max','hr_std','hr_rms',
    'hr_num_peaks','hr_amphitude','hr_duration',
    'temp_mean','temp_min','temp_max','temp_mtd','stress','user']


def _peak_stats(signal):
    """Return (peak count, summed prominences, summed widths) for ``signal``.

    Peaks are located with ``find_peaks(signal, width=PEAK_MIN_WIDTH)``; passing
    ``width`` makes SciPy report both 'prominences' and 'widths'. A signal with
    no qualifying peak yields ``(0, 0.0, 0.0)``.
    """
    peaks, properties = find_peaks(signal, width=PEAK_MIN_WIDTH)
    return len(peaks), np.sum(properties['prominences']), np.sum(properties['widths'])


# Number of windows assigned to each stress label (read by a later cell).
s0, s1, s2 = 0, 0, 0
# Feature rows are collected here and turned into a DataFrame once at the end;
# enlarging a DataFrame with .loc on every window re-allocates it each time.
rows = []

for user in userList:
    print(user)
    data_original = df[df['subject'] == user]
    if data_original.empty:
        # MinMaxScaler cannot be fitted on zero samples.
        warnings.warn(f"No rows found for subject {user!r}; skipping.")
        continue

    # Scale every signal to [0, 1] within this subject only.
    norm = data_original.drop(columns=['subject','stress'])
    scaler = MinMaxScaler()
    data1 = pd.DataFrame(scaler.fit_transform(norm), columns=norm.columns)
    # drop=True realigns the labels with the scaled rows without leaking the
    # old row index into the frame as an extra column.
    data = pd.concat(
        [data1, data_original[['stress','subject']].reset_index(drop=True)],
        axis=1)
    length = len(data)

    # Only start positions that leave room for a full window are visited.
    for i in range(0, length - WINDOW_SIZE + 1, WINDOW_STEP):
        partialDF = data.iloc[i:i + WINDOW_SIZE]

        eda = partialDF['eda'].values
        hr = partialDF['hr'].values
        tmp = partialDF['temp'].values

        # Mean score of each emotion over the window.
        emotion_means = [partialDF[name].values.mean() for name in EMOTION_COLUMNS]

        eda_min, eda_max, eda_mean, eda_std = getFeatures(eda)
        hr_min, hr_max, hr_mean, hr_std = getFeatures(hr)
        tmp_min, tmp_max, tmp_mean, tmp_std = getFeatures(tmp)
        eda_skew, eda_kurtosis = getEDAStats(eda)

        # Root mean square of successive heart-rate differences.
        rms = np.sqrt(np.mean(np.square(np.ediff1d(hr))))

        num_Peaks, amphitude, duration = _peak_stats(eda)
        hrnum_Peaks, hramphitude, hrduration = _peak_stats(hr)

        # Bucket the window's mean stress rating into three string labels.
        stress_mean = partialDF['stress'].values.mean()
        if stress_mean <= STRESS_LOW_MAX:
            stress_label = '0'
            s0 += 1
        elif stress_mean <= STRESS_MID_MAX:
            stress_label = '1'
            s1 += 1
        else:
            stress_label = '2'
            s2 += 1

        # Order must match FEATURE_COLUMNS.
        rows.append(emotion_means + [
            eda_mean, eda_min, eda_max, eda_std,
            eda_kurtosis, eda_skew, num_Peaks, amphitude, duration,
            hr_mean, hr_min, hr_max, hr_std, rms,
            hrnum_Peaks, hramphitude, hrduration,
            tmp_mean, tmp_min, tmp_max, tmp_std,
            stress_label, user])

fdf = pd.DataFrame(rows, columns=FEATURE_COLUMNS)
fdf.to_csv('em_feat_0510.csv',index=False)



HT
IT
JT
KT
LT
MT
NT
PT
QT
RT
ST


In [15]:
# Write the feature frame to a second CSV file, leaving out the row index.
fdf.to_csv('majid.csv', index=False)

In [18]:
# Window counts accumulated for stress labels '0', '1' and '2'.
label_counts = (s0, s1, s2)
print(*label_counts)

7100 1507 886


In [19]:
# Number of feature rows per stress label in the assembled frame.
fdf['stress'].value_counts()

0    538
1    313
2     12
Name: stress, dtype: int64