In [60]:
#import libraries
from pathlib import Path
import os, string


import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

from tqdm import tqdm

In [62]:
# Root folder of the cropped data set; the loaders append the subject folder to it.
emotion_path = '/nfsmount/majid/Emotion detection/Cropped/'
# The bio-signal CSVs live in the 'bioSignals_sep' sub-tree of that same root.
bio_path = emotion_path + 'bioSignals_sep/'

In [63]:
def ext_emotion_df(subject,task):
    """Read the emotion spreadsheet of one subject/task into a DataFrame.

    The workbook is looked up at
    ``<emotion_path><subject>/<subject>V<task>XCEPTION.xlsx`` where
    ``emotion_path`` is the module-level root folder.

    Parameters
    ----------
    subject : subject identifier; used both as folder name and file prefix.
    task : task number, placed after the ``V`` in the file name.

    Returns
    -------
    pandas.DataFrame
        Whatever ``pd.read_excel`` parses from the workbook.
    """
    # file name first, then the full path below the subject's folder
    workbook_name = '{0}V{1}XCEPTION.xlsx'.format(subject, task)
    workbook_path = '{0}{1}/{2}'.format(emotion_path, subject, workbook_name)

    return pd.read_excel(workbook_path)

In [64]:
def _read_bio_signal(subject, task, suffix, column):
    """Read one bio-signal CSV of a subject/task and give it standard column names.

    The file is looked up at ``<bio_path><subject>/T<task><suffix>.csv`` where
    ``bio_path`` is the module-level root folder.

    Parameters
    ----------
    subject : subject identifier (folder name below ``bio_path``).
    task : task number, placed after the ``T`` in the file name.
    suffix : signal tag in the file name (``'HR'``, ``'EDA'`` or ``'TEMP'``).
    column : name given to the first column of the returned frame.

    Returns
    -------
    pandas.DataFrame
        The CSV content with its columns renamed to ``[column, 'epoch']``.

    Raises
    ------
    ValueError
        Raised by pandas when the file does not have exactly two columns.
    """
    signal_df = pd.read_csv(f'{bio_path}{subject}/T{task}{suffix}.csv')

    # Whatever names pandas read from the file are replaced, so downstream
    # code can rely on a fixed schema.
    signal_df.columns = [column, 'epoch']

    return signal_df


def ext_hr_df(subject,task):
    """Load ``T<task>HR.csv`` of `subject` as a frame with columns ``['hr', 'epoch']``."""
    return _read_bio_signal(subject, task, 'HR', 'hr')


def ext_eda_df(subject,task):
    """Load ``T<task>EDA.csv`` of `subject` as a frame with columns ``['eda', 'epoch']``."""
    return _read_bio_signal(subject, task, 'EDA', 'eda')


def ext_temp_df(subject,task):
    """Load ``T<task>TEMP.csv`` of `subject` as a frame with columns ``['temp', 'epoch']``."""
    return _read_bio_signal(subject, task, 'TEMP', 'temp')

In [82]:
# Emotion score columns that are averaged per time slot, in output order.
_EMOTION_COLUMNS = ['Angry', 'Disgust', 'Scared', 'Happy', 'Sad', 'Surprised', 'Neutral']

# Row of the `labels` sheet that holds the per-segment stress labels of a task.
# Tasks that are not listed keep the default label 1 for every row.
# NOTE(review): tasks 3 and 5 both read row 2 (the original code used
# task - 1 for tasks 2/3, task - 3 for task 5 and task - 4 for task 7).
# The mapping is kept as it was; confirm it against the label sheet layout.
_LABEL_ROW_BY_TASK = {2: 1, 3: 2, 5: 2, 7: 3}


def _emotions_on_grid(emotion_df, n_seconds):
    """Average the emotion scores per quarter-second slot on a complete grid.

    Every frame is assigned to a slot ``(TimeInt, FrameSpec)``: ``TimeInt`` is
    the floor of its time stamp (rounded to 3 decimals) and ``FrameSpec`` is
    1..4 depending on whether the fractional part is below 0.24, 0.48, 0.72 or
    not. The result has exactly ``n_seconds * 4`` rows in chronological order.
    A slot without frames copies the values of the closest earlier slot that
    has some; slots before the first observed frame stay NaN.
    """
    slots_per_second = 4

    frames = emotion_df.copy()
    seconds = frames['Time (seconds)'].round(3)
    frames['TimeInt'] = np.floor(seconds)
    # digitize returns 0..3 for the thresholds below; shift to the 1..4 slot ids
    frames['FrameSpec'] = np.digitize(seconds - frames['TimeInt'], [0.24, 0.48, 0.72]) + 1

    observed = (
        frames.groupby(['TimeInt', 'FrameSpec'])[_EMOTION_COLUMNS]
        .mean()
        .reset_index()
    )

    # Position of every observed slot on the flat 0 .. n_slots-1 time axis.
    n_slots = n_seconds * slots_per_second
    flat_slot = (
        observed['TimeInt'] * slots_per_second + observed['FrameSpec'] - 1
    ).astype(int).to_numpy()
    on_grid = (flat_slot >= 0) & (flat_slot < n_slots)

    # slot_row[k] = row of `observed` holding slot k, or -1 when it has no frames.
    slot_row = np.full(n_slots, -1)
    slot_row[flat_slot[on_grid]] = np.flatnonzero(on_grid)

    # Forward fill: every slot points at the latest slot (itself included) that
    # has data; -1 remains for slots before the first observation.
    last_seen = np.maximum.accumulate(
        np.where(slot_row >= 0, np.arange(n_slots), -1)
    )
    source_row = np.where(last_seen >= 0, slot_row[last_seen], -1)

    # Label -1 does not exist in `observed`, so those rows come out as NaN.
    filled = observed[_EMOTION_COLUMNS].reindex(source_row).reset_index(drop=True)
    filled.insert(0, 'FrameSpec', np.tile(np.arange(1, slots_per_second + 1), n_seconds))
    filled.insert(0, 'TimeInt', np.repeat(np.arange(n_seconds, dtype=float), slots_per_second))
    return filled


def _stress_labels(labels, task, n_rows):
    """Build the per-row stress label column for `task`.

    For the tasks listed in ``_LABEL_ROW_BY_TASK`` the rows are labelled in
    five consecutive segments starting at rows 0, 240, 720, 1200 and 1680,
    taken from columns 1..5 of the task's row in `labels`. The last segment
    runs to the end of the frame. Every other task is labelled 1 throughout.
    """
    # object dtype so that any label type can be stored before dtype inference
    stress = np.full(n_rows, 1, dtype=object)

    label_row = _LABEL_ROW_BY_TASK.get(task)
    if label_row is not None:
        segment_starts = [0, 240, 720, 1200, 1680]
        for label_col, start in enumerate(segment_starts, start=1):
            # Each segment overwrites the tail written by the previous one.
            stress[start:] = labels.iloc[label_row, label_col]

    return pd.Series(stress).infer_objects().to_numpy()


def srcl(emotion_df, hr_df, eda_df, temp_df, labels,task):
    """Synchronise, resample, concatenate and label the signals of one task.

    Parameters
    ----------
    emotion_df : frame with a ``'Time (seconds)'`` column and the seven
        emotion score columns (Angry, Disgust, Scared, Happy, Sad, Surprised,
        Neutral).
    hr_df : frame with an ``'hr'`` column; every row is repeated 4 times.
    eda_df : frame with an ``'eda'`` column, used as is.
    temp_df : frame with a ``'temp'`` column, used as is.
    labels : label sheet read positionally with ``labels.iloc[row, 1..5]``.
    task : task number; selects the labelling rule.

    Returns
    -------
    pandas.DataFrame
        Columns ``TimeInt``, ``FrameSpec``, the seven emotion columns, ``hr``,
        ``eda``, ``temp`` and ``stress``. The emotion part always has
        540 * 4 rows in chronological order, so it lines up row by row with
        the bio signals.

    Notes
    -----
    Gap-filled emotion rows are placed at their own time slot instead of at
    the end of the frame. Frames outside seconds 0..539 are discarded, and
    slots before the first observed frame are kept as NaN rows. The stress
    column is written in one assignment rather than through chained indexing,
    which has no effect when pandas Copy-on-Write is active.
    """
    emotions = _emotions_on_grid(emotion_df, n_seconds=540)

    # repeat every hr sample 4 times and renumber so it aligns by position
    hr = hr_df['hr'].repeat(4).reset_index(drop=True)

    total_df = pd.concat([emotions, hr, eda_df['eda'], temp_df['temp']], axis=1)
    total_df['stress'] = _stress_labels(labels, task, len(total_df))
    return total_df

In [None]:
# make array of subjects
subject_list = ['G','H','I','J','K','L','M','N','O','P','Q','R','S','T']

# Iterate over subjects and tasks: load every signal, merge them with srcl()
# and write one CSV per subject/task. The run is best-effort: a failure is
# reported and only the affected subject/task is skipped.

for subject in subject_list:

    # The label sheet belongs to the subject, so it is read once per subject
    # instead of once per task.
    try:
        labels = pd.read_excel(f'Labels/{subject}.xlsx')
    except Exception as err:
        print(subject, 'labels', repr(err))
        continue

    for task in range (1,9):
        try:
            #extract emotion data
            emotion_df = ext_emotion_df(subject,task)

            #import Signals data
            hr_df = ext_hr_df(subject,task)
            eda_df = ext_eda_df(subject,task)
            temp_df = ext_temp_df(subject,task)

            #synchronize, resample, concatenate, label the data
            total_df = srcl(emotion_df, hr_df, eda_df, temp_df, labels,task)

            total_df.to_csv(f'Significance/{subject}T{task}.csv')
        except Exception as err:
            # `except Exception` lets Ctrl-C still interrupt the batch run, and
            # printing the error shows why this subject/task was skipped.
            print(subject, task, repr(err))

In [None]:
# Name of the time column of the emotion frames.
# NOTE(review): no other cell in this notebook view reads `cname`.
cname = 'Time (seconds)'

In [None]:
# Scratch version of the grouping step of srcl() with the intermediates printed.
# Expects `emotion_df` and `temp_df` to exist in the kernel already.
(a, b), (c, d) = emotion_df.shape, temp_df.shape
print(a,b,c,d)

# every emotion score column is averaged within a time slot
lst =['Angry', 'Disgust', 'Scared', 'Happy', 'Sad', 'Surprised', 'Neutral']
aggdict = dict.fromkeys(lst, 'mean')

print(aggdict)

emdf = emotion_df.copy()
emdf['Time (seconds)'] = emdf['Time (seconds)'].round(3)
# whole second the frame falls in
emdf['TimeInt'] = np.floor(emdf['Time (seconds)'])
# slot 1..4 within that second: fractional part below 0.24 / 0.48 / 0.72 / otherwise
emdf['FrameSpec'] = np.digitize(emdf['Time (seconds)'] - emdf['TimeInt'], [0.24, 0.48, 0.72]) + 1
emdf_gb = emdf.groupby(['TimeInt','FrameSpec']).agg(aggdict).reset_index()





In [254]:
# Put the per-slot emotion means of `emdf_gb` on the complete grid of
# 540 seconds x 4 slots, in chronological order. A slot without data copies
# the closest earlier slot that has some; slots before the first observation
# stay NaN. Rows outside seconds 0..539 are dropped.
# (The earlier loop appended the filled rows at the end of the frame, so they
# no longer lined up by position with the bio signals.)
emotion_cols = ['Angry', 'Disgust', 'Scared', 'Happy', 'Sad', 'Surprised', 'Neutral']
n_slots = 540 * 4

slot_means = emdf_gb.reset_index(drop=True)

# position of every available slot on the flat 0 .. n_slots-1 time axis
flat_slot = (slot_means['TimeInt'] * 4 + slot_means['FrameSpec'] - 1).astype(int).to_numpy()
on_grid = (flat_slot >= 0) & (flat_slot < n_slots)

# slot_row[k] = row of slot_means holding slot k, or -1 when the slot is missing
slot_row = np.full(n_slots, -1)
slot_row[flat_slot[on_grid]] = np.flatnonzero(on_grid)

# forward fill: each slot points at the latest slot (itself included) with data
last_seen = np.maximum.accumulate(np.where(slot_row >= 0, np.arange(n_slots), -1))
source_row = np.where(last_seen >= 0, slot_row[last_seen], -1)

# label -1 does not exist in slot_means, so those rows come out as NaN
emdf_gb = slot_means[emotion_cols].reindex(source_row).reset_index(drop=True)
emdf_gb.insert(0, 'FrameSpec', np.tile(np.arange(1, 5), 540))
emdf_gb.insert(0, 'TimeInt', np.repeat(np.arange(540, dtype=float), 4))

In [257]:
# keep every slot except those of second 540
emotion_df = emdf_gb.loc[emdf_gb['TimeInt'].ne(540)]

In [258]:
# Sanity check: show the (rows, columns) of the filtered emotion frame.
print(emotion_df.shape)

(2160, 9)


In [261]:
# Repeat every hr row 4 times; the repeated rows keep their original index labels.
hr_df1 = hr_df.loc[hr_df.index.repeat(4)]
    

In [262]:
# Sanity check: print the shapes of all four frames side by side for comparison.
print(emotion_df.shape, hr_df1.shape, eda_df.shape, temp_df.shape)

(2160, 9) (2160, 2) (2160, 2) (2160, 2)


In [267]:
# Work on a copy of the emotion frame for the labelling experiment below.
# NOTE(review): this rebinds `temp_df`, which the batch loop and the shape
# checks above use for the temperature signal frame; re-running those cells
# after this one would see the emotion copy instead.
temp_df = emotion_df.copy()

In [268]:
# Start with the value 2 in every row of a new "Stress" column.
# Note the capital S: srcl() writes its labels to a column named 'stress'.
temp_df["Stress"] = 2

In [None]:
# Label the scratch frame with the segment labels in row 1 of subject K's sheet.
labels = pd.read_excel('Labels/K.xlsx')

# Write through .iloc on the frame itself. Chained indexing
# (temp_df["Stress"][a:b] = ...) assigns to a temporary object and has no
# effect when pandas Copy-on-Write is active.
stress_col = temp_df.columns.get_loc("Stress")

# first 240 rows take the label in column 1
temp_df.iloc[:240, stress_col] = labels.iloc[1,1]

# four segments of 480 rows each take the labels in columns 2..5
for i in range (4):
    temp_df.iloc[240+i*480:240+(i+1)*480, stress_col] = labels.iloc[1,i+2]

# any rows from 1920 onwards (including rows past 2160) take the label in column 5
temp_df.iloc[1920:, stress_col] = labels.iloc[1,5]
