In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import seaborn as sns
from scipy import stats


%matplotlib inline

# Preprocessing of the data:

Including aggregation of data from both measurements and all sessions, as well as normalization

In [None]:
# Input workbooks: one file per measurement, one sheet (tab) per session
l_files = ['EDA_1Hz_Normalized1s_-1_15_Dennis.xlsx', 'HR_1Hz_Normalized1s_-1_15_Dennis.xlsx']
l_dfs_per_measurement = []

for file in l_files:
    # The measurement is encoded in the file name prefix
    if file.startswith('EDA'):
        measurement = 'EDA'
    elif file.startswith('HR'):
        measurement = 'HR'
    else:
        # Fail loudly instead of silently re-using the label of the previous file
        raise ValueError('Cannot infer measurement (EDA / HR) from file name: ' + file)

    # Read excel file and extract all tab names (= sessions) as list
    df_dict = pd.read_excel(file, sheet_name=None, index_col=0)
    l_sessions = list(df_dict.keys())

    # Some reshaping is required to make subsequent work more efficient
    # Data is first cleared from subjects that have no data
    # Then it is split into two subsets (CS+ and CS- data)
    l_dfs_per_session = []
    for session in l_sessions:

        # Select data from respective session and drop every column that contains a missing value
        df_temp = df_dict[session].dropna(axis=1)

        # Rename index that corresponds to subject row
        l_idx = df_temp.index.tolist()
        l_idx[0] = 'subject'
        df_temp.index = l_idx

        # Identify number of subjects that were investigated:
        # the position of the first 'CS-' column equals the number of CS+ columns before it
        for position, elem in enumerate(df_temp.columns.to_list()):
            if elem.startswith('CS-'):
                all_kids = position
                break
        else:
            # Without this check the split position of the previous sheet would be re-used
            raise ValueError('No column starting with "CS-" found in sheet: ' + str(session))

        # Set subjects as column headers
        df_temp.columns = df_temp.iloc[0]
        df_temp = df_temp.drop(df_temp.index[0])

        # Split data into CS+ and CS- subsets
        # Add metadata about session and value_type (same column positions for both subsets)
        # For CS+ subset:
        df_cs_plus = df_temp.iloc[:, :all_kids].copy().transpose()
        df_cs_plus.insert(loc=0, column='value_type', value='abs_CS+')
        df_cs_plus.insert(loc=0, column='session', value=session)

        # For CS- subset:
        df_cs_minus = df_temp.iloc[:, all_kids:].copy().transpose()
        df_cs_minus.insert(loc=0, column='value_type', value='abs_CS-')
        df_cs_minus.insert(loc=0, column='session', value=session)

        # Concat the two subsets again and append them to list
        df_reshaped = pd.concat([df_cs_plus, df_cs_minus], axis=0)
        l_dfs_per_session.append(df_reshaped)

    # Concat the reshaped dataframes from all sessions that contain the "absolute" values
    # Note: already normalized with Jérémys code to stimulus onset -1.0s
    # reset_index() turns the subject index into a regular 'subject' column
    df_abs = pd.concat(l_dfs_per_session, axis=0).reset_index()

    # Now the data is normalized and the discrimination ratio is calculated
    # Calculations are performed iteratively and based only on the data from one subject and one session
    l_subjects = list(df_abs.subject.unique())
    l_dfs_per_subject = []

    for subject in l_subjects:
        l_dfs_per_session = []

        for session in l_sessions:
            # Select the data (columns 3:20 are the time points; row 0 = CS+, row 1 = CS-)
            df_temp = df_abs.loc[(df_abs['session'] == session) & (df_abs['subject'] == subject)].iloc[:, 3:20].copy()

            # Since the data contains positive and negative values, the absolute value of the min is added to each value
            # This forces all values to be positive and regular min-max-scaling can be performed
            session_min = df_temp.min().min()
            df_temp = df_temp + abs(session_min)
            new_min = df_temp.min().min()
            new_max = df_temp.max().max()
            df_temp = (df_temp - new_min) / (new_max - new_min)

            cs_plus = df_temp.iloc[0, :]
            cs_minus = df_temp.iloc[1, :]

            # It may be that the normalized value (0.0 at -1.0s) is the session_min, which would cause division by zero
            # Since both CS- and CS+ are 0.0 at -1.0s and the discrimination ratio is always 0.5 for -1.0s
            # It is set to 0.5 for these cases to handle the Error
            if session_min == 0:
                l_values = [0.5] + list(cs_plus.iloc[1:] / (cs_plus.iloc[1:] + cs_minus.iloc[1:]))
                columns = [-1.0] + list(cs_plus.iloc[1:].index)
                discrimination_ratio = pd.Series(l_values, index=columns)

            # Regular computations:
            else:
                discrimination_ratio = cs_plus / (cs_plus + cs_minus)

            # Add the ratio as third row (DataFrame.append no longer exists in pandas >= 2.0, so pd.concat is used)
            df_temp = pd.concat([df_temp, discrimination_ratio.to_frame().T], axis=0, ignore_index=True)

            # Metadata is added and the df is appended to the list that will ultimately contain data from all sessions per subject
            df_temp.insert(loc=0, column='value_type', value=['norm_subject_and_session_CS+', 'norm_subject_and_session_CS-', 'norm_subject_and_session_discrimination_ratio'])
            df_temp.insert(loc=0, column='session', value=session)
            df_temp.insert(loc=0, column='subject', value=subject)

            l_dfs_per_session.append(df_temp)

        # The individual dfs per session from one subject are concated and appended to the list that will ultimately contain the data from all subjects
        df_all_sessions_per_subject = pd.concat(l_dfs_per_session, axis=0)
        l_dfs_per_subject.append(df_all_sessions_per_subject)

    # The data from all subjects and sessions (min-max-scaled & discrimination_ratio) is concated and fused with the absolute values to create the final df
    df_norm = pd.concat(l_dfs_per_subject, axis=0)

    # As last step information about measurement (HR or EDA) and about group (control or anxiety) are added and the df is appended to list for final concatenation
    df_measurement = pd.concat([df_abs, df_norm], axis=0, ignore_index=True)
    # The group column starts as object dtype so that the string labels can be written without a dtype upcast
    df_measurement.insert(loc=1, column='group', value=pd.Series(np.nan, index=df_measurement.index, dtype=object))
    for subject in l_subjects:
        # Group membership is derived from the subject ID prefix; unknown prefixes stay NaN
        group = np.nan
        if subject.startswith('GES'):
            group = 'control'
        elif subject.startswith('ANX'):
            group = 'anxiety'
        df_measurement.loc[df_measurement['subject'] == subject, 'group'] = group
    df_measurement.insert(loc=2, column='measurement', value=measurement)

    # Tab names in EDA file contain 'EDA ' as prefix for each session - this has to be removed:
    if measurement == 'EDA':
        df_measurement['session'] = [elem.replace('EDA ', '') for elem in df_measurement['session']]
    l_dfs_per_measurement.append(df_measurement)

# One long table with both measurements and a fresh 0..n-1 row index
df = pd.concat(l_dfs_per_measurement, axis=0, ignore_index=True)

# Global normalization: min-max scaling over ALL subjects and sessions of one measurement
l_dfs_global_norm = []
abs_to_global = {'abs_CS+': 'norm_global_CS+', 'abs_CS-': 'norm_global_CS-'}

for measurement in ('EDA', 'HR'):

    # Work on a copy of the absolute CS+ / CS- rows of this measurement
    is_measurement = df['measurement'] == measurement
    is_absolute = df['value_type'].isin(['abs_CS+', 'abs_CS-'])
    df_temp = df.loc[is_measurement & is_absolute].copy()

    # Columns 5:22 hold the time points. The data is first shifted by |min| so that all values
    # are non-negative, then regular min-max scaling is applied
    df_values = df_temp.iloc[:, 5:22]
    global_min = df_values.min().min()
    df_values = df_values + abs(global_min)
    new_min = df_values.min().min()
    new_max = df_values.max().max()
    df_values = (df_values - new_min) / (new_max - new_min)
    df_temp.iloc[:, 5:22] = df_values

    # Relabel the rows so they can be told apart from the absolute values
    df_temp['value_type'] = df_temp['value_type'].map(abs_to_global)
    l_dfs_global_norm.append(df_temp)

# Attach the globally normalized rows below the existing ones
df = pd.concat([df, *l_dfs_global_norm], axis=0)


# Discrimination ratio on the globally normalized data: CS+ / (CS+ + CS-),
# computed per measurement, subject and session.
# Note: l_subjects and l_sessions are the lists left over from the LAST file processed above.
# Rows are collected in a list and concatenated once (DataFrame.append no longer exists in
# pandas >= 2.0, and growing df inside the loop would copy the whole table on every iteration).
l_dfs_global_discrim = []

for measurement in ['EDA', 'HR']:
    for subject in l_subjects:
        for session in l_sessions:
            # The two globally normalized rows of this combination (row 0 = CS+, row 1 = CS-)
            df_pair = df.loc[(df['measurement'] == measurement) & (df['session'] == session)
                             & (df['subject'] == subject) & (df['value_type'].isin(['norm_global_CS+', 'norm_global_CS-']))]

            # Metadata (first five columns) is taken from the CS+ row and relabeled
            df_meta = df_pair.iloc[[0], 0:5].copy()
            df_meta['value_type'] = 'norm_global_discrimination_ratio'

            # Ratio over the time-point columns 5:22
            df_discrim = df_pair.iloc[:, 5:22]
            discrimination_ratio = df_discrim.iloc[0, :] / (df_discrim.iloc[0, :] + df_discrim.iloc[1, :])

            # Glue metadata and ratio together into a single-row frame
            df_fused = pd.concat([df_meta.reset_index(drop=True),
                                  discrimination_ratio.to_frame().T.reset_index(drop=True)], axis=1)
            l_dfs_global_discrim.append(df_fused)

# Append all ratio rows at once and renumber the rows 0..n-1
df = pd.concat([df] + l_dfs_global_discrim, axis=0, ignore_index=True)

# Persist the complete table so that the analysis cells can start from the CSV
df.to_csv('all_normalized_data.csv')

# ... or just read data:

In [None]:
# Reload the table written by the preprocessing cell; the first CSV column is used as row index.
# Column labels read from a CSV header are strings, which is why the cells below address
# time points as e.g. '6.0'.
df = pd.read_csv('all_normalized_data.csv', index_col = 0)

In [None]:
# Show the first rows of the loaded table
df.head()

### ToDos:
* ~~Correlation of the discrimination ratios across measures within individuals?~~
* ~~Plotting all sessions~~

* ~~Select specific timepoint to compare discrimination ratios between measures and between individuals (pre- and post-stimulus! Anticipation = learned vs. real reaction)~~
* ~~Cross-correlation of these determined discrimination ratios between the two measures on individual basis?~~
* ~~Predictions possible? From HR to EDA? from early EDA to late EDA?~~
* ~~Classification as Responder / non-Responder for each measurement & check for correlation of this classification between HR and EDA?~~
* Development of discrimination during EXT (subsets of 3 parts)
* Discrimination between different generalization faces? 


# What does the data look like after scaling?
## Representative plotting of one value/measurement for individual subjects per session plus group mean:

`SESSION`: 'All_preacq', 'All_acq1', 'All_acq2', 'All_gen1', 'All_gen2', 'All_ext' <br>
`MEASUREMENT`: 'HR', 'EDA' <br>
`VALUE_TYPE`: 'norm_global_CS+', 'norm_global_CS-', 'norm_global_discrimination_ratio'<br>

In [None]:
# Selection used by the next cell: which value type, measurement and session to plot
VALUE_TYPE = 'norm_global_CS+'
MEASUREMENT = 'HR'
SESSION = 'All_acq2'

In [None]:
def _draw_group_panel(ax, df_group, group_mean, group_sem, color, title):
    """Draw each row's time course faintly and the group mean with SEM error bars on `ax`."""
    df_group.iloc[:, 5:22].transpose().plot(legend=False, alpha=.2, color=color, ax=ax)
    ax.errorbar(x=list(group_mean.index), y=group_mean.values, yerr=group_sem.values, color=color)
    ax.set_title(title)
    ax.set_xlabel('time in [s] from stimulus onset')
    ax.set_ylabel(VALUE_TYPE)


# Rows matching the configured session / value type / measurement, split by group
is_selected = (df['session'] == SESSION) & (df['value_type'] == VALUE_TYPE) & (df['measurement'] == MEASUREMENT)
df_anx = df.loc[is_selected & (df['group'] == 'anxiety')].copy()
df_ctrl = df.loc[is_selected & (df['group'] == 'control')].copy()

# Group mean and standard error per time point (columns 5:22)
mean_anx = df_anx.iloc[:, 5:22].mean()
sem_anx = df_anx.iloc[:, 5:22].sem()
mean_ctrl = df_ctrl.iloc[:, 5:22].mean()
sem_ctrl = df_ctrl.iloc[:, 5:22].sem()

# One panel per group, sharing the y-axis so the groups are directly comparable
fig = plt.figure(figsize=(18, 8))
gs = fig.add_gridspec(1, 2)

ax1 = fig.add_subplot(gs[0, 0])
_draw_group_panel(ax1, df_ctrl, mean_ctrl, sem_ctrl, color='g', title='Control group')

ax2 = fig.add_subplot(gs[0, 1], sharey=ax1)
_draw_group_panel(ax2, df_anx, mean_anx, sem_anx, color='m', title='Anxiety group')

fig.suptitle(VALUE_TYPE + ' for ' + MEASUREMENT + ' in session: ' + SESSION, fontsize = 15)
fig.tight_layout()
plt.show()

## Plotting of individual sessions to check more in detail

`SESSION`: 'All_preacq', 'All_acq1', 'All_acq2', 'All_gen1', 'All_gen2', 'All_ext' <br>
`MEASUREMENT`: 'HR', 'EDA' <br>
`NORM_TYPE`: 'norm_global_', 'norm_subject_and_session_' <br>

In [None]:
# Specify session, measurement and normalization type for the next cell.
# NORM_TYPE is used as prefix of the value types ('CS+', 'CS-', 'discrimination_ratio').
SESSION = 'All_acq2' 
MEASUREMENT = 'HR'
NORM_TYPE = 'norm_global_'   # alternative: 'norm_subject_and_session_'

In [None]:
def _timecourse(df_group, value_type):
    """Return the time-point columns (5:22) of all rows of `df_group` with the given value type."""
    return df_group.loc[df_group['value_type'] == value_type].iloc[:, 5:22]


# Select data of the configured session and measurement, split by group
df_ctrl = df.loc[(df['session'] == SESSION) & (df['group'] == 'control') & (df['measurement'] == MEASUREMENT)].copy()
df_anx = df.loc[(df['session'] == SESSION) & (df['group'] == 'anxiety') & (df['measurement'] == MEASUREMENT)].copy()

# Group means per time point (plus SEM for the discrimination ratio)
anx_norm_cs_p = _timecourse(df_anx, NORM_TYPE + 'CS+').mean()
anx_norm_cs_m = _timecourse(df_anx, NORM_TYPE + 'CS-').mean()
anx_discrim = _timecourse(df_anx, NORM_TYPE + 'discrimination_ratio').mean()
anx_discrim_sem = _timecourse(df_anx, NORM_TYPE + 'discrimination_ratio').sem()

ctrl_norm_cs_p = _timecourse(df_ctrl, NORM_TYPE + 'CS+').mean()
ctrl_norm_cs_m = _timecourse(df_ctrl, NORM_TYPE + 'CS-').mean()
ctrl_discrim = _timecourse(df_ctrl, NORM_TYPE + 'discrimination_ratio').mean()
ctrl_disrcim_sem = _timecourse(df_ctrl, NORM_TYPE + 'discrimination_ratio').sem()

# Create figure
fig = plt.figure(figsize=(18, 8))
gs = fig.add_gridspec(1, 2)

# Left panel: mean CS+ (solid) and CS- (dashed) responses of BOTH groups.
# Title and y-label name what is actually drawn (the panel is not restricted to the anxiety group
# and contains no discrimination ratio); wording matches the all-sessions plot further down.
f_ax1 = fig.add_subplot(gs[0, 0])
f_ax1.plot(anx_norm_cs_p, color = 'm', alpha = 1, label='Anx_CS+')
f_ax1.plot(anx_norm_cs_m, color = 'm', alpha = 1, linestyle = 'dashed', label='Anx_CS-')
f_ax1.plot(ctrl_norm_cs_p, color = 'g', alpha = 1, label='Ctrl_CS+')
f_ax1.plot(ctrl_norm_cs_m, color = 'g', alpha = 1, linestyle = 'dashed', label='Ctrl_CS-')
f_ax1.set_xlabel('time in [s] from stimulus onset')
f_ax1.set_ylabel('normalized responses')
f_ax1.set_title('Responses to CS+ and CS-')
f_ax1.legend(loc = 'lower left')

# Right panel: discrimination ratio (mean +/- SEM) per group; the grey line marks 0.5
f_ax2 = fig.add_subplot(gs[0, 1])
f_ax2.errorbar(x = list(anx_discrim.index), y = anx_discrim.values, yerr = anx_discrim_sem.values, color='m', label='anxiety')
f_ax2.errorbar(x = list(ctrl_discrim.index), y = ctrl_discrim.values, yerr = ctrl_disrcim_sem.values, color='g', label='control')
f_ax2.hlines(0.5, xmin=-1, xmax=15, color='k', alpha = 0.4)
f_ax2.set_title('Discrimination ratios')
f_ax2.set_xlabel('time in [s] from stimulus onset')
f_ax2.set_ylabel('discrimination ratio')
f_ax2.legend(loc = 'lower left')

fig.suptitle(MEASUREMENT + ' in session: ' + SESSION)
fig.tight_layout()

plt.show()

# Plotting data of one measurement for all sessions

`MEASUREMENT`: 'HR', 'EDA' <br>
`NORM_TYPE`: 'norm_global_', 'norm_subject_and_session_' <br>

In [None]:
# Measurement and normalization type for the all-sessions figure in the next cell
MEASUREMENT = 'EDA'
NORM_TYPE = 'norm_global_'

In [None]:
# Sessions in the order in which they are plotted (one figure row per session)
l_sessions = ['All_preacq', 'All_acq1', 'All_acq2', 'All_gen1', 'All_gen2', 'All_ext']

# Hard-coded y-axis limits per measure, so that all rows of the figure share the same scale
if MEASUREMENT == 'HR':
    ylim_cs_lower, ylim_cs_upper = 0.41, 0.57
    ylim_dr_lower, ylim_dr_upper = 0.44, 0.54
elif MEASUREMENT == 'EDA':
    ylim_cs_lower, ylim_cs_upper = 0.29, 0.54
    ylim_dr_lower, ylim_dr_upper = 0.46, 0.62
else:
    # Without this branch the limits of a previous run (or none at all) would be used
    raise ValueError("MEASUREMENT must be 'HR' or 'EDA', got: " + repr(MEASUREMENT))

fig = plt.figure(figsize=(18, 6*len(l_sessions)))
gs = fig.add_gridspec(len(l_sessions), 2)

plt.subplots_adjust(wspace=0.2, hspace=0.5)

# The loop variable is deliberately NOT called SESSION, so the config constant of the
# single-session plot is not overwritten by running this cell
for row, session_name in enumerate(l_sessions):
    # Select data
    df_ctrl = df.loc[(df['session'] == session_name) & (df['group'] == 'control') & (df['measurement'] == MEASUREMENT)].copy()
    df_anx = df.loc[(df['session'] == session_name) & (df['group'] == 'anxiety') & (df['measurement'] == MEASUREMENT)].copy()

    # Group means per time point (columns 5:22), plus SEM for the discrimination ratio
    anx_norm_cs_p = df_anx.loc[df_anx['value_type'] == NORM_TYPE + 'CS+'].iloc[:, 5:22].mean()
    anx_norm_cs_m = df_anx.loc[df_anx['value_type'] == NORM_TYPE + 'CS-'].iloc[:, 5:22].mean()
    anx_discrim = df_anx.loc[df_anx['value_type'] == NORM_TYPE + 'discrimination_ratio'].iloc[:, 5:22].mean()
    anx_discrim_sem = df_anx.loc[df_anx['value_type'] == NORM_TYPE + 'discrimination_ratio'].iloc[:, 5:22].sem()

    ctrl_norm_cs_p = df_ctrl.loc[df_ctrl['value_type'] == NORM_TYPE + 'CS+'].iloc[:, 5:22].mean()
    ctrl_norm_cs_m = df_ctrl.loc[df_ctrl['value_type'] == NORM_TYPE + 'CS-'].iloc[:, 5:22].mean()
    ctrl_discrim = df_ctrl.loc[df_ctrl['value_type'] == NORM_TYPE + 'discrimination_ratio'].iloc[:, 5:22].mean()
    ctrl_disrcim_sem = df_ctrl.loc[df_ctrl['value_type'] == NORM_TYPE + 'discrimination_ratio'].iloc[:, 5:22].sem()

    # Left column: mean CS+ (solid) and CS- (dashed) responses of both groups
    ax_cs = fig.add_subplot(gs[row, 0])
    ax_cs.plot(anx_norm_cs_p, color = 'm', alpha = 1, label='Anx_CS+')
    ax_cs.plot(anx_norm_cs_m, color = 'm', alpha = 1, linestyle = 'dashed', label='Anx_CS-')
    ax_cs.plot(ctrl_norm_cs_p, color = 'g', alpha = 1, label='Ctrl_CS+')
    ax_cs.plot(ctrl_norm_cs_m, color = 'g', alpha = 1, linestyle = 'dashed', label='Ctrl_CS-')
    ax_cs.set_xlabel('time in [s] from stimulus onset')
    ax_cs.set_ylabel('normalized responses')
    ax_cs.set_title('Responses to CS+ and CS-')
    ax_cs.set_ylim(ylim_cs_lower, ylim_cs_upper)
    ax_cs.legend(loc = 'best')

    # Right column: discrimination ratio (mean +/- SEM) per group; the grey line marks 0.5
    ax_dr = fig.add_subplot(gs[row, 1])
    ax_dr.errorbar(x = list(anx_discrim.index), y = anx_discrim.values, yerr = anx_discrim_sem.values, color='m', label='anxiety')
    ax_dr.errorbar(x = list(ctrl_discrim.index), y = ctrl_discrim.values, yerr = ctrl_disrcim_sem.values, color='g', label='control')
    ax_dr.hlines(0.5, xmin=-1, xmax=16, color='k', alpha = 0.4)
    ax_dr.set_title('Discrimination ratio')
    ax_dr.set_ylim(ylim_dr_lower, ylim_dr_upper)
    ax_dr.set_xlabel('time in [s] from stimulus onset')
    ax_dr.set_ylabel('discrimination ratio')
    ax_dr.legend(loc = 'best')

# Figure-level captions: the measure on top, then one caption per session row.
# The vertical positions are hand-tuned for the six sessions listed above.
plt.figtext(0.5, 0.92, 'Measure: ' + MEASUREMENT, ha="center", va="top", fontsize=20, color="k")
l_caption_heights = [0.9, 0.77, 0.63, 0.5, 0.37, 0.23]
for caption_height, session_name in zip(l_caption_heights, l_sessions):
    plt.figtext(0.5, caption_height, session_name, ha="center", va="top", fontsize=20, color="k")

# Uncomment to export the figure:
#plt.savefig(MEASUREMENT + '_all_sessions.pdf')
plt.show()

# Now let's have a closer look at the discrimination performance at the individual-subject level

## Are there any correlations between the discrimination ratios of the two measures for each subject (for selected timepoints)?


`session`: Can be any of: 'All_preacq', 'All_acq1', 'All_acq2', 'All_gen1', 'All_gen2', 'All_ext', but 'All_acq2' is probably the most meaningful for this. <br>
`timepoint`: Dictionary that specifies the respective timepoints to look at for the individual measures. <br>


In [None]:
session = 'All_acq2'
timepoint = {'HR': '6.0', 'EDA': '10.0'}   # both values have to be listed in l_timepoints below


def _paired_discrimination_ratios(df_long, session, timepoint, group):
    """Return (HR values, EDA values) of one group, paired subject by subject.

    The pairing is done with an inner merge on 'subject', so the two arrays always have the same
    length and the i-th entries belong to the same subject, independent of the row order in `df_long`.
    """
    df_group = df_long.loc[(df_long['session'] == session) & (df_long['group'] == group)]
    df_hr = df_group.loc[(df_group['measurement'] == 'HR') & (df_group['timepoint'] == timepoint['HR']),
                         ['subject', 'norm_global_discrimination_ratio']]
    df_eda = df_group.loc[(df_group['measurement'] == 'EDA') & (df_group['timepoint'] == timepoint['EDA']),
                          ['subject', 'norm_global_discrimination_ratio']]
    df_pairs = df_hr.merge(df_eda, on='subject', how='inner', suffixes=('_hr', '_eda'))
    return (df_pairs['norm_global_discrimination_ratio_hr'].values,
            df_pairs['norm_global_discrimination_ratio_eda'].values)


# Extract & re-organize the relevant data into long format:
# one row per subject / measurement / session / timepoint
df_drs = df.loc[df['value_type'] == 'norm_global_discrimination_ratio'].copy()

l_timepoints = ['3.0','5.0', '6.0', '7.0', '8.0', '9.0', '10.0', '11.0']
l_dfs = []

for session_temp in df_drs.session.unique():
    for time_label in l_timepoints:
        # .copy() so that the column edits below act on an independent frame, not on a slice of df_drs
        df_temp = df_drs.loc[df_drs['session'] == session_temp, ['subject', 'group', 'measurement', 'session', 'value_type', time_label]].copy()
        # The value column gets a fixed name; the 'value_type' column is recycled to store the timepoint
        df_temp = df_temp.rename(columns={'value_type': 'timepoint', time_label: 'norm_global_discrimination_ratio'})
        df_temp['timepoint'] = time_label
        l_dfs.append(df_temp)

df_drs = pd.concat(l_dfs, axis=0)


# Pearson correlation anxiety group:
anx_hr, anx_eda = _paired_discrimination_ratios(df_drs, session, timepoint, 'anxiety')
pearson_anx = stats.pearsonr(anx_hr, anx_eda)


# Pearson correlation control group:
ctrl_hr, ctrl_eda = _paired_discrimination_ratios(df_drs, session, timepoint, 'control')
pearson_ctrl = stats.pearsonr(ctrl_hr, ctrl_eda)


# Data for plotting: HR at its timepoint stacked on EDA at its timepoint
df_plot_hr = df_drs.loc[(df_drs['session'] == session) & (df_drs['timepoint'] == timepoint['HR']) & (df_drs['measurement'] == 'HR')]
df_plot_eda = df_drs.loc[(df_drs['session'] == session) & (df_drs['timepoint'] == timepoint['EDA']) & (df_drs['measurement'] == 'EDA')]

df_plot = pd.concat([df_plot_hr, df_plot_eda], axis = 0)


# Create figure: one line per subject for each group, plus the group means
fig = plt.figure(figsize=(25, 8))
gs = fig.add_gridspec(1, 3)

ax1 = fig.add_subplot(gs[0, 0])
sns.lineplot(data=df_plot.loc[(df_plot['session'] == session) & (df_plot['group'] == 'control')], x="measurement",
             y="norm_global_discrimination_ratio", units="subject", estimator=None, lw=1, alpha=0.5, color='g', marker='o', ax=ax1)
ax1.set_title('Control group - Pearsons R: ' + str(round(pearson_ctrl[0], 3)) + ', p = ' + str(round(pearson_ctrl[1], 3)), fontsize=15)


ax2 = fig.add_subplot(gs[0, 1], sharey=ax1)
sns.lineplot(data=df_plot.loc[(df_plot['session'] == session) & (df_plot['group'] == 'anxiety')], x="measurement",
             y="norm_global_discrimination_ratio", units="subject", estimator=None, lw=1, alpha=0.5, color='m', marker='o', ax=ax2)
ax2.set_title('Anxiety group - Pearsons R: ' + str(round(pearson_anx[0], 3)) + ', p = ' + str(round(pearson_anx[1], 3)), fontsize=15)

ax3 = fig.add_subplot(gs[0, 2], sharey=ax1)
sns.lineplot(data=df_plot.loc[df_plot['session'] == session], x="measurement", y="norm_global_discrimination_ratio",
             hue="group", lw=2, palette=['green', 'magenta'], ax=ax3)
ax3.set_title('Group means', fontsize=15)

fig.suptitle('Discrimination ratios at ' + timepoint['HR'] + 's for HR and at ' + timepoint['EDA'] + 's for EDA - Session: ' + session, fontsize=20)
plt.show()

## Does classification of each subject as Responder / non-Responder for one measurement correlate with the corresponding classification using the data of the other measurement?

`t1`: Reference timepoint, e.g. '0.0' <br>
`t2`: Timepoint that is compared to the reference timepoint, e.g. '3.0' <br>
`ALPHA`: Transparency for lineplot <br>

In [None]:
# Column labels (strings) of the reference timepoint t1 and the comparison timepoint t2
t1 = '0.0'
t2 = '3.0'
# Transparency of the per-subject lines in the responder plot (passed as `alpha` to the line plots)
ALPHA = 0.05

In [None]:
def _lookup_single_value(df_session, measurement, subject, value_type, time_label):
    """Return the one value at `time_label` for this measurement / subject / value type.

    Returns NaN when there is no matching row (every comparison with NaN is False, so such a
    subject is classified as non-responder) and raises ValueError when the match is ambiguous.
    """
    values = df_session.loc[(df_session['measurement'] == measurement) & (df_session['value_type'] == value_type)
                            & (df_session['subject'] == subject), time_label].values
    if len(values) == 0:
        return np.nan
    if len(values) > 1:
        raise ValueError('More than one row for ' + str(subject) + ' / ' + measurement + ' / ' + value_type)
    return values[0]


def _paired_responder_flags(df_rows):
    """Return (HR flags, EDA flags) paired subject by subject via an inner merge on 'subject'."""
    df_hr = df_rows.loc[df_rows['measurement'] == 'HR', ['subject', 'responder']]
    df_eda = df_rows.loc[df_rows['measurement'] == 'EDA', ['subject', 'responder']]
    df_pairs = df_hr.merge(df_eda, on='subject', how='inner', suffixes=('_hr', '_eda'))
    return df_pairs['responder_hr'].values, df_pairs['responder_eda'].values


# Classification of each subject as responder (can be different for each measurement, e.g. HR responder & EDA non-responder)
# The classification always uses session 'All_acq2' and the value types selected by NORM_TYPE (set in an earlier cell)
df['responder'] = 0
df_acq2 = df.loc[df['session'] == 'All_acq2']

for MEASUREMENT in ['HR', 'EDA']:
    for subject in df.subject.unique():

        CS_minus_t1 = _lookup_single_value(df_acq2, MEASUREMENT, subject, NORM_TYPE + 'CS-', t1)
        CS_minus_t2 = _lookup_single_value(df_acq2, MEASUREMENT, subject, NORM_TYPE + 'CS-', t2)

        CS_plus_t1 = _lookup_single_value(df_acq2, MEASUREMENT, subject, NORM_TYPE + 'CS+', t1)
        CS_plus_t2 = _lookup_single_value(df_acq2, MEASUREMENT, subject, NORM_TYPE + 'CS+', t2)

        # Change from t1 to t2 for each stimulus type
        delta_cs_minus = CS_minus_t2 - CS_minus_t1
        delta_cs_plus = CS_plus_t2 - CS_plus_t1

        responder = 0

        if MEASUREMENT == 'HR':
            # Check if decrease in HR from t1 to t2 is larger for CS+ compared to CS- AND if there is actually a decrease for CS+ between t1 and t2
            if (delta_cs_minus > delta_cs_plus) and (delta_cs_plus < 0):
                responder = 1

        if MEASUREMENT == 'EDA':
            # Check if increase in EDA from t1 to t2 is larger for CS+ compared to CS- AND if there is actually an increase for CS+ between t1 and t2
            if (delta_cs_minus < delta_cs_plus) and (delta_cs_plus > 0):
                responder = 1

        # The flag is written to ALL rows of this subject & measurement
        df.loc[(df['subject'] == subject) & (df['measurement'] == MEASUREMENT),  'responder'] = responder

df['responder'] = df['responder'].astype('int32')


# One row per subject & measurement is enough to read the flag: the 'All_acq2' discrimination-ratio rows are used
df_resp = df.loc[(df['value_type'] == 'norm_global_discrimination_ratio') & (df['session'] == 'All_acq2')]

# Pearson correlation anxiety group:
anx_hr, anx_eda = _paired_responder_flags(df_resp.loc[df_resp['group'] == 'anxiety'])
pearson_anx = stats.pearsonr(anx_hr, anx_eda)


# Pearson correlation control group:
ctrl_hr, ctrl_eda = _paired_responder_flags(df_resp.loc[df_resp['group'] == 'control'])
pearson_ctrl = stats.pearsonr(ctrl_hr, ctrl_eda)


# Pearson correlation all subjects:
all_hr, all_eda = _paired_responder_flags(df_resp)
pearson_all = stats.pearsonr(all_hr, all_eda)


# Create figure: one line per subject for each group, plus the group means
fig = plt.figure(figsize=(25, 8))
gs = fig.add_gridspec(1, 3)

ax1 = fig.add_subplot(gs[0, 0])
sns.lineplot(data=df_resp.loc[df_resp['group'] == 'control'], x="measurement",
             y="responder", units="subject", estimator=None, lw=3, alpha=ALPHA, color='k', marker='o', ax=ax1)
ax1.set_title('Control group - Pearsons R: ' + str(round(pearson_ctrl[0], 3)) + ', p = ' + str(round(pearson_ctrl[1], 3)), fontsize=15)


ax2 = fig.add_subplot(gs[0, 1], sharey=ax1)
sns.lineplot(data=df_resp.loc[df_resp['group'] == 'anxiety'], x="measurement",
             y="responder", units="subject", estimator=None, lw=3, alpha=ALPHA, color='k', marker='o', ax=ax2)
ax2.set_title('Anxiety group - Pearsons R: ' + str(round(pearson_anx[0], 3)) + ', p = ' + str(round(pearson_anx[1], 3)), fontsize=15)

ax3 = fig.add_subplot(gs[0, 2], sharey=ax1)
sns.lineplot(data=df_resp, x="measurement", y="responder",
             hue="group", lw=2, palette=['green', 'magenta'], ax=ax3)
ax3.set_title('Group means', fontsize=15)


fig.suptitle('Responder vs. non-Responders - Pearsons R all subjects: ' + str(round(pearson_all[0], 3)) + ', p = ' + str(round(pearson_all[1], 3)), fontsize=20)
plt.show()

# Additional Plotting cells:

## Plotting discrimination ratios across sessions

In [None]:
# Entry of df['value_type'] whose time course is plotted across sessions in the next cell.
# No MEASUREMENT is configured here: the next cell iterates over both 'EDA' and 'HR' itself.
VALUE_TYPE = 'norm_global_discrimination_ratio'

In [None]:
# Columns of df that hold the time course, labelled in seconds relative to stimulus onset
TIMEPOINTS = list(range(-1, 16))
# Measurements in plotting order (one figure row each) and the legend position used for their panels
l_measurements = ['EDA', 'HR']
legend_locs = {'EDA': 'upper left', 'HR': 'lower left'}
# Opacity increment per session: later sessions are drawn more opaque
ALPHA_STEP = 0.15


def plot_ratio_time_course(ax, ratios_per_session, mean_pos, sem_pos, color, title, legend_loc):
    """Draw the mean +/- SEM time course of every session into one axes.

    ax:                 matplotlib axes to draw into
    ratios_per_session: dict session -> (mean_ctrl, sem_ctrl, mean_anx, sem_anx), each a Series over time
    mean_pos, sem_pos:  positions of the mean / SEM Series of the requested group inside that tuple
    color:              line color used for all sessions
    title:              axes title
    legend_loc:         location string passed on to ax.legend
    """
    for n_session, (session_name, session_stats) in enumerate(ratios_per_session.items(), start=1):
        mean, sem = session_stats[mean_pos], session_stats[sem_pos]
        # matplotlib rejects alpha values above 1, which the plain running sum reaches from
        # the 7th session on - therefore the opacity is capped at 1.0
        ax.errorbar(x=list(mean.index), y=mean.values, yerr=sem.values, color=color,
                    alpha=min(1.0, ALPHA_STEP * n_session), label=session_name)
    # Reference line at 0.5
    ax.hlines(0.5, xmin=TIMEPOINTS[0], xmax=TIMEPOINTS[-1], color='k', alpha=0.4)
    ax.set_title(title)
    ax.set_xlabel('time in [s] from stimulus onset')
    ax.set_ylabel('discrimination ratio')
    ax.legend(loc=legend_loc)


# Collect mean and SEM per time point for each measurement, session and group.
# l_dicts[i] belongs to l_measurements[i] and maps session -> (mean_ctrl, sem_ctrl, mean_anx, sem_anx)
l_dicts = []

for MEASUREMENT in l_measurements:
    df_ctrl = df.loc[(df['value_type'] == VALUE_TYPE) & (df['group'] == 'control') & (df['measurement'] == MEASUREMENT)].copy()
    df_anx = df.loc[(df['value_type'] == VALUE_TYPE) & (df['group'] == 'anxiety') & (df['measurement'] == MEASUREMENT)].copy()

    discrim_ratios = {}
    for session in l_sessions:
        ctrl_session = df_ctrl.loc[df_ctrl['session'] == session, TIMEPOINTS]
        anx_session = df_anx.loc[df_anx['session'] == session, TIMEPOINTS]

        discrim_ratios[session] = (ctrl_session.mean(), ctrl_session.sem(), anx_session.mean(), anx_session.sem())

    l_dicts.append(discrim_ratios)


# One figure row per measurement: control group on the left, anxiety group on the right.
# Both panels of a row share their y axis so that the groups can be compared directly.
fig = plt.figure(figsize=(18, 12))
gs = fig.add_gridspec(2, 2)

for row, measurement_name in enumerate(l_measurements):
    ax_ctrl = fig.add_subplot(gs[row, 0])
    plot_ratio_time_course(ax_ctrl, l_dicts[row], 0, 1, 'g',
                           'Control group - ' + measurement_name, legend_locs[measurement_name])

    ax_anx = fig.add_subplot(gs[row, 1], sharey=ax_ctrl)
    plot_ratio_time_course(ax_anx, l_dicts[row], 2, 3, 'm',
                           'Anxiety group - ' + measurement_name, legend_locs[measurement_name])

plt.tight_layout()
plt.show()

## HR early vs HR late

In [None]:
session = 'All_acq2'
# Measurement whose discrimination ratios are compared between two time points in this cell
measurement_sel = 'HR'
# The entries stored under 'HR' and 'EDA' serve as the early and the late time point here.
# presumably df_drs['timepoint'] holds strings, since it is compared against these - verify
timepoint = {'HR': '7.0', 'EDA': '11.0'}
early_tp, late_tp = timepoint['HR'], timepoint['EDA']
ratio_col = 'norm_global_discrimination_ratio'

# Restrict df_drs once to the selected session and measurement, then split by time point
df_sel = df_drs.loc[(df_drs['session'] == session) & (df_drs['measurement'] == measurement_sel)]
df_plot_early = df_sel.loc[df_sel['timepoint'] == early_tp]
df_plot_late = df_sel.loc[df_sel['timepoint'] == late_tp]

# Pearson correlation (early vs. late time point) per group.
# NOTE(review): pearsonr pairs the two arrays by position, so this assumes that both
# selections list the same subjects in the same order - confirm against how df_drs was built.
pearson_per_group = {}
for group in ['anxiety', 'control']:
    early_values = df_plot_early.loc[df_plot_early['group'] == group, ratio_col].values
    late_values = df_plot_late.loc[df_plot_late['group'] == group, ratio_col].values
    pearson_per_group[group] = stats.pearsonr(early_values, late_values)
pearson_anx = pearson_per_group['anxiety']
pearson_ctrl = pearson_per_group['control']

# Data of both time points stacked for plotting (already limited to session and measurement)
df_plot = pd.concat([df_plot_early, df_plot_late], axis = 0)


# Create figure: one panel per group with one line per subject, plus one panel with the group means
fig = plt.figure(figsize=(25, 8))
gs = fig.add_gridspec(1, 3)

ax1 = fig.add_subplot(gs[0, 0])
sns.lineplot(data=df_plot.loc[df_plot['group'] == 'control'], x="timepoint", y=ratio_col,
             units="subject", estimator=None, lw=1, alpha=0.5, color='g', marker='o', ax=ax1)
ax1.set_title('Control group - Pearsons R: ' + str(round(pearson_ctrl[0], 3)) + ', p = ' + str(round(pearson_ctrl[1], 3)), fontsize=15)

ax2 = fig.add_subplot(gs[0, 1], sharey=ax1)
sns.lineplot(data=df_plot.loc[df_plot['group'] == 'anxiety'], x="timepoint", y=ratio_col,
             units="subject", estimator=None, lw=1, alpha=0.5, color='m', marker='o', ax=ax2)
ax2.set_title('Anxiety group - Pearsons R: ' + str(round(pearson_anx[0], 3)) + ', p = ' + str(round(pearson_anx[1], 3)), fontsize=15)

# Third panel: seaborn aggregates over subjects here, one line per group
ax3 = fig.add_subplot(gs[0, 2], sharey=ax1)
sns.lineplot(data=df_plot, x="timepoint", y=ratio_col,
             hue="group", lw=2, palette=['green', 'magenta'], ax=ax3)
ax3.set_title('Group means', fontsize=15)

# The title names the single measurement that is shown: both time points come from the same
# measurement, not from HR and EDA as the previous title text stated.
fig.suptitle(measurement_sel + ' discrimination ratios at ' + early_tp + 's (early) vs. ' + late_tp + 's (late) - Session: ' + session, fontsize=20)
plt.show()

## EDA early vs EDA late

In [None]:
session = 'All_acq2'
# Measurement whose discrimination ratios are compared between two time points in this cell
measurement_sel = 'EDA'
# The entries stored under 'HR' and 'EDA' serve as the early and the late time point here.
# presumably df_drs['timepoint'] holds strings, since it is compared against these - verify
timepoint = {'HR': '7.0', 'EDA': '11.0'}
early_tp, late_tp = timepoint['HR'], timepoint['EDA']
ratio_col = 'norm_global_discrimination_ratio'

# Restrict df_drs once to the selected session and measurement, then split by time point
df_sel = df_drs.loc[(df_drs['session'] == session) & (df_drs['measurement'] == measurement_sel)]
df_plot_early = df_sel.loc[df_sel['timepoint'] == early_tp]
df_plot_late = df_sel.loc[df_sel['timepoint'] == late_tp]

# Pearson correlation (early vs. late time point) per group.
# NOTE(review): pearsonr pairs the two arrays by position, so this assumes that both
# selections list the same subjects in the same order - confirm against how df_drs was built.
pearson_per_group = {}
for group in ['anxiety', 'control']:
    early_values = df_plot_early.loc[df_plot_early['group'] == group, ratio_col].values
    late_values = df_plot_late.loc[df_plot_late['group'] == group, ratio_col].values
    pearson_per_group[group] = stats.pearsonr(early_values, late_values)
pearson_anx = pearson_per_group['anxiety']
pearson_ctrl = pearson_per_group['control']

# Data of both time points stacked for plotting (already limited to session and measurement)
df_plot = pd.concat([df_plot_early, df_plot_late], axis = 0)


# Create figure: one panel per group with one line per subject, plus one panel with the group means
fig = plt.figure(figsize=(25, 8))
gs = fig.add_gridspec(1, 3)

ax1 = fig.add_subplot(gs[0, 0])
sns.lineplot(data=df_plot.loc[df_plot['group'] == 'control'], x="timepoint", y=ratio_col,
             units="subject", estimator=None, lw=1, alpha=0.5, color='g', marker='o', ax=ax1)
ax1.set_title('Control group - Pearsons R: ' + str(round(pearson_ctrl[0], 3)) + ', p = ' + str(round(pearson_ctrl[1], 3)), fontsize=15)

ax2 = fig.add_subplot(gs[0, 1], sharey=ax1)
sns.lineplot(data=df_plot.loc[df_plot['group'] == 'anxiety'], x="timepoint", y=ratio_col,
             units="subject", estimator=None, lw=1, alpha=0.5, color='m', marker='o', ax=ax2)
ax2.set_title('Anxiety group - Pearsons R: ' + str(round(pearson_anx[0], 3)) + ', p = ' + str(round(pearson_anx[1], 3)), fontsize=15)

# Third panel: seaborn aggregates over subjects here, one line per group
ax3 = fig.add_subplot(gs[0, 2], sharey=ax1)
sns.lineplot(data=df_plot, x="timepoint", y=ratio_col,
             hue="group", lw=2, palette=['green', 'magenta'], ax=ax3)
ax3.set_title('Group means', fontsize=15)

# The title names the single measurement that is shown: both time points come from the same
# measurement, not from HR and EDA as the previous title text stated.
fig.suptitle(measurement_sel + ' discrimination ratios at ' + early_tp + 's (early) vs. ' + late_tp + 's (late) - Session: ' + session, fontsize=20)
plt.show()

# Old chunks of code:

Currently, all normalization is performed on the level of single sessions separately for each subject. <br>
This ensures a scaling of all values in each session and for each subject from 0 - 1. <br>
Alternatives would be to use more global min/max values for the scaling: <br>

1) subject specific min/max considering all sessions <br>
2) global min/max considering all sessions from all subjects

The advantage of more global min-max values would be that the scaled CS+ and CS- values reflect more accurately the absolute values, <br>
while session specific min-max scaling per subject scales a difference of 0.1 in session a to the same range as a difference of 10 in session b. <br>
However, session specific scaling should maximize the differences in the discrimination ratio, which is our primary goal here.

# Archived preprocessing variant: min-max scaling per subject and per session.
# Builds one long dataframe `df` with the absolute values, the scaled values and the
# discrimination ratio for every measurement (EDA / HR), session and subject.
l_files = ['EDA_1Hz_Normalized1s_-1_15_Dennis.xlsx', 'HR_1Hz_Normalized1s_-1_15_Dennis.xlsx']
l_dfs_per_measurement = []

for file in l_files:
    # The measurement type is encoded as prefix of the file name
    if file.startswith('EDA'):
        measurement = 'EDA'
    elif file.startswith('HR'):
        measurement = 'HR'
    else:
        # Fail early instead of silently re-using the measurement of the previous file
        raise ValueError('Cannot infer measurement (EDA or HR) from file name: ' + file)

    # Read excel file and extract all tab names (= sessions) as list
    df_dict = pd.read_excel(file, sheet_name = None, index_col = 0)
    l_sessions = list(df_dict.keys())


    # Some reshaping is required to make subsequent work more efficient
    # Data is first cleared from subjects that have no data
    # Then it is split into two subsets (CS+ and CS- data)
    l_dfs_per_session = []
    for session in l_sessions:

        # Select data from respective session and drop columns without data
        df_temp = df_dict[session].dropna(axis = 1)

        # Rename index that corresponds to subject row
        l_idx = df_temp.index.tolist()
        l_idx[0] = 'subject'
        df_temp.index = l_idx

        # Identify number of subjects that were investigated:
        # the position of the first CS- column equals the number of CS+ columns
        l_columns = df_temp.columns.to_list()
        all_kids = next((pos for pos, elem in enumerate(l_columns) if elem.startswith('CS-')), None)
        if all_kids is None:
            # Without this check the split position of the previous session would be re-used
            raise ValueError('No column starting with "CS-" found in session: ' + session)

        # Set subjects as column headers
        df_temp.columns = df_temp.iloc[0]
        df_temp = df_temp.drop(df_temp.index[0])

        # Split data into CS+ and CS- subsets (subjects become rows)
        df_cs_plus = df_temp.iloc[:, :all_kids].copy().transpose()
        df_cs_minus = df_temp.iloc[:, all_kids:].copy().transpose()

        # Add metadata about session and value_type as leading columns of both subsets
        for df_subset, value_type in [(df_cs_plus, 'abs_CS+'), (df_cs_minus, 'abs_CS-')]:
            df_subset.insert(loc=0, column='value_type', value=value_type)
            df_subset.insert(loc=0, column='session', value=session)

        # Concat the two subsets again and append them to list
        df_reshaped = pd.concat([df_cs_plus, df_cs_minus], axis=0)
        l_dfs_per_session.append(df_reshaped)

    # Concat the reshaped dataframes from all sessions that contain the "absolute" values
    # Note: already normalized with Jérémys code to stimulus onset -1.0s
    df_abs = pd.concat(l_dfs_per_session, axis=0).reset_index()

    # Now the data is normalized and the discrimination ratio is calculated
    # Calculations are performed iteratively and based only on the data from one subject and one session
    l_subjects = list(df_abs.subject.unique())
    l_dfs_per_subject = []

    for subject in l_subjects:
        l_dfs_per_session = []

        for session in l_sessions:
            # Select the data: columns 3-19 are used as the value columns
            # (the leading columns are expected to be subject, session and value_type)
            df_temp = df_abs.loc[(df_abs['session'] == session) & (df_abs['subject'] == subject)].iloc[:,3:20].copy()

            # Since the data contains positive and negative values, the absolute value of the min is added to each value
            # This forces all values to be positive and regular min-max-scaling can be performed
            session_min = df_temp.min().min()
            df_temp = df_temp + abs(session_min)
            new_min = df_temp.min().min()
            new_max = df_temp.max().max()
            df_temp = (df_temp - new_min) / (new_max - new_min)

            # Row 0 is used as CS+ and row 1 as CS- (order in which the subsets were concatenated)
            cs_plus = df_temp.iloc[0, :]
            cs_minus = df_temp.iloc[1, :]

            # It may be that the normalized value (0.0 at -1.0s) is the session_min, which would cause division by zero
            # Since both CS- and CS+ are 0.0 at -1.0s and the discrimination ratio is always 0.5 for -1.0s
            # It is set to 0.5 for these cases to handle the Error
            if session_min == 0:
                l_values = [0.5] + list(cs_plus.iloc[1:] / (cs_plus.iloc[1:] + cs_minus.iloc[1:]))
                columns = [-1.0] + list(cs_plus.iloc[1:].index)
                ratio = pd.Series(l_values, index = columns)

            # Regular computations:
            else:
                ratio = cs_plus / (cs_plus + cs_minus)

            # DataFrame.append was removed in pandas 2.0 - pd.concat adds the ratio as third row instead
            df_temp = pd.concat([df_temp, ratio.to_frame().T], ignore_index=True)

            # Metadata is added and the df is appended to the list that will ultimately contain data from all sessions per subject
            df_temp.insert(loc=0, column='value_type', value=['norm_CS+', 'norm_CS-', 'discrimination_ratio'])
            df_temp.insert(loc=0, column='session', value=session)
            df_temp.insert(loc=0, column='subject', value=subject)

            l_dfs_per_session.append(df_temp)

        # The individual dfs per session from one subject are concated and appended to the list that will ultimately contain the data from all subjects
        df_all_sessions_per_subject = pd.concat(l_dfs_per_session, axis=0)
        l_dfs_per_subject.append(df_all_sessions_per_subject)

    # The data from all subjects and sessions (min-max-scaled & discrimination_ratio) is concated and fused with the absolute values to create the final df
    df_norm = pd.concat(l_dfs_per_subject, axis=0)
    df_measurement = pd.concat([df_abs, df_norm], axis=0).reset_index(drop = True)

    # As last step information about measurement (HR or EDA) and about group (control or anxiety) are added and the df is appended to list for final concatenation
    # The group is derived from the subject prefix; subjects with another prefix stay NaN
    # (np.nan instead of np.NaN, which was removed in NumPy 2.0)
    subject_groups = {}
    for subject in l_subjects:
        if subject.startswith('GES'):
            subject_groups[subject] = 'control'
        elif subject.startswith('ANX'):
            subject_groups[subject] = 'anxiety'
        else:
            subject_groups[subject] = np.nan
    # Mapping the whole column at once avoids writing strings into a float (NaN) column
    df_measurement.insert(loc=1, column='group', value=df_measurement['subject'].map(subject_groups))
    df_measurement.insert(loc=2, column = 'measurement', value = measurement)

    # Tab names in EDA file contain 'EDA ' as prefix for each session - this has to be removed:
    if measurement == 'EDA':
        df_measurement['session'] = [elem.replace('EDA ', '') for elem in df_measurement['session']]
    l_dfs_per_measurement.append(df_measurement)

df = pd.concat(l_dfs_per_measurement, axis=0).reset_index(drop=True)
df.head()

    