In [1]:
import pandas as pd
import numpy as np
import os
import neurokit2 as nk
import matplotlib.pyplot as plt
from datetime import timedelta
import seaborn as sns
import statsmodels.api as sm
from scipy.stats import wilcoxon
import gc
%matplotlib inline


In [3]:
folder_path = "/Users/annapalatkina/Desktop/drive/participants/problematic"  

data_full = pd.DataFrame(columns=['Timestamp', 'Raw_EDA_biopac', 'Raw_ECG_biopac', 'EventLabel', 'Participant'])


for filename in os.listdir(folder_path):
    file_path = os.path.join(folder_path, filename)  
    if filename.endswith('.csv') and filename != 'R0V9G1L4.csv': 
        print(filename)
        data = pd.read_csv(file_path, comment='#',low_memory=False)
        if 'Channel 9 (EDA100C)' in data.columns:
            eda_column = 'Channel 9 (EDA100C)'
            ecg_column = 'Channel 13 (ECG100C)'
        elif 'Channel 9 (Raw)' in data.columns:
            eda_column = 'Channel 9 (Raw)'
            ecg_column = 'Channel 13 (Raw)'
        data = data[['Row', 'Timestamp', eda_column, ecg_column,'MarkerName','MarkerType']]
        start, end = data.query('MarkerName == "Experiment" and MarkerType == "S"').index.values[0], data.query('MarkerName == "Experiment" and MarkerType == "E"').index.values[0]
        data = data[start:end+1]
        data.reset_index(inplace=True, drop=True)
        markers =  list(data[data['MarkerName'].notna()].sort_values('Row').MarkerName.unique())
        markers.remove('Experiment')
        data['EventLabel'] = pd.Series(dtype='object')
        for i,m in enumerate(markers):
            start = data[(data.MarkerName == m) & (data.MarkerType == "S")].index.values[0]
            end = data[(data.MarkerName == m) & (data.MarkerType == "E")].index.values[0]
            data.loc[start:end, 'EventLabel'] = data.loc[start:end, 'EventLabel'].fillna(m)
        #data['EventLabel'] = data['EventLabel'].fillna('Silence')

        non_nan_indices = data['EventLabel'].notna()
        first_non_nan = non_nan_indices.idxmax() 
        last_non_nan = non_nan_indices[::-1].idxmax()

        data.loc[:first_non_nan-1, 'EventLabel'] = data.loc[:first_non_nan-1, 'EventLabel'].fillna('first_silence')
        data.loc[last_non_nan+1:, 'EventLabel'] = data.loc[last_non_nan+1:, 'EventLabel'].fillna('last_silence')

        data.dropna(subset=[eda_column, ecg_column], inplace=True)
        data.drop(columns=['MarkerName', 'MarkerType', 'Row'], inplace=True)
        data['Participant'] = [filename.split('.')[0]] * len(data)
        data.rename(columns={eda_column: 'Raw_EDA_biopac', ecg_column:'Raw_ECG_biopac'}, inplace=True)
        data_full = pd.concat([data_full, data], ignore_index=True)
        del data
        gc.collect()
 

data_full

M0R6U5F9.csv


  data_full = pd.concat([data_full, data], ignore_index=True)


L1N3B5O2.csv
G2Y7J4Q1.csv
J4X1T0D2.csv


Unnamed: 0,Timestamp,Raw_EDA_biopac,Raw_ECG_biopac,EventLabel,Participant
0,17158.000,1.485596,-0.231018,first_silence,M0R6U5F9
1,17163.000,1.485901,-0.233154,first_silence,M0R6U5F9
2,17168.000,1.485291,-0.219421,first_silence,M0R6U5F9
3,17173.000,1.486206,-0.213928,first_silence,M0R6U5F9
4,17178.000,1.485596,-0.198975,first_silence,M0R6U5F9
...,...,...,...,...,...
714525,907437.469,1.585693,0.411682,last_silence,J4X1T0D2
714526,907442.469,1.585999,0.378418,last_silence,J4X1T0D2
714527,907447.469,1.585999,0.340576,last_silence,J4X1T0D2
714528,907452.469,1.585999,0.271606,last_silence,J4X1T0D2


In [4]:
data_full.to_csv('problematic.csv')