In [5]:
import pandas as pd
import matplotlib.pyplot as plt

import pandas as pd
import numpy as np
import datetime
import sklearn
import scikit_posthocs as sp
import statsmodels.formula.api as sfa
import statsmodels.api as sa
import os
from pathlib import Path
import statsmodels
import scipy
import neurokit2 as nk
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

import tqdm 

from sklearn.linear_model import LinearRegression
from collections import Counter
import matplotlib.pyplot as plt

import mne
from mne_icalabel import label_components
mne.set_log_level('WARNING')
from scipy.integrate import simpson

# Preprocessing

As mentioned in the paper, we used the same pipeline as the Chiossi et al. (2024) paper, thus used and adapted chunks of code for our need available at https://github.com/mimuc/avi24-adaptation-dataset

In [6]:
lstPIds = []
path = "./Data2/"
for filename in os.listdir(path):
    if filename.endswith(".csv"): 
        lstPIds.append(int(filename.split("-")[0].replace("ID", "")))
    else:
        continue
lstPIds = list(set(lstPIds))
print(lstPIds)

def readRawEDA (p, starttime):
    dfEDA = pd.read_csv(f"{path}ID{p}-EDA.csv")
    dfEDA = dfEDA[dfEDA.Time != "Time"]
    dfEDA = dfEDA.astype(float)
    dfEDA = dfEDA.groupby("TimeLsl").mean().reset_index()
    dfEDA = dfEDA.rename(columns={'EDA':'Value'})
    dfEDA = dfEDA[['Time', 'TimeLsl', 'Value']]
    dfEDA = dfEDA[~dfEDA.Value.isna()].copy()
    
    dfEDA.Value = (dfEDA.Value / 1000) / 25
    dfEDA["TimeNorm"] = dfEDA.Time - starttime
    
    return dfEDA

def processEDA(dfEDA):
    edaFs = 250 
    dfEDA = dfEDA.copy()
    dfEDA["Value"] = dfEDA["Value"].values - dfEDA["Value"].values[:250].mean()
    signal, info = nk.eda_process(dfEDA["Value"].values, sampling_rate=edaFs)
    dfEDA["EDARaw"] = signal.EDA_Raw.values
    dfEDA["EDATonic"] = signal.EDA_Tonic.values
    dfEDA["EDAPhasic"] = signal.EDA_Phasic.values
    dfEDA["EDASCR"] = signal.SCR_Amplitude.values
    dfEDA["EDApeak"] = signal.SCR_Peaks.values
    return dfEDA

lstAll = []
for pid in tqdm.tqdm(lstPIds):
    
    dfState = pd.read_csv(f"{path}ID{pid}-state.csv")

    dfStart = dfState[dfState.State == "start"].copy()
    dfEnd = dfState[dfState.State == "end"][["Time"]].iloc[:len(dfStart)]
    dfStart = dfStart.rename(columns={"Time":"TimeStart"})
    dfStart.TimeStart = dfStart.TimeStart #+ 60
    dfStart["TimeEnd"] = dfEnd.Time.values
    del dfStart["State"]
    dfStart["Duration"] = dfStart.TimeEnd - dfStart.TimeStart
    df = dfStart[dfStart.BlockNumber != -2].copy()
    
    for i in df["BlockNumber"].unique():
        d = df.loc[df["BlockNumber"]==i].copy(deep=True)
        length = d["TimeEnd"].item()-d["TimeStart"].item()
        inter = length/10.0
        start = d["TimeStart"].item()
        for j in range(10):
            end=start+inter
            row = {"TimeStart":start,
                   "BlockNumber": int(str(i)+str(j)), 
                   "AdaptationStatus":d.loc[d["BlockNumber"]==i]["AdaptationStatus"].item(), 
                   "NBackN":d.loc[d["BlockNumber"]==i]["NBackN"].item(),
                   "TimeEnd":end, 
                   "Duration":inter}
            d.loc[len(d.index)]=  row      
            d = d.reset_index(drop=True)
            start += inter
            
        df = pd.concat((df,d))
        df = df.drop(df.loc[df["BlockNumber"]==i].index)
    df =df.reset_index()

    dfSphere = pd.read_csv(f"{path}ID{pid}-sphere.csv")
    del dfSphere["Feedback"]
    
    lstMissedCount = []
    for i, e in df.iterrows():
        dfX = dfSphere[(dfSphere.Time> e.TimeStart) & (dfSphere.Time <= e.TimeEnd )].reset_index(drop=True)
        missed = 0
        for i in range(0, len(dfX)-1):
            if ((dfX.iloc[i].Type == "up") & (dfX.iloc[i+1].Type == "down")):
                if(dfX.iloc[i+1].Time-dfX.iloc[i].Time > 4):
                    missed += 1
        lstMissedCount.append(missed)
    
    df["NBackMissed"] = lstMissedCount
    
    
    df.index = list(range(len(df)))
    df["PId"] = pid
    
    # Read Flow File
    dfFlow = pd.read_csv(f"{path}ID{pid}-flow.csv")
    dfFlow["TimeNorm"] = dfFlow.Time-dfState.Time.iloc[0]

    ## Calculate Accuracy
    lstLiamData = []
    for i, e in df.iterrows():
        dfX = dfFlow[(dfFlow.Time > e.TimeStart) & (dfFlow.Time < e.TimeEnd )]
       
        if ((len(dfX) == 0)):
            print("No Data! Liams" + str(i))
           
        lstLiamData.append(len(dfX))
        
    df["LiamsCountTotal"] = lstLiamData
    
    df["Flow"] = df["LiamsCountTotal"] / df.Duration * 60
   
    dfEDA = readRawEDA(pid, dfState.Time.iloc[0])
    
    
    lstDfEAD = []
    lstEDATonic = []
    lstEDATonicSD = []
    lstEDATonicMax = []
    lstEDATonicMin = []
    lstEDATonicDiff30 = []
    lstEDARaw = []
    lstEDAPhasic = []
    lstEDASCR = []
    lstEDApeak = []
    lstSlopeEDAPhasic = []
    lstSlopeEDATonic = []
    
    for i, e in df.iterrows():
        dfX = dfEDA[(dfEDA.Time > e.TimeStart) & (dfEDA.Time < e.TimeEnd )] # ALl data
        dfX = processEDA(dfX.copy())
        lstDfEAD.append(dfX)
        lstEDATonic.append(dfX.EDATonic.mean())
        lstEDARaw.append(dfX.EDARaw.mean())
        lstEDAPhasic.append(dfX.EDAPhasic.mean())
        lstEDATonicMax.append(dfX.EDATonic.max())
        lstEDATonicMin.append(dfX.EDATonic.min())
        lstEDATonicSD.append(dfX.EDATonic.std())
        lstEDASCR.append(dfX.EDASCR.mean())
        lstEDApeak.append(dfX.EDApeak.mean())
        
        reg = LinearRegression().fit(dfX.TimeLsl.values.reshape([-1,1]), dfX.EDAPhasic.values)
        lstSlopeEDAPhasic.append(reg.coef_[0])
        reg = LinearRegression().fit(dfX.TimeLsl.values.reshape([-1,1]), dfX.EDATonic.values)
        lstSlopeEDATonic.append(reg.coef_[0])
        
        

    df["SlopeEDAPhasic"] = lstSlopeEDAPhasic
    df["SlopeEDATonic"] = lstSlopeEDATonic
    df["dfEDA"] = lstDfEAD
    df["EDATonic"] = lstEDATonic
    df["EDARaw"] = lstEDARaw
    df["EDAPhasic"] = lstEDAPhasic
    df["EDATonicMin"] = lstEDATonicMin
    df["EDATonicMax"] = lstEDATonicMax
    df["EDATonicSD"] = lstEDATonicSD
    df["EDASCR"] = lstEDASCR
    df["EDApeak"] = lstEDApeak
    
    

    lstAll.append(df)
    
dfAll = pd.concat(lstAll)
dfAll = dfAll.sort_values("TimeStart")
dfAll = dfAll.reset_index(drop=True)

[1, 2, 3, 5, 6, 7, 9, 11, 12, 13, 14, 17, 18, 19, 20, 22, 23]


100%|██████████| 17/17 [01:05<00:00,  3.87s/it]


In [7]:
print(dfAll)
dfEda = dfAll.copy(deep=True)

     index     TimeStart  BlockNumber  AdaptationStatus  NBackN       TimeEnd  \
0        1  1.662020e+09           10                 1       2  1.662020e+09   
1        3  1.662020e+09           12                 1       2  1.662020e+09   
2        5  1.662020e+09           14                 1       2  1.662020e+09   
3        7  1.662020e+09           16                 1       2  1.662020e+09   
4        9  1.662020e+09           18                 1       2  1.662020e+09   
..     ...           ...          ...               ...     ...           ...   
845      6  1.665667e+09           15                 1       2  1.665667e+09   
846      7  1.665667e+09           16                 1       2  1.665667e+09   
847      8  1.665667e+09           17                 1       2  1.665667e+09   
848      9  1.665667e+09           18                 1       2  1.665667e+09   
849     10  1.665667e+09           19                 1       2  1.665667e+09   

      Duration  NBackMissed

Practice  =   10     20
Time 1    =   11     21
Time 2    =   12     22
Time 3    =   13     23
Outlast 1 =   14     24
Outlast 2 =   15     25
Outlast 3 =   16     26

In [9]:
chan_names = ['Fz', 'P3', 'Pz', 'P4', 'PO7', 'Oz', 'PO8']
chan_maps = {
    'Value0': 'Fz',
    'Value1': 'P3',
    'Value2': 'Pz',
    'Value3': 'P4',
    'Value4': 'PO7',
    'Value5': 'Oz',
    'Value6': 'PO8',
}

def load_eeg(pid):
    dfEEG = pd.read_csv(f"./data2/ID{pid}-EEG.csv")
    dfState = pd.read_csv(f"./data2/ID{pid}-state.csv")
    dfEEG.rename(columns=chan_maps, inplace=True)
    dfEEG.drop("TimeLsl", axis =1, inplace=True)
    dfEEG['BlockNumber'] = -1
    dfEEG.loc[dfEEG[(dfEEG.Time > dfState['Time'][0]) & (dfEEG.Time < dfState['Time'][0+1])].index, 'BlockNumber']   = dfState['BlockNumber'][0]
    dfEEG.loc[dfEEG[(dfEEG.Time > dfState['Time'][2]) & (dfEEG.Time < dfState['Time'][2+1])].index, 'BlockNumber']   = dfState['BlockNumber'][2]
    dfEEG.loc[dfEEG[(dfEEG.Time > dfState['Time'][4]) & (dfEEG.Time < dfState['Time'][4+1])].index, 'BlockNumber']   = dfState['BlockNumber'][4]
    dfEEG.loc[dfEEG[(dfEEG.Time > dfState['Time'][6]) & (dfEEG.Time < dfState['Time'][6+1])].index, 'BlockNumber']   = dfState['BlockNumber'][6]
    dfEEG.loc[dfEEG[(dfEEG.Time > dfState['Time'][8]) & (dfEEG.Time < dfState['Time'][8+1])].index, 'BlockNumber']  = dfState['BlockNumber'][8]
    dfEEG.loc[dfEEG[(dfEEG.Time > dfState['Time'][10]) & (dfEEG.Time < dfState['Time'][10+1])].index, 'BlockNumber'] = dfState['BlockNumber'][10]
    dfEEG.loc[dfEEG[(dfEEG.Time > dfState['Time'][12]) & (dfEEG.Time < dfState['Time'][12+1])].index, 'BlockNumber'] = dfState['BlockNumber'][12]

    dfAll = dfEEG.copy()
    dfAll = dfAll.drop(dfAll[dfAll.BlockNumber < 0].index)
    dfAll = dfAll.dropna()
    dfAll = dfAll.sort_values(by="Time")
    return dfAll

pid_valid = list(range(1, 24))
df_all = {}

for pid in tqdm.tqdm(pid_valid):
    if pid == 4 or pid == 8 or pid == 10 or pid == 15 or pid == 16 or pid == 17 or pid == 21:
        continue
    df = load_eeg(pid)
    for i in df["BlockNumber"].unique():
        d = df.loc[df["BlockNumber"]==i].copy(deep=True)
        
        for j in range(10):
            l = int(len(d)*0.1)
            start = j*l
            
            d.iloc[start:start+l,-1]= int(str(i)+str(j))
                
        df.loc[df["BlockNumber"]==i]= d
        df = df.drop(df.loc[df["BlockNumber"]==i].index)
    df_all[pid] = df

100%|██████████| 23/23 [00:52<00:00,  2.26s/it]


In [10]:
df_all

{1:                  Time        Fz         P3         Pz         P4        PO7  \
 49761    1.662020e+09  7002.464  16882.490 -2877.8610 -2876.9580  16887.910   
 49788    1.662020e+09  6983.501  16863.530 -2896.8240 -2895.9210  16868.940   
 49789    1.662020e+09  6962.732  16842.760 -2917.5930 -2916.6900  16848.170   
 49790    1.662020e+09  6964.538  16844.560 -2915.7870 -2914.8840  16849.980   
 49791    1.662020e+09  6957.615  16837.640 -2922.7100 -2921.8070  16843.060   
 ...               ...       ...        ...        ...        ...        ...   
 1313264  1.662024e+09  8282.918  -1597.407   149.6207   597.4826   6806.754   
 1313263  1.662024e+09  8287.734  -1592.591   137.2498   544.1099   6768.730   
 1313262  1.662024e+09  8282.617  -1597.708   132.7359   596.2772   6813.392   
 1313261  1.662024e+09  8287.734  -1592.591   124.5858   568.8326   6794.675   
 1313259  1.662024e+09  8271.179  -1609.146   119.7903   560.7195   6776.009   
 
                 Oz       PO8  Valu

In [11]:
picks_alpha = ['P3', 'Pz', 'P4']
picks_theta = ['Fz', 'Pz']
picks_beta = ['Fz', 'Pz']

def compute_eeg_feature(group, freqs_range):
    picks = mne.pick_types(group.info, meg=False, eeg=True, eog=False, stim=False)
    psds, freqs = mne.time_frequency.psd_welch(group, proj=False, picks=picks, n_jobs=2, n_per_seg=50, n_overlap=5, n_fft=300)
    idx = np.logical_and(freqs >= freqs_range[0], freqs <= freqs_range[1])
    psds_mean = psds.mean(0)
    freq_res = freqs[1] - freqs[0]
    return simpson(np.log(psds_mean[idx]), dx=freq_res)



def get_block(df, block_number):
    return df[df.BlockNumber == block_number]

def eeg_analysis(df_block, pid):
    info = mne.create_info(ch_names=chan_names, sfreq=250, ch_types='eeg')
    info.set_montage('standard_1020')
    raw = mne.io.RawArray(df_block[chan_names].values.T, info)
    raw_filter = raw.filter(l_freq=1, h_freq=70, fir_design='firwin2')
    raw_filter_notch = raw_filter.notch_filter([50, 100], picks=chan_names, fir_design='firwin')
    raw_filter_notch_ref = raw_filter_notch.set_eeg_reference('average', projection=True)

    ica = mne.preprocessing.ICA(n_components=len(chan_names), random_state=42, max_iter='auto')
    ica.fit(raw_filter_notch_ref)

    ic_labels = label_components(raw_filter_notch_ref, ica, method="iclabel")

    labels = ic_labels["labels"]
    exclude_idx = [idx for idx, label in enumerate(labels) if label not in ["brain", "other"]]

    reconst_raw = raw_filter_notch_ref.copy()
    ica.apply(reconst_raw, exclude=exclude_idx)
    raw_filter_notch_ref_ica = reconst_raw.copy()

    alpha = compute_eeg_feature(raw_filter_notch_ref_ica.copy().pick_channels(picks_alpha), (8, 12))
    theta = compute_eeg_feature(raw_filter_notch_ref_ica.copy().pick_channels(picks_theta), (4, 8))
    beta  = compute_eeg_feature(raw_filter_notch_ref_ica.copy().pick_channels(picks_beta), (12, 25))
    return pid, len(exclude_idx), alpha, theta, beta, raw_filter_notch_ref_ica

In [12]:
df = pd.DataFrame()
for pid in tqdm.tqdm(pid_valid):
    if pid == 4 or pid == 8 or pid == 10 or pid == 15 or pid == 16 or pid == 17 or pid == 21:
        continue
    blocknumbers = df_all[pid].BlockNumber.unique()
    for block_number in blocknumbers:
        df_current = df_all[pid]
        df_current = get_block(df_current, block_number)
        pid, n_ica, alpha, theta, beta, _ = eeg_analysis(df_current, pid)
        d = pd.DataFrame({
                'pid': pid,
                'n_ica': n_ica,
                'block_number': block_number,
                'alpha': alpha,
                'theta': theta,
                'beta': beta,
            }, index=[0])
        
        df = pd.concat([df, d], ignore_index = True)
        df.reset_index()

100%|██████████| 23/23 [24:32<00:00, 64.02s/it]


In [13]:
print(df)

      pid  n_ica  block_number      alpha      theta       beta
0       1      0            10  16.527260  21.287140  57.765646
1       1      0            11  22.604747  21.165809  81.355744
2       1      0            12   6.441857  12.826305  25.792392
3       1      0            13  23.497766  13.028162  24.995866
4       1      0            14  23.933241  17.408895  35.778763
...   ...    ...           ...        ...        ...        ...
1115   23      0            15   2.452789   8.534807  12.838007
1116   23      0            16   3.626514  10.535092  17.799381
1117   23      0            17   2.092037   8.707018  12.978686
1118   23      0            18   4.075717  13.672644  27.245836
1119   23      0            19   2.105800   8.162407  11.019985

[1120 rows x 6 columns]


In [14]:
df_fin = df.copy(deep=True)

In [15]:
def get_event_intervals(dfAdaptation) -> list:
    intervals = []
    workloads = []
    currentTime = 0
    currentDirection = 'less'
    currentWorkload = 'low'
    for idx, row in dfAdaptation.iterrows():
        if idx == 0:
            currentTime = row['Time']
            currentDirection = row['Direction']
            continue
        if currentDirection == row['Direction']:
            currentTime = row['Time']
            currentDirection = row['Direction']
        if currentDirection != row['Direction']:
            intervals.append((currentTime, row['Time']))
            currentTime = row['Time']
            oldDirection = currentDirection
            currentDirection = row['Direction']
            if oldDirection == 'less' and currentDirection == 'more':
                currentWorkload = 'low'
            elif oldDirection == 'more' and currentDirection == 'less':
                currentWorkload = 'high'
            workloads.append(currentWorkload)
    return intervals, workloads

def load_ecg(pid):

    dfECG = pd.read_csv(f"./data2/ID{pid}-ECG.csv")
    dfState = pd.read_csv(f"./data2/ID{pid}-state.csv")
    dfAdaptation = pd.read_csv(f"./data2/ID{pid}-adaptation.csv")
    dfECG.drop("TimeLsl", axis =1, inplace=True)
    intervals, workloads = get_event_intervals(dfAdaptation)
    for idx, interval in enumerate(intervals):
        dfECG.loc[(dfECG['Time'] >= interval[0]) & (dfECG['Time'] <= interval[1]), 'workload'] = workloads[idx]
        dfECG.loc[(dfECG['Time'] >= interval[0]) & (dfECG['Time'] <= interval[1]), 'interval_id'] = idx
    dfECG = dfECG[dfECG['workload'].notna()]

    dfECG.loc[dfECG[(dfECG.Time > dfState['Time'][0]) & (dfECG.Time < dfState['Time'][0+1])].index, 'BlockNumber']   = dfState['BlockNumber'][0]
    dfECG.loc[dfECG[(dfECG.Time > dfState['Time'][2]) & (dfECG.Time < dfState['Time'][2+1])].index, 'BlockNumber']   = dfState['BlockNumber'][2]
    dfECG.loc[dfECG[(dfECG.Time > dfState['Time'][4]) & (dfECG.Time < dfState['Time'][4+1])].index, 'BlockNumber']   = dfState['BlockNumber'][4]
    dfECG.loc[dfECG[(dfECG.Time > dfState['Time'][6]) & (dfECG.Time < dfState['Time'][6+1])].index, 'BlockNumber']   = dfState['BlockNumber'][6]
    dfECG.loc[dfECG[(dfECG.Time > dfState['Time'][8]) & (dfECG.Time < dfState['Time'][8+1])].index, 'BlockNumber']  = dfState['BlockNumber'][8]
    dfECG.loc[dfECG[(dfECG.Time > dfState['Time'][10]) & (dfECG.Time < dfState['Time'][10+1])].index, 'BlockNumber'] = dfState['BlockNumber'][10]
    dfECG.loc[dfECG[(dfECG.Time > dfState['Time'][12]) & (dfECG.Time < dfState['Time'][12+1])].index, 'BlockNumber'] = dfState['BlockNumber'][12]
    dfECG = dfECG.dropna()
    dfECG = dfECG.sort_values(by="Time")
    return dfECG

def get_block(df, block_number):
    return df[df.BlockNumber == block_number]

df_all = {}

def extract_ecg_features(pid, df):
    df_features = pd.DataFrame()
    intervals = df.interval_id.unique()
    for interval_id in intervals:
        df_current = df[df.interval_id == interval_id]

        if len(df_current['Value']) < 1:
            df_features = pd.concat((df_features,pd.DataFrame.from_dict([{
                'pid': pid,
                'workload': df_current['workload'].iloc[0],
                'interval': interval_id,
            }])))
            continue


        s, _ = nk.ecg_process(df_current['Value'], sampling_rate=130)
        peaks, _ = nk.ecg_peaks(nk.ecg_clean(df_current['Value'], sampling_rate=130),
                                sampling_rate=130,
                                correct_artifacts=True)

        try:
            hrv_rmssd = np.mean(nk.hrv(peaks, sampling_rate=130)['HRV_RMSSD'])
        except Exception:
            hrv_rmssd = np.NaN


        df_features = pd.concat((df_features,pd.DataFrame.from_dict([{
            'pid': pid,
            'workload': df_current['workload'].iloc[0],
            'block_number': df_current['BlockNumber'].iloc[0],
            'interval': interval_id,
            'ecg_rate_mean': np.mean(s['ECG_Rate']),
            'ecg_rate_var': np.var(s['ECG_Rate']),
            'hrv_rmssd': hrv_rmssd,
        }])))
    return df_features


In [16]:
pid_valid = list(range(1, 24))
df_all = {}

for pid in tqdm.tqdm(pid_valid):
    if pid == 4 or pid == 8 or pid == 10 or pid == 15 or pid == 16 or pid == 17 or pid == 21:
        continue
    df = load_ecg(pid)
    
    for i in df["BlockNumber"].unique():
        d = df.loc[df["BlockNumber"]==i].copy(deep=True)
        
        for j in range(10):
            l = int(len(d)*0.1)
            start = j*l
            
            d.iloc[start:start+l,-1]= int(str(int(i))+str(j))
                
        df.loc[df["BlockNumber"]==i]= d
        df = df.drop(df.loc[df["BlockNumber"]==i].index)
    df_all[pid] = df
    
print(df_all)

100%|██████████| 23/23 [00:09<00:00,  2.35it/s]

{1:                 Time  Value workload  interval_id  BlockNumber
108632  1.662020e+09    -22      low          0.0         60.0
108659  1.662020e+09    -12      low          0.0         60.0
108660  1.662020e+09     -7      low          0.0         60.0
108661  1.662020e+09     -4      low          0.0         60.0
108662  1.662020e+09      0      low          0.0         60.0
...              ...    ...      ...          ...          ...
415716  1.662023e+09    -22      low         12.0         79.0
415715  1.662023e+09    -24      low         12.0         79.0
415714  1.662023e+09    -24      low         12.0         79.0
415713  1.662023e+09    -17      low         12.0         79.0
415712  1.662023e+09    -26      low         12.0         79.0

[34550 rows x 5 columns], 2:                 Time  Value workload  interval_id  BlockNumber
117438  1.662028e+09     46     high          0.0         70.0
117465  1.662028e+09   -107     high          0.0         70.0
117466  1.662028e+09 




In [17]:
df = pd.DataFrame()
for pid in pid_valid:
    print('current pid: ', pid)
    if pid == 4 or pid == 8 or pid == 10 or pid == 15 or pid == 16 or pid == 17 or pid == 21:
        continue
    df_features = extract_ecg_features(pid, df_all[pid])
    df = pd.concat((df,df_features))
    
    
df['ecg_rate_mean_norm'] = (df['ecg_rate_mean'] - df['ecg_rate_mean'].min()) / (df['ecg_rate_mean'].max() - df['ecg_rate_mean'].min())
df['hrv_rmssd_norm'] = (df['hrv_rmssd'] - df['hrv_rmssd'].min()) / (df['hrv_rmssd'].max() - df['hrv_rmssd'].min())

current pid:  1
current pid:  2
current pid:  3
current pid:  4
current pid:  5
current pid:  6
current pid:  7
current pid:  8
current pid:  9
current pid:  10
current pid:  11
current pid:  12
current pid:  13
current pid:  14
current pid:  15
current pid:  16
current pid:  17
current pid:  18
current pid:  19
current pid:  20
current pid:  21
current pid:  22
current pid:  23


In [18]:
print(df)

    pid workload  block_number  interval  ecg_rate_mean  ecg_rate_var  \
0     1      low          60.0       0.0      86.202923    585.809445   
0     1     high          62.0       1.0      91.153597    726.346759   
0     1      low          65.0       2.0      90.731661    536.064351   
0     1     high          67.0       3.0     105.082848    623.451906   
0     1      low          70.0       4.0     101.749400    912.095071   
..  ...      ...           ...       ...            ...           ...   
0    23      low          72.0      10.0     104.721125   1048.290171   
0    23     high          74.0      11.0      97.288675    585.334778   
0    23      low          75.0      12.0     106.591597    767.416870   
0    23     high          77.0      13.0     107.165107    787.030933   
0    23      low          78.0      14.0      97.362037    829.135643   

     hrv_rmssd  ecg_rate_mean_norm  hrv_rmssd_norm  
0   316.134194            0.287601        0.639422  
0   323.063356   

In [19]:
dfEda.rename(columns= {"BlockNumber": "block_number"}, inplace=True)


final = pd.DataFrame()
for pid in pid_valid:
    print('current pid: ', pid)
    if pid == 4 or pid == 8 or pid == 10 or pid == 15 or pid == 16 or pid == 17 or pid == 21:
        continue

    dEEG = df_fin.loc[df_fin["pid"]==pid]
    dEDA = dfEda.loc[dfEda["PId"]==pid]
    dECG = df.loc[df["pid"]==pid]
    
    
    dfFin = dEEG.copy(deep=True)
    dfFin = dfFin.merge(dEDA, on="block_number", how="inner")
    dfFin = dfFin.merge(dECG, on="block_number", how="inner")
    
    
    print(len(dEEG["block_number"]))
    print(len(dEDA["block_number"]))
    print(len(dECG["block_number"]))

    dfFin.drop(['pid_x', 'index',"pid_y"],axis=1, inplace=True)

    final = pd.concat((final,dfFin))
final.reset_index(inplace=True)

current pid:  1
70
50
13
current pid:  2
70
50
11
current pid:  3
70
50
17
current pid:  4
current pid:  5
70
50
10
current pid:  6
70
50
14
current pid:  7
70
50
4
current pid:  8
current pid:  9
70
50
6
current pid:  10
current pid:  11
70
50
23
current pid:  12
70
50
8
current pid:  13
70
50
14
current pid:  14
70
50
4
current pid:  15
current pid:  16
current pid:  17
current pid:  18
70
50
14
current pid:  19
70
50
20
current pid:  20
70
50
12
current pid:  21
current pid:  22
70
50
12
current pid:  23
70
50
15


In [46]:
print(final.columns)

finalData = final[["alpha","theta","beta","EDATonic","EDAPhasic","EDASCR","workload","ecg_rate_mean","hrv_rmssd"]]

Index(['index', 'n_ica', 'block_number', 'alpha', 'theta', 'beta', 'TimeStart',
       'AdaptationStatus', 'NBackN', 'TimeEnd', 'Duration', 'NBackMissed',
       'PId', 'LiamsCountTotal', 'Flow', 'SlopeEDAPhasic', 'SlopeEDATonic',
       'dfEDA', 'EDATonic', 'EDARaw', 'EDAPhasic', 'EDATonicMin',
       'EDATonicMax', 'EDATonicSD', 'EDASCR', 'EDApeak', 'workload',
       'interval', 'ecg_rate_mean', 'ecg_rate_var', 'hrv_rmssd',
       'ecg_rate_mean_norm', 'hrv_rmssd_norm'],
      dtype='object')


In [48]:
finalData.to_csv('out.csv', index=False) 