In [1]:
import os
import pandas as pd
import numpy  as np
from scipy.fft    import dct, idct
from scipy.signal import butter, savgol_filter, ellip, filtfilt, detrend
from matplotlib   import pylab as plt

## Loading data

In [20]:
# paths were the raw data is
root_path = 'data/'
data_path = os.path.join(root_path, 'Pilot')
shimmer_path  = os.path.join(data_path, '2023-02-26_18.38.04_Pilot_SS_SD_Session1')

# paths were the preprocessed data should be
out_path = os.path.join(root_path, 'Processed')
eda_out_path = os.path.join(out_path, 'EDA')

# check if the output folders exist and create them if necessary
for folder in [out_path, eda_out_path]:
    if not os.path.exists(folder):
        os.mkdir(folder)

# list all files (ignoring the hidden macos file)
shimmer_files = [x for x in os.listdir(shimmer_path) if x.endswith('.csv')]

In [21]:
shimmer_files

['Pilot_SS_Session1_Shimmer_D210_Calibrated_SD.csv',
 'Pilot_SS_Session1_Shimmer_EC8E_Calibrated_SD.csv',
 'Pilot_SS_Session1_Shimmer_895A_Calibrated_SD.csv']

In [22]:
eda_shimmer = shimmer_files[0]
eda_shimmer

'Pilot_SS_Session1_Shimmer_D210_Calibrated_SD.csv'

In [23]:
trigger_shimmer = shimmer_files[2]
eda_shimmer

'Pilot_SS_Session1_Shimmer_D210_Calibrated_SD.csv'

## Preprocessing

In [24]:
# Reshape a numpy array 'a' of shape (n, x) to form shape((n - window_size), window_size, x))
def rolling_window(a, window, step_size):
    shape = a.shape[:-1] + (a.shape[-1] - window + 1 - step_size + 1, window)
    strides = a.strides + (a.strides[-1] * step_size,)
    return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)

In [25]:
def filterTonicEDA(signal, coarsest=2):
    X        = dct(signal) # discrete cosine transform
    absX     = np.abs(X)   # keep magnitude only
    absX[::-1].sort()      # sort in reverse order (descending)
    absX[coarsest:] = 0    # remove components with less impact
    tonic    = idct(absX)  # inverse discrete cosine transform
    return signal - tonic, tonic # return phasic and tonic component

In [34]:
# pre-processing like in Perusquia, 2019: skin condunctance from neck and hand

fs_eda     = 128  # sampling frequency of EDA data
step_width = 1    # of sliding window
winDur     = 0.1  # duration in seconds of sliding window
N_win      = int(np.round(fs_eda * winDur)) # duration of sliding window in samples
smoothing_fcn = np.mean # I could call np.mean(..) later but apparently it is faster this way
winLen     = 1001 # sample duration of the savitzky golay filter
order      = 1    # order of the savitzky golay filter

filepath = os.path.join(shimmer_path, eda_shimmer)
df       = pd.read_csv(filepath, skiprows=[0,2], header=[0])

In [35]:
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,"sep=,"
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Shimmer_D210_TimestampSync_Unix_CAL
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,ms
1677404000000.0,1.0,0.084236,11871.363636,12515.018315,-1.0,-1.0,
1677404000000.0,1.0,0.084236,11871.363636,1679.120879,-1.0,-1.0,
1677404000000.0,1.0,0.084236,11871.363636,1704.029304,-1.0,-1.0,
1677404000000.0,1.0,0.084236,11871.363636,1704.029304,-1.0,-1.0,
1677404000000.0,1.0,0.084236,11871.363636,1704.029304,-1.0,-1.0,


In [None]:
# note: instead of usually writing df["data"] we can write df.data
buffer  = rolling_window(df.data.values, N_win, step_width)    # arrange data in 100 ms sliding windows
smooth  = smoothing_fcn(buffer, axis=1) # mean average of 100 ms sliding windows
phasic, tonic = filterTonicEDA(smooth)  # remove signal consisting of first 2 dct components only
eda_filtered  = signal.savgol_filter(phasic, winLen, order) # apply savitzky-golay filter
# pad zeros at the end because sliding window makes the signal a bit shorter
# and save it into the data frame loaded
df.data = np.concatenate((eda_filtered, np.zeros(N_win-1)))
df['tonic'] = np.concatenate((tonic, np.zeros(N_win-1)))

# rename data to phasic
column_names = list(df.columns)
column_names[1] = 'phasic'
df.columns = column_names

# order columns so that data comes before labels
df = df[df.columns[[0,1,-1,*list(range(2,len(column_names)-1))]]]

# save data frame with pre-processed data to new file
df.to_csv(os.path.join(eda_out_path, file), index=False)