In [None]:
import os
import pandas as pd
import numpy  as np
from scipy.fft    import dct, idct
from scipy.signal import butter, savgol_filter, ellip, filtfilt, detrend
from matplotlib   import pylab as plt

## Loading data

In [None]:
# paths where the raw data is
root_path = '../../01_Data/'
data_path = os.path.join(root_path, 'Labeled')
emg_path  = os.path.join(data_path, 'EMG')
eda_path  = os.path.join(data_path, 'EDA')

# paths where the preprocessed data should be written
out_path = os.path.join(root_path, 'Processed')
eda_out_path = os.path.join(out_path, 'EDA')
emg_out_path = os.path.join(out_path, 'EMG')

# create the output folders if necessary; exist_ok avoids the separate
# exists-check and also creates missing parent folders
for folder in [out_path, eda_out_path, emg_out_path]:
    os.makedirs(folder, exist_ok=True)

# list all emg and eda files (ignoring the hidden macos file);
# os.listdir returns entries in arbitrary order, so sort for a reproducible run
emg_files = sorted(x for x in os.listdir(emg_path) if '.DS_Store' not in x)
eda_files = sorted(x for x in os.listdir(eda_path) if '.DS_Store' not in x)

In [None]:
def rolling_window(a, window, step_size):
    """Return a strided view of overlapping windows along the last axis of `a`.

    Parameters
    ----------
    a : np.ndarray
        Input array of shape (..., n).
    window : int
        Number of samples in each window.
    step_size : int
        Number of samples between the starts of two consecutive windows.

    Returns
    -------
    np.ndarray
        View of shape (..., (n - window) // step_size + 1, window) with
        out[..., i, j] == a[..., i * step_size + j]. The view shares memory
        with `a`, so it should be treated as read-only.

    Raises
    ------
    ValueError
        If `window` or `step_size` is smaller than 1, or if `window` is
        larger than the last axis of `a`.
    """
    if window < 1 or step_size < 1:
        raise ValueError("window and step_size must be >= 1")
    n_samples = a.shape[-1]
    if window > n_samples:
        raise ValueError("window must not be larger than the last axis of a")

    # number of complete windows that fit when hopping by step_size
    n_windows = (n_samples - window) // step_size + 1
    shape = a.shape[:-1] + (n_windows, window)
    # the hop applies between windows; inside a window samples stay contiguous
    sample_stride = a.strides[-1]
    strides = a.strides[:-1] + (sample_stride * step_size, sample_stride)
    return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)

## Preprocessing

In [None]:
def filterTonicEDA(signal, coarsest=2):
    """Split a 1-D signal into a phasic and a tonic part using the DCT.

    The tonic part is rebuilt from the `coarsest` DCT coefficients with the
    largest magnitude. Each kept coefficient stays at its own frequency index
    and keeps its sign, so the tonic part is an actual component of `signal`
    (sorting the magnitudes themselves would discard both and, for example,
    turn a rising trend into a falling one).

    Parameters
    ----------
    signal : array-like, 1-D
        Input samples.
    coarsest : int, optional
        Number of dominant DCT coefficients that form the tonic part.

    Returns
    -------
    phasic : same type as `signal - tonic`
        `signal` with the tonic part removed.
    tonic : np.ndarray
        Reconstruction from the kept DCT coefficients.
    """
    X = dct(signal)  # discrete cosine transform
    # indices of the coefficients with the largest magnitude (stable for ties)
    strongest = np.argsort(-np.abs(X), kind='stable')[:coarsest]
    X_tonic = np.zeros_like(X)
    X_tonic[strongest] = X[strongest]  # drop the components with less impact
    tonic = idct(X_tonic)  # inverse discrete cosine transform
    return signal - tonic, tonic  # phasic and tonic component

In [None]:
# pre-processing like in Perusquia, 2019: skin conductance from neck and hand

fs_eda     = 128  # sampling frequency of EDA data
step_width = 1    # of sliding window
winDur     = 0.1  # duration in seconds of sliding window
N_win      = int(np.round(fs_eda * winDur)) # duration of sliding window in samples
smoothing_fcn = np.mean # function applied to every sliding window
winLen     = 1001 # window length in samples of the savitzky-golay filter
order      = 1    # polynomial order of the savitzky-golay filter

for file in eda_files:
    filepath = os.path.join(eda_path, file)
    df       = pd.read_csv(filepath)

    # arrange the data in sliding windows of N_win samples and average each one
    buffer  = rolling_window(df['data'].values, N_win, step_width)
    smooth  = smoothing_fcn(buffer, axis=1)
    # split the smoothed signal into phasic and tonic component
    phasic, tonic = filterTonicEDA(smooth)
    # savgol_filter is imported directly from scipy.signal; there is no
    # `signal` module in scope, so it must be called without that prefix
    eda_filtered  = savgol_filter(phasic, winLen, order)

    # the sliding window makes the signal shorter than the data frame, so pad
    # zeros at the end (N_win - 1 samples for step_width == 1) before storing it
    n_pad = len(df) - len(eda_filtered)
    df['data']  = np.concatenate((eda_filtered, np.zeros(n_pad)))
    df['tonic'] = np.concatenate((tonic, np.zeros(n_pad)))

    # rename data to phasic
    df = df.rename(columns={'data': 'phasic'})

    # order columns so that data comes before labels: first column, phasic,
    # tonic (appended last above), then the remaining columns
    # NOTE(review): assumes 'data' is the second column of the csv - confirm
    column_names = list(df.columns)
    df = df[[column_names[0], column_names[1], column_names[-1], *column_names[2:-1]]]

    # save data frame with pre-processed data to new file
    df.to_csv(os.path.join(eda_out_path, file), index=False)

In [None]:
# preview the data frame left over from the last iteration of the EDA loop
df.head()