In [1]:
#Active Library dependencies
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pyphysio as ph
from pdb import set_trace
import random
import glob
#Always plot inline if possible
%matplotlib inline

Using dask. Scheduler: threads
Please cite:
Bizzego et al. (2019) 'pyphysio: A physiological signal processing library for data science approaches in physiology', SoftwareX


In [2]:
#Preprocessing filters that will be applied to raw data samples
def exp_moving_average(signal, w):
    """Expoential moving average filter from pandas"""
    return pd.Series(signal.ewm(span=w, adjust=True).mean(), signal.index)

def filt_EDA(df_data):
    """Apply filter to EDA signal using processing steps from Pyphysio.
    
    Parameters
    ----------
    df_data : pandas.DataFrame
        DataFrame with EDA time and signal data
    
    Returns
    -------
    pandas.DataFrame 
        Updated DataFrame with Tonic and Phasic signals
    
    """
    
    # Set up Pyphysio EvenlySignal object
    eda_data = ph.EvenlySignal(values = df_data['EDA'].to_numpy(), sampling_freq = 4, signal_type='EDA')
    
    # Apply IIR filter
    eda_data = ph.IIRFilter(fp=0.8, fs=1.1, ftype='ellip')(eda_data)
    driver = ph.DriverEstim()(eda_data)
    
    # Estimate Tonic and Phasic signals
    phasic, tonic, _ = ph.PhasicEstim(delta=0.02)(driver)
    
    # Adjust signal length to match length of original EDA signal
    if len(phasic) != len(eda_data.get_values()):
        phasic = np.append(phasic.get_values(), phasic[-1])
        tonic = np.append(tonic.get_values(), tonic[-1])    
    
    # Append signal data to DataFrame
    df_data.loc[:,'Tonic'] = tonic
    df_data.loc[:,'Phasic'] = phasic
    return df_data  

def filt_TEMP(df_data):
    """Apply filter to TEMP signal using predetermined values"""
    df_data['TEMP_Filtered'] = exp_moving_average(df_data['TEMP'],60)
    return df_data

def filter_signals(df_data):
    """Apply filters/processing to respective signals.
    
    This function is intended to be used in conjunction with the
    pandas.DataFrame.apply() method, which passes a column of a DataFrame at
    a time.
    
    Parameters
    ----------
    df_data : pandas.Series
        A Series of DataFrames which contain all the signals for a single
        session.
    
    Returns
    -------
    pandas.Series
        Updated Series of DataFrames containing filtered/processed signal
        data
        
    Notes
    -----
    No filtering/processing for ACC, HR, and IBI signals is implemented,
    but is commented out for implementation in the future.
    
    """
    
    #df_data.loc['BVP'].loc[:,'BVP'] = filt_BVP(df_data.loc['BVP'])
    df_data = filt_EDA(df_data)
    df_data = filt_TEMP(df_data)
    
    return df_data

In [3]:
#RMS calculation Helper Function
def rms(data):
    return np.sqrt(np.mean(data ** 2))

#Every other feature is easy to calculate using Python built-ins
def feature_extract(df_data):
    result = {}
    result['Time'] = df_data.loc[:,'timestamp'].min()
    #for featbase in ['HR','EDA','TEMP','Tonic','Phasic','TEMP_Filtered']:
    for featbase in ['HR','EDA','TEMP']:
        #set_trace()
        result[featbase + '_Mean'] = df_data.loc[:,featbase].mean()
        result[featbase + '_Minimum'] = df_data.loc[:,featbase].min()
        result[featbase + '_Maximum'] = df_data.loc[:,featbase].max()
        result[featbase + '_Stdev'] = df_data.loc[:,featbase].std()
        result[featbase + '_RMS'] = rms(df_data.loc[:,featbase])
        result[featbase + '_MAD'] = df_data.loc[:,featbase].mad()
        result[featbase + '_MAV'] = df_data.loc[:,featbase].abs().max()
        result[featbase + '_Median'] = df_data.loc[:,featbase].median()
        result[featbase + '_P25'] = df_data.loc[:,featbase].quantile(0.25)
        result[featbase + '_P75'] = df_data.loc[:,featbase].quantile(0.75)
    return pd.Series(result, dtype='object')


In [4]:
os.chdir('/Users/sandoval/Library/CloudStorage/Box-Box/R15 Sensor Preprocessing and Analysis/10 Minute Windows/')

In [5]:
#Load baseline and behavior data for ALL participants into a dataframe
#participants = ['PR021','PR022','PR023','PR032','PR037','PR038','PR039','PR042','PR044','PR059','PR073','PR074','PR078']
#participants = ['PR003', 'PR007', 'PR008', 'PR010', 'PR011', 'PR018', 
                #'PR020', 'PR021', 'PR022', 'PR023', 'PR025', 'PR030', 
                #'PR032', 'PR033', 'PR037', 'PR038', 'PR039', 'PR042', 
                #'PR043', 'PR044', 'PR046', 'PR048', 'PR059', 'PR073', 
                #'PR074', 'PR078', 'PR079', 'PR084', 'PR085', 'PR086',
                #'PR089', 'PR092', 'PR093', 'PR096', 'PR101', 'PR102', 
                #'PR104', 'PR106', 'PR113', 'PR114', 'PR115', 'PR116',
                #'PR117', 'PR126', 'PR128']

participants = ['PR003', 'PR008', 'PR010', 'PR011', 'PR018', 'PR020', 'PR021', 'PR022', 'PR023', 'PR025',  'PR032', 'PR033', 'PR037', 'PR038', 'PR039', 'PR042', 'PR044', 'PR046', 'PR048', 'PR059', 'PR073', 'PR074', 'PR078', 'PR079', 'PR084', 'PR086', 'PR096', 'PR101', 'PR102', 'PR104', 'PR106', 'PR113', 'PR116', 'PR126', 'PR128']

behavior_df = pd.DataFrame()
baseline_df = pd.DataFrame()

for curpar in participants:
    #First load the behavior windows
    behfile = curpar + '/'+ curpar + ' Behavior 40 min Windows MERGED.csv'
    cur_behdf = pd.read_csv(behfile,parse_dates=['timestamp'], infer_datetime_format=True)
    cur_behdf['PID'] = curpar 
    if behavior_df.empty:
        behavior_df = cur_behdf
    else:
        behavior_df = pd.concat([behavior_df, cur_behdf])
    #Now load the baseline windows
    basefile = curpar + '/'+ curpar + ' Baseline MERGED.csv'
    cur_basedf = pd.read_csv(basefile,parse_dates=['timestamp'], infer_datetime_format=True)
    cur_basedf['PID'] = curpar 
    if baseline_df.empty:
        baseline_df = cur_basedf
    else:
        baseline_df = pd.concat([baseline_df, cur_basedf])
    

ParserError: Error tokenizing data. C error: Calling read(nbytes) on source failed. Try engine='python'.

In [None]:
behavior_df

In [6]:
#path = r'/Users/sandoval/Library/CloudStorage/Box-Box/R15 Sensor Preprocessing and Analysis/10 Minute Windows/')
#all_files = glob.glob(os.path.join(path, "/*.csv"))
physioAll = []

for files in glob.glob('/Users/sandoval/Library/CloudStorage/Box-Box/R15 Sensor Preprocessing and Analysis/10 Minute Windows/PR*/*40 min Windows MERGED.csv', recursive=True):
    cur = pd.read_csv(files,parse_dates=['timestamp'], infer_datetime_format=True)
    physio = cur.sort_values(['timestamp'], ignore_index=True)
    physio = physio.groupby(['event']).apply(feature_extract)
    physio['pid'] = files
    physioAll.append(physio)
    
physioAll
    

[                         Time     HR_Mean  HR_Minimum  HR_Maximum   HR_Stdev  \
 event                                                                          
 1     2022-04-04 02:02:11.750  107.335362       62.22      161.68  24.389427   
 
            HR_RMS     HR_MAD  HR_MAV  HR_Median  HR_P25  ...  TEMP_Minimum  \
 event                                                    ...                 
 1      110.071168  20.903546  161.68     101.92   87.47  ...         24.13   
 
        TEMP_Maximum  TEMP_Stdev   TEMP_RMS  TEMP_MAD  TEMP_MAV  TEMP_Median  \
 event                                                                         
 1             34.83    4.615926  30.125431  4.559149     34.83        33.43   
 
        TEMP_P25  TEMP_P75                                                pid  
 event                                                                         
 1         24.63     34.05  /Users/sandoval/Library/CloudStorage/Box-Box/R...  
 
 [1 rows x 32 columns],
        

In [21]:
type(physioAll)
df = pd.concat(physioAll)
df

Unnamed: 0_level_0,Time,HR_Mean,HR_Minimum,HR_Maximum,HR_Stdev,HR_RMS,HR_MAD,HR_MAV,HR_Median,HR_P25,...,TEMP_Minimum,TEMP_Maximum,TEMP_Stdev,TEMP_RMS,TEMP_MAD,TEMP_MAV,TEMP_Median,TEMP_P25,TEMP_P75,pid
event,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,2022-04-04 02:02:11.750,107.335362,62.22,161.68,24.389427,110.071168,20.903546,161.68,101.920,87.4700,...,24.13,34.83,4.615926,30.125431,4.559149,34.83,33.43,24.63,34.05,/Users/sandoval/Library/CloudStorage/Box-Box/R...
1,2021-02-20 21:19:19.250,96.539012,69.37,163.83,19.935513,98.575678,15.453156,163.83,92.380,80.4800,...,36.43,37.55,0.266105,36.974514,0.224128,37.55,36.97,36.75,37.16,/Users/sandoval/Library/CloudStorage/Box-Box/R...
2,2021-02-21 01:32:42.250,81.321598,72.85,87.85,3.266206,81.387150,2.586749,87.85,80.830,79.2200,...,36.77,38.91,0.166160,38.526783,0.133064,38.91,38.50,38.41,38.65,/Users/sandoval/Library/CloudStorage/Box-Box/R...
3,2021-02-21 14:32:00.250,87.423412,1.00,109.05,10.262724,88.023613,7.317069,109.05,88.265,84.1575,...,34.95,37.50,0.579495,36.301614,0.493856,37.50,36.07,35.93,36.83,/Users/sandoval/Library/CloudStorage/Box-Box/R...
4,2021-02-21 19:06:14.000,86.538733,62.13,106.62,8.153998,86.921994,6.357927,106.62,87.880,81.0500,...,36.49,38.03,0.418170,37.165329,0.345500,38.03,37.09,36.87,37.45,/Users/sandoval/Library/CloudStorage/Box-Box/R...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7,2021-11-11 15:25:32.750,85.733442,56.53,165.00,19.846936,88.000471,13.890121,165.00,80.470,73.1800,...,33.39,35.05,0.435550,34.172848,0.373485,35.05,34.23,33.73,34.50,/Users/sandoval/Library/CloudStorage/Box-Box/R...
8,2021-11-11 15:25:35.750,85.751434,56.53,165.00,19.844130,88.017367,13.885273,165.00,80.480,73.1800,...,33.39,35.05,0.435987,34.171583,0.373882,35.05,34.23,33.73,34.50,/Users/sandoval/Library/CloudStorage/Box-Box/R...
9,2021-11-15 15:22:31.500,84.208054,66.77,124.02,9.184263,84.707369,6.301756,124.02,82.930,78.7300,...,28.55,33.18,0.993625,31.612592,0.761390,33.18,31.95,31.54,32.21,/Users/sandoval/Library/CloudStorage/Box-Box/R...
1,2021-11-11 00:36:32.250,82.866275,1.00,106.50,24.795870,86.426842,12.841769,106.50,89.250,85.0000,...,26.79,34.00,2.909331,31.901894,2.592295,34.00,33.31,27.97,33.89,/Users/sandoval/Library/CloudStorage/Box-Box/R...


In [23]:
def pidMaker(string):
    return string.replace('/Users/sandoval/Library/CloudStorage/Box-Box/R15 Sensor Preprocessing and Analysis/10 Minute Windows/', '').replace(' Behavior 40 min Windows MERGED', '').replace('PR*/', '').replace('.csv', '')

In [26]:
df['pid'] = df['pid'].apply(pidMaker).astype('string')
df['pid'] = df['pid'].str[:5]
df['Window'] = 'Behavior'
print(df)

                         Time     HR_Mean  HR_Minimum  HR_Maximum   HR_Stdev  \
event                                                                          
1     2022-04-04 02:02:11.750  107.335362       62.22      161.68  24.389427   
1     2021-02-20 21:19:19.250   96.539012       69.37      163.83  19.935513   
2     2021-02-21 01:32:42.250   81.321598       72.85       87.85   3.266206   
3     2021-02-21 14:32:00.250   87.423412        1.00      109.05  10.262724   
4     2021-02-21 19:06:14.000   86.538733       62.13      106.62   8.153998   
...                       ...         ...         ...         ...        ...   
7     2021-11-11 15:25:32.750   85.733442       56.53      165.00  19.846936   
8     2021-11-11 15:25:35.750   85.751434       56.53      165.00  19.844130   
9     2021-11-15 15:22:31.500   84.208054       66.77      124.02   9.184263   
1     2021-11-11 00:36:32.250   82.866275        1.00      106.50  24.795870   
2     2021-11-13 14:28:38.250   83.33039

In [9]:
for col in physio.columns:
    print(col)

Time
HR_Mean
HR_Minimum
HR_Maximum
HR_Stdev
HR_RMS
HR_MAD
HR_MAV
HR_Median
HR_P25
HR_P75
EDA_Mean
EDA_Minimum
EDA_Maximum
EDA_Stdev
EDA_RMS
EDA_MAD
EDA_MAV
EDA_Median
EDA_P25
EDA_P75
TEMP_Mean
TEMP_Minimum
TEMP_Maximum
TEMP_Stdev
TEMP_RMS
TEMP_MAD
TEMP_MAV
TEMP_Median
TEMP_P25
TEMP_P75
pid


In [27]:
df = pd.DataFrame(df)
df

Unnamed: 0_level_0,Time,HR_Mean,HR_Minimum,HR_Maximum,HR_Stdev,HR_RMS,HR_MAD,HR_MAV,HR_Median,HR_P25,...,TEMP_Maximum,TEMP_Stdev,TEMP_RMS,TEMP_MAD,TEMP_MAV,TEMP_Median,TEMP_P25,TEMP_P75,pid,Window
event,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,2022-04-04 02:02:11.750,107.335362,62.22,161.68,24.389427,110.071168,20.903546,161.68,101.920,87.4700,...,34.83,4.615926,30.125431,4.559149,34.83,33.43,24.63,34.05,PR113,Behavior
1,2021-02-20 21:19:19.250,96.539012,69.37,163.83,19.935513,98.575678,15.453156,163.83,92.380,80.4800,...,37.55,0.266105,36.974514,0.224128,37.55,36.97,36.75,37.16,PR033,Behavior
2,2021-02-21 01:32:42.250,81.321598,72.85,87.85,3.266206,81.387150,2.586749,87.85,80.830,79.2200,...,38.91,0.166160,38.526783,0.133064,38.91,38.50,38.41,38.65,PR033,Behavior
3,2021-02-21 14:32:00.250,87.423412,1.00,109.05,10.262724,88.023613,7.317069,109.05,88.265,84.1575,...,37.50,0.579495,36.301614,0.493856,37.50,36.07,35.93,36.83,PR033,Behavior
4,2021-02-21 19:06:14.000,86.538733,62.13,106.62,8.153998,86.921994,6.357927,106.62,87.880,81.0500,...,38.03,0.418170,37.165329,0.345500,38.03,37.09,36.87,37.45,PR033,Behavior
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7,2021-11-11 15:25:32.750,85.733442,56.53,165.00,19.846936,88.000471,13.890121,165.00,80.470,73.1800,...,35.05,0.435550,34.172848,0.373485,35.05,34.23,33.73,34.50,PR102,Behavior
8,2021-11-11 15:25:35.750,85.751434,56.53,165.00,19.844130,88.017367,13.885273,165.00,80.480,73.1800,...,35.05,0.435987,34.171583,0.373882,35.05,34.23,33.73,34.50,PR102,Behavior
9,2021-11-15 15:22:31.500,84.208054,66.77,124.02,9.184263,84.707369,6.301756,124.02,82.930,78.7300,...,33.18,0.993625,31.612592,0.761390,33.18,31.95,31.54,32.21,PR102,Behavior
1,2021-11-11 00:36:32.250,82.866275,1.00,106.50,24.795870,86.426842,12.841769,106.50,89.250,85.0000,...,34.00,2.909331,31.901894,2.592295,34.00,33.31,27.97,33.89,PR104,Behavior


In [29]:
df.to_csv('Physio Behavior Windows.csv')

In [29]:
os.getcwd()

'/Users/sandoval/Library/CloudStorage/Box-Box/R15 Sensor Preprocessing and Analysis/10 Minute Windows'