In [1]:
import pandas as pd
import neurokit2 as nk
import plotly.graph_objects as go
import plotly.io as pio
import glob
import os
import matplotlib.pyplot as plt
import numpy as np
import csv
from scipy.stats import zscore

def custom_downsample(df, factor, retain_columns):
    # Initialize an empty DataFrame for the downsampled data
    downsampled_df = pd.DataFrame()

    for column in df.columns:
        if column in retain_columns:
            # Retain the ones in the specified columns
            downsampled_data = df[column].rolling(window=factor, min_periods=1).max()
        else:
            # Downsample by taking the mean of every 'factor' rows
            downsampled_data = df[column].rolling(window=factor, min_periods=1).mean()

        # Select every 'factor'-th row
        downsampled_data = downsampled_data.iloc[::factor,]

        # Add the downsampled column to the new DataFrame
        downsampled_df[column] = downsampled_data

    return downsampled_df

def add_peak_height_colum(df,peak_col,signal_col):
    peak_heights = df.loc[df[peak_col]==1][signal_col]
    return peak_heights.index.values, peak_heights

columns_of_interest = [
    'PPG_Clean', 'PPG_Rate', 'PPG_Peaks',
    'ECG_Rate','ECG_Quality',
    'EDA_Clean', 'EDA_Tonic', 'EDA_Phasic', 'SCR_Onsets', 'SCR_Peaks','SCR_Height', 'SCR_Amplitude',
    'RSP_Amplitude', 'RSP_Rate',
    'corrugator_EMG_Clean',
    'corrugator_EMG_Amplitude', 'corrugator_EMG_Activity',
    'corrugator_EMG_Onsets', 'corrugator_EMG_Offsets', 'trapezius_EMG_Raw',
    'trapezius_EMG_Clean', 'trapezius_EMG_Amplitude',
    'trapezius_EMG_Activity', 'trapezius_EMG_Onsets',
    'trapezius_EMG_Offsets', 'zygomaticus_EMG_Raw', 'zygomaticus_EMG_Clean',
    'zygomaticus_EMG_Amplitude', 'zygomaticus_EMG_Activity',
    'zygomaticus_EMG_Onsets', 'zygomaticus_EMG_Offsets',
    'RSP_RVT', 'RSP_Phase', 'RSP_Phase_Completion',
    'RSP_Symmetry_PeakTrough', 'RSP_Symmetry_RiseDecay','skt','sub_num','vid_num',
]

columns_that_are_mostly_zeros = ['PPG_Peak_Height','SCR_Peaks','SCR_Height','SCR_Amplitude',
    'zygomaticus_EMG_Onsets', 'zygomaticus_EMG_Activity',
    'zygomaticus_EMG_Offsets', 'trapezius_EMG_Activity','trapezius_EMG_Onsets',
    'corrugator_EMG_Activity',
    'corrugator_EMG_Onsets', 'corrugator_EMG_Offsets',
    'trapezius_EMG_Offsets']

def add_ppg_heights_and_downsample(processed_file,columns_that_are_mostly_zeros): 
    df = pd.read_csv(processed_file)
    # 1. calculate new peak heights
    df['PPG_Peak_Height']=0
    df['ECG_HR-PPG_HR']=df['ECG_Rate']-df['PPG_Rate']
    peak_idxs, peak_heights = add_peak_height_colum(df,'PPG_Peaks','PPG_Clean')
    df.iloc[peak_idxs,df.columns.get_loc('PPG_Peak_Height')]=peak_heights
    # 2. also downsample 
    down_sampled_df = custom_downsample(df,int(1000/20),columns_that_are_mostly_zeros)
    #return down_sampled_df
    down_sampled_df.to_csv(processed_file[:-4]+'_20hz.csv',index=False)
def add_mean_val_arousal(scenario_file):
    df = pd.read_csv(scenario_file)
    mean_subject_features = glob.glob(scenario_file.split('/t')[0]+'/train/mean_subject_affective_features.csv')
    sub_features = pd.read_csv(mean_subject_features[0])
    print(scenario_file)
    sub_num =scenario_file.split('sub_')[1].split('_')[0]
    sub_val_arousal = sub_features[sub_features['sub']==int(sub_num)][['mean_valence','mean_arousal']].values
    sub_val, sub_arousal =sub_val_arousal[0,0],sub_val_arousal[0,1]
    df['sub_mean_val']=sub_val
    df['sub_mean_arousal']=sub_arousal
    df.to_csv(scenario_file,index=False)
    
def add_mean_val_arousal_video(scenario_file):
    df = pd.read_csv(scenario_file)
    mean_video_features = glob.glob(scenario_file.split('/t')[0]+'/train/mean_video_affective_features.csv')
    vid_features = pd.read_csv(mean_video_features[0])
    vid_features
    vid_num = scenario_file.split('vid_')[1].split('_')[0]
    vid_val_arousal = vid_features[vid_features['video']==int(vid_num)][['mean_valence','mean_arousal']].values
    vid_val, vid_arousal =vid_val_arousal[0,0],vid_val_arousal[0,1]
    df['vid_mean_val']=vid_val
    df['vid_mean_arousal']=vid_arousal
    df.to_csv(scenario_file,index=False)


In [6]:
pd.read_csv(
    '/work/abslab/emognition_2023_challenge/data/scenario_2/fold_0/train/physiology/sub_0_vid_0_processed_1hzphysio_features_calc.csv')

Unnamed: 0.1,Unnamed: 0,PPG_Raw,PPG_Clean,PPG_Rate,PPG_Peaks,ECG_Raw,ECG_Clean,ECG_Rate,ECG_Quality,ECG_R_Peaks,...,RSP_Symmetry_PeakTrough_rolling_mean_10,RSP_Symmetry_PeakTrough_rolling_std_10,RSP_Symmetry_RiseDecay_lagged_5,RSP_Symmetry_RiseDecay_future_5,RSP_Symmetry_RiseDecay_rolling_mean_10,RSP_Symmetry_RiseDecay_rolling_std_10,skt_lagged_5,skt_future_5,skt_rolling_mean_10,skt_rolling_std_10
0,0,36.758000,-0.191260,78.323438,0.000,1.004000,0.127614,70.289097,0.935556,0.000,...,0.726505,0.006618,0.421697,0.421118,0.414811,0.013641,25.230471,25.233576,25.233194,0.002867
1,20,37.304752,0.118434,77.660066,0.001,0.823051,-0.011197,74.510930,0.943527,0.001,...,0.726505,0.006618,0.421697,0.418726,0.414811,0.013641,25.230471,25.232400,25.233194,0.002867
2,40,37.180230,0.030908,66.378558,0.001,0.896275,0.003559,81.139709,0.972960,0.001,...,0.726505,0.006618,0.421697,0.414606,0.414811,0.013641,25.230471,25.231211,25.233194,0.002867
3,60,36.715727,-0.058093,88.107024,0.001,0.851247,-0.018302,69.092421,0.975171,0.002,...,0.726505,0.006618,0.421697,0.406643,0.414811,0.013641,25.230471,25.230539,25.233194,0.002867
4,80,36.376434,-0.067561,61.423990,0.001,0.758885,0.006071,69.603163,0.963426,0.001,...,0.726505,0.006618,0.421697,0.378537,0.414811,0.013641,25.230471,25.229247,25.233194,0.002867
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
124,2480,35.763258,-0.098614,64.082872,0.001,1.096839,-0.034330,75.750897,0.923386,0.001,...,0.725800,0.025007,0.336932,0.419715,0.406922,0.027798,25.147997,25.146938,25.147852,0.000586
125,2500,36.001839,0.044858,99.499286,0.002,2.128269,0.023295,74.805158,0.971828,0.001,...,,,0.378182,0.419715,,,25.148183,25.146938,,
126,2520,36.099791,0.026630,117.094609,0.002,2.525150,-0.011305,73.808642,0.991254,0.001,...,,,0.416101,0.419715,,,25.148881,25.146938,,
127,2540,36.136933,0.015012,94.323264,0.001,2.627074,-0.008511,78.365776,1.015544,0.002,...,,,0.419715,0.419715,,,25.147923,25.146938,,


# downsample and add ppg heights

In [4]:
processed_file_paths = '/work/abslab/emognition_2023_challenge/data/scenario_1/*/physiology/*processed.csv'
for processed_file in glob.glob(processed_file_paths):
    add_ppg_heights_and_downsample(processed_file,columns_that_are_mostly_zeros)

# Downsample to 1hz

In [14]:
processed_file_paths = '/work/abslab/emognition_2023_challenge/data/scenario_2/*/*/physiology/*20hz.csv'
for processed_file in glob.glob(processed_file_paths):
    df = pd.read_csv(processed_file)
    downsampled_df = custom_downsample(df,20,columns_that_are_mostly_zeros)
    hz_path = processed_file.split('20hz')[0]+'1hz.csv'
    downsampled_df.to_csv(hz_path)

# add mean subject features

In [26]:
scenario_2_files = glob.glob('/work/abslab/emognition_2023_challenge/data/scenario_2/\
*0/*/phys*/*[0-9]*hz.csv')
error_list = []
for scenario_file in scenario_2_files:
    add_mean_val_arousal(scenario_file)
    # if not (('scenario_3' in scenario_file) or ('scenario_4' in scenario_file)):
    #     print('else', scenario_file)
    #     df = pd.read_csv(scenario_file)
    #     vid_num = scenario_file.split('_')[-3]
    #     train_folder = scenario_file.split('/t')[0] +'/train/'
    #     dynamic_affective_features = pd.read_csv(train_folder + f'mean_video-{vid_num}_dynamic_affective_features.csv')
    #     break


# add video features for files in which you could have mean/valence arousal

In [13]:
#add mean video features 
scenario_2_files = glob.glob('/work/abslab/emognition_2023_challenge/data/scenario_2/*/*/phys*/*[0-9]_processed_1hz*calc*.csv')
error_list = []
for scenario_file in scenario_2_files:
    add_mean_val_arousal_video(scenario_file)
#add mean video features 
scenario_1_files = glob.glob('/work/abslab/emognition_2023_challenge/data/scenario_1/*/phys*/*[0-9]_processed_1hz*calc*.csv')
error_list = []
for scenario_file in scenario_1_files:
    add_mean_val_arousal_video(scenario_file)
scenario_file

'/work/abslab/emognition_2023_challenge/data/scenario_1/train/physiology/sub_9_vid_9_processed_1hzphysio_features_calc.csv'

# add some calculated features

In [8]:
def create_features(df, columns, lag_rows, future_rows, rolling_window):
    for col in columns:
        # Lagged features
        df[f'{col}_lagged_{lag_rows}'] = df[col].shift(lag_rows).fillna(method='bfill')

        # Future features
        df[f'{col}_future_{future_rows}'] = df[col].shift(-future_rows).fillna(method='ffill')

        # Rolling average features
        df[f'{col}_rolling_mean_{rolling_window}'] = (
            df[col].rolling(window=rolling_window,center=True).mean().fillna(method='bfill')
        )

        # Rolling standard deviation features
        df[f'{col}_rolling_std_{rolling_window}'] = (
            df[col].rolling(window=rolling_window,center=True).std().fillna(method='bfill')
        )

    return df

columns_of_interest = [ 'PPG_Rate', 'PPG_Peaks',
    'ECG_Rate',
    'EDA_Clean', 'EDA_Tonic', 'EDA_Phasic', 'SCR_Peaks','SCR_Height', 'SCR_Amplitude',
    'RSP_Amplitude', 'RSP_Rate',
    'RSP_RVT', 'RSP_Symmetry_PeakTrough', 'RSP_Symmetry_RiseDecay','skt',
]


In [9]:
for scenario in [2,3,4]: 
    print(scenario)
    processed_file_paths = f'/work/abslab/emognition_2023_challenge/data/scenario_{scenario}/*/*/physiology/*1hz.csv'
    lag_rows = 5
    future_rows =5
    rolling_window = 10
    for file_path in glob.glob(processed_file_paths):
        df = pd.read_csv(file_path)
        updated_df = create_features(df, columns_of_interest, lag_rows, future_rows, rolling_window)
        updated_df.to_csv(file_path[:-4]+'physio_features_calc.csv',index=None)


2


In [32]:
processed_file_paths = f'/work/abslab/emognition_2023_challenge/data/scenario_1/*/physiology/*1hz.csv'
lag_rows = 5
future_rows =5
rolling_window = 10
for file_path in glob.glob(processed_file_paths):
    df = pd.read_csv(file_path)
    updated_df = create_features(df, columns_of_interest, lag_rows, future_rows, rolling_window)
    updated_df.to_csv(file_path[:-4]+'physio_features_calc.csv',index=None)