In [1]:
import os
import random
import json
import re
import pandas as pd
import neurokit2 as nk
from scipy import signal
import json

In [13]:
import numpy as np
def resample_signal(signal, original_fs = 1000, desired_fs = 20):
    """
    Resample a binary (0/1) signal in the frequency domain and re-binarise it.

    The input is zero-padded at the end to the next power-of-two length,
    transformed with an FFT, cropped (downsampling) or zero-stuffed
    (upsampling) in the frequency domain, inverse-transformed, truncated and
    finally thresholded at 0.3 so that only 0 and 1 remain.

    Parameters
    ----------
    signal : sequence of numbers (list, ndarray, pandas Series, ...)
        Samples to resample. Intended for columns holding only 0 and 1.
    original_fs : number, default 1000
        Sampling rate of ``signal``.
    desired_fs : number, default 20
        Sampling rate of the returned signal.

    Returns
    -------
    numpy.ndarray
        Float array containing only 0.0 and 1.0, with
        ``ceil(len(signal) * desired_fs / original_fs)`` samples.
        An empty input yields an empty array.

    Notes
    -----
    NOTE(review): whenever the rate changes, the DC bin is replaced by 0 and
    an extra zero bin is appended, so the edited spectrum is not
    conjugate-symmetric; only the real part of the inverse FFT is kept and no
    amplitude rescaling is applied. This is kept exactly as it was so that
    existing outputs do not change - confirm it is intentional.
    """
    n = len(signal)
    if n == 0:
        # np.log2(0) is -inf and a zero-length FFT raises; there is nothing to resample.
        return np.zeros(0)

    # Calculate resampling ratio
    resampling_ratio = desired_fs / original_fs

    # Zero-padding to the next power of 2 if necessary
    n_fft = int(2 ** np.ceil(np.log2(n)))
    if n_fft > n:
        signal = np.pad(signal, (0, n_fft - n))

    # Compute FFT of the (padded) input signal
    signal_fft = np.fft.fft(signal)

    # Upsampling or downsampling in the frequency domain
    if resampling_ratio > 1:
        # Upsampling: insert zeros between the two halves of the spectrum
        n_zeros = int((resampling_ratio - 1) * n_fft)
        signal_fft = np.concatenate(([0], signal_fft[1:(n_fft // 2 + 1)], np.zeros(n_zeros), signal_fft[(n_fft // 2 + 1):], [0]))
    elif resampling_ratio < 1:
        # Downsampling: drop the bins around the middle of the spectrum
        n_remove = n_fft - int(n_fft * resampling_ratio)
        signal_fft = np.concatenate(([0], signal_fft[1:(n_fft // 2 - n_remove // 2 + 1)], signal_fft[(n_fft // 2 + n_remove // 2 + 1):], [0]))

    # Inverse FFT
    resampled_signal = np.fft.ifft(signal_fft)

    # Number of output samples, rounded up (e.g. 1.2 -> 2). Dividing once
    # (n * desired_fs / original_fs) keeps exact multiples exact, which the
    # previous "n * ratio % 1 == 0" float check could not guarantee in general.
    resampled_size = int(np.ceil(n * desired_fs / original_fs))
    resampled_signal = resampled_signal[:resampled_size].real

    # Binarise: values >= 0.3 become 1, everything else (including NaN) becomes 0
    return (resampled_signal >= 0.3).astype(float)

In [14]:
from scipy import signal

def reduce_resolution_to_20HZ(df, signals, original_fs = 1000, desired_fs = 20):
    """
    Resample every column of ``signals`` and store the result in ``df``.

    Columns that contain only 0 and 1 are resampled with ``resample_signal``
    (zero-padding FFT followed by thresholding); every other column is
    resampled with ``scipy.signal.resample``. In both cases the output length
    is ``ceil(len(column) * desired_fs / original_fs)``.

    Parameters
    ----------
    df : pandas.DataFrame
        Destination frame; it is modified in place, one column per column of
        ``signals`` (same column names).
    signals : pandas.DataFrame
        Source columns sampled at ``original_fs``.
    original_fs : number, default 1000
        Sampling rate of ``signals``.
    desired_fs : number, default 20
        Target sampling rate. With the defaults the length is divided by 50.

    Returns
    -------
    None
    """
    for col in signals:
        column = signals[col]

        # Binary column: use the zero-padding FFT resampler, which re-binarises its output
        if column.isin([0,1]).all():
            df[col] = resample_signal(column, original_fs, desired_fs)
        else:
            # Ceiling division: round the sample count up when the length is
            # not an exact multiple of original_fs / desired_fs.
            n_out = int(-(-(len(column) * desired_fs) // original_fs))
            df[col] = signal.resample(column, n_out)
    

In [11]:
# Load the existing ID -> {VIDEO_FILENAME -> unique_number} mapping.
# The context manager closes the file handle that a bare open() would leak.
with open('train_physiology_senario_1_neurokit2_20HZ_20230413.json') as id_vid_file:
    id_vid = json.load(id_vid_file)

# to_csv does not create missing parent directories; create them up front so a
# missing folder is not discovered only after all files have been processed.
os.makedirs("./all_vars_round_up", exist_ok=True)
os.makedirs("./reduced_vars_round_up", exist_ok=True)

concated = []
# Raw string: the path contains "\O", "\E" and "\s", which are invalid escape
# sequences in a normal string literal. The resulting path value is unchanged.
for root, subFolder, files in os.walk(r'D:\OneDrive - The Pennsylvania State University\EPiC2023\scenario_1'):
    # recursively find all files in all folders and subfolders
    for file in files:
        if 'train' in root and 'physiology' in root and file.endswith(".csv"):
            # Get the path to the CSV file
            file_path = os.path.join(root, file)
            # Get the ID from file name
            ID = re.search(r'sub_(.*?)_vid', file).group(1).strip()
            # Get the VIDEO_FILENAME from file name
            VIDEO_FILENAME = re.search(r'vid_(.*?)\.csv', file).group(1).strip()
            # Get the SCENARIO from the directory path (expects a backslash after the scenario folder)
            SCENARIO = re.search(r'scenario_(.*?)\\', root).group(1).strip()
            # Draw 8 distinct digits (0-9) for this ID / VIDEO_FILENAME pair
            unique_digits = random.sample(range(10), 8)
            # Join the digits and convert to int (a leading 0 therefore gives a 7-digit number)
            unique_number = int(''.join(map(str, unique_digits)))

            # No entry for this ID yet: register the freshly drawn number
            if ID not in id_vid:
                id_vid[ID] = {VIDEO_FILENAME: unique_number}
            # ID known, but not this video: register the freshly drawn number
            elif VIDEO_FILENAME not in id_vid[ID]:
                id_vid[ID][VIDEO_FILENAME] = unique_number
            # A number already exists for this ID's video: reuse it
            else:
                unique_number = id_vid[ID][VIDEO_FILENAME]

            df = pd.read_csv(file_path)

            # Collects the 20 Hz version of every processed signal for this file
            reduced_resolution = pd.DataFrame()

            # ecg reduce resolution to 20Hz
            ecg_signals,ecg_info = nk.ecg_process(df['ecg'], sampling_rate = 1000)
            reduce_resolution_to_20HZ(reduced_resolution,ecg_signals)


            # bvp/ppg reduce resolution to 20Hz
            ppg_signals,ppg_info = nk.ppg_process(df['bvp'], sampling_rate = 1000)
            reduce_resolution_to_20HZ(reduced_resolution,ppg_signals)


            # eda/gsr reduce resolution to 20Hz
            eda_signals,eda_info = nk.eda_process(df['gsr'], sampling_rate = 1000)
            reduce_resolution_to_20HZ(reduced_resolution,eda_signals)


            # rsp reduce resolution to 20Hz
            rsp_signals,rsp_info = nk.rsp_process(df['rsp'], sampling_rate = 1000)
            reduce_resolution_to_20HZ(reduced_resolution,rsp_signals)


            # skt reduce resolution to 20Hz: the raw column is resampled directly
            # to ceil(len / 50) samples (rounded up when not a multiple of 50)
            skt_target_len = -(-len(df['skt']) // 50)
            reduced_resolution['SKT'] = signal.resample(df['skt'], skt_target_len)


            # emg_zygo reduce resolution to 20Hz (columns get a '_zygo' suffix
            # so the three EMG channels do not overwrite each other)
            emg_zygo_signals, emg_zygo_info = nk.emg_process(df['emg_zygo'], sampling_rate = 1000)
            new_column_names = [col + '_zygo' for col in emg_zygo_signals.columns]
            emg_zygo_signals = emg_zygo_signals.rename(columns=dict(zip(emg_zygo_signals.columns, new_column_names)))
            reduce_resolution_to_20HZ(reduced_resolution,emg_zygo_signals)

            # emg_coru reduce resolution to 20Hz
            emg_coru_signals, emg_coru_info = nk.emg_process(df['emg_coru'], sampling_rate = 1000)
            new_column_names = [col + '_coru' for col in emg_coru_signals.columns]
            emg_coru_signals = emg_coru_signals.rename(columns=dict(zip(emg_coru_signals.columns, new_column_names)))
            reduce_resolution_to_20HZ(reduced_resolution,emg_coru_signals)

            # emg_trap reduce resolution to 20Hz
            emg_trap_signals, emg_trap_info = nk.emg_process(df['emg_trap'], sampling_rate = 1000)
            new_column_names = [col + '_trap' for col in emg_trap_signals.columns]
            emg_trap_signals = emg_trap_signals.rename(columns=dict(zip(emg_trap_signals.columns, new_column_names)))
            reduce_resolution_to_20HZ(reduced_resolution,emg_trap_signals)

            # Identity columns (scalars are broadcast to every row)
            reduced_resolution['ID'] = ID
            reduced_resolution['VIDEO_FILENAME'] = VIDEO_FILENAME
            reduced_resolution['SCENARIO'] = SCENARIO
            reduced_resolution['unique_number'] = unique_number
            concated.append(reduced_resolution)


# Stack the per-file frames and write the full variable set
frame = pd.concat(concated)
frame.to_csv("./all_vars_round_up/train_physiology_senario_1_neurokit2_all_variables_20HZ_20230422.csv", index = False)

# Write a second file restricted to the selected subset of variables
reduced_frame = frame[['EDA_Tonic','EDA_Phasic','SCR_Onsets','SCR_Peaks','SCR_Height','SCR_Recovery',
                       'ECG_Rate','PPG_Rate','RSP_Clean','RSP_Rate','RSP_Amplitude','RSP_RVT',
                       'EMG_Amplitude_zygo','EMG_Onsets_zygo','EMG_Offsets_zygo','EMG_Activity_zygo','EMG_Raw_zygo',
                       'EMG_Amplitude_coru','EMG_Onsets_coru','EMG_Offsets_coru','EMG_Activity_coru','EMG_Raw_coru',
                       'EMG_Amplitude_trap','EMG_Onsets_trap','EMG_Offsets_trap','EMG_Activity_trap','EMG_Raw_trap',
                       'ID','VIDEO_FILENAME','SCENARIO','unique_number']]
reduced_frame.to_csv("./reduced_vars_round_up/train_physiology_senario_1_neurokit2_reduced_variables_20HZ_20230422.csv", index = False)


  warn(
  info["SCR_Peaks"] > np.nanmin(info["SCR_Onsets"]), ~np.isnan(info["SCR_Onsets"])
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  info["SCR_Peaks"] > np.nanmin(info["SCR_Onsets"]), ~np.isnan(info["SCR_Onsets"])
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  info["SCR_Peaks"] > np.nanmin(info["SCR_Onsets"]), ~np.isnan(info["SCR_Onsets"])
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  info["SCR_Peaks"] > np.nanmin(info["SCR_Onsets"]), ~np.isnan(info["SCR_Onsets"])
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


In [12]:
# Load the existing ID -> {VIDEO_FILENAME -> unique_number} mapping.
# The context manager closes the file handle that a bare open() would leak.
# NOTE(review): this is the scenario-1 mapping file, and entries added to
# id_vid below are not written back anywhere in this cell - confirm intended.
with open('train_physiology_senario_1_neurokit2_20HZ_20230413.json') as id_vid_file:
    id_vid = json.load(id_vid_file)

# to_csv does not create missing parent directories; create them up front so a
# missing folder is not discovered only after all files have been processed.
os.makedirs("./all_vars_round_up", exist_ok=True)
os.makedirs("./reduced_vars_round_up", exist_ok=True)

concated = []
# Raw string: the path contains "\O", "\E" and "\s", which are invalid escape
# sequences in a normal string literal. The resulting path value is unchanged.
for root, subFolder, files in os.walk(r'D:\OneDrive - The Pennsylvania State University\EPiC2023\scenario_2'):
    for file in files:
        # Only training physiology CSVs that live under fold_0
        if 'train' in root and 'physiology' in root and 'fold_0' in root and file.endswith(".csv"):
            # Get the path to the CSV file
            file_path = os.path.join(root, file)

            # Parse subject ID, video name and scenario from the file name / directory path
            ID = re.search(r'sub_(.*?)_vid', file).group(1).strip()
            VIDEO_FILENAME = re.search(r'vid_(.*?)\.csv', file).group(1).strip()
            SCENARIO = re.search(r'scenario_(.*?)\\', root).group(1).strip()
            # Draw 8 distinct digits and join them into an int
            # (a leading 0 therefore gives a 7-digit number)
            unique_digits = random.sample(range(10), 8)
            unique_number = int(''.join(map(str, unique_digits)))
            # Register the new number unless this ID / video pair already has one
            if ID not in id_vid:
                id_vid[ID] = {VIDEO_FILENAME: unique_number}
            elif VIDEO_FILENAME not in id_vid[ID]:
                id_vid[ID][VIDEO_FILENAME] = unique_number
            else:
                unique_number = id_vid[ID][VIDEO_FILENAME]
            df = pd.read_csv(file_path)

            # Collects the 20 Hz version of every processed signal for this file
            reduced_resolution = pd.DataFrame()

            # ecg reduce resolution
            ecg_signals,ecg_info = nk.ecg_process(df['ecg'], sampling_rate = 1000)
            reduce_resolution_to_20HZ(reduced_resolution,ecg_signals)


            # bvp/ppg reduce resolution
            ppg_signals,ppg_info = nk.ppg_process(df['bvp'], sampling_rate = 1000)
            reduce_resolution_to_20HZ(reduced_resolution,ppg_signals)


            # eda/gsr reduce resolution
            eda_signals,eda_info = nk.eda_process(df['gsr'], sampling_rate = 1000)
            reduce_resolution_to_20HZ(reduced_resolution,eda_signals)


            # rsp reduce resolution
            rsp_signals,rsp_info = nk.rsp_process(df['rsp'], sampling_rate = 1000)
            reduce_resolution_to_20HZ(reduced_resolution,rsp_signals)


            # skt: the raw column is resampled directly to ceil(len / 50)
            # samples (rounded up when the length is not a multiple of 50)
            skt_target_len = -(-len(df['skt']) // 50)
            reduced_resolution['SKT'] = signal.resample(df['skt'], skt_target_len)


            # emg_zygo (columns get a '_zygo' suffix so the three EMG channels
            # do not overwrite each other)
            emg_zygo_signals, emg_zygo_info = nk.emg_process(df['emg_zygo'], sampling_rate = 1000)
            new_column_names = [col + '_zygo' for col in emg_zygo_signals.columns]
            emg_zygo_signals = emg_zygo_signals.rename(columns=dict(zip(emg_zygo_signals.columns, new_column_names)))
            reduce_resolution_to_20HZ(reduced_resolution,emg_zygo_signals)

            # emg_coru
            #  NeuroKitWarning: No events found. Check your event_channel or adjust 'threshold' or 'keep' arguments.
            emg_coru_signals, emg_coru_info = nk.emg_process(df['emg_coru'], sampling_rate = 1000)
            new_column_names = [col + '_coru' for col in emg_coru_signals.columns]
            emg_coru_signals = emg_coru_signals.rename(columns=dict(zip(emg_coru_signals.columns, new_column_names)))
            reduce_resolution_to_20HZ(reduced_resolution,emg_coru_signals)

            # emg_trap
            emg_trap_signals, emg_trap_info = nk.emg_process(df['emg_trap'], sampling_rate = 1000)
            new_column_names = [col + '_trap' for col in emg_trap_signals.columns]
            emg_trap_signals = emg_trap_signals.rename(columns=dict(zip(emg_trap_signals.columns, new_column_names)))
            reduce_resolution_to_20HZ(reduced_resolution,emg_trap_signals)

            # Add identity columns (scalars are broadcast to every row)
            reduced_resolution['ID'] = ID
            reduced_resolution['VIDEO_FILENAME'] = VIDEO_FILENAME
            reduced_resolution['SCENARIO'] = SCENARIO
            reduced_resolution['unique_number'] = unique_number

            # Append all data in one list
            concated.append(reduced_resolution)

# Stack the per-file frames into one dataframe
frame = pd.concat(concated)

frame.to_csv("./all_vars_round_up/train_physiology_senario_2_fold_0_neurokit2_all_variables_20HZ_20230422.csv", index = False)

# Write a second file restricted to the selected subset of variables
reduced_frame = frame[['EDA_Tonic','EDA_Phasic','SCR_Onsets','SCR_Peaks','SCR_Height','SCR_Recovery',
                       'ECG_Rate','PPG_Rate','RSP_Clean','RSP_Rate','RSP_Amplitude','RSP_RVT',
                       'EMG_Amplitude_zygo','EMG_Onsets_zygo','EMG_Offsets_zygo','EMG_Activity_zygo','EMG_Raw_zygo',
                       'EMG_Amplitude_coru','EMG_Onsets_coru','EMG_Offsets_coru','EMG_Activity_coru','EMG_Raw_coru',
                       'EMG_Amplitude_trap','EMG_Onsets_trap','EMG_Offsets_trap','EMG_Activity_trap','EMG_Raw_trap',
                       'ID','VIDEO_FILENAME','SCENARIO','unique_number']]
reduced_frame.to_csv("./reduced_vars_round_up/train_physiology_senario_2_fold_0_neurokit2_reduced_variables_20HZ_20230422.csv", index = False)


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  info["SCR_Peaks"] > np.nanmin(info["SCR_Onsets"]), ~np.isnan(info["SCR_Onsets"])
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  info["SCR_Peaks"] > np.nanmin(info["SCR_Onsets"]), ~np.isnan(info["SCR_Onsets"])
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  info["SCR_Peaks"] > np.nanmin(info["SCR_Onsets"]), ~np.isnan(info["SCR_Onsets"])
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  info["SCR_Peaks"] > np.nanmin(info["SCR_Onsets"]), ~np.isnan(info["SCR_Onsets"])
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(


In [15]:
# Load the existing ID -> {VIDEO_FILENAME -> unique_number} mapping.
# The context manager closes the file handle that a bare open() would leak.
# NOTE(review): this is the scenario-1 mapping file, and entries added to
# id_vid below are not written back anywhere in this cell - confirm intended.
with open('train_physiology_senario_1_neurokit2_20HZ_20230413.json') as id_vid_file:
    id_vid = json.load(id_vid_file)

# to_csv does not create missing parent directories; create them up front so a
# missing folder is not discovered only after all files have been processed.
os.makedirs("./all_vars_round_up", exist_ok=True)
os.makedirs("./reduced_vars_round_up", exist_ok=True)

concated = []
# Raw string: the path contains "\O", "\E" and "\s", which are invalid escape
# sequences in a normal string literal. The resulting path value is unchanged.
for root, subFolder, files in os.walk(r'D:\OneDrive - The Pennsylvania State University\EPiC2023\scenario_2'):
    for file in files:
        # Only training physiology CSVs that live under fold_1
        if 'train' in root and 'physiology' in root and 'fold_1' in root and file.endswith(".csv"):
            # Get the path to the CSV file
            file_path = os.path.join(root, file)

            # Parse subject ID, video name and scenario from the file name / directory path
            ID = re.search(r'sub_(.*?)_vid', file).group(1).strip()
            VIDEO_FILENAME = re.search(r'vid_(.*?)\.csv', file).group(1).strip()
            SCENARIO = re.search(r'scenario_(.*?)\\', root).group(1).strip()
            # Draw 8 distinct digits and join them into an int
            # (a leading 0 therefore gives a 7-digit number)
            unique_digits = random.sample(range(10), 8)
            unique_number = int(''.join(map(str, unique_digits)))
            # Register the new number unless this ID / video pair already has one
            if ID not in id_vid:
                id_vid[ID] = {VIDEO_FILENAME: unique_number}
            elif VIDEO_FILENAME not in id_vid[ID]:
                id_vid[ID][VIDEO_FILENAME] = unique_number
            else:
                unique_number = id_vid[ID][VIDEO_FILENAME]
            df = pd.read_csv(file_path)

            # Collects the 20 Hz version of every processed signal for this file
            reduced_resolution = pd.DataFrame()

            # ecg reduce resolution
            ecg_signals,ecg_info = nk.ecg_process(df['ecg'], sampling_rate = 1000)
            reduce_resolution_to_20HZ(reduced_resolution,ecg_signals)


            # bvp/ppg reduce resolution
            ppg_signals,ppg_info = nk.ppg_process(df['bvp'], sampling_rate = 1000)
            reduce_resolution_to_20HZ(reduced_resolution,ppg_signals)


            # eda/gsr reduce resolution
            eda_signals,eda_info = nk.eda_process(df['gsr'], sampling_rate = 1000)
            reduce_resolution_to_20HZ(reduced_resolution,eda_signals)


            # rsp reduce resolution
            rsp_signals,rsp_info = nk.rsp_process(df['rsp'], sampling_rate = 1000)
            reduce_resolution_to_20HZ(reduced_resolution,rsp_signals)


            # skt: the raw column is resampled directly to ceil(len / 50)
            # samples (rounded up when the length is not a multiple of 50)
            skt_target_len = -(-len(df['skt']) // 50)
            reduced_resolution['SKT'] = signal.resample(df['skt'], skt_target_len)


            # emg_zygo (columns get a '_zygo' suffix so the three EMG channels
            # do not overwrite each other)
            emg_zygo_signals, emg_zygo_info = nk.emg_process(df['emg_zygo'], sampling_rate = 1000)
            new_column_names = [col + '_zygo' for col in emg_zygo_signals.columns]
            emg_zygo_signals = emg_zygo_signals.rename(columns=dict(zip(emg_zygo_signals.columns, new_column_names)))
            reduce_resolution_to_20HZ(reduced_resolution,emg_zygo_signals)

            # emg_coru
            #  NeuroKitWarning: No events found. Check your event_channel or adjust 'threshold' or 'keep' arguments.
            emg_coru_signals, emg_coru_info = nk.emg_process(df['emg_coru'], sampling_rate = 1000)
            new_column_names = [col + '_coru' for col in emg_coru_signals.columns]
            emg_coru_signals = emg_coru_signals.rename(columns=dict(zip(emg_coru_signals.columns, new_column_names)))
            reduce_resolution_to_20HZ(reduced_resolution,emg_coru_signals)

            # emg_trap
            emg_trap_signals, emg_trap_info = nk.emg_process(df['emg_trap'], sampling_rate = 1000)
            new_column_names = [col + '_trap' for col in emg_trap_signals.columns]
            emg_trap_signals = emg_trap_signals.rename(columns=dict(zip(emg_trap_signals.columns, new_column_names)))
            reduce_resolution_to_20HZ(reduced_resolution,emg_trap_signals)

            # Identity columns (scalars are broadcast to every row)
            reduced_resolution['ID'] = ID
            reduced_resolution['VIDEO_FILENAME'] = VIDEO_FILENAME
            reduced_resolution['SCENARIO'] = SCENARIO
            reduced_resolution['unique_number'] = unique_number
            concated.append(reduced_resolution)


# Stack the per-file frames and write the full variable set
frame = pd.concat(concated)
frame.to_csv("./all_vars_round_up/train_physiology_senario_2_fold_1_neurokit2_all_variables_20HZ_20230422.csv", index = False)

# Write a second file restricted to the selected subset of variables
reduced_frame = frame[['EDA_Tonic','EDA_Phasic','SCR_Onsets','SCR_Peaks','SCR_Height','SCR_Recovery',
                       'ECG_Rate','PPG_Rate','RSP_Clean','RSP_Rate','RSP_Amplitude','RSP_RVT',
                       'EMG_Amplitude_zygo','EMG_Onsets_zygo','EMG_Offsets_zygo','EMG_Activity_zygo','EMG_Raw_zygo',
                       'EMG_Amplitude_coru','EMG_Onsets_coru','EMG_Offsets_coru','EMG_Activity_coru','EMG_Raw_coru',
                       'EMG_Amplitude_trap','EMG_Onsets_trap','EMG_Offsets_trap','EMG_Activity_trap','EMG_Raw_trap',
                       'ID','VIDEO_FILENAME','SCENARIO','unique_number']]
reduced_frame.to_csv("./reduced_vars_round_up/train_physiology_senario_2_fold_1_neurokit2_reduced_variables_20HZ_20230422.csv", index = False)


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  info["SCR_Peaks"] > np.nanmin(info["SCR_Onsets"]), ~np.isnan(info["SCR_Onsets"])
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  info["SCR_Peaks"] > np.nanmin(info["SCR_Onsets"]), ~np.isnan(info["SCR_Onsets"])
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  info["SCR_Peaks"] > np.nanmin(info["SCR_Onsets"]), ~np.isnan(info["SCR_Onsets"])
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  info["SCR_Peaks"] > np.nanmin(info["SCR_Onsets"]), ~np.isnan(info["SCR_Onsets"])
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


In [16]:
# Scenario 2 / fold_2: convert every training physiology CSV under `data_root` into a
# 20 Hz NeuroKit2 feature table, then write the full and the reduced variable sets to CSV.

# Raw string: the original non-raw literal relied on invalid escapes (\O, \E, \s).
data_root = r'D:\OneDrive - The Pennsylvania State University\EPiC2023\scenario_2'
fold_name = 'fold_2'
raw_fs = 1000        # Hz; sampling_rate passed to every NeuroKit2 pipeline
skt_decimation = 50  # raw SKT samples per output sample (1000 Hz -> 20 Hz)
all_vars_csv = "./all_vars_round_up/train_physiology_senario_2_fold_2_neurokit2_all_variables_20HZ_20230422.csv"
reduced_vars_csv = "./reduced_vars_round_up/train_physiology_senario_2_fold_2_neurokit2_reduced_variables_20HZ_20230422.csv"

# (raw CSV column, NeuroKit2 pipeline) pairs, in the order their features are appended.
signal_pipelines = [
    ('ecg', nk.ecg_process),
    ('bvp', nk.ppg_process),
    ('gsr', nk.eda_process),
    ('rsp', nk.rsp_process),
]
# EMG recording sites; raw columns are 'emg_<site>', feature columns get a '_<site>' suffix.
emg_sites = ['zygo', 'coru', 'trap']

# Subset of feature columns exported to the "reduced variables" file.
reduced_columns = ['EDA_Tonic','EDA_Phasic','SCR_Onsets','SCR_Peaks','SCR_Height','SCR_Recovery',
                   'ECG_Rate','PPG_Rate','RSP_Clean','RSP_Rate','RSP_Amplitude','RSP_RVT',
                   'EMG_Amplitude_zygo','EMG_Onsets_zygo','EMG_Offsets_zygo','EMG_Activity_zygo','EMG_Raw_zygo',
                   'EMG_Amplitude_coru','EMG_Onsets_coru','EMG_Offsets_coru','EMG_Activity_coru','EMG_Raw_coru',
                   'EMG_Amplitude_trap','EMG_Onsets_trap','EMG_Offsets_trap','EMG_Activity_trap','EMG_Raw_trap',
                   'ID','VIDEO_FILENAME','SCENARIO','unique_number']

# Context manager so the JSON file handle is closed deterministically.
with open('train_physiology_senario_1_neurokit2_20HZ_20230413.json') as id_vid_file:
    id_vid = json.load(id_vid_file)  # {subject ID: {video name: unique_number}}

concated = []
for root, _subfolders, files in os.walk(data_root):
    # Only directories whose path mentions train, physiology and this fold are processed.
    if not ('train' in root and 'physiology' in root and fold_name in root):
        continue
    for file in files:
        if not file.endswith(".csv"):
            continue
        file_path = os.path.join(root, file)

        # Metadata is parsed from the path: ...scenario_<S>\... and sub_<ID>_vid_<VIDEO>.csv
        ID = re.search(r'sub_(.*?)_vid', file).group(1).strip()
        VIDEO_FILENAME = re.search(r'vid_(.*?)\.csv', file).group(1).strip()
        SCENARIO = re.search(r'scenario_(.*?)\\', root).group(1).strip()

        # Reuse the number already recorded for this (subject, video); otherwise record a
        # fresh one built from 8 distinct random digits (a leading 0 yields a shorter number).
        # NOTE(review): fresh numbers are not checked for collisions, and id_vid is only
        # updated in memory in this cell.
        unique_digits = random.sample(range(10), 8)
        candidate_number = int(''.join(map(str, unique_digits)))
        unique_number = id_vid.setdefault(ID, {}).setdefault(VIDEO_FILENAME, candidate_number)

        df = pd.read_csv(file_path)
        reduced_resolution = pd.DataFrame()

        # ECG, BVP/PPG, EDA/GSR and RSP features.
        # reduce_resolution_to_20HZ is defined in an earlier cell; presumably it adds the
        # downsampled columns of its second argument to reduced_resolution in place.
        for raw_column, process in signal_pipelines:
            processed_signals, _info = process(df[raw_column], sampling_rate=raw_fs)
            reduce_resolution_to_20HZ(reduced_resolution, processed_signals)

        # Skin temperature: Fourier resampling to ceil(n / 50) samples (round up).
        n_skt = len(df['skt'])
        reduced_resolution['SKT'] = signal.resample(df['skt'], -(-n_skt // skt_decimation))

        # EMG features, one pass per site, suffixed so the three sites do not collide.
        # Note kept from the original cell, next to emg_coru: "NeuroKitWarning: No events
        # found. Check your event_channel or adjust 'threshold' or 'keep' arguments."
        for site in emg_sites:
            emg_signals, _info = nk.emg_process(df['emg_' + site], sampling_rate=raw_fs)
            reduce_resolution_to_20HZ(reduced_resolution, emg_signals.add_suffix('_' + site))

        # Constant identification columns, broadcast over every row of this recording.
        reduced_resolution['ID'] = ID
        reduced_resolution['VIDEO_FILENAME'] = VIDEO_FILENAME
        reduced_resolution['SCENARIO'] = SCENARIO
        reduced_resolution['unique_number'] = unique_number
        concated.append(reduced_resolution)


frame = pd.concat(concated)
# Create the output folders up front so the run does not fail at the final write.
os.makedirs(os.path.dirname(all_vars_csv), exist_ok=True)
frame.to_csv(all_vars_csv, index = False)

reduced_frame = frame[reduced_columns]
os.makedirs(os.path.dirname(reduced_vars_csv), exist_ok=True)
reduced_frame.to_csv(reduced_vars_csv, index = False)


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  info["SCR_Peaks"] > np.nanmin(info["SCR_Onsets"]), ~np.isnan(info["SCR_Onsets"])
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  info["SCR_Peaks"] > np.nanmin(info["SCR_Onsets"]), ~np.isnan(info["SCR_Onsets"])
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  info["SCR_Peaks"] > np.nanmin(info["SCR_Onsets"]), ~np.isnan(info["SCR_Onsets"])
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


In [17]:
# Scenario 2 / fold_3: convert every training physiology CSV under `data_root` into a
# 20 Hz NeuroKit2 feature table, then write the full and the reduced variable sets to CSV.

# Raw string: the original non-raw literal relied on invalid escapes (\O, \E, \s).
data_root = r'D:\OneDrive - The Pennsylvania State University\EPiC2023\scenario_2'
fold_name = 'fold_3'
raw_fs = 1000        # Hz; sampling_rate passed to every NeuroKit2 pipeline
skt_decimation = 50  # raw SKT samples per output sample (1000 Hz -> 20 Hz)
all_vars_csv = "./all_vars_round_up/train_physiology_senario_2_fold_3_neurokit2_all_variables_20HZ_20230422.csv"
reduced_vars_csv = "./reduced_vars_round_up/train_physiology_senario_2_fold_3_neurokit2_reduced_variables_20HZ_20230422.csv"

# (raw CSV column, NeuroKit2 pipeline) pairs, in the order their features are appended.
signal_pipelines = [
    ('ecg', nk.ecg_process),
    ('bvp', nk.ppg_process),
    ('gsr', nk.eda_process),
    ('rsp', nk.rsp_process),
]
# EMG recording sites; raw columns are 'emg_<site>', feature columns get a '_<site>' suffix.
emg_sites = ['zygo', 'coru', 'trap']

# Subset of feature columns exported to the "reduced variables" file.
reduced_columns = ['EDA_Tonic','EDA_Phasic','SCR_Onsets','SCR_Peaks','SCR_Height','SCR_Recovery',
                   'ECG_Rate','PPG_Rate','RSP_Clean','RSP_Rate','RSP_Amplitude','RSP_RVT',
                   'EMG_Amplitude_zygo','EMG_Onsets_zygo','EMG_Offsets_zygo','EMG_Activity_zygo','EMG_Raw_zygo',
                   'EMG_Amplitude_coru','EMG_Onsets_coru','EMG_Offsets_coru','EMG_Activity_coru','EMG_Raw_coru',
                   'EMG_Amplitude_trap','EMG_Onsets_trap','EMG_Offsets_trap','EMG_Activity_trap','EMG_Raw_trap',
                   'ID','VIDEO_FILENAME','SCENARIO','unique_number']

# Context manager so the JSON file handle is closed deterministically.
with open('train_physiology_senario_1_neurokit2_20HZ_20230413.json') as id_vid_file:
    id_vid = json.load(id_vid_file)  # {subject ID: {video name: unique_number}}

concated = []
for root, _subfolders, files in os.walk(data_root):
    # Only directories whose path mentions train, physiology and this fold are processed.
    if not ('train' in root and 'physiology' in root and fold_name in root):
        continue
    for file in files:
        if not file.endswith(".csv"):
            continue
        file_path = os.path.join(root, file)

        # Metadata is parsed from the path: ...scenario_<S>\... and sub_<ID>_vid_<VIDEO>.csv
        ID = re.search(r'sub_(.*?)_vid', file).group(1).strip()
        VIDEO_FILENAME = re.search(r'vid_(.*?)\.csv', file).group(1).strip()
        SCENARIO = re.search(r'scenario_(.*?)\\', root).group(1).strip()

        # Reuse the number already recorded for this (subject, video); otherwise record a
        # fresh one built from 8 distinct random digits (a leading 0 yields a shorter number).
        # NOTE(review): fresh numbers are not checked for collisions, and id_vid is only
        # updated in memory in this cell.
        unique_digits = random.sample(range(10), 8)
        candidate_number = int(''.join(map(str, unique_digits)))
        unique_number = id_vid.setdefault(ID, {}).setdefault(VIDEO_FILENAME, candidate_number)

        df = pd.read_csv(file_path)
        reduced_resolution = pd.DataFrame()

        # ECG, BVP/PPG, EDA/GSR and RSP features.
        # reduce_resolution_to_20HZ is defined in an earlier cell; presumably it adds the
        # downsampled columns of its second argument to reduced_resolution in place.
        for raw_column, process in signal_pipelines:
            processed_signals, _info = process(df[raw_column], sampling_rate=raw_fs)
            reduce_resolution_to_20HZ(reduced_resolution, processed_signals)

        # Skin temperature: Fourier resampling to ceil(n / 50) samples (round up).
        n_skt = len(df['skt'])
        reduced_resolution['SKT'] = signal.resample(df['skt'], -(-n_skt // skt_decimation))

        # EMG features, one pass per site, suffixed so the three sites do not collide.
        # Note kept from the original cell, next to emg_coru: "NeuroKitWarning: No events
        # found. Check your event_channel or adjust 'threshold' or 'keep' arguments."
        for site in emg_sites:
            emg_signals, _info = nk.emg_process(df['emg_' + site], sampling_rate=raw_fs)
            reduce_resolution_to_20HZ(reduced_resolution, emg_signals.add_suffix('_' + site))

        # Constant identification columns, broadcast over every row of this recording.
        reduced_resolution['ID'] = ID
        reduced_resolution['VIDEO_FILENAME'] = VIDEO_FILENAME
        reduced_resolution['SCENARIO'] = SCENARIO
        reduced_resolution['unique_number'] = unique_number
        concated.append(reduced_resolution)


frame = pd.concat(concated)
# Create the output folders up front so the run does not fail at the final write.
os.makedirs(os.path.dirname(all_vars_csv), exist_ok=True)
frame.to_csv(all_vars_csv, index = False)

reduced_frame = frame[reduced_columns]
os.makedirs(os.path.dirname(reduced_vars_csv), exist_ok=True)
reduced_frame.to_csv(reduced_vars_csv, index = False)


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  info["SCR_Peaks"] > np.nanmin(info["SCR_Onsets"]), ~np.isnan(info["SCR_Onsets"])
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


In [18]:
# Scenario 2 / fold_4: convert every training physiology CSV under `data_root` into a
# 20 Hz NeuroKit2 feature table, then write the full and the reduced variable sets to CSV.

# Raw string: the original non-raw literal relied on invalid escapes (\O, \E, \s).
data_root = r'D:\OneDrive - The Pennsylvania State University\EPiC2023\scenario_2'
fold_name = 'fold_4'
raw_fs = 1000        # Hz; sampling_rate passed to every NeuroKit2 pipeline
skt_decimation = 50  # raw SKT samples per output sample (1000 Hz -> 20 Hz)
all_vars_csv = "./all_vars_round_up/train_physiology_senario_2_fold_4_neurokit2_all_variables_20HZ_20230422.csv"
reduced_vars_csv = "./reduced_vars_round_up/train_physiology_senario_2_fold_4_neurokit2_reduced_variables_20HZ_20230422.csv"

# (raw CSV column, NeuroKit2 pipeline) pairs, in the order their features are appended.
signal_pipelines = [
    ('ecg', nk.ecg_process),
    ('bvp', nk.ppg_process),
    ('gsr', nk.eda_process),
    ('rsp', nk.rsp_process),
]
# EMG recording sites; raw columns are 'emg_<site>', feature columns get a '_<site>' suffix.
emg_sites = ['zygo', 'coru', 'trap']

# Subset of feature columns exported to the "reduced variables" file.
reduced_columns = ['EDA_Tonic','EDA_Phasic','SCR_Onsets','SCR_Peaks','SCR_Height','SCR_Recovery',
                   'ECG_Rate','PPG_Rate','RSP_Clean','RSP_Rate','RSP_Amplitude','RSP_RVT',
                   'EMG_Amplitude_zygo','EMG_Onsets_zygo','EMG_Offsets_zygo','EMG_Activity_zygo','EMG_Raw_zygo',
                   'EMG_Amplitude_coru','EMG_Onsets_coru','EMG_Offsets_coru','EMG_Activity_coru','EMG_Raw_coru',
                   'EMG_Amplitude_trap','EMG_Onsets_trap','EMG_Offsets_trap','EMG_Activity_trap','EMG_Raw_trap',
                   'ID','VIDEO_FILENAME','SCENARIO','unique_number']

# Context manager so the JSON file handle is closed deterministically.
with open('train_physiology_senario_1_neurokit2_20HZ_20230413.json') as id_vid_file:
    id_vid = json.load(id_vid_file)  # {subject ID: {video name: unique_number}}

concated = []
for root, _subfolders, files in os.walk(data_root):
    # Only directories whose path mentions train, physiology and this fold are processed.
    if not ('train' in root and 'physiology' in root and fold_name in root):
        continue
    for file in files:
        if not file.endswith(".csv"):
            continue
        file_path = os.path.join(root, file)

        # Metadata is parsed from the path: ...scenario_<S>\... and sub_<ID>_vid_<VIDEO>.csv
        ID = re.search(r'sub_(.*?)_vid', file).group(1).strip()
        VIDEO_FILENAME = re.search(r'vid_(.*?)\.csv', file).group(1).strip()
        SCENARIO = re.search(r'scenario_(.*?)\\', root).group(1).strip()

        # Reuse the number already recorded for this (subject, video); otherwise record a
        # fresh one built from 8 distinct random digits (a leading 0 yields a shorter number).
        # NOTE(review): fresh numbers are not checked for collisions, and id_vid is only
        # updated in memory in this cell.
        unique_digits = random.sample(range(10), 8)
        candidate_number = int(''.join(map(str, unique_digits)))
        unique_number = id_vid.setdefault(ID, {}).setdefault(VIDEO_FILENAME, candidate_number)

        df = pd.read_csv(file_path)
        reduced_resolution = pd.DataFrame()

        # ECG, BVP/PPG, EDA/GSR and RSP features.
        # reduce_resolution_to_20HZ is defined in an earlier cell; presumably it adds the
        # downsampled columns of its second argument to reduced_resolution in place.
        for raw_column, process in signal_pipelines:
            processed_signals, _info = process(df[raw_column], sampling_rate=raw_fs)
            reduce_resolution_to_20HZ(reduced_resolution, processed_signals)

        # Skin temperature: Fourier resampling to ceil(n / 50) samples (round up).
        n_skt = len(df['skt'])
        reduced_resolution['SKT'] = signal.resample(df['skt'], -(-n_skt // skt_decimation))

        # EMG features, one pass per site, suffixed so the three sites do not collide.
        # Note kept from the original cell, next to emg_coru: "NeuroKitWarning: No events
        # found. Check your event_channel or adjust 'threshold' or 'keep' arguments."
        for site in emg_sites:
            emg_signals, _info = nk.emg_process(df['emg_' + site], sampling_rate=raw_fs)
            reduce_resolution_to_20HZ(reduced_resolution, emg_signals.add_suffix('_' + site))

        # Constant identification columns, broadcast over every row of this recording.
        reduced_resolution['ID'] = ID
        reduced_resolution['VIDEO_FILENAME'] = VIDEO_FILENAME
        reduced_resolution['SCENARIO'] = SCENARIO
        reduced_resolution['unique_number'] = unique_number
        concated.append(reduced_resolution)


frame = pd.concat(concated)
# Create the output folders up front so the run does not fail at the final write.
os.makedirs(os.path.dirname(all_vars_csv), exist_ok=True)
frame.to_csv(all_vars_csv, index = False)

reduced_frame = frame[reduced_columns]
os.makedirs(os.path.dirname(reduced_vars_csv), exist_ok=True)
reduced_frame.to_csv(reduced_vars_csv, index = False)


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  info["SCR_Peaks"] > np.nanmin(info["SCR_Onsets"]), ~np.isnan(info["SCR_Onsets"])
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  info["SCR_Peaks"] > np.nanmin(info["SCR_Onsets"]), ~np.isnan(info["SCR_Onsets"])
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  info["SCR_Peaks"] > np.nanmin(info["SCR_Onsets"]), ~np.isnan(info["SCR_Onsets"])
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  info["SCR_Peaks"] > np.nanmin(info["SCR_Onsets"]), ~np.isnan(info["SCR_Onsets"])
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


In [19]:
# Scenario 3 / fold_0: convert every training physiology CSV under `data_root` into a
# 20 Hz NeuroKit2 feature table, then write the full and the reduced variable sets to CSV.

# Raw string: the original non-raw literal relied on invalid escapes (\O, \E, \s).
data_root = r'D:\OneDrive - The Pennsylvania State University\EPiC2023\scenario_3'
fold_name = 'fold_0'
raw_fs = 1000        # Hz; sampling_rate passed to every NeuroKit2 pipeline
skt_decimation = 50  # raw SKT samples per output sample (1000 Hz -> 20 Hz)
all_vars_csv = "./all_vars_round_up/train_physiology_senario_3_fold_0_neurokit2_all_variables_20HZ_20230422.csv"
reduced_vars_csv = "./reduced_vars_round_up/train_physiology_senario_3_fold_0_neurokit2_reduced_variables_20HZ_20230422.csv"

# (raw CSV column, NeuroKit2 pipeline) pairs, in the order their features are appended.
signal_pipelines = [
    ('ecg', nk.ecg_process),
    ('bvp', nk.ppg_process),
    ('gsr', nk.eda_process),
    ('rsp', nk.rsp_process),
]
# EMG recording sites; raw columns are 'emg_<site>', feature columns get a '_<site>' suffix.
emg_sites = ['zygo', 'coru', 'trap']

# Subset of feature columns exported to the "reduced variables" file.
reduced_columns = ['EDA_Tonic','EDA_Phasic','SCR_Onsets','SCR_Peaks','SCR_Height','SCR_Recovery',
                   'ECG_Rate','PPG_Rate','RSP_Clean','RSP_Rate','RSP_Amplitude','RSP_RVT',
                   'EMG_Amplitude_zygo','EMG_Onsets_zygo','EMG_Offsets_zygo','EMG_Activity_zygo','EMG_Raw_zygo',
                   'EMG_Amplitude_coru','EMG_Onsets_coru','EMG_Offsets_coru','EMG_Activity_coru','EMG_Raw_coru',
                   'EMG_Amplitude_trap','EMG_Onsets_trap','EMG_Offsets_trap','EMG_Activity_trap','EMG_Raw_trap',
                   'ID','VIDEO_FILENAME','SCENARIO','unique_number']

# Context manager so the JSON file handle is closed deterministically.
with open('train_physiology_senario_1_neurokit2_20HZ_20230413.json') as id_vid_file:
    id_vid = json.load(id_vid_file)  # {subject ID: {video name: unique_number}}

concated = []
for root, _subfolders, files in os.walk(data_root):
    # Only directories whose path mentions train, physiology and this fold are processed.
    if not ('train' in root and 'physiology' in root and fold_name in root):
        continue
    for file in files:
        if not file.endswith(".csv"):
            continue
        file_path = os.path.join(root, file)

        # Metadata is parsed from the path: ...scenario_<S>\... and sub_<ID>_vid_<VIDEO>.csv
        ID = re.search(r'sub_(.*?)_vid', file).group(1).strip()
        VIDEO_FILENAME = re.search(r'vid_(.*?)\.csv', file).group(1).strip()
        SCENARIO = re.search(r'scenario_(.*?)\\', root).group(1).strip()

        # Reuse the number already recorded for this (subject, video); otherwise record a
        # fresh one built from 8 distinct random digits (a leading 0 yields a shorter number).
        # NOTE(review): fresh numbers are not checked for collisions, and id_vid is only
        # updated in memory in this cell.
        unique_digits = random.sample(range(10), 8)
        candidate_number = int(''.join(map(str, unique_digits)))
        unique_number = id_vid.setdefault(ID, {}).setdefault(VIDEO_FILENAME, candidate_number)

        df = pd.read_csv(file_path)
        reduced_resolution = pd.DataFrame()

        # ECG, BVP/PPG, EDA/GSR and RSP features.
        # reduce_resolution_to_20HZ is defined in an earlier cell; presumably it adds the
        # downsampled columns of its second argument to reduced_resolution in place.
        for raw_column, process in signal_pipelines:
            processed_signals, _info = process(df[raw_column], sampling_rate=raw_fs)
            reduce_resolution_to_20HZ(reduced_resolution, processed_signals)

        # Skin temperature: Fourier resampling to ceil(n / 50) samples (round up).
        n_skt = len(df['skt'])
        reduced_resolution['SKT'] = signal.resample(df['skt'], -(-n_skt // skt_decimation))

        # EMG features, one pass per site, suffixed so the three sites do not collide.
        # Note kept from the original cell, next to emg_coru: "NeuroKitWarning: No events
        # found. Check your event_channel or adjust 'threshold' or 'keep' arguments."
        for site in emg_sites:
            emg_signals, _info = nk.emg_process(df['emg_' + site], sampling_rate=raw_fs)
            reduce_resolution_to_20HZ(reduced_resolution, emg_signals.add_suffix('_' + site))

        # Constant identification columns, broadcast over every row of this recording.
        reduced_resolution['ID'] = ID
        reduced_resolution['VIDEO_FILENAME'] = VIDEO_FILENAME
        reduced_resolution['SCENARIO'] = SCENARIO
        reduced_resolution['unique_number'] = unique_number
        concated.append(reduced_resolution)


frame = pd.concat(concated)
# Create the output folders up front so the run does not fail at the final write.
os.makedirs(os.path.dirname(all_vars_csv), exist_ok=True)
frame.to_csv(all_vars_csv, index = False)

reduced_frame = frame[reduced_columns]
os.makedirs(os.path.dirname(reduced_vars_csv), exist_ok=True)
reduced_frame.to_csv(reduced_vars_csv, index = False)


  info["SCR_Peaks"] > np.nanmin(info["SCR_Onsets"]), ~np.isnan(info["SCR_Onsets"])
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  info["SCR_Peaks"] > np.nanmin(info["SCR_Onsets"]), ~np.isnan(info["SCR_Onsets"])
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  info["SCR_Peaks"] > np.nanmin(info["SCR_Onsets"]), ~np.isnan(info["SCR_Onsets"])
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  info["SCR_Peaks"] > np.nanmin(info["SCR_Onsets"]), ~np.isnan(info["SCR_Onsets"])
  warn(
  warn(
  warn(


In [20]:
# Build the 20 Hz physiology feature tables for scenario 3, training fold 1:
# walk the scenario folder, run every matching CSV through the NeuroKit2
# pipelines, collect the per-recording frames, and write one "all variables"
# CSV plus one CSV restricted to a fixed column subset.

# Raw string: the backslashes are Windows path separators, not escape sequences
# (the non-raw form relied on the invalid escapes `\O`, `\E`, `\s`).
SCENARIO_DIR = r'D:\OneDrive - The Pennsylvania State University\EPiC2023\scenario_3'
FOLD_TAG = 'fold_1'
ALL_VARS_CSV = "./all_vars_round_up/train_physiology_senario_3_fold_1_neurokit2_all_variables_20HZ_20230422.csv"
REDUCED_VARS_CSV = "./reduced_vars_round_up/train_physiology_senario_3_fold_1_neurokit2_reduced_variables_20HZ_20230422.csv"

# Nested mapping {ID: {VIDEO_FILENAME: unique_number}}. Read through a context
# manager so the file handle is closed deterministically.
# NOTE(review): entries added below only live in memory within this cell;
# confirm whether the mapping is persisted elsewhere.
with open('train_physiology_senario_1_neurokit2_20HZ_20230413.json') as id_vid_file:
    id_vid = json.load(id_vid_file)

concated = []
for root, _subdirs, files in os.walk(SCENARIO_DIR):
    # The folder filter does not depend on the file, so test it once per directory.
    if not ('train' in root and 'physiology' in root and FOLD_TAG in root):
        continue
    for file in files:
        if not file.endswith(".csv"):
            continue

        # Get the path to the CSV file
        file_path = os.path.join(root, file)

        # Identifiers are parsed from names of the form sub_<ID>_vid_<VIDEO>.csv
        # and from the "scenario_<n>\" component of the (Windows-style) directory path.
        ID = re.search(r'sub_(.*?)_vid', file).group(1).strip()
        VIDEO_FILENAME = re.search(r'vid_(.*?)\.csv', file).group(1).strip()
        SCENARIO = re.search(r'scenario_(.*?)\\', root).group(1).strip()

        # Reuse the number already stored for this (ID, video) pair; otherwise
        # register a freshly drawn one. The draw happens unconditionally, as before.
        # NOTE(review): 8 distinct digits are drawn, so a leading 0 yields a
        # 7-digit number, and nothing checks for collisions between recordings.
        candidate_number = int(''.join(map(str, random.sample(range(10), 8))))
        unique_number = id_vid.setdefault(ID, {}).setdefault(VIDEO_FILENAME, candidate_number)

        df = pd.read_csv(file_path)

        # reduce_resolution_to_20HZ is defined elsewhere in this notebook and is
        # called only for its effect on `reduced_resolution` (return value unused).
        # Presumably it adds the downsampled columns in place - TODO confirm.
        reduced_resolution = pd.DataFrame()

        # ecg reduce resolution
        ecg_signals, ecg_info = nk.ecg_process(df['ecg'], sampling_rate=1000)
        reduce_resolution_to_20HZ(reduced_resolution, ecg_signals)

        # bvp/ppg reduce resolution
        ppg_signals, ppg_info = nk.ppg_process(df['bvp'], sampling_rate=1000)
        reduce_resolution_to_20HZ(reduced_resolution, ppg_signals)

        # eda/gsr reduce resolution
        eda_signals, eda_info = nk.eda_process(df['gsr'], sampling_rate=1000)
        reduce_resolution_to_20HZ(reduced_resolution, eda_signals)

        # rsp reduce resolution
        rsp_signals, rsp_info = nk.rsp_process(df['rsp'], sampling_rate=1000)
        reduce_resolution_to_20HZ(reduced_resolution, rsp_signals)

        # skt: resample the raw column to len/50 samples, rounding the length up
        # when it is not a multiple of 50 (integer ceiling division).
        skt_length = len(df['skt'])
        reduced_resolution['SKT'] = signal.resample(df['skt'], (skt_length + 49) // 50)

        # emg_zygo: suffix every output column so the three EMG channels stay distinct
        emg_zygo_signals, emg_zygo_info = nk.emg_process(df['emg_zygo'], sampling_rate=1000)
        emg_zygo_signals = emg_zygo_signals.add_suffix('_zygo')
        reduce_resolution_to_20HZ(reduced_resolution, emg_zygo_signals)

        # emg_coru
        # Seen on this channel: "NeuroKitWarning: No events found. Check your
        # event_channel or adjust 'threshold' or 'keep' arguments."
        emg_coru_signals, emg_coru_info = nk.emg_process(df['emg_coru'], sampling_rate=1000)
        emg_coru_signals = emg_coru_signals.add_suffix('_coru')
        reduce_resolution_to_20HZ(reduced_resolution, emg_coru_signals)

        # emg_trap
        emg_trap_signals, emg_trap_info = nk.emg_process(df['emg_trap'], sampling_rate=1000)
        emg_trap_signals = emg_trap_signals.add_suffix('_trap')
        reduce_resolution_to_20HZ(reduced_resolution, emg_trap_signals)

        # Tag every row with the recording it came from.
        reduced_resolution['ID'] = ID
        reduced_resolution['VIDEO_FILENAME'] = VIDEO_FILENAME
        reduced_resolution['SCENARIO'] = SCENARIO
        reduced_resolution['unique_number'] = unique_number
        concated.append(reduced_resolution)


# Stack all recordings and write the full table.
frame = pd.concat(concated)
frame.to_csv(ALL_VARS_CSV, index=False)

# Second export: only the selected feature columns plus the identifier columns.
reduced_frame = frame[['EDA_Tonic','EDA_Phasic','SCR_Onsets','SCR_Peaks','SCR_Height','SCR_Recovery',
                       'ECG_Rate','PPG_Rate','RSP_Clean','RSP_Rate','RSP_Amplitude','RSP_RVT',
                       'EMG_Amplitude_zygo','EMG_Onsets_zygo','EMG_Offsets_zygo','EMG_Activity_zygo','EMG_Raw_zygo',
                       'EMG_Amplitude_coru','EMG_Onsets_coru','EMG_Offsets_coru','EMG_Activity_coru','EMG_Raw_coru',
                       'EMG_Amplitude_trap','EMG_Onsets_trap','EMG_Offsets_trap','EMG_Activity_trap','EMG_Raw_trap',
                       'ID','VIDEO_FILENAME','SCENARIO','unique_number']]
reduced_frame.to_csv(REDUCED_VARS_CSV, index=False)


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  info["SCR_Peaks"] > np.nanmin(info["SCR_Onsets"]), ~np.isnan(info["SCR_Onsets"])
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  info["SCR_Peaks"] > np.nanmin(info["SCR_Onsets"]), ~np.isnan(info["SCR_Onsets"])
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  info["SCR_Peaks"] > np.nanmin(info["SCR_Onsets"]), ~np.isnan(info["SCR_Onsets"])
  warn(
  warn(


In [21]:
# Build the 20 Hz physiology feature tables for scenario 3, training fold 2:
# walk the scenario folder, run every matching CSV through the NeuroKit2
# pipelines, collect the per-recording frames, and write one "all variables"
# CSV plus one CSV restricted to a fixed column subset.

# Raw string: the backslashes are Windows path separators, not escape sequences
# (the non-raw form relied on the invalid escapes `\O`, `\E`, `\s`).
SCENARIO_DIR = r'D:\OneDrive - The Pennsylvania State University\EPiC2023\scenario_3'
FOLD_TAG = 'fold_2'
ALL_VARS_CSV = "./all_vars_round_up/train_physiology_senario_3_fold_2_neurokit2_all_variables_20HZ_20230422.csv"
REDUCED_VARS_CSV = "./reduced_vars_round_up/train_physiology_senario_3_fold_2_neurokit2_reduced_variables_20HZ_20230422.csv"

# Nested mapping {ID: {VIDEO_FILENAME: unique_number}}. Read through a context
# manager so the file handle is closed deterministically.
# NOTE(review): entries added below only live in memory within this cell;
# confirm whether the mapping is persisted elsewhere.
with open('train_physiology_senario_1_neurokit2_20HZ_20230413.json') as id_vid_file:
    id_vid = json.load(id_vid_file)

concated = []
for root, _subdirs, files in os.walk(SCENARIO_DIR):
    # The folder filter does not depend on the file, so test it once per directory.
    if not ('train' in root and 'physiology' in root and FOLD_TAG in root):
        continue
    for file in files:
        if not file.endswith(".csv"):
            continue

        # Get the path to the CSV file
        file_path = os.path.join(root, file)

        # Identifiers are parsed from names of the form sub_<ID>_vid_<VIDEO>.csv
        # and from the "scenario_<n>\" component of the (Windows-style) directory path.
        ID = re.search(r'sub_(.*?)_vid', file).group(1).strip()
        VIDEO_FILENAME = re.search(r'vid_(.*?)\.csv', file).group(1).strip()
        SCENARIO = re.search(r'scenario_(.*?)\\', root).group(1).strip()

        # Reuse the number already stored for this (ID, video) pair; otherwise
        # register a freshly drawn one. The draw happens unconditionally, as before.
        # NOTE(review): 8 distinct digits are drawn, so a leading 0 yields a
        # 7-digit number, and nothing checks for collisions between recordings.
        candidate_number = int(''.join(map(str, random.sample(range(10), 8))))
        unique_number = id_vid.setdefault(ID, {}).setdefault(VIDEO_FILENAME, candidate_number)

        df = pd.read_csv(file_path)

        # reduce_resolution_to_20HZ is defined elsewhere in this notebook and is
        # called only for its effect on `reduced_resolution` (return value unused).
        # Presumably it adds the downsampled columns in place - TODO confirm.
        reduced_resolution = pd.DataFrame()

        # ecg reduce resolution
        ecg_signals, ecg_info = nk.ecg_process(df['ecg'], sampling_rate=1000)
        reduce_resolution_to_20HZ(reduced_resolution, ecg_signals)

        # bvp/ppg reduce resolution
        ppg_signals, ppg_info = nk.ppg_process(df['bvp'], sampling_rate=1000)
        reduce_resolution_to_20HZ(reduced_resolution, ppg_signals)

        # eda/gsr reduce resolution
        eda_signals, eda_info = nk.eda_process(df['gsr'], sampling_rate=1000)
        reduce_resolution_to_20HZ(reduced_resolution, eda_signals)

        # rsp reduce resolution
        rsp_signals, rsp_info = nk.rsp_process(df['rsp'], sampling_rate=1000)
        reduce_resolution_to_20HZ(reduced_resolution, rsp_signals)

        # skt: resample the raw column to len/50 samples, rounding the length up
        # when it is not a multiple of 50 (integer ceiling division).
        skt_length = len(df['skt'])
        reduced_resolution['SKT'] = signal.resample(df['skt'], (skt_length + 49) // 50)

        # emg_zygo: suffix every output column so the three EMG channels stay distinct
        emg_zygo_signals, emg_zygo_info = nk.emg_process(df['emg_zygo'], sampling_rate=1000)
        emg_zygo_signals = emg_zygo_signals.add_suffix('_zygo')
        reduce_resolution_to_20HZ(reduced_resolution, emg_zygo_signals)

        # emg_coru
        # Seen on this channel: "NeuroKitWarning: No events found. Check your
        # event_channel or adjust 'threshold' or 'keep' arguments."
        emg_coru_signals, emg_coru_info = nk.emg_process(df['emg_coru'], sampling_rate=1000)
        emg_coru_signals = emg_coru_signals.add_suffix('_coru')
        reduce_resolution_to_20HZ(reduced_resolution, emg_coru_signals)

        # emg_trap
        emg_trap_signals, emg_trap_info = nk.emg_process(df['emg_trap'], sampling_rate=1000)
        emg_trap_signals = emg_trap_signals.add_suffix('_trap')
        reduce_resolution_to_20HZ(reduced_resolution, emg_trap_signals)

        # Tag every row with the recording it came from.
        reduced_resolution['ID'] = ID
        reduced_resolution['VIDEO_FILENAME'] = VIDEO_FILENAME
        reduced_resolution['SCENARIO'] = SCENARIO
        reduced_resolution['unique_number'] = unique_number
        concated.append(reduced_resolution)


# Stack all recordings and write the full table.
frame = pd.concat(concated)
frame.to_csv(ALL_VARS_CSV, index=False)

# Second export: only the selected feature columns plus the identifier columns.
reduced_frame = frame[['EDA_Tonic','EDA_Phasic','SCR_Onsets','SCR_Peaks','SCR_Height','SCR_Recovery',
                       'ECG_Rate','PPG_Rate','RSP_Clean','RSP_Rate','RSP_Amplitude','RSP_RVT',
                       'EMG_Amplitude_zygo','EMG_Onsets_zygo','EMG_Offsets_zygo','EMG_Activity_zygo','EMG_Raw_zygo',
                       'EMG_Amplitude_coru','EMG_Onsets_coru','EMG_Offsets_coru','EMG_Activity_coru','EMG_Raw_coru',
                       'EMG_Amplitude_trap','EMG_Onsets_trap','EMG_Offsets_trap','EMG_Activity_trap','EMG_Raw_trap',
                       'ID','VIDEO_FILENAME','SCENARIO','unique_number']]
reduced_frame.to_csv(REDUCED_VARS_CSV, index=False)


  info["SCR_Peaks"] > np.nanmin(info["SCR_Onsets"]), ~np.isnan(info["SCR_Onsets"])
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  info["SCR_Peaks"] > np.nanmin(info["SCR_Onsets"]), ~np.isnan(info["SCR_Onsets"])
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


In [22]:
# Build the 20 Hz physiology feature tables for scenario 3, training fold 3:
# walk the scenario folder, run every matching CSV through the NeuroKit2
# pipelines, collect the per-recording frames, and write one "all variables"
# CSV plus one CSV restricted to a fixed column subset.

# Raw string: the backslashes are Windows path separators, not escape sequences
# (the non-raw form relied on the invalid escapes `\O`, `\E`, `\s`).
SCENARIO_DIR = r'D:\OneDrive - The Pennsylvania State University\EPiC2023\scenario_3'
FOLD_TAG = 'fold_3'
ALL_VARS_CSV = "./all_vars_round_up/train_physiology_senario_3_fold_3_neurokit2_all_variables_20HZ_20230422.csv"
REDUCED_VARS_CSV = "./reduced_vars_round_up/train_physiology_senario_3_fold_3_neurokit2_reduced_variables_20HZ_20230422.csv"

# Nested mapping {ID: {VIDEO_FILENAME: unique_number}}. Read through a context
# manager so the file handle is closed deterministically.
# NOTE(review): entries added below only live in memory within this cell;
# confirm whether the mapping is persisted elsewhere.
with open('train_physiology_senario_1_neurokit2_20HZ_20230413.json') as id_vid_file:
    id_vid = json.load(id_vid_file)

concated = []
for root, _subdirs, files in os.walk(SCENARIO_DIR):
    # The folder filter does not depend on the file, so test it once per directory.
    if not ('train' in root and 'physiology' in root and FOLD_TAG in root):
        continue
    for file in files:
        if not file.endswith(".csv"):
            continue

        # Get the path to the CSV file
        file_path = os.path.join(root, file)

        # Identifiers are parsed from names of the form sub_<ID>_vid_<VIDEO>.csv
        # and from the "scenario_<n>\" component of the (Windows-style) directory path.
        ID = re.search(r'sub_(.*?)_vid', file).group(1).strip()
        VIDEO_FILENAME = re.search(r'vid_(.*?)\.csv', file).group(1).strip()
        SCENARIO = re.search(r'scenario_(.*?)\\', root).group(1).strip()

        # Reuse the number already stored for this (ID, video) pair; otherwise
        # register a freshly drawn one. The draw happens unconditionally, as before.
        # NOTE(review): 8 distinct digits are drawn, so a leading 0 yields a
        # 7-digit number, and nothing checks for collisions between recordings.
        candidate_number = int(''.join(map(str, random.sample(range(10), 8))))
        unique_number = id_vid.setdefault(ID, {}).setdefault(VIDEO_FILENAME, candidate_number)

        df = pd.read_csv(file_path)

        # reduce_resolution_to_20HZ is defined elsewhere in this notebook and is
        # called only for its effect on `reduced_resolution` (return value unused).
        # Presumably it adds the downsampled columns in place - TODO confirm.
        reduced_resolution = pd.DataFrame()

        # ecg reduce resolution
        ecg_signals, ecg_info = nk.ecg_process(df['ecg'], sampling_rate=1000)
        reduce_resolution_to_20HZ(reduced_resolution, ecg_signals)

        # bvp/ppg reduce resolution
        ppg_signals, ppg_info = nk.ppg_process(df['bvp'], sampling_rate=1000)
        reduce_resolution_to_20HZ(reduced_resolution, ppg_signals)

        # eda/gsr reduce resolution
        eda_signals, eda_info = nk.eda_process(df['gsr'], sampling_rate=1000)
        reduce_resolution_to_20HZ(reduced_resolution, eda_signals)

        # rsp reduce resolution
        rsp_signals, rsp_info = nk.rsp_process(df['rsp'], sampling_rate=1000)
        reduce_resolution_to_20HZ(reduced_resolution, rsp_signals)

        # skt: resample the raw column to len/50 samples, rounding the length up
        # when it is not a multiple of 50 (integer ceiling division).
        skt_length = len(df['skt'])
        reduced_resolution['SKT'] = signal.resample(df['skt'], (skt_length + 49) // 50)

        # emg_zygo: suffix every output column so the three EMG channels stay distinct
        emg_zygo_signals, emg_zygo_info = nk.emg_process(df['emg_zygo'], sampling_rate=1000)
        emg_zygo_signals = emg_zygo_signals.add_suffix('_zygo')
        reduce_resolution_to_20HZ(reduced_resolution, emg_zygo_signals)

        # emg_coru
        # Seen on this channel: "NeuroKitWarning: No events found. Check your
        # event_channel or adjust 'threshold' or 'keep' arguments."
        emg_coru_signals, emg_coru_info = nk.emg_process(df['emg_coru'], sampling_rate=1000)
        emg_coru_signals = emg_coru_signals.add_suffix('_coru')
        reduce_resolution_to_20HZ(reduced_resolution, emg_coru_signals)

        # emg_trap
        emg_trap_signals, emg_trap_info = nk.emg_process(df['emg_trap'], sampling_rate=1000)
        emg_trap_signals = emg_trap_signals.add_suffix('_trap')
        reduce_resolution_to_20HZ(reduced_resolution, emg_trap_signals)

        # Tag every row with the recording it came from.
        reduced_resolution['ID'] = ID
        reduced_resolution['VIDEO_FILENAME'] = VIDEO_FILENAME
        reduced_resolution['SCENARIO'] = SCENARIO
        reduced_resolution['unique_number'] = unique_number
        concated.append(reduced_resolution)


# Stack all recordings and write the full table.
frame = pd.concat(concated)
frame.to_csv(ALL_VARS_CSV, index=False)

# Second export: only the selected feature columns plus the identifier columns.
reduced_frame = frame[['EDA_Tonic','EDA_Phasic','SCR_Onsets','SCR_Peaks','SCR_Height','SCR_Recovery',
                       'ECG_Rate','PPG_Rate','RSP_Clean','RSP_Rate','RSP_Amplitude','RSP_RVT',
                       'EMG_Amplitude_zygo','EMG_Onsets_zygo','EMG_Offsets_zygo','EMG_Activity_zygo','EMG_Raw_zygo',
                       'EMG_Amplitude_coru','EMG_Onsets_coru','EMG_Offsets_coru','EMG_Activity_coru','EMG_Raw_coru',
                       'EMG_Amplitude_trap','EMG_Onsets_trap','EMG_Offsets_trap','EMG_Activity_trap','EMG_Raw_trap',
                       'ID','VIDEO_FILENAME','SCENARIO','unique_number']]
reduced_frame.to_csv(REDUCED_VARS_CSV, index=False)


  info["SCR_Peaks"] > np.nanmin(info["SCR_Onsets"]), ~np.isnan(info["SCR_Onsets"])
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  info["SCR_Peaks"] > np.nanmin(info["SCR_Onsets"]), ~np.isnan(info["SCR_Onsets"])
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  info["SCR_Peaks"] > np.nanmin(info["SCR_Onsets"]), ~np.isnan(info["SCR_Onsets"])
  warn(
  warn(


  warn(


In [23]:
# Build the 20 Hz physiology feature tables for scenario 4, training fold 0:
# walk the scenario folder, run every matching CSV through the NeuroKit2
# pipelines, collect the per-recording frames, and write one "all variables"
# CSV plus one CSV restricted to a fixed column subset.

# Raw string: the backslashes are Windows path separators, not escape sequences
# (the non-raw form relied on the invalid escapes `\O`, `\E`, `\s`).
SCENARIO_DIR = r'D:\OneDrive - The Pennsylvania State University\EPiC2023\scenario_4'
FOLD_TAG = 'fold_0'
ALL_VARS_CSV = "./all_vars_round_up/train_physiology_senario_4_fold_0_neurokit2_all_variables_20HZ_20230422.csv"
REDUCED_VARS_CSV = "./reduced_vars_round_up/train_physiology_senario_4_fold_0_neurokit2_reduced_variables_20HZ_20230422.csv"

# Nested mapping {ID: {VIDEO_FILENAME: unique_number}}. Read through a context
# manager so the file handle is closed deterministically.
# NOTE(review): entries added below only live in memory within this cell;
# confirm whether the mapping is persisted elsewhere.
with open('train_physiology_senario_1_neurokit2_20HZ_20230413.json') as id_vid_file:
    id_vid = json.load(id_vid_file)

concated = []
for root, _subdirs, files in os.walk(SCENARIO_DIR):
    # The folder filter does not depend on the file, so test it once per directory.
    if not ('train' in root and 'physiology' in root and FOLD_TAG in root):
        continue
    for file in files:
        if not file.endswith(".csv"):
            continue

        # Get the path to the CSV file
        file_path = os.path.join(root, file)

        # Identifiers are parsed from names of the form sub_<ID>_vid_<VIDEO>.csv
        # and from the "scenario_<n>\" component of the (Windows-style) directory path.
        ID = re.search(r'sub_(.*?)_vid', file).group(1).strip()
        VIDEO_FILENAME = re.search(r'vid_(.*?)\.csv', file).group(1).strip()
        SCENARIO = re.search(r'scenario_(.*?)\\', root).group(1).strip()

        # Reuse the number already stored for this (ID, video) pair; otherwise
        # register a freshly drawn one. The draw happens unconditionally, as before.
        # NOTE(review): 8 distinct digits are drawn, so a leading 0 yields a
        # 7-digit number, and nothing checks for collisions between recordings.
        candidate_number = int(''.join(map(str, random.sample(range(10), 8))))
        unique_number = id_vid.setdefault(ID, {}).setdefault(VIDEO_FILENAME, candidate_number)

        df = pd.read_csv(file_path)

        # reduce_resolution_to_20HZ is defined elsewhere in this notebook and is
        # called only for its effect on `reduced_resolution` (return value unused).
        # Presumably it adds the downsampled columns in place - TODO confirm.
        reduced_resolution = pd.DataFrame()

        # ecg reduce resolution
        ecg_signals, ecg_info = nk.ecg_process(df['ecg'], sampling_rate=1000)
        reduce_resolution_to_20HZ(reduced_resolution, ecg_signals)

        # bvp/ppg reduce resolution
        ppg_signals, ppg_info = nk.ppg_process(df['bvp'], sampling_rate=1000)
        reduce_resolution_to_20HZ(reduced_resolution, ppg_signals)

        # eda/gsr reduce resolution
        eda_signals, eda_info = nk.eda_process(df['gsr'], sampling_rate=1000)
        reduce_resolution_to_20HZ(reduced_resolution, eda_signals)

        # rsp reduce resolution
        rsp_signals, rsp_info = nk.rsp_process(df['rsp'], sampling_rate=1000)
        reduce_resolution_to_20HZ(reduced_resolution, rsp_signals)

        # skt: resample the raw column to len/50 samples, rounding the length up
        # when it is not a multiple of 50 (integer ceiling division).
        skt_length = len(df['skt'])
        reduced_resolution['SKT'] = signal.resample(df['skt'], (skt_length + 49) // 50)

        # emg_zygo: suffix every output column so the three EMG channels stay distinct
        emg_zygo_signals, emg_zygo_info = nk.emg_process(df['emg_zygo'], sampling_rate=1000)
        emg_zygo_signals = emg_zygo_signals.add_suffix('_zygo')
        reduce_resolution_to_20HZ(reduced_resolution, emg_zygo_signals)

        # emg_coru
        # Seen on this channel: "NeuroKitWarning: No events found. Check your
        # event_channel or adjust 'threshold' or 'keep' arguments."
        emg_coru_signals, emg_coru_info = nk.emg_process(df['emg_coru'], sampling_rate=1000)
        emg_coru_signals = emg_coru_signals.add_suffix('_coru')
        reduce_resolution_to_20HZ(reduced_resolution, emg_coru_signals)

        # emg_trap
        emg_trap_signals, emg_trap_info = nk.emg_process(df['emg_trap'], sampling_rate=1000)
        emg_trap_signals = emg_trap_signals.add_suffix('_trap')
        reduce_resolution_to_20HZ(reduced_resolution, emg_trap_signals)

        # Tag every row with the recording it came from.
        reduced_resolution['ID'] = ID
        reduced_resolution['VIDEO_FILENAME'] = VIDEO_FILENAME
        reduced_resolution['SCENARIO'] = SCENARIO
        reduced_resolution['unique_number'] = unique_number
        concated.append(reduced_resolution)


# Stack all recordings and write the full table.
frame = pd.concat(concated)
frame.to_csv(ALL_VARS_CSV, index=False)

# Second export: only the selected feature columns plus the identifier columns.
reduced_frame = frame[['EDA_Tonic','EDA_Phasic','SCR_Onsets','SCR_Peaks','SCR_Height','SCR_Recovery',
                       'ECG_Rate','PPG_Rate','RSP_Clean','RSP_Rate','RSP_Amplitude','RSP_RVT',
                       'EMG_Amplitude_zygo','EMG_Onsets_zygo','EMG_Offsets_zygo','EMG_Activity_zygo','EMG_Raw_zygo',
                       'EMG_Amplitude_coru','EMG_Onsets_coru','EMG_Offsets_coru','EMG_Activity_coru','EMG_Raw_coru',
                       'EMG_Amplitude_trap','EMG_Onsets_trap','EMG_Offsets_trap','EMG_Activity_trap','EMG_Raw_trap',
                       'ID','VIDEO_FILENAME','SCENARIO','unique_number']]
reduced_frame.to_csv(REDUCED_VARS_CSV, index=False)


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  info["SCR_Peaks"] > np.nanmin(info["SCR_Onsets"]), ~np.isnan(info["SCR_Onsets"])
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


In [24]:
# Build the 20 Hz feature tables for the scenario 4 / fold_1 training physiology recordings.
#
# Steps: walk the scenario folder, process every matching CSV with NeuroKit2 at 1000 Hz,
# reduce each processed signal table via reduce_resolution_to_20HZ (defined elsewhere in this
# notebook), tag the rows with ID / video / scenario / unique_number, then write the
# concatenated "all variables" table and a reduced column subset to CSV.

# Existing ID -> {video filename -> unique_number} mapping. The context manager closes the
# file handle as soon as the JSON has been parsed (json.load(open(...)) left it open).
with open('train_physiology_senario_1_neurokit2_20HZ_20230413.json') as id_vid_file:
    id_vid = json.load(id_vid_file)
concated = []
# Raw string: the backslashes of the Windows path must not be parsed as escape sequences.
for root, subFolder, files in os.walk(r'D:\OneDrive - The Pennsylvania State University\EPiC2023\scenario_4'):
    for file in files:
        # Only CSV files located under a train / physiology / fold_1 directory are processed.
        if not ('train' in root and 'physiology' in root and 'fold_1' in root and file.endswith(".csv")):
            continue

        # Get the path to the CSV file
        file_path = os.path.join(root, file)

        # Metadata parsed from the file name ("sub_<ID>_vid_<VIDEO>.csv") and from the
        # directory path ("scenario_<N>\..."). re.search returns None when a pattern does
        # not match, in which case .group raises AttributeError.
        ID = re.search(r'sub_(.*?)_vid', file).group(1).strip()
        VIDEO_FILENAME = re.search(r'vid_(.*?)\.csv', file).group(1).strip()
        SCENARIO = re.search(r'scenario_(.*?)\\', root).group(1).strip()

        # Candidate identifier: 8 distinct digits joined into an int (a leading 0 yields a
        # shorter number). It is only kept when the (ID, video) pair is not yet in id_vid;
        # otherwise the stored number is reused.
        unique_digits = random.sample(range(10), 8)
        unique_number = int(''.join(map(str, unique_digits)))
        if ID not in id_vid:
            id_vid[ID] = {VIDEO_FILENAME: unique_number}
        elif VIDEO_FILENAME not in id_vid[ID]:
            id_vid[ID][VIDEO_FILENAME] = unique_number
        else:
            unique_number = id_vid[ID][VIDEO_FILENAME]
        df = pd.read_csv(file_path)

        # Target frame; presumably filled in place by reduce_resolution_to_20HZ, whose
        # return value is not used here -- TODO confirm against its definition.
        reduced_resolution = pd.DataFrame()

        # ecg reduce resolution
        ecg_signals, ecg_info = nk.ecg_process(df['ecg'], sampling_rate = 1000)
        reduce_resolution_to_20HZ(reduced_resolution, ecg_signals)

        # bvp/ppg reduce resolution
        ppg_signals, ppg_info = nk.ppg_process(df['bvp'], sampling_rate = 1000)
        reduce_resolution_to_20HZ(reduced_resolution, ppg_signals)

        # eda/gsr reduce resolution
        eda_signals, eda_info = nk.eda_process(df['gsr'], sampling_rate = 1000)
        reduce_resolution_to_20HZ(reduced_resolution, eda_signals)

        # rsp reduce resolution
        rsp_signals, rsp_info = nk.rsp_process(df['rsp'], sampling_rate = 1000)
        reduce_resolution_to_20HZ(reduced_resolution, rsp_signals)

        # skt: resampled directly to ceil(n / 50) samples (1000 Hz -> 20 Hz, rounding up
        # when the length is not a multiple of 50).
        skt_length = len(df['skt'])
        skt_target_length = skt_length // 50 + (1 if skt_length % 50 else 0)
        reduced_resolution['SKT'] = signal.resample(df['skt'], skt_target_length)

        # emg_zygo -- columns get a muscle suffix so the three EMG channels do not collide
        emg_zygo_signals, emg_zygo_info = nk.emg_process(df['emg_zygo'], sampling_rate = 1000)
        emg_zygo_signals = emg_zygo_signals.add_suffix('_zygo')
        reduce_resolution_to_20HZ(reduced_resolution, emg_zygo_signals)

        # emg_coru
        #  NeuroKitWarning: No events found. Check your event_channel or adjust 'threshold' or 'keep' arguments.
        emg_coru_signals, emg_coru_info = nk.emg_process(df['emg_coru'], sampling_rate = 1000)
        emg_coru_signals = emg_coru_signals.add_suffix('_coru')
        reduce_resolution_to_20HZ(reduced_resolution, emg_coru_signals)

        # emg_trap
        emg_trap_signals, emg_trap_info = nk.emg_process(df['emg_trap'], sampling_rate = 1000)
        emg_trap_signals = emg_trap_signals.add_suffix('_trap')
        reduce_resolution_to_20HZ(reduced_resolution, emg_trap_signals)

        # Scalar assignments broadcast the metadata to every row of this recording.
        reduced_resolution['ID'] = ID
        reduced_resolution['VIDEO_FILENAME'] = VIDEO_FILENAME
        reduced_resolution['SCENARIO'] = SCENARIO
        reduced_resolution['unique_number'] = unique_number
        concated.append(reduced_resolution)


# One concatenation after the loop; the row index is not written to the CSV files.
frame = pd.concat(concated)
frame.to_csv("./all_vars_round_up/train_physiology_senario_4_fold_1_neurokit2_all_variables_20HZ_20230422.csv", index = False)

# Reduced variable set: selected EDA/SCR, heart-rate, respiration and EMG columns plus metadata.
reduced_frame = frame[['EDA_Tonic','EDA_Phasic','SCR_Onsets','SCR_Peaks','SCR_Height','SCR_Recovery',
                       'ECG_Rate','PPG_Rate','RSP_Clean','RSP_Rate','RSP_Amplitude','RSP_RVT',
                       'EMG_Amplitude_zygo','EMG_Onsets_zygo','EMG_Offsets_zygo','EMG_Activity_zygo','EMG_Raw_zygo',
                       'EMG_Amplitude_coru','EMG_Onsets_coru','EMG_Offsets_coru','EMG_Activity_coru','EMG_Raw_coru',
                       'EMG_Amplitude_trap','EMG_Onsets_trap','EMG_Offsets_trap','EMG_Activity_trap','EMG_Raw_trap',
                       'ID','VIDEO_FILENAME','SCENARIO','unique_number']]
reduced_frame.to_csv("./reduced_vars_round_up/train_physiology_senario_4_fold_1_neurokit2_reduced_variables_20HZ_20230422.csv", index = False)


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  info["SCR_Peaks"] > np.nanmin(info["SCR_Onsets"]), ~np.isnan(info["SCR_Onsets"])
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  info["SCR_Peaks"] > np.nanmin(info["SCR_Onsets"]), ~np.isnan(info["SCR_Onsets"])
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  info["SCR_Peaks"] > np.nanmin(info["SCR_Onsets"]), ~np.isnan(info["SCR_Onsets"])
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


In [25]:
# Build the 20 Hz feature tables for the scenario 1 test physiology recordings.
#
# Steps: walk the scenario folder, process every matching CSV with NeuroKit2 at 1000 Hz,
# reduce each processed signal table via reduce_resolution_to_20HZ (defined elsewhere in this
# notebook), tag the rows with ID / video / scenario / unique_number, then write the
# concatenated "all variables" table and a reduced column subset to CSV.

# Existing ID -> {video filename -> unique_number} mapping. The context manager closes the
# file handle as soon as the JSON has been parsed (json.load(open(...)) left it open).
with open('train_physiology_senario_1_neurokit2_20HZ_20230413.json') as id_vid_file:
    id_vid = json.load(id_vid_file)
concated = []
# Raw string: the backslashes of the Windows path must not be parsed as escape sequences.
for root, subFolder, files in os.walk(r'D:\OneDrive - The Pennsylvania State University\EPiC2023\scenario_1'):
    for file in files:
        # Only CSV files located under a test / physiology directory are processed.
        if not ('test' in root and 'physiology' in root and file.endswith(".csv")):
            continue

        # Get the path to the CSV file
        file_path = os.path.join(root, file)

        # Metadata parsed from the file name ("sub_<ID>_vid_<VIDEO>.csv") and from the
        # directory path ("scenario_<N>\..."). re.search returns None when a pattern does
        # not match, in which case .group raises AttributeError.
        ID = re.search(r'sub_(.*?)_vid', file).group(1).strip()
        VIDEO_FILENAME = re.search(r'vid_(.*?)\.csv', file).group(1).strip()
        SCENARIO = re.search(r'scenario_(.*?)\\', root).group(1).strip()

        # Candidate identifier: 8 distinct digits joined into an int (a leading 0 yields a
        # shorter number). It is only kept when the (ID, video) pair is not yet in id_vid;
        # otherwise the stored number is reused.
        unique_digits = random.sample(range(10), 8)
        unique_number = int(''.join(map(str, unique_digits)))
        if ID not in id_vid:
            id_vid[ID] = {VIDEO_FILENAME: unique_number}
        elif VIDEO_FILENAME not in id_vid[ID]:
            id_vid[ID][VIDEO_FILENAME] = unique_number
        else:
            unique_number = id_vid[ID][VIDEO_FILENAME]
        df = pd.read_csv(file_path)

        # Target frame; presumably filled in place by reduce_resolution_to_20HZ, whose
        # return value is not used here -- TODO confirm against its definition.
        reduced_resolution = pd.DataFrame()

        # ecg reduce resolution
        ecg_signals, ecg_info = nk.ecg_process(df['ecg'], sampling_rate = 1000)
        reduce_resolution_to_20HZ(reduced_resolution, ecg_signals)

        # bvp/ppg reduce resolution
        ppg_signals, ppg_info = nk.ppg_process(df['bvp'], sampling_rate = 1000)
        reduce_resolution_to_20HZ(reduced_resolution, ppg_signals)

        # eda/gsr reduce resolution
        eda_signals, eda_info = nk.eda_process(df['gsr'], sampling_rate = 1000)
        reduce_resolution_to_20HZ(reduced_resolution, eda_signals)

        # rsp reduce resolution
        rsp_signals, rsp_info = nk.rsp_process(df['rsp'], sampling_rate = 1000)
        reduce_resolution_to_20HZ(reduced_resolution, rsp_signals)

        # skt: resampled directly to ceil(n / 50) samples (1000 Hz -> 20 Hz, rounding up
        # when the length is not a multiple of 50).
        skt_length = len(df['skt'])
        skt_target_length = skt_length // 50 + (1 if skt_length % 50 else 0)
        reduced_resolution['SKT'] = signal.resample(df['skt'], skt_target_length)

        # emg_zygo -- columns get a muscle suffix so the three EMG channels do not collide
        emg_zygo_signals, emg_zygo_info = nk.emg_process(df['emg_zygo'], sampling_rate = 1000)
        emg_zygo_signals = emg_zygo_signals.add_suffix('_zygo')
        reduce_resolution_to_20HZ(reduced_resolution, emg_zygo_signals)

        # emg_coru
        #  NeuroKitWarning: No events found. Check your event_channel or adjust 'threshold' or 'keep' arguments.
        emg_coru_signals, emg_coru_info = nk.emg_process(df['emg_coru'], sampling_rate = 1000)
        emg_coru_signals = emg_coru_signals.add_suffix('_coru')
        reduce_resolution_to_20HZ(reduced_resolution, emg_coru_signals)

        # emg_trap
        emg_trap_signals, emg_trap_info = nk.emg_process(df['emg_trap'], sampling_rate = 1000)
        emg_trap_signals = emg_trap_signals.add_suffix('_trap')
        reduce_resolution_to_20HZ(reduced_resolution, emg_trap_signals)

        # Scalar assignments broadcast the metadata to every row of this recording.
        reduced_resolution['ID'] = ID
        reduced_resolution['VIDEO_FILENAME'] = VIDEO_FILENAME
        reduced_resolution['SCENARIO'] = SCENARIO
        reduced_resolution['unique_number'] = unique_number
        concated.append(reduced_resolution)


# One concatenation after the loop; the row index is not written to the CSV files.
frame = pd.concat(concated)
frame.to_csv("./all_vars_round_up/test_physiology_senario_1_neurokit2_all_variables_20HZ_20230422.csv", index = False)

# Reduced variable set: selected EDA/SCR, heart-rate, respiration and EMG columns plus metadata.
reduced_frame = frame[['EDA_Tonic','EDA_Phasic','SCR_Onsets','SCR_Peaks','SCR_Height','SCR_Recovery',
                       'ECG_Rate','PPG_Rate','RSP_Clean','RSP_Rate','RSP_Amplitude','RSP_RVT',
                       'EMG_Amplitude_zygo','EMG_Onsets_zygo','EMG_Offsets_zygo','EMG_Activity_zygo','EMG_Raw_zygo',
                       'EMG_Amplitude_coru','EMG_Onsets_coru','EMG_Offsets_coru','EMG_Activity_coru','EMG_Raw_coru',
                       'EMG_Amplitude_trap','EMG_Onsets_trap','EMG_Offsets_trap','EMG_Activity_trap','EMG_Raw_trap',
                       'ID','VIDEO_FILENAME','SCENARIO','unique_number']]
reduced_frame.to_csv("./reduced_vars_round_up/test_physiology_senario_1_neurokit2_reduced_variables_20HZ_20230422.csv", index = False)


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(


  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


In [26]:
# Build the 20 Hz feature tables for the scenario 2 / fold_0 test physiology recordings.
#
# Steps: walk the scenario folder, process every matching CSV with NeuroKit2 at 1000 Hz,
# reduce each processed signal table via reduce_resolution_to_20HZ (defined elsewhere in this
# notebook), tag the rows with ID / video / scenario / unique_number, then write the
# concatenated "all variables" table and a reduced column subset to CSV.

# Existing ID -> {video filename -> unique_number} mapping. The context manager closes the
# file handle as soon as the JSON has been parsed (json.load(open(...)) left it open).
with open('train_physiology_senario_1_neurokit2_20HZ_20230413.json') as id_vid_file:
    id_vid = json.load(id_vid_file)
concated = []
# Raw string: the backslashes of the Windows path must not be parsed as escape sequences.
for root, subFolder, files in os.walk(r'D:\OneDrive - The Pennsylvania State University\EPiC2023\scenario_2'):
    for file in files:
        # Only CSV files located under a test / physiology / fold_0 directory are processed.
        if not ('test' in root and 'physiology' in root and 'fold_0' in root and file.endswith(".csv")):
            continue

        # Get the path to the CSV file
        file_path = os.path.join(root, file)

        # Metadata parsed from the file name ("sub_<ID>_vid_<VIDEO>.csv") and from the
        # directory path ("scenario_<N>\..."). re.search returns None when a pattern does
        # not match, in which case .group raises AttributeError.
        ID = re.search(r'sub_(.*?)_vid', file).group(1).strip()
        VIDEO_FILENAME = re.search(r'vid_(.*?)\.csv', file).group(1).strip()
        SCENARIO = re.search(r'scenario_(.*?)\\', root).group(1).strip()

        # Candidate identifier: 8 distinct digits joined into an int (a leading 0 yields a
        # shorter number). It is only kept when the (ID, video) pair is not yet in id_vid;
        # otherwise the stored number is reused.
        unique_digits = random.sample(range(10), 8)
        unique_number = int(''.join(map(str, unique_digits)))
        if ID not in id_vid:
            id_vid[ID] = {VIDEO_FILENAME: unique_number}
        elif VIDEO_FILENAME not in id_vid[ID]:
            id_vid[ID][VIDEO_FILENAME] = unique_number
        else:
            unique_number = id_vid[ID][VIDEO_FILENAME]
        df = pd.read_csv(file_path)

        # Target frame; presumably filled in place by reduce_resolution_to_20HZ, whose
        # return value is not used here -- TODO confirm against its definition.
        reduced_resolution = pd.DataFrame()

        # ecg reduce resolution
        ecg_signals, ecg_info = nk.ecg_process(df['ecg'], sampling_rate = 1000)
        reduce_resolution_to_20HZ(reduced_resolution, ecg_signals)

        # bvp/ppg reduce resolution
        ppg_signals, ppg_info = nk.ppg_process(df['bvp'], sampling_rate = 1000)
        reduce_resolution_to_20HZ(reduced_resolution, ppg_signals)

        # eda/gsr reduce resolution
        eda_signals, eda_info = nk.eda_process(df['gsr'], sampling_rate = 1000)
        reduce_resolution_to_20HZ(reduced_resolution, eda_signals)

        # rsp reduce resolution
        rsp_signals, rsp_info = nk.rsp_process(df['rsp'], sampling_rate = 1000)
        reduce_resolution_to_20HZ(reduced_resolution, rsp_signals)

        # skt: resampled directly to ceil(n / 50) samples (1000 Hz -> 20 Hz, rounding up
        # when the length is not a multiple of 50).
        skt_length = len(df['skt'])
        skt_target_length = skt_length // 50 + (1 if skt_length % 50 else 0)
        reduced_resolution['SKT'] = signal.resample(df['skt'], skt_target_length)

        # emg_zygo -- columns get a muscle suffix so the three EMG channels do not collide
        emg_zygo_signals, emg_zygo_info = nk.emg_process(df['emg_zygo'], sampling_rate = 1000)
        emg_zygo_signals = emg_zygo_signals.add_suffix('_zygo')
        reduce_resolution_to_20HZ(reduced_resolution, emg_zygo_signals)

        # emg_coru
        #  NeuroKitWarning: No events found. Check your event_channel or adjust 'threshold' or 'keep' arguments.
        emg_coru_signals, emg_coru_info = nk.emg_process(df['emg_coru'], sampling_rate = 1000)
        emg_coru_signals = emg_coru_signals.add_suffix('_coru')
        reduce_resolution_to_20HZ(reduced_resolution, emg_coru_signals)

        # emg_trap
        emg_trap_signals, emg_trap_info = nk.emg_process(df['emg_trap'], sampling_rate = 1000)
        emg_trap_signals = emg_trap_signals.add_suffix('_trap')
        reduce_resolution_to_20HZ(reduced_resolution, emg_trap_signals)

        # Scalar assignments broadcast the metadata to every row of this recording.
        reduced_resolution['ID'] = ID
        reduced_resolution['VIDEO_FILENAME'] = VIDEO_FILENAME
        reduced_resolution['SCENARIO'] = SCENARIO
        reduced_resolution['unique_number'] = unique_number
        concated.append(reduced_resolution)


# One concatenation after the loop; the row index is not written to the CSV files.
frame = pd.concat(concated)
frame.to_csv("./all_vars_round_up/test_physiology_senario_2_fold_0_neurokit2_all_variables_20HZ_20230422.csv", index = False)

# Reduced variable set: selected EDA/SCR, heart-rate, respiration and EMG columns plus metadata.
reduced_frame = frame[['EDA_Tonic','EDA_Phasic','SCR_Onsets','SCR_Peaks','SCR_Height','SCR_Recovery',
                       'ECG_Rate','PPG_Rate','RSP_Clean','RSP_Rate','RSP_Amplitude','RSP_RVT',
                       'EMG_Amplitude_zygo','EMG_Onsets_zygo','EMG_Offsets_zygo','EMG_Activity_zygo','EMG_Raw_zygo',
                       'EMG_Amplitude_coru','EMG_Onsets_coru','EMG_Offsets_coru','EMG_Activity_coru','EMG_Raw_coru',
                       'EMG_Amplitude_trap','EMG_Onsets_trap','EMG_Offsets_trap','EMG_Activity_trap','EMG_Raw_trap',
                       'ID','VIDEO_FILENAME','SCENARIO','unique_number']]
reduced_frame.to_csv("./reduced_vars_round_up/test_physiology_senario_2_fold_0_neurokit2_reduced_variables_20HZ_20230422.csv", index = False)


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


In [27]:
# Build the 20 Hz feature tables for the scenario 2 / fold_1 test physiology recordings.
#
# Steps: walk the scenario folder, process every matching CSV with NeuroKit2 at 1000 Hz,
# reduce each processed signal table via reduce_resolution_to_20HZ (defined elsewhere in this
# notebook), tag the rows with ID / video / scenario / unique_number, then write the
# concatenated "all variables" table and a reduced column subset to CSV.

# Existing ID -> {video filename -> unique_number} mapping. The context manager closes the
# file handle as soon as the JSON has been parsed (json.load(open(...)) left it open).
with open('train_physiology_senario_1_neurokit2_20HZ_20230413.json') as id_vid_file:
    id_vid = json.load(id_vid_file)
concated = []
# Raw string: the backslashes of the Windows path must not be parsed as escape sequences.
for root, subFolder, files in os.walk(r'D:\OneDrive - The Pennsylvania State University\EPiC2023\scenario_2'):
    for file in files:
        # Only CSV files located under a test / physiology / fold_1 directory are processed.
        if not ('test' in root and 'physiology' in root and 'fold_1' in root and file.endswith(".csv")):
            continue

        # Get the path to the CSV file
        file_path = os.path.join(root, file)

        # Metadata parsed from the file name ("sub_<ID>_vid_<VIDEO>.csv") and from the
        # directory path ("scenario_<N>\..."). re.search returns None when a pattern does
        # not match, in which case .group raises AttributeError.
        ID = re.search(r'sub_(.*?)_vid', file).group(1).strip()
        VIDEO_FILENAME = re.search(r'vid_(.*?)\.csv', file).group(1).strip()
        SCENARIO = re.search(r'scenario_(.*?)\\', root).group(1).strip()

        # Candidate identifier: 8 distinct digits joined into an int (a leading 0 yields a
        # shorter number). It is only kept when the (ID, video) pair is not yet in id_vid;
        # otherwise the stored number is reused.
        unique_digits = random.sample(range(10), 8)
        unique_number = int(''.join(map(str, unique_digits)))
        if ID not in id_vid:
            id_vid[ID] = {VIDEO_FILENAME: unique_number}
        elif VIDEO_FILENAME not in id_vid[ID]:
            id_vid[ID][VIDEO_FILENAME] = unique_number
        else:
            unique_number = id_vid[ID][VIDEO_FILENAME]
        df = pd.read_csv(file_path)

        # Target frame; presumably filled in place by reduce_resolution_to_20HZ, whose
        # return value is not used here -- TODO confirm against its definition.
        reduced_resolution = pd.DataFrame()

        # ecg reduce resolution
        ecg_signals, ecg_info = nk.ecg_process(df['ecg'], sampling_rate = 1000)
        reduce_resolution_to_20HZ(reduced_resolution, ecg_signals)

        # bvp/ppg reduce resolution
        ppg_signals, ppg_info = nk.ppg_process(df['bvp'], sampling_rate = 1000)
        reduce_resolution_to_20HZ(reduced_resolution, ppg_signals)

        # eda/gsr reduce resolution
        eda_signals, eda_info = nk.eda_process(df['gsr'], sampling_rate = 1000)
        reduce_resolution_to_20HZ(reduced_resolution, eda_signals)

        # rsp reduce resolution
        rsp_signals, rsp_info = nk.rsp_process(df['rsp'], sampling_rate = 1000)
        reduce_resolution_to_20HZ(reduced_resolution, rsp_signals)

        # skt: resampled directly to ceil(n / 50) samples (1000 Hz -> 20 Hz, rounding up
        # when the length is not a multiple of 50).
        skt_length = len(df['skt'])
        skt_target_length = skt_length // 50 + (1 if skt_length % 50 else 0)
        reduced_resolution['SKT'] = signal.resample(df['skt'], skt_target_length)

        # emg_zygo -- columns get a muscle suffix so the three EMG channels do not collide
        emg_zygo_signals, emg_zygo_info = nk.emg_process(df['emg_zygo'], sampling_rate = 1000)
        emg_zygo_signals = emg_zygo_signals.add_suffix('_zygo')
        reduce_resolution_to_20HZ(reduced_resolution, emg_zygo_signals)

        # emg_coru
        #  NeuroKitWarning: No events found. Check your event_channel or adjust 'threshold' or 'keep' arguments.
        emg_coru_signals, emg_coru_info = nk.emg_process(df['emg_coru'], sampling_rate = 1000)
        emg_coru_signals = emg_coru_signals.add_suffix('_coru')
        reduce_resolution_to_20HZ(reduced_resolution, emg_coru_signals)

        # emg_trap
        emg_trap_signals, emg_trap_info = nk.emg_process(df['emg_trap'], sampling_rate = 1000)
        emg_trap_signals = emg_trap_signals.add_suffix('_trap')
        reduce_resolution_to_20HZ(reduced_resolution, emg_trap_signals)

        # Scalar assignments broadcast the metadata to every row of this recording.
        reduced_resolution['ID'] = ID
        reduced_resolution['VIDEO_FILENAME'] = VIDEO_FILENAME
        reduced_resolution['SCENARIO'] = SCENARIO
        reduced_resolution['unique_number'] = unique_number
        concated.append(reduced_resolution)


# One concatenation after the loop; the row index is not written to the CSV files.
frame = pd.concat(concated)
frame.to_csv("./all_vars_round_up/test_physiology_senario_2_fold_1_neurokit2_all_variables_20HZ_20230422.csv", index = False)

# Reduced variable set: selected EDA/SCR, heart-rate, respiration and EMG columns plus metadata.
reduced_frame = frame[['EDA_Tonic','EDA_Phasic','SCR_Onsets','SCR_Peaks','SCR_Height','SCR_Recovery',
                       'ECG_Rate','PPG_Rate','RSP_Clean','RSP_Rate','RSP_Amplitude','RSP_RVT',
                       'EMG_Amplitude_zygo','EMG_Onsets_zygo','EMG_Offsets_zygo','EMG_Activity_zygo','EMG_Raw_zygo',
                       'EMG_Amplitude_coru','EMG_Onsets_coru','EMG_Offsets_coru','EMG_Activity_coru','EMG_Raw_coru',
                       'EMG_Amplitude_trap','EMG_Onsets_trap','EMG_Offsets_trap','EMG_Activity_trap','EMG_Raw_trap',
                       'ID','VIDEO_FILENAME','SCENARIO','unique_number']]
reduced_frame.to_csv("./reduced_vars_round_up/test_physiology_senario_2_fold_1_neurokit2_reduced_variables_20HZ_20230422.csv", index = False)


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


In [28]:
# Build the 20 Hz physiology tables for the scenario_2 / fold_2 test split.
#
# Each matching CSV is processed with the NeuroKit2 pipelines (sampling_rate=1000),
# handed to reduce_resolution_to_20HZ (defined elsewhere in this notebook; it is
# expected to add the down-sampled columns to `reduced_resolution` in place --
# TODO confirm against its definition), tagged with ID / VIDEO_FILENAME / SCENARIO /
# unique_number and collected in `concated`. The concatenated frame is then written
# out in full and as a column subset.

# Load the {ID: {VIDEO_FILENAME: unique_number}} mapping inside a context manager
# so the file handle is closed as soon as the JSON has been parsed.
with open('train_physiology_senario_1_neurokit2_20HZ_20230413.json') as id_vid_file:
    id_vid = json.load(id_vid_file)
concated = []
# Raw string literal: the Windows path contains backslash sequences (\O, \E, \s)
# that are invalid escapes in a regular string literal.
for root, subFolder, files in os.walk(r'D:\OneDrive - The Pennsylvania State University\EPiC2023\scenario_2'):
    for file in files:
        # Keep only CSV files whose directory path mentions test, physiology and fold_2.
        if not ('test' in root and 'physiology' in root and 'fold_2' in root and file.endswith(".csv")):
            continue
        file_path = os.path.join(root, file)

        # Identifiers parsed from the file name (sub_<ID>_vid_<VIDEO>.csv) and from the
        # directory path (scenario_<N>\...; the pattern relies on the Windows separator).
        ID = re.search(r'sub_(.*?)_vid', file).group(1).strip()
        VIDEO_FILENAME = re.search(r'vid_(.*?)\.csv', file).group(1).strip()
        SCENARIO = re.search(r'scenario_(.*?)\\', root).group(1).strip()

        # Candidate identifier built from 8 distinct random digits (a leading 0 is
        # dropped by int()). It is only kept when id_vid has no entry yet for this
        # (ID, VIDEO_FILENAME) pair; otherwise the stored number is reused.
        # NOTE(review): candidates are not checked against numbers already present in
        # id_vid, and the updated mapping is not written back to disk in this cell.
        unique_digits = random.sample(range(10), 8)
        unique_number = int(''.join(map(str, unique_digits)))
        unique_number = id_vid.setdefault(ID, {}).setdefault(VIDEO_FILENAME, unique_number)

        df = pd.read_csv(file_path)
        reduced_resolution = pd.DataFrame()

        # ECG, BVP/PPG, EDA/GSR and respiration follow the same recipe; the tuple
        # order fixes the order in which the helper receives them.
        for column, process in (('ecg', nk.ecg_process), ('bvp', nk.ppg_process),
                                ('gsr', nk.eda_process), ('rsp', nk.rsp_process)):
            signals, info = process(df[column], sampling_rate=1000)
            reduce_resolution_to_20HZ(reduced_resolution, signals)

        # Skin temperature: resample to 1/50 of the original length (1000 Hz -> 20 Hz),
        # rounding the sample count up when the length is not a multiple of 50.
        skt_length = (len(df['skt']) + 49) // 50
        reduced_resolution['SKT'] = signal.resample(df['skt'], skt_length)

        # The three EMG channels share one pipeline; each channel's columns get a
        # suffix (_zygo, _coru, _trap) so they stay distinguishable.
        # emg_coru has triggered: NeuroKitWarning: No events found. Check your
        # event_channel or adjust 'threshold' or 'keep' arguments.
        for muscle in ('zygo', 'coru', 'trap'):
            emg_signals, emg_info = nk.emg_process(df['emg_' + muscle], sampling_rate=1000)
            reduce_resolution_to_20HZ(reduced_resolution, emg_signals.add_suffix('_' + muscle))

        # Constant per-file metadata, broadcast to every row.
        reduced_resolution['ID'] = ID
        reduced_resolution['VIDEO_FILENAME'] = VIDEO_FILENAME
        reduced_resolution['SCENARIO'] = SCENARIO
        reduced_resolution['unique_number'] = unique_number
        concated.append(reduced_resolution)


# Stack all per-file tables and export every column.
frame = pd.concat(concated)
frame.to_csv("./all_vars_round_up/test_physiology_senario_2_fold_2_neurokit2_all_variables_20HZ_20230422.csv", index = False)

# Export the selected subset of columns to a second CSV.
reduced_frame = frame[['EDA_Tonic','EDA_Phasic','SCR_Onsets','SCR_Peaks','SCR_Height','SCR_Recovery',
                       'ECG_Rate','PPG_Rate','RSP_Clean','RSP_Rate','RSP_Amplitude','RSP_RVT',
                       'EMG_Amplitude_zygo','EMG_Onsets_zygo','EMG_Offsets_zygo','EMG_Activity_zygo','EMG_Raw_zygo',
                       'EMG_Amplitude_coru','EMG_Onsets_coru','EMG_Offsets_coru','EMG_Activity_coru','EMG_Raw_coru',
                       'EMG_Amplitude_trap','EMG_Onsets_trap','EMG_Offsets_trap','EMG_Activity_trap','EMG_Raw_trap',
                       'ID','VIDEO_FILENAME','SCENARIO','unique_number']]
reduced_frame.to_csv("./reduced_vars_round_up/test_physiology_senario_2_fold_2_neurokit2_reduced_variables_20HZ_20230422.csv", index = False)


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


  warn(
  warn(
  warn(


In [29]:
# Build the 20 Hz physiology tables for the scenario_2 / fold_3 test split.
#
# Each matching CSV is processed with the NeuroKit2 pipelines (sampling_rate=1000),
# handed to reduce_resolution_to_20HZ (defined elsewhere in this notebook; it is
# expected to add the down-sampled columns to `reduced_resolution` in place --
# TODO confirm against its definition), tagged with ID / VIDEO_FILENAME / SCENARIO /
# unique_number and collected in `concated`. The concatenated frame is then written
# out in full and as a column subset.

# Load the {ID: {VIDEO_FILENAME: unique_number}} mapping inside a context manager
# so the file handle is closed as soon as the JSON has been parsed.
with open('train_physiology_senario_1_neurokit2_20HZ_20230413.json') as id_vid_file:
    id_vid = json.load(id_vid_file)
concated = []
# Raw string literal: the Windows path contains backslash sequences (\O, \E, \s)
# that are invalid escapes in a regular string literal.
for root, subFolder, files in os.walk(r'D:\OneDrive - The Pennsylvania State University\EPiC2023\scenario_2'):
    for file in files:
        # Keep only CSV files whose directory path mentions test, physiology and fold_3.
        if not ('test' in root and 'physiology' in root and 'fold_3' in root and file.endswith(".csv")):
            continue
        file_path = os.path.join(root, file)

        # Identifiers parsed from the file name (sub_<ID>_vid_<VIDEO>.csv) and from the
        # directory path (scenario_<N>\...; the pattern relies on the Windows separator).
        ID = re.search(r'sub_(.*?)_vid', file).group(1).strip()
        VIDEO_FILENAME = re.search(r'vid_(.*?)\.csv', file).group(1).strip()
        SCENARIO = re.search(r'scenario_(.*?)\\', root).group(1).strip()

        # Candidate identifier built from 8 distinct random digits (a leading 0 is
        # dropped by int()). It is only kept when id_vid has no entry yet for this
        # (ID, VIDEO_FILENAME) pair; otherwise the stored number is reused.
        # NOTE(review): candidates are not checked against numbers already present in
        # id_vid, and the updated mapping is not written back to disk in this cell.
        unique_digits = random.sample(range(10), 8)
        unique_number = int(''.join(map(str, unique_digits)))
        unique_number = id_vid.setdefault(ID, {}).setdefault(VIDEO_FILENAME, unique_number)

        df = pd.read_csv(file_path)
        reduced_resolution = pd.DataFrame()

        # ECG, BVP/PPG, EDA/GSR and respiration follow the same recipe; the tuple
        # order fixes the order in which the helper receives them.
        for column, process in (('ecg', nk.ecg_process), ('bvp', nk.ppg_process),
                                ('gsr', nk.eda_process), ('rsp', nk.rsp_process)):
            signals, info = process(df[column], sampling_rate=1000)
            reduce_resolution_to_20HZ(reduced_resolution, signals)

        # Skin temperature: resample to 1/50 of the original length (1000 Hz -> 20 Hz),
        # rounding the sample count up when the length is not a multiple of 50.
        skt_length = (len(df['skt']) + 49) // 50
        reduced_resolution['SKT'] = signal.resample(df['skt'], skt_length)

        # The three EMG channels share one pipeline; each channel's columns get a
        # suffix (_zygo, _coru, _trap) so they stay distinguishable.
        # emg_coru has triggered: NeuroKitWarning: No events found. Check your
        # event_channel or adjust 'threshold' or 'keep' arguments.
        for muscle in ('zygo', 'coru', 'trap'):
            emg_signals, emg_info = nk.emg_process(df['emg_' + muscle], sampling_rate=1000)
            reduce_resolution_to_20HZ(reduced_resolution, emg_signals.add_suffix('_' + muscle))

        # Constant per-file metadata, broadcast to every row.
        reduced_resolution['ID'] = ID
        reduced_resolution['VIDEO_FILENAME'] = VIDEO_FILENAME
        reduced_resolution['SCENARIO'] = SCENARIO
        reduced_resolution['unique_number'] = unique_number
        concated.append(reduced_resolution)


# Stack all per-file tables and export every column.
frame = pd.concat(concated)
frame.to_csv("./all_vars_round_up/test_physiology_senario_2_fold_3_neurokit2_all_variables_20HZ_20230422.csv", index = False)

# Export the selected subset of columns to a second CSV.
reduced_frame = frame[['EDA_Tonic','EDA_Phasic','SCR_Onsets','SCR_Peaks','SCR_Height','SCR_Recovery',
                       'ECG_Rate','PPG_Rate','RSP_Clean','RSP_Rate','RSP_Amplitude','RSP_RVT',
                       'EMG_Amplitude_zygo','EMG_Onsets_zygo','EMG_Offsets_zygo','EMG_Activity_zygo','EMG_Raw_zygo',
                       'EMG_Amplitude_coru','EMG_Onsets_coru','EMG_Offsets_coru','EMG_Activity_coru','EMG_Raw_coru',
                       'EMG_Amplitude_trap','EMG_Onsets_trap','EMG_Offsets_trap','EMG_Activity_trap','EMG_Raw_trap',
                       'ID','VIDEO_FILENAME','SCENARIO','unique_number']]
reduced_frame.to_csv("./reduced_vars_round_up/test_physiology_senario_2_fold_3_neurokit2_reduced_variables_20HZ_20230422.csv", index = False)


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


  warn(
  warn(
  warn(
  warn(


In [30]:
# Build the 20 Hz physiology tables for the scenario_2 / fold_4 test split.
#
# Each matching CSV is processed with the NeuroKit2 pipelines (sampling_rate=1000),
# handed to reduce_resolution_to_20HZ (defined elsewhere in this notebook; it is
# expected to add the down-sampled columns to `reduced_resolution` in place --
# TODO confirm against its definition), tagged with ID / VIDEO_FILENAME / SCENARIO /
# unique_number and collected in `concated`. The concatenated frame is then written
# out in full and as a column subset.

# Load the {ID: {VIDEO_FILENAME: unique_number}} mapping inside a context manager
# so the file handle is closed as soon as the JSON has been parsed.
with open('train_physiology_senario_1_neurokit2_20HZ_20230413.json') as id_vid_file:
    id_vid = json.load(id_vid_file)
concated = []
# Raw string literal: the Windows path contains backslash sequences (\O, \E, \s)
# that are invalid escapes in a regular string literal.
for root, subFolder, files in os.walk(r'D:\OneDrive - The Pennsylvania State University\EPiC2023\scenario_2'):
    for file in files:
        # Keep only CSV files whose directory path mentions test, physiology and fold_4.
        if not ('test' in root and 'physiology' in root and 'fold_4' in root and file.endswith(".csv")):
            continue
        file_path = os.path.join(root, file)

        # Identifiers parsed from the file name (sub_<ID>_vid_<VIDEO>.csv) and from the
        # directory path (scenario_<N>\...; the pattern relies on the Windows separator).
        ID = re.search(r'sub_(.*?)_vid', file).group(1).strip()
        VIDEO_FILENAME = re.search(r'vid_(.*?)\.csv', file).group(1).strip()
        SCENARIO = re.search(r'scenario_(.*?)\\', root).group(1).strip()

        # Candidate identifier built from 8 distinct random digits (a leading 0 is
        # dropped by int()). It is only kept when id_vid has no entry yet for this
        # (ID, VIDEO_FILENAME) pair; otherwise the stored number is reused.
        # NOTE(review): candidates are not checked against numbers already present in
        # id_vid, and the updated mapping is not written back to disk in this cell.
        unique_digits = random.sample(range(10), 8)
        unique_number = int(''.join(map(str, unique_digits)))
        unique_number = id_vid.setdefault(ID, {}).setdefault(VIDEO_FILENAME, unique_number)

        df = pd.read_csv(file_path)
        reduced_resolution = pd.DataFrame()

        # ECG, BVP/PPG, EDA/GSR and respiration follow the same recipe; the tuple
        # order fixes the order in which the helper receives them.
        for column, process in (('ecg', nk.ecg_process), ('bvp', nk.ppg_process),
                                ('gsr', nk.eda_process), ('rsp', nk.rsp_process)):
            signals, info = process(df[column], sampling_rate=1000)
            reduce_resolution_to_20HZ(reduced_resolution, signals)

        # Skin temperature: resample to 1/50 of the original length (1000 Hz -> 20 Hz),
        # rounding the sample count up when the length is not a multiple of 50.
        skt_length = (len(df['skt']) + 49) // 50
        reduced_resolution['SKT'] = signal.resample(df['skt'], skt_length)

        # The three EMG channels share one pipeline; each channel's columns get a
        # suffix (_zygo, _coru, _trap) so they stay distinguishable.
        # emg_coru has triggered: NeuroKitWarning: No events found. Check your
        # event_channel or adjust 'threshold' or 'keep' arguments.
        for muscle in ('zygo', 'coru', 'trap'):
            emg_signals, emg_info = nk.emg_process(df['emg_' + muscle], sampling_rate=1000)
            reduce_resolution_to_20HZ(reduced_resolution, emg_signals.add_suffix('_' + muscle))

        # Constant per-file metadata, broadcast to every row.
        reduced_resolution['ID'] = ID
        reduced_resolution['VIDEO_FILENAME'] = VIDEO_FILENAME
        reduced_resolution['SCENARIO'] = SCENARIO
        reduced_resolution['unique_number'] = unique_number
        concated.append(reduced_resolution)


# Stack all per-file tables and export every column.
frame = pd.concat(concated)
frame.to_csv("./all_vars_round_up/test_physiology_senario_2_fold_4_neurokit2_all_variables_20HZ_20230422.csv", index = False)

# Export the selected subset of columns to a second CSV.
reduced_frame = frame[['EDA_Tonic','EDA_Phasic','SCR_Onsets','SCR_Peaks','SCR_Height','SCR_Recovery',
                       'ECG_Rate','PPG_Rate','RSP_Clean','RSP_Rate','RSP_Amplitude','RSP_RVT',
                       'EMG_Amplitude_zygo','EMG_Onsets_zygo','EMG_Offsets_zygo','EMG_Activity_zygo','EMG_Raw_zygo',
                       'EMG_Amplitude_coru','EMG_Onsets_coru','EMG_Offsets_coru','EMG_Activity_coru','EMG_Raw_coru',
                       'EMG_Amplitude_trap','EMG_Onsets_trap','EMG_Offsets_trap','EMG_Activity_trap','EMG_Raw_trap',
                       'ID','VIDEO_FILENAME','SCENARIO','unique_number']]
reduced_frame.to_csv("./reduced_vars_round_up/test_physiology_senario_2_fold_4_neurokit2_reduced_variables_20HZ_20230422.csv", index = False)


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


In [31]:
# Build the 20 Hz physiology tables for the scenario_3 / fold_0 test split.
#
# Each matching CSV is processed with the NeuroKit2 pipelines (sampling_rate=1000),
# handed to reduce_resolution_to_20HZ (defined elsewhere in this notebook; it is
# expected to add the down-sampled columns to `reduced_resolution` in place --
# TODO confirm against its definition), tagged with ID / VIDEO_FILENAME / SCENARIO /
# unique_number and collected in `concated`. The concatenated frame is then written
# out in full and as a column subset.

# Load the {ID: {VIDEO_FILENAME: unique_number}} mapping inside a context manager
# so the file handle is closed as soon as the JSON has been parsed.
with open('train_physiology_senario_1_neurokit2_20HZ_20230413.json') as id_vid_file:
    id_vid = json.load(id_vid_file)
concated = []
# Raw string literal: the Windows path contains backslash sequences (\O, \E, \s)
# that are invalid escapes in a regular string literal.
for root, subFolder, files in os.walk(r'D:\OneDrive - The Pennsylvania State University\EPiC2023\scenario_3'):
    for file in files:
        # Keep only CSV files whose directory path mentions test, physiology and fold_0.
        if not ('test' in root and 'physiology' in root and 'fold_0' in root and file.endswith(".csv")):
            continue
        file_path = os.path.join(root, file)

        # Identifiers parsed from the file name (sub_<ID>_vid_<VIDEO>.csv) and from the
        # directory path (scenario_<N>\...; the pattern relies on the Windows separator).
        ID = re.search(r'sub_(.*?)_vid', file).group(1).strip()
        VIDEO_FILENAME = re.search(r'vid_(.*?)\.csv', file).group(1).strip()
        SCENARIO = re.search(r'scenario_(.*?)\\', root).group(1).strip()

        # Candidate identifier built from 8 distinct random digits (a leading 0 is
        # dropped by int()). It is only kept when id_vid has no entry yet for this
        # (ID, VIDEO_FILENAME) pair; otherwise the stored number is reused.
        # NOTE(review): candidates are not checked against numbers already present in
        # id_vid, and the updated mapping is not written back to disk in this cell.
        unique_digits = random.sample(range(10), 8)
        unique_number = int(''.join(map(str, unique_digits)))
        unique_number = id_vid.setdefault(ID, {}).setdefault(VIDEO_FILENAME, unique_number)

        df = pd.read_csv(file_path)
        reduced_resolution = pd.DataFrame()

        # ECG, BVP/PPG, EDA/GSR and respiration follow the same recipe; the tuple
        # order fixes the order in which the helper receives them.
        for column, process in (('ecg', nk.ecg_process), ('bvp', nk.ppg_process),
                                ('gsr', nk.eda_process), ('rsp', nk.rsp_process)):
            signals, info = process(df[column], sampling_rate=1000)
            reduce_resolution_to_20HZ(reduced_resolution, signals)

        # Skin temperature: resample to 1/50 of the original length (1000 Hz -> 20 Hz),
        # rounding the sample count up when the length is not a multiple of 50.
        skt_length = (len(df['skt']) + 49) // 50
        reduced_resolution['SKT'] = signal.resample(df['skt'], skt_length)

        # The three EMG channels share one pipeline; each channel's columns get a
        # suffix (_zygo, _coru, _trap) so they stay distinguishable.
        # emg_coru has triggered: NeuroKitWarning: No events found. Check your
        # event_channel or adjust 'threshold' or 'keep' arguments.
        for muscle in ('zygo', 'coru', 'trap'):
            emg_signals, emg_info = nk.emg_process(df['emg_' + muscle], sampling_rate=1000)
            reduce_resolution_to_20HZ(reduced_resolution, emg_signals.add_suffix('_' + muscle))

        # Constant per-file metadata, broadcast to every row.
        reduced_resolution['ID'] = ID
        reduced_resolution['VIDEO_FILENAME'] = VIDEO_FILENAME
        reduced_resolution['SCENARIO'] = SCENARIO
        reduced_resolution['unique_number'] = unique_number
        concated.append(reduced_resolution)


# Stack all per-file tables and export every column.
frame = pd.concat(concated)
frame.to_csv("./all_vars_round_up/test_physiology_senario_3_fold_0_neurokit2_all_variables_20HZ_20230422.csv", index = False)

# Export the selected subset of columns to a second CSV.
reduced_frame = frame[['EDA_Tonic','EDA_Phasic','SCR_Onsets','SCR_Peaks','SCR_Height','SCR_Recovery',
                       'ECG_Rate','PPG_Rate','RSP_Clean','RSP_Rate','RSP_Amplitude','RSP_RVT',
                       'EMG_Amplitude_zygo','EMG_Onsets_zygo','EMG_Offsets_zygo','EMG_Activity_zygo','EMG_Raw_zygo',
                       'EMG_Amplitude_coru','EMG_Onsets_coru','EMG_Offsets_coru','EMG_Activity_coru','EMG_Raw_coru',
                       'EMG_Amplitude_trap','EMG_Onsets_trap','EMG_Offsets_trap','EMG_Activity_trap','EMG_Raw_trap',
                       'ID','VIDEO_FILENAME','SCENARIO','unique_number']]
reduced_frame.to_csv("./reduced_vars_round_up/test_physiology_senario_3_fold_0_neurokit2_reduced_variables_20HZ_20230422.csv", index = False)


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(


  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(


In [32]:
# Build the 20 Hz physiology tables for the scenario_3 / fold_1 test split.
#
# Each matching CSV is processed with the NeuroKit2 pipelines (sampling_rate=1000),
# handed to reduce_resolution_to_20HZ (defined elsewhere in this notebook; it is
# expected to add the down-sampled columns to `reduced_resolution` in place --
# TODO confirm against its definition), tagged with ID / VIDEO_FILENAME / SCENARIO /
# unique_number and collected in `concated`. The concatenated frame is then written
# out in full and as a column subset.

# Load the {ID: {VIDEO_FILENAME: unique_number}} mapping inside a context manager
# so the file handle is closed as soon as the JSON has been parsed.
with open('train_physiology_senario_1_neurokit2_20HZ_20230413.json') as id_vid_file:
    id_vid = json.load(id_vid_file)
concated = []
# Raw string literal: the Windows path contains backslash sequences (\O, \E, \s)
# that are invalid escapes in a regular string literal.
for root, subFolder, files in os.walk(r'D:\OneDrive - The Pennsylvania State University\EPiC2023\scenario_3'):
    for file in files:
        # Keep only CSV files whose directory path mentions test, physiology and fold_1.
        if not ('test' in root and 'physiology' in root and 'fold_1' in root and file.endswith(".csv")):
            continue
        file_path = os.path.join(root, file)

        # Identifiers parsed from the file name (sub_<ID>_vid_<VIDEO>.csv) and from the
        # directory path (scenario_<N>\...; the pattern relies on the Windows separator).
        ID = re.search(r'sub_(.*?)_vid', file).group(1).strip()
        VIDEO_FILENAME = re.search(r'vid_(.*?)\.csv', file).group(1).strip()
        SCENARIO = re.search(r'scenario_(.*?)\\', root).group(1).strip()

        # Candidate identifier built from 8 distinct random digits (a leading 0 is
        # dropped by int()). It is only kept when id_vid has no entry yet for this
        # (ID, VIDEO_FILENAME) pair; otherwise the stored number is reused.
        # NOTE(review): candidates are not checked against numbers already present in
        # id_vid, and the updated mapping is not written back to disk in this cell.
        unique_digits = random.sample(range(10), 8)
        unique_number = int(''.join(map(str, unique_digits)))
        unique_number = id_vid.setdefault(ID, {}).setdefault(VIDEO_FILENAME, unique_number)

        df = pd.read_csv(file_path)
        reduced_resolution = pd.DataFrame()

        # ECG, BVP/PPG, EDA/GSR and respiration follow the same recipe; the tuple
        # order fixes the order in which the helper receives them.
        for column, process in (('ecg', nk.ecg_process), ('bvp', nk.ppg_process),
                                ('gsr', nk.eda_process), ('rsp', nk.rsp_process)):
            signals, info = process(df[column], sampling_rate=1000)
            reduce_resolution_to_20HZ(reduced_resolution, signals)

        # Skin temperature: resample to 1/50 of the original length (1000 Hz -> 20 Hz),
        # rounding the sample count up when the length is not a multiple of 50.
        skt_length = (len(df['skt']) + 49) // 50
        reduced_resolution['SKT'] = signal.resample(df['skt'], skt_length)

        # The three EMG channels share one pipeline; each channel's columns get a
        # suffix (_zygo, _coru, _trap) so they stay distinguishable.
        # emg_coru has triggered: NeuroKitWarning: No events found. Check your
        # event_channel or adjust 'threshold' or 'keep' arguments.
        for muscle in ('zygo', 'coru', 'trap'):
            emg_signals, emg_info = nk.emg_process(df['emg_' + muscle], sampling_rate=1000)
            reduce_resolution_to_20HZ(reduced_resolution, emg_signals.add_suffix('_' + muscle))

        # Constant per-file metadata, broadcast to every row.
        reduced_resolution['ID'] = ID
        reduced_resolution['VIDEO_FILENAME'] = VIDEO_FILENAME
        reduced_resolution['SCENARIO'] = SCENARIO
        reduced_resolution['unique_number'] = unique_number
        concated.append(reduced_resolution)


# Stack all per-file tables and export every column.
frame = pd.concat(concated)
frame.to_csv("./all_vars_round_up/test_physiology_senario_3_fold_1_neurokit2_all_variables_20HZ_20230422.csv", index = False)

# Export the selected subset of columns to a second CSV.
reduced_frame = frame[['EDA_Tonic','EDA_Phasic','SCR_Onsets','SCR_Peaks','SCR_Height','SCR_Recovery',
                       'ECG_Rate','PPG_Rate','RSP_Clean','RSP_Rate','RSP_Amplitude','RSP_RVT',
                       'EMG_Amplitude_zygo','EMG_Onsets_zygo','EMG_Offsets_zygo','EMG_Activity_zygo','EMG_Raw_zygo',
                       'EMG_Amplitude_coru','EMG_Onsets_coru','EMG_Offsets_coru','EMG_Activity_coru','EMG_Raw_coru',
                       'EMG_Amplitude_trap','EMG_Onsets_trap','EMG_Offsets_trap','EMG_Activity_trap','EMG_Raw_trap',
                       'ID','VIDEO_FILENAME','SCENARIO','unique_number']]
reduced_frame.to_csv("./reduced_vars_round_up/test_physiology_senario_3_fold_1_neurokit2_reduced_variables_20HZ_20230422.csv", index = False)


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(


  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


In [33]:
id_vid = json.load(open('train_physiology_senario_1_neurokit2_20HZ_20230413.json'))
concated = []
for root, subFolder, files in os.walk('D:\OneDrive - The Pennsylvania State University\EPiC2023\scenario_3'):
    for file in files:
        if 'test' in root and 'physiology' in root and 'fold_2' in root and file.endswith(".csv"):
            # Get the path to the CSV file
            file_path = os.path.join(root, file)
            
            ID = re.search(r'sub_(.*?)_vid', file).group(1).strip()
            VIDEO_FILENAME = re.search(r'vid_(.*?)\.csv', file).group(1).strip()
            SCENARIO = re.search(r'scenario_(.*?)\\', root).group(1).strip()
            unique_digits = random.sample(range(10), 8)
            unique_number = int(''.join(map(str, unique_digits)))
            if ID not in id_vid:
                id_vid[ID] = {VIDEO_FILENAME: unique_number}
            elif VIDEO_FILENAME not in id_vid[ID]:
                id_vid[ID][VIDEO_FILENAME] = unique_number
            else:
                unique_number = id_vid[ID][VIDEO_FILENAME]
            df = pd.read_csv(file_path)
            
            reduced_resolution = pd.DataFrame()
            
            # ecg reduce resolution
            ecg_signals,ecg_info = nk.ecg_process(df['ecg'], sampling_rate = 1000)
            reduce_resolution_to_20HZ(reduced_resolution,ecg_signals)
            
            
            # bvp/ppg reduce resolution
            ppg_signals,ppg_info = nk.ppg_process(df['bvp'], sampling_rate = 1000)
            reduce_resolution_to_20HZ(reduced_resolution,ppg_signals)


            # eda/gsr reduce resolution
            eda_signals,eda_info = nk.eda_process(df['gsr'], sampling_rate = 1000)
            reduce_resolution_to_20HZ(reduced_resolution,eda_signals)
            
            
            # rsp reduce resolution
            rsp_signals,rsp_info = nk.rsp_process(df['rsp'], sampling_rate = 1000)
            reduce_resolution_to_20HZ(reduced_resolution,rsp_signals)

            
            # skt
            if(len(df['skt'])%50) == 0:
                reduced_resolution['SKT'] = signal.resample(df['skt'],  int(len(df['skt'])/50))
            else:
                reduced_resolution['SKT'] = signal.resample(df['skt'],  int(len(df['skt'])/50) + 1)

            
            # emg_zygo
            emg_zygo_signals, emg_zygo_info = nk.emg_process(df['emg_zygo'], sampling_rate = 1000)
            new_column_names = [col + '_zygo' for col in emg_zygo_signals.columns]
            emg_zygo_signals = emg_zygo_signals.rename(columns=dict(zip(emg_zygo_signals.columns, new_column_names)))
            reduce_resolution_to_20HZ(reduced_resolution,emg_zygo_signals)
            
            # emg_coru
            #  NeuroKitWarning: No events found. Check your event_channel or adjust 'threshold' or 'keep' arguments.
            emg_coru_signals, emg_coru_info = nk.emg_process(df['emg_coru'], sampling_rate = 1000)
            new_column_names = [col + '_coru' for col in emg_coru_signals.columns]
            emg_coru_signals = emg_coru_signals.rename(columns=dict(zip(emg_coru_signals.columns, new_column_names)))
            reduce_resolution_to_20HZ(reduced_resolution,emg_coru_signals)
            
            # emg_trap
            emg_trap_signals, emg_trap_info = nk.emg_process(df['emg_trap'], sampling_rate = 1000)
            new_column_names = [col + '_trap' for col in emg_trap_signals.columns]
            emg_trap_signals = emg_trap_signals.rename(columns=dict(zip(emg_trap_signals.columns, new_column_names)))
            reduce_resolution_to_20HZ(reduced_resolution,emg_trap_signals)
            
            reduced_resolution['ID'] = ID
            reduced_resolution['VIDEO_FILENAME'] = VIDEO_FILENAME
            reduced_resolution['SCENARIO'] = SCENARIO
            reduced_resolution['unique_number'] = unique_number
            concated.append(reduced_resolution)


# Stack the per-recording tables collected in `concated` into one frame and save
# every column to the "all variables" CSV (row index not written).
frame = pd.concat(concated)
frame.to_csv(
    "./all_vars_round_up/test_physiology_senario_3_fold_2_neurokit2_all_variables_20HZ_20230422.csv",
    index=False,
)

# Keep a fixed subset of columns (listed one signal family per group, followed by the
# identifying columns) and save it to the "reduced variables" CSV.
reduced_frame = frame[
    ['EDA_Tonic', 'EDA_Phasic', 'SCR_Onsets', 'SCR_Peaks', 'SCR_Height', 'SCR_Recovery']
    + ['ECG_Rate', 'PPG_Rate']
    + ['RSP_Clean', 'RSP_Rate', 'RSP_Amplitude', 'RSP_RVT']
    + ['EMG_Amplitude_zygo', 'EMG_Onsets_zygo', 'EMG_Offsets_zygo', 'EMG_Activity_zygo', 'EMG_Raw_zygo']
    + ['EMG_Amplitude_coru', 'EMG_Onsets_coru', 'EMG_Offsets_coru', 'EMG_Activity_coru', 'EMG_Raw_coru']
    + ['EMG_Amplitude_trap', 'EMG_Onsets_trap', 'EMG_Offsets_trap', 'EMG_Activity_trap', 'EMG_Raw_trap']
    + ['ID', 'VIDEO_FILENAME', 'SCENARIO', 'unique_number']
]
reduced_frame.to_csv(
    "./reduced_vars_round_up/test_physiology_senario_3_fold_2_neurokit2_reduced_variables_20HZ_20230422.csv",
    index=False,
)


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(


In [34]:
# Build the 20 Hz NeuroKit2 feature tables for the scenario_3 / fold_3 test physiology
# recordings and write them to CSV: one file with every column and one with a fixed
# subset of columns.
#
# Depends on reduce_resolution_to_20HZ, which is defined elsewhere in this notebook.
# Its return value is not used here, so it presumably adds columns to the frame passed
# as its first argument -- TODO confirm against its definition.

# id_vid maps ID -> {VIDEO_FILENAME: unique_number}. The `with` block closes the file
# handle as soon as the JSON has been parsed.
with open('train_physiology_senario_1_neurokit2_20HZ_20230413.json') as id_vid_file:
    id_vid = json.load(id_vid_file)

# Make sure the output folders exist before the slow signal processing starts, so a
# missing directory cannot discard the results at the very end.
os.makedirs("./all_vars_round_up", exist_ok=True)
os.makedirs("./reduced_vars_round_up", exist_ok=True)

concated = []
# Raw string: the backslashes of the Windows path must never be read as escape sequences.
for root, subFolder, files in os.walk(r'D:\OneDrive - The Pennsylvania State University\EPiC2023\scenario_3'):
    # The folder filter depends only on `root`, so check it once per folder.
    if not ('test' in root and 'physiology' in root and 'fold_3' in root):
        continue
    for file in files:
        if not file.endswith(".csv"):
            continue

        # Get the path to the CSV file
        file_path = os.path.join(root, file)

        # ID is the text between 'sub_' and '_vid' in the file name, VIDEO_FILENAME the
        # text between 'vid_' and '.csv'. SCENARIO is the text between 'scenario_' and
        # the next backslash of the folder path (so a backslash-separated path is expected).
        ID = re.search(r'sub_(.*?)_vid', file).group(1).strip()
        VIDEO_FILENAME = re.search(r'vid_(.*?)\.csv', file).group(1).strip()
        SCENARIO = re.search(r'scenario_(.*?)\\', root).group(1).strip()

        # Reuse the number already recorded for this (ID, video) pair; otherwise build a
        # new one from 8 distinct random digits and record it in id_vid.
        # NOTE(review): a new number is not checked against the numbers already present
        # in id_vid, so two recordings can receive the same value.
        videos_for_id = id_vid.setdefault(ID, {})
        if VIDEO_FILENAME not in videos_for_id:
            unique_digits = random.sample(range(10), 8)
            videos_for_id[VIDEO_FILENAME] = int(''.join(map(str, unique_digits)))
        unique_number = videos_for_id[VIDEO_FILENAME]

        df = pd.read_csv(file_path)

        reduced_resolution = pd.DataFrame()

        # ecg, bvp/ppg, eda/gsr and rsp: run the matching NeuroKit2 pipeline with
        # sampling_rate = 1000, then reduce the resolution of its output.
        for column, process in (('ecg', nk.ecg_process), ('bvp', nk.ppg_process),
                                ('gsr', nk.eda_process), ('rsp', nk.rsp_process)):
            processed_signals, _info = process(df[column], sampling_rate = 1000)
            reduce_resolution_to_20HZ(reduced_resolution, processed_signals)

        # skt: keep one output sample per 50 input samples, rounding the count up
        # (ceiling division) when the length is not a multiple of 50.
        skt_samples = (len(df['skt']) + 49) // 50
        reduced_resolution['SKT'] = signal.resample(df['skt'], skt_samples)

        # emg_zygo, emg_coru, emg_trap: same pipeline for each channel; the channel name
        # is appended to every output column so the three sets do not collide.
        # emg_coru has been seen to emit:
        #  NeuroKitWarning: No events found. Check your event_channel or adjust 'threshold' or 'keep' arguments.
        for muscle in ('zygo', 'coru', 'trap'):
            emg_signals, _emg_info = nk.emg_process(df['emg_' + muscle], sampling_rate = 1000)
            emg_signals = emg_signals.add_suffix('_' + muscle)
            reduce_resolution_to_20HZ(reduced_resolution, emg_signals)

        # Tag every row with the recording it came from.
        reduced_resolution['ID'] = ID
        reduced_resolution['VIDEO_FILENAME'] = VIDEO_FILENAME
        reduced_resolution['SCENARIO'] = SCENARIO
        reduced_resolution['unique_number'] = unique_number
        concated.append(reduced_resolution)


# pd.concat raises ValueError when no recording matched the filter above.
frame = pd.concat(concated)
frame.to_csv("./all_vars_round_up/test_physiology_senario_3_fold_3_neurokit2_all_variables_20HZ_20230422.csv", index = False)

reduced_frame = frame[['EDA_Tonic','EDA_Phasic','SCR_Onsets','SCR_Peaks','SCR_Height','SCR_Recovery',
                       'ECG_Rate','PPG_Rate','RSP_Clean','RSP_Rate','RSP_Amplitude','RSP_RVT',
                       'EMG_Amplitude_zygo','EMG_Onsets_zygo','EMG_Offsets_zygo','EMG_Activity_zygo','EMG_Raw_zygo',
                       'EMG_Amplitude_coru','EMG_Onsets_coru','EMG_Offsets_coru','EMG_Activity_coru','EMG_Raw_coru',
                       'EMG_Amplitude_trap','EMG_Onsets_trap','EMG_Offsets_trap','EMG_Activity_trap','EMG_Raw_trap',
                       'ID','VIDEO_FILENAME','SCENARIO','unique_number']]
reduced_frame.to_csv("./reduced_vars_round_up/test_physiology_senario_3_fold_3_neurokit2_reduced_variables_20HZ_20230422.csv", index = False)


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


In [35]:
# Build the 20 Hz NeuroKit2 feature tables for the scenario_4 / fold_0 test physiology
# recordings and write them to CSV: one file with every column and one with a fixed
# subset of columns.
#
# Depends on reduce_resolution_to_20HZ, which is defined elsewhere in this notebook.
# Its return value is not used here, so it presumably adds columns to the frame passed
# as its first argument -- TODO confirm against its definition.

# id_vid maps ID -> {VIDEO_FILENAME: unique_number}. The `with` block closes the file
# handle as soon as the JSON has been parsed.
with open('train_physiology_senario_1_neurokit2_20HZ_20230413.json') as id_vid_file:
    id_vid = json.load(id_vid_file)

# Make sure the output folders exist before the slow signal processing starts, so a
# missing directory cannot discard the results at the very end.
os.makedirs("./all_vars_round_up", exist_ok=True)
os.makedirs("./reduced_vars_round_up", exist_ok=True)

concated = []
# Raw string: the backslashes of the Windows path must never be read as escape sequences.
for root, subFolder, files in os.walk(r'D:\OneDrive - The Pennsylvania State University\EPiC2023\scenario_4'):
    # The folder filter depends only on `root`, so check it once per folder.
    if not ('test' in root and 'physiology' in root and 'fold_0' in root):
        continue
    for file in files:
        if not file.endswith(".csv"):
            continue

        # Get the path to the CSV file
        file_path = os.path.join(root, file)

        # ID is the text between 'sub_' and '_vid' in the file name, VIDEO_FILENAME the
        # text between 'vid_' and '.csv'. SCENARIO is the text between 'scenario_' and
        # the next backslash of the folder path (so a backslash-separated path is expected).
        ID = re.search(r'sub_(.*?)_vid', file).group(1).strip()
        VIDEO_FILENAME = re.search(r'vid_(.*?)\.csv', file).group(1).strip()
        SCENARIO = re.search(r'scenario_(.*?)\\', root).group(1).strip()

        # Reuse the number already recorded for this (ID, video) pair; otherwise build a
        # new one from 8 distinct random digits and record it in id_vid.
        # NOTE(review): a new number is not checked against the numbers already present
        # in id_vid, so two recordings can receive the same value.
        videos_for_id = id_vid.setdefault(ID, {})
        if VIDEO_FILENAME not in videos_for_id:
            unique_digits = random.sample(range(10), 8)
            videos_for_id[VIDEO_FILENAME] = int(''.join(map(str, unique_digits)))
        unique_number = videos_for_id[VIDEO_FILENAME]

        df = pd.read_csv(file_path)

        reduced_resolution = pd.DataFrame()

        # ecg, bvp/ppg, eda/gsr and rsp: run the matching NeuroKit2 pipeline with
        # sampling_rate = 1000, then reduce the resolution of its output.
        for column, process in (('ecg', nk.ecg_process), ('bvp', nk.ppg_process),
                                ('gsr', nk.eda_process), ('rsp', nk.rsp_process)):
            processed_signals, _info = process(df[column], sampling_rate = 1000)
            reduce_resolution_to_20HZ(reduced_resolution, processed_signals)

        # skt: keep one output sample per 50 input samples, rounding the count up
        # (ceiling division) when the length is not a multiple of 50.
        skt_samples = (len(df['skt']) + 49) // 50
        reduced_resolution['SKT'] = signal.resample(df['skt'], skt_samples)

        # emg_zygo, emg_coru, emg_trap: same pipeline for each channel; the channel name
        # is appended to every output column so the three sets do not collide.
        # emg_coru has been seen to emit:
        #  NeuroKitWarning: No events found. Check your event_channel or adjust 'threshold' or 'keep' arguments.
        for muscle in ('zygo', 'coru', 'trap'):
            emg_signals, _emg_info = nk.emg_process(df['emg_' + muscle], sampling_rate = 1000)
            emg_signals = emg_signals.add_suffix('_' + muscle)
            reduce_resolution_to_20HZ(reduced_resolution, emg_signals)

        # Tag every row with the recording it came from.
        reduced_resolution['ID'] = ID
        reduced_resolution['VIDEO_FILENAME'] = VIDEO_FILENAME
        reduced_resolution['SCENARIO'] = SCENARIO
        reduced_resolution['unique_number'] = unique_number
        concated.append(reduced_resolution)


# pd.concat raises ValueError when no recording matched the filter above.
frame = pd.concat(concated)
frame.to_csv("./all_vars_round_up/test_physiology_senario_4_fold_0_neurokit2_all_variables_20HZ_20230422.csv", index = False)

reduced_frame = frame[['EDA_Tonic','EDA_Phasic','SCR_Onsets','SCR_Peaks','SCR_Height','SCR_Recovery',
                       'ECG_Rate','PPG_Rate','RSP_Clean','RSP_Rate','RSP_Amplitude','RSP_RVT',
                       'EMG_Amplitude_zygo','EMG_Onsets_zygo','EMG_Offsets_zygo','EMG_Activity_zygo','EMG_Raw_zygo',
                       'EMG_Amplitude_coru','EMG_Onsets_coru','EMG_Offsets_coru','EMG_Activity_coru','EMG_Raw_coru',
                       'EMG_Amplitude_trap','EMG_Onsets_trap','EMG_Offsets_trap','EMG_Activity_trap','EMG_Raw_trap',
                       'ID','VIDEO_FILENAME','SCENARIO','unique_number']]
reduced_frame.to_csv("./reduced_vars_round_up/test_physiology_senario_4_fold_0_neurokit2_reduced_variables_20HZ_20230422.csv", index = False)


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


In [36]:
# Build the 20 Hz NeuroKit2 feature tables for the scenario_4 / fold_1 test physiology
# recordings and write them to CSV: one file with every column and one with a fixed
# subset of columns.
#
# Depends on reduce_resolution_to_20HZ, which is defined elsewhere in this notebook.
# Its return value is not used here, so it presumably adds columns to the frame passed
# as its first argument -- TODO confirm against its definition.

# id_vid maps ID -> {VIDEO_FILENAME: unique_number}. The `with` block closes the file
# handle as soon as the JSON has been parsed.
with open('train_physiology_senario_1_neurokit2_20HZ_20230413.json') as id_vid_file:
    id_vid = json.load(id_vid_file)

# Make sure the output folders exist before the slow signal processing starts, so a
# missing directory cannot discard the results at the very end.
os.makedirs("./all_vars_round_up", exist_ok=True)
os.makedirs("./reduced_vars_round_up", exist_ok=True)

concated = []
# Raw string: the backslashes of the Windows path must never be read as escape sequences.
for root, subFolder, files in os.walk(r'D:\OneDrive - The Pennsylvania State University\EPiC2023\scenario_4'):
    # The folder filter depends only on `root`, so check it once per folder.
    if not ('test' in root and 'physiology' in root and 'fold_1' in root):
        continue
    for file in files:
        if not file.endswith(".csv"):
            continue

        # Get the path to the CSV file
        file_path = os.path.join(root, file)

        # ID is the text between 'sub_' and '_vid' in the file name, VIDEO_FILENAME the
        # text between 'vid_' and '.csv'. SCENARIO is the text between 'scenario_' and
        # the next backslash of the folder path (so a backslash-separated path is expected).
        ID = re.search(r'sub_(.*?)_vid', file).group(1).strip()
        VIDEO_FILENAME = re.search(r'vid_(.*?)\.csv', file).group(1).strip()
        SCENARIO = re.search(r'scenario_(.*?)\\', root).group(1).strip()

        # Reuse the number already recorded for this (ID, video) pair; otherwise build a
        # new one from 8 distinct random digits and record it in id_vid.
        # NOTE(review): a new number is not checked against the numbers already present
        # in id_vid, so two recordings can receive the same value.
        videos_for_id = id_vid.setdefault(ID, {})
        if VIDEO_FILENAME not in videos_for_id:
            unique_digits = random.sample(range(10), 8)
            videos_for_id[VIDEO_FILENAME] = int(''.join(map(str, unique_digits)))
        unique_number = videos_for_id[VIDEO_FILENAME]

        df = pd.read_csv(file_path)

        reduced_resolution = pd.DataFrame()

        # ecg, bvp/ppg, eda/gsr and rsp: run the matching NeuroKit2 pipeline with
        # sampling_rate = 1000, then reduce the resolution of its output.
        for column, process in (('ecg', nk.ecg_process), ('bvp', nk.ppg_process),
                                ('gsr', nk.eda_process), ('rsp', nk.rsp_process)):
            processed_signals, _info = process(df[column], sampling_rate = 1000)
            reduce_resolution_to_20HZ(reduced_resolution, processed_signals)

        # skt: keep one output sample per 50 input samples, rounding the count up
        # (ceiling division) when the length is not a multiple of 50.
        skt_samples = (len(df['skt']) + 49) // 50
        reduced_resolution['SKT'] = signal.resample(df['skt'], skt_samples)

        # emg_zygo, emg_coru, emg_trap: same pipeline for each channel; the channel name
        # is appended to every output column so the three sets do not collide.
        # emg_coru has been seen to emit:
        #  NeuroKitWarning: No events found. Check your event_channel or adjust 'threshold' or 'keep' arguments.
        for muscle in ('zygo', 'coru', 'trap'):
            emg_signals, _emg_info = nk.emg_process(df['emg_' + muscle], sampling_rate = 1000)
            emg_signals = emg_signals.add_suffix('_' + muscle)
            reduce_resolution_to_20HZ(reduced_resolution, emg_signals)

        # Tag every row with the recording it came from.
        reduced_resolution['ID'] = ID
        reduced_resolution['VIDEO_FILENAME'] = VIDEO_FILENAME
        reduced_resolution['SCENARIO'] = SCENARIO
        reduced_resolution['unique_number'] = unique_number
        concated.append(reduced_resolution)


# pd.concat raises ValueError when no recording matched the filter above.
frame = pd.concat(concated)
frame.to_csv("./all_vars_round_up/test_physiology_senario_4_fold_1_neurokit2_all_variables_20HZ_20230422.csv", index = False)

reduced_frame = frame[['EDA_Tonic','EDA_Phasic','SCR_Onsets','SCR_Peaks','SCR_Height','SCR_Recovery',
                       'ECG_Rate','PPG_Rate','RSP_Clean','RSP_Rate','RSP_Amplitude','RSP_RVT',
                       'EMG_Amplitude_zygo','EMG_Onsets_zygo','EMG_Offsets_zygo','EMG_Activity_zygo','EMG_Raw_zygo',
                       'EMG_Amplitude_coru','EMG_Onsets_coru','EMG_Offsets_coru','EMG_Activity_coru','EMG_Raw_coru',
                       'EMG_Amplitude_trap','EMG_Onsets_trap','EMG_Offsets_trap','EMG_Activity_trap','EMG_Raw_trap',
                       'ID','VIDEO_FILENAME','SCENARIO','unique_number']]
reduced_frame.to_csv("./reduced_vars_round_up/test_physiology_senario_4_fold_1_neurokit2_reduced_variables_20HZ_20230422.csv", index = False)


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
