Sample code for calculating the Cross Entropies between HR and Temp for the first two subjects

In [2]:
import pandas as pd
import numpy as np
import matplotlib as plt
import pickle
import EntropyHub as EH
import gc 
import warnings
warnings.filterwarnings("ignore")

In [3]:
dropped_subjects = []

first_subject_id = 1
last_subject_id = 16 # We have 16 subjects in total, here just as an example I have only imported data from the first two subjects 


data_folder = 'new_data' 
folder_to_save = 'BVPACC' #save computed entropies

#name of row
XEntropy_signals = 'bvp+acc'


In [4]:
fs = {} # Dictionary to store sampling frequency of each signal
fs["acc"] = 32
fs["bvp"] = 64
fs["eda"] = 4
fs["temp"] = 4
fs["hr"]  = 1

epoch_seconds = 5 * 60 # 1 epoch is 5 minutes => 5min * 60 seconds/min = 300 seconds 

epoch_size = 300
# e4_dfs_list = {' hr',' temp', ' eda', ' acc', ' bvp'} 

e4_dfs_list = {'bvp','acc'}

In [5]:
segmented_dfs = {}
for subject_id in [id for id in range(first_subject_id, last_subject_id + 1) if id not in dropped_subjects]:
    segmented_dfs[subject_id] = pd.read_pickle(f'{data_folder}/segmented_df_{subject_id}_5MIN.pkl') 
    print(f'Loaded segmented df for subject {subject_id}.')

Loaded segmented df for subject 1.
Loaded segmented df for subject 2.
Loaded segmented df for subject 3.
Loaded segmented df for subject 4.
Loaded segmented df for subject 5.
Loaded segmented df for subject 6.
Loaded segmented df for subject 7.
Loaded segmented df for subject 8.
Loaded segmented df for subject 9.
Loaded segmented df for subject 10.
Loaded segmented df for subject 11.
Loaded segmented df for subject 12.
Loaded segmented df for subject 13.
Loaded segmented df for subject 14.
Loaded segmented df for subject 15.
Loaded segmented df for subject 16.


In [6]:
# Combine all epochs vertically to form one DataFrame

concatenated_dfs = {}

for subject_id in [id for id in range(first_subject_id, last_subject_id + 1) if id not in dropped_subjects]:
    for signal_name in e4_dfs_list:
        try:
            concatenated_dfs[f'{subject_id}, {signal_name}'] = pd.concat(segmented_dfs[subject_id][f'{signal_name}_segments'].values, ignore_index=True)
            print(f'Vertically concatenated all epochs for subject {subject_id}, {signal_name}.')
        except Exception as e:
            print(f'Error - {e} for {subject_id}, {signal_name}')

Vertically concatenated all epochs for subject 1, acc.
Vertically concatenated all epochs for subject 1, bvp.
Vertically concatenated all epochs for subject 2, acc.
Vertically concatenated all epochs for subject 2, bvp.
Vertically concatenated all epochs for subject 3, acc.
Vertically concatenated all epochs for subject 3, bvp.
Vertically concatenated all epochs for subject 4, acc.
Vertically concatenated all epochs for subject 4, bvp.
Vertically concatenated all epochs for subject 5, acc.
Vertically concatenated all epochs for subject 5, bvp.
Vertically concatenated all epochs for subject 6, acc.
Vertically concatenated all epochs for subject 6, bvp.
Vertically concatenated all epochs for subject 7, acc.
Vertically concatenated all epochs for subject 7, bvp.
Vertically concatenated all epochs for subject 8, acc.
Vertically concatenated all epochs for subject 8, bvp.
Vertically concatenated all epochs for subject 9, acc.
Vertically concatenated all epochs for subject 9, bvp.
Vertically

For the Cross Entropy calculation, both signals must have the same length. Therefore, we need to resample it as shown below

In [7]:
resampled_dfs = {}

for subject_id in [id for id in range(first_subject_id, last_subject_id + 1) if id not in dropped_subjects]:
    for signal_name in e4_dfs_list:
        try:
            resampled_dfs[f'{subject_id}, {signal_name}'] = concatenated_dfs[f'{subject_id}, {signal_name}'].groupby(concatenated_dfs[f'{subject_id}, {signal_name}'].index // fs[f'{signal_name}']).mean()
            print(f'Resampled concatenated df for subject {subject_id}, {signal_name}.')
        except Exception as e:
            print(f'Error - {e} for {subject_id}, {signal_name}')

Resampled concatenated df for subject 1, acc.
Resampled concatenated df for subject 1, bvp.
Resampled concatenated df for subject 2, acc.
Resampled concatenated df for subject 2, bvp.
Resampled concatenated df for subject 3, acc.
Resampled concatenated df for subject 3, bvp.
Resampled concatenated df for subject 4, acc.
Resampled concatenated df for subject 4, bvp.
Resampled concatenated df for subject 5, acc.
Resampled concatenated df for subject 5, bvp.
Resampled concatenated df for subject 6, acc.
Resampled concatenated df for subject 6, bvp.
Resampled concatenated df for subject 7, acc.
Resampled concatenated df for subject 7, bvp.
Resampled concatenated df for subject 8, acc.
Resampled concatenated df for subject 8, bvp.
Resampled concatenated df for subject 9, acc.
Resampled concatenated df for subject 9, bvp.
Resampled concatenated df for subject 10, acc.
Resampled concatenated df for subject 10, bvp.
Resampled concatenated df for subject 11, acc.
Error - Unable to allocate 311.

In [8]:
# The signal needs to be reshaped into an array of shape: (number_of_epochs, epoch_size)

signal_reshaped = {}
for subject_id in [id for id in range(first_subject_id, last_subject_id + 1) if id not in dropped_subjects]:
    for signal_name in e4_dfs_list: 
            try:
                signal_to_reshape = np.array(resampled_dfs[f'{subject_id}, {signal_name}'])
                signal_reshaped[f'{subject_id}, {signal_name}'] = signal_to_reshape.reshape(int(len(signal_to_reshape) / epoch_size), epoch_size)
            except Exception as e:
                print(f'Error {e} for subject {subject_id}, {signal_name}.')

Error Unable to allocate 3.02 MiB for an array with shape (396000, 1) and data type float64 for subject 3, acc.
Error Unable to allocate 3.02 MiB for an array with shape (396000, 1) and data type float64 for subject 3, bvp.
Error Unable to allocate 3.09 MiB for an array with shape (405000, 1) and data type float64 for subject 4, acc.
Error Unable to allocate 3.09 MiB for an array with shape (405000, 1) and data type float64 for subject 4, bvp.
Error Unable to allocate 5.43 MiB for an array with shape (711900, 1) and data type float64 for subject 5, acc.
Error Unable to allocate 5.43 MiB for an array with shape (711900, 1) and data type float64 for subject 5, bvp.
Error Unable to allocate 3.60 MiB for an array with shape (471600, 1) and data type float64 for subject 6, acc.
Error Unable to allocate 3.60 MiB for an array with shape (471600, 1) and data type float64 for subject 6, bvp.
Error Unable to allocate 4.32 MiB for an array with shape (565800, 1) and data type float64 for subject 

## Feature Extraction - Non-linear domain (Entropy)

In [9]:
def get_nonlinear_features_gc(sig1, sig2, signal_name=""):
  
    feature_list = ["XAp0", "XAp1", "XAp2",
    "XCond1", "XCond2",  
    "XDist",
    "XFuzz1","XFuzz2",
    "XK2_1",  "XK2_2", 
    "XPermEn",
    "XSamp0","XSamp1", "XSamp2",
    "XSpec"]


    for i in range(len(feature_list)):
        feature_list[i] = signal_name + "_" + feature_list[i]
    
    aXAp0 = np.ones(len(sig1)) * np.nan
    aXAp1 = np.ones(len(sig1)) * np.nan
    aXAp2 = np.ones(len(sig1)) * np.nan

    aPhi = np.ones(len(sig1)) * np.nan
    aPhi1 = np.ones(len(sig1)) * np.nan
    aPhi2 = np.ones(len(sig1)) * np.nan
    aPhi3 = np.ones(len(sig1)) * np.nan

    aXCond1 = np.ones(len(sig1)) * np.nan
    aXCond2 = np.ones(len(sig1)) * np.nan

    aSEw = np.ones(len(sig1)) * np.nan
    aSEz = np.ones(len(sig1)) * np.nan

    aXDist = np.ones(len(sig1)) * np.nan
    aPpi = np.ones(len(sig1)) * np.nan
    aPpi1 = np.ones(len(sig1)) * np.nan
    aPpi2 = np.ones(len(sig1)) * np.nan

    aXFuzz1 = np.ones(len(sig1)) * np.nan
    aXFuzz2 = np.ones(len(sig1)) * np.nan

    aPs1 = np.ones(len(sig1)) * np.nan
    aPs1_1 = np.ones(len(sig1)) * np.nan
    aPs1_2 = np.ones(len(sig1)) * np.nan
    aPs2 = np.ones(len(sig1)) * np.nan
    aPs2_1 = np.ones(len(sig1)) * np.nan

    aXK2_1 = np.ones(len(sig1)) * np.nan
    aXK2_2 = np.ones(len(sig1)) * np.nan

    aCi = np.ones(len(sig1)) * np.nan
    aCi1 = np.ones(len(sig1)) * np.nan
    aCi2 = np.ones(len(sig1)) * np.nan

    aXPermEn = np.ones(len(sig1)) * np.nan

    aXSamp0 = np.ones(len(sig1)) * np.nan
    aXSamp1 = np.ones(len(sig1)) * np.nan
    aXSamp2 = np.ones(len(sig1)) * np.nan
    aA = np.ones(len(sig1)) * np.nan
    aA1 = np.ones(len(sig1)) * np.nan
    aA2 = np.ones(len(sig1)) * np.nan
    aB = np.ones(len(sig1)) * np.nan
    aB1 = np.ones(len(sig1)) * np.nan
    aB2 = np.ones(len(sig1)) * np.nan

    aXSpec = np.ones(len(sig1)) * np.nan
    aBandEn = np.ones(len(sig1)) * np.nan

    for i in range(len(sig1)):
        if (~np.isnan(sig1[i]).any() and ~np.isnan(sig2[i]).any()):
            if (len(np.unique(sig1[i])) != 1 and len(np.unique(sig2[i])) != 1):

                
                #XApEn
                try:
                    XAp, Phi = EH.XApEn(sig1[i], sig2[i])
                    aXAp0[i] = XAp[0]
                    aXAp1[i] = XAp[1]
                    aXAp2[i] = XAp[2]

                    aPhi[i] = Phi[0]
                    aPhi1[i] = Phi[1]
                    aPhi2[i] = Phi[2]
                    aPhi3[i] = Phi[3]
                except Exception as e:
                   print(f"Error computing XApEn for index {i}: {e}")

                #XCondEn
                    
                try:
                    XCond, SEw, SEz = EH.XCondEn(sig1[i], sig2[i])
                    aXCond1[i] = XCond[0]
                    aXCond2[i] = XCond[1]

                    aSEw[i] = SEw
                    aSEz[i] = SEz

                except Exception as e:
                   print(f"Error computing XCond for index {i}: {e}")
        
                #XDistEn
                try:
                    XDist, Ppi = EH.XDistEn(sig1[i], sig2[i])
                    aXDist[i] = XDist

                    aPpi[i] = Ppi[0]
                    aPpi1[i] = Ppi[1]
                    aPpi2[i] = Ppi[2]
                except Exception as e:
                   print(f"Error computing XDistEn for index {i}: {e}")

                #XFuzzEn
                try:
                    XFuzz, Ps1, Ps2 = EH.XFuzzEn(sig1[i], sig2[i])
                    aXFuzz1[i] = XFuzz[0]
                    aXFuzz2[i] = XFuzz[1]

                    aPs1[i] = Ps1[0]
                    aPs1_1[i] = Ps1[1]
                    aPs1_2[i] = Ps1[2]

                    aPs2[i] = Ps2[0]
                    aPs2_1[i] = Ps2[1]

                except Exception as e:
                        print(f"Error computing XFuzzEn for index {i}: {e}")

                #XK2En
                try:
                    XK2, Ci = EH.XK2En(sig1[i], sig2[i])
                    aXK2_1[i] = XK2[0]
                    aXK2_2[i] = XK2[1]

                    aCi[i] = Ci[0]
                    aCi1[i] = Ci[1]
                    aCi2[i] = Ci[2]

                except Exception as e:
                    print(f"Error computing XK2En for index {i}: {e}")

                #XPermEn
                try:
                    aXPermEn[i] = EH.XPermEn(sig1[i], sig2[i])
                        
                except Exception as e:
                    print(f"Error computing XPermEn for index {i}: {e}")

                #XSampEn
                try:
                    XSamp, A, B = EH.XSampEn(sig1[i], sig2[i])
                    aXSamp0[i] = XSamp[0]
                    aXSamp1[i] = XSamp[1]
                    aXSamp2[i] = XSamp[2]

                    aA[i] = A[0]
                    aA1[i] = A[1]
                    aA2[i] = A[2]
                    
                    aB[i] = B[0]
                    aB1[i] = B[1]
                    aB2[i] = B[2]
                except Exception as e:
                    print(f"Error computing XSampEn for index {i}: {e}")

                #XSpecEn
                try:
                    XSpec, BandEn = EH.XSpecEn(sig1[i], sig2[i])
                    aXSpec[i] = XSpec 
                    aBandEn[i] = BandEn
                except Exception as e:
                    print(f"Error computing XSpecEn for index {i}: {e}")

        if i % 100 == 0:
            gc.collect()

    results = [aXAp0, aXAp1, aXAp2, 
    aXCond1, aXCond2, 
    aXDist,
    aXFuzz1, aXFuzz2, 
    aXK2_1, aXK2_2, 
    aXPermEn,
    aXSamp0, aXSamp1, aXSamp2, 
    aXSpec]
    
    return pd.DataFrame(np.array(results).T, columns=feature_list)

In [10]:
non_linear_features = {}


# Here, I have explicilty chosen the HR and Temp signals for calculating cross entropy.. 
# Later, you will need to calculate cross entropies for all possible combinations.. eg: HR and EDA, Temp and EDA etc
for subject_id in [id for id in range(first_subject_id, last_subject_id + 1) if id not in dropped_subjects]:   
    try:
        non_linear_features[f'{subject_id}, {XEntropy_signals}'] = get_nonlinear_features_gc(signal_reshaped[f'{subject_id},bvp'], signal_reshaped[f'{subject_id},acc'], f'{XEntropy_signals}')
    except Exception as e:
        print(f'Error {e} for {subject_id}, {XEntropy_signals}.')
    print(f'!!!!!!!!!processed for subject {subject_id}!!!!!!!!!')


Error '1,bvp' for 1, bvp+acc.
!!!!!!!!!processed for subject 1!!!!!!!!!
Error '2,bvp' for 2, bvp+acc.
!!!!!!!!!processed for subject 2!!!!!!!!!
Error '3,bvp' for 3, bvp+acc.
!!!!!!!!!processed for subject 3!!!!!!!!!
Error '4,bvp' for 4, bvp+acc.
!!!!!!!!!processed for subject 4!!!!!!!!!
Error '5,bvp' for 5, bvp+acc.
!!!!!!!!!processed for subject 5!!!!!!!!!
Error '6,bvp' for 6, bvp+acc.
!!!!!!!!!processed for subject 6!!!!!!!!!
Error '7,bvp' for 7, bvp+acc.
!!!!!!!!!processed for subject 7!!!!!!!!!
Error '8,bvp' for 8, bvp+acc.
!!!!!!!!!processed for subject 8!!!!!!!!!
Error '9,bvp' for 9, bvp+acc.
!!!!!!!!!processed for subject 9!!!!!!!!!
Error '10,bvp' for 10, bvp+acc.
!!!!!!!!!processed for subject 10!!!!!!!!!
Error '11,bvp' for 11, bvp+acc.
!!!!!!!!!processed for subject 11!!!!!!!!!
Error '12,bvp' for 12, bvp+acc.
!!!!!!!!!processed for subject 12!!!!!!!!!
Error '13,bvp' for 13, bvp+acc.
!!!!!!!!!processed for subject 13!!!!!!!!!
Error '14,bvp' for 14, bvp+acc.
!!!!!!!!!processed f

In [11]:
def save_features():
    for subject_id in [id for id in range(first_subject_id, last_subject_id + 1) if id not in dropped_subjects]:
        #non_linear_features[f'{subject_id}, {XEntropy_signals}'].to_pickle(f'{folder_to_save}/{XEntropy_signals}_{subject_id}.pkl')
        non_linear_features[f'{subject_id}, {XEntropy_signals}'].to_csv(f'{folder_to_save}/{XEntropy_signals}_{subject_id}.csv')
        print(f'Saved XApEn for subject {subject_id}. ')

In [12]:
save_features()

KeyError: '1, bvp+acc'