In [1]:
import pickle
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
import pysiology # reference https://github.com/Gabrock94/Pysiology
import neurokit2 as nk #  https://neuropsychology.github.io/NeuroKit/functions/ecg.html

# Raise pandas' display limits so frames of up to 500 rows / 500 columns
# are rendered in full instead of being truncated in cell output.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
import matplotlib.pyplot as plt

# Import local Libraries
from features.feature_extractor import Features

# Configure Notebook
import warnings
# Installs a blanket 'ignore' filter: every Python warning raised after this
# point is suppressed for the rest of the kernel session.
# NOTE(review): this also hides pandas/numpy warnings (e.g. SettingWithCopy,
# RuntimeWarning on empty means) that may point at real data problems.
warnings.filterwarnings('ignore')
%matplotlib inline
%load_ext autoreload
%autoreload 2

Thank you for using Pysiology. If you use it in your work, please cite:
Gabrieli G., Azhari A., Esposito G. (2020) PySiology: A Python Package for Physiological Feature Extraction. In: Esposito A., Faundez-Zanuy M., Morabito F., Pasero E. (eds) Neural Approaches to Dynamics of Signal Exchanges. Smart Innovation, Systems and Technologies, vol 151. Springer, Singapore. https://doi.org/10.1007/978-981-13-8950-4_35


In [2]:
# Overview: load the trajectory table and print cohort-level counts.
traj = pd.read_csv('../data/trajectories_with_features.csv')
waves = ['II']  # ignore 'I', 'III', 'V', 'PAP'

# Count distinct identifiers (a missing id, if any, counts as one value).
n_patients = len(traj['subject_id'].unique())
n_admissions = len(traj['hadm_id'].unique())

print('patients', n_patients)
print('admissions', n_admissions)
for note in (
    'hadm_id is a unique index on this dataframe',
    'train, tune, test splits are on a per-patient basis and not per-admission basis',
):
    print(note)
print(waves, "waveforms have been captured ")

patients 4917
admissions 5235
hadm_id is a unique index on this dataframe
train, tune, test splits are on a per-patient basis and not per-admission basis
['II'] waveforms have been captured 


In [3]:
# Convert every feature column into a 5-level z-score band and save the result.

def _sigma_band_categories(values):
    """Map a numeric feature onto integer bands in {-2, -1, 0, 1, 2}.

    The feature is first imputed on a private copy: ``-inf`` becomes the
    smallest finite value, ``+inf`` the largest finite value, and NaN the mean
    of the remaining values. The mean ``mu`` and standard deviation ``sigma``
    of the imputed data then define half-open bands::

        x <  mu-2s          -> -2
        mu-2s <= x < mu-s   -> -1
        mu-s  <= x < mu+s   ->  0
        mu+s  <= x < mu+2s  ->  1
        x >= mu+2s          ->  2

    Parameters
    ----------
    values : array-like of numbers
        Raw feature values; never modified.

    Returns
    -------
    numpy.ndarray of int, or None
        One band per input value, or ``None`` when the feature has no finite
        value at all (no statistics can be derived from it).
    """
    # np.array(...) always copies, so later writes to the DataFrame can never
    # alter the data the bands are computed from.
    x = np.array(values, dtype=float)

    finite = x[np.isfinite(x)]
    if finite.size == 0:
        return None

    # Clamp infinities to the true finite extremes (not a positional guess in
    # the sorted unique values, which depends on whether NaN is present).
    x[np.isneginf(x)] = finite.min()
    x[np.isposinf(x)] = finite.max()
    # Impute NaN with the mean of the (already clamped) remaining values.
    missing = np.isnan(x)
    if missing.any():
        x[missing] = x[~missing].mean()

    mu, sigma = x.mean(), x.std()
    if sigma == 0:
        # Constant feature: every value sits at the mean, i.e. the middle band.
        return np.zeros(x.shape, dtype=int)

    edges = [mu - 2 * sigma, mu - sigma, mu + sigma, mu + 2 * sigma]
    # digitize returns i with edges[i-1] <= x < edges[i]; shift so the middle
    # band is 0.
    return np.digitize(x, edges) - 2


traj = pd.read_csv('../data/trajectories_with_features.csv')
traj = traj.dropna(axis=1, how='all')
# remove this line once all features are generated
# (.copy() so the column writes below target an independent frame)
traj = traj.loc[traj.full_waveform_duration.notna(), :].copy()

# all feature columns come after "split"
features = traj.columns[list(traj.columns.values).index('split') + 1:].values

for f in features:
    bands = _sigma_band_categories(traj[f])
    if bands is None:
        # No finite data in this column; leave it untouched.
        continue
    # Assign the whole column in one step (no incremental masked writes), and
    # keep float columns float so the CSV formatting of the output is stable.
    if pd.api.types.is_float_dtype(traj[f]):
        bands = bands.astype(float)
    traj[f] = bands

traj.to_csv('../data/trajectories_with_categorical_features.csv', index=False)