In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [3]:
data = pd.read_csv('all_infants_vital_signs.csv')

In [4]:
#sliding window for input_output pairs

window_size = 4500 #roughly 30 minutes for each infant
step_size = 750 #step size of about 5 minutes between consecutive windows 

X = []
y = []

In [6]:
for infant_id in data['infant_num'].unique():
    infant_data = data[data['infant_num'] == infant_id]
    for i in range(0,len(infant_data) - window_size,step_size):
        X_window = infant_data.iloc[i:i + window_size].values
        y_window = infant_data.iloc[i + window_size][['heart_rate (BPM)', 'respiration_rate (BrPM)']].values
        X.append(X_window)
        y.append(y_window)

X = np.array(X)
y = np.array(y)


In [7]:
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size = 0.4, random_state=69)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=69)

In [8]:
print(f"Training Input shape: {X_train.shape}")  #number of windows, window sizem number of features
print(f"Training Output shape: {y_train.shape}") # number of windows, number of targets
print(f"Validation Input shape: {X_val.shape}")  
print(f"Validation Output shape: {y_val.shape}") 
print(f"Testing Input shape: {X_test.shape}")  
print(f"Testing Output shape: {y_test.shape}") 

Training Input shape: (6009, 4500, 5)
Training Output shape: (6009, 2)
Validation Input shape: (2003, 4500, 5)
Validation Output shape: (2003, 2)
Testing Input shape: (2004, 4500, 5)
Testing Output shape: (2004, 2)


In [9]:
np.save('X_train.npy', X_train)
np.save('y_train.npy', y_train)
np.save('X_val.npy', X_val)
np.save('y_val.npy', y_val)
np.save('X_test.npy', X_test)
np.save('y_test.npy', y_test)
