In [None]:
# IPython autoreload extension in mode 2: imported modules are reloaded before
# each cell executes, so edits to the project modules are picked up without a
# kernel restart
%load_ext autoreload
%autoreload 2

In [None]:
# !pip install joblib

In [None]:
import os

from joblib import Parallel, delayed
import numpy as np
import pandas as pd
import tensorflow as tf
import sklearn
from sklearn.model_selection import GroupKFold, LeaveOneGroupOut

from models import stacked_cnn_rnn_improved as architecture
import algorithms.heartrate as hr
import utils

In [None]:
# tensorflow settings: log errors only and run Keras on a session whose
# GPU options have allow_growth enabled
tf.logging.set_verbosity(tf.logging.ERROR)
config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
tf.keras.backend.set_session(tf.Session(config=config))

In [None]:
# load data
# x_data_train, y_data_train, groups_train = ...
# dummy: synthetic stand-in data. It is drawn from a dedicated, seeded generator
# so that "Restart Kernel -> Run All" reproduces exactly the same arrays and the
# global numpy random state is left untouched.
train_size = 10000
n_groups = 28
rng = np.random.RandomState(0)
x_data_train = rng.normal(size=(train_size, 400, 1))
y_data_train = rng.normal(loc=68, scale=10, size=(train_size,))
# sorted so that samples belonging to the same group id are contiguous
groups_train = np.sort(rng.randint(n_groups, size=train_size))

print(x_data_train.shape, y_data_train.shape, groups_train.shape)

In [None]:
enlarge = 1
# keyword arguments forwarded to architecture.create(...)
model_params = dict(metrics=["mae", "mape"], enlarge=enlarge)
# keyword arguments forwarded to model.fit(...)
fit_params = dict(epochs=1, verbose=2)  # set epochs between 30 and 75

# every artifact of this run is written below output/<architecture module name>-x<enlarge>
modelname = (architecture.__name__ + "-x{}".format(enlarge))
modelpath = os.path.join("output", modelname)
os.makedirs(modelpath, exist_ok=True)
# exist_ok=True keeps this cell re-runnable; a plain os.mkdir raises
# FileExistsError as soon as the "final" directory exists from a previous run
os.makedirs(os.path.join(modelpath, "final"), exist_ok=True)

# write model architecture to JSON file
model = architecture.create(**model_params)
with open(os.path.join(modelpath, "model.json"), "w") as fp:
    fp.write(model.to_json())

In [None]:
# single process for parallel training
def process_split(xt, yt, i, fit_params):
    """Train a fresh model on one split, log the run and save the weights.

    Reads the module-level ``modelpath``, ``model_params`` and ``architecture``.
    Writes ``logs-<i>.csv`` (Keras ``CSVLogger``) and ``weights-<i>.h5`` into
    ``modelpath``, with ``i`` zero-padded to two digits.

    Parameters
    ----------
    xt, yt
        Training inputs and targets, passed unchanged to ``model.fit``.
    i : int
        Index of the split; only used to name the output files.
    fit_params : dict
        Keyword arguments for ``model.fit``. The dict is NOT modified; callbacks
        it already contains are kept and the ``CSVLogger`` is appended to them.

    Returns
    -------
    dict
        The ``history`` attribute of the object returned by ``model.fit``.
    """
    # set allow_growth in subprocess
    import tensorflow as tf
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    tf.keras.backend.set_session(tf.Session(config=config))

    csv_path = os.path.join(modelpath, "logs-{:02d}.csv".format(i))
    weights_path = os.path.join(modelpath, "weights-{:02d}.h5".format(i))

    # Work on a copy: the caller passes the same dict to every split and reuses
    # it for the final fit, so updating it in place would leak this split's
    # CSVLogger into later model.fit calls whenever the function runs in-process.
    split_fit_params = dict(fit_params)
    callbacks = list(split_fit_params.get("callbacks") or [])
    callbacks.append(tf.keras.callbacks.CSVLogger(csv_path))
    split_fit_params["callbacks"] = callbacks

    model = architecture.create(**model_params)
    try:
        r = model.fit(xt, yt, **split_fit_params)
        model.save_weights(weights_path)
    finally:
        # reset the Keras session even when training or saving fails
        tf.keras.backend.clear_session()

    return r.history

In [None]:
# split training data with LeaveOneGroupOut cross validation
# NOTE(review): utils.get_group_splitter is a project helper not shown here. The
# evaluation cell further down rebuilds its folds with sklearn's LeaveOneGroupOut
# and loads weights by fold index, so both splitters must yield the folds in the
# same order -- TODO confirm. If the helper returns a generator it can only be
# iterated once; re-run this cell before re-running the training cell.
splitter = utils.get_group_splitter(n_groups, groups_train)

In [None]:
# train one model per split with up to three joblib jobs at a time;
# rs collects the training history returned by process_split for every split
parallel = Parallel(n_jobs=3, verbose=10)
train_split = delayed(process_split)
rs = parallel(
    train_split(x_data_train[train_idx], y_data_train[train_idx], fold, fit_params)
    for fold, (train_idx, _) in enumerate(splitter)
)

In [None]:
# load the model through the project helper from model.json in modelpath
model = utils.get_model_from_json(modelpath, "model.json")

# calculate MAPE and MAE for each left-out patient
splitter = LeaveOneGroupOut().split(x_data_train, y_data_train, groups=groups_train)
results = []
for fold, (_, held_out) in enumerate(splitter):
    # weights saved for this fold index by the cross-validation training run
    fold_weights = os.path.join(modelpath, "weights-{:02d}.h5".format(fold))
    model.load_weights(fold_weights)

    y_true = y_data_train[held_out]
    y_pred = model.predict(x_data_train[held_out])

    mape = hr.hr_mape(y_true, y_pred)
    mae = hr.hr_mae(y_true, y_pred)
    results.append((mape, mae))

# one row per fold, columns are (MAPE, MAE)
results = np.array(results)
display(results)

In [None]:
# train one model on entire training set and store its weights in the
# "final" subdirectory of modelpath
final_weights_path = os.path.join(modelpath, "final", "weights-00.h5")

model = architecture.create(**model_params)
r = model.fit(x_data_train, y_data_train, **fit_params)
model.save_weights(final_weights_path)

# reset the Keras session once the weights are on disk
tf.keras.backend.clear_session()