In [1]:
import os

from joblib import Parallel, delayed
import numpy as np
import pandas as pd
import tensorflow as tf
import sklearn
from sklearn.model_selection import GroupKFold, LeaveOneGroupOut

from models import deeper_fcn as architecture
import algorithms.heartrate as hr
import utils

In [2]:
# tensorflow settings: only log errors, and enable the allow_growth GPU option
# for the session that Keras will use in this (parent) process
tf.logging.set_verbosity(tf.logging.ERROR)
config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
tf.keras.backend.set_session(tf.Session(config=config))

In [3]:
# load data
# x_data_train, y_data_train, groups_train = ...
# dummy:
# A seeded, local generator keeps the dummy data identical across kernel
# restarts without touching NumPy's global random state.
SEED = 0
rng = np.random.RandomState(SEED)

train_size = 10000
n_groups = 28
# inputs: one array of shape (train_size, 400, 1)
x_data_train = rng.normal(size=(train_size, 400, 1))
# targets: one scalar per sample, drawn around 68 with standard deviation 10
y_data_train = rng.normal(loc=68, scale=10, size=(train_size,))
# group label in [0, n_groups) per sample; sorted so each group is contiguous
groups_train = np.sort(rng.randint(n_groups, size=train_size))

print(x_data_train.shape, y_data_train.shape, groups_train.shape)

(10000, 400, 1) (10000,) (10000,)


In [4]:
# hyper-parameters forwarded to architecture.create() and model.fit()
enlarge = 1
model_params = {"metrics": ["mae", "mape"], "enlarge": enlarge}
fit_params = {"epochs": 1, "verbose": 2}  # set epochs between 30 and 75

# all artefacts of this run live under output/<architecture module name>-x<enlarge>
suffix = "-x{}".format(enlarge)
modelname = architecture.__name__ + suffix
modelpath = os.path.join("output", modelname)
final_dir = os.path.join(modelpath, "final")
os.makedirs(final_dir, exist_ok=True)  # also creates modelpath itself

# write model architecture to JSON file
model = architecture.create(**model_params)
json_path = os.path.join(modelpath, "model.json")
with open(json_path, "w") as fp:
    fp.write(model.to_json())

In [5]:
# single process for parallel training
def process_split(xt, yt, i, fit_params):
    """Train a fresh model on one cross-validation split and save its weights.

    Reads the notebook-level names ``modelpath``, ``model_params`` and
    ``architecture``. Writes ``logs-XX.csv`` and ``weights-XX.h5`` (``XX`` is
    the zero-padded split index) into ``modelpath``.

    Parameters
    ----------
    xt, yt
        Training inputs and targets of this split; passed unchanged to
        ``model.fit``.
    i : int
        Split index, used to number the log and weights files.
    fit_params : dict
        Keyword arguments for ``model.fit``. The dict is not modified; any
        callbacks it already contains are kept and a ``CSVLogger`` is appended.

    Returns
    -------
    The ``history`` attribute of the object returned by ``model.fit``.
    """
    # set allow_growth in subprocess
    import tensorflow as tf
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    tf.keras.backend.set_session(tf.Session(config=config))

    csv_path = os.path.join(modelpath, "logs-{:02d}.csv".format(i))
    weights_path = os.path.join(modelpath, "weights-{:02d}.h5".format(i))

    # Build a per-split copy instead of calling fit_params.update(): when the
    # function runs in the caller's process (n_jobs=1, threading backend) an
    # in-place update would leak this split's logger into the shared dict.
    callbacks = list(fit_params.get("callbacks") or [])
    callbacks.append(tf.keras.callbacks.CSVLogger(csv_path))
    split_fit_params = dict(fit_params, callbacks=callbacks)

    model = architecture.create(**model_params)
    try:
        r = model.fit(xt, yt, **split_fit_params)
        model.save_weights(weights_path)
    finally:
        # always drop the Keras session state, even if training or saving failed
        tf.keras.backend.clear_session()

    return r.history

In [6]:
# split training data with LeaveOneGroupOut cross validation
# `splitter` is iterated by the parallel training cell, which unpacks every item
# as a (train_indices, validation_indices) pair.
# NOTE(review): that the helper really produces one split per group is not
# visible in this notebook — confirm in utils.get_group_splitter.
splitter = utils.get_group_splitter(n_groups, groups_train)

In [7]:
# one delayed training job per split; only the training indices are needed here
split_jobs = (
    delayed(process_split)(
        x_data_train[train_idx], y_data_train[train_idx], fold, fit_params
    )
    for fold, (train_idx, _) in enumerate(splitter)
)
# run the jobs on three workers; `rs` collects the value returned by each job
rs = Parallel(n_jobs=3, verbose=10)(split_jobs)

[Parallel(n_jobs=3)]: Using backend LokyBackend with 3 concurrent workers.
[Parallel(n_jobs=3)]: Done   2 tasks      | elapsed:   13.1s
[Parallel(n_jobs=3)]: Done   7 tasks      | elapsed:   33.6s
[Parallel(n_jobs=3)]: Done  12 tasks      | elapsed:   44.1s
[Parallel(n_jobs=3)]: Done  19 tasks      | elapsed:  1.2min
[Parallel(n_jobs=3)]: Done  26 out of  28 | elapsed:  1.6min remaining:    7.4s
[Parallel(n_jobs=3)]: Done  28 out of  28 | elapsed:  1.8min finished


In [8]:
model = utils.get_model_from_json(modelpath, "model.json")

# calculate MAPE and MAE for each left-out patient
# Rebuild the splits with the same helper that produced the training splits, so
# that split index i (and therefore weights-XX.h5) refers to the same held-out
# samples. Building them with a different splitter here could enumerate the
# folds in another order and score a model on data it was trained on.
splitter = utils.get_group_splitter(n_groups, groups_train)
results = []
for i, (t_inds, v_inds) in enumerate(splitter):
    model.load_weights(os.path.join(modelpath, "weights-{:02d}.h5".format(i)))
    y_pred = model.predict(x_data_train[v_inds])
    y_true = y_data_train[v_inds]
    # NOTE(review): y_true is 1-D; y_pred presumably has a trailing axis of
    # length 1 — confirm hr_mape / hr_mae handle that without broadcasting.
    results.append((hr.hr_mape(y_true, y_pred), hr.hr_mae(y_true, y_pred)))
# one row per split, columns: (MAPE, MAE)
results = np.array(results)
display(results)

array([[12.06516994,  7.90923906],
       [12.24843843,  7.97319389],
       [13.54572565,  8.56025441],
       [12.43106642,  8.09660937],
       [12.89235405,  8.46224464],
       [12.84064371,  8.29353937],
       [13.92926378,  8.83020024],
       [12.83810673,  8.17103714],
       [14.00564411,  8.93944474],
       [12.99247063,  8.42603769],
       [12.52689613,  7.99825266],
       [13.00469643,  8.66117743],
       [12.38614354,  8.30037064],
       [11.7570788 ,  7.55882427],
       [12.86287047,  8.21690826],
       [13.008025  ,  8.29714316],
       [12.22924466,  7.74526836],
       [13.07709925,  8.48293472],
       [12.38936716,  8.12894619],
       [14.13572796,  9.28046742],
       [11.78294899,  7.72729171],
       [11.9294967 ,  7.92972904],
       [12.07058457,  7.85185918],
       [12.63426889,  8.08966374],
       [13.71262795,  8.87422151],
       [13.19405335,  8.66145986],
       [12.87355714,  8.39588449],
       [12.72016255,  8.18050985]])

In [9]:
# train one model on entire training set

# the final weights go into the "final" sub-folder of the model directory
final_weights_path = os.path.join(modelpath, "final", "weights-00.h5")

model = architecture.create(**model_params)
r = model.fit(x_data_train, y_data_train, **fit_params)
model.save_weights(final_weights_path)
tf.keras.backend.clear_session()

 - 7s - loss: 532.3504 - mean_absolute_error: 15.4585 - mean_absolute_percentage_error: 23.5018
