In [1]:
# imports
import os
import sys

sys.path.append("../../")

import numpy as np
import tensorflow as tf
from numba import njit

from cardio import EcgDataset
from cardio import dataset as ds
# from cardio.dataset import best_practice
from cardio.dataset import F, B, C, V, Config
from cardio.dataset.research import KV, Grid, Option, Research

from cardio.dataset.models.tf import VGG, VGG16, VGG19

  from ._conv import register_converters as _register_converters


# Setting up

In [2]:
# Adding custom VGG model
class VGGCustom(VGG):
    """VGG subclass that adds no overrides of its own.

    It only provides a distinct model class that can be listed next to
    VGG16 / VGG19 when choosing models for the research grid.
    """

In [2]:
# Helper functions

def make_data(batch, **kwagrs):
    """Assemble the model feed dict from a batch of segmented signals.

    Each item of ``batch.signal`` is iterated along its first axis and every
    element obtained this way is stacked into one array. The matching entry
    of ``batch.target`` is repeated once per such element, so the labels stay
    aligned with the stacked signals.

    Parameters
    ----------
    batch : object
        Must expose ``signal`` (iterable of arrays) and ``target``.
    **kwagrs
        Accepted and ignored.

    Returns
    -------
    dict
        ``{"feed_dict": {"signals": ..., "labels": ...}}``.
    """
    import numpy as np

    segments_per_signal = []
    flat_segments = []
    for item in batch.signal:
        segments_per_signal.append(item.shape[0])
        # extending with an array iterates over its first axis
        flat_segments.extend(item)

    stacked = np.array(flat_segments)
    labels = np.repeat(batch.target, segments_per_signal, axis=0)

    feed = {'signals': stacked, 'labels': labels}
    return {"feed_dict": feed}

# Augmentation: remove the mean and randomly invert the sign
@njit(nogil=True)
def center_flip(signal):
    """Subtract the mean of `signal` and multiply the result by a random +1 / -1."""
    sign = np.random.choice(np.array([1, -1]))
    centered = signal - np.mean(signal)
    return sign * centered

# Number of trainable parameters in a model
def get_trainable_variables(iteration, experiment, ppl, model_name="model"):
    """Return the number of trainable variables of a model in a research pipeline.

    Parameters
    ----------
    iteration
        Not used by this function.
    experiment : mapping
        Maps pipeline names to objects exposing a ``pipeline`` attribute.
    ppl : str
        Name of the pipeline that owns the model.
    model_name : str
        Name under which the model is registered in that pipeline.
    """
    pipeline = experiment[ppl].pipeline
    model = pipeline.get_model_by_name(model_name)
    return model.get_number_of_trainable_vars()

# Calculating F1 score on the test data
def calc_f1_score(iteration, experiment, ppl, agg_func=np.mean, threshold=0.5):
    """Compute the macro F1 score from the variables collected by a test pipeline.

    The pipeline variables 'predictions', 'targets' and 'splits' are each
    concatenated. Predictions go through a numerically stable softmax, then
    both predictions and targets are cut into consecutive groups whose sizes
    are given by 'splits'. Column 0 of every group is reduced with `agg_func`;
    aggregated predictions are binarised with `threshold` and compared with
    the aggregated targets.

    Parameters
    ----------
    iteration
        Not used by this function.
    experiment : mapping
        Maps pipeline names to objects exposing a ``pipeline`` attribute.
    ppl : str
        Name of the pipeline holding the variables.
    agg_func : callable
        Reduces a 1-D array of per-group values to a scalar.
    threshold : float
        Aggregated predictions strictly above this value count as positive.

    Returns
    -------
    Result of ``sklearn.metrics.f1_score(..., average='macro')``.
    """
    from sklearn import metrics
    import numpy as np

    logits = np.concatenate(experiment[ppl].pipeline.get_variable('predictions'))
    labels = np.concatenate(experiment[ppl].pipeline.get_variable('targets'))
    group_sizes = np.concatenate(experiment[ppl].pipeline.get_variable('splits'))

    # Softmax with the row maximum subtracted first to avoid overflow in exp
    shifted = logits - np.max(logits, axis=1, keepdims=True)
    exponents = np.exp(shifted)
    probabilities = exponents / np.sum(exponents, axis=1, keepdims=True)

    # Cut points between consecutive groups (the last cumulative sum is the total)
    boundaries = np.cumsum(group_sizes)[:-1]

    aggregated_probs = np.array(
        [agg_func(group[:, 0]) for group in np.split(probabilities, boundaries)]
    )
    aggregated_labels = np.array(
        [agg_func(group[:, 0]) for group in np.split(labels, boundaries)]
    )

    return metrics.f1_score(aggregated_labels, aggregated_probs > threshold, average='macro')

In [3]:
# Paths to the data
# The data directory can be overridden with the ECG_DATA_PATH environment
# variable, so the notebook does not have to be edited per machine; when the
# variable is not set the original default location is used.
PATH = os.environ.get("ECG_DATA_PATH", "/notebooks/data/ECG/training2017")
SIGNALS_MASK = os.path.join(PATH, "A*.hea")  # glob mask for the signal header files
LABELS_PATH = os.path.join(PATH, "REFERENCE.csv")  # csv file with the labels

In [4]:
# Creating dataset
# Build the dataset from every path matching SIGNALS_MASK.
# NOTE(review): no_ext=True presumably strips the file extension from index
# items and sort=True orders them — confirm against EcgDataset.
eds = EcgDataset(path=SIGNALS_MASK, no_ext=True, sort=True)
# Split into eds.train / eds.test (both are used further down).
# NOTE(review): 0.8 is presumably the train share; with shuffle=False and a
# sorted index the split should be reproducible — confirm.
eds.split(0.8, shuffle=False)

In [5]:
# Training schedule constants
BATCH_SIZE = 32
EPOCHS = 500
TEST_EACH_EPOCH = 10  # number of epochs between two test runs
TRAIN_SIZE = len(eds.train)
# Iterations per epoch = number of full batches + 1, times the number of epochs
ITERATIONS = EPOCHS * (TRAIN_SIZE // BATCH_SIZE + 1)
# Iterations contained in TEST_EACH_EPOCH epochs
TEST_EXEC_FOR = TEST_EACH_EPOCH * (ITERATIONS // EPOCHS)
# '%N' string passed as the `execute` / `dump` arguments of the research units
STR_EXEC = '%' + str(TEST_EXEC_FOR)

In [None]:
# Archtectures
_VGG16_ARCH = [
    (2, 0, 64, 1),
    (2, 0, 128, 1),
    (2, 1, 256, 1),
    (2, 1, 512, 1),
    (2, 1, 512, 1)
]

_VGG19_ARCH = [
    (2, 0, 64, 1),
    (2, 0, 128, 1),
    (4, 0, 256, 1),
    (4, 0, 512, 1),
    (4, 0, 512, 1)
]

VGG_Custom_1 = [
    (2, 0, 4, 1),
    (2, 0, 8, 1),
    (2, 0, 16, 1),
    (2, 0, 32, 1),
    (2, 0, 64, 1),
    (2, 0, 128, 1),
    (2, 0, 256, 1)
]

VGG_Custom_2 = [
    (1, 0, 4, 1),
    (1, 0, 8, 1),
    (1, 0, 8, 1),
    (1, 0, 16, 1),
    (1, 0, 16, 1),
    (1, 0, 20, 1),
    (1, 0, 20, 1)
]
VGG_Custom_3 = [
    (2, 0, 4, 1),
    (2, 0, 8, 1),
    (2, 0, 16, 1),
    (2, 0, 32, 1),
    (2, 1, 64, 1),
    (2, 1, 128, 1),
    (2, 1, 256, 1)
]

In [20]:
# Model configuration used when the model is initialised in the training pipeline
model_config = {
    # Two inputs: the signals (shape taken from the first signal of the batch,
    # without its leading axis) and two-class labels exposed as 'targets'.
    'inputs': {
        'signals': {'shape': F(lambda batch: batch.signal[0].shape[1:])},
        'labels': {'classes': ['A', 'NO'], 'transform': 'ohe', 'name': 'targets'},
    },
    'input_block/inputs': 'signals',
    'loss': 'ce',
    'session/config': tf.ConfigProto(allow_soft_placement=True),
    # Values below wrapped in C(...) are read from the pipeline config
    'device': C('device'),
    'optimizer': 'Adam',
    'head/units': C('units'),
}

In [21]:
# Setting pipelines

In [22]:
# Training
# Data-preparation part of the training pipeline: load, relabel, augment,
# cut into segments and transpose.
root_train = (
  ds.Pipeline()
    # read the `signal` and `meta` components in wfdb format
    .load(components=["signal", "meta"], fmt="wfdb")
    # read the `target` component from the csv file at LABELS_PATH
    .load(components="target", fmt="csv", src=LABELS_PATH)
    # NOTE(review): presumably removes the records labelled "~" — confirm
    .drop_labels(["~"])
    # labels "N" and "O" are both renamed to "NO", leaving classes "A" and "NO"
    .rename_labels({"N": "NO", "O": "NO"})
    # centering + random sign flip, applied per channel
    .apply_to_each_channel(center_flip)
    # NOTE(review): presumably resamples each signal to a sampling rate drawn
    # from a normal distribution with loc=300, scale=10 — confirm
    .random_resample_signals("normal", loc=300, scale=10)
    # NOTE(review): presumably cuts random segments of length 3000, 6 per "A"
    # signal and 2 per "NO" signal — confirm
    .random_split_signals(3000, {"A": 6, "NO": 2})
    # swap the last two axes of `signal`
    .apply_transform(func=np.transpose, src='signal', dst='signal', axes=[0, 2, 1])
# NOTE(review): lazy=True presumably only stores the run parameters, and
# n_epochs=None presumably iterates without an epoch limit — confirm
).run(BATCH_SIZE, shuffle=True, drop_last=True, n_epochs=None, lazy=True)

# Model part of the training pipeline: initialise the model and train it on
# the feed dict produced by make_data.
model_train = (
  ds.Pipeline()
    # pipeline variable that receives the fetched loss
    .init_variable('loss', init_on_each_run=list)
    # the model class is taken from the pipeline config entry 'model'
    .init_model('dynamic', C('model'), 'model', config=model_config)
    # NOTE(review): mode="w" presumably overwrites the variable on every
    # iteration instead of appending — confirm
    .train_model('model',
                 make_data=make_data,
                 fetches=["loss"],
                 save_to=[V("loss")], mode="w"
                )
)

# Testing
# Data-preparation part of the test pipeline. Same loading and relabelling as
# for training, but without random resampling and with split_signals instead
# of random_split_signals.
root_test = (
  ds.Pipeline()
    # read the `signal` and `meta` components in wfdb format
    .load(components=["signal", "meta"], fmt="wfdb")
    # read the `target` component from the csv file at LABELS_PATH
    .load(components="target", fmt="csv", src=LABELS_PATH)
    # NOTE(review): presumably removes the records labelled "~" — confirm
    .drop_labels(["~"])
    # labels "N" and "O" are both renamed to "NO"
    .rename_labels({"N": "NO", "O": "NO"})
    # NOTE(review): center_flip multiplies by a random sign, so test inputs
    # are randomised as well — confirm this is intended
    .apply_to_each_channel(center_flip)
    # NOTE(review): presumably cuts segments of length 3000 with step 3000
    # (no overlap) — confirm
    .split_signals(3000, 3000)
    # swap the last two axes of `signal`
    .apply_transform(func=np.transpose, src='signal', dst='signal', axes=[0, 2, 1])
# NOTE(review): drop_last=True may leave the final incomplete batch of the
# test set out of the metric, and shuffle=True changes which records those
# are from run to run — confirm this is intended
).run(BATCH_SIZE, shuffle=True, drop_last=True, n_epochs=1, lazy=True)

# Model part of the test pipeline: record the size of the first axis of every
# signal, import the trained model and accumulate its outputs. The three
# variables filled here are the ones read by calc_f1_score.
model_test = (
  ds.Pipeline()
    .init_variable("splits", init_on_each_run=list)
    # store [x.shape[0]] for each signal in the `splits` component
    .apply_transform(src="signal", dst="splits", func=lambda x: [x.shape[0]])
    # append the batch's `splits` component to the pipeline variable
    .update_variable("splits", B("splits"), mode="a")
    # the source of the model is taken from the config entry 'import_from'
    .import_model("model", C("import_from"))
    .init_variable("targets", init_on_each_run=list)
    .init_variable("predictions", init_on_each_run=list)
    # mode="a": fetched values are appended to the variables batch by batch
    .predict_model("model", make_data=make_data,
                 fetches=["predictions", "targets"], 
                 save_to=[V("predictions"), 
                          V("targets")], mode="a")
)

In [None]:
# Define research options
# Model classes to compare
model_op1 = Option('model', [VGG16, VGG19])
model_op2 = Option('model', [VGGCustom])

# Head layer sizes. The option is named 'units' because model_config reads the
# value through C('units'); the former name 'head_units' was never read.
# NOTE(review): confirm that option names map one-to-one to pipeline config keys.
# head_units_kv = KV('model_config/head/units', 'units')
head_units_op1 = Option('units', [[100, 100, 2]])
head_units_op2 = Option('units', [[100, 100, 2], [100, 10, 2]])

# Body architectures. arch_op1 holds the custom architectures and is combined
# with VGGCustom below; arch_op2 holds the reference VGG16 / VGG19 layouts.
# The original cell never defined arch_op1, assigned arch_op2 twice and
# referenced the undefined name `_VGG`.
# NOTE(review): the visible model_config has no entry that reads C('arch');
# something like 'body/arch': C('arch') is presumably needed for this option
# to have any effect — confirm.
# arch_kv = KV('model_config/body/arch', 'arch')
arch_op1 = Option('arch', [VGG_Custom_1, VGG_Custom_2, VGG_Custom_3])
arch_op2 = Option('arch', [_VGG16_ARCH, _VGG19_ARCH])

# The original line ended with a dangling `+` followed by a comment, which is a
# syntax error. The commented-out term is kept here as a hint: to also run the
# reference models use  grid = grid + model_op1 * head_units_op1
grid = model_op2 * head_units_op2 * arch_op1

In [None]:

# Research definition: a training pipeline, a test pipeline and two functions,
# all run for every configuration of `grid`.
mr = (
    Research()
    # training: root_train bound to the train part of the dataset, followed by
    # model_train; the 'loss' variable is collected
    .pipeline(root_train << eds.train, model_train,
              variables=["loss"], name="train", logging=True,
              dump=STR_EXEC)
    # testing: executed with the STR_EXEC period; the model is imported from
    # the pipeline named "train"
    # NOTE(review): run=True presumably runs the whole test pipeline each time
    # it is executed rather than a single batch — confirm
    .pipeline(root_test << eds.test, model_test, name="test",
              execute=STR_EXEC, dump=STR_EXEC, variables=["splits", "predictions", "targets"],
              import_from="train", run=True, logging=True)
    # F1 score computed from the variables of the 'test' pipeline
    .function(calc_f1_score, returns='f_score', name='f_score',
              execute=STR_EXEC, dump=STR_EXEC, ppl='test')
    # number of trainable variables of the model in the 'train' pipeline
    # NOTE(review): execute=1 / dump=1 presumably mean "at iteration 1 only" — confirm
    .function(get_trainable_variables, returns='trainable_variables', 
              name='trainable_variables', execute=1, dump=1, ppl='train')
    .grid(grid)
)

In [None]:
# Launch the research; results are stored under the name 'CustomVGGResearch'.
# NOTE(review): the GPU ids are specific to the original machine — adjust as needed.
mr.run(
    n_reps=5,
    n_iters=ITERATIONS,
    workers=4,
    gpu=[4, 5, 6, 7],
    branches=1,
    name='CustomVGGResearch',
    progress_bar=True,
)