In [1]:
# imports
import os
import sys

sys.path.append("../../")

import numpy as np
import tensorflow as tf
from numba import njit

from cardio import EcgDataset
from cardio import dataset as ds
from cardio.dataset import best_practice
from cardio.dataset import F, B, C, V, Config
from cardio.dataset.research import KV, Grid, Option, Research

from cardio.dataset.models.tf import ResNet, ResNet18, ResNet34

  from ._conv import register_converters as _register_converters


# Setting up

In [2]:
# Adding CustomResNet model
class ResNetCustom(ResNet):
    """ResNet subclass that overrides nothing.

    It exists as a separate class so it can be passed as the ``model`` option
    of the research grid; everything is inherited from ``ResNet``.
    """
    pass
# Disabled template for overriding the default configuration:
#     @classmethod
#     def default_config(cls):
#         config = ResNet.default_config()
#         return config

In [3]:
# Helper functions
def make_data(batch, **kwargs):
    """Build the model feed dict from a batch of segmented signals.

    Every item of ``batch.signal`` is a stack of segments belonging to one
    record (its first axis indexes the segments). All segments of all records
    are gathered into a single array, and each row of ``batch.target`` is
    repeated once per segment of its record so that signals and labels stay
    aligned.

    Parameters
    ----------
    batch : object
        Anything exposing ``signal`` (iterable of arrays) and ``target``
        (array-like whose first axis indexes records).
    **kwargs
        Ignored; accepted so the caller may pass extra keyword arguments.

    Returns
    -------
    dict
        ``{"feed_dict": {"signals": <segments>, "labels": <repeated targets>}}``.
    """
    # Number of segments per record: tells np.repeat how often to copy each target.
    n_reps = [signal.shape[0] for signal in batch.signal]
    signals = np.array([segment for signal in batch.signal for segment in signal])
    targets = np.repeat(batch.target, n_reps, axis=0)
    return {"feed_dict": {'signals': signals, 'labels': targets}}

# Flipping for augmentation
@njit(nogil=True)
def center_flip(signal):
    """Subtract the mean of ``signal`` and multiply the result by a random sign (+1 or -1)."""
    sign = np.random.choice(np.array([1, -1]))
    centered = signal - np.mean(signal)
    return sign * centered

# Checking for number of parameters in the model
def get_trainable_variables(iteration, experiment, ppl, model_name="model"):
    """Return the number of trainable variables of a model in an experiment pipeline.

    Looks up ``experiment[ppl].pipeline``, fetches the model called
    ``model_name`` from it and returns that model's
    ``get_number_of_trainable_vars()``. ``iteration`` is accepted but not used.
    """
    pipeline = experiment[ppl].pipeline
    model = pipeline.get_model_by_name(model_name)
    return model.get_number_of_trainable_vars()

# Calculating the F1 score on the test data
def calc_f1_score(iteration, experiment, ppl, agg_func=np.mean, threshold=0.5):
    """Calculate the macro-averaged F1 score from the variables of a test pipeline.

    The ``predictions``, ``targets`` and ``splits`` variables of
    ``experiment[ppl].pipeline`` are read and concatenated. ``predictions``
    and ``targets`` hold one row per signal segment; ``splits`` holds the
    number of segments of every record, in the same order.

    A softmax is applied to the predictions, column 0 is aggregated over the
    segments of each record with ``agg_func`` and the record is predicted
    positive when the aggregate exceeds ``threshold``. The true label of a
    record is column 0 of the target of its first segment; it does not go
    through ``agg_func``, so aggregators that are not mean-like (``np.sum``
    for instance) cannot distort the labels.

    Parameters
    ----------
    iteration : any
        Not used; part of the signature the caller invokes.
    experiment : mapping
        ``experiment[ppl].pipeline`` must provide ``get_variable(name)``.
    ppl : hashable
        Key of the test pipeline inside ``experiment``.
    agg_func : callable
        Reduces a 1-D array of per-segment scores to one number.
    threshold : float
        Decision threshold applied to the aggregated score.

    Returns
    -------
    float
        Result of ``sklearn.metrics.f1_score(..., average='macro')``.

    Raises
    ------
    ValueError
        If some record has no segments, so no label can be derived for it.
    """
    # Kept local: sklearn is not imported at file level.
    from sklearn import metrics

    pipeline = experiment[ppl].pipeline
    scores = np.concatenate(pipeline.get_variable('predictions'))
    labels = np.concatenate(pipeline.get_variable('targets'))
    splits = np.concatenate(pipeline.get_variable('splits'))

    # Numerically stable softmax: shifting by the row maximum keeps exp() finite.
    # The subtraction is out of place so the fetched array is left untouched.
    shifted = scores - np.max(scores, axis=1, keepdims=True)
    exps = np.exp(shifted)
    probs = exps / np.sum(exps, axis=1, keepdims=True)

    counts = np.ravel(splits)
    if np.any(counts < 1):
        raise ValueError("every record must have at least one segment to compute F1 score")
    # Positions at which the flat per-segment arrays are cut into per-record chunks.
    bounds = np.cumsum(counts)[:-1]

    record_scores = np.array([agg_func(chunk[:, 0]) for chunk in np.split(probs, bounds)])
    # All segments of a record share one label, so the first segment is enough.
    record_labels = np.rint(
        np.array([chunk[0, 0] for chunk in np.split(labels, bounds)])
    ).astype(int)
    record_preds = (record_scores > threshold).astype(int)

    return metrics.f1_score(record_labels, record_preds, average='macro')

In [4]:
# Paths to the data
# Set the ECG_DATA_PATH environment variable to point at your data directory;
# when it is not set, the original default location is used.
PATH = os.environ.get("ECG_DATA_PATH", "/notebooks/data/ECG/training2017")
SIGNALS_MASK = os.path.join(PATH, "A*.hea")  # glob mask of the record header files
LABELS_PATH = os.path.join(PATH, "REFERENCE.csv")  # csv file with the record labels

In [5]:
# Creating dataset
# Build the dataset over all files matching SIGNALS_MASK.
# NOTE(review): no_ext=True / sort=True presumably strip the file extension from the
# record ids and sort them — confirm against the EcgDataset documentation.
eds = EcgDataset(path=SIGNALS_MASK, no_ext=True, sort=True)
# Split with a 0.8 share and without shuffling; later cells use eds.train and eds.test.
eds.split(0.8, shuffle=False)

In [6]:
# Setting training constants
BATCH_SIZE = 32
EPOCHS = 300
TEST_EACH_EPOCH = 10  # the test stage runs once per this many epochs

TRAIN_SIZE = len(eds.train)
# An "epoch" is counted as floor(TRAIN_SIZE / BATCH_SIZE) + 1 iterations.
# NOTE(review): the training pipeline runs with drop_last=True, which presumably
# yields one batch fewer per pass over the data — confirm the +1 is intended.
ITERATIONS = EPOCHS * (TRAIN_SIZE // BATCH_SIZE + 1)
# Iterations between two test runs: iterations per epoch times TEST_EACH_EPOCH.
TEST_EXEC_FOR = (ITERATIONS // EPOCHS) * TEST_EACH_EPOCH
# Schedule string passed as `execute` / `dump` to the research
# (presumably "every TEST_EXEC_FOR iterations" — confirm against the Research docs).
STR_EXEC = '%' + str(TEST_EXEC_FOR)

In [7]:
# Defining models configurations
model_config = {
    # Input description: the signal shape is computed from the batch (all dimensions of
    # the first signal item except the first one); labels are the two classes 'A' and
    # 'NO' with the 'ohe' transform, exposed under the name 'targets'.
    'inputs': dict(signals={'shape': F(lambda batch: batch.signal[0].shape[1:])},
                   labels={'classes': ['A', 'NO'], 'transform': 'ohe', 'name': 'targets'}),
    'input_block/inputs': 'signals',
    "loss": "ce",  # presumably cross-entropy — confirm against the model documentation
    # The C(...) entries below are named config values; their names match the
    # research options ('input_filters', 'layout', 'filters', 'blocks') defined later.
    "input_block/filters": C('input_filters'),
    "body/block/layout": C('layout'),
    "body/filters": C('filters'),
    "body/num_blocks": C('blocks'),
    "session/config": tf.ConfigProto(allow_soft_placement=True),
    "device": C("device"),  # NOTE(review): 'device' is not set in this file — presumably supplied by the research runner
    "optimizer": "Adam",
}

In [8]:
# Setting pipelines

In [9]:
# Training
# Data-preparation part of the training pipeline (no model actions).
root_train = (
  ds.Pipeline()
    # Load signals with meta in wfdb format and targets from the csv at LABELS_PATH.
    .load(components=["signal", "meta"], fmt="wfdb")
    .load(components="target", fmt="csv", src=LABELS_PATH)
    # Remove records labelled "~" and merge labels "N" and "O" into a single "NO" class.
    .drop_labels(["~"])
    .rename_labels({"N": "NO", "O": "NO"})
    # Centre each channel and multiply it by a random sign (see center_flip).
    .apply_to_each_channel(center_flip)
    # NOTE(review): presumably resamples to a sampling rate drawn from N(300, 10) — confirm.
    .random_resample_signals("normal", loc=300, scale=10)
    # NOTE(review): presumably draws 6 random segments of length 3000 for class "A"
    # and 2 for class "NO" — confirm against the cardio documentation.
    .random_split_signals(3000, {"A": 6, "NO": 2})
    # Swap the last two axes of every signal item.
    .apply_transform(func=np.transpose, src='signal', dst='signal', axes=[0, 2, 1])
# n_epochs=None: no epoch limit is set here; lazy=True presumably defers execution.
).run(BATCH_SIZE, shuffle=True, drop_last=True, n_epochs=None, lazy=True)

# Model part of the training pipeline.
model_train = (
  ds.Pipeline()
    # 'loss' pipeline variable, initialised with list() on each run.
    .init_variable('loss', init_on_each_run=list)
    # The model class is taken from the 'model' config value and configured with model_config.
    .init_model('dynamic', C('model'), 'model', config=model_config)
    # Train on the feed dict built by make_data; the fetched loss is written
    # to the 'loss' variable with mode="w".
    .train_model('model',
                 make_data=make_data,
                 fetches=["loss"],
                 save_to=[V("loss")], mode="w"
                )
)

# Testing
# Data-preparation part of the test pipeline.
root_test = (
  ds.Pipeline()
    # Same loading and label handling as in the training pipeline.
    .load(components=["signal", "meta"], fmt="wfdb")
    .load(components="target", fmt="csv", src=LABELS_PATH)
    .drop_labels(["~"])
    .rename_labels({"N": "NO", "O": "NO"})
    # NOTE(review): center_flip also applies a random sign flip, so evaluation is
    # not fully deterministic — confirm this is intended at test time.
    .apply_to_each_channel(center_flip)
    # Segments of length 3000 taken with step 3000.
    .split_signals(3000, 3000)
    # Swap the last two axes of every signal item.
    .apply_transform(func=np.transpose, src='signal', dst='signal', axes=[0, 2, 1])
# Single pass over the test data. The last, possibly incomplete, batch is kept
# (drop_last=False) so that every test record contributes to the metric, and the
# order is not shuffled because it has no effect on evaluation.
).run(BATCH_SIZE, shuffle=False, drop_last=False, n_epochs=1, lazy=True)

# Model part of the test pipeline: collects what calc_f1_score reads afterwards.
model_test = (
  ds.Pipeline()
    .init_variable("splits", init_on_each_run=list)
    # Store the size of the first axis of every signal item in the 'splits' batch
    # component, then append it to the 'splits' pipeline variable.
    .apply_transform(src="signal", dst="splits", func=lambda x: [x.shape[0]])
    .update_variable("splits", B("splits"), mode="a")
    # The model is imported from the pipeline given by the 'import_from' config value.
    .import_model("model", C("import_from"))
    .init_variable("targets", init_on_each_run=list)
    .init_variable("predictions", init_on_each_run=list)
    # Predict on the feed dict built by make_data and append (mode="a") the fetched
    # predictions and targets to the pipeline variables of the same names.
    .predict_model("model", make_data=make_data,
                 fetches=["predictions", "targets"], 
                 save_to=[V("predictions"), 
                          V("targets")], mode="a")
)

In [10]:
# Define research options
# Option names ('model', 'layout', 'input_filters', 'blocks', 'filters') match the
# C(...) names used in model_config and in init_model.
# The *_op1 options belong to the standard ResNet18/ResNet34 comparison, which is
# commented out in `grid` below; only the *_op2 options are used.
model_op1 = Option('model', [ResNet18, ResNet34])
model_op2 = Option('model', [ResNetCustom])

layout_op1 = Option('layout', ['cnacna'])
layout_op2 = Option('layout', ['cna', 'cnacna'])

# input_filters_kv = KV("model_config/input_block/filters", "input_filters")
input_filters_op1 = Option('input_filters', [64])
input_filters_op2 = Option('input_filters', [32, 8])

#blocks_kv = KV("model_config/body/num_blocks", "blocks")
blocks_op1= Option('blocks', [[2, 2, 2, 2], [3, 4, 6, 3]])
blocks_op2 = Option('blocks', [[2, 3, 4, 5, 4, 3, 2], [2, 2, 2, 2, 2, 2, 2],
                               [1, 1, 1, 1, 1, 1, 1]])

# filters_kv = KV("model_config/body/filters", "filters")
# NOTE(review): both entries of filters_op1 are identical.
filters_op1 = Option('filters', [[64, 128, 256, 512], [64, 128, 256, 512]])
filters_op2 = Option('filters', [[4, 8, 16, 32, 64, 128, 256], [4, 4, 8, 8, 16, 16, 20]])

# Options are combined with `*` (presumably a cartesian product — confirm):
# 1 model x 2 layouts x 2 input_filters x 3 blocks x 2 filters = 24 configurations.
grid = (#Option.product(model_op1, blocks_op1, filters_op1) * layout_op1 * input_filters_op1 + 
        model_op2 * layout_op2 * input_filters_op2 * blocks_op2 * filters_op2)

In [11]:
# Research definition: a train pipeline, a test pipeline and two functions, run over `grid`.
mr = (
    Research()
    # Training: root_train on the train split together with model_train; the 'loss'
    # variable is tracked and results are dumped on the STR_EXEC schedule.
    .pipeline(root_train << eds.train, model_train,
              variables=["loss"], name="train", dump=STR_EXEC)
    # Testing: root_test on the test split together with model_test, executed on the
    # STR_EXEC schedule; import_from="train" is the value read by C("import_from").
    # NOTE(review): run=True presumably runs the whole test pipeline on each execution — confirm.
    .pipeline(root_test << eds.test, model_test,
              name="test", execute=STR_EXEC, dump=STR_EXEC,
              import_from="train", run=True)
    # F1 score computed from the variables of the 'test' pipeline, on the same schedule.
    .function(calc_f1_score, returns='f_score', name='f_score',
              execute=STR_EXEC, dump=STR_EXEC, ppl='test')
    # Number of trainable variables of the model in the 'train' pipeline.
    .function(get_trainable_variables, returns='trainable_variables', 
              name='trainable_variables', execute=1, dump=1, ppl='train')
    .grid(grid)
)

In [None]:
# Launch the research: 5 repetitions of ITERATIONS iterations, 6 workers.
# The captured log below reports 120 jobs with 64200 iterations each.
# NOTE(review): the gpu list is hard-coded for a particular machine — adjust it to
# the devices available before running.
mr.run(n_reps=5, n_iters=ITERATIONS, workers=6, gpu=[1, 2, 4, 5, 6, 7], 
       branches=1, name='CustomResNetResearch_300ep', progress_bar=True)

Research CustomResNetResearch_300ep is starting...


  0%|          | 0/7704000 [00:00<?, ?it/s]

Distributor has 120 jobs with 64200 iterations. Totally: 7704000
Instructions for updating:
`NHWC` for data_format is deprecated, use `NWC` instead
Instructions for updating:
`NHWC` for data_format is deprecated, use `NWC` instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See tf.nn.softmax_cross_entropy_with_logits_v2.

Instructions for updating:
`NHWC` for data_format is deprecated, use `NWC` instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See tf.nn.softmax_cross_entropy_with_logits_v2.

Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:

Future major versions of TensorFlow will allow gradients to f

  0%|          | 4206/7704000 [02:02<62:06:49, 34.43it/s]