In [1]:
# define logging and working directory
from ProjectRoot import change_wd_to_project_root
change_wd_to_project_root()
from src.utils.notebook_imports import *
from pyforest import *


# define GPU id to use
# 0 = 1080 Bus ID 2
# 1 = Titan Bus ID 131
# 2 = Titan Bus ID 132
GPU_IDS = '0,1'
current_gpu = choose_gpu_by_id(GPU_IDS)
print(current_gpu)

# jupyter magic config
%matplotlib inline
%reload_ext autoreload
%autoreload 2

# import own modules and utils
from src.utils.utils_io import Console_and_file_logger, ensure_dir, init_config
import src.utils.my_metrics as metr
from src.models.train_model import train_kfolded
# train kfolded data, start each fold manually to improve speed
from src.data.dataset import get_kfolded_data, create_acdc_dataframe_for_cv

# define experiment name for report, model and log paths + filenames
EXPERIMENT = '2D/close_gap2/acdc_plus2'
now = datetime.datetime.now()

# image params
DIM = [224, 224]
IMG_WIDTH = 224
IMG_HEIGHT = 224
# Grayscale images
IMG_CHANNELS = 1
# RV = 1 = Y[...,0] 
# Myo = 2 = Y[...,1] 
# LV = 3 = Y[...,2]
MASK_VALUES = [0, 1, 2, 3]  
MASK_CLASSES = len(MASK_VALUES)
AUGMENT = False
SHUFFLE = True
AUGMENT_GRID = True
RESAMPLE = False
SPACING_X = 1.00
SPACING_Y = 1.00
SCALER = 'MinMax'


# path params
DATASET = 'tetra'  # 'acdc' # or 'tetra'
DATA_ROOT = 'data/raw/tetra/2D/'
TRAIN_PATH = 'data/raw/tetra/2D/train/'
VAL_PATH = 'data/raw/tetra/2D/val/'
TEST_PATH = 'data/raw/tetra/2D/test/'

MODEL_PATH = os.path.join(os.path.join('models', EXPERIMENT), str(now.strftime("%Y-%m-%d_%H_%M")))
TENSORBOARD_LOG_DIR = os.path.join(os.path.join('reports/tensorboard_logs', EXPERIMENT),str(now.strftime("%Y-%m-%d_%H_%M")))
CONFIG_PATH = os.path.join(os.path.join('reports/configs/',EXPERIMENT),str(now.strftime("%Y-%m-%d_%H_%M")))
HISTORY_PATH = os.path.join(os.path.join('reports/history/',EXPERIMENT),str(now.strftime("%Y-%m-%d_%H_%M")))

# training params
ARCHITECTURE = '2D'
seed = 42
BATCHSIZE =  64 # 32, 64, 16, 1
INITIAL_EPOCH = 0
EPOCHS = 150
FOLDS = 4
EPOCHS_BETWEEN_CHECKPOINTS = 5
MONITOR_FUNCTION = 'val_dice_coef_labels'
MONITOR_MODE = 'max'

# Network params
OPTIMIZER = 'Adam'  # adam, sgd, softmax # https://keras.io/optimizers/
ACTIVATION = 'elu'  # 'elu' --> works well with binary_crossentropy and bce_dice_loss, relu does not work, it clips negative values, bse does return negative values
LEARNING_RATE = 0.001
DECAY = 0.0
EPSILON = 1e-08
DROPOUT_L1_L2 = 0.3 # best with 0.3 - 0.5
DROPOUT_L3_L4 = 0.4
DROPOUT_L5 = 0.5
BATCH_NORMALISATION = True


LOSS_FUNCTION = metr.bce_dice_jac_loss  # 'binary_crossentropy'--> 91 IOU, bce_dice_loss --> 94 IOU
#LOSS_FUNCTION = metr.weighted_cce_dice_coef([1,5,5,3])
#LOSS_FUNCTION = keras.losses.categorical_crossentropy
#LOSS_FUNCTION = metr.weighted_categorical_crossentropy([1,5,10,3])
#LOSS_FUNCTION = keras.losses.binary_crossentropy
metrics = [
    metr.dice_coef_labels,
    metr.dice_coef_myo,
    metr.dice_coef_lv,
    metr.dice_coef_rv,
]

# make sure logging directories are available
Console_and_file_logger(EXPERIMENT, logging.INFO)


# Define a config for param injection,
# save a serialized version, 
# make sure all paths exist
config = init_config(locals(), False)


# define a Tensorflow config
#tf_config = tf.ConfigProto()
#tf_config=tf.ConfigProto(
#      allow_soft_placement=True, log_device_placement=True)
#tf_config.gpu_options.allow_growth = True
#tf_session = tf.Session(config=tf_config)
#tf.keras.backend.set_session(tf_session)

search for root_dir and set working directory
Working directory set to: /mnt/data/git/cardio


Using TensorFlow backend.
2019-10-29 20:44:00,586 INFO -------------------- Start --------------------
2019-10-29 20:44:00,586 INFO Working directory: /mnt/data/git/cardio.
2019-10-29 20:44:00,587 INFO Log file: ./logs/2D/close_gap2/acdc_plus2.log


['gpu(0)', 'gpu(1)']


## Train a model on ACDC data & n patients from the GCN data, define the generalisation gap

In [2]:
# used for Finetuning a model
def get_n_patients(df, n=1, fold=0):
    # make random.choice idempotent
    np.random.seed(42)
    # select random patients
    patients = np.random.choice(sorted(df['patient'].unique()), size=n)
    logging.info('Added patients: {} from the GCN dataset'.format(patients))
    # filter data frame by fold and by random selected patients ids, make sure to make a copy to avoid side effects
    df_temp = df[(df['fold'] == fold) & (df['patient'].isin(patients))].copy()
    # make sure all selected images will be used during training train images
    # train_kfolded will only use images with modality == train, override the modality for all selected patients/rows
    df_temp.loc[:,'modality'] = 'train'
    df_temp.reset_index(inplace=True)
    return df_temp

fold = 0
number_of_gcn_patients = 2

# load ACDC split
kfold_data_dir_acdc = 'reports/kfolds_data/2D/unet_acdc_spacing1_batch32_drop04_batchnorm_minmax_kfold/'
df_folds_acdc = pd.read_csv(os.path.join(kfold_data_dir_acdc, 'df_kfold_acdc.csv'))


# load GCN split
kfold_data_dir_gcn = 'reports/kfolds_data/2D/unet_tetra_spacing1_batch32_drop04_batchnorm_minmax_kfold4'
df_folds_gcn = pd.read_csv(os.path.join(kfold_data_dir_gcn, 'df_kfold.csv'))


extend = get_n_patients(df_folds_gcn, number_of_gcn_patients)
df_folds = pd.concat([df_folds_acdc, extend], sort=False)
df_fold = df_folds[df_folds['fold'] == fold]
config['GCN_PATIENTS'] = sorted(extend['patient'].unique())
config['GCN_IMAGES'] = len(extend)

train_kfolded(config, metrics, df_fold)

2019-10-29 20:44:08,304 INFO Added patients: ['0275-D8RUZJ8N-2006-09-22' '0547-RHQMXMC7-2005-11-28'] from the GCN dataset
2019-10-29 20:44:08,315 INFO Fold: 0
2019-10-29 20:44:08,316 INFO train images: 1550
2019-10-29 20:44:08,316 INFO train masks: 1550
2019-10-29 20:44:08,316 INFO test images: 474
2019-10-29 20:44:08,317 INFO test masks: 474
2019-10-29 20:44:08,317 INFO Create DataGenerator
2019-10-29 20:44:08,322 INFO Datagenerator created with: 
 shape: [224, 224]
 batchsize: 64
 Scaler: MinMax
 Images: 1550 
 Augment_grid: True 
 Thread workers: 64
2019-10-29 20:44:08,322 INFO No augmentation
2019-10-29 20:44:08,322 INFO Create DataGenerator
2019-10-29 20:44:08,324 INFO Datagenerator created with: 
 shape: [224, 224]
 batchsize: 64
 Scaler: MinMax
 Images: 474 
 Augment_grid: False 
 Thread workers: 64
2019-10-29 20:44:08,324 INFO No augmentation
2019-10-29 20:44:12,239 INFO Optimizer: Adam
2019-10-29 20:44:12,387 INFO selected patients: ['0116-5CXHN75U-2006-08-02'] from: 151
2019-

Epoch 1/150


2019-10-29 20:44:39,921 INFO Saved model to disk: models/2D/close_gap2/acdc_plus2/2019-10-29_20_44_fold0



Epoch 00001: loss improved from inf to 0.46059, saving model to models/2D/close_gap2/acdc_plus2/2019-10-29_20_44_fold0/checkpoint.h5
Epoch 2/150

Epoch 00002: loss improved from 0.46059 to 0.38503, saving model to models/2D/close_gap2/acdc_plus2/2019-10-29_20_44_fold0/checkpoint.h5
Epoch 3/150

Epoch 00003: loss improved from 0.38503 to 0.33743, saving model to models/2D/close_gap2/acdc_plus2/2019-10-29_20_44_fold0/checkpoint.h5
Epoch 4/150

Epoch 00004: loss improved from 0.33743 to 0.28885, saving model to models/2D/close_gap2/acdc_plus2/2019-10-29_20_44_fold0/checkpoint.h5
Epoch 5/150

Epoch 00005: loss improved from 0.28885 to 0.23615, saving model to models/2D/close_gap2/acdc_plus2/2019-10-29_20_44_fold0/checkpoint.h5
Epoch 6/150

Epoch 00006: loss improved from 0.23615 to 0.17311, saving model to models/2D/close_gap2/acdc_plus2/2019-10-29_20_44_fold0/checkpoint.h5
Epoch 7/150

Epoch 00007: loss improved from 0.17311 to 0.11066, saving model to models/2D/close_gap2/acdc_plus2/201

2019-10-29 20:54:43,987 INFO Fold: 0 finished - saving scores.


Epoch 00063: early stopping


2019-10-29 20:54:44,995 INFO Training fold 0 done in 636.683 s
2019-10-29 20:54:45,298 INFO All folds are finished in 636.985 sec
