In [1]:
# tpu v3-8 https://www.kaggle.com/docs/tpu#tpu2
P = {}
P['EPOCHS'] = 200
# 不同base 的efficeinet 似乎只有模型规模的指数不同! 
# 不是的！！！！！ 不同 baseline 对应了 不同分辨率 https://keras.io/examples/vision/image_classification_efficientnet_fine_tuning/
P['BACKBONE'] = 'efficientnetb2' 
P['NFOLDS'] = 5
P['SEED'] = 7788
P['VERBOSE'] = 0 # One lIne per Epoch
P['BATCH_COE'] = 12

P['DIM'] = 512

P['LR'] = 1e-4 # for tpu
P['STEPS_COE'] = 1

P['patience'] = 30
P['extenal'] = True

In [2]:
!pip install segmentation_models -q

import os
os.environ['SM_FRAMEWORK'] = 'tf.keras'
import glob

from segmentation_models.losses import bce_jaccard_loss
import segmentation_models as sm

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import KFold

import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import backend as K
from tensorflow.keras.utils import get_custom_objects

from kaggle_datasets import KaggleDatasets

import math
import random
AUTO = tf.data.experimental.AUTOTUNE

import tensorflow_addons as tfa

Segmentation Models: using `tf.keras` framework.


In [3]:
try: # detect TPUs
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver() # TPU detection
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)
    tf.config.optimizer.set_jit(True)
except ValueError: # no TPU found, detect GPUs
    strategy = tf.distribute.get_strategy() # default strategy that works on CPU and single GPU

BATCH_SIZE = P['BATCH_COE'] * strategy.num_replicas_in_sync

print("Number of accelerators: ", strategy.num_replicas_in_sync)
print("BATCH_SIZE: ", str(BATCH_SIZE))

Number of accelerators:  8
BATCH_SIZE:  96


## GCS_PATHS

In [4]:
def FOLD_FILES(cfg):
    GCS_PATH = KaggleDatasets().get_gcs_path(cfg.train_images)
    TRAINING_FILENAMES = tf.io.gfile.glob(GCS_PATH + '/train/*.tfrec')
    GCS_PATH = KaggleDatasets().get_gcs_path(cfg.external_images)
    EXTERNAL = tf.io.gfile.glob(GCS_PATH + '/train/*.tfrec')
    
    return TRAINING_FILENAMES, EXTERNAL

In [5]:
class CONFIGURATION:
    def __init__(self):
        self.train_images = '512x512-train'
        self.external_images = '512x512tfrecs'
        
        self.NFOLDS = 5
        self.mean_train = np.array([0.63701495, 0.4709702, 0.6817423])
        self.mean_train = np.expand_dims(np.expand_dims(self.mean_train, axis = 0), axis = 0)
        self.std_train = np.array([0.15978882, 0.2245109, 0.14173926])
        self.std_train = np.expand_dims(np.expand_dims(self.std_train, axis = 0), axis = 0)

        self.stats = (self.mean_train, self.std_train)
        self.TRAINING_FILENAMES, self.EXTERNAL = FOLD_FILES(self)
        self.ALL_TRAINING_FILENAMES = self.TRAINING_FILENAMES + self.EXTERNAL
Config = CONFIGURATION()

# Datasets pipeline

In [6]:
DIM = P['DIM']
def dropout(image, mask, CT = 10, CT_WIDTH = 0.05):
    # input - one image of size [dim,dim,3] not a batch of [b,dim,dim,3]
    # output - image with CT squares of side size SZ*DIM removed

    # DO DROPOUT WITH PROBABILITY DEFINED ABOVE
    mask = tf.cast(mask, tf.float32)
    for k in range(CT):
        # CHOOSE RANDOM LOCATION
        x = random.randint(0, DIM - 1)
        y = random.randint(0,DIM - 1)

        # COMPUTE SQUARE 
        WIDTH = DIM * CT_WIDTH
        ya = int(max(0,y-WIDTH//2))
        yb = int(min(DIM,y+WIDTH//2))
        xa = int(max(0,x-WIDTH//2))
        xb = int(min(DIM,x+WIDTH//2))

        # DROPOUT IMAGE
        one = image[ya:yb,0:xa,:]
        two = tf.zeros([yb-ya,xb-xa, 3]) 
        three = image[ya:yb,xb:DIM,:]

        middle = tf.concat([one,two,three],axis=1)
        image = tf.concat([image[0:ya,:,:],middle,image[yb:DIM,:,:]],axis=0)

        one = mask[ya:yb,0:xa,:]
        two = tf.zeros([yb-ya,xb-xa,1]) 
        three = mask[ya:yb,xb:DIM,:]
        middle = tf.concat([one,two,three],axis=1)
        mask = tf.concat([mask[0:ya,:,:],middle,mask[yb:DIM,:,:]],axis=0)

    # RESHAPE HACK SO TPU COMPILER KNOWS SHAPE OF OUTPUT TENSOR 
    image = tf.reshape(image,[DIM,DIM,3])
    mask = tf.reshape(mask, [DIM, DIM, 1])
    return image, tf.cast(mask, tf.bool) 
def get_mat(height_zoom, width_zoom):
    # returns 3x3 transformmatrix which transforms indicies
        
    
    # ROTATION MATRIX
    one = tf.constant([1],dtype='float32')
    zero = tf.constant([0],dtype='float32')
        
    
    # ZOOM MATRIX
    zoom_matrix = tf.reshape( tf.concat([one/height_zoom,zero,zero, zero,one/width_zoom,zero, zero,zero,one],axis=0),[3,3] )
    
    
    return zoom_matrix
def transform(image,label):
    # input image - is one image of size [dim,dim,3] not a batch of [b,dim,dim,3]
    # output - image randomly rotated, sheared, zoomed, and shifted
    XDIM = DIM%2 #fix for size 331
    label = tf.cast(label, dtype = tf.float32)
    rot = 360. * tf.random.uniform([1],dtype='float32', minval = -1, maxval = 1)
    h_zoom = 1.0 + tf.random.uniform([1],dtype='float32', minval = 0, maxval = 0.5)/10.
    w_zoom = 1.0 + tf.random.uniform([1],dtype='float32', minval = 0, maxval = 0.5)/10.
    
    # GET TRANSFORMATION MATRIX
    m = get_mat(h_zoom,w_zoom) 

    # LIST DESTINATION PIXEL INDICES
    x = tf.repeat( tf.range(DIM//2,-DIM//2,-1), DIM )
    y = tf.tile( tf.range(-DIM//2,DIM//2),[DIM] )
    z = tf.ones([DIM*DIM],dtype='int32')
    idx = tf.stack( [x,y,z] )
    
    # ROTATE DESTINATION PIXELS ONTO ORIGIN PIXELS
    idx2 = K.dot(m,tf.cast(idx,dtype='float32'))
    idx2 = K.cast(idx2,dtype='int32')
    idx2 = K.clip(idx2,-DIM//2+XDIM+1,DIM//2)
    
    # FIND ORIGIN PIXEL VALUES           
    idx3 = tf.stack( [DIM//2-idx2[0,], DIM//2-1+idx2[1,]] )
    trans = tf.transpose(idx3)
    d = tf.gather_nd(image, trans)
    mask = tf.gather_nd(label, trans)
    image = tf.reshape(d,[DIM,DIM,3])
    # Round Masks to nearest and return to bool
    mask = tf.reshape(mask, [DIM, DIM, 1])
    mask = tf.cast(tf.greater(mask, tf.constant(0.5)), tf.float32)
    
    image = tfa.image.rotate(image, rot, fill_mode = 'reflect')
    mask = tfa.image.rotate(mask, rot, fill_mode = 'reflect')
    return image, tf.cast(mask, tf.bool)
@tf.function
def shift_scale_rotate(img, mask):
    return transform(img, mask)
def random_gamma(img):
    gamma = (80, 120)
    gamma = random.randint(*gamma) / 100
    return tf.image.adjust_gamma(img, gamma = gamma)
def normalize(image):
    mean, std = Config.stats
    image = image - mean
    image = image / std
    return image
def visualize(image, mask):
    plt.imshow(image)
    plt.imshow(mask, alpha = 0.5)
    plt.show()
    return image, mask

In [7]:
DIM = P['DIM']
seed = P['SEED']
def _parse_image_function(example_proto, augment = True):
    image_feature_description = {
        'image': tf.io.FixedLenFeature([], tf.string),
        'mask': tf.io.FixedLenFeature([], tf.string)
    }
    single_example = tf.io.parse_single_example(example_proto, image_feature_description)
    image = tf.reshape( tf.io.decode_raw(single_example['image'],out_type=np.dtype('uint8')), (DIM,DIM, 3))
    mask =  tf.reshape(tf.io.decode_raw(single_example['mask'],out_type='bool'),(DIM,DIM,1))
    image = tf.cast(image, tf.float32) / 255.0
    if augment == True:
        print("AUGMENTING")
        if tf.random.uniform(()) > 0.5:
            image = tf.image.flip_left_right(image)
            mask = tf.image.flip_left_right(mask)    
        if tf.random.uniform(()) > 0.5:	        
            image = tf.image.flip_up_down(image)	      
            mask = tf.image.flip_up_down(mask)
        if tf.random.uniform(()) > 0.5:
            image = tf.image.rot90(image)
            mask = tf.image.rot90(mask)      

            
        if tf.random.uniform(()) > 0.75:
            uniform = tf.random.uniform(())
            if uniform < 1/3:
                image = tf.image.random_contrast(image, lower=0.8, upper=1.2, seed=seed)
            elif uniform < 2/3:
                image = tf.image.random_brightness(image, max_delta=0.2, seed=seed)
            else:
                image = random_gamma(image) 

        if tf.random.uniform(()) < 0.5:
            image, mask = shift_scale_rotate(image, mask)
    
        if tf.random.uniform(()) < 0.5:
            image = tfa.image.gaussian_filter2d(image, filter_shape = (3, 7), sigma = (0.8, 1.4))
        
        if tf.random.uniform(()) < 0.5:
            noise = tf.random.normal(shape=tf.shape(image), mean=0.002, stddev=(50)/(255), dtype=tf.float32)
            image = image + noise
        if tf.random.uniform(()) < 0.25:
            image, mask = dropout(image, mask)
    image = normalize(image) # Normalize, both at train and val time
    return tf.cast(image, tf.float16), tf.cast(mask, tf.float32)

def load_dataset(filenames, augment = True):
    dataset = tf.data.TFRecordDataset(filenames, num_parallel_reads=AUTO)
    dataset = dataset.map(lambda image: _parse_image_function(image, augment = augment), num_parallel_calls=AUTO)           
    return dataset

def get_training_dataset(fold_idx, index= P['SEED']):
    print("trainning data load")
    files = []
    for idx in range(Config.NFOLDS):
        
        if idx != fold_idx:
            files += [Config.TRAINING_FILENAMES[idx]]
    files += Config.EXTERNAL
    dataset = load_dataset(files)
    dataset = dataset.repeat()
    dataset = dataset.shuffle(128, seed = index) # High Memory Consumption I think. I think it's tradeoff memory for performance:<
    dataset = dataset.batch(BATCH_SIZE,drop_remainder=True)
    dataset = dataset.prefetch(AUTO)
    return dataset

def get_validation_dataset(fold_idx, ordered=True):
    print("validate data load")
    files = Config.TRAINING_FILENAMES[fold_idx]
    dataset = load_dataset(files, augment = False)
    dataset = dataset.batch(BATCH_SIZE,drop_remainder=True)
    dataset = dataset.prefetch(AUTO)
    return dataset

# Model

In [8]:
class TrainConfig:
    num_classes = 2
    lr =1e-4
    weight_decay = 0.0
    max_lr = 10
    min_lr = 1e-4
    
    patience = 3
    early_stop = 20

In [9]:
CRITERION = keras.losses.CategoricalCrossentropy(from_logits = False, reduction = 'none')
def jaccard_loss(y_true, y_pred):
    # Y_true: Tensor(B, 512, 512, 1)
    # Y_pred: Tensor(B, 512, 512, 2)
    y_ones = y_pred[:, :, :, 1]
    eps = 1e-8
    y_true = tf.squeeze(y_true)
    
    intersection = tf.reduce_sum(y_ones * y_true)
    cardinality = tf.reduce_sum(y_ones + y_true)
    cardinality = cardinality - intersection
    
    jaccard = (intersection + eps) / (cardinality + eps)
    loss = 1 - jaccard 
    return tf.math.log((tf.math.exp(loss) + tf.math.exp(-loss)) / 2)
def ce_loss(y_true, y_pred):
    # Y_true: (B, 512, 512)
    # Y_pred: (B, 512, 512, 2)
    y_true = tf.squeeze(tf.one_hot(tf.cast(y_true, tf.int32), 2))
    loss = CRITERION(y_true, y_pred)
    return tf.reduce_mean(loss)
def loss_fn(y_true, y_pred):
    ce = ce_loss(y_true, y_pred)
    jaccard = jaccard_loss(y_true, y_pred)
    return ce + jaccard
class Dice(tf.keras.metrics.Metric):
    def __init__(self, name = 'dice', **kwargs):
        super().__init__(name = name, **kwargs)
        self.inter = tf.Variable(0.0)
        self.union = tf.Variable(0.0)
    def result(self):
        eps = 1e-8
        return (2 * self.inter + eps)  / (self.union + eps)
    def inter_union(self, y_true, y_pred):
        y_true = tf.squeeze(y_true)
        y_ones = y_pred[:, :, :, 1]
        self.inter.assign_add(tf.reduce_sum(y_ones * y_true))
        self.union.assign_add(tf.reduce_sum(y_ones + y_true))
    def update_state(self, y_true, y_pred, sample_weight = None):
        self.inter_union(y_true, y_pred)
    def reset_states(self):
        self.inter.assign(0.0)
        self.union.assign(0.0)
class SingleDice(tf.keras.metrics.Metric):
    def __init__(self, name = 'dice', **kwargs):
        super().__init__(name = name, **kwargs)
        self.inter = tf.Variable(0.0)
        self.union = tf.Variable(0.0)
    def result(self):
        eps = 1e-8
        return (2 * self.inter + eps) / (self.union + eps)
    def inter_union(self, y_true, y_pred):
        self.inter.assign_add(tf.reduce_sum(y_true * y_pred))
        self.union.assign_add(tf.reduce_sum(y_true + y_pred))
    def update_state(self, y_true, y_pred, sample_weight = None):
        self.inter_union(y_true, y_pred)
    def reset_states(self):
        self.inter.assign(0.0)
        self.union.assign(0.0)

In [10]:
class ParamScheduler:
    def __init__(self, start, end, num_iter):
        self.start = start
        self.end = end
        self.num_iter = num_iter
        self.idx = -1
        
        
    def step(self):
        self.idx+=1
        return self.func(self.start, self.end, self.idx/self.num_iter)
    
    def reset(self):
        self.idx=-1
        
    def is_complete(self):
        return self.idx >= self.num_iter

class CosineScheduler(ParamScheduler):
    def func(self, start_val, end_val, pct):
        cos_out = np.cos(np.pi * pct) + 1
        return end_val + (start_val - end_val)/2 * cos_out

class OneCycleScheduler(keras.callbacks.Callback):
    
    def __init__(self, init_lr, max_lr, min_lr, momentums=(0.95,0.85), start_div=25., pct_start=0.3, verbose=True, sched=CosineScheduler, end_div=None):
        self.max_lr, self.momentums, self.start_div, self.pct_start, self.verbose, self.sched, self.end_div = max_lr, momentums, start_div, pct_start, verbose, sched, end_div
        if self.end_div is None:
            self.end_div = start_div * 1e4
        self.logs = {}
        self.min_lr = min_lr
        self.init_lr = init_lr
        
    def on_train_begin(self, logs=None):
        self.num_epochs = self.params['epochs']
        self.steps_per_epoch = self.params['steps']
        self.start_lr = self.max_lr/self.start_div * self.init_lr
        self.end_lr = self.max_lr/self.end_div * self.init_lr
        self.num_iter = self.num_epochs * self.steps_per_epoch
        self.num_iter_1 = int(self.pct_start*self.num_iter)
        self.num_iter_2 = self.num_iter - self.num_iter_1
        self.lr_scheds = (self.sched(self.start_lr, self.max_lr * self.init_lr, self.num_iter_1), self.sched(self.max_lr * self.init_lr, self.end_lr, self.num_iter_2))
        self.sched_idx = 0
        self.optimizer_params_step()   
        
    def optimizer_params_step(self):
        cur_lr = min(self.model.optimizer.lr, self.init_lr)
        max_lr = cur_lr * self.max_lr
        min_lr = cur_lr * self.min_lr # scale with Reduce LR on Plateau
        # Change Param Scheduler
        if self.sched_idx == 0:
            self.lr_scheds[self.sched_idx].start = cur_lr
            self.lr_scheds[self.sched_idx].end = max_lr
        else:
            self.lr_scheds[self.sched_idx].start = cur_lr
            self.lr_scheds[self.sched_idx].end = min_lr
        next_lr = self.lr_scheds[self.sched_idx].step()
        # update optimizer params
        K.set_value(self.model.optimizer.lr, max(next_lr, min_lr))
        
    def on_batch_end(self, batch, logs=None):
        if self.sched_idx >= len(self.lr_scheds):
            self.model.stop_training=True
            return
        self.optimizer_params_step()
        if self.lr_scheds[self.sched_idx].is_complete():
            self.sched_idx += 1
            
    def on_epoch_end(self, epoch, logs=None):
        if epoch >= self.num_epochs:
            self.model.stop_training=True
            return
class BestDiceTh(keras.metrics.Metric):
    def __init__(self, name = 'best_dice_th', **kwargs):
        super().__init__(name = name, **kwargs)
        self.th = np.arange(0, 1, 0.01)
        self.num_thresh = len(self.th)
        self.inter = [tf.Variable(0.0) for i in range(self.num_thresh)]
        self.union = [tf.Variable(0.0) for i in range(self.num_thresh)]
    def inter_union(self, y_true, y_pred):
        y_ones = y_pred[:, :, :, 1]
        y_true = tf.squeeze(y_true)
        for idx in range(self.num_thresh):
            th = self.th[idx]
            pred = tf.cast((y_ones > th), tf.float32)
            self.inter[idx].assign_add(tf.reduce_sum(pred * y_true))
            self.union[idx].assign_add(tf.reduce_sum(pred + y_true))
    def result(self):
        best_th = 0.0
        best = 0.0
        eps = 1e-8
        for idx in range(self.num_thresh):
            dice = (2 * self.inter[idx] + eps) / (self.union[idx] + eps)
            if dice > best:
                best = tf.cast(dice, tf.float32)
                best_th = tf.cast(self.th[idx], tf.float32)
        return best_th
    def update_state(self, y_true, y_pred, sample_weight = None):
        self.inter_union(y_true, y_pred)
    def reset_states(self):
        for idx in range(self.num_thresh):
            self.inter[idx].assign(0.0)
            self.union[idx].assign(0.0)
class DiceTh(keras.metrics.Metric):
    def __init__(self, name = 'dice_th', **kwargs):
        super().__init__(name = name, **kwargs)
        self.th = np.arange(0, 1, 0.01)
        self.num_thresh = len(self.th)
        self.inter = [tf.Variable(0.0) for i in range(self.num_thresh)]
        self.union = [tf.Variable(0.0) for i in range(self.num_thresh)]
    def reset(self):
        for idx in range(self.num_thresh):
            self.inter[idx].assign(0.0)
            self.union[idx].assign(0.0)
    def inter_union(self, y_true, y_pred):
        y_ones = y_pred[:, :, :, 1]
        y_true = tf.squeeze(y_ones)
        for idx in range(self.num_thresh):
            th = self.th[idx]
            pred = tf.cast((y_ones > th), tf.float32) 
            self.inter[idx].assign_add(tf.reduce_sum(pred * y_true))
            self.union[idx].assign_add(tf.reduce_sum(pred + y_true))
    def update_state(self, y_true, y_pred, sample_weight = None):
        self.inter_union(y_true, y_pred)
    def result(self):
        best_dice = 0.0
        eps = 1e-8
        for idx in range(self.num_thresh):
            dice = (2 * self.inter[idx] + eps) / (self.union[idx] + eps) 
            if dice >best_dice:
                best_dice = tf.cast(dice, tf.float32)
        return best_dice

In [11]:
class LogCallback(keras.callbacks.Callback):
    def __init__(self):
        self.dice_soft = 0
        self.dice_th = 0
        self.best_dice_th = 0
    def on_epoch_end(self, epoch, logs = None):
        if logs is not None:
            if logs['val_dice'] > self.dice_soft:
                self.dice_soft = logs['val_dice']
            if logs['val_dice_th'] > self.dice_th:
                self.dice_th = logs['val_dice_th']
                self.best_dice_th = logs['val_best_dice_th']
        print(f"E: {epoch}, BD: {round(self.dice_soft, 3)}, BDT: {round(self.dice_th, 3)}, BBDT: {round(self.best_dice_th, 3)}")

In [12]:
class Model(tf.keras.Model):
    def __init__(self):
        super().__init__()
        if TrainConfig.num_classes == 1:
            self.model = sm.Unet(P['BACKBONE'], activation = 'sigmoid', classes = TrainConfig.num_classes, encoder_weights='imagenet')
        else:
            self.model = sm.Unet(P['BACKBONE'], activation = 'softmax', classes = TrainConfig.num_classes, encoder_weights ='imagenet')
        self.layer = tf.keras.layers.Layer(dtype = tf.float32)
    def __call__(self, x, training = True):
        return self.layer(self.model(x, training = training))

# Model fit

In [13]:
STEPS_PER_EPOCH = P['STEPS_COE'] * 11000 // BATCH_SIZE # approx 11000 images in the dataset.
FOLDS_TO_TRAIN = [0]
for fold in FOLDS_TO_TRAIN:
    # BUILD MODEL
    K.clear_session()
    with strategy.scope():   
        model = Model()
        model.compile(optimizer=tfa.optimizers.Lookahead(
            tfa.optimizers.AdamW(learning_rate=TrainConfig.lr, weight_decay = TrainConfig.weight_decay)
          ),
                      
          loss = loss_fn,
          metrics=[Dice(),
                   BestDiceTh(),
                   DiceTh()
        ])
        
    checkpoint = tf.keras.callbacks.ModelCheckpoint('/kaggle/working/model-fold-%i.h5'%fold,
                                 verbose=1, monitor='val_dice',
                                 mode='max',
                                save_weights_only = True,
                                save_best_only=True)

    early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_dice',mode = 'max', patience=TrainConfig.early_stop, restore_best_weights=True)
    reduce = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=TrainConfig.patience, min_lr=TrainConfig.min_lr)
    cyclic_lr = OneCycleScheduler(TrainConfig.lr, TrainConfig.max_lr, TrainConfig.min_lr)
    logs = LogCallback()
    cbs = [
        checkpoint,
        early_stop,
        reduce,
        cyclic_lr,
        logs
    ]
    history = model.fit(
        get_training_dataset(fold),
        epochs = P['EPOCHS'],
        steps_per_epoch = STEPS_PER_EPOCH,
        callbacks = cbs,
        validation_data = get_validation_dataset(fold),
        verbose=P['VERBOSE']
    )   

Downloading data from https://github.com/Callidior/keras-applications/releases/download/efficientnet/efficientnet-b2_weights_tf_dim_ordering_tf_kernels_autoaugment_notop.h5
trainning data load
AUGMENTING
validate data load

Epoch 00001: val_dice improved from -inf to 0.10538, saving model to /kaggle/working/model-fold-0.h5
E: 0, BD: 0.105, BDT: 0.661, BBDT: 0.97

Epoch 00002: val_dice improved from 0.10538 to 0.14559, saving model to /kaggle/working/model-fold-0.h5
E: 1, BD: 0.146, BDT: 0.661, BBDT: 0.97

Epoch 00003: val_dice improved from 0.14559 to 0.17961, saving model to /kaggle/working/model-fold-0.h5
E: 2, BD: 0.18, BDT: 0.661, BBDT: 0.97

Epoch 00004: val_dice improved from 0.17961 to 0.25341, saving model to /kaggle/working/model-fold-0.h5
E: 3, BD: 0.253, BDT: 0.661, BBDT: 0.97

Epoch 00005: val_dice improved from 0.25341 to 0.36691, saving model to /kaggle/working/model-fold-0.h5
E: 4, BD: 0.367, BDT: 0.661, BBDT: 0.97

Epoch 00006: val_dice improved from 0.36691 to 0.52257,