In [1]:
from dataloader import ClfDataset,ClfSegDataset, ClfAttentionDataset, get_balanced_loader, get_loader, get_mixup_gen
from mylib.models import densenet, densenetf , resnet,  metrics, losses,densenetf_avr

from tensorflow.python.keras.callbacks import ModelCheckpoint, CSVLogger, TensorBoard, EarlyStopping, ReduceLROnPlateau,LearningRateScheduler
from tensorflow.python.keras.optimizers import Adam
import tensorflow as tf
import os
import pandas as pd

# Restrict CUDA to device index 0.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
physical_devices = tf.config.experimental.list_physical_devices('GPU')
# Turn on memory growth for every GPU TensorFlow reports. Iterating (rather than
# indexing [0]) keeps the notebook importable on machines where no GPU is visible.
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, True)
# Table read from 'train_10fold.csv'; main() hands it to ClfAttentionDataset as `lines`.
lines = pd.read_csv('train_10fold.csv')

def main(batch_sizes, crop_size, random_move, learning_rate,
         model_path, train_subset, val_subset,
         segmentation_task_ratio, weight_decay, save_folder, epochs):
    '''
    Build the data generators, compile a ``densenetf_avr`` model and train it.

    All artefacts (per-epoch weights, best weights, CSV log, TensorBoard logs)
    are written below ``tmp/<save_folder>/``. The dataset table is taken from
    the module-level ``lines`` variable.

    :param batch_sizes: the number of examples of each class in a single batch
    :param crop_size: the input size
    :param random_move: the random move in data augmentation (training set only;
        the validation set is built with ``move=None``)
    :param learning_rate: learning rate of the optimizer
    :param model_path: forwarded to ``densenetf_avr.get_compiled`` as ``weights``
        (presumably a weights file to start from, or None -- confirm against
        ``get_compiled``)
    :param train_subset: subset selector passed to ``ClfAttentionDataset`` for training data
    :param val_subset: subset selector passed to ``ClfAttentionDataset`` for validation data
    :param segmentation_task_ratio: the weight of segmentation loss in total loss.
        Not used by this function; kept so existing callers keep working.
    :param weight_decay: l2 weight decay
    :param save_folder: where to save the snapshots, tensorflow logs, etc.
    :param epochs: how many epochs to run
    :return: None
    '''
    batch_size = sum(batch_sizes)

    # Make sure the output directory exists before any callback writes into it.
    # Paths are joined with '/' by hand so they stay identical to the historical layout.
    output_dir = 'tmp/%s' % save_folder
    os.makedirs(output_dir, exist_ok=True)

    train_dataset = ClfAttentionDataset(crop_size=crop_size, subset=train_subset,
                                        move=random_move, lines=lines)
    val_dataset = ClfAttentionDataset(crop_size=crop_size, subset=val_subset,
                                      move=None, lines=lines)

    # Two generators over the same training set (class-balanced and plain) are
    # combined by get_mixup_gen. NOTE(review): 0.3 is presumably the mixup
    # strength -- confirm against dataloader.get_mixup_gen.
    balanced_gen = get_balanced_loader(train_dataset, batch_sizes=batch_sizes)
    plain_gen = get_loader(train_dataset, batch_size=batch_size)
    train_loader = get_mixup_gen(balanced_gen, plain_gen, 0.3, batch_size)

    val_loader = get_loader(val_dataset, batch_size=batch_size)

    # `learning_rate` is the current spelling of the deprecated `lr` argument.
    model = densenetf_avr.get_compiled(output_size=2,
                                       optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                                       loss='categorical_crossentropy',
                                       metrics=['accuracy', metrics.precision, metrics.recall, metrics.fmeasure],
                                       weights=model_path,
                                       weight_decay=weight_decay)

    # Snapshot after every epoch (ModelCheckpoint's default frequency; the
    # deprecated `period=1` argument is no longer passed).
    checkpointer = ModelCheckpoint(filepath=output_dir + '/weights.{epoch:02d}.h5', verbose=1,
                                   save_weights_only=True, monitor='val_accuracy')

    # Separate file that only ever holds the weights with the best val_accuracy.
    best_keeper = ModelCheckpoint(filepath=output_dir + '/best.h5', verbose=1, save_weights_only=True,
                                  monitor='val_accuracy', save_best_only=True, mode='auto')

    csv_logger = CSVLogger(output_dir + '/training.csv')

    # NOTE(review): the very large profile_batch presumably keeps the profiler
    # from ever triggering -- confirm.
    tensorboard = TensorBoard(log_dir=output_dir + '/logs/', profile_batch=100000000)

    early_stopping = EarlyStopping(monitor='val_accuracy', min_delta=0, mode='auto',
                                   patience=140, verbose=1)

    # The learning rate is adapted only by this plateau-based reducer.
    # `min_delta` replaces the deprecated `epsilon` keyword.
    lr_reducer = ReduceLROnPlateau(monitor='val_accuracy', factor=0.334, patience=10,
                                   verbose=1, mode='auto', min_delta=1.e-5, cooldown=2, min_lr=0)

    # Model.fit accepts generators directly; fit_generator is deprecated.
    model.fit(train_loader, steps_per_epoch=len(train_dataset) // batch_size, max_queue_size=500,
              validation_data=val_loader, epochs=epochs, validation_steps=len(val_dataset) // batch_size,
              callbacks=[checkpointer, early_stopping, best_keeper, lr_reducer, csv_logger, tensorboard])

Using TensorFlow backend.


if __name__ == '__main__':
    from random import shuffle

    # Weights used to initialise the next run; None for the very first run.
    best_model = None
    # Five rounds; the learning rate shrinks by a factor of 0.333 each round.
    for j in range(5):
        # Visit the five subsets in a fresh random order every round.
        x = [0, 1, 2, 3, 4]
        shuffle(x)
        for i in x:
            # Hold subset `i` out for validation and train on the remaining four.
            index = list(range(5))
            val_subset = [index.pop(i)]
            train_subset = index
            main(batch_sizes=[2, 2],
                 crop_size=[32, 32, 32],
                 train_subset=train_subset,
                 val_subset=val_subset,
                 model_path=best_model,
                 random_move=3,
                 learning_rate=1.e-4 * (0.333 ** j),
                 segmentation_task_ratio=0.2,
                 weight_decay=0.,
                 save_folder='test',
                 epochs=20)
            # main() writes 'tmp/<save_folder>/best.h5' plus per-epoch
            # 'weights.NN.h5' files; a plain 'weights.h5' is never produced,
            # so the next run is initialised from the best-weights file.
            best_model = './tmp/test/best.h5'

# Ten training runs from scratch with identical settings; only the output
# folder name changes with the loop index.
# NOTE(review): val_subset is fixed to [10] while train_subset covers 0..9 and
# `i` is not used for the split -- confirm that a fixed split (rather than
# rotating the held-out subset as the `__main__` block does) is intended.
for i in range(10):
    index = list(range(10))
    val_subset = [10]
    train_subset = index

    run_settings = dict(
        batch_sizes=[2, 2],
        crop_size=[32, 32, 32],
        train_subset=train_subset,
        val_subset=val_subset,
        model_path=None,
        random_move=3,
        learning_rate=3e-4,
        segmentation_task_ratio=0.2,
        weight_decay=1e-6,
        save_folder='10foldval(124nols)_%d' % i,
        epochs=110,
    )
    main(**run_settings)

In [2]:
# Single 160-epoch run from scratch; outputs land in
# tmp/224avrpoolingattentionfGlobalavrk=18dropout/.
# NOTE(review): val_subset [1] is also contained in train_subset (0..8) --
# confirm that this overlap between training and validation data is intended.
single_run_settings = dict(
    batch_sizes=[2, 2],
    crop_size=[32, 32, 32],
    train_subset=list(range(9)),
    val_subset=[1],
    model_path=None,
    random_move=4,
    learning_rate=3e-4,
    segmentation_task_ratio=0.2,
    weight_decay=1e-6,
    save_folder='224avrpoolingattentionfGlobalavrk=18dropout',
    epochs=160,
)
main(**single_run_settings)

Model hyper-parameters: {'activation': <function <lambda> at 0x000001DE5883BE58>, 'bn_scale': True, 'weight_decay': 1e-06, 'kernel_initializer': 'he_uniform', 'first_scale': <function <lambda> at 0x000001DE58839948>, 'dhw': [32, 32, 32], 'k': 18, 'bottleneck': [2, 2, 4], 'compression': 2, 'first_layer': 32, 'down_structure': [2, 2, 4], 'output_size': 2, 'dropout_rate': None}
Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 32, 32, 32,  0                                            
__________________________________________________________________________________________________
lambda (Lambda)                 (None, 32, 32, 32, 1 0           input_1[0][0]                    
__________________________________________________________________________________________________
conv3d (Conv3

Instructions for updating:
Please use Model.fit, which supports generators.
Size 465
Size 465
  ...
    to  
  ['...']
Size 92
  ...
    to  
  ['...']
Train for 116 steps, validate for 23 steps
Epoch 1/160
Epoch 00001: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.01.h5

Epoch 00001: val_accuracy improved from -inf to 0.46739, saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/best.h5
Epoch 2/160
Epoch 00002: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.02.h5

Epoch 00002: val_accuracy did not improve from 0.46739
Epoch 3/160
Epoch 00003: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.03.h5

Epoch 00003: val_accuracy improved from 0.46739 to 0.50000, saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/best.h5
Epoch 4/160
Epoch 00004: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.04.h5

Epoch 00004: val_accuracy improved from 0.50000 to 0.55435, saving model 

Epoch 12/160
Epoch 00012: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.12.h5

Epoch 00012: val_accuracy did not improve from 0.67391
Epoch 13/160
Epoch 00013: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.13.h5

Epoch 00013: val_accuracy did not improve from 0.67391
Epoch 14/160
Epoch 00014: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.14.h5

Epoch 00014: val_accuracy did not improve from 0.67391
Epoch 15/160
Epoch 00015: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.15.h5

Epoch 00015: val_accuracy did not improve from 0.67391
Epoch 16/160
Epoch 00016: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.16.h5

Epoch 00016: val_accuracy improved from 0.67391 to 0.69565, saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/best.h5
Epoch 17/160
Epoch 00017: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.17.h5

Epoch 00017: val_a

Epoch 24/160
Epoch 00024: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.24.h5

Epoch 00024: val_accuracy did not improve from 0.69565
Epoch 25/160
Epoch 00025: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.25.h5

Epoch 00025: val_accuracy did not improve from 0.69565
Epoch 26/160
Epoch 00026: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.26.h5

Epoch 00026: val_accuracy did not improve from 0.69565

Epoch 00026: ReduceLROnPlateau reducing learning rate to 0.00010020000475924463.
Epoch 27/160
Epoch 00027: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.27.h5

Epoch 00027: val_accuracy did not improve from 0.69565
Epoch 28/160
Epoch 00028: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.28.h5

Epoch 00028: val_accuracy did not improve from 0.69565
Epoch 29/160
Epoch 00029: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.29.h5

Epoch 00029: 

Epoch 36/160
Epoch 00036: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.36.h5

Epoch 00036: val_accuracy improved from 0.69565 to 0.72826, saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/best.h5
Epoch 37/160
Epoch 00037: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.37.h5

Epoch 00037: val_accuracy did not improve from 0.72826
Epoch 38/160
Epoch 00038: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.38.h5

Epoch 00038: val_accuracy did not improve from 0.72826
Epoch 39/160
Epoch 00039: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.39.h5

Epoch 00039: val_accuracy did not improve from 0.72826
Epoch 40/160
Epoch 00040: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.40.h5

Epoch 00040: val_accuracy did not improve from 0.72826
Epoch 41/160
Epoch 00041: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.41.h5

Epoch 00041: val_a

Epoch 00048: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.48.h5

Epoch 00048: val_accuracy did not improve from 0.77174
Epoch 49/160
Epoch 00049: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.49.h5

Epoch 00049: val_accuracy did not improve from 0.77174
Epoch 50/160
Epoch 00050: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.50.h5

Epoch 00050: val_accuracy did not improve from 0.77174
Epoch 51/160
Epoch 00051: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.51.h5

Epoch 00051: val_accuracy did not improve from 0.77174
Epoch 52/160
Epoch 00052: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.52.h5

Epoch 00052: val_accuracy did not improve from 0.77174

Epoch 00052: ReduceLROnPlateau reducing learning rate to 3.346680209506303e-05.
Epoch 53/160
Epoch 00053: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.53.h5

Epoch 00053: val_accuracy d

Epoch 00060: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.60.h5

Epoch 00060: val_accuracy did not improve from 0.77174
Epoch 61/160
Epoch 00061: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.61.h5

Epoch 00061: val_accuracy did not improve from 0.77174
Epoch 62/160
Epoch 00062: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.62.h5

Epoch 00062: val_accuracy did not improve from 0.77174
Epoch 63/160
Epoch 00063: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.63.h5

Epoch 00063: val_accuracy did not improve from 0.77174

Epoch 00063: ReduceLROnPlateau reducing learning rate to 1.1177912492712494e-05.
Epoch 64/160
Epoch 00064: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.64.h5

Epoch 00064: val_accuracy did not improve from 0.77174
Epoch 65/160
Epoch 00065: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.65.h5

Epoch 00065: val_accuracy 

Epoch 00072: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.72.h5

Epoch 00072: val_accuracy did not improve from 0.77174
Epoch 73/160
Epoch 00073: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.73.h5

Epoch 00073: val_accuracy did not improve from 0.77174
Epoch 74/160
Epoch 00074: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.74.h5

Epoch 00074: val_accuracy did not improve from 0.77174

Epoch 00074: ReduceLROnPlateau reducing learning rate to 3.7334228454710684e-06.
Epoch 75/160
Epoch 00075: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.75.h5

Epoch 00075: val_accuracy did not improve from 0.77174
Epoch 76/160
Epoch 00076: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.76.h5

Epoch 00076: val_accuracy did not improve from 0.77174
Epoch 77/160
Epoch 00077: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.77.h5

Epoch 00077: val_accuracy 

Epoch 00084: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.84.h5

Epoch 00084: val_accuracy did not improve from 0.77174
Epoch 85/160
Epoch 00085: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.85.h5

Epoch 00085: val_accuracy did not improve from 0.77174

Epoch 00085: ReduceLROnPlateau reducing learning rate to 1.2469632060856384e-06.
Epoch 86/160
Epoch 00086: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.86.h5

Epoch 00086: val_accuracy did not improve from 0.77174
Epoch 87/160
Epoch 00087: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.87.h5

Epoch 00087: val_accuracy did not improve from 0.77174
Epoch 88/160
Epoch 00088: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.88.h5

Epoch 00088: val_accuracy did not improve from 0.77174
Epoch 89/160
Epoch 00089: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.89.h5

Epoch 00089: val_accuracy 

Epoch 00096: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.96.h5

Epoch 00096: val_accuracy did not improve from 0.77174

Epoch 00096: ReduceLROnPlateau reducing learning rate to 4.164856973147835e-07.
Epoch 97/160
Epoch 00097: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.97.h5

Epoch 00097: val_accuracy did not improve from 0.77174
Epoch 98/160
Epoch 00098: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.98.h5

Epoch 00098: val_accuracy did not improve from 0.77174
Epoch 99/160
Epoch 00099: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.99.h5

Epoch 00099: val_accuracy did not improve from 0.77174
Epoch 100/160
Epoch 00100: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.100.h5

Epoch 00100: val_accuracy did not improve from 0.77174
Epoch 101/160
Epoch 00101: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.101.h5

Epoch 00101: val_accura

Epoch 108/160
Epoch 00108: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.108.h5

Epoch 00108: val_accuracy did not improve from 0.77174
Epoch 109/160
Epoch 00109: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.109.h5

Epoch 00109: val_accuracy did not improve from 0.77174
Epoch 110/160
Epoch 00110: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.110.h5

Epoch 00110: val_accuracy did not improve from 0.77174
Epoch 111/160
Epoch 00111: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.111.h5

Epoch 00111: val_accuracy did not improve from 0.77174
Epoch 112/160
Epoch 00112: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.112.h5

Epoch 00112: val_accuracy did not improve from 0.77174
Epoch 113/160
Epoch 00113: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.113.h5

Epoch 00113: val_accuracy did not improve from 0.77174
Epoch 114/160
Epoch 00114: s

Epoch 00120: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.120.h5

Epoch 00120: val_accuracy did not improve from 0.77174
Epoch 121/160
Epoch 00121: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.121.h5

Epoch 00121: val_accuracy did not improve from 0.77174
Epoch 122/160
Epoch 00122: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.122.h5

Epoch 00122: val_accuracy did not improve from 0.77174
Epoch 123/160
Epoch 00123: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.123.h5

Epoch 00123: val_accuracy did not improve from 0.77174
Epoch 124/160
Epoch 00124: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.124.h5

Epoch 00124: val_accuracy did not improve from 0.77174
Epoch 125/160
Epoch 00125: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.125.h5

Epoch 00125: val_accuracy did not improve from 0.77174
Epoch 126/160
Epoch 00126: saving model to

Epoch 00132: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.132.h5

Epoch 00132: val_accuracy did not improve from 0.77174
Epoch 133/160
Epoch 00133: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.133.h5

Epoch 00133: val_accuracy did not improve from 0.77174
Epoch 134/160
Epoch 00134: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.134.h5

Epoch 00134: val_accuracy did not improve from 0.77174
Epoch 135/160
Epoch 00135: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.135.h5

Epoch 00135: val_accuracy did not improve from 0.77174
Epoch 136/160
Epoch 00136: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.136.h5

Epoch 00136: val_accuracy did not improve from 0.77174
Epoch 137/160
Epoch 00137: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.137.h5

Epoch 00137: val_accuracy did not improve from 0.77174
Epoch 138/160
Epoch 00138: saving model to

Epoch 00144: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.144.h5

Epoch 00144: val_accuracy did not improve from 0.77174
Epoch 145/160
Epoch 00145: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.145.h5

Epoch 00145: val_accuracy did not improve from 0.77174
Epoch 146/160
Epoch 00146: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.146.h5

Epoch 00146: val_accuracy did not improve from 0.77174
Epoch 147/160
Epoch 00147: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.147.h5

Epoch 00147: val_accuracy did not improve from 0.77174
Epoch 148/160
Epoch 00148: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.148.h5

Epoch 00148: val_accuracy did not improve from 0.77174
Epoch 149/160
Epoch 00149: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.149.h5

Epoch 00149: val_accuracy did not improve from 0.77174
Epoch 150/160
Epoch 00150: saving model to

Epoch 00156: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.156.h5

Epoch 00156: val_accuracy did not improve from 0.77174
Epoch 157/160
Epoch 00157: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.157.h5

Epoch 00157: val_accuracy did not improve from 0.77174
Epoch 158/160
Epoch 00158: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.158.h5

Epoch 00158: val_accuracy did not improve from 0.77174
Epoch 159/160
Epoch 00159: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.159.h5

Epoch 00159: val_accuracy did not improve from 0.77174
Epoch 160/160
Epoch 00160: saving model to tmp/224avrpoolingattentionfGlobalavrk=18dropout/weights.160.h5

Epoch 00160: val_accuracy did not improve from 0.77174
