In [None]:
import os, re, time, json, math
import datetime
import PIL.Image, PIL.ImageFont, PIL.ImageDraw
import numpy as np
import tensorflow as tf
import pandas as pd
import google
from matplotlib import pyplot as plt
# Report the TensorFlow build in use so the run can be reproduced later.
print(f"Tensorflow version {tf.__version__}")

In [2]:
import tensorflow as tf
import tensorflow.keras as keras

from tf_records_generator import get_dataset, count_data_items
from efficientNet_B0 import EffNet0, freeze_blocks
from log_files import save_config, save_fold_iter_history, get_log_dir, save_logs_pickle, generate_columns
from kfold import get_kfold_split
from clr_schedule import CyclicLR

In [13]:
# Experiment configuration read by the cells below (dataset loading, model
# construction, training loop and logging helpers).
CONFIG = {
    # --- run identity / model architecture ---
    "log_prefix": "CLR_",
    "effnet_version": 0,
    "input_shape": (256, 256, 3),
    "image_resolution": 256,
    "trainable_base": False,
    # Timestamp taken when this cell executes, formatted as _DD_MM_YYYY_HH_MM.
    "time": datetime.datetime.now().strftime("_%d_%m_%Y_%H_%M"),
    "use_patient_data": True,
    "inner_blocks_frozen": 4,

    # --- learning-rate bounds handed to the cyclic LR callback ---
    "lr_min": 0.0001,
    "lr_max": 0.1,
    "lr_decay": None,
    # Multiplied by steps_per_epoch to obtain the CyclicLR step_size.
    "clr_step_coefficient": 8,

    # --- training loop ---
    # Placeholder; overwritten with strategy.num_replicas_in_sync once the
    # hardware-detection cell has run.
    "replicas": 8,
    # None means "derive from the number of items in the fold's files".
    "steps_per_epoch": None,
    "validation_steps": None,
    "batch_size": 40,
    "epochs": 100,
    "optimizer": "adam",
    "loss": "binary_crossentropy",

    # --- class imbalance handling ---
    # NOTE(review): 584 and 32542 look like the positive / negative sample
    # counts of the training set -- confirm against the dataset.
    "output_bias": np.log([584 / 32542]),
    "weight_for_0": (1 / 32542) * (32542 + 584) / 2.0,
    "weight_for_1": (1 / 584) * (32542 + 584) / 2.0,
}

In [4]:
# Colab always exports COLAB_GPU (its value is 0 or 1 depending on GPU
# presence), so the variable merely existing identifies a Colab backend.
IS_COLAB_BACKEND = os.environ.get('COLAB_GPU') is not None
if not IS_COLAB_BACKEND:
  # Local run: read the dataset from a directory relative to the notebook.
  data_dir = 'dataset/'
else:
  from google.colab import auth
  # Authenticate the Colab machine (and the TPU) with the user's credentials
  # so that private GCS buckets become readable.
  auth.authenticate_user()
  data_dir = 'gs://dataset_files/'

In [None]:
# Probe for a TPU first; when the resolver raises ValueError, treat that as
# "no TPU" and fall back to the logical GPUs TensorFlow reports.
try:
  tpu_resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
except ValueError:
  tpu_resolver = None
  gpus = tf.config.experimental.list_logical_devices("GPU")

# Pick the distribution strategy that matches the detected hardware.
if tpu_resolver:
  tf.config.experimental_connect_to_cluster(tpu_resolver)
  tf.tpu.experimental.initialize_tpu_system(tpu_resolver)
  strategy = tf.distribute.TPUStrategy(tpu_resolver)
  print('Running on TPU ', tpu_resolver.cluster_spec().as_dict()['worker'])
else:
  # `gpus` is only bound on the no-TPU path above, which is how this branch
  # is reached (assuming a successfully constructed resolver is truthy).
  gpu_names = [gpu.name for gpu in gpus]
  if len(gpu_names) > 1:
    strategy = tf.distribute.MirroredStrategy(gpu_names)
    print('Running on multiple GPUs ', gpu_names)
  else:
    # The default strategy covers both the single-GPU and the CPU case.
    strategy = tf.distribute.get_strategy()
    if gpu_names:
      print('Running on single GPU ', gpu_names[0])
    else:
      print('Running on CPU')

# Record the real replica count so batch-size arithmetic in later cells uses it.
replica_count = strategy.num_replicas_in_sync
print("Number of accelerators: ", replica_count)
CONFIG["replicas"] = replica_count


In [None]:
# TFRecord shards are looked up under data_dir + <image_resolution>.
path = f"{data_dir}{CONFIG['image_resolution']}"
train_shards = tf.io.gfile.glob(path + '/train*.tfrec')
test_shards = tf.io.gfile.glob(path + '/test*.tfrec')

# Sort the shard names so the file order is deterministic between runs.
files_train = np.sort(np.array(train_shards))
files_test = np.sort(np.array(test_shards))

test_ds = get_dataset(files_test, CONFIG)

In [7]:
def make_model(config: dict, strategy):
    """Build and compile the EffNet0 classifier inside a distribution scope.

    Args:
        config: Experiment settings. Reads ``trainable_base`` and ``loss``;
            optionally reads ``optimizer`` (any identifier accepted by
            ``keras.optimizers.get``; a missing or ``None`` value falls back
            to ``"adam"``). The whole dict is also forwarded to ``EffNet0``.
        strategy: ``tf.distribute`` strategy; the model, optimizer and
            metrics are all created within ``strategy.scope()``.

    Returns:
        The compiled model.
    """
    with strategy.scope():
        model = EffNet0(config=config, trainable_base=config['trainable_base'])

        # Honour the optimizer named in the config instead of hard-coding Adam,
        # so the saved configuration matches what was actually trained.
        # "adam" resolves to a default-constructed Adam, as before.
        optimizer = keras.optimizers.get(config.get("optimizer") or "adam")

        model.compile(
            optimizer=optimizer,
            loss=config["loss"],
            metrics=[
                keras.metrics.TruePositives(name='tp'),
                keras.metrics.FalsePositives(name='fp'),
                keras.metrics.TrueNegatives(name='tn'),
                keras.metrics.FalseNegatives(name='fn'),
                keras.metrics.BinaryAccuracy(name='accuracy'),
                keras.metrics.Precision(name='precision'),
                keras.metrics.Recall(name='recall'),
                keras.metrics.AUC(name='auc'),
            ],
        )
    return model


In [None]:
# Train one model per cross-validation fold and collect every fold's Keras
# history as one row of `logs`.
kfold_split = get_kfold_split(files_train,3)
save_config(CONFIG)
logs = pd.DataFrame()

# Batch size across all replicas; used to turn item counts into step counts.
global_batch_size = CONFIG["batch_size"] * CONFIG["replicas"]
class_weight = {0: CONFIG["weight_for_0"], 1: CONFIG["weight_for_1"]}

for fold, files in kfold_split.items():
    train_ds = get_dataset(files['train'], CONFIG, patient_info=True, repeat=True)
    val_ds = get_dataset(files['validation'], CONFIG, patient_info=True, repeat=True)

    # Step counts are derived per fold. Previously they were computed for the
    # first fold only and cached in CONFIG, so later folds (and re-runs of this
    # cell) silently reused stale values. An explicit value in CONFIG still
    # takes precedence; CONFIG itself is no longer mutated here.
    steps_per_epoch = CONFIG['steps_per_epoch']
    if steps_per_epoch is None:
        steps_per_epoch = math.ceil(count_data_items(files["train"]) / global_batch_size)
    validation_steps = CONFIG['validation_steps']
    if validation_steps is None:
        validation_steps = math.ceil(count_data_items(files["validation"]) / global_batch_size)

    clr = CyclicLR(base_lr=CONFIG["lr_min"], max_lr=CONFIG["lr_max"],
                   step_size=steps_per_epoch * CONFIG['clr_step_coefficient'],
                   mode='triangular')

    model = make_model(CONFIG, strategy=strategy)
    # NOTE(review): Keras documents batch_size / validation_batch_size as not
    # applicable to dataset inputs -- confirm get_dataset already batches and
    # whether these two arguments can be dropped.
    history = model.fit(train_ds, class_weight=class_weight,
                        validation_data=val_ds, validation_batch_size=global_batch_size,
                        batch_size=global_batch_size, epochs=CONFIG["epochs"],
                        steps_per_epoch=steps_per_epoch, validation_steps=validation_steps,
                        callbacks=[clr])

    if logs.empty:
        logs = generate_columns(logs, history.history)
    # DataFrame.append was removed in pandas 2.0; concatenating a one-row frame
    # stores each metric's per-epoch list in a single cell, as append did.
    logs = pd.concat([logs, pd.DataFrame([history.history])], ignore_index=True)

save_logs_pickle(logs,CONFIG)



In [16]:
# Archive this run's log directory with the `zip` command-line tool.
log_dir = get_log_dir(CONFIG)
# The archive is named after the last path component of log_dir.
# NOTE(review): basename is empty if log_dir ends with a slash -- confirm
# get_log_dir returns a path without a trailing separator.
zip_name = os.path.basename(log_dir)
# IPython shell escape: $zip_name and $log_dir are expanded from the Python
# variables defined above before the command is run.
!zip -r $zip_name $log_dir

  adding: logs/CLR_4_EffN0_256_07_11_2020_23_39/ (stored 0%)
  adding: logs/CLR_4_EffN0_256_07_11_2020_23_39/config.json (deflated 37%)
