# Path setting

In [None]:
# Root folder of the project; it lives under the '/content/drive' mount point
# used by the Google Drive cell of this notebook.
WORK_DIR = '/content/drive/MyDrive/RANZR/'
# Output location; the training section later reassigns SAVE_PATH to a
# per-model sub-folder.
SAVE_PATH = WORK_DIR

# Packages install

In [None]:
%%bash
# Extra packages imported later in this notebook: tf_clahe, efficientnet
# (installed from the GitHub repository), adabelief-tf and keras-adabound.
# NOTE(review): none of these installs is version-pinned, so a fresh runtime
# may resolve different releases than the ones this notebook was run with.
pip install tf_clahe
pip install -U git+https://github.com/qubvel/efficientnet
pip install adabelief-tf
pip install keras-adabound

Collecting tf_clahe
  Downloading https://files.pythonhosted.org/packages/bf/cd/0527c03ce45278628a1c478b428acb60d6772260e971afb99c65cab699a1/tf_clahe-0.1.0-py3-none-any.whl
Collecting tensorflow-addons>=0.10
  Downloading https://files.pythonhosted.org/packages/74/e3/56d2fe76f0bb7c88ed9b2a6a557e25e83e252aec08f13de34369cd850a0b/tensorflow_addons-0.12.1-cp37-cp37m-manylinux2010_x86_64.whl (703kB)
Installing collected packages: tensorflow-addons, tf-clahe
Successfully installed tensorflow-addons-0.12.1 tf-clahe-0.1.0
Collecting git+https://github.com/qubvel/efficientnet
  Cloning https://github.com/qubvel/efficientnet to /tmp/pip-req-build-78276tg2
Collecting keras_applications<=1.0.8,>=1.0.7
  Downloading https://files.pythonhosted.org/packages/71/e3/19762fdfc62877ae9102edf6342d71b28fbfd9dea3d2f96a882ce099b03f/Keras_Applications-1.0.8-py3-none-any.whl (50kB)
Building wheels for collected packages: efficientnet
  Building wheel for efficientnet (setup.py): started
  Building wheel for eff

  Running command git clone -q https://github.com/qubvel/efficientnet /tmp/pip-req-build-78276tg2


# Google drive

In [None]:
# Mount Google Drive at '/content/drive'; WORK_DIR points inside this mount,
# so this cell has to run before anything reads from or writes to WORK_DIR.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Import libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import models, layers
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.applications import Xception
from tensorflow.keras.optimizers import Adam
from functools import partial
import tensorflow_addons as tfa

import tf_clahe
import efficientnet.keras as efn 
from keras_adabound import AdaBound
from adabelief_tf import AdaBeliefOptimizer

# ignoring warnings
import warnings
warnings.simplefilter("ignore")

import os, cv2

# Dataset preprocessing

In [None]:
from sklearn.model_selection import GroupKFold

# Train / validation split
#
# The rows are shuffled with a fixed seed and then divided into five folds
# with GroupKFold, using PatientID as the group so that all studies of one
# patient end up in the same fold. The first four folds form the training
# set, the last one the validation set.

gkf = GroupKFold(5)

df = pd.read_csv(WORK_DIR + 'train.csv')
df = df.sample(frac=1, random_state = 69).reset_index(drop=True)

# Every column except the first and the last one is treated as a label
# (presumably the first is StudyInstanceUID and the last PatientID, the two
# columns dropped below -- verify against train.csv).
label_cols = df.columns[1:-1]

folds = gkf.split(df['StudyInstanceUID'], df[label_cols], groups=df['PatientID'])

# Keep only the held-out indices of every split: they are the five disjoint
# folds. The index was reset above, so label-based .loc matches the
# positional indices returned by GroupKFold.
df_folds = [df.loc[held_out_idx] for _, held_out_idx in folds]

# pd.concat replaces the former DataFrame.append loop (DataFrame.append was
# removed in pandas 2.0) and yields the same rows in the same order.
df_train = pd.concat(df_folds[:-1])
df_valid = df_folds[-1]


train_paths = WORK_DIR + "/train/" + df_train['StudyInstanceUID'] + '.jpg'
valid_paths = WORK_DIR + "/train/" + df_valid['StudyInstanceUID'] + '.jpg'
train_labels = df_train.drop(columns = ["StudyInstanceUID", "PatientID"]).values
valid_labels = df_valid.drop(columns = ["StudyInstanceUID", "PatientID"]).values

In [None]:
# Main parameters
BATCH_SIZE = 8
# Whole batches per pass; the remainder is ignored (build_dataset batches
# with drop_remainder=True).
STEPS_PER_EPOCH = len(train_paths) // BATCH_SIZE
VALIDATION_STEPS = len(valid_paths) // BATCH_SIZE
EPOCHS = 30
# Image shape used by the pipeline: SIZE[:2] is the resize target and the
# full tuple is the static shape set on every decoded image.
SIZE = (600, 600, 3)
NUM_CLASSES = len(label_cols)

# Label Weights

In [None]:
# Weights used by the custom loss: column "0" multiplies the (1 - y_true)
# term and column "1" the y_true term of the binary cross-entropy.
# Presumably the file holds one row per label, in label_cols order -- verify.
label_weights = pd.read_csv(WORK_DIR + '/weights.csv', names=["0", "1"]).to_numpy()

# Augmentation

In [None]:
def multiplicative_noise(img, multiplier=(0.95, 1.05)):
    """Multiply every element of `img` by an independent random factor.

    Args:
        img: float image tensor.
        multiplier: two-element sequence ``(low, high)``; each factor is
            drawn uniformly from that interval.

    Returns:
        A tensor with the same shape as `img`.

    The noise tensor takes its shape from `img` itself instead of the global
    SIZE, so the function no longer breaks when the image size differs from
    SIZE. For a single SIZE-shaped image the result is distributed exactly
    as before.
    """
    noise = tf.random.uniform(shape = tf.shape(img),
                              minval = multiplier[0],
                              maxval = multiplier[1],
                              dtype = tf.float32)
    return img * noise


# Pool of image transforms the augmentation step picks from. Each entry is a
# callable that takes an image tensor and returns the transformed image.
# They are stored in a flat 1-D object array.
augmentations = np.array(
    [
        partial(tf.image.flip_left_right),
        partial(tf.image.random_contrast, lower = 0.8, upper = 1.2),
        partial(tf.image.random_brightness, max_delta = 0.2),
        partial(tf.image.random_saturation, lower = 0.8, upper = 1.2),
        partial(multiplicative_noise),
    ],
    dtype=object,
).reshape(-1)

# Dataset

In [None]:
def build_decoder(with_labels = True,
                  target_size = SIZE[:2], 
                  ext = 'jpg'):
    """Create the function that turns a file path into a model-ready image.

    Args:
        with_labels: when true the returned function takes ``(path, label)``
            and returns ``(image, label cast to float32)``; otherwise it
            takes only ``path`` and returns the image.
        target_size: size passed to ``tf.image.resize``.
        ext: image format, ``'jpg'``/``'jpeg'`` or ``'png'``.

    Returns:
        The decoding function described above. The produced image is read
        with 3 channels, resized, passed through CLAHE and divided by 255.

    Raises:
        ValueError: raised by the returned function (not by this builder)
            when `ext` is not one of the supported formats.
    """
    def _load_image(path):
        raw = tf.io.read_file(path)
        if ext in ['jpg', 'jpeg']:
            pixels = tf.image.decode_jpeg(raw, channels = 3)
        elif ext == 'png':
            pixels = tf.image.decode_png(raw, channels = 3)
        else:
            raise ValueError("Image extension not supported")

        pixels = tf.image.resize(pixels, target_size)
        # Contrast-limited adaptive histogram equalisation on 8x8 tiles.
        pixels = tf_clahe._clahe(pixels, clip_limit = 4, tile_grid_size = [8,8], gpu_optimized = True)
        return tf.cast(pixels, tf.float32) / 255.0

    def _load_image_and_label(path, label):
        return (_load_image(path), tf.cast(label, dtype=tf.float32))

    if with_labels:
        return _load_image_and_label
    return _load_image



def set_shapes(img, label, img_shape=SIZE):
    """Attach static shapes to one (image, label) pair of the dataset.

    Args:
        img: image tensor; its static shape is set to the first three
            entries of `img_shape`.
        label: label tensor; its static shape is set to ``[NUM_CLASSES]``.
        img_shape: expected image shape, defaults to SIZE.

    Returns:
        The same ``(img, label)`` pair.

    The former ``print(img)`` was a debugging leftover (it only printed a
    symbolic tensor while the dataset graph was traced) and has been removed.
    """
    img.set_shape([img_shape[0], img_shape[1], img_shape[2]])
    label.set_shape([NUM_CLASSES])
    return img, label



def _apply_transform_by_index(img, index):
    """Apply to `img` the entry of `augmentations` selected by the scalar int tensor `index`."""
    branches = [lambda fn=fn: fn(img) for fn in augmentations]
    return tf.switch_case(index, branch_fns = branches)


@tf.function
def augmentation(imgs, label):
    """Apply a random subset of `augmentations`, in random order, to one image.

    Between 1 and ``len(augmentations) - 1`` distinct transforms are applied,
    then the result is clipped to ``[0, 1]``. `label` is returned untouched.

    The previous version drew the subset with ``np.random`` inside a
    ``tf.function``. Python/NumPy code in a traced function runs only once,
    while the graph is built, so the very same subset was reused for every
    image of the dataset. The selection now uses TensorFlow random ops and
    is therefore re-drawn on every call.

    Args:
        imgs: float image tensor accepted by every entry of `augmentations`.
        label: passed through unchanged.

    Returns:
        ``(augmented image, label)``.
    """
    n_transforms = len(augmentations)
    # Same range as np.random.randint(1, n_transforms): 1 .. n_transforms - 1.
    n_selected = tf.random.uniform([], minval = 1, maxval = n_transforms, dtype = tf.int32)
    # Random permutation; its first n_selected entries are the transforms to
    # run, i.e. a sample without replacement in random order.
    order = tf.random.shuffle(tf.range(n_transforms))

    for position in range(n_transforms):
        imgs = tf.cond(
            position < n_selected,
            lambda current=imgs, index=order[position]: _apply_transform_by_index(current, index),
            lambda current=imgs: current,
        )
    imgs = tf.clip_by_value(imgs, 0., 1.)

    return imgs, label



def build_dataset(paths, labels = None, bsize = 32, cache = True,
                  decode_fn = None, augment_fn = None,
                  augment = True,
                  cache_dir = ""):
    """Build the batched ``tf.data`` pipeline for a list of image paths.

    Pipeline: decode -> (cache) -> (augment) -> set static shapes -> batch
    (incomplete last batch dropped) -> prefetch. The elements are not
    shuffled, so batches follow the order of `paths`.

    Args:
        paths: image file paths.
        labels: labels aligned with `paths`, or None for an image-only
            dataset.
        bsize: batch size.
        cache: cache the decoded images (in memory when `cache_dir` is
            empty, otherwise in files under `cache_dir`).
        decode_fn: custom decoding function; defaults to
            ``build_decoder(labels is not None)``.
        augment_fn: function ``(img, label) -> (img, label)`` used when
            `augment` is true; defaults to `augmentation`. This argument
            used to be ignored.
        augment: whether to apply `augment_fn`.
        cache_dir: cache location, created if needed.

    Returns:
        A ``tf.data.Dataset`` of ``(images, labels)`` batches, or of image
        batches when `labels` is None.
    """
    has_labels = labels is not None

    if cache_dir != "" and cache is True:
        os.makedirs(cache_dir, exist_ok=True)

    if decode_fn is None:
        decode_fn = build_decoder(has_labels)

    if augment_fn is None:
        augment_fn = augmentation

    if has_labels:
        augment_map = augment_fn
        shape_map = set_shapes
    else:
        # Image-only datasets previously crashed here because the augmentation
        # and shape functions both expect an (img, label) pair.
        def augment_map(img):
            return augment_fn(img, None)[0]

        def shape_map(img):
            img.set_shape(list(SIZE))
            return img

    AUTO = tf.data.experimental.AUTOTUNE
    slices = (paths, labels) if has_labels else paths

    dset = tf.data.Dataset.from_tensor_slices(slices)
    dset = dset.map(decode_fn, num_parallel_calls = AUTO)
    if cache:
        dset = dset.cache(cache_dir)
    if augment:
        dset = dset.map(augment_map, num_parallel_calls = AUTO)
    dset = dset.map(shape_map, num_parallel_calls = AUTO)
    # Prefetch is the last stage so that whole batches are prepared ahead of
    # the consumer; the produced batches are unchanged.
    dset = dset.batch(bsize, drop_remainder= True).prefetch(AUTO)
    return dset

In [None]:
# Tensorflow datasets (despite the `_df` suffix these are tf.data pipelines,
# not DataFrames). Both are cached; only the training one is augmented.
train_df = build_dataset(train_paths, labels = train_labels,
                         bsize = BATCH_SIZE, cache = True, augment = True)

valid_df = build_dataset(valid_paths, labels = valid_labels,
                         bsize = BATCH_SIZE, cache = True, augment = False)

# Custom Loss

In [None]:
import tensorflow.keras.backend as K
import math


def bce_l1(l1 = 1):
    """Build the training loss: weighted BCE plus L1 and squared-error terms.

    Args:
        l1: multiplier of the absolute-error term.

    Returns:
        A function ``(y_true, y_pred)`` that computes, element by element,
        ``bce * weight + l1 * |y_true - y_pred| + (y_true - y_pred) ** 2``
        and sums the result over axis 0. ``weight`` is
        ``label_weights[:, 0]`` where ``y_true`` is 0 and
        ``label_weights[:, 1]`` where it is 1.
    """
    def bce1(y_true,y_pred):
        residual = K.abs(y_true-y_pred)
        negative_weight = (1-y_true)*label_weights[:,0]
        positive_weight = y_true*label_weights[:,1]
        weighted_bce = K.binary_crossentropy(y_true,y_pred) * (negative_weight+positive_weight)
        per_element = weighted_bce + l1*residual + K.square(residual)
        return K.sum(per_element, axis = 0)
    return bce1

# EfficientNet B2


In [None]:
def create_model(conv_base, num_classes = None, dropout_rate = 0.3):
    """Put a multi-label classification head on top of a convolutional backbone.

    Head: global average pooling -> dropout -> dense layer with sigmoid
    activation (one independent probability per label).

    Args:
        conv_base: Keras model used as feature extractor; its input becomes
            the input of the new model and its name is reused.
        num_classes: number of output units. Defaults to NUM_CLASSES so the
            head always matches the label vectors (it was hard-coded to 11).
        dropout_rate: dropout rate applied before the dense layer.

    Returns:
        A ``tf.keras`` model mapping ``conv_base.input`` to the sigmoid
        outputs.
    """
    if num_classes is None:
        num_classes = NUM_CLASSES

    features = conv_base.output
    features = layers.GlobalAveragePooling2D()(features)
    features = layers.Dropout(dropout_rate)(features)
    outputs = layers.Dense(num_classes, activation = "sigmoid")(features)
    return tf.keras.models.Model(conv_base.input, outputs, name = conv_base.name)

In [None]:
# The rest of this notebook is written against tf.keras (layers, Model,
# callbacks, metrics), so the backbone is taken from the tf.keras flavour of
# the efficientnet package rather than from efficientnet.keras, which builds
# on the standalone Keras package. This rebinds the `efn` alias.
import efficientnet.tfkeras as efn


# EfficientNet-B2 feature extractor (no classification top) initialised with
# the 'noisy-student' weights.
backbone = efn.EfficientNetB2(
    include_top = False,
    weights = 'noisy-student',
    input_shape = SIZE
)
# The model name is later used for the output folder and file names.
backbone._name = 'EfficientNetB2'
model = create_model(backbone)
# To resume from a previous run:
#model.load_weights('/content/drive/MyDrive/RANZR/Final/EfficientNetB2/H5/Finale.h5')

# Area MCC

In [None]:
"""Matthews Correlation Coefficient Implementation."""

import numpy as np

import tensorflow as tf
from tensorflow.keras import backend as K

from tensorflow_addons.utils.types import AcceptableDTypes, FloatTensorLike, TensorLike
from typeguard import typechecked
#del globals()['AreaMCC'] 
# The class is only defined when no `AreaMCC` exists yet in the notebook
# namespace, so re-running this cell keeps the already defined class.
if globals().get("AreaMCC") is None:
  
  print('Defining AreaMCC')
  class AreaMCC(tf.keras.metrics.Metric):
      """Area under the curve "MCC versus decision threshold".

      For `num_threshold` thresholds ``start + i * step`` (with
      ``step = (stop - start) / num_threshold``) the predictions are
      binarised with ``y_pred > threshold``, the Matthews correlation
      coefficient is computed per label, and the values are summed weighted
      by ``step`` (a left Riemann sum over ``[start, stop]``).

      NOTE: every `update_state` call adds its area to the stored state and
      `result` does not divide by the number of calls, so the metric is only
      meaningful when it is updated once between resets. `sample_weight` is
      ignored.
      """

      @typechecked
      def __init__(
          self,
          num_labels: FloatTensorLike = 1,
          num_threshold: int = 200,
          start: float = 0.0,
          stop: float = 1.0,
          name: str = "AreaMCC",
          dtype: AcceptableDTypes = None,
          **kwargs,
      ):
          """Creates a Matthews Correlation Coefficient instance.

          Args:
              num_labels: number of labels (length of the state vector).
              num_threshold: number of thresholds in the integration.
              start: first threshold.
              stop: end of the threshold range.
              name: metric name.
              dtype: metric dtype.
              **kwargs: forwarded to ``tf.keras.metrics.Metric`` (they were
                  silently dropped before).
          """
          super().__init__(name=name, dtype=dtype, **kwargs)
          self.num_labels = num_labels
          self.num_threshold = num_threshold
          self.start = start 
          self.stop = stop
          self.mcc = self.add_weight(
              "MCC",
              shape=[self.num_labels],
              initializer="zeros",
              dtype=self.dtype,
          )


      def update_confusion_matrix_value(self, y_true, y_pred):
          """Return the per-label MCC of binarised predictions.

          Both inputs are expected to contain only 0/1 values; counts are
          taken along axis 0. A zero denominator yields an MCC of 0.
          """
          true_positive = tf.math.count_nonzero(y_true * y_pred, 0)
          # true_negative
          y_true_negative = tf.math.not_equal(y_true, 1.0)
          y_pred_negative = tf.math.not_equal(y_pred, 1.0)
          true_negative = tf.math.count_nonzero(
              tf.math.logical_and(y_true_negative, y_pred_negative), axis=0
          )
          # predicted sum
          pred_sum = tf.math.count_nonzero(y_pred, 0)
          # Ground truth label sum
          true_sum = tf.math.count_nonzero(y_true, 0)
          false_positive = pred_sum - true_positive
          false_negative = true_sum - true_positive

          numerator1 = true_positive * true_negative
          numerator2 = false_positive * false_negative
          numerator = tf.cast(numerator1 - numerator2, dtype = self.dtype)
          # denominator
          denominator1 = tf.cast(true_positive + false_positive, dtype = self.dtype)
          denominator2 = tf.cast(true_positive + false_negative, dtype = self.dtype)
          denominator3 = tf.cast(true_negative + false_positive, dtype = self.dtype)
          denominator4 = tf.cast(true_negative + false_negative, dtype = self.dtype)
          
          denominator = tf.math.sqrt(
              denominator1 * denominator2 * denominator3 * denominator4
          )

          mcc = tf.math.divide_no_nan(numerator, denominator)
          return mcc

      # TODO: sample_weights
      def update_state(self, y_true, y_pred, sample_weight=None):
          """Add the MCC area of this batch to the state (`sample_weight` is ignored)."""
          y_true = tf.cast(y_true, dtype=self.dtype)
          y_pred = tf.cast(y_pred, dtype=self.dtype)

          # Width of one integration step.
          step = tf.constant( (self.stop-self.start) / self.num_threshold)

          i = tf.constant(0, dtype=tf.int32)
          mcc = tf.zeros([self.num_labels],dtype=tf.float32)

          # Loop over the thresholds, accumulating MCC(threshold_i) * step.
          cond = lambda i, mcc : i < self.num_threshold
          body = lambda i, mcc : [i+1, 
                                  tf.add(mcc, 
                                      self.update_confusion_matrix_value(y_true, tf.cast(y_pred > (self.start + (tf.cast(i, tf.float32)*step)), dtype=self.dtype)) * step ) ]
          a = tf.while_loop(cond, body, [i, mcc])
          self.mcc.assign_add(tf.cast(a[1],self.dtype))

      def result(self):
          """Return the mean over the labels of the accumulated areas."""
          return tf.reduce_mean(self.mcc)

      def get_config(self):
          """Returns the serializable config of the metric."""

          # start/stop are included so that a metric rebuilt from its config
          # integrates over the same threshold range.
          config = {
              "num_labels": self.num_labels,
              "num_threshold": self.num_threshold,
              "start": self.start,
              "stop": self.stop,
          }
          base_config = super().get_config()
          return {**base_config, **config}

      def reset_state(self):
          """Resets all of the metric state variables."""
          reset_value = np.zeros(self.num_labels, dtype=self.dtype)
          K.batch_set_value([(v, reset_value) for v in self.variables])

      def reset_states(self):
          """Older Keras spelling of `reset_state`; kept for existing callers."""
          self.reset_state()

Defining AreaMCC


# Compute metrics

In [None]:
from time import time

def _normalize_area(area, width):
    """Map an MCC area from [-width, width] to [0, 1] (0.5 when the range is empty)."""
    if width <= 0:
        return 0.5
    return (area + width) / (2 * width)


def compute_metrics(model, dataset, mode = 'train', start_ = 0.0, normalize = True):
    """Evaluate `model` on `dataset` with AUC, AreaMCC and partial AreaMCC.

    The ground truth is taken from the notebook globals: `train_labels` when
    `mode` is ``'train'``, `valid_labels` otherwise, truncated to the number
    of whole batches. The dataset must therefore yield its samples in the
    same order as those arrays.

    Args:
        model: trained Keras model.
        dataset: dataset passed to ``model.predict``.
        mode: ``'train'`` selects the training labels, anything else the
            validation labels.
        start_: first threshold of the partial AreaMCC, integrated over
            ``[start_, 1]``.
        normalize: rescale both AreaMCC rows to ``[0, 1]``.

    Returns:
        Array of shape ``(3, n_labels + 1)``; rows are AUC, AreaMCC and
        partial AreaMCC, columns are the single labels followed by the value
        computed on all labels together.

    Changes with respect to the previous version: the partial AreaMCC is
    normalised according to its actual integration width (the fixed
    ``+ 1/2`` was only correct for ``start_ = 0.5``, where the result is
    identical), the number of labels is read from the label array instead of
    being hard-coded to 11, and debugging prints were removed.
    """
    s = 'Computing metrics %s dataset' % mode
    print('-' * len(s))
    print(s)
    print('-' * len(s))
    start = time()
    y_pred = model.predict(dataset) 
    y_pred = y_pred.astype(np.float32)
    if mode == 'train':
        y_true = np.array(train_labels[:STEPS_PER_EPOCH * BATCH_SIZE, :]) 
    else:
        y_true = np.array(valid_labels[:VALIDATION_STEPS * BATCH_SIZE, :]) 

    n_labels = y_true.shape[1]
    metrics = np.zeros((3, n_labels + 1))

    # Integration widths: AreaMCC spans [0, 1], the partial one [start_, 1].
    full_width = 1.0
    partial_width = 1.0 - start_

    mcc = AreaMCC(num_labels=1, num_threshold = 200, start = 0.0)
    pmcc = AreaMCC(num_labels=1, num_threshold = 200, start = start_)
    auc = tf.keras.metrics.AUC(num_thresholds = 200, multi_label = False)

    for j in range(n_labels):
        # --- AUC of the single label ---
        auc.update_state(y_true[:,j], y_pred[:,j])
        metrics[0,j] = auc.result().numpy()
        auc.reset_states()

        # --- AreaMCC of the single label ---
        mcc.update_state(y_true[:,j], y_pred[:,j])
        metrics[1,j] = mcc.result().numpy()
        mcc.reset_states()

        # --- Partial AreaMCC of the single label ---
        pmcc.update_state(y_true[:,j], y_pred[:,j])
        metrics[2,j] = pmcc.result().numpy()
        pmcc.reset_states()

        if normalize:
            metrics[1,j] = _normalize_area(metrics[1,j], full_width)
            metrics[2,j] = _normalize_area(metrics[2,j], partial_width)

    # --- Overall AUC ---
    auc = tf.keras.metrics.AUC(multi_label=True)
    auc.update_state(y_true,y_pred)
    metrics[0,-1] = auc.result().numpy()

    # --- Overall AreaMCC ---
    mcc = AreaMCC(num_labels=n_labels, num_threshold=200)
    mcc.update_state(y_true, y_pred)
    metrics[1,-1] = mcc.result().numpy()

    # --- Overall partial AreaMCC ---
    pmcc = AreaMCC(num_labels=n_labels, num_threshold=200, start = start_)
    pmcc.update_state(y_true, y_pred)
    metrics[2,-1] = pmcc.result().numpy()

    if normalize:
        metrics[1,-1] = _normalize_area(metrics[1,-1], full_width)
        metrics[2,-1] = _normalize_area(metrics[2,-1], partial_width)

    print("Metrics computed in %.3f seconds" % (time()-start))
    return metrics

# Training 

In [None]:
# Output folders of this model: <WORK_DIR>/Final/<model name>/{Metrics,History,H5}
SAVE_PATH = WORK_DIR + 'Final/'
model_dir = SAVE_PATH + model._name

if not os.path.isdir(model_dir):
    print('-----------')
    print('Creating new directory', model._name)
    print('-----------')

# makedirs(exist_ok=True) makes the cell safe to re-run, creates 'Final/'
# when it is missing and completes a partially created tree. The previous
# check looked for the model name as a substring of any existing entry, so a
# similarly named folder could prevent the real one from being created.
for subdir in ('Metrics', 'History', 'H5'):
    os.makedirs(os.path.join(model_dir, subdir), exist_ok=True)


SAVE_PATH = model_dir + '/'
SAVE_PATH

In [None]:
VALID_STEPS = len(valid_paths)//BATCH_SIZE
print('-' * len(model._name))
print("%s" % model._name)
print('-' * len(model._name))

# No cell of this notebook defines `strategy`, so a fresh kernel failed here
# with a NameError. Reuse a strategy when one was created elsewhere,
# otherwise fall back to TensorFlow's default (no-op) strategy.
strategy = globals().get("strategy")
if strategy is None:
    strategy = tf.distribute.get_strategy()

with strategy.scope():  
    model.compile(
        #optimizer = DemonAdam(T = STEPS_PER_EPOCH * EPOCHS),
        optimizer = AdaBeliefOptimizer(0.0001, epsilon = 1e-7),
        loss = bce_l1(),
        metrics = [tf.keras.metrics.AUC(multi_label=True,name='auc')]
    )

    # Keep on disk only the weights of the epoch with the lowest validation loss.
    model_save = ModelCheckpoint('%sH5/%s.h5' % (SAVE_PATH, model._name), 
                                save_best_only = True, 
                                save_weights_only = True,
                                monitor = 'val_loss', 
                                mode = 'min', verbose = 0)

    # Stop after 10 epochs without improvement and restore the best weights.
    early_stop = EarlyStopping(monitor = 'val_loss', min_delta = 0.0001, 
                                patience = 10, mode = 'min', verbose = 0,
                                restore_best_weights = True)

    # Halve the learning rate after 3 epochs without improvement.
    reduce_lr = ReduceLROnPlateau(monitor = 'val_loss', factor = 0.5, 
                                patience = 3, min_delta = 0.0001, 
                                mode = 'min', verbose = 0)

    callbacks = [model_save, early_stop, reduce_lr]

    history = model.fit(
        train_df,
        epochs=EPOCHS ,
        batch_size=None,
        validation_data=valid_df,
        validation_steps=VALID_STEPS,
        callbacks=callbacks,
        shuffle=False,
        verbose=1,
        steps_per_epoch=STEPS_PER_EPOCH,
        max_queue_size=BATCH_SIZE,
        initial_epoch=0
    )

# Training curves.
hist_df = pd.DataFrame(history.history) 
hist_df.to_csv("%sHistory/%s.csv" % (SAVE_PATH, model._name))

# Final metrics on both splits (partial AreaMCC integrated from 0.5).
metrics_train = compute_metrics(model, train_df, 'train', start_=0.5)
metrics_valid = compute_metrics(model, valid_df, 'valid', start_=0.5)

# Interleave the rows as train/valid pairs; stacking adapts to the number of
# columns returned by compute_metrics instead of assuming 12.
metrics = np.stack([
    metrics_train[0], metrics_valid[0],
    metrics_train[1], metrics_valid[1],
    metrics_train[2], metrics_valid[2],
]).astype(np.float32)

cols = label_cols.to_list()
cols.append('Mean')

# `cols` is passed directly: wrapping it in another list made pandas build a
# MultiIndex for the columns. The table also gets its own name so that it no
# longer overwrites `df`, the DataFrame loaded from train.csv.
metrics_df = pd.DataFrame(metrics, columns=cols, index=['AUC', 'AUC_valid', 'AMCC', 'AMCC_valid', 'pAMCC', 'pAMCC_valid'])
metrics_df.to_csv("%sMetrics/%s.csv" % (SAVE_PATH, model._name), index = True)