In [1]:
!pip install efficientnet -q



In [2]:
import os, random, re, math, time
import efficientnet.tfkeras as efn
import numpy as np
import pandas as pd
from kaggle_datasets import KaggleDatasets
from sklearn.model_selection import train_test_split
import tensorflow as tf
from sklearn.model_selection import GroupKFold
import tensorflow_addons as tfa

In [3]:
def auto_select_accelerator():
    """Return a `tf.distribute` strategy, preferring a TPU when one can be resolved.

    A TPU cluster resolver is created, connected and initialised; if any of
    those steps raises `ValueError`, the currently active default strategy is
    used instead. The chosen replica count is printed either way.

    Returns:
        The selected `tf.distribute.Strategy` instance.
    """
    try:
        resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
        tf.config.experimental_connect_to_cluster(resolver)
        tf.tpu.experimental.initialize_tpu_system(resolver)
        selected = tf.distribute.experimental.TPUStrategy(resolver)
        print("Running on TPU:", resolver.master())
    except ValueError:
        # No TPU could be resolved: fall back to whatever strategy is active.
        selected = tf.distribute.get_strategy()

    replica_count = selected.num_replicas_in_sync
    print(f"Running on {replica_count} replicas")
    return selected

In [4]:
import tensorflow.keras.backend as K

# Augmentation settings consumed by `transform`. Each random quantity there is a
# standard-normal sample combined with one of these values.
# NOTE(review): with hzoom/wzoom = 2.0 the zoom factor 1 + N(0, 1) / 2 is <= 0
# whenever the sample is <= -2, and get_mat divides by that factor — confirm
# this spread is intended.
CFG = dict(   
    read_size         = 512,    # side length DIM used by `transform` for its index grid and output shape
    rot               = 6.0,    # multiplies the normal sample for the rotation angle (degrees; get_mat converts to radians)
    shr               =   1.1,  # multiplies the normal sample for the shear angle (degrees)
    hzoom             =   2.0,  # height zoom factor is 1.0 + normal_sample / hzoom
    wzoom             =   2.0,  # width zoom factor is 1.0 + normal_sample / wzoom
    hshift            =   2.0,  # multiplies the normal sample for the height shift (pixel-index units)
    wshift            =   2.0,  # multiplies the normal sample for the width shift (pixel-index units)
)


def get_mat(rotation, shear, height_zoom, width_zoom, height_shift, width_shift):
    """Build the 3x3 matrix used to map destination pixel indices to source indices.

    The result is the product (rotation @ shear) @ (zoom @ shift).

    Args:
        rotation: rotation angle in degrees (converted to radians here).
        shear: shear angle in degrees (converted to radians here).
        height_zoom, width_zoom: zoom factors; the matrix stores their reciprocals.
        height_shift, width_shift: translation terms placed in the last column.
            The tensor arguments are expected to be single-element float32
            tensors so that nine of them reshape to [3, 3].

    Returns:
        A float32 tensor of shape [3, 3].
    """
    def as_matrix(entries):
        # Nine single-element tensors, row-major, packed into one 3x3 tensor.
        return tf.reshape(tf.concat([entries], axis=0), [3, 3])

    # Degrees -> radians.
    rotation_rad = math.pi * rotation / 180.
    shear_rad = math.pi * shear / 180.

    one = tf.constant([1], dtype='float32')
    zero = tf.constant([0], dtype='float32')

    cos_rot = tf.math.cos(rotation_rad)
    sin_rot = tf.math.sin(rotation_rad)
    cos_shr = tf.math.cos(shear_rad)
    sin_shr = tf.math.sin(shear_rad)

    rotate = as_matrix([
        cos_rot,  sin_rot, zero,
        -sin_rot, cos_rot, zero,
        zero,     zero,    one,
    ])
    shear_m = as_matrix([
        one,  sin_shr, zero,
        zero, cos_shr, zero,
        zero, zero,    one,
    ])
    zoom = as_matrix([
        one / height_zoom, zero,             zero,
        zero,              one / width_zoom, zero,
        zero,              zero,             one,
    ])
    shift = as_matrix([
        one,  zero, height_shift,
        zero, one,  width_shift,
        zero, zero, one,
    ])

    rotate_then_shear = K.dot(rotate, shear_m)
    zoom_then_shift = K.dot(zoom, shift)
    return K.dot(rotate_then_shear, zoom_then_shift)


def transform(image, cfg):    
    """Apply one random rotation/shear/zoom/shift to a single image.

    Args:
        image: a single image tensor (not a batch). It is indexed with
            coordinates in [0, DIM-1] on its first two axes, so it is assumed
            to be [DIM, DIM, 3] with DIM = cfg["read_size"] — the function does
            not check this.
        cfg: mapping providing "read_size", "rot", "shr", "hzoom", "wzoom",
            "hshift" and "wshift".

    Returns:
        A tensor reshaped to [DIM, DIM, 3] holding the source pixels gathered
        at the transformed coordinates (integer indices, no interpolation).
    """
    # input image - is one image of size [dim,dim,3] not a batch of [b,dim,dim,3]
    # output - image randomly rotated, sheared, zoomed, and shifted
    DIM = cfg["read_size"]
    XDIM = DIM%2  # 1 for odd DIM, 0 for even; only used to adjust the lower clip bound
    
    # One standard-normal draw per parameter, each combined with its cfg value.
    rot = cfg['rot'] * tf.random.normal([1], dtype='float32')
    shr = cfg['shr'] * tf.random.normal([1], dtype='float32') 
    h_zoom = 1.0 + tf.random.normal([1], dtype='float32') / cfg['hzoom']
    w_zoom = 1.0 + tf.random.normal([1], dtype='float32') / cfg['wzoom']
    h_shift = cfg['hshift'] * tf.random.normal([1], dtype='float32') 
    w_shift = cfg['wshift'] * tf.random.normal([1], dtype='float32') 

    # GET TRANSFORMATION MATRIX
    m = get_mat(rot,shr,h_zoom,w_zoom,h_shift,w_shift) 

    # LIST DESTINATION PIXEL INDICES
    # Centred coordinates for every destination pixel, stacked with a row of
    # ones so the 3x3 matrix can include the shift terms: idx is [3, DIM*DIM].
    x   = tf.repeat(tf.range(DIM//2, -DIM//2,-1), DIM)
    y   = tf.tile(tf.range(-DIM//2, DIM//2), [DIM])
    z   = tf.ones([DIM*DIM], dtype='int32')
    idx = tf.stack( [x,y,z] )
    
    # ROTATE DESTINATION PIXELS ONTO ORIGIN PIXELS
    idx2 = K.dot(m, tf.cast(idx, dtype='float32'))
    idx2 = K.cast(idx2, dtype='int32')
    # Clamp coordinates that fall outside the image so the gather below stays
    # in range; out-of-range destinations therefore read border pixels.
    idx2 = K.clip(idx2, -DIM//2+XDIM+1, DIM//2)
    
    # FIND ORIGIN PIXEL VALUES           
    # Convert centred coordinates back to array indices (row, column).
    idx3 = tf.stack([DIM//2-idx2[0,], DIM//2-1+idx2[1,]])
    d    = tf.gather_nd(image, tf.transpose(idx3))
        
    return tf.reshape(d,[DIM, DIM,3])

In [5]:
def dropout(image, DIM=512, PROBABILITY = 0.75, CT = 5, SZ = 0.1):
    """Zero out up to CT random squares of an image (coarse dropout / cutout).

    Args:
        image: a single image tensor; it is sliced with bounds in [0, DIM] and
            finally reshaped to [DIM, DIM, 3], so it is assumed to have that
            shape — not checked here.
        DIM: image side length used for sampling locations and for the final reshape.
        PROBABILITY: chance that dropout is applied to this image.
        CT: number of squares to remove.
        SZ: square side as a fraction of DIM.

    Returns:
        The image with the selected squares replaced by zeros, or the input
        unchanged when dropout is skipped.
    """
    # input - one image of size [dim,dim,3] not a batch of [b,dim,dim,3]
    # output - image with CT squares of side size SZ*DIM removed

    # DO DROPOUT WITH PROBABILITY DEFINED ABOVE
    # P is an int32 scalar tensor: 1 with probability PROBABILITY, else 0.
    P = tf.cast( tf.random.uniform([],0,1) < PROBABILITY, tf.int32)
    # NOTE(review): P is a tensor, so this Python `if` presumably relies on
    # AutoGraph conversion when traced inside Dataset.map — confirm.
    if (P == 0)|(CT == 0)|(SZ == 0): return image

    for k in range( CT ):
        # CHOOSE RANDOM LOCATION
        x = tf.cast( tf.random.uniform([],0,DIM),tf.int32)
        y = tf.cast( tf.random.uniform([],0,DIM),tf.int32)
        # COMPUTE SQUARE 
        # Multiplying by P makes the square zero-sized when P == 0.
        WIDTH = tf.cast( SZ*DIM,tf.int32) * P
        # Clip the square to the image bounds.
        ya = tf.math.maximum(0,y-WIDTH//2)
        yb = tf.math.minimum(DIM,y+WIDTH//2)
        xa = tf.math.maximum(0,x-WIDTH//2)
        xb = tf.math.minimum(DIM,x+WIDTH//2)
        # DROPOUT IMAGE
        # Rebuild the image from slices: rows above, the band containing the
        # square (left part, zero block, right part), and rows below.
        one = image[ya:yb,0:xa,:]
        two = tf.zeros([yb-ya,xb-xa,3]) 
        three = image[ya:yb,xb:DIM,:]
        middle = tf.concat([one,two,three],axis=1)
        image = tf.concat([image[0:ya,:,:],middle,image[yb:DIM,:,:]],axis=0)

    # RESHAPE HACK SO TPU COMPILER KNOWS SHAPE OF OUTPUT TENSOR 
    image = tf.reshape(image,[DIM,DIM,3])
    return image

In [6]:
def build_decoder(with_labels=True, target_size=(512, 512), ext='jpg'):
    """Create a function that loads an image file into a float32 tensor.

    Args:
        with_labels: when True the returned function takes `(path, label)` and
            returns `(image, label)`; otherwise it takes `path` only.
        target_size: size passed to `tf.image.resize`.
        ext: 'png', 'jpg' or 'jpeg'. Any other value makes the returned
            decoder raise `ValueError` when it is called.

    Returns:
        The decoding callable, suitable for `Dataset.map`.
    """
    def decode(path):
        raw = tf.io.read_file(path)

        if ext == 'png':
            pixels = tf.image.decode_png(raw, channels=3)
        elif ext in ('jpg', 'jpeg'):
            pixels = tf.image.decode_jpeg(raw, channels=3)
        else:
            raise ValueError("Image extension not supported")

        # Scale to [0, 1] before resizing.
        scaled = tf.cast(pixels, tf.float32) / 255.0
        return tf.image.resize(scaled, target_size)

    def decode_with_labels(path, label):
        return decode(path), label

    if with_labels:
        return decode_with_labels
    return decode


def build_dataset(paths, cfg, labels=None, bsize=32, cache=True,
                  decode_fn=None, augment_fn=None,
                  augment=True, repeat=True, shuffle=1024,
                  cache_dir=""):
    """Assemble the `tf.data` input pipeline.

    Stages, in order: slice -> decode -> (cache) -> (augment) -> (repeat) ->
    (shuffle) -> batch -> prefetch. Parenthesised stages are optional.

    Args:
        paths: image paths.
        cfg: augmentation config handed to `build_augmenter`.
        labels: optional labels; when given, elements are `(path, label)` pairs.
        bsize: batch size.
        cache: whether to cache decoded images (to `cache_dir`, which is
            created if needed when non-empty).
        decode_fn: decoder; defaults to `build_decoder(labels is not None)`.
        augment_fn: augmenter; defaults to `build_augmenter(cfg, labels is not None)`.
        augment: whether to apply `augment_fn`.
        repeat: whether to repeat the dataset indefinitely.
        shuffle: shuffle buffer size; a falsy value disables shuffling.
        cache_dir: cache location passed to `Dataset.cache`.

    Returns:
        The batched, prefetched `tf.data.Dataset`.
    """
    has_labels = labels is not None

    if cache_dir != "" and cache is True:
        os.makedirs(cache_dir, exist_ok=True)

    if decode_fn is None:
        decode_fn = build_decoder(has_labels)

    if augment_fn is None:
        augment_fn = build_augmenter(cfg, has_labels)

    AUTO = tf.data.experimental.AUTOTUNE
    if has_labels:
        slices = (paths, labels)
    else:
        slices = paths

    pipeline = tf.data.Dataset.from_tensor_slices(slices)
    pipeline = pipeline.map(decode_fn, num_parallel_calls=AUTO)
    if cache:
        pipeline = pipeline.cache(cache_dir)
    if augment:
        pipeline = pipeline.map(augment_fn, num_parallel_calls=AUTO)
    if repeat:
        pipeline = pipeline.repeat()
    if shuffle:
        pipeline = pipeline.shuffle(shuffle)

    return pipeline.batch(bsize).prefetch(AUTO)

def build_augmenter(cfg, with_labels=True):
    """Create the per-image augmentation function used by `build_dataset`.

    Args:
        cfg: augmentation config; must provide the keys `transform` reads,
            including "read_size".
        with_labels: when True the returned function takes `(img, label)` and
            returns `(augmented_img, label)`; otherwise it takes `img` only.

    Returns:
        The augmentation callable, suitable for `Dataset.map`.
    """
    # `cfg` is bound as a default so the unlabeled variant can be called with
    # the single image argument that Dataset.map supplies; passing it
    # explicitly as `augment(img, cfg)` still works.
    def augment(img, cfg=cfg):
        img = transform(img, cfg)
        # Keep dropout's image size in sync with the size transform() produced.
        img = dropout(img, DIM=cfg["read_size"])
        img = tf.image.random_flip_left_right(img)
        img = tf.image.random_flip_up_down(img)
        img = tf.image.random_contrast(img, 0.8, 1.2)
        # random_brightness takes (image, max_delta, seed): a third positional
        # value would be used as the seed, not as an upper bound.
        img = tf.image.random_brightness(img, max_delta=0.1)
        img = tf.image.random_hue(img, 0.01)
        img = tf.image.random_saturation(img, 0.7, 1.3)

        return img

    def augment_with_labels(img, label):
        return augment(img, cfg), label

    return augment_with_labels if with_labels else augment

In [7]:
from tensorflow.keras import backend as K
import dill

def binary_focal_loss(gamma=2., alpha=.25):
    """Build a binary focal-loss function for sigmoid outputs.

    Per element, with p the predicted probability:
      label 1:  -alpha       * (1 - p)**gamma * log(p)
      label 0:  -(1 - alpha) * p**gamma       * log(1 - p)
    The returned loss sums these terms over every element of the batch.

    Reference: https://arxiv.org/pdf/1708.02002.pdf

    Usage:
      model.compile(loss=[binary_focal_loss(alpha=.25, gamma=2)],
                    metrics=["accuracy"], optimizer=adam)
    """
    def binary_focal_loss_fixed(y_true, y_pred):
        """Return the summed focal loss.

        :param y_true: tensor of 0/1 labels with the same shape as `y_pred`
        :param y_pred: tensor of sigmoid probabilities
        :return: scalar tensor
        """
        is_positive = tf.equal(y_true, 1)
        is_negative = tf.equal(y_true, 0)

        # Elements that do not belong to a term get a neutral placeholder
        # (1 for the positive term, 0 for the negative term).
        prob_pos = tf.where(is_positive, y_pred, tf.ones_like(y_pred))
        prob_neg = tf.where(is_negative, y_pred, tf.zeros_like(y_pred))

        # Keep probabilities away from 0 and 1 so the logs stay finite.
        eps = K.epsilon()
        prob_pos = K.clip(prob_pos, eps, 1. - eps)
        prob_neg = K.clip(prob_neg, eps, 1. - eps)

        positive_loss = -K.sum(alpha * K.pow(1. - prob_pos, gamma) * K.log(prob_pos))
        negative_sum = K.sum((1 - alpha) * K.pow(prob_neg, gamma) * K.log(1. - prob_neg))
        return positive_loss - negative_sum

    return binary_focal_loss_fixed


def categorical_focal_loss(gamma=2., alpha=.25):
    """Build a focal-loss function for softmax (one-hot label) outputs.

    For each sample, with p_c the predicted probability of class c and y_c the
    one-hot label:
      FL = sum over c of  -alpha * (1 - p_c)**gamma * y_c * log(p_c)

    Parameters:
      gamma -- focusing parameter of the (1 - p) modulating factor (default 2.0)
      alpha -- weighting factor, as in balanced cross entropy (default 0.25)

    References:
        Official paper: https://arxiv.org/pdf/1708.02002.pdf
        https://www.tensorflow.org/api_docs/python/tf/keras/backend/categorical_crossentropy

    Usage:
      model.compile(loss=[categorical_focal_loss(alpha=.25, gamma=2)],
                    metrics=["accuracy"], optimizer=adam)
    """
    def categorical_focal_loss_fixed(y_true, y_pred):
        """Return the focal loss reduced over axis 1.

        :param y_true: tensor with the same shape as `y_pred`
        :param y_pred: tensor of softmax probabilities
        :return: tensor with axis 1 summed out
        """
        targets = tf.cast(y_true, tf.float32)

        # Renormalise so the probabilities along the last axis sum to 1.
        probs = y_pred / K.sum(y_pred, axis=-1, keepdims=True)

        # Keep probabilities away from 0 and 1 so the log stays finite.
        eps = K.epsilon()
        probs = K.clip(probs, eps, 1. - eps)

        # Cross entropy per element, then the focal modulation.
        cross_entropy = -targets * K.log(probs)
        focal = alpha * K.pow(1 - probs, gamma) * cross_entropy

        # Reduce over axis 1.
        return K.sum(focal, axis=1)

    return categorical_focal_loss_fixed

In [8]:
# Select the distribution strategy (TPU when available).
strategy = auto_select_accelerator()
# Global batch size: 4 samples for each replica in the strategy.
BATCH_SIZE = strategy.num_replicas_in_sync * 4

Running on TPU: grpc://10.0.0.2:8470
Running on 8 replicas


In [9]:
# Learning rate schedule for TPU, GPU and CPU.
# Using an LR ramp up because fine-tuning a pre-trained model.
# Starting with a high LR would break the pre-trained weights.

LR_START = 0.00001  # learning rate at epoch 0, where the linear ramp-up begins
LR_MAX = 0.00005 * strategy.num_replicas_in_sync  # peak learning rate, scaled by the replica count
LR_MIN = 0.00001  # floor that the exponential decay approaches
LR_RAMPUP_EPOCHS = 3  # number of epochs spent ramping linearly from LR_START to LR_MAX
LR_SUSTAIN_EPOCHS = 0  # number of epochs held at LR_MAX after the ramp-up
LR_EXP_DECAY = .8  # per-epoch multiplier applied to (LR_MAX - LR_MIN) during decay

def lrfn(epoch):
    """Learning rate for `epoch`: linear ramp-up, optional plateau, exponential decay.

    - epoch < LR_RAMPUP_EPOCHS: linear interpolation from LR_START towards LR_MAX.
    - next LR_SUSTAIN_EPOCHS epochs: constant LR_MAX.
    - afterwards: (LR_MAX - LR_MIN) decays by LR_EXP_DECAY per epoch, on top of LR_MIN.
    """
    if epoch < LR_RAMPUP_EPOCHS:
        slope = (LR_MAX - LR_START) / LR_RAMPUP_EPOCHS
        return slope * epoch + LR_START

    if epoch < LR_RAMPUP_EPOCHS + LR_SUSTAIN_EPOCHS:
        return LR_MAX

    epochs_decaying = epoch - LR_RAMPUP_EPOCHS - LR_SUSTAIN_EPOCHS
    return (LR_MAX - LR_MIN) * LR_EXP_DECAY**epochs_decaying + LR_MIN

lr_callback = tf.keras.callbacks.LearningRateScheduler(lrfn, verbose = True)

In [10]:
# Obtain a GCloud credential through Kaggle's secrets client and hand it to
# TensorFlow.
# NOTE(review): presumably needed so TF can read the private GCS-hosted dataset
# used below — confirm.
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
user_credential = user_secrets.get_gcloud_credential()
user_secrets.set_tensorflow_credential(user_credential)

In [11]:
# GCS location of the 'siim-1024x-dataset' Kaggle dataset and its top-level entries.
# NOTE(review): later cells index this list by position ([0] is read as a CSV,
# [1] is used as the image directory), which depends on the order glob
# returns — confirm.
GCS_DS_PATH = KaggleDatasets().get_gcs_path('siim-1024x-dataset')
ALL_TRAINING_FILENAMES = tf.io.gfile.glob(GCS_DS_PATH + '/*')

In [12]:
# Load the metadata table; the first globbed entry is assumed to be the CSV — TODO confirm.
df = pd.read_csv(ALL_TRAINING_FILENAMES[0])
# Kept separately before the frame is narrowed; not used by the cells visible here.
label_cols = df[['Atypical Appearance']]

# Keep only the image identifier and the 'opacitycheck' column used as the training label.
df = df[['image_id', 'opacitycheck']]

In [13]:
# Assign every row to one of 5 validation folds, grouping by image_id so that
# rows sharing an image_id always land in the same fold.
gkf  = GroupKFold(n_splits = 5)
df['fold'] = -1
fold_col = df.columns.get_loc('fold')
for fold, (train_idx, val_idx) in enumerate(gkf.split(df, groups = df.image_id.tolist())):
    # split() yields positional row indices, so write through .iloc; label-based
    # .loc would only be correct while df has a default RangeIndex.
    df.iloc[val_idx, fold_col] = fold

In [14]:
tf.random.set_seed(9999)

In [15]:
#Image size
img_size = 512

def _build_backbone(backbone_fn):
    """Create one headless, trainable noisy-student EfficientNet wrapped in a Sequential."""
    enet = backbone_fn(
        input_shape=(img_size, img_size, 3),
        weights='noisy-student',
        include_top=False
    )
    # The Sequential wrapper keeps each backbone as a single nested layer of
    # the final model.
    backbone = tf.keras.Sequential([enet])
    for layer in backbone.layers:
        layer.trainable = True
    return backbone


def build_model(n_labels):
    """Build and compile the four-backbone EfficientNet ensemble classifier.

    EfficientNet B0, B3, B4 and B7 feature extractors (noisy-student weights,
    no top) all read the same input. Their feature maps are concatenated along
    the channel axis, globally average-pooled, regularised with dropout and
    fed to a sigmoid Dense layer with `n_labels` outputs.

    Everything, including `compile`, happens inside `strategy.scope()` so that
    model and metric variables are created under the distribution strategy
    even when the caller has not entered the scope itself.

    Args:
        n_labels: number of sigmoid outputs.

    Returns:
        The compiled `tf.keras.Model` (its summary is printed as a side effect).
    """
    backbone_fns = (
        efn.EfficientNetB0,
        efn.EfficientNetB3,
        efn.EfficientNetB4,
        efn.EfficientNetB7,
    )

    with strategy.scope():
        inputs = tf.keras.Input(shape = (img_size, img_size, 3))
        feature_maps = [_build_backbone(fn)(inputs) for fn in backbone_fns]

        # Concatenate along channels (axis 3); this requires all backbones to
        # produce feature maps with the same spatial size.
        merged = tf.keras.layers.concatenate(feature_maps, axis = 3)
        pooled = tf.keras.layers.GlobalAveragePooling2D()(merged)
        pooled = tf.keras.layers.Dropout(0.5)(pooled)
        probs = tf.keras.layers.Dense(n_labels, activation='sigmoid')(pooled)
        out = tf.keras.Model(inputs = inputs, outputs = probs)

        out.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=0.00001),
            loss = [binary_focal_loss(gamma=2., alpha=.25)],
            metrics=[tf.keras.metrics.AUC(multi_label=False)]
        )

    out.summary()
    return out

In [16]:
# Re-initialise the TPU system before training starts.
# NOTE(review): presumably done to clear TPU memory, as at the end of each
# training fold — confirm.
tf.tpu.experimental.initialize_tpu_system()

<tensorflow.python.tpu.topology.Topology at 0x7fd080257b90>

In [17]:
# 5-fold cross-validation training: for each fold, build the datasets, build a
# fresh model, train it with checkpointing on validation AUC and save the history.
filepath = ALL_TRAINING_FILENAMES[1]

# The image size and decoders are identical for every fold, so build them once.
IMSIZE = (512, 512, 512, 512, 512, 512, 512, 512)
IMS = 7

decoder = build_decoder(with_labels=True, target_size=(IMSIZE[IMS], IMSIZE[IMS]), ext='jpg')
test_decoder = build_decoder(with_labels=False, target_size=(IMSIZE[IMS], IMSIZE[IMS]),ext='jpg')

for i in range(5):

    # Fold i is held out for validation; all other folds are used for training.
    valid_paths = filepath + '/' + df[df['fold'] == i]['image_id'] + '.jpg' #"/train/"
    train_paths = filepath + '/' + df[df['fold'] != i]['image_id'] + '.jpg' #"/train/"
    valid_labels = df[df['fold'] == i][['opacitycheck']]
    train_labels = df[df['fold'] != i][['opacitycheck']]

    train_dataset = build_dataset(
        train_paths, CFG, train_labels, bsize=BATCH_SIZE, decode_fn=decoder, augment=True,
    )

    valid_dataset = build_dataset(
        valid_paths, CFG, valid_labels, bsize=BATCH_SIZE, decode_fn=decoder,
        repeat=False, shuffle=False, augment=False
    )

    # One output per label column; 1-D labels mean a single output. An explicit
    # check replaces the former bare `except:`, which would also have hidden
    # unrelated errors.
    n_labels = train_labels.shape[1] if len(train_labels.shape) > 1 else 1

    # BUILD MODEL
    K.clear_session()
    with strategy.scope():
        out = build_model(n_labels)

    # The training dataset repeats indefinitely, so an epoch must be bounded explicitly.
    steps_per_epoch = train_paths.shape[0] // BATCH_SIZE
    checkpoint = tf.keras.callbacks.ModelCheckpoint(
        f'fold_{i}.h5', save_best_only=True, monitor='val_auc', mode='max')
    #lr_reducer = tf.keras.callbacks.ReduceLROnPlateau(
    #    monitor="val_auc", patience=3, min_lr=1e-6, mode='max', factor=0.3,epsilon=0.0001, cooldown=2)

    history = out.fit(
        train_dataset,
        epochs=35,
        verbose=1,
        callbacks=[checkpoint,lr_callback],
        steps_per_epoch=steps_per_epoch,
        validation_data=valid_dataset)

    hist_df = pd.DataFrame(history.history)
    hist_df.to_csv(f'history{i}.csv')

    #Clear memory else it will fail next iteration
    tf.tpu.experimental.initialize_tpu_system()

Downloading data from https://github.com/qubvel/efficientnet/releases/download/v0.0.1/efficientnet-b0_noisy-student_notop.h5
Downloading data from https://github.com/qubvel/efficientnet/releases/download/v0.0.1/efficientnet-b3_noisy-student_notop.h5
Downloading data from https://github.com/qubvel/efficientnet/releases/download/v0.0.1/efficientnet-b4_noisy-student_notop.h5
Downloading data from https://github.com/qubvel/efficientnet/releases/download/v0.0.1/efficientnet-b7_noisy-student_notop.h5
Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_5 (InputLayer)            [(None, 512, 512, 3) 0                                            
__________________________________________________________________________________________________
sequential_4 (Sequential)       (None, 16, 16, 1280) 4049564     input_5[0][0]           