# Imports

In [1]:
!pip install efficientnet -q
!pip install focal-loss

Collecting focal-loss
  Downloading focal_loss-0.0.6-py3-none-any.whl (18 kB)
Installing collected packages: focal-loss
Successfully installed focal-loss-0.0.6


In [2]:
import os

import efficientnet.tfkeras as efn
import numpy as np
import pandas as pd
import tensorflow as tf

from focal_loss import BinaryFocalLoss
from kaggle_datasets import KaggleDatasets
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GroupKFold

# Helpers

In [3]:
def binary_focal_loss(y_true, y_pred, gamma=2.0, alpha=0.25):
    """
    Implementation of Focal Loss from the paper in multiclass classification
    Formula:
        loss = -alpha_t*((1-p_t)^gamma)*log(p_t)
        
        p_t = y_pred, if y_true = 1
        p_t = 1-y_pred, otherwise
        
        alpha_t = alpha, if y_true=1
        alpha_t = 1-alpha, otherwise
        
        cross_entropy = -log(p_t)
    Parameters:
        alpha -- the same as wighting factor in balanced cross entropy
        gamma -- focusing parameter for modulating factor (1-p)
    Default value:
        gamma -- 2.0 as mentioned in the paper
        alpha -- 0.25 as mentioned in the paper
    """

    # Define epsilon so that the backpropagation will not result in NaN
    # for 0 divisor case
    epsilon = K.epsilon()
    # Add the epsilon to prediction value
    #y_pred = y_pred + epsilon
    # Clip the prediciton value
    y_pred = K.clip(y_pred, epsilon, 1.0-epsilon)
    # Calculate p_t
    p_t = tf.where(K.equal(y_true, 1), y_pred, 1-y_pred)
    # Calculate alpha_t
    alpha_factor = K.ones_like(y_true)*alpha
    alpha_t = tf.where(K.equal(y_true, 1), alpha_factor, 1-alpha_factor)
    # Calculate cross entropy
    cross_entropy = -K.log(p_t)
    weight = alpha_t * K.pow((1-p_t), gamma)
    # Calculate focal loss
    loss = weight * cross_entropy
    # Sum the losses in mini_batch
    loss = K.sum(loss, axis=1)
    
    return loss


def auto_select_accelerator():
    try:
        tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
        tf.config.experimental_connect_to_cluster(tpu)
        tf.tpu.experimental.initialize_tpu_system(tpu)
        strategy = tf.distribute.experimental.TPUStrategy(tpu)
        print("Running on TPU:", tpu.master())
    except ValueError:
        strategy = tf.distribute.get_strategy()
    print(f"Running on {strategy.num_replicas_in_sync} replicas")
    
    return strategy


def build_decoder(with_labels=True, target_size=(256, 256), ext='jpg'):
    def decode(path):
        file_bytes = tf.io.read_file(path)

        if ext == 'png':
            img = tf.image.decode_png(file_bytes, channels=3)
        elif ext in ['jpg', 'jpeg']:
            img = tf.image.decode_jpeg(file_bytes, channels=3)
        else:
            raise ValueError("Image extension not supported")
        img = tf.cast(img, tf.float32) / 255.0
        img = tf.image.resize(img, target_size)

        return img
    
    def decode_with_labels(path, label):
        return decode(path), label
    
    return decode_with_labels if with_labels else decode


def build_augmenter(with_labels=True):
    def augment(img):
        img = tf.image.random_flip_left_right(img)
        img = tf.image.random_flip_up_down(img)
        return img
    
    def augment_with_labels(img, label):
        return augment(img), label
    
    return augment_with_labels if with_labels else augment


def build_dataset(paths, labels=None, bsize=128, cache=True,
                  decode_fn=None, augment_fn=None,
                  augment=True, repeat=True, shuffle=1024, 
                  cache_dir=""):
    if cache_dir != "" and cache is True:
        os.makedirs(cache_dir, exist_ok=True)
    
    if decode_fn is None:
        decode_fn = build_decoder(labels is not None)
    
    if augment_fn is None:
        augment_fn = build_augmenter(labels is not None)
    
    AUTO = tf.data.experimental.AUTOTUNE
    slices = paths if labels is None else (paths, labels)
    
    dset = tf.data.Dataset.from_tensor_slices(slices)
    dset = dset.map(decode_fn, num_parallel_calls=AUTO)
    dset = dset.cache(cache_dir) if cache else dset
    dset = dset.map(augment_fn, num_parallel_calls=AUTO) if augment else dset
    dset = dset.repeat() if repeat else dset
    dset = dset.shuffle(shuffle) if shuffle else dset
    dset = dset.batch(bsize).prefetch(AUTO)
    
    return dset

# Prepare

In [4]:
COMPETITION_NAME = "hpa-768768"
strategy = auto_select_accelerator()
BATCH_SIZE = strategy.num_replicas_in_sync * 16
GCS_DS_PATH = KaggleDatasets().get_gcs_path(COMPETITION_NAME)
GCS_DS_PATH

Running on TPU: grpc://10.0.0.2:8470
Running on 8 replicas


'gs://kds-b685f8a833c7d14d373d0373cb155e4ef69f97f30d2907e6adfbd2a1'

In [5]:
IMSIZE = (224, 240, 260, 300, 380, 456, 528, 600, 700, 800)
IMS = 9
n_labels = 19 
binary_focal_loss = BinaryFocalLoss(gamma=5)

decoder = build_decoder(with_labels=True, target_size=(IMSIZE[IMS], IMSIZE[IMS]))

with strategy.scope():
    model = tf.keras.Sequential([
        efn.EfficientNetB7(
            input_shape=(IMSIZE[IMS], IMSIZE[IMS], 3),
            weights='imagenet',
            include_top=False),
        #tf.keras.layers.GlobalAveragePooling2D(),
        # tf.keras.layers.Dense(n_labels, activation='sigmoid')
        tf.keras.layers.GlobalMaxPool2D(),
        tf.keras.layers.Dense(256, activation='relu'),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(n_labels, activation='sigmoid')
    ])
    model.compile(
        optimizer=tf.keras.optimizers.Adadelta(), # optimizer=tf.keras.optimizers.Adam(),
        loss=binary_focal_loss, # 'binary_crossentropy'
        metrics=[tf.keras.metrics.AUC(multi_label=True)])
    model.summary()

Downloading data from https://github.com/Callidior/keras-applications/releases/download/efficientnet/efficientnet-b7_weights_tf_dim_ordering_tf_kernels_autoaugment_notop.h5
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
efficientnet-b7 (Functional) (None, 25, 25, 2560)      64097680  
_________________________________________________________________
global_max_pooling2d (Global (None, 2560)              0         
_________________________________________________________________
dense (Dense)                (None, 256)               655616    
_________________________________________________________________
dense_1 (Dense)              (None, 64)                16448     
_________________________________________________________________
dense_2 (Dense)              (None, 19)                1235      
Total params: 64,770,979
Trainable params: 64,460,259
Non-trainable params: 310,7

# Train

In [6]:
# color data generating
load_dir = f"/kaggle/input/{COMPETITION_NAME}/"
df_in = pd.read_csv('../input/hpa-labels-csv-hg/df_green.csv')

colours = ['green', 'blue', 'red', 'yellow']
for colour in colours:
    df = df_in.copy()
    df.ID = df.ID[0][:-5] + colour

    label_cols = df.columns[2:21]
    paths = GCS_DS_PATH + '/' + df['ID'] + '.png'
    labels = df[label_cols].values

    (train_paths, valid_paths, train_labels, valid_labels) = train_test_split(paths, labels, test_size=0.2, random_state=42)
    train_dataset = build_dataset(train_paths, train_labels, bsize=BATCH_SIZE, decode_fn=decoder)
    valid_dataset = build_dataset(valid_paths, valid_labels, bsize=BATCH_SIZE, decode_fn=decoder,
                                  repeat=False, shuffle=False, augment=False)
    
    steps_per_epoch = train_paths.shape[0] // BATCH_SIZE
    checkpoint = tf.keras.callbacks.ModelCheckpoint(
        f'effb7_individual_model_{colour}_800.h5', save_best_only=True, monitor='val_loss', mode='min')
    lr_reducer = tf.keras.callbacks.ReduceLROnPlateau(
        monitor="val_loss", patience=3, min_lr=1e-6, mode='min')

    history = model.fit(
        train_dataset, 
        epochs=20,
        verbose=1,
        callbacks=[checkpoint, lr_reducer],
        steps_per_epoch=steps_per_epoch,
        validation_data=valid_dataset)

    hist_df = pd.DataFrame(history.history)
    hist_df.to_csv(f'effb7_individual_history_{colour}_800.csv')

Epoch 1/20


ResourceExhaustedError: 9 root error(s) found.
  (0) Resource exhausted: {{function_node __inference_train_function_163742}} Compilation failure: Ran out of memory in memory space hbm. Used 25.74G of 15.48G hbm. Exceeded hbm capacity by 10.26G.

Total hbm usage >= 26.26G:
    reserved        530.00M 
    program          25.74G 
    arguments       unknown size 

Output size unknown.

Program hbm requirement 25.74G:
    global            84.0K
    scoped           68.92M
    HLO temp         25.68G (77.0% utilization: Unpadded (18.42G) Padded (23.92G), 6.8% fragmentation (1.75G))

  Largest program allocations in hbm:

  1. Size: 937.50M
     Operator: op_type="DepthwiseConv2dNativeBackpropInput" op_name="gradient_tape/sequential/efficientnet-b7/block2b_dwconv/depthwise/DepthwiseConv2dNativeBackpropInput"
     Shape: f32[16,200,200,288]{3,0,2,1:T(8,128)}
     Unpadded size: 703.12M
     Extra memory due to padding: 234.38M (1.3x expansion)
     XLA label: %fusion.2580 = (f32[288]{0:T(512)}, f32[288]{0:T(512)}, f32[16,200,200,288]{3,0,2,1:T(8,128)}) fusion(s32[]{:T(256)} %get-tuple-element.40607, f32[16,200,200,288]{3,0,2,1:T(8,128)} %fusion.3652.remat2, f32[288]{0:T(512)} %get-tuple-element.39220, f32[288]{...
     Allocation type: HLO temp
     ==========================

  2. Size: 937.50M
     Operator: op_type="DepthwiseConv2dNativeBackpropInput" op_name="gradient_tape/sequential/efficientnet-b7/block2d_dwconv/depthwise/DepthwiseConv2dNativeBackpropInput"
     Shape: f32[16,200,200,288]{3,0,2,1:T(8,128)}
     Unpadded size: 703.12M
     Extra memory due to padding: 234.38M (1.3x expansion)
     XLA label: %fusion.2570 = (f32[288]{0:T(512)}, f32[288]{0:T(512)}, f32[16,200,200,288]{3,0,2,1:T(8,128)}) fusion(s32[]{:T(256)} %get-tuple-element.40607, f32[16,200,200,288]{3,0,2,1:T(8,128)} %fusion.22802.remat5, f32[288]{0:T(512)} %get-tuple-element.39136, f32[288]...
     Allocation type: HLO temp
     ==========================

  3. Size: 937.50M
     Operator: op_type="Conv2D" op_name="sequential/efficientnet-b7/block2b_expand_conv/Conv2D"
     Shape: f32[16,200,200,288]{3,0,2,1:T(8,128)}
     Unpadded size: 703.12M
     Extra memory due to padding: 234.38M (1.3x expansion)
     XLA label: %fusion.3652.remat2 = f32[16,200,200,288]{3,0,2,1:T(8,128)} fusion(f32[1,1,48,288]{3,2,1,0:T(8,128)} %get-tuple-element.42138, f32[48]{0:T(256)} %fusion.22085, f32[48]{0:T(256)} %get-tuple-element.42123, f32[48]{0:T(256)} %get-tuple-element.42122, f32[16,2...
     Allocation type: HLO temp
     ==========================

  4. Size: 937.50M
     Operator: op_type="DepthwiseConv2dNativeBackpropInput" op_name="gradient_tape/sequential/efficientnet-b7/block2c_dwconv/depthwise/DepthwiseConv2dNativeBackpropInput"
     Shape: f32[16,200,200,288]{3,0,2,1:T(8,128)}
     Unpadded size: 703.12M
     Extra memory due to padding: 234.38M (1.3x expansion)
     XLA label: %fusion.2575 = (f32[288]{0:T(512)}, f32[288]{0:T(512)}, f32[16,200,200,288]{3,0,2,1:T(8,128)}) fusion(s32[]{:T(256)} %get-tuple-element.40607, f32[16,200,200,288]{3,0,2,1:T(8,128)} %convolution.2748.r ... [truncated]

# References

- [[HPA] classification efnb7 train](https://www.kaggle.com/h053473666/hpa-classification-efnb7-train)
- [[HPA] classification efnb7 train 13cc0d](https://www.kaggle.com/aristotelisch/hpa-classification-efnb7-train-13cc0d?scriptVersionId=60520853)