In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
!pip install -q efficientnet==1.1.0

In [None]:
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
user_credential = user_secrets.get_gcloud_credential()
user_secrets.set_tensorflow_credential(user_credential)

In [None]:
from kaggle_datasets import KaggleDatasets

In [None]:
import math, re, os, glob, time, random
from collections import namedtuple
from functools import partial
from tensorflow.keras import regularizers

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_addons as tfa

# from classification_models.tfkeras import Classifiers
import efficientnet.tfkeras as efn

from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix
from sklearn.model_selection import KFold, StratifiedKFold

import seaborn as sns
from matplotlib import pyplot as plt

print("Tensorflow version " + tf.__version__)

K = tf.keras.backend
L = tf.keras.layers
AUTO = tf.data.experimental.AUTOTUNE

In [None]:
GCS_PATH_2 = KaggleDatasets().get_gcs_path('chinese-rec')  # 'sp-tfrecords' or 'sp-tfrecords-sz3

In [None]:
GCS_PATH_2

In [None]:
TRAINING_FILENAMES = tf.io.gfile.glob(GCS_PATH_2 +'/Chinese_Rec/trainrecord/*.tfrec')


In [None]:
TRAINING_FILENAMES

In [None]:
ll ../input/tfrecord/trainrecord

In [None]:
def get_strategy():
    """Return the tf.distribute strategy matching the available hardware.

    A TPU cluster is looked up first; if the resolver raises ``ValueError``
    the default TensorFlow strategy is used instead. When a TPU is found it
    is connected and initialised before the TPU strategy is created. The
    number of replicas is printed in both cases.
    """
    try:
        # No arguments needed when the TPU_NAME environment variable is set.
        resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
        print('Running on TPU ', resolver.master())
    except ValueError:
        resolver = None

    if not resolver:
        # Default distribution strategy (CPU / single GPU).
        chosen = tf.distribute.get_strategy()
    else:
        tf.config.experimental_connect_to_cluster(resolver)
        tf.tpu.experimental.initialize_tpu_system(resolver)
        chosen = tf.distribute.experimental.TPUStrategy(resolver)

    print("REPLICAS: ", chosen.num_replicas_in_sync)

    return chosen

strategy = get_strategy()

In [None]:
SEED = 1029

IM_SZ = 224  # 224, 320, 384, 448, 512
IMAGE_SIZE = [IM_SZ, IM_SZ]
WHICH_FOLD = 0

# training setup
EPOCHS = 8
BATCH_SIZE = 16 * strategy.num_replicas_in_sync  # 16 samples per replica
LR = 6e-4

# Seed every random number generator the notebook uses. TensorFlow keeps its
# own global seed, so seeding `random` and NumPy alone leaves weight
# initialisation and tf.random ops non-reproducible.
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [None]:
from kaggle_datasets import KaggleDatasets


In [None]:
GCS_PATH_2 = KaggleDatasets().get_gcs_path('tfrecord')  # 'sp-tfrecords' or 'sp-tfrecords-sz3

In [None]:
TRAINING_FILENAMES = tf.io.gfile.glob(GCS_PATH_2 +'/trainrecord/*.tfrec')


In [None]:
TRAINING_FILENAMES

In [None]:
VALID_FILENAMES = [TRAINING_FILENAMES[WHICH_FOLD]]
TRAIN_FILENAMES = TRAINING_FILENAMES[:WHICH_FOLD] + TRAINING_FILENAMES[WHICH_FOLD+1:]

In [None]:
MEAN_C = [123.68, 116.779, 103.939]

# for torch preproc mode
MEAN_T = [0.485, 0.456, 0.406]
STD = [0.229, 0.224, 0.225]

def decode_image(image, mode='torch'):
    """Decode JPEG bytes into a float32 image resized to IMAGE_SIZE.

    mode selects the normalisation applied after resizing:
      None    -- no normalisation (values as decoded, cast to float32)
      'tf'    -- divide by 255
      'torch' -- divide by 255, subtract MEAN_T, divide by STD
      'caffe' -- subtract MEAN_C
    """
    assert mode in (None, 'tf', 'torch', 'caffe'), "mode must be one of None, 'tf', 'torch', 'caffe'"
    pixels = tf.image.decode_jpeg(image, channels=3)
    pixels = tf.image.resize(tf.cast(pixels, tf.float32), IMAGE_SIZE)
    if mode == 'tf':
        return pixels / 255.0
    if mode == 'torch':
        return (pixels / 255.0 - MEAN_T) / STD
    if mode == 'caffe':
        return pixels - MEAN_C
    return pixels

def deprocessing(image, mode='torch'):
    """Undo the normalisation of `decode_image` and rescale to [0, 1] for display.

    Args:
        image: array exposing ``.min()`` / ``.max()`` (e.g. a NumPy array).
        mode: the normalisation mode the image was decoded with
            (None, 'tf', 'torch' or 'caffe').

    Returns:
        The de-normalised image min-max rescaled to [0, 1]. A constant image
        (max == min) is returned as all zeros instead of dividing by zero.
    """
    assert mode in (None, 'tf', 'torch', 'caffe'), "mode must be one of None, 'tf', 'torch', 'caffe'"
    def rescale(x):
        low, high = x.min(), x.max()
        span = high - low
        if span == 0:
            # Constant image: min-max scaling is undefined (0/0 -> NaN).
            return np.zeros_like(x, dtype=float)
        return (x - low) / span
    if mode is None or mode == 'tf':
        return rescale(image)
    if mode == 'torch':
        return rescale(image * STD + MEAN_T)
    if mode == 'caffe':
        return rescale(image + MEAN_C)
def read_labeled_tfrecord(example):
    """Parse one serialized labeled example into (image, one-hot label).

    The record holds a JPEG byte string under "image" and a single int64
    class index under "label"; the label is one-hot encoded over CLASSES.
    """
    feature_spec = {
        "image": tf.io.FixedLenFeature([], tf.string),  # bytestring
        "label": tf.io.FixedLenFeature([], tf.int64),   # single element
    }
    parsed = tf.io.parse_single_example(example, feature_spec)
    pixels = decode_image(parsed['image'])
    class_index = tf.cast(parsed['label'], tf.int32)
    return pixels, tf.one_hot(class_index, CLASSES)

def read_unlabeled_tfrecord(example):
    """Parse one serialized unlabeled example into (image, filename).

    The record holds a JPEG byte string under "image" and a byte string
    under "filename"; the filename is returned unchanged.
    """
    feature_spec = {
        "image": tf.io.FixedLenFeature([], tf.string),     # bytestring
        "filename": tf.io.FixedLenFeature([], tf.string),  # single element
    }
    parsed = tf.io.parse_single_example(example, feature_spec)
    return decode_image(parsed['image']), parsed['filename']

def data_augment(image, label, p_hsv=0.6, p_affine=0.75, p_cutout=0.):
    """Randomly augment one training image; the label is passed through.

    A horizontal flip is always attempted. Colour jitter, the affine
    shift/scale/rotate and cutout are each applied when their own uniform
    draw falls below the matching probability.

    Args:
        image: decoded image tensor.
        label: label tensor, returned unchanged.
        p_hsv: probability of brightness/saturation/contrast jitter.
        p_affine: probability of `shift_scale_rotate`.
        p_cutout: probability of `cutout`.

    Returns:
        (augmented image, label)
    """
    # data augmentation. Thanks to the dataset.prefetch(AUTO) statement in the next function (below),
    # this happens essentially for free on TPU. Data pipeline code is executed on the "CPU" part
    # of the TPU while the TPU itself is computing gradients.

    # Each gate gets its own op-level seed: inside a traced function, ops that
    # share the same seed produce the same sequence, which would make the three
    # decisions identical instead of independent. The draws are scalars so the
    # `if` predicates below are scalar booleans.
    r_hsv = tf.random.uniform([], minval=0, maxval=1, dtype='float32', seed=SEED)
    r_affine = tf.random.uniform([], minval=0, maxval=1, dtype='float32', seed=SEED + 1)
    r_cutout = tf.random.uniform([], minval=0, maxval=1, dtype='float32', seed=SEED + 2)
    
    image = tf.image.random_flip_left_right(image)
    if r_hsv < p_hsv:
        image = tf.image.random_brightness(image, 0.15)
        image = tf.image.random_saturation(image, 0.8, 2.3)
        image = tf.image.random_contrast(image, 0.8, 1.3)
    if r_affine < p_affine:
        image = shift_scale_rotate(image, h_shift=0.1*IM_SZ, w_shift=0.1*IM_SZ)
    if r_cutout < p_cutout:
        image = cutout(image)
    
    return image, label

def get_training_dataset():
    """Build the endlessly repeating, shuffled and batched training pipeline.

    Reads TRAIN_FILENAMES, decodes the labeled records, caches the decoded
    examples, then applies `data_augment` after the cache so augmentation is
    re-drawn on every pass.
    """
    records = tf.data.TFRecordDataset(TRAIN_FILENAMES, num_parallel_reads=AUTO)
    decoded = records.map(read_labeled_tfrecord, num_parallel_calls=AUTO).cache()
    augmented = decoded.map(data_augment, num_parallel_calls=AUTO)
    return augmented.repeat().shuffle(100000).batch(BATCH_SIZE).prefetch(AUTO)

def get_validation_dataset():
    """Build the validation pipeline from VALID_FILENAMES.

    Records are decoded, batched, and the batches are cached; there is no
    augmentation, shuffling or repetition.
    """
    records = tf.data.TFRecordDataset(VALID_FILENAMES, num_parallel_reads=AUTO)
    batches = records.map(read_labeled_tfrecord, num_parallel_calls=AUTO).batch(BATCH_SIZE)
    return batches.cache().prefetch(AUTO)
def get_test_dataset():
    """Build the batched (image, filename) pipeline from TEST_FILENAMES."""
    records = tf.data.TFRecordDataset(TEST_FILENAMES, num_parallel_reads=AUTO)
    parsed = records.map(read_unlabeled_tfrecord, num_parallel_calls=AUTO)
    return parsed.batch(BATCH_SIZE)

def count_data_items(filenames):
    """Sum the item counts encoded in .tfrec file names.

    The number of data items is written in the name of each file,
    i.e. flowers00-230.tfrec = 230 data items.

    Only the final path component is inspected, so a directory or bucket
    name that happens to contain "-<digits>." cannot be mistaken for the
    count.

    Args:
        filenames: iterable of file paths.

    Returns:
        The total item count (NumPy scalar, as produced by ``np.sum``).

    Raises:
        ValueError: if a file name carries no "-<digits>." count.
    """
    count_pattern = re.compile(r"-([0-9]+)\.")  # compiled once, not per file
    counts = []
    for filename in filenames:
        found = count_pattern.search(os.path.basename(filename))
        if found is None:
            raise ValueError("no item count found in file name: {!r}".format(filename))
        counts.append(int(found.group(1)))
    return np.sum(counts)

In [None]:
def transform(image, matrix, border_mode=0):
    """Warp a square image by mapping every destination pixel through `matrix`.

    For each destination pixel, its centred coordinate is multiplied by
    `matrix` to find the source pixel (rounded to the nearest integer) whose
    value is copied.

    Args:
        image: image tensor indexed as [DIM, DIM, 3], DIM = IMAGE_SIZE[0].
        matrix: 2x2 matrix applied to (x, y), or a matrix with 3 rows applied
            to homogeneous (x, y, 1) coordinates.
        border_mode: index into ['constant', 'nearest']. 'constant' fills
            pixels whose source falls outside the image with 0; 'nearest'
            clamps the source coordinate to the image border.

    Returns:
        The warped image, shape (DIM, DIM, 3).
    """
    BORDERS = ['constant', 'nearest']
    border_mode = BORDERS[border_mode]
    
    DIM = IMAGE_SIZE[0]
    C = DIM // 2  # coordinates are centred on this pixel before the matrix is applied
    
    # LIST DESTINATION PIXEL INDICES
    y, x = tf.meshgrid(tf.range(DIM), tf.range(DIM))
    x_c, y_c = tf.reshape(x - C, [-1]), tf.reshape(y - C, [-1])
    x, y = tf.reshape(x, [-1]), tf.reshape(y, [-1])
    if matrix.shape[0] == 2:
        coord = tf.stack( [x_c,y_c] )   # (2, DIM*DIM)
    else:
        # homogeneous coordinates: a row of ones lets the matrix carry a translation
        z = tf.ones([DIM*DIM], dtype='int32')
        coord = tf.stack( [x_c,y_c,z] )   # (3, DIM*DIM)
    
    # ROTATE DESTINATION PIXELS ONTO ORIGIN PIXELS
    coord_pr = tf.matmul(matrix, tf.cast(coord, dtype='float32'))
    # keep the first two rows, undo the centring and snap to integer pixel indices
    coord_pr = tf.cast(tf.round(coord_pr[:2,:] + C), dtype='int32')   # (2, DIM*DIM)
    
    # FIND ORIGIN PIXEL VALUES
    if border_mode == 'constant':
        x_pr, y_pr = coord_pr[0,:], coord_pr[1,:]
        # True where the source coordinate lies outside the image
        outside_ind = tf.logical_or( tf.logical_or(y_pr>DIM-1 , y_pr<0), tf.logical_or(x_pr>DIM-1 , x_pr<0))

        # drop destination/source pairs whose source is outside the image
        x_pr = tf.boolean_mask(x_pr, tf.logical_not(outside_ind))   # (<DIM*DIM, )
        y_pr = tf.boolean_mask(y_pr, tf.logical_not(outside_ind))   # (<DIM*DIM, )
        x    = tf.boolean_mask(x, tf.logical_not(outside_ind))   # (<DIM*DIM, )
        y    = tf.boolean_mask(y, tf.logical_not(outside_ind))   # (<DIM*DIM, )

        coord_pr = tf.transpose( tf.stack( [x_pr, y_pr] ) )   # (<DIM*DIM, 2)
        coord = tf.cast(tf.transpose( tf.stack( [x, y] ) ), 'int64')   # (<DIM*DIM, 2)

        # scatter each channel separately through a sparse tensor; destinations
        # that were dropped above keep the default value 0
        im_channels = tf.split(image, 3, axis=-1)
        rot_channels = []
        for im_val in im_channels:
            rot_val = tf.squeeze(tf.gather_nd(im_val, coord_pr), axis=-1)   # (<DIM*DIM, )
            rot = tf.SparseTensor(coord, rot_val, [DIM, DIM])
            rot_channels.append(tf.sparse.to_dense(rot, default_value=0, validate_indices=False))

        rot_image = tf.transpose(tf.stack(rot_channels), [1, 2, 0])   # (DIM, DIM, 3)
    
    if border_mode == 'nearest':
        # clamp source coordinates to the image so border pixels are repeated
        coord_pr = tf.clip_by_value(coord_pr, 0, DIM - 1)   # (2, DIM*DIM)
        rot_image = tf.reshape( tf.gather_nd(image, tf.transpose(coord_pr)), [DIM, DIM, 3] )
    
    return rot_image

def rotate(image, angle):
    """Warp `image` with the 2x2 rotation matrix for `angle` (in degrees)."""
    radians = math.pi * angle / 180.
    cos_a, sin_a = tf.math.cos(radians), tf.math.sin(radians)
    rotation = tf.reshape(tf.stack([cos_a, sin_a, -sin_a, cos_a]), [2, 2])
    return transform(image, rotation)

def shear(image, angle):
    """Warp `image` with the 2x2 matrix [[1, sin], [0, cos]] of `angle` (in degrees)."""
    radians = math.pi * angle / 180.
    cos_a, sin_a = tf.math.cos(radians), tf.math.sin(radians)
    unit = tf.constant(1, dtype='float32')
    null = tf.constant(0, dtype='float32')
    shear_matrix = tf.reshape(tf.stack([unit, sin_a, null, cos_a]), [2, 2])
    return transform(image, shear_matrix)

def zoom(image, height_zoom, width_zoom):
    """Warp `image` with the diagonal matrix diag(1/height_zoom, 1/width_zoom)."""
    unit = tf.constant(1, dtype='float32')
    null = tf.constant(0, dtype='float32')
    diagonal = [unit / height_zoom, null, null, unit / width_zoom]
    return transform(image, tf.reshape(tf.stack(diagonal), [2, 2]))

def shift(image, height_shift, width_shift):
    """Warp `image` with a 3x3 homogeneous translation by (height_shift, width_shift)."""
    unit = tf.constant(1, dtype='float32')
    null = tf.constant(0, dtype='float32')
    entries = [
        unit, null, height_shift,
        null, unit, width_shift,
        null, null, unit,
    ]
    return transform(image, tf.reshape(tf.stack(entries), [3, 3]))

def shift_scale_rotate(image, rotation=15, shear=8, h_zoom=1, w_zoom=1, h_shift=20, w_shift=20):
    """Apply a random rotation, shear and zoom, then a random translation.

    Every magnitude is drawn from a standard normal: `rotation`, `shear`,
    `h_shift` and `w_shift` scale their draw, while the zoom factors are
    `h_zoom` / `w_zoom` plus a tenth of a draw. Rotation, shear and zoom are
    combined into one 2x2 matrix and applied with `transform`; the
    translation is applied in a second `transform` pass with a 3x3 matrix.
    """
    def gaussian():
        return tf.random.normal([1], dtype='float32')

    rot_deg = rotation * gaussian()
    shear_deg = shear * gaussian()
    zoom_h = h_zoom + gaussian() / 10.
    zoom_w = w_zoom + gaussian() / 10.
    shift_h = h_shift * gaussian()
    shift_w = w_shift * gaussian()

    unit = tf.constant([1], dtype='float32')
    null = tf.constant([0], dtype='float32')

    # rotation
    rot_rad = math.pi * rot_deg / 180.
    cos_r, sin_r = tf.math.cos(rot_rad), tf.math.sin(rot_rad)
    rot_matrix = tf.reshape(tf.concat([cos_r, sin_r, -sin_r, cos_r], axis=0), [2, 2])

    # shear
    shear_rad = math.pi * shear_deg / 180.
    cos_s, sin_s = tf.math.cos(shear_rad), tf.math.sin(shear_rad)
    shr_matrix = tf.reshape(tf.concat([unit, sin_s, null, cos_s], axis=0), [2, 2])

    # zoom
    z_matrix = tf.reshape(tf.concat([unit / zoom_h, null, null, unit / zoom_w], axis=0), [2, 2])

    combined = K.dot(K.dot(rot_matrix, shr_matrix), z_matrix)
    warped = transform(image, combined)

    # translation, expressed in homogeneous coordinates
    s_matrix = tf.reshape(
        tf.concat([unit, null, shift_h, null, unit, shift_w, null, null, unit], axis=0),
        [3, 3],
    )
    return transform(warped, s_matrix)

def cutout(image, min_height=0.4, min_width=0.4, max_height=0.6, max_width=0.6):
    """Zero out one random rectangle of the image.

    The rectangle's height and width are drawn uniformly between the given
    min/max fractions of the image side (DIM = IMAGE_SIZE[0]). Its position
    may extend past the image edge, in which case it is clipped to the image.

    Args:
        image: image tensor multiplied element-wise by a (DIM, DIM, 3) mask.
        min_height, max_height: rectangle height range as a fraction of DIM.
        min_width, max_width: rectangle width range as a fraction of DIM.

    Returns:
        The image with the rectangle's pixels multiplied by 0.
    """
    DIM = IMAGE_SIZE[0]
    
    cut_height = tf.cast(tf.round(tf.random.uniform([], minval=min_height*DIM, maxval=max_height*DIM)), 'int32')
    cut_width = tf.cast(tf.round(tf.random.uniform([], minval=min_width*DIM, maxval=max_width*DIM)), 'int32')
    # the lower bound is negative, so the rectangle may start outside the image
    x_min = tf.random.uniform([], minval=-cut_width//2, maxval=DIM-1-cut_width//2, dtype='int32')
    x_max = x_min + cut_width
    y_min = tf.random.uniform([], minval=-cut_height//2, maxval=DIM-1-cut_height//2, dtype='int32')
    y_max = y_min + cut_height
    # clip the rectangle to the image, shrinking its size by the overhang
    if x_min < 0:
        cut_width -= 0 - x_min
        x_min = tf.clip_by_value(x_min, 0, x_max)
    if y_min < 0:
        cut_height -= 0 - y_min
        y_min = tf.clip_by_value(y_min, 0, y_max)
    if x_max > DIM:
        cut_width -= x_max - DIM
        x_max = tf.clip_by_value(x_max, x_min, DIM)
    if y_max > DIM:
        cut_height -= y_max - DIM
        y_max = tf.clip_by_value(y_max, y_min, DIM)
    
    # mask = zeros over the rectangle, padded with ones out to (DIM, DIM, 3)
    cut_area = tf.zeros([cut_height, cut_width, 3], dtype='float32')
    pad_top = y_min
    pad_bottom = DIM - y_max
    pad_left = x_min
    pad_right = DIM - x_max
    cut_mask = tf.pad(cut_area, [[pad_top, pad_bottom], [pad_left, pad_right], [0,0]], constant_values=1)
    
    cut_image = tf.multiply(image, cut_mask)
    return cut_image

In [None]:
class LRSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
    """Warm-up / anneal learning-rate schedule.

    The rate rises from ``max_lr / div_factor`` to ``max_lr`` over the first
    ``warmup`` fraction of ``iterations`` and then falls to
    ``max_lr / final_div`` over the remaining steps.

    Args:
        iterations: total number of optimizer steps.
        sched_profile: 'cosine' or 'linear' (same curve for both phases), or
            'linear_cosine' (linear warm-up, cosine decay).
        max_lr: peak learning rate.
        div_factor: ``max_lr / div_factor`` is the starting rate.
        warmup: fraction of `iterations` spent warming up.
        final_div: ``max_lr / final_div`` is the final rate; defaults to
            ``div_factor * 1e4``.
    """
    SCHEDULES = set(['cosine', 'linear', 'linear_cosine'])
    
    def __init__(self, iterations, sched_profile='cosine', max_lr=3e-3,
                 div_factor=40, warmup=0.3, final_div=None):

        assert sched_profile in self.SCHEDULES
        self.sched_profile = sched_profile

        self.max_lr = max_lr
        # kept so that get_config() can rebuild an identical schedule
        self.div_factor = div_factor
        self.warmup = warmup
        self.init_lr = max_lr/div_factor

        self.final_div = final_div
        if self.final_div is None: self.final_div = div_factor*1e4
        self.final_lr = self.max_lr/self.final_div

        self.total_iteration = iterations
        self.up_iteration = int(self.total_iteration * warmup)
        self.down_iteration = self.total_iteration - self.up_iteration

    def _annealing_cos(self, start, end, pct):
        """Half-cosine interpolation from `start` (pct=0) to `end` (pct=1)."""
        cos_out = tf.math.cos(tf.constant(np.pi) * pct) + 1
        return end + (start-end)/2 * cos_out

    def _annealing_linear(self, start, end, pct):
        """Linear interpolation from `start` (pct=0) to `end` (pct=1)."""
        return start + pct * (end-start)
    
    def _annealing_function(self):
        """Return the single annealing function of a 'cosine'/'linear' profile.

        Returns None for 'linear_cosine', which uses two different functions;
        see `_annealing_pair`.
        """
        if self.sched_profile == 'cosine':
            return self._annealing_cos
        if self.sched_profile == 'linear':
            return self._annealing_linear

    def _annealing_pair(self):
        """Return the (warm-up, decay) annealing functions for the profile."""
        if self.sched_profile == 'linear_cosine':
            return self._annealing_linear, self._annealing_cos
        anneal = self._annealing_function()
        return anneal, anneal
    
    @tf.function
    def __call__(self, step):
        """Return the learning rate for optimizer step `step`."""
        anneal_up, anneal_down = self._annealing_pair()

        # max(..., 1) keeps the progress fraction finite when warmup is 0 or 1
        # (a phase of zero length); otherwise the division yields NaN/inf.
        if step <= self.up_iteration:
            pct = step / max(self.up_iteration, 1)
            curr_lr = anneal_up(self.init_lr, self.max_lr, pct)
        else:
            pct = (step-self.up_iteration) / max(self.down_iteration, 1)
            curr_lr = anneal_down(self.max_lr, self.final_lr, pct)

        return curr_lr

    def get_config(self):
        """Return the constructor arguments so the schedule can be serialised."""
        return {
            'iterations': self.total_iteration,
            'sched_profile': self.sched_profile,
            'max_lr': self.max_lr,
            'div_factor': self.div_factor,
            'warmup': self.warmup,
            'final_div': self.final_div,
        }
    
    def plot_sched(self):
        """Plot the learning rate over all iterations with matplotlib."""
        fig_sz = (6,4)
        subplts = (111,)
        
        init_lr = self.init_lr
        max_lr = self.max_lr
        final_lr = self.final_lr
        anneal_up, anneal_down = self._annealing_pair()
        lrs = [anneal_up(init_lr, max_lr, it/self.up_iteration) for it in range(self.up_iteration)] + \
              [anneal_down(max_lr, final_lr, it/self.down_iteration) for it in range(self.down_iteration)]
        
        plt.subplots(figsize=fig_sz)
        plt.tight_layout()
        ax = plt.subplot(subplts[0])
        ax.plot(range(self.total_iteration), lrs)
        ax.set_xlabel('Iteration')
        ax.set_ylabel('Learning rate')
        
        plt.show()

class ConcatPooling2D(L.Layer):
    """Concatenate `GlobalMaxPooling2D` and `GlobalAveragePooling2D` of the input.

    The max-pooled features come first, followed by the average-pooled
    features, joined along axis 1.
    """
    def __init__(self, **kwargs):
        """Accept the standard Keras layer keyword arguments (name, dtype, ...).

        Forwarding them lets the layer be named and rebuilt from its config;
        calling it with no arguments behaves as before.
        """
        super().__init__(**kwargs)
        self.ap = L.GlobalAveragePooling2D()
        self.mp = L.GlobalMaxPooling2D()

    def call(self, x):
        return tf.concat([self.mp(x), self.ap(x)], 1)

In [None]:
# Dataset sizes are read from the counts embedded in the .tfrec file names.
# They describe whatever TRAIN_FILENAMES / VALID_FILENAMES hold when this cell
# runs, so they go stale if those lists are reassigned later.
NUM_TRAIN_IMAGES = count_data_items(TRAIN_FILENAMES)
NUM_VALIDATION_IMAGES = count_data_items(VALID_FILENAMES)
# the training dataset repeats forever, so an epoch is defined by a step count
STEPS_PER_EPOCH = math.ceil(NUM_TRAIN_IMAGES / BATCH_SIZE)
# VALIDATION_STEPS = math.ceil(NUM_VALIDATION_IMAGES / BATCH_SIZE)
print('Dataset: {} training images, {} validation images'.format(NUM_TRAIN_IMAGES, NUM_VALIDATION_IMAGES))

In [None]:
start_time = fold_start_time = time.time()

# Fold 0 plus two other folds. replace=False: the default samples with
# replacement and could pick the same validation fold twice.
which_folds = [0] + np.random.choice(np.arange(1,10), 2, replace=False).tolist()
oof_labels = []
oof_preds = []

In [None]:
print("Tensorflow version " + tf.__version__)
AUTO = tf.data.experimental.AUTOTUNE # used in tf.data.Dataset API

SEED = 1029
DATA_PATH = '../input/shopee-product-detection-open/'
CLASSES=800
IM_SZ = 224  # 320, 384, 448, 512
IMAGE_SIZE = [IM_SZ, IM_SZ]
N_SPLITS = 10

In [None]:
start_time = fold_start_time = time.time()

# Fold 0 plus two other folds. replace=False: the default samples with
# replacement and could pick the same validation fold twice.
which_folds = [0] + np.random.choice(np.arange(1,10), 2, replace=False).tolist()
oof_labels = []
oof_preds = []
EPOCHS = 20

for i, fd in enumerate(which_folds):
    print(f"Fold {i+1}")
    
    # The dataset builders read these module-level lists, so they are
    # reassigned for every fold.
    VALID_FILENAMES = [TRAINING_FILENAMES[fd]]
    TRAIN_FILENAMES = TRAINING_FILENAMES[:fd] + TRAINING_FILENAMES[fd+1:]
    # Sizes are recomputed per fold: the module-level STEPS_PER_EPOCH and
    # NUM_VALIDATION_IMAGES describe whichever split was current when they
    # were computed, and using them for another fold can truncate the
    # validation labels relative to the predictions.
    fold_valid_images = count_data_items(VALID_FILENAMES)
    fold_steps_per_epoch = math.ceil(count_data_items(TRAIN_FILENAMES) / BATCH_SIZE)
    train_ds = get_training_dataset()
    valid_ds = get_validation_dataset()
    
    # A fresh strategy is requested for every fold
    # (presumably to reset the TPU between folds -- TODO confirm).
    strategy = get_strategy()
    with strategy.scope():
        pretrained_model = efn.EfficientNetB5(weights='imagenet', include_top=False ,input_shape=[*IMAGE_SIZE, 3])
        pretrained_model.trainable = True  # False = transfer learning, True = fine-tuning

        model = tf.keras.Sequential([
            pretrained_model,
            L.GlobalAveragePooling2D(),
            L.Dense(CLASSES, activation='softmax')
        ])

        model.compile(
            optimizer=tf.keras.optimizers.Adam(LRSchedule(fold_steps_per_epoch * EPOCHS, max_lr=LR)),
            loss=tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.1),
            metrics=[tf.keras.metrics.CategoricalAccuracy(), tfa.metrics.F1Score(CLASSES
                                                                                 , average='macro', threshold=None)]
        )

        # keep only the weights of the epoch with the best validation accuracy
        ckpt_callback = tf.keras.callbacks.ModelCheckpoint(
            filepath=f'./best_model_3{i}.h5',
            save_weights_only=True,
            monitor='val_categorical_accuracy',
            mode='max',
            save_best_only=True
        )
    
    model.fit(
        train_ds,
        epochs=EPOCHS,
        steps_per_epoch=fold_steps_per_epoch,
             validation_data=valid_ds,
        callbacks=[ckpt_callback]
    )
    # restore the best checkpoint before producing out-of-fold predictions
    model.load_weights(f'best_model_3{i}.h5')
    
    images_ds = valid_ds.map(lambda image, label: image)
    labels_ds = valid_ds.map(lambda image, label: label).unbatch()
    # one batch holding every validation label of this fold
    valid_labels = next(iter(labels_ds.batch(fold_valid_images))).numpy()
    valid_labels = np.argmax(valid_labels, -1)
    valid_preds = model.predict(images_ds)
    valid_preds = np.argmax(valid_preds, axis=-1)
    oof_labels.append(valid_labels)
    oof_preds.append(valid_preds)
    
    training_time = time.time() - fold_start_time
    print("FOLD TRAINING TIME: {:0.1f}s".format(training_time))
    fold_start_time = time.time()
    print()
    
keras_fit_training_time = time.time() - start_time
print("KERAS FIT TRAINING TIME: {:0.1f}s".format(keras_fit_training_time))

In [None]:
# Fold 0 plus two other folds; replace=False so no fold is drawn twice.
which_folds = [0] + np.random.choice(np.arange(1,10), 2, replace=False).tolist()


In [None]:
which_folds

In [None]:
def categorical_focal_loss(num_classes, gamma=2., alpha=.25, smooth_alpha=0.05):
    """
    Build a softmax focal loss with optional label smoothing.
           m
      FL = ∑  -alpha * (1 - p_o,c)^gamma * y_o,c * log(p_o,c)
          c=1
      where m = number of classes, c = class and o = observation
    Parameters:
      num_classes -- number of classes, used to spread the smoothing mass
      gamma -- focusing parameter for modulating factor (1-p)
      alpha -- the same as weighing factor in balanced cross entropy
      smooth_alpha -- label smoothing amount; smoothing is skipped when <= 0
    Default value:
      gamma -- 2.0 as mentioned in the paper
      alpha -- 0.25 as mentioned in the paper
    References:
        Official paper: https://arxiv.org/pdf/1708.02002.pdf
        https://www.tensorflow.org/api_docs/python/tf/keras/backend/categorical_crossentropy
    Usage:
     model.compile(loss=[categorical_focal_loss(num_classes, alpha=.25, gamma=2)], metrics=["accuracy"], optimizer=adam)
    """
    def categorical_focal_loss_fixed(y_true, y_pred):
        """
        :param y_true: A tensor of the same shape as `y_pred`
        :param y_pred: A tensor resulting from a softmax
        :return: Per-sample loss, summed over axis 1.
        """
        targets = y_true
        if smooth_alpha > 0:
            targets = targets * (1 - smooth_alpha) + smooth_alpha / num_classes

        # Renormalise so the class probabilities of each sample sum to 1,
        # then clip away exact 0/1 to keep the log finite.
        probs = y_pred / K.sum(y_pred, axis=-1, keepdims=True)
        eps = K.epsilon()
        probs = K.clip(probs, eps, 1. - eps)

        # Focal modulation of the per-class cross entropy
        modulating = alpha * K.pow(1 - probs, gamma)
        per_class = modulating * (-targets * K.log(probs))

        return K.sum(per_class, axis=1)

    return categorical_focal_loss_fixed  

In [None]:
class Generalized_mean_pooling2D(tf.keras.layers.Layer):
    """Generalized-mean (GeM) pooling over the spatial axes of a 4-D tensor.

    Computes ``(mean(|x| ** p) + epsilon) ** (1 / p)`` over axes 1 and 2,
    with `p` a trainable scalar weight initialised to the `p` argument.

    Args:
        p: initial value of the pooling exponent.
        epsilon: small constant added before the root is taken.
        name: optional layer name; an empty string lets Keras pick one.
        **kwargs: forwarded to `tf.keras.layers.Layer`.
    """
    def __init__(self, p=3, epsilon=1e-6, name='', **kwargs):
        # `name` must be passed by keyword: the first positional parameter of
        # Layer.__init__ is `trainable`, so passing the (empty) name
        # positionally marked the layer as non-trainable and froze `p`.
        super(Generalized_mean_pooling2D, self).__init__(name=name or None, **kwargs)
        self.init_p = p
        self.epsilon = epsilon

    def build(self, input_shape):
        if isinstance(input_shape, list) or len(input_shape) != 4:
            raise ValueError('`GeM` pooling layer only allow 1 input with 4 dimensions(b, h, w, c)')
        self.build_shape = input_shape
        self.p = self.add_weight(
            name='p',
            shape=[1,],
            initializer=tf.keras.initializers.Constant(value=self.init_p),
            regularizer=None,
            trainable=True,
            dtype=tf.float32
        )
        self.built = True

    def call(self, inputs):
        input_shape = inputs.get_shape()
        if isinstance(inputs, list) or len(input_shape) != 4:
            raise ValueError('`GeM` pooling layer only allow 1 input with 4 dimensions(b, h, w, c)')
        # |x| ** p equals |x ** p| for integer p but stays finite once the
        # trainable p is no longer an integer (a negative base raised to a
        # fractional power is NaN).
        powered = tf.abs(inputs) ** self.p
        return (tf.reduce_mean(powered, axis=[1,2], keepdims=False) + self.epsilon)**(1.0/self.p)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super(Generalized_mean_pooling2D, self).get_config()
        config.update({'p': self.init_p, 'epsilon': self.epsilon})
        return config


class CosFace(L.Layer):
    """CosFace (additive cosine margin) classification head.

    Called on ``[features, one_hot_labels]``. Features and class weights are
    L2-normalised so their product is a cosine similarity; the margin `m` is
    subtracted from the entry selected by the label, the result is scaled by
    `s` and passed through a softmax. Feeding all-zero labels disables the
    margin.

    Args:
        n_classes: number of output classes.
        s: scale applied to the cosine logits.
        m: cosine margin subtracted from the target-class logit.
        regularizer: optional regularizer for the class-weight matrix.
        **kwargs: forwarded to `tf.keras.layers.Layer`.
    """
    def __init__(self, n_classes=10, s=30.0, m=0.35, regularizer=None, **kwargs):
        super(CosFace, self).__init__(**kwargs)
        self.n_classes = n_classes
        self.s = s
        self.m = m
        self.regularizer = regularizers.get(regularizer)

    def build(self, input_shape):
        # input_shape is [features_shape, labels_shape]
        super(CosFace, self).build(input_shape[0])
        self.W = self.add_weight(name='W',
                                shape=(input_shape[0][-1], self.n_classes),
                                initializer='glorot_uniform',
                                trainable=True,
                                regularizer=self.regularizer)

    def call(self, inputs):
        x, y = inputs
        # normalize feature, as ArcFace does; leaves already-normalised
        # inputs effectively unchanged
        x = tf.nn.l2_normalize(x, axis=1)
        # normalize weights
        W = tf.nn.l2_normalize(self.W, axis=0)
        # dot product
        logits = x @ W
        # add margin (only where the one-hot label is 1)
        target_logits = logits - self.m
        logits = logits * (1 - y) + target_logits * y
        # feature re-scale
        logits *= self.s
        out = tf.nn.softmax(logits)

        return out

    def compute_output_shape(self, input_shape):
        return (None, self.n_classes)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super(CosFace, self).get_config()
        config.update({
            'n_classes': self.n_classes,
            's': self.s,
            'm': self.m,
            'regularizer': regularizers.serialize(self.regularizer),
        })
        return config

In [None]:
# Small stand-alone model wiring a CosFace head onto a 28x28x1 CNN with 10 classes.
# NOTE(review): this rebinds the module-level `model` and shadows the builtin
# `input`; it looks like a smoke test for the CosFace layer -- confirm it is
# still needed.
input = L.Input(shape=(28, 28, 1))
label = L.Input(shape=(10,))  # one-hot labels, consumed by the CosFace head

x = L.Conv2D(32, kernel_size=(3, 3), activation='relu')(input)
x = L.MaxPooling2D(pool_size=(2, 2))(x)
x = L.Conv2D(64, kernel_size=(3, 3), activation='relu')(x)
x = L.MaxPooling2D(pool_size=(2, 2))(x)

x = L.BatchNormalization()(x)
x = L.Dropout(0.5)(x)
x = L.Flatten()(x)
x = L.Dense(512, kernel_initializer='he_normal')(x)
x = L.BatchNormalization()(x)
output = CosFace(10)([x, label])

model =  tf.keras.Model([input, label], output)

In [None]:
# Two-input / two-output metric-learning model: image + one-hot label in,
# CosFace softmax ('root') and plain dense softmax ('root2') out.
# NOTE(review): this cell builds the model outside any strategy scope -- confirm
# it is only meant for inspection.
x_input = L.Input(shape=[*IMAGE_SIZE, 3], name='imgs', dtype='float32')
r_label = L.Input(shape=(800,))
print(x_input)
#pretrained_model = efn.EfficientNetB5(weights='imagenet', include_top=False ,input_shape=[*IMAGE_SIZE, 3])
#pretrained_model.trainable = True  
backbone='efficientnet-b5'
weights='imagenet'
# the last character of `backbone` selects the EfficientNetB<n> constructor
model_fn = getattr(efn, f'EfficientNetB{backbone[-1]}')
x = model_fn(input_shape=[*IMAGE_SIZE, 3], weights=weights, include_top=False)(x_input)
x = Generalized_mean_pooling2D()(x)

    # feature vector
weight_decay = 1e-4
x = L.BatchNormalization()(x)
x = L.Dropout(0.2)(x)
x = L.Flatten()(x)

    # root
x1 = L.Dense(512, kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(weight_decay))(x)
x1 = L.BatchNormalization()(x1)
# unit-length embedding, fed to both heads
x1 = tf.nn.l2_normalize(x1, axis=1)
root = CosFace(800, regularizer=regularizers.l2(weight_decay), name='root')([x1, r_label])
x1 = L.Dense(800, use_bias=False)(x1)
root2 = L.Lambda(lambda x: K.softmax(x), name='root2')(x1)

    # model
model = tf.keras.Model(
        inputs = [x_input,r_label],
        outputs = [root, root2]
    )



In [None]:

def prepare_metric_learning(image, label, mode='train'):
    """Reshape an (image, label) pair into ((image, label_input), label).

    In 'train' mode the real label is also fed to the model as an input;
    in any other mode the label input is replaced by zeros of the same shape.
    """
    label_input = label if mode == 'train' else tf.zeros_like(label)
    return (image, label_input), label

In [None]:
# Fold 0 plus two other folds; replace=False so no fold is drawn twice.
which_folds = [0] + np.random.choice(np.arange(1,10), 2, replace=False).tolist()

In [None]:
which_folds

In [None]:
start_time = fold_start_time = time.time()

oof_labels = []
oof_preds = []
EPOCHS = 100
for i, fd in enumerate(which_folds):
    print(f"Fold {i+1}")
    
    # The dataset builders read these module-level lists, so they are
    # reassigned for every fold.
    VALID_FILENAMES = [TRAINING_FILENAMES[fd]]
    TRAIN_FILENAMES = TRAINING_FILENAMES[:fd] + TRAINING_FILENAMES[fd+1:]
    # Sizes are recomputed per fold: the module-level STEPS_PER_EPOCH and
    # NUM_VALIDATION_IMAGES describe whichever split was current when they
    # were computed, and using them for another fold can truncate the
    # validation labels relative to the predictions.
    fold_valid_images = count_data_items(VALID_FILENAMES)
    fold_steps_per_epoch = math.ceil(count_data_items(TRAIN_FILENAMES) / BATCH_SIZE)
    train_ds = get_training_dataset()
    # training feeds the true label to the margin head ...
    train_ds = train_ds.map(lambda a, b: prepare_metric_learning(a, b, 'train'))

    # ... validation feeds zeros, so no margin is applied
    valid_ds = get_validation_dataset().map(lambda a, b: prepare_metric_learning(a, b, 'valid'))
    weight_decay = 1e-4
 
    # A fresh strategy is requested for every fold
    # (presumably to reset the TPU between folds -- TODO confirm).
    strategy = get_strategy()
    with strategy.scope():
        x_input = L.Input(shape=[*IMAGE_SIZE, 3], name='imgs', dtype='float32')
        r_label = L.Input(shape=(800,))
        print(x_input)
        #pretrained_model = efn.EfficientNetB5(weights='imagenet', include_top=False ,input_shape=[*IMAGE_SIZE, 3])
        #pretrained_model.trainable = True  
        backbone='efficientnet-b5'
        weights='imagenet'
        # the last character of `backbone` selects the EfficientNetB<n> constructor
        model_fn = getattr(efn, f'EfficientNetB{backbone[-1]}')
        x = model_fn(input_shape=[*IMAGE_SIZE, 3], weights=weights, include_top=False)(x_input)
        x = Generalized_mean_pooling2D()(x)

            # feature vector
        weight_decay = 1e-4
        x = L.BatchNormalization()(x)
        x = L.Dropout(0.2)(x)
        x = L.Flatten()(x)

            # root
        x1 = L.Dense(512, kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(weight_decay))(x)
        x1 = L.BatchNormalization()(x1)
        x1 = tf.nn.l2_normalize(x1, axis=1)
        root = CosFace(800, regularizer=regularizers.l2(weight_decay), name='root')([x1, r_label])
        x1 = L.Dense(800, use_bias=False)(x1)
        root2 = L.Lambda(lambda x: K.softmax(x), name='root2')(x1)

            # model
        model = tf.keras.Model(
                inputs = [x_input,r_label],
                outputs = [root, root2]
            )


        model.compile(
            optimizer=tf.keras.optimizers.Adam(LRSchedule(fold_steps_per_epoch * EPOCHS, max_lr=LR)),
            loss=categorical_focal_loss(800),
            metrics=[tf.keras.metrics.CategoricalAccuracy(), tfa.metrics.F1Score(CLASSES
                                                                                 , average='macro', threshold=None)]
        )

        # keep only the weights of the epoch with the lowest validation loss
        ckpt_callback = tf.keras.callbacks.ModelCheckpoint(
            filepath=f'./best_modelmetric_{i}.h5',
            save_weights_only=True,
            monitor='val_loss',
            mode='min',
            save_best_only=True
        )
    print('i am model', train_ds)
    model.fit(
        train_ds,
        epochs=EPOCHS,
        steps_per_epoch=fold_steps_per_epoch,
             validation_data=valid_ds,
        callbacks=[ckpt_callback]
    )
    # restore the best checkpoint before producing out-of-fold predictions
    model.load_weights(f'best_modelmetric_{i}.h5')
    #images_ds = valid_ds.map(lambda image, label: image)

    images_ds = valid_ds
    labels_ds = valid_ds.map(lambda image, label: label).unbatch()

    # one batch holding every validation label of this fold
    valid_labels = next(iter(labels_ds.batch(fold_valid_images))).numpy()
    valid_labels = np.argmax(valid_labels, -1)
    # NOTE(review): the model has two outputs ('root', 'root2'), so predict()
    # should return two arrays and the argmax below would then yield one row
    # of class ids per output -- confirm which head downstream code expects.
    valid_preds = model.predict(images_ds)
    valid_preds = np.argmax(valid_preds, axis=-1)
    oof_labels.append(valid_labels)
    oof_preds.append(valid_preds)
    
    training_time = time.time() - fold_start_time
    print("FOLD TRAINING TIME: {:0.1f}s".format(training_time))
    fold_start_time = time.time()
    print()
    
keras_fit_training_time = time.time() - start_time
print("KERAS FIT TRAINING TIME: {:0.1f}s".format(keras_fit_training_time))

In [None]:
# Small stand-alone model wiring a CosFace head onto a 28x28x1 CNN with 10 classes.
# NOTE(review): an identical cell appears earlier in the notebook; this one
# rebinds the module-level `model` again and shadows the builtin `input` --
# confirm whether both copies are needed.
input = L.Input(shape=(28, 28, 1))
label = L.Input(shape=(10,))  # one-hot labels, consumed by the CosFace head

x = L.Conv2D(32, kernel_size=(3, 3), activation='relu')(input)
x = L.MaxPooling2D(pool_size=(2, 2))(x)
x = L.Conv2D(64, kernel_size=(3, 3), activation='relu')(x)
x = L.MaxPooling2D(pool_size=(2, 2))(x)

x = L.BatchNormalization()(x)
x = L.Dropout(0.5)(x)
x = L.Flatten()(x)
x = L.Dense(512, kernel_initializer='he_normal')(x)
x = L.BatchNormalization()(x)
output = CosFace(10)([x, label])

model =  tf.keras.Model([input, label], output)

In [None]:
class ArcFace(L.Layer):
    """Additive angular margin (ArcFace) classification head.

    Called on a two-element list ``[features, labels]``. Both the features and
    the class-weight columns are L2-normalised, so their product is the cosine
    of the angle between each sample and each class centre. For the entries
    selected by ``labels`` the angle is enlarged by ``m`` before all cosines are
    scaled by ``s`` and passed through a softmax.

    The layer returns probabilities (softmax is applied inside), so it should
    be paired with a loss that expects probabilities rather than raw logits.

    Args:
        n_classes: Number of output classes (columns of the weight matrix).
        s: Scale applied to the cosine logits before the softmax.
        m: Angular margin, in radians, added to the target-class angle.
        regularizer: Anything accepted by ``regularizers.get`` (identifier
            string, config dict, regularizer instance, or None); applied to
            the class-weight matrix.
        **kwargs: Forwarded to the base ``Layer`` constructor.
    """

    def __init__(self, n_classes=10, s=30.0, m=0.50, regularizer=None, **kwargs):
        super(ArcFace, self).__init__(**kwargs)
        self.n_classes = n_classes
        self.s = s
        self.m = m
        self.regularizer = regularizers.get(regularizer)

    def build(self, input_shape):
        """Create the class-weight matrix.

        ``input_shape`` is a pair of shapes (features, labels); only the last
        dimension of the features shape is used to size the weights.
        """
        super(ArcFace, self).build(input_shape[0])
        self.W = self.add_weight(name='W',
                                 shape=(input_shape[0][-1], self.n_classes),
                                 initializer='glorot_uniform',
                                 trainable=True,
                                 regularizer=self.regularizer)

    def call(self, inputs):
        """Return softmax probabilities with the margin applied where ``y`` selects.

        ``inputs`` is ``[x, y]``. ``y`` is used as a multiplicative mask, so it
        is expected to be one-hot (0/1) with the same width as ``n_classes``.
        """
        x, y = inputs
        # Cosine similarity between unit-length features and unit-length class centres.
        x = tf.nn.l2_normalize(x, axis=1)
        W = tf.nn.l2_normalize(self.W, axis=0)
        logits = x @ W
        # Clip strictly inside (-1, 1): the gradient of acos is unbounded at +/-1.
        theta = tf.acos(K.clip(logits, -1.0 + K.epsilon(), 1.0 - K.epsilon()))
        target_logits = tf.cos(theta + self.m)
        # Keep the plain cosine where y == 0 and the margin-penalised cosine where y == 1.
        logits = logits * (1 - y) + target_logits * y
        # Re-scale so the softmax is not confined to the narrow [-1, 1] cosine range.
        logits *= self.s
        return tf.nn.softmax(logits)

    def compute_output_shape(self, input_shape):
        """Output is one probability per class for each sample in the batch."""
        return (None, self.n_classes)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created from config."""
        config = super(ArcFace, self).get_config()
        config.update({
            'n_classes': self.n_classes,
            's': self.s,
            'm': self.m,
            'regularizer': (regularizers.serialize(self.regularizer)
                            if self.regularizer is not None else None),
        })
        return config

In [None]:
# Always include fold 0, plus three *distinct* folds drawn from 1..9.
# replace=False is required: np.random.choice samples with replacement by
# default, which can return the same fold index more than once and would make
# the loop below train and validate on a duplicated fold.
which_folds = [0] + np.random.choice(np.arange(1, 10), 3, replace=False).tolist()

In [None]:
# NOTE(review): a bare expression is only rendered when it is the last statement
# of a cell; if this line shares a cell with the loop below, it displays nothing.
which_folds

# Cross-validation loop for the EfficientNet + ArcFace model.
# For each fold index in `which_folds`: build the train/validation datasets,
# build and compile a fresh model, train it, reload the best checkpoint, and
# collect that fold's labels and predictions into oof_labels / oof_preds.
start_time = fold_start_time = time.time()

oof_labels = []
oof_preds = []
EPOCHS = 80
for i, fd in enumerate(which_folds):
    print(f"Fold {i+1}")
    
    # Hold out file `fd` for validation and train on every other file.
    # NOTE(review): presumably get_training_dataset()/get_validation_dataset()
    # read these two globals -- confirm against their definitions.
    VALID_FILENAMES = [TRAINING_FILENAMES[fd]]
    TRAIN_FILENAMES = TRAINING_FILENAMES[:fd] + TRAINING_FILENAMES[fd+1:]
    train_ds = get_training_dataset()
    # prepare_metric_learning is defined elsewhere; it is applied per element
    # with a 'train' / 'valid' mode flag.
    train_ds = train_ds.map(lambda a, b: prepare_metric_learning(a, b, 'train'))

    valid_ds = get_validation_dataset().map(lambda a, b: prepare_metric_learning(a, b, 'valid'))
    weight_decay = 1e-4
 
    # A new strategy object is requested for every fold; the model and the
    # checkpoint callback are created inside its scope.
    strategy = get_strategy()
    with strategy.scope():
        x_input = L.Input(shape=[*IMAGE_SIZE, 3], name='imgs', dtype='float32')
        # Label is a second model input because the ArcFace head consumes it.
        # NOTE(review): 800 is hard-coded here and in ArcFace(n_classes=800);
        # presumably it equals CLASSES -- confirm.
        r_label = L.Input(shape=(800,))
        print(x_input)
        #pretrained_model = efn.EfficientNetB5(weights='imagenet', include_top=False ,input_shape=[*IMAGE_SIZE, 3])
        #pretrained_model.trainable = True  
        backbone='efficientnet-b5'
        weights='imagenet'
        # The last character of `backbone` ('5') picks the constructor: efn.EfficientNetB5.
        model_fn = getattr(efn, f'EfficientNetB{backbone[-1]}')
        x = model_fn(input_shape=[*IMAGE_SIZE, 3], weights=weights, include_top=False)(x_input)
        x = Generalized_mean_pooling2D()(x)

        # feature vector
        # (weight_decay is re-assigned to the same value it already holds)
        weight_decay = 1e-4
        x = L.BatchNormalization()(x)
        x = L.Dropout(0.2)(x)
        x = L.Flatten()(x)

        # root: 512-d embedding with L2 weight penalty, then batch-norm and L2 normalisation
        x = L.Dense(512, kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(weight_decay))(x)
        x = L.BatchNormalization()(x)
        # ArcFace.call L2-normalises its feature input again, so this step is repeated there.
        x = tf.nn.l2_normalize(x, axis=1)
        output = ArcFace(n_classes=800)([x, r_label])



        # model: takes [image, label] and returns the ArcFace softmax output
        model = tf.keras.Model(
                inputs = [x_input,r_label],
                outputs = output
            )


        # ArcFace already applies softmax, so the loss is given by name and
        # operates on probabilities.
        model.compile(
            optimizer=tf.keras.optimizers.Adam(LRSchedule(STEPS_PER_EPOCH * EPOCHS, max_lr=LR)),
            loss='categorical_crossentropy',
            metrics=[tf.keras.metrics.CategoricalAccuracy(), tfa.metrics.F1Score(CLASSES
                                                                                 , average='macro', threshold=None)]
        )

        # Keep only the weights of the epoch with the lowest validation loss,
        # one file per fold position `i`.
        ckpt_callback = tf.keras.callbacks.ModelCheckpoint(
            filepath=f'./best_modelmetric2_{i}.h5',
            save_weights_only=True,
            monitor='val_loss',
            mode='min',
            save_best_only=True
        )
    print('i am model', train_ds)
    model.fit(
        train_ds,
        epochs=EPOCHS,
        steps_per_epoch=STEPS_PER_EPOCH,
             validation_data=valid_ds,
        callbacks=[ckpt_callback]
    )
    # Restore the best checkpoint written during fit (same file as `filepath` above).
    model.load_weights(f'best_modelmetric2_{i}.h5')
    #images_ds = valid_ds.map(lambda image, label: image)

    # The validation dataset is passed to predict() unchanged.
    # NOTE(review): the model takes the label as an input, so predictions are
    # computed with labels supplied; presumably prepare_metric_learning packs
    # the label into the model inputs -- confirm.
    images_ds = valid_ds
    labels_ds = valid_ds.map(lambda image, label: label).unbatch()

    # Re-batch every label into one batch of NUM_VALIDATION_IMAGES and take it.
    # NOTE(review): if the fold holds more examples than NUM_VALIDATION_IMAGES,
    # only the first NUM_VALIDATION_IMAGES labels are taken while predict()
    # still covers the whole dataset.
    valid_labels = next(iter(labels_ds.batch(NUM_VALIDATION_IMAGES))).numpy()
    # argmax over the last axis turns label vectors / probabilities into class indices.
    valid_labels = np.argmax(valid_labels, -1)
    valid_preds = model.predict(images_ds)
    valid_preds = np.argmax(valid_preds, axis=-1)
    oof_labels.append(valid_labels)
    oof_preds.append(valid_preds)
    
    # Per-fold wall-clock time; the fold timer restarts for the next iteration.
    training_time = time.time() - fold_start_time
    print("FOLD TRAINING TIME: {:0.1f}s".format(training_time))
    fold_start_time = time.time()
    print()
    
# Total wall-clock time across all folds.
keras_fit_training_time = time.time() - start_time
print("KERAS FIT TRAINING TIME: {:0.1f}s".format(keras_fit_training_time))

In [None]:
# Cross-validation loop for the EfficientNet + ArcFace model.
#
# This cell previously held a whitespace-mangled paste of the loop (several
# statements collapsed onto one line, loop body de-indented) followed by a clean
# copy of the same code. The mangled text was a SyntaxError and duplicated the
# clean loop statement for statement, so the cell now contains the loop once.
#
# For each fold index in `which_folds`: build the train/validation datasets,
# build and compile a fresh model, train it, reload the best checkpoint, and
# collect that fold's labels and predictions into oof_labels / oof_preds.
start_time = fold_start_time = time.time()

oof_labels = []
oof_preds = []
EPOCHS = 80
for i, fd in enumerate(which_folds):
    print(f"Fold {i+1}")

    # Hold out file `fd` for validation and train on every other file.
    # NOTE(review): presumably the dataset helpers read these two globals --
    # confirm against their definitions.
    VALID_FILENAMES = [TRAINING_FILENAMES[fd]]
    TRAIN_FILENAMES = TRAINING_FILENAMES[:fd] + TRAINING_FILENAMES[fd+1:]
    train_ds = get_training_dataset()
    train_ds = train_ds.map(lambda a, b: prepare_metric_learning(a, b, 'train'))

    valid_ds = get_validation_dataset().map(lambda a, b: prepare_metric_learning(a, b, 'valid'))
    weight_decay = 1e-4

    # Single source of truth for this fold's checkpoint, used for both save and load.
    ckpt_path = f'./best_modelmetric2_{i}.h5'

    strategy = get_strategy()
    with strategy.scope():
        x_input = L.Input(shape=[*IMAGE_SIZE, 3], name='imgs', dtype='float32')
        # Label is a second model input because the ArcFace head consumes it.
        # NOTE(review): 800 is hard-coded; presumably it equals CLASSES -- confirm.
        r_label = L.Input(shape=(800,))
        print(x_input)

        backbone = 'efficientnet-b5'
        weights = 'imagenet'
        # The last character of `backbone` ('5') picks the constructor: efn.EfficientNetB5.
        model_fn = getattr(efn, f'EfficientNetB{backbone[-1]}')
        x = model_fn(input_shape=[*IMAGE_SIZE, 3], weights=weights, include_top=False)(x_input)
        x = Generalized_mean_pooling2D()(x)

        # feature vector
        x = L.BatchNormalization()(x)
        x = L.Dropout(0.2)(x)
        x = L.Flatten()(x)

        # root: 512-d embedding with L2 weight penalty, then batch-norm and L2 normalisation
        x = L.Dense(512, kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(weight_decay))(x)
        x = L.BatchNormalization()(x)
        x = tf.nn.l2_normalize(x, axis=1)
        output = ArcFace(n_classes=800)([x, r_label])

        # model: takes [image, label] and returns the ArcFace softmax output
        model = tf.keras.Model(
            inputs=[x_input, r_label],
            outputs=output
        )

        # ArcFace already applies softmax, so the loss operates on probabilities.
        model.compile(
            optimizer=tf.keras.optimizers.Adam(LRSchedule(STEPS_PER_EPOCH * EPOCHS, max_lr=LR)),
            loss='categorical_crossentropy',
            metrics=[
                tf.keras.metrics.CategoricalAccuracy(),
                tfa.metrics.F1Score(CLASSES, average='macro', threshold=None),
            ]
        )

        # Keep only the weights of the epoch with the lowest validation loss.
        ckpt_callback = tf.keras.callbacks.ModelCheckpoint(
            filepath=ckpt_path,
            save_weights_only=True,
            monitor='val_loss',
            mode='min',
            save_best_only=True
        )
    print('i am model', train_ds)
    model.fit(
        train_ds,
        epochs=EPOCHS,
        steps_per_epoch=STEPS_PER_EPOCH,
        validation_data=valid_ds,
        callbacks=[ckpt_callback]
    )
    # Restore the best checkpoint written during fit.
    model.load_weights(ckpt_path)

    # The validation dataset is passed to predict() unchanged.
    # NOTE(review): the model takes the label as an input, so predictions are
    # computed with labels supplied -- confirm this is intended for evaluation.
    images_ds = valid_ds
    labels_ds = valid_ds.map(lambda image, label: label).unbatch()

    # Re-batch every label into one batch of NUM_VALIDATION_IMAGES and take it.
    # NOTE(review): assumes NUM_VALIDATION_IMAGES covers the whole fold; otherwise
    # labels are truncated while predict() still covers every example.
    valid_labels = next(iter(labels_ds.batch(NUM_VALIDATION_IMAGES))).numpy()
    valid_labels = np.argmax(valid_labels, -1)
    valid_preds = model.predict(images_ds)
    valid_preds = np.argmax(valid_preds, axis=-1)
    oof_labels.append(valid_labels)
    oof_preds.append(valid_preds)

    # Per-fold wall-clock time; the fold timer restarts for the next iteration.
    training_time = time.time() - fold_start_time
    print("FOLD TRAINING TIME: {:0.1f}s".format(training_time))
    fold_start_time = time.time()
    print()

# Total wall-clock time across all folds.
keras_fit_training_time = time.time() - start_time
print("KERAS FIT TRAINING TIME: {:0.1f}s".format(keras_fit_training_time))