### About

This notebook is a fork of https://www.kaggle.com/hidehisaarai1213/glret21-efficientnetb0-baseline-inference. Please upvote the original work by @hidehisaarai1213 first.

In [1]:
#!pip install -U keras-efficientnet-v2
#!pip install -U git+https://github.com/leondgarse/keras_efficientnet_v2

In [2]:
"""
Creates an EfficientNetV2 model as defined in:
Mingxing Tan, Quoc V. Le. (2021).
EfficientNetV2: Smaller Models and Faster Training
arXiv preprint arXiv:2104.00298.
"""
import os
import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow import keras
from tensorflow.keras import backend as K
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
    Activation,
    Add,
    BatchNormalization,
    Conv2D,
    Dense,
    DepthwiseConv2D,
    Dropout,
    GlobalAveragePooling2D,
    Input,
    PReLU,
    Reshape,
    Multiply,
)

# Momentum handed to the BatchNormalization layers created by batchnorm_with_activation().
BATCH_NORM_DECAY = 0.9
# Epsilon handed to the BatchNormalization layers created by batchnorm_with_activation().
BATCH_NORM_EPSILON = 0.001
# Kernel initializer shared by the Conv2D / DepthwiseConv2D layers built in this cell.
CONV_KERNEL_INITIALIZER = keras.initializers.VarianceScaling(scale=2.0, mode="fan_out", distribution="truncated_normal")
# CONV_KERNEL_INITIALIZER = 'glorot_uniform'

# Per-variant stage configuration read by EfficientNetV2(). Every list holds one
# entry per stage:
#   expands      - expansion ratio handed to MBConv
#   out_channels - stage output channels (rounded with _make_divisible(..., 8))
#   depthes      - number of blocks in the stage
#   strides      - stride of the first block of the stage (later blocks use 1)
#   use_ses      - non-zero adds a squeeze-excitation module; 0 makes the stage "fused"
# Optional scalars: "first_conv_filter" (stem filters, defaults to out_channels[0])
# and "output_conv_filter" (filters of the final 1x1 conv, defaults to 1280).
BLOCK_CONFIGS = {
    "b0": {  # width 1.0, depth 1.0
        "first_conv_filter": 32,
        "expands": [1, 4, 4, 4, 6, 6],
        "out_channels": [16, 32, 48, 96, 112, 192],
        "depthes": [1, 2, 2, 3, 5, 8],
        "strides": [1, 2, 2, 2, 1, 2],
        "use_ses": [0, 0, 0, 1, 1, 1],
    },
    "b1": {  # width 1.0, depth 1.1
        "first_conv_filter": 32,
        "expands": [1, 4, 4, 4, 6, 6],
        "out_channels": [16, 32, 48, 96, 112, 192],
        "depthes": [2, 3, 3, 4, 6, 9],
        "strides": [1, 2, 2, 2, 1, 2],
        "use_ses": [0, 0, 0, 1, 1, 1],
    },
    "b2": {  # width 1.1, depth 1.2
        "first_conv_filter": 32,
        "output_conv_filter": 1408,
        "expands": [1, 4, 4, 4, 6, 6],
        "out_channels": [16, 32, 56, 104, 120, 208],
        "depthes": [2, 3, 3, 4, 6, 10],
        "strides": [1, 2, 2, 2, 1, 2],
        "use_ses": [0, 0, 0, 1, 1, 1],
    },
    "b3": {  # width 1.2, depth 1.4
        "first_conv_filter": 40,
        "output_conv_filter": 1536,
        "expands": [1, 4, 4, 4, 6, 6],
        "out_channels": [16, 40, 56, 112, 136, 232],
        "depthes": [2, 3, 3, 5, 7, 12],
        "strides": [1, 2, 2, 2, 1, 2],
        "use_ses": [0, 0, 0, 1, 1, 1],
    },
    "t": {  # width 1.4 * 0.8, depth 1.8 * 0.9, from timm
        "first_conv_filter": 24,
        "output_conv_filter": 1024,
        "expands": [1, 4, 4, 4, 6, 6],
        "out_channels": [24, 40, 48, 104, 128, 208],
        "depthes": [2, 4, 4, 6, 9, 14],
        "strides": [1, 2, 2, 2, 1, 2],
        "use_ses": [0, 0, 0, 1, 1, 1],
    },
    "s": {  # width 1.4, depth 1.8
        "first_conv_filter": 24,
        "output_conv_filter": 1280,
        "expands": [1, 4, 4, 4, 6, 6],
        "out_channels": [24, 48, 64, 128, 160, 256],
        "depthes": [2, 4, 4, 6, 9, 15],
        "strides": [1, 2, 2, 2, 1, 2],
        "use_ses": [0, 0, 0, 1, 1, 1],
    },
    "m": {  # width 1.6, depth 2.2
        "first_conv_filter": 24,
        "output_conv_filter": 1280,
        "expands": [1, 4, 4, 4, 6, 6, 6],
        "out_channels": [24, 48, 80, 160, 176, 304, 512],
        "depthes": [3, 5, 5, 7, 14, 18, 5],
        "strides": [1, 2, 2, 2, 1, 2, 1],
        "use_ses": [0, 0, 0, 1, 1, 1, 1],
    },
    "l": {  # width 2.0, depth 3.1
        "first_conv_filter": 32,
        "output_conv_filter": 1280,
        "expands": [1, 4, 4, 4, 6, 6, 6],
        "out_channels": [32, 64, 96, 192, 224, 384, 640],
        "depthes": [4, 7, 7, 10, 19, 25, 7],
        "strides": [1, 2, 2, 2, 1, 2, 1],
        "use_ses": [0, 0, 0, 1, 1, 1, 1],
    },
    "xl": {
        "first_conv_filter": 32,
        "output_conv_filter": 1280,
        "expands": [1, 4, 4, 4, 6, 6, 6],
        "out_channels": [32, 64, 96, 192, 256, 512, 640],
        "depthes": [4, 8, 8, 16, 24, 32, 8],
        "strides": [1, 2, 2, 2, 1, 2, 1],
        "use_ses": [0, 0, 0, 1, 1, 1, 1],
    },
}

# Expected hash of each pretrained weight file, keyed first by model type and then
# by pretrained tag ("imagenet", "21k", "21k-ft1k", "noisy_student").
# reload_model_weights() passes the value as `file_hash` to keras.utils.get_file.
# The values are 32 hex digits - presumably MD5 digests; TODO confirm.
FILE_HASH_DICT = {
    "b0": {"21k-ft1k": "4e4da4eb629897e4d6271e131039fe75", "21k": "5dbb4252df24b931e74cdd94d150f25a", "imagenet": "9abdc43cb00f4cb06a8bdae881f412d6"},
    "b1": {"21k-ft1k": "5f1aee82209f4f0f20bd24460270564e", "21k": "a50ae65b50ceff7f5283be2f4506d2c2", "imagenet": "5d4223b59ff268828d5112a1630e234e"},
    "b2": {"21k-ft1k": "ec384b84441ddf6419938d1e5a0cbef2", "21k": "9f718a8bbb7b63c5313916c5e504790d", "imagenet": "1814bc08d4bb7a5e0ed3ccfe1cf18650"},
    "b3": {"21k-ft1k": "4a27827b0b2df508bed31ae231003bb1", "21k": "ade5bdbbdf1d54c4561aa41511525855", "imagenet": "cda85b8494c7ec5a68dffb335a254bab"},
    "l": {"21k-ft1k": "30327edcf1390d10e9a0de42a2d731e3", "21k": "7970f913eec1b4918e007c8580726412", "imagenet": "2b65f5789f4d2f1bf66ecd6d9c5c2d46"},
    "m": {"21k-ft1k": "0c236c3020e3857de1e5f2939abd0cc6", "21k": "3923c286366b2a5137f39d1e5b14e202", "imagenet": "ac3fd0ff91b35d18d1df8f1895efe1d5"},
    "s": {"21k-ft1k": "93046a0d601da46bfce9d4ca14224c83", "21k": "10b05d878b64f796ab984a5316a4a1c3", "imagenet": "3b91df2c50c7a56071cca428d53b8c0d"},
    "t": {"imagenet": "46632458117102758518158bf35444d7"},
    "xl": {"21k-ft1k": "9aaa2bd3c9495b23357bc6593eee5bce", "21k": "c97de2770f55701f788644336181e8ee"},
    "v1-b0": {"noisy_student": "d125a518737c601f8595937219243432", "imagenet": "cc7d08887de9df8082da44ce40761986"},
    "v1-b1": {"noisy_student": "8f44bff58fc5ef99baa3f163b3f5c5e8", "imagenet": "a967f7be55a0125c898d650502c0cfd0"},
    "v1-b2": {"noisy_student": "b4ffed8b9262df4facc5e20557983ef8", "imagenet": "6c8d1d3699275c7d1867d08e219e00a7"},
    "v1-b3": {"noisy_student": "9d696365378a1ebf987d0e46a9d26ddd", "imagenet": "d78edb3dc7007721eda781c04bd4af62"},
    "v1-b4": {"noisy_student": "a0f61b977544493e6926186463d26294", "imagenet": "4c83aa5c86d58746a56675565d4f2051"},
    "v1-b5": {"noisy_student": "c3b6eb3f1f7a1e9de6d9a93e474455b1", "imagenet": "0bda50943b8e8d0fadcbad82c17c40f5"},
    "v1-b6": {"noisy_student": "20dd18b0df60cd7c0387c8af47bd96f8", "imagenet": "da13735af8209f675d7d7d03a54bfa27"},
    "v1-b7": {"noisy_student": "7f6f6dd4e8105e32432607ad28cfad0f", "imagenet": "d9c22b5b030d1e4f4c3a96dbf5f21ce6"},
    "v1-l2": {"noisy_student": "5fedc721febfca4b08b03d1f18a4a3ca"},
}


def _make_divisible(v, divisor=4, min_value=None):
    """
    This function is taken from the original tf repo.
    It ensures that all layers have a channel number that is divisible by 8
    It can be seen here:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
    """
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    # Make sure that round down does not go down by more than 10%.
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v


def conv2d_no_bias(inputs, filters, kernel_size, strides=1, padding="VALID", name=""):
    """Apply a bias-free Conv2D to ``inputs``.

    The layer uses CONV_KERNEL_INITIALIZER and is named ``name + "conv"``.
    """
    conv_layer = Conv2D(
        filters,
        kernel_size,
        strides=strides,
        padding=padding,
        use_bias=False,
        kernel_initializer=CONV_KERNEL_INITIALIZER,
        name=name + "conv",
    )
    return conv_layer(inputs)


def batchnorm_with_activation(inputs, activation="swish", name=""):
    """Batch-normalise ``inputs`` and, when ``activation`` is truthy, apply it.

    The BatchNormalization layer is named ``name + "bn"``; the Activation layer
    is named ``name + activation`` (so ``activation`` is expected to be a string).
    """
    if K.image_data_format() == "channels_first":
        bn_axis = 1
    else:
        bn_axis = -1
    bn_layer = BatchNormalization(axis=bn_axis, momentum=BATCH_NORM_DECAY, epsilon=BATCH_NORM_EPSILON, name=name + "bn")
    normalized = bn_layer(inputs)
    if not activation:
        return normalized
    return Activation(activation=activation, name=name + activation)(normalized)


def se_module(inputs, se_ratio=4, name=""):
    """Squeeze-and-excitation gate.

    Averages ``inputs`` over the two spatial axes, runs the result through two
    1x1 convolutions (swish, then sigmoid) and multiplies ``inputs`` by the
    resulting per-channel gate. The first convolution has
    ``channels // se_ratio`` filters.
    """
    channels_first = K.image_data_format() == "channels_first"
    channel_axis = 1 if channels_first else -1
    spatial_axes = [2, 3] if channels_first else [1, 2]

    filters = inputs.shape[channel_axis]
    squeezed_filters = filters // se_ratio

    gate = tf.reduce_mean(inputs, spatial_axes, keepdims=True)
    gate = Conv2D(squeezed_filters, kernel_size=1, use_bias=True, kernel_initializer=CONV_KERNEL_INITIALIZER, name=name + "1_conv")(gate)
    gate = Activation("swish")(gate)
    gate = Conv2D(filters, kernel_size=1, use_bias=True, kernel_initializer=CONV_KERNEL_INITIALIZER, name=name + "2_conv")(gate)
    gate = Activation("sigmoid")(gate)
    return Multiply()([inputs, gate])


def MBConv(inputs, output_channel, stride, expand_ratio, shortcut, kernel_size=3, drop_rate=0, use_se=0, is_fused=False, name=""):
    """Build one MBConv block (or its fused variant) on top of ``inputs``.

    Stages, in order:
      1. expansion conv + BN/activation (skipped when ``expand_ratio == 1``);
         3x3 with ``stride`` when fused, 1x1 with stride 1 otherwise;
      2. depthwise conv + BN/activation (non-fused only);
      3. squeeze-excitation when ``use_se`` is truthy;
      4. projection conv + BN: a strided 3x3 with activation when the block is
         fused and unexpanded, otherwise a 1x1 without activation;
      5. when ``shortcut`` is truthy, optional Dropout (``drop_rate > 0``)
         followed by a residual Add with ``inputs``.
    """
    channel_axis = 1 if K.image_data_format() == "channels_first" else -1
    input_channel = inputs.shape[channel_axis]

    # 1. expansion
    if expand_ratio == 1:
        x = inputs
    else:
        if is_fused:
            expand_kernel, expand_strides = (3, 3), stride
        else:
            expand_kernel, expand_strides = (1, 1), (1, 1)
        x = conv2d_no_bias(inputs, input_channel * expand_ratio, expand_kernel, strides=expand_strides, padding="same", name=name + "sortcut_")
        x = batchnorm_with_activation(x, name=name + "sortcut_")

    # 2. depthwise
    if not is_fused:
        depthwise = DepthwiseConv2D(
            kernel_size,
            padding="same",
            strides=stride,
            use_bias=False,
            depthwise_initializer=CONV_KERNEL_INITIALIZER,
            name=name + "MB_dw_",
        )
        x = depthwise(x)
        x = batchnorm_with_activation(x, name=name + "MB_dw_")

    # 3. squeeze-excitation
    if use_se:
        x = se_module(x, se_ratio=4 * expand_ratio, name=name + "se_")

    # 4. projection (pw-linear)
    if is_fused and expand_ratio == 1:
        x = conv2d_no_bias(x, output_channel, (3, 3), strides=stride, padding="same", name=name + "fu_")
        x = batchnorm_with_activation(x, name=name + "fu_")
    else:
        x = conv2d_no_bias(x, output_channel, (1, 1), strides=(1, 1), padding="same", name=name + "MB_pw_")
        x = batchnorm_with_activation(x, activation=None, name=name + "MB_pw_")

    # 5. residual connection
    if not shortcut:
        return x
    if drop_rate > 0:
        # noise_shape broadcasts one keep/drop decision over each sample.
        x = Dropout(drop_rate, noise_shape=(None, 1, 1, 1), name=name + "drop")(x)
    return Add()([inputs, x])


def EfficientNetV2(
    model_type,
    input_shape=(None, None, 3),
    num_classes=1000,
    dropout=0.2,
    first_strides=2,
    drop_connect_rate=0,
    classifier_activation="softmax",
    include_preprocessing=False,
    pretrained="imagenet",
    model_name="EfficientNetV2",
    kwargs=None,  # Not used, just receiving parameter
):
    """Build an EfficientNetV2 (or, via a dict config, EfficientNetV1) Keras model.

    Args:
        model_type: key of BLOCK_CONFIGS (case-insensitive; unknown keys fall
            back to the "s" config), or a single-entry dict
            ``{model_type: blocks_config}`` supplying a custom configuration.
        input_shape: shape of the image input, without the batch axis.
        num_classes: size of the classification head; when not positive, no
            pooling/dropout/Dense head is added and the model outputs the
            final feature map.
        dropout: dropout rate before the classifier (applied when 0 < rate < 1).
        first_strides: stride of the stem convolution.
        drop_connect_rate: maximum block dropout rate; each block receives
            ``drop_connect_rate * block_index / total_blocks``.
        classifier_activation: activation of the final Dense layer.
        include_preprocessing: when true, prepend rescaling by 1/255 and a
            per-channel mean/std normalisation.
        pretrained: pretrained tag forwarded to reload_model_weights().
        model_name: name given to the Keras model.
        kwargs: ignored; present only so wrappers can forward their locals.

    Returns:
        The constructed keras Model, with pretrained weights loaded when
        reload_model_weights() finds a matching file.
    """
    if isinstance(model_type, dict):  # For EfficientNetV1 configures
        # Pop from a shallow copy so the caller's dict is left intact.
        model_type, blocks_config = dict(model_type).popitem()
    else:
        # Normalise once so the config lookup and the weight lookup agree.
        model_type = model_type.lower()
        blocks_config = BLOCK_CONFIGS.get(model_type, BLOCK_CONFIGS["s"])
    expands = blocks_config["expands"]
    out_channels = blocks_config["out_channels"]
    depthes = blocks_config["depthes"]
    strides = blocks_config["strides"]
    use_ses = blocks_config["use_ses"]
    first_conv_filter = blocks_config.get("first_conv_filter", out_channels[0])
    output_conv_filter = blocks_config.get("output_conv_filter", 1280)
    kernel_sizes = blocks_config.get("kernel_sizes", [3] * len(depthes))

    inputs = Input(shape=input_shape)
    if include_preprocessing:
        channel_axis = 1 if K.image_data_format() == "channels_first" else -1
        try:
            Rescaling = keras.layers.Rescaling
            Normalization = keras.layers.Normalization
        except AttributeError:
            # Fall back to the experimental namespace when the layers are not
            # exposed directly on keras.layers.
            Rescaling = keras.layers.experimental.preprocessing.Rescaling
            Normalization = keras.layers.experimental.preprocessing.Normalization
        nn = Rescaling(1.0 / 255.0)(inputs)
        # Normalization divides by sqrt(variance), so the per-channel standard
        # deviations (0.229, 0.224, 0.225) have to be squared here.
        nn = Normalization(mean=[0.485, 0.456, 0.406], variance=[0.229 ** 2, 0.224 ** 2, 0.225 ** 2], axis=channel_axis)(nn)
    else:
        nn = inputs
    out_channel = _make_divisible(first_conv_filter, 8)
    nn = conv2d_no_bias(nn, out_channel, (3, 3), strides=first_strides, padding="same", name="stem_")
    nn = batchnorm_with_activation(nn, name="stem_")

    pre_out = out_channel
    global_block_id = 0
    total_blocks = sum(depthes)
    stage_params = zip(expands, out_channels, depthes, strides, use_ses, kernel_sizes)
    for stack_id, (expand, out_channel, depth, stride, se, kernel_size) in enumerate(stage_params):
        out = _make_divisible(out_channel, 8)
        # Stages without squeeze-excitation are built as fused blocks.
        is_fused = se == 0
        for block_id in range(depth):
            # Only the first block of a stage uses the stage stride.
            block_stride = stride if block_id == 0 else 1
            shortcut = out == pre_out and block_stride == 1
            name = "stack_{}_block{}_".format(stack_id, block_id)
            block_drop_rate = drop_connect_rate * global_block_id / total_blocks
            nn = MBConv(nn, out, block_stride, expand, shortcut, kernel_size, block_drop_rate, se, is_fused, name=name)
            pre_out = out
            global_block_id += 1

    output_conv_filter = _make_divisible(output_conv_filter, 8)
    nn = conv2d_no_bias(nn, output_conv_filter, (1, 1), strides=(1, 1), padding="valid", name="post_")
    nn = batchnorm_with_activation(nn, name="post_")

    if num_classes > 0:
        nn = GlobalAveragePooling2D(name="avg_pool")(nn)
        if dropout > 0 and dropout < 1:
            nn = Dropout(dropout)(nn)
        nn = Dense(num_classes, activation=classifier_activation, dtype="float32", name="predictions")(nn)

    model = Model(inputs=inputs, outputs=nn, name=model_name)
    reload_model_weights(model, model_type, pretrained)
    return model


def reload_model_weights(model, model_type, pretrained="imagenet"):
    """Download (or reuse a cached copy of) pretrained weights and load them into ``model``.

    Args:
        model: Keras model to load the weights into.
        model_type: key of FILE_HASH_DICT, e.g. "s" or "v1-b0".
        pretrained: one of "imagenet", "imagenet21k", "imagenet21k-ft1k",
            "noisy_student". Any other value leaves the model untouched.

    Returns:
        None. When no matching weights exist or the download fails, a message
        is printed and the model keeps its current (random) initialisation.
    """
    pretrained_dd = {"imagenet": "imagenet", "imagenet21k": "21k", "imagenet21k-ft1k": "21k-ft1k", "noisy_student": "noisy_student"}
    if pretrained not in pretrained_dd:
        print(">>>> No pretrained available, model will be randomly initialized")
        return
    pre_tt = pretrained_dd[pretrained]
    if model_type not in FILE_HASH_DICT or pre_tt not in FILE_HASH_DICT[model_type]:
        print(">>>> No pretrained available, model will be randomly initialized")
        return

    if model_type.startswith("v1"):
        pre_url = "https://github.com/leondgarse/keras_efficientnet_v2/releases/download/effnetv1_pretrained/efficientnet{}-{}.h5"
    else:
        pre_url = "https://github.com/leondgarse/keras_efficientnet_v2/releases/download/effnetv2_pretrained/efficientnetv2-{}-{}.h5"
    url = pre_url.format(model_type, pre_tt)
    file_name = os.path.basename(url)
    file_hash = FILE_HASH_DICT[model_type][pre_tt]

    try:
        pretrained_model = keras.utils.get_file(file_name, url, cache_subdir="models/efficientnetv2", file_hash=file_hash)
    except Exception as err:
        # Best effort: report the failure and its cause, keep the random
        # initialisation. `Exception` (rather than a bare except) lets
        # KeyboardInterrupt / SystemExit propagate.
        print("[Error] will not load weights, url not found or download failed:", url, repr(err))
        return

    print(">>>> Load pretrained from:", pretrained_model)
    # by_name + skip_mismatch: layers are matched by name and incompatible ones skipped.
    model.load_weights(pretrained_model, by_name=True, skip_mismatch=True)


def EfficientNetV2B0(input_shape=(224, 224, 3), num_classes=1000, dropout=0.2, classifier_activation="softmax", pretrained="imagenet", **kwargs):
    """Build the "b0" EfficientNetV2 variant; extra keyword arguments go to EfficientNetV2()."""
    # `kwargs=kwargs` mirrors the previous `**locals()` forwarding; EfficientNetV2 ignores it.
    return EfficientNetV2(
        model_type="b0",
        model_name="EfficientNetV2B0",
        input_shape=input_shape,
        num_classes=num_classes,
        dropout=dropout,
        classifier_activation=classifier_activation,
        pretrained=pretrained,
        kwargs=kwargs,
        **kwargs
    )


def EfficientNetV2B1(input_shape=(240, 240, 3), num_classes=1000, dropout=0.2, classifier_activation="softmax", pretrained="imagenet", **kwargs):
    """Build the "b1" EfficientNetV2 variant; extra keyword arguments go to EfficientNetV2()."""
    # `kwargs=kwargs` mirrors the previous `**locals()` forwarding; EfficientNetV2 ignores it.
    return EfficientNetV2(
        model_type="b1",
        model_name="EfficientNetV2B1",
        input_shape=input_shape,
        num_classes=num_classes,
        dropout=dropout,
        classifier_activation=classifier_activation,
        pretrained=pretrained,
        kwargs=kwargs,
        **kwargs
    )


def EfficientNetV2B2(input_shape=(260, 260, 3), num_classes=1000, dropout=0.3, classifier_activation="softmax", pretrained="imagenet", **kwargs):
    """Build the "b2" EfficientNetV2 variant; extra keyword arguments go to EfficientNetV2()."""
    # `kwargs=kwargs` mirrors the previous `**locals()` forwarding; EfficientNetV2 ignores it.
    return EfficientNetV2(
        model_type="b2",
        model_name="EfficientNetV2B2",
        input_shape=input_shape,
        num_classes=num_classes,
        dropout=dropout,
        classifier_activation=classifier_activation,
        pretrained=pretrained,
        kwargs=kwargs,
        **kwargs
    )


def EfficientNetV2B3(input_shape=(300, 300, 3), num_classes=1000, dropout=0.3, classifier_activation="softmax", pretrained="imagenet", **kwargs):
    """Build the "b3" EfficientNetV2 variant; extra keyword arguments go to EfficientNetV2()."""
    # `kwargs=kwargs` mirrors the previous `**locals()` forwarding; EfficientNetV2 ignores it.
    return EfficientNetV2(
        model_type="b3",
        model_name="EfficientNetV2B3",
        input_shape=input_shape,
        num_classes=num_classes,
        dropout=dropout,
        classifier_activation=classifier_activation,
        pretrained=pretrained,
        kwargs=kwargs,
        **kwargs
    )


def EfficientNetV2T(input_shape=(320, 320, 3), num_classes=1000, dropout=0.2, classifier_activation="softmax", pretrained="imagenet", **kwargs):
    """Build the "t" EfficientNetV2 variant; extra keyword arguments go to EfficientNetV2()."""
    # `kwargs=kwargs` mirrors the previous `**locals()` forwarding; EfficientNetV2 ignores it.
    return EfficientNetV2(
        model_type="t",
        model_name="EfficientNetV2T",
        input_shape=input_shape,
        num_classes=num_classes,
        dropout=dropout,
        classifier_activation=classifier_activation,
        pretrained=pretrained,
        kwargs=kwargs,
        **kwargs
    )


def EfficientNetV2S(input_shape=(384, 384, 3), num_classes=1000, dropout=0.2, classifier_activation="softmax", pretrained="imagenet", **kwargs):
    """Build the "s" EfficientNetV2 variant; extra keyword arguments go to EfficientNetV2()."""
    # `kwargs=kwargs` mirrors the previous `**locals()` forwarding; EfficientNetV2 ignores it.
    return EfficientNetV2(
        model_type="s",
        model_name="EfficientNetV2S",
        input_shape=input_shape,
        num_classes=num_classes,
        dropout=dropout,
        classifier_activation=classifier_activation,
        pretrained=pretrained,
        kwargs=kwargs,
        **kwargs
    )


def EfficientNetV2M(input_shape=(480, 480, 3), num_classes=1000, dropout=0.3, classifier_activation="softmax", pretrained="imagenet", **kwargs):
    """Build the "m" EfficientNetV2 variant; extra keyword arguments go to EfficientNetV2()."""
    # `kwargs=kwargs` mirrors the previous `**locals()` forwarding; EfficientNetV2 ignores it.
    return EfficientNetV2(
        model_type="m",
        model_name="EfficientNetV2M",
        input_shape=input_shape,
        num_classes=num_classes,
        dropout=dropout,
        classifier_activation=classifier_activation,
        pretrained=pretrained,
        kwargs=kwargs,
        **kwargs
    )


def EfficientNetV2L(input_shape=(480, 480, 3), num_classes=1000, dropout=0.4, classifier_activation="softmax", pretrained="imagenet", **kwargs):
    """Build the "l" EfficientNetV2 variant; extra keyword arguments go to EfficientNetV2()."""
    # `kwargs=kwargs` mirrors the previous `**locals()` forwarding; EfficientNetV2 ignores it.
    return EfficientNetV2(
        model_type="l",
        model_name="EfficientNetV2L",
        input_shape=input_shape,
        num_classes=num_classes,
        dropout=dropout,
        classifier_activation=classifier_activation,
        pretrained=pretrained,
        kwargs=kwargs,
        **kwargs
    )


def EfficientNetV2XL(input_shape=(512, 512, 3), num_classes=1000, dropout=0.4, classifier_activation="softmax", pretrained="imagenet21k-ft1k", **kwargs):
    """Build the "xl" EfficientNetV2 variant; extra keyword arguments go to EfficientNetV2()."""
    # `kwargs=kwargs` mirrors the previous `**locals()` forwarding; EfficientNetV2 ignores it.
    return EfficientNetV2(
        model_type="xl",
        model_name="EfficientNetV2XL",
        input_shape=input_shape,
        num_classes=num_classes,
        dropout=dropout,
        classifier_activation=classifier_activation,
        pretrained=pretrained,
        kwargs=kwargs,
        **kwargs
    )


def get_actual_drop_connect_rates(model):
    """Return the ``rate`` of every Dropout layer in ``model.layers``, in layer order."""
    rates = []
    for layer in model.layers:
        if isinstance(layer, keras.layers.Dropout):
            rates.append(layer.rate)
    return rates

In [3]:
!pip install ../input/kerasapplications/ > /dev/null
!pip install ../input/efficientnet-keras-source-code/ > /dev/null

[33m  DEPRECATION: A future pip version will change local packages to be built in-place without first copying to a temporary directory. We recommend you use --use-feature=in-tree-build to test your packages with this new behavior before it becomes the default.
   pip 21.3 will remove support for this functionality. You can find discussion regarding this at https://github.com/pypa/pip/issues/7555.[0m
[33m  DEPRECATION: A future pip version will change local packages to be built in-place without first copying to a temporary directory. We recommend you use --use-feature=in-tree-build to test your packages with this new behavior before it becomes the default.
   pip 21.3 will remove support for this functionality. You can find discussion regarding this at https://github.com/pypa/pip/issues/7555.[0m


In [4]:
import gc
import os
import math
import random
import re
import warnings
from pathlib import Path
from PIL import Image
from typing import Optional, Tuple

import efficientnet.tfkeras as efn
import numpy as np
import pandas as pd
import tensorflow as tf
from scipy import spatial
from sklearn.preprocessing import normalize
from tqdm import tqdm
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

Num GPUs Available:  1


## Settings

In [5]:
# Root directory of the competition data.
DATADIR = Path("../input/landmark-retrieval-2021/")
# Sub-directories of DATADIR holding the "test" and "index" image sets.
TEST_IMAGE_DIR = DATADIR / "test"
TRAIN_IMAGE_DIR = DATADIR / "index"

# Presumably the number of retrieved neighbours kept per query - TODO confirm against its use.
TOPK = 100
# Number of classes given to the ArcMarginProduct head in the model builders.
N_CLASSES = 81313

# Seed passed to set_seed().
SEED = 20210912

## Utilities

In [6]:
import time

from contextlib import contextmanager


@contextmanager
def timer(name):
    """Context manager that prints ``[name]`` on entry and the elapsed seconds on exit.

    The closing line is printed from a ``finally`` clause, so the elapsed time
    is reported even when the wrapped block raises; the exception still
    propagates to the caller.
    """
    t0 = time.time()
    print(f"[{name}]")
    try:
        yield
    finally:
        print(f'[{name}] done in {time.time() - t0:.0f} s')

In [7]:
def set_seed(seed=42):
    """Seed Python's ``random``, NumPy and TensorFlow, and export PYTHONHASHSEED.

    Note: PYTHONHASHSEED is only written to ``os.environ`` here; the running
    interpreter's hash randomisation was fixed at start-up.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)
    for seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
        seed_fn(seed)


set_seed(SEED)

In [8]:
def auto_select_accelerator():
    """Return a TPU distribution strategy when a TPU resolves, else the current default strategy.

    A ``ValueError`` raised anywhere in the TPU set-up is taken to mean "no TPU
    available". The number of replicas of the chosen strategy is printed
    before it is returned.
    """
    try:
        resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
        tf.config.experimental_connect_to_cluster(resolver)
        tf.tpu.experimental.initialize_tpu_system(resolver)
        selected = tf.distribute.experimental.TPUStrategy(resolver)
        print("Running on TPU:", resolver.master())
    except ValueError:
        # No TPU could be set up: use whatever strategy is currently active.
        selected = tf.distribute.get_strategy()
    print(f"Running on {selected.num_replicas_in_sync} replicas")
    return selected

In [9]:
strategy = auto_select_accelerator()
REPLICAS = strategy.num_replicas_in_sync
AUTO = tf.data.experimental.AUTOTUNE

Running on 1 replicas


## Model

In [10]:
class GeM(tf.keras.layers.Layer):
    """Generalized-mean pooling layer with one trainable exponent per channel.

    ``call`` clamps the input at 1e-6, raises it to the power ``p``, average
    pools it with window and stride ``pool_size`` ("VALID" padding) and takes
    the ``1/p`` root. ``p`` is the weight named "norms", initialised to
    ``init_norm``.
    """

    def __init__(self, pool_size, init_norm=3.0, normalize=False, **kwargs):
        # Plain attributes are stored before the base-class initialiser runs,
        # in the same order as before.
        self.pool_size = pool_size
        self.init_norm = init_norm
        self.normalize = normalize

        super().__init__(**kwargs)

    def get_config(self):
        """Return the base layer config extended with this layer's constructor arguments."""
        return {
            **super().get_config(),
            'pool_size': self.pool_size,
            'init_norm': self.init_norm,
            'normalize': self.normalize,
        }

    def build(self, input_shape):
        """Create the per-channel exponent; the channel count is the last input dimension."""
        n_channels = input_shape[-1]
        self.p = self.add_weight(
            name='norms',
            shape=(n_channels,),
            initializer=tf.keras.initializers.constant(self.init_norm),
            trainable=True,
        )
        super().build(input_shape)

    def call(self, inputs):
        """Apply generalized-mean pooling to ``inputs``."""
        clamped = tf.math.maximum(inputs, 1e-6)
        powered = tf.pow(clamped, self.p)

        pooled = tf.nn.avg_pool(powered, self.pool_size, self.pool_size, 'VALID')
        result = tf.pow(pooled, 1.0 / self.p)

        if not self.normalize:
            return result
        # NOTE(review): this normalises along axis 1, which is not the channel
        # axis for channels-last feature maps - confirm this is intended.
        return tf.nn.l2_normalize(result, 1)

    def compute_output_shape(self, input_shape):
        # NOTE(review): reports a rank-2 shape although `call` returns the
        # pooled feature map without flattening - verify against callers.
        return (None, input_shape[-1])

In [11]:
class ArcMarginProduct(tf.keras.layers.Layer):
    '''
    Implements large margin arc distance.

    The layer takes ``[embeddings, labels]`` and returns scaled cosine logits
    in which the entry of the labelled class has the angular margin ``m``
    added to its angle.

    Reference:
        https://arxiv.org/pdf/1801.07698.pdf
        https://github.com/lyakaap/Landmark2019-1st-and-3rd-Place-Solution/
            blob/master/src/modeling/metric_learning.py

    Args:
        n_classes: number of classes (columns of the weight matrix).
        s: scale applied to the output logits.
        m: angular margin, in radians.
        easy_margin: when true, the margin is applied only where cosine > 0.
        ls_eps: label-smoothing factor applied to the one-hot mask (0 disables).
    '''
    def __init__(self, n_classes, s=30, m=0.50, easy_margin=False,
                 ls_eps=0.0, **kwargs):

        super(ArcMarginProduct, self).__init__(**kwargs)

        self.n_classes = n_classes
        self.s = s
        self.m = m
        self.ls_eps = ls_eps
        self.easy_margin = easy_margin
        # Constants of cos(theta + m) = cos(theta)cos(m) - sin(theta)sin(m).
        self.cos_m = tf.math.cos(m)
        self.sin_m = tf.math.sin(m)
        # Threshold and fallback offset used when theta + m would exceed pi.
        self.th = tf.math.cos(math.pi - m)
        self.mm = tf.math.sin(math.pi - m) * m

    def get_config(self):
        """Return the base layer config extended with this layer's constructor arguments."""
        config = super().get_config().copy()
        config.update({
            'n_classes': self.n_classes,
            's': self.s,
            'm': self.m,
            'ls_eps': self.ls_eps,
            'easy_margin': self.easy_margin,
        })
        return config

    def build(self, input_shape):
        """Create the class-centre matrix ``W`` of shape (embedding_dim, n_classes)."""
        # input_shape is a pair (embedding shape, label shape).
        super(ArcMarginProduct, self).build(input_shape[0])

        self.W = self.add_weight(
            name='W',
            shape=(int(input_shape[0][-1]), self.n_classes),
            initializer='glorot_uniform',
            dtype='float32',
            trainable=True,
            regularizer=None)

    def call(self, inputs):
        """Compute the margin-adjusted, scaled cosine logits for ``[X, y]``."""
        X, y = inputs
        y = tf.cast(y, dtype=tf.int32)
        cosine = tf.matmul(
            tf.math.l2_normalize(X, axis=1),
            tf.math.l2_normalize(self.W, axis=0)
        )
        # Floating-point rounding can push cosine**2 marginally above 1; clamp
        # at zero so the square root never sees a negative value (NaN).
        sine = tf.math.sqrt(tf.math.maximum(1.0 - tf.math.pow(cosine, 2), 0.0))
        phi = cosine * self.cos_m - sine * self.sin_m
        if self.easy_margin:
            phi = tf.where(cosine > 0, phi, cosine)
        else:
            phi = tf.where(cosine > self.th, phi, cosine - self.mm)
        one_hot = tf.cast(
            tf.one_hot(y, depth=self.n_classes),
            dtype=cosine.dtype
        )
        if self.ls_eps > 0:
            one_hot = (1 - self.ls_eps) * one_hot + self.ls_eps / self.n_classes

        # Margin logit for the labelled class, plain cosine for all others.
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output *= self.s
        return output

In [12]:
def build_model0(size=256, efficientnet_size=0, weights="imagenet", count=0):
    """Build and compile the EfficientNet-B{efficientnet_size} + GeM + ArcFace model.

    Args:
        size: side length of the square RGB input.
        efficientnet_size: selects ``efn.EfficientNetB<efficientnet_size>``.
        weights: ``weights`` argument of the backbone constructor.
        count: unused here; kept for signature parity with the other builders.

    Returns:
        A compiled tf.keras.Model with inputs "inp1" (image) and "inp2" (label)
        and a softmax output over N_CLASSES.
    """
    image_shape = (size, size, 3)
    inp = tf.keras.layers.Input(shape=image_shape, name="inp1")
    label = tf.keras.layers.Input(shape=(), name="inp2")

    backbone_cls = getattr(efn, f"EfficientNetB{efficientnet_size}")
    backbone = backbone_cls(weights=weights, include_top=False, input_shape=image_shape)
    features = backbone(inp)
    features = GeM(8)(features)
    features = tf.keras.layers.Flatten()(features)
    embedding = tf.keras.layers.Dense(512, name="dense_before_arcface", kernel_initializer="he_normal")(features)
    embedding = tf.keras.layers.BatchNormalization()(embedding)

    arc_head = ArcMarginProduct(n_classes=N_CLASSES, s=30, m=0.5, name="head/arc_margin", dtype="float32")
    logits = arc_head([embedding, label])
    output = tf.keras.layers.Softmax(dtype="float32")(logits)

    model = tf.keras.Model(inputs=[inp, label], outputs=[output])
    model.compile(
        optimizer=tf.optimizers.Adam(learning_rate=1e-4),
        loss=[tf.keras.losses.SparseCategoricalCrossentropy()],
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
    )
    return model

In [13]:
def _build_effnetv2_arcface_model(backbone_fn, size, weights, count):
    """Shared builder: EfficientNetV2 backbone -> GeM -> Dense(512) -> BN -> ArcFace -> softmax.

    Args:
        backbone_fn: one of the EfficientNetV2* constructors defined in this notebook.
        size: side length of the square RGB input.
        weights: forwarded to the backbone as ``pretrained``.
        count: ``decay_steps`` of the cosine decay schedule given to the optimizer.

    Returns:
        A compiled tf.keras.Model with inputs "inp1" (image) and "inp2" (label).
    """
    inp = tf.keras.layers.Input(shape=(size, size, 3), name="inp1")
    label = tf.keras.layers.Input(shape=(), name="inp2")

    # num_classes=0 drops the classifier head, so the backbone outputs its feature map.
    x = backbone_fn(input_shape=(size, size, 3), drop_connect_rate=0.2, num_classes=0, pretrained=weights)(inp)

    x = GeM(8)(x)
    x = tf.keras.layers.Flatten()(x)
    x = tf.keras.layers.Dense(512, name="dense_before_arcface", kernel_initializer="he_normal")(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = ArcMarginProduct(
        n_classes=N_CLASSES,
        s=30,
        m=0.5,
        name="head/arc_margin",
        dtype="float32"
    )([x, label])
    output = tf.keras.layers.Softmax(dtype="float32")(x)
    model = tf.keras.Model(inputs=[inp, label], outputs=[output])
    lr_decayed_fn = tf.keras.experimental.CosineDecay(1e-3, count)
    # NOTE(review): the schedule is passed as AdamW's first positional argument
    # while learning_rate stays a fixed 1e-4; presumably that first parameter is
    # weight_decay, not the learning rate - confirm this is intended. Kept as-is
    # so behaviour is unchanged.
    opt = tfa.optimizers.AdamW(lr_decayed_fn, learning_rate=1e-4)
    model.compile(
        optimizer=opt,
        loss=[tf.keras.losses.SparseCategoricalCrossentropy()],
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy()]
    )
    return model


def build_model_s(size=256, efficientnet_size=0, weights="imagenet", count=0):
    """Build the compiled ArcFace model on an EfficientNetV2S backbone.

    ``efficientnet_size`` is unused; it is kept for signature parity with build_model0.
    """
    return _build_effnetv2_arcface_model(EfficientNetV2S, size, weights, count)


def build_model_m(size=256, efficientnet_size=0, weights="imagenet", count=0):
    """Build the compiled ArcFace model on an EfficientNetV2M backbone.

    ``efficientnet_size`` is unused; it is kept for signature parity with build_model0.
    """
    return _build_effnetv2_arcface_model(EfficientNetV2M, size, weights, count)


def build_model_l(size=256, efficientnet_size=0, weights="imagenet", count=0):
    """Build the compiled ArcFace model on an EfficientNetV2L backbone.

    ``efficientnet_size`` is unused; it is kept for signature parity with build_model0.
    """
    return _build_effnetv2_arcface_model(EfficientNetV2L, size, weights, count)

In [14]:
def create_model_for_inference0(weights_path: str, efficientnet_size=5):
    """Load trained weights and return the embedding sub-model of the EfficientNet-B model.

    Args:
        weights_path: weight file loaded into the full training model.
        efficientnet_size: EfficientNet-B variant handed to build_model0().

    Returns:
        A tf.keras.Model mapping the "inp1" image input to the output of the
        "dense_before_arcface" layer.
    """
    with strategy.scope():
        # The notebook defines build_model0 (not build_model) for the
        # EfficientNet-B backbone; the previous name raised NameError.
        base_model = build_model0(
            size=256,
            efficientnet_size=efficientnet_size,  # 5, # 4,
            weights=None,
            count=0)
        base_model.load_weights(weights_path)
        model = tf.keras.Model(inputs=base_model.get_layer("inp1").input,
                               outputs=base_model.get_layer("dense_before_arcface").output)
        return model

In [15]:
def _load_embedding_model(builder, weights_path, size, efficientnet_size):
    """Build the training model with ``builder``, load its weights and cut it at the embedding layer.

    Returns a tf.keras.Model mapping the "inp1" image input to the output of
    the "dense_before_arcface" layer. Everything happens inside ``strategy.scope()``.
    """
    with strategy.scope():
        # weights=None: the backbone is not initialised from pretrained files,
        # because the full set of weights is loaded from `weights_path` next.
        base_model = builder(
            size=size,
            efficientnet_size=efficientnet_size,
            weights=None,
            count=0)
        base_model.load_weights(weights_path)
        model = tf.keras.Model(inputs=base_model.get_layer("inp1").input,
                               outputs=base_model.get_layer("dense_before_arcface").output)
        return model


def create_model_for_inference(weights_path: str, efficientnet_size=5):
    """Embedding model for the EfficientNet-B backbone at 256x256 input."""
    # build_model0 is the EfficientNet-B builder defined in this notebook; the
    # previously referenced `build_model` does not exist (NameError).
    return _load_embedding_model(build_model0, weights_path, 256, efficientnet_size)


def create_model_s_for_inference(weights_path: str, efficientnet_size=5):
    """Embedding model for the EfficientNetV2S backbone at 256x256 input."""
    return _load_embedding_model(build_model_s, weights_path, 256, efficientnet_size)


def create_model_m_for_inference(weights_path: str, efficientnet_size=5):
    """Embedding model for the EfficientNetV2M backbone at 256x256 input."""
    return _load_embedding_model(build_model_m, weights_path, 256, efficientnet_size)


def create_model_l_for_inference(weights_path: str, im_size=256, efficientnet_size=5):
    """Embedding model for the EfficientNetV2L backbone at ``im_size`` x ``im_size`` input."""
    return _load_embedding_model(build_model_l, weights_path, im_size, efficientnet_size)

## Feature Extraction

In [16]:
def to_hex(image_id) -> str:
    """Format ``image_id`` as lower-case hexadecimal, zero-padded to at least 16 digits."""
    return format(image_id, "016x")


def get_image_path(subset, image_id):
    """Return the path of an image inside the dataset's nested directory layout.

    The file lives at ``<DATADIR>/<subset>/<h0>/<h1>/<h2>/<hex>.jpg`` where
    ``hex`` is ``to_hex(image_id)`` and ``h0..h2`` are its first three characters.
    """
    name = to_hex(image_id)
    # The settings cell defines DATADIR; the previously referenced DATASET_DIR
    # is not defined in this notebook and raised NameError.
    return os.path.join(DATADIR, subset, name[0], name[1], name[2], '{}.jpg'.format(name))


def load_image_tensor(image_path, im_size=256):
    """Read one image and return it as a float32 batch of size one.

    The file is opened with PIL, converted to RGB, resized to
    ``(im_size, im_size)`` with ``tf.image.resize`` (default method), given a
    leading batch axis, cast to ``tf.float32`` and divided by 255.0.
    """
    rgb_pixels = np.array(Image.open(image_path).convert("RGB"))
    resized = tf.image.resize(tf.convert_to_tensor(rgb_pixels), size=(im_size, im_size))
    single_batch = tf.expand_dims(resized, axis=0)
    as_float = tf.cast(single_batch, tf.float32)
    return as_float / 255.0


def create_batch(files, im_size=256):
    """Load every path in ``files`` with ``load_image_tensor`` and concatenate the results along axis 0."""
    per_image_tensors = [load_image_tensor(path, im_size=im_size) for path in files]
    return tf.concat(per_image_tensors, axis=0)

In [17]:
def _accumulate_weighted_embeddings(model, image_paths, embeddings, chunk_size, weight, im_size=256):
    """Add one model's weighted, L2-normalised predictions into ``embeddings`` in place.

    Images are processed ``chunk_size`` at a time: each chunk is loaded with
    ``create_batch`` at ``im_size``, passed through ``model.predict``,
    normalised along axis 1, multiplied by ``weight`` and added to the rows of
    ``embeddings`` that correspond to that chunk.
    """
    n_chunks, remainder = divmod(len(image_paths), chunk_size)
    if remainder:
        n_chunks += 1  # final, partially filled chunk

    for i in tqdm(range(n_chunks)):
        start, stop = i * chunk_size, (i + 1) * chunk_size
        batch = create_batch(image_paths[start:stop], im_size=im_size)
        embedding_tensor = tf.math.l2_normalize(model.predict(batch), axis=1)
        embeddings[start:stop] += embedding_tensor * weight


def extract_global_features(image_root_dir, n_models=4):
    """Compute one global descriptor per ``.jpg`` file found under ``image_root_dir``.

    Args:
        image_root_dir: Directory walked recursively with ``os.walk``.
        n_models: Kept for backward compatibility only. The previous
            implementation overwrote this argument before reading it, so it
            never had any effect; the number of checkpoints used is determined
            by the checkpoint lists inside this function.

    Returns:
        ``(ids, embeddings)``. ``ids`` is a list holding each file name without
        its ``.jpg`` suffix. ``embeddings`` is the ``(len(ids), 512)`` weighted
        sum of the model outputs after ``tf.math.l2_normalize`` along axis 1;
        its rows are in the same order as ``ids``.
    """
    image_paths = []
    ids = []
    for root, _dirs, files in os.walk(image_root_dir):
        for file in files:
            if file.endswith('.jpg'):
                image_paths.append(os.path.join(root, file))
                # Take the id from the file name itself rather than splitting
                # the joined path on '/', which depends on the path separator.
                ids.append(file[:-len('.jpg')])

    embeddings = np.zeros((len(image_paths), 512))
    image_paths = np.array(image_paths)
    chunk_size = 128  # 256 # 512

    # ------------------------------------------------------------------
    # Stage 1 (active): checkpoints evaluated on 512x512 inputs.
    # ------------------------------------------------------------------
    lmodels_512 = [
        "../input/googlelmreffv2l512/2021_google_lm_retrieval_eff_v2l_v01_512_v02_fold3_ep57.h5"
    ]
    test_im_size = 512
    n_models_512 = len(lmodels_512)
    for n in range(n_models_512):
        print(f"Getting Embedding for fold{n} model.")
        model = create_model_l_for_inference(lmodels_512[n], im_size=test_im_size)
        _accumulate_weighted_embeddings(
            model, image_paths, embeddings, chunk_size,
            weight=0.225 / n_models_512, im_size=test_im_size)
        del model
        gc.collect()

    # ------------------------------------------------------------------
    # Stage 2 (disabled): per-fold ensemble of 384px and 256px checkpoints.
    # The original notebook switched this stage off by looping over
    # ``range(-4)``, which is empty; 0 makes that explicit. To re-enable it,
    # set the count to at most 4 (the length of the two 384px lists).
    # A further "v2s" stage (weight 0.137) existed only as commented-out code
    # and is not reproduced here.
    # ------------------------------------------------------------------
    n_fold_models = 0
    lmodels_384_a = [
        "../input/googlelmreffv2l384/2021_google_lm_retrieval_eff_v2l_384_v01_fold0_ep39.h5",  # 0.311
        "../input/googlelmreffv2l384/2021_google_lm_retrieval_eff_v2l_384_v01_fold1_ep39.h5",  # 0.317
        "../input/googlelmreffv2l384/2021_google_lm_retrieval_eff_v2l_384_v01_fold2_ep42.h5",  # 0.309
        "../input/googlelmreffv2l384/2021_google_lm_retrieval_eff_v2l_384_v01_fold3_ep41.h5",  # 0.322
    ]
    lmodels_384_b = [
        "../input/googlelmreffv2l384/2021_google_lm_retrieval_eff_v2l_384_v01_fold0_ep38.h5",
        "../input/googlelmreffv2l384/2021_google_lm_retrieval_eff_v2l_384_v01_fold1_ep38.h5",
        "../input/googlelmreffv2l384/2021_google_lm_retrieval_eff_v2l_384_v01_fold2_ep41.h5",
        "../input/googlelmreffv2l384/2021_google_lm_retrieval_eff_v2l_384_v01_fold3_ep37.h5",
    ]
    lmodels_256 = [
        "../input/googlelmreffv2l256/2021_google_lm_retrieval_eff_v2l_v02_fold0.h5",
        "../input/googlelmreffv2l256/2021_google_lm_retrieval_eff_v2l_v02_fold1.h5",
        "../input/googlelmreffv2l256/2021_google_lm_retrieval_eff_v2l_v02_fold2.h5",
        "../input/googlelmreffv2l256/2021_google_lm_retrieval_eff_v2l_256_v02_fold3.h5",   # 0.306
        "../input/googlelmreffv2l256/2021_google_lm_retrieval_eff_v2l_v02_fold3.h5",       # 0.303
        "../input/googlelmreffv2l256/2021_google_lm_retrieval_eff_v2l_v02_fold3_ep25.h5",  # 0.295
        "../input/googlelmreffv2l256/2021_google_lm_retrieval_eff_v2l_v02_fold3_ep30.h5",  # 0.301
    ]
    for n in range(n_fold_models):
        print(f"Getting Embedding for fold{n} model.")

        # 384px checkpoints, first set.
        model = create_model_l_for_inference(lmodels_384_a[n], im_size=384)
        _accumulate_weighted_embeddings(
            model, image_paths, embeddings, chunk_size,
            weight=0.275 / n_fold_models, im_size=384)
        del model
        gc.collect()

        # 384px checkpoints, second set (different epochs of the same folds).
        model = create_model_l_for_inference(lmodels_384_b[n], im_size=384)
        _accumulate_weighted_embeddings(
            model, image_paths, embeddings, chunk_size,
            weight=0.275 / n_fold_models, im_size=384)
        del model
        gc.collect()

        # 256px "l" checkpoints.
        model = create_model_l_for_inference(lmodels_256[n])
        _accumulate_weighted_embeddings(
            model, image_paths, embeddings, chunk_size,
            weight=0.28 / n_fold_models)
        del model
        gc.collect()

        # 256px "m" checkpoints.
        model = create_model_m_for_inference(f"../input/googlelmreffv2m256/2021_google_lm_retrieval_eff_v2m_v02_fold{n}.h5", efficientnet_size=6)
        _accumulate_weighted_embeddings(
            model, image_paths, embeddings, chunk_size,
            weight=0.12 / n_fold_models)
        del model
        gc.collect()

        tf.keras.backend.clear_session()

    # Re-normalise the weighted sum so every row has unit L2 norm.
    embeddings = tf.math.l2_normalize(embeddings, axis=1)

    return ids, embeddings

## Main

In [18]:
def get_predictions():
    """Retrieve, for every test image, the ids of its nearest train images.

    Embeddings for ``TEST_IMAGE_DIR`` and ``TRAIN_IMAGE_DIR`` are computed with
    ``extract_global_features``. For each test embedding the cosine distance to
    every train embedding is computed, the ``TOPK`` smallest are selected and
    sorted by increasing distance.

    Returns:
        ``(test_ids, PredictionString_list)`` where the i-th prediction string
        holds the selected train ids, each followed by a single space, for
        ``test_ids[i]``. When there are fewer than ``TOPK`` train images, all
        of them are returned (an empty string if there are none).
    """
    with timer("Getting Test Embeddings"):
        test_ids, test_embeddings = extract_global_features(str(TEST_IMAGE_DIR))

    with timer("Getting Train Embeddings"):
        train_ids, train_embeddings = extract_global_features(str(TRAIN_IMAGE_DIR))

    PredictionString_list = []
    with timer("Matching..."):
        # Convert once, outside the loop: cdist would otherwise have to turn
        # the embeddings into NumPy arrays again for every test image.
        test_matrix = np.asarray(test_embeddings)
        train_matrix = np.asarray(train_embeddings)

        num_train = train_matrix.shape[0]
        # np.argpartition requires kth < len(array); clamp so that a train set
        # with TOPK or fewer images does not raise.
        top_k = min(TOPK, num_train)

        for test_index in range(test_matrix.shape[0]):
            distances = spatial.distance.cdist(
                test_matrix[np.newaxis, test_index, :], train_matrix, 'cosine')[0]
            if top_k < num_train:
                candidates = np.argpartition(distances, top_k)[:top_k]
            else:
                candidates = np.arange(num_train)
            # argpartition does not order its result; sort the candidates by distance.
            nearest = sorted(candidates, key=lambda p: distances[p])
            # Same format as before: every id is followed by one space.
            PredictionString_list.append("".join(train_ids[p] + " " for p in nearest))

    return test_ids, PredictionString_list


def main():
    """Write ``submission.csv`` for the images under ``TEST_IMAGE_DIR``.

    If exactly 1129 ``.jpg`` files are found, the sample submission is copied
    to ``submission.csv`` and no inference is run (presumably 1129 is the size
    of the publicly visible test set -- TODO confirm). Otherwise predictions
    from ``get_predictions`` are written with columns ``id`` and ``images``.
    """
    jpg_count = sum(
        1
        for _root, _dirs, filenames in os.walk(str(TEST_IMAGE_DIR))
        for filename in filenames
        if filename.endswith('.jpg')
    )

    if jpg_count == 1129:
        sample_df = pd.read_csv('../input/landmark-retrieval-2021/sample_submission.csv')
        sample_df.to_csv('submission.csv', index=False)
        return

    test_ids, prediction_strings = get_predictions()
    submission_df = pd.DataFrame(data={'id': test_ids, 'images': prediction_strings})
    submission_df.to_csv('submission.csv', index=False)

In [19]:
# Notebook entry point: run main(), which writes submission.csv.
main()