# References


- https://github.com/tyunist/memory_efficient_mish_swish/blob/master/mish.py
- https://www.kaggle.com/ankursingh12/shopee-ensemble-b3-nfnet-l0-inference

**Pre-trained models:** https://www.kaggle.com/parthdhameliya77/shopee-pytorch-models

**Self-trained model:** https://www.kaggle.com/sudhavijayakumar/arcface-512x512-eff-b6-pt

# Goal

1) To ensemble EfficientNetB[6] + eca-nfnet-l0 pretrained models to make predictions on the SHOPEE image dataset for product matching by [IMAGE].

2) To use the [TF-IDF Vectorizer] sklearn-style model as the backbone for matching SHOPEE product metadata by [Title].

3) Combine predictions from Step[1] & Step[2].

# Import libraries

In [141]:
import sys
sys.path.append('../input/timm-pytorch-image-models/pytorch-image-models-master')

import numpy as np 
import pandas as pd 

import math
import random 
import os 
import cv2
import timm

from tqdm import tqdm 

import albumentations as Albumentations
from albumentations.pytorch.transforms import ToTensorV2

import torch 
from torch.utils.data import Dataset 
from torch import nn
import torch.nn.functional as F 

import gc

#RAPIDS AI for NearestNeighbors, Vectorizing Text.
import cudf 
import cuml
import cupy
from cuml.feature_extraction.text import TfidfVectorizer
from cuml.neighbors import NearestNeighbors

# PyTorch Configurations

## Pretrained models config

In [142]:
class pretrained_model_config:
    """Static settings shared by the two pretrained image models."""

    # Reproducibility / runtime
    seed = 2020
    device = 'cuda'

    # Input pipeline: images are resized to img_size x img_size
    img_size = 512
    batch_size = 12

    # Head settings used as defaults when the prediction model is built
    classes = 11014
    scale = 30
    margin = 0.5

    # First model: backbone name and checkpoint location
    model_name1 = 'eca_nfnet_l0'
    model_path1 = '../input/shopee-pytorch-models/arcface_512x512_nfnet_l0 (mish).pt'

    # Second model: backbone name and checkpoint location
    model_name2 = 'tf_efficientnet_b6_ns'
    model_path2 = '../input/arcface-512x512-eff-b6-pt/arcface_512x512_tf_efficientnet_b6.pt'

## PyTorch test image transformation

In [143]:
def get_test_transforms():
    """Build the inference-time image pipeline: resize, normalize, to tensor."""
    side = pretrained_model_config.img_size
    steps = [
        Albumentations.Resize(side, side, always_apply=True),
        Albumentations.Normalize(),
        ToTensorV2(p=1.0),
    ]
    return Albumentations.Compose(steps)

## Configure PyTorch seed

In [144]:
def configure_seed(seed=2000):
    """Seed Python, NumPy and PyTorch (CPU and CUDA) RNGs with ``seed``.

    Also exports the seed as ``PYTHONHASHSEED`` and switches cuDNN to
    deterministic mode.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)
    torch.backends.cudnn.deterministic = True
    
# Seed all RNGs up front with the seed stored in pretrained_model_config.
configure_seed(pretrained_model_config.seed)

# Helper functions

## Read test meta-data & images

In [145]:
def get_testData():
    """Load the test metadata.

    Returns:
        A 3-tuple ``(df, df_cuda, df_img_path)``: the pandas frame read from
        ``test.csv``, a cuDF copy of it, and a pandas Series holding the
        image path for every row (directory prefix + the ``image`` column).
    """
    frame = pd.read_csv('../input/shopee-product-matching/test.csv')
    image_files = '../input/shopee-product-matching/test_images/' + frame['image']
    gpu_frame = cudf.DataFrame(frame)
    return frame, gpu_frame, image_files

## Merge predictions using Product (Title, Image)

In [146]:
def mergePredictions(product):
    """Join one row's image and text matches into a space-separated string.

    ``product`` must expose ``'image_predictions'`` and ``'text_predictions'``
    array-likes of posting ids; duplicates are removed and the ids come out
    in sorted order (``np.unique``).
    """
    combined = np.concatenate(
        (product['image_predictions'], product['text_predictions'])
    )
    distinct_ids = np.unique(combined)
    return ' '.join(distinct_ids)

## Get similar products with image

In [147]:
def get_similar_products_by_image(df, embeddings,threshold = 0.0):
    """Find, for every row, the posting ids whose image embedding is close.

    A cosine-metric nearest-neighbour index is fitted on ``embeddings`` and
    queried with the same embeddings. For each row, neighbours whose
    distance is strictly below ``threshold`` are kept.

    Args:
        df: Frame with a ``posting_id`` column, row-aligned with
            ``embeddings``.
        embeddings: 2-D array with one embedding per row of ``df``.
        threshold: Distance cut-off. With the default of 0.0 essentially
            nothing passes the strict ``<`` test, so callers are expected
            to pass a real value.

    Returns:
        A list with one array of matching posting ids per row.
    """
    n_samples = embeddings.shape[0]
    if n_samples == 0:
        # Nothing to index; avoid fitting a neighbour model on no data.
        return []

    # Ask for at most 50 neighbours, but never more than there are samples
    # (the previous fixed 50 / 3 choice over-asked for small inputs).
    KNN = min(50, n_samples)

    model = NearestNeighbors(n_neighbors = KNN, metric = 'cosine')
    model.fit(embeddings)
    distances, indices = model.kneighbors(embeddings)

    # Look the id column up once instead of once per row.
    posting_ids = df['posting_id'].values

    predictions = []
    for k in tqdm(range(n_samples)):
        keep = np.where(distances[k,] < threshold)[0]
        predictions.append(posting_ids[indices[k, keep]])

    del model, distances, indices
    gc.collect()
    return predictions

## Retrieve image embeddings

In [148]:
def get_image_embeddings(image_paths, model_name, model_path):
    """Run one pretrained model over all images and return their embeddings.

    Args:
        image_paths: Array of image file paths (must expose ``.shape`` and
            integer indexing, as required by ``Product_Images``).
        model_name: Backbone name understood by
            ``Product_Image_Prediction_Model``.
        model_path: Path of the checkpoint (state dict) to load.

    Returns:
        A NumPy array obtained by concatenating the per-batch model outputs
        along the first axis.
    """
    device = pretrained_model_config.device
    embeds = []

    model = Product_Image_Prediction_Model(model_name=model_name)

    # The NFNet checkpoint was saved with Mish in place of SiLU, so the
    # activations are swapped before the weights are loaded.
    if model_name == 'eca_nfnet_l0':
        model = replace_activations(model, torch.nn.SiLU, Mish())

    # Load onto CPU first so the checkpoint does not depend on the device it
    # was saved from; the model is moved to the target device afterwards.
    model.load_state_dict(torch.load(model_path, map_location='cpu'))
    model = model.to(device)
    model.eval()

    # Product_Images is the dataset class defined in this file.
    image_dataset = Product_Images(image_paths=image_paths,transforms=get_test_transforms())
    image_loader = torch.utils.data.DataLoader(
        image_dataset,
        batch_size=pretrained_model_config.batch_size,
        pin_memory=True,
        drop_last=False,
        num_workers=4
    )

    with torch.no_grad():
        for img,label in tqdm(image_loader):
            img = img.to(device)
            label = label.to(device)
            feat = model(img,label)
            embeds.append(feat.detach().cpu().numpy())

    del model
    image_embeddings = np.concatenate(embeds)
    print(f'Our image embeddings shape is {image_embeddings.shape}')
    del embeds
    gc.collect()
    return image_embeddings

## Get similar products with title

In [149]:
def get_similar_products_by_title(df, max_features = 25_000, threshold = 0.75):
    """Find, for every row, the posting ids whose title is similar.

    Titles are vectorized with a binary TF-IDF model on the GPU and compared
    chunk by chunk with a matrix product; rows whose score is strictly
    greater than ``threshold`` are reported as matches.

    Args:
        df: pandas frame with ``title`` and ``posting_id`` columns.
        max_features: Vocabulary size limit passed to the vectorizer.
        threshold: Similarity cut-off (defaults to the previously
            hard-coded 0.75).

    Returns:
        A list with one array of matching posting ids per row of ``df``.
    """
    model = TfidfVectorizer(stop_words = 'english', binary = True, max_features = max_features)
    # Vectorize the titles of the frame that was passed in, rather than
    # relying on a module-level cuDF copy being present.
    titles = cudf.from_pandas(df['title'])
    text_embeddings = model.fit_transform(titles).toarray()

    preds = []
    CHUNK = 1024*4
    n_rows = len(df)
    posting_ids = df['posting_id'].values

    print('Finding similar titles...')
    for a in range(0, n_rows, CHUNK):
        b = min(a + CHUNK, n_rows)
        print('chunk',a,'to',b)

        # Dot products of the chunk against every row. NOTE(review): this is
        # cosine similarity only if the vectorizer L2-normalizes its rows
        # (believed to be its default) -- confirm.
        cts = cupy.matmul( text_embeddings, text_embeddings[a:b].T).T

        for k in range(b-a):
            IDX = cupy.where(cts[k,]>threshold)[0]
            preds.append(posting_ids[cupy.asnumpy(IDX)])

    del model,text_embeddings
    gc.collect()
    return preds

# Inference

## Data Configuration

In [150]:
class Product_Images(Dataset):
    """Dataset that reads images from disk and pairs each with a dummy label."""

    def __init__(self, image_paths, transforms=None):
        """Store the path array and the optional augmentation callable."""
        self.augmentations = transforms
        self.image_paths = image_paths

    def __len__(self):
        """Number of images, taken from the first dimension of the path array."""
        return self.image_paths.shape[0]

    def __getitem__(self, index):
        """Return ``(image, label)`` for ``index``; the label is always 1."""
        # OpenCV decodes to BGR; convert to RGB before augmenting.
        bgr = cv2.imread(self.image_paths[index])
        image = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

        if self.augmentations:
            image = self.augmentations(image=image)['image']

        return image, torch.tensor(1)

## ArcFace Margin loss function for improved large-scale recognition


In [151]:
class ImageRecog_MarginLoss_ArcFace(nn.Module):
    """ArcFace-style additive angular margin head.

    Produces scaled cosine logits between L2-normalized inputs and
    L2-normalized class weights, with an angular margin applied to the
    entry selected by ``label``.

    Args:
        in_features: Size of each input embedding.
        out_features: Number of classes (rows of the weight matrix).
        scale: Factor the logits are multiplied by.
        margin: Angular margin, in radians.
        easy_margin: If True, the margin is applied only where cos(theta) > 0.
        ls_eps: Label-smoothing amount applied to the one-hot targets.
    """

    def __init__(self, in_features, out_features, scale=30.0, margin=0.50, easy_margin=False, ls_eps=0.0):
        super(ImageRecog_MarginLoss_ArcFace, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.scale = scale
        self.margin = margin
        self.ls_eps = ls_eps  # label smoothing
        self.weight = nn.Parameter(torch.FloatTensor(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)

        self.easy_margin = easy_margin
        # Precomputed terms of cos(theta + margin) and of the fallback used
        # when theta + margin would go past pi.
        self.cos_m = math.cos(margin)
        self.sin_m = math.sin(margin)
        self.th = math.cos(math.pi - margin)
        self.mm = math.sin(math.pi - margin) * margin

    def forward(self, input, label):
        """Return the scaled margin logits, shape ``(batch, out_features)``.

        ``label`` holds one class index per input row.
        """
        cosTheta = F.linear(F.normalize(input), F.normalize(self.weight))
        # Clamp before the square root: rounding can push cos^2 slightly
        # above 1, which would otherwise produce NaN.
        sineTheta = torch.sqrt((1.0 - torch.pow(cosTheta, 2)).clamp(min=0.0))
        # cos(theta + m) = cos(theta)cos(m) - sin(theta)sin(m)
        phi_val = cosTheta * self.cos_m - sineTheta * self.sin_m
        if self.easy_margin:
            phi_val = torch.where(cosTheta > 0, phi_val, cosTheta)
        else:
            phi_val = torch.where(cosTheta > self.th, phi_val, cosTheta - self.mm)
        # Build the one-hot mask on the same device/dtype as the logits
        # instead of assuming CUDA.
        one_hot_encoding = torch.zeros_like(cosTheta)
        one_hot_encoding.scatter_(1, label.view(-1, 1).long(), 1)
        if self.ls_eps > 0:
            one_hot_encoding = (1 - self.ls_eps) * one_hot_encoding + self.ls_eps / self.out_features
        # Margin logit for the labelled class, plain cosine for the rest.
        logits = (one_hot_encoding * phi_val) + ((1.0 - one_hot_encoding) * cosTheta)
        logits *= self.scale

        return logits

## Prediction Model for the Shopee products

In [160]:
class Product_Image_Prediction_Model(nn.Module):
    """timm backbone followed by pooling, a linear + batch-norm neck and an
    ArcFace head.

    ``forward`` returns the neck features (embeddings); the ArcFace head is
    constructed so that checkpoints containing its weights load, but it is
    not applied in ``forward``.

    Args:
        model_name: One of the supported timm backbone names (see
            ``__init__``).
        n_classes: Number of classes for the ArcFace head.
        fc_dim: Output size of the linear neck.
        margin: Margin passed to the ArcFace head.
        scale: Scale passed to the ArcFace head.
        use_fc: If False, ``extract_feat`` returns the pooled backbone
            features without the dropout / linear / batch-norm neck.
        pretrained: Forwarded to ``timm.create_model``.

    Raises:
        ValueError: If ``model_name`` is not a supported backbone.
    """

    # Backbones whose classifier lives at ``backbone.classifier``.
    _EFFICIENTNET_NAMES = (
        'efficientnet_b3',
        'tf_efficientnet_b5_ns',
        'tf_efficientnet_b6_ns',
    )

    def __init__(
        self, model_name,
        n_classes = pretrained_model_config.classes,
        fc_dim = 512,
        margin = pretrained_model_config.margin,
        scale = pretrained_model_config.scale,
        use_fc = True,
        pretrained = False):

        super(Product_Image_Prediction_Model,self).__init__()

        supported = ('resnext50_32x4d', 'eca_nfnet_l0') + self._EFFICIENTNET_NAMES
        if model_name not in supported:
            # Fail early with a clear message instead of hitting an unbound
            # ``final_in_features`` further down.
            raise ValueError(
                'Unsupported model_name {!r}; expected one of {}'.format(model_name, supported)
            )

        print('Building Model Backbone for {} model'.format(model_name))

        self.backbone = timm.create_model(model_name, pretrained=pretrained)

        # Strip the backbone's own classifier and pooling so it outputs a
        # feature map; remember the classifier's input width for the neck.
        if model_name == 'resnext50_32x4d':
            final_in_features = self.backbone.fc.in_features
            self.backbone.fc = nn.Identity()
            self.backbone.global_pool = nn.Identity()

        elif model_name in self._EFFICIENTNET_NAMES:
            final_in_features = self.backbone.classifier.in_features
            self.backbone.classifier = nn.Identity()
            self.backbone.global_pool = nn.Identity()

        else:  # 'eca_nfnet_l0'
            final_in_features = self.backbone.head.fc.in_features
            self.backbone.head.fc = nn.Identity()
            self.backbone.head.global_pool = nn.Identity()

        self.pooling =  nn.AdaptiveAvgPool2d(1)

        self.use_fc = use_fc

        self.dropout = nn.Dropout(p=0.0)
        self.fc = nn.Linear(final_in_features, fc_dim)
        self.bn = nn.BatchNorm1d(fc_dim)
        self._init_params()
        final_in_features = fc_dim

        # The ArcFace head class defined in this file; the attribute name
        # ``final`` is kept so checkpoint keys stay the same.
        self.final = ImageRecog_MarginLoss_ArcFace(
            final_in_features,
            n_classes,
            scale = scale,
            margin = margin,
            easy_margin = False,
            ls_eps = 0.0
        )

    def _init_params(self):
        """Initialize the neck: Xavier-normal linear weight, zero biases,
        unit batch-norm weight."""
        nn.init.xavier_normal_(self.fc.weight)
        nn.init.constant_(self.fc.bias, 0)
        nn.init.constant_(self.bn.weight, 1)
        nn.init.constant_(self.bn.bias, 0)

    def forward(self, image, label=None):
        """Return the embedding for ``image``.

        ``label`` is accepted for call compatibility but is not used: the
        ArcFace head is not applied here.
        """
        return self.extract_feat(image)

    def extract_feat(self, x):
        """Backbone -> global average pool -> (optional) dropout/fc/bn neck."""
        batch_size = x.shape[0]
        x = self.backbone(x)
        x = self.pooling(x).view(batch_size, -1)

        if self.use_fc:
            x = self.dropout(x)
            x = self.fc(x)
            x = self.bn(x)
        return x

## Mish Activation function

In [153]:
class Mish_func(torch.autograd.Function):
    """Mish activation, ``x * tanh(softplus(x))``, with a hand-written
    backward that stores only the input tensor."""

    @staticmethod
    def forward(ctx, i):
        """Compute Mish of ``i`` and save ``i`` for the backward pass."""
        result = i * torch.tanh(F.softplus(i))
        ctx.save_for_backward(i)
        return result

    @staticmethod
    def backward(ctx, grad_output):
        """Return ``grad_output`` times d(Mish)/d(input)."""
        # ``saved_tensors`` replaces the deprecated ``saved_variables``.
        (i,) = ctx.saved_tensors

        tanh_sp = torch.tanh(F.softplus(i))
        # d/dx tanh(softplus(x)) = sech^2(softplus(x)) * sigmoid(x),
        # and sech^2 = 1 - tanh^2, which reuses tanh_sp and avoids exp/cosh
        # overflow on large inputs.
        grad_gx = (1.0 - tanh_sp * tanh_sp) * torch.sigmoid(i)

        grad_f = tanh_sp + i * grad_gx

        return grad_output * grad_f


class Mish(nn.Module):
    """Module wrapper that applies ``Mish_func`` to its input."""

    def __init__(self, **kwargs):
        # Keyword arguments are accepted and ignored.
        super().__init__()

    def forward(self, input_tensor):
        """Apply the Mish activation to ``input_tensor``."""
        activated = Mish_func.apply(input_tensor)
        return activated


def replace_activations(model, existing_layer, new_layer):
    """Recursively swap every submodule of exact type ``existing_layer``.

    The swap is done in place on ``model`` and the same ``new_layer``
    instance is installed at every matching position.

    Returns:
        ``model`` itself, after modification.
    """
    children = model._modules
    for child_name, child in reversed(children.items()):
        # Descend first when the child is itself a container.
        if next(child.children(), None) is not None:
            children[child_name] = replace_activations(child, existing_layer, new_layer)

        # Exact type match only; subclasses are left alone.
        if type(child) == existing_layer:
            children[child_name] = new_layer
    return model

## 1. Read test data

In [154]:
# Load the test metadata once: pandas frame, its cuDF copy, and the image path of every row.
df,df_cu,image_paths = get_testData()
df.head()

Unnamed: 0,posting_id,image,image_phash,title
0,test_2255846744,0006c8e5462ae52167402bac1c2e916e.jpg,ecc292392dc7687a,Edufuntoys - CHARACTER PHONE ada lampu dan mus...
1,test_3588702337,0007585c4d0f932859339129f709bfdc.jpg,e9968f60d2699e2c,(Beli 1 Free Spatula) Masker Komedo | Blackhea...
2,test_4015706929,0008377d3662e83ef44e1881af38b879.jpg,ba81c17e3581cabe,READY Lemonilo Mie instant sehat kuah dan goreng


## 2. Retrieve image embeddings

In [159]:
# Embeddings from the first configured model (model_name1 / model_path1).
image_embeddings1 = get_image_embeddings(image_paths.values, pretrained_model_config.model_name1, pretrained_model_config.model_path1)

Building Model Backbone for eca_nfnet_l0 model


100%|██████████| 1/1 [00:00<00:00,  2.88it/s]


Our image embeddings shape is (3, 512)


In [161]:
# Embeddings from the second configured model (model_name2 / model_path2).
image_embeddings2 = get_image_embeddings(image_paths.values, pretrained_model_config.model_name2, pretrained_model_config.model_path2)

Building Model Backbone for tf_efficientnet_b6_ns model


100%|██████████| 1/1 [00:00<00:00,  2.81it/s]

Our image embeddings shape is (3, 512)





In [164]:
# Blend the two embedding sets with fixed weights: 0.3 for model 1, 0.7 for model 2.
image_embeddings = (0.3*image_embeddings1 + 0.7*image_embeddings2)

## 3. Predict similar products by title & image

In [169]:
# Image matches come from a neighbour search on the blended embeddings;
# text matches come from TF-IDF similarity of the titles.
image_predictions = get_similar_products_by_image(df, image_embeddings, threshold = 0.75)
text_predictions = get_similar_products_by_title(df, max_features = 30000)

100%|██████████| 3/3 [00:00<00:00, 4248.11it/s]


Finding similar titles...
chunk 0 to 3


## 4. Combine predictions & Save

In [170]:
# Attach both prediction lists to the frame, merge them row by row into a
# space-separated 'matches' string, and write the submission file.
df['image_predictions'] = image_predictions
df['text_predictions'] = text_predictions
df['matches'] = df.apply(mergePredictions, axis = 1)
df[['posting_id', 'matches']].to_csv('submission.csv', index = False)

In [171]:
# Display the two columns that were written to submission.csv.
df[['posting_id', 'matches']]

Unnamed: 0,posting_id,matches
0,test_2255846744,test_2255846744
1,test_3588702337,test_3588702337
2,test_4015706929,test_4015706929


# Inferences

Ensembled predictions using [EfficientNetB6] + [eca-nfnet-l0] along with the TF-IDF Vectorizer provided a better F1 score than EfficientNetB[3,5] / RESNET152 eca-nfnet-l0 [+] TF-IDF predictions.

**LEARNING**

How to ensemble different pretrained models to make predictions on the SHOPEE dataset.

Step-6

1) To try different ensembles with different neighbour thresholds to see if the score improves.

2) To try [Fastai + Annoy] (Match by image) along with BERT tokenizer(match by title) to find similar products.