# References

- https://www.programmersought.com/article/82038071353/
- https://pypi.org/project/timm/
- https://paperswithcode.com/method/arcface
- https://albumentations.ai/docs/
- https://www.kaggle.com/parthdhameliya77/pytorch-efficientnet-b3-image-tfidf-inference
- https://www.kaggle.com/parthdhameliya77/pytorch-efficientnet-b5-image-tfidf-inference
- https://github.com/rapidsai/cuml

# Goal

1) To use EfficientNetB[3,5,6] pretrained models as the backbone to train SHOPEE image dataset for product matching by [IMAGE].

2) To use [TF-IDF Vectorizer] sklearn model as the backbone to train SHOPEE metadata for product matching by [Title].

3) To retrieve product matches (neighbours) for text & image embeddings from Step[1],[2] using KNN.

4) Combine predictions for text, image product matches from Step[3].

# Import libraries

In [None]:
import sys
sys.path.append('../input/timm-pytorch-image-models/pytorch-image-models-master')

import numpy as np 
import pandas as pd 

import math
import random 
import os 
import cv2
import timm

from tqdm import tqdm 

import albumentations as Albumentations 
from albumentations.pytorch.transforms import ToTensorV2

import torch 
from torch.utils.data import Dataset 
from torch import nn
import torch.nn.functional as F 

import gc

#RAPIDS AI for NearestNeighbors, Vectorizing Text.
import cudf 
import cuml
import cupy
from cuml.feature_extraction.text import TfidfVectorizer
from cuml.neighbors import NearestNeighbors

# PyTorch Configurations

**Note:** model config structure adapted from https://www.kaggle.com/parthdhameliya77/pytorch-efficientnet-b3-image-tfidf-inference

## Pretrained models config

In [None]:
class pretrained_model_config:
    """Static settings shared by the inference cells; read as class attributes."""

    # --- reproducibility / runtime -------------------------------------
    seed = 2020
    device = 'cuda'

    # --- data loading ---------------------------------------------------
    img_size = 512      # test images are resized to img_size x img_size
    batch_size = 40

    # --- backbone selection --------------------------------------------
    # Change model_name to make predictions with the respective model:
    #   eca_nfnet_l0
    #   tf_efficientnet_b3_ns
    #   tf_efficientnet_b5_ns
    #   tf_efficientnet_b6_ns
    model_name = 'eca_nfnet_l0'

    # Change model_path to the checkpoint of the selected model:
    #   ../input/shopee-pytorch-models/arcface_512x512_nfnet_l0.pt
    #   ../input/shopee-pytorch-models/arcface_512x512_eff_b3_.pt
    #   ../input/shopee-pytorch-models/arcface_512x512_eff_b5_.pt
    #   ../input/arcface-512x512-eff-b6-pt/arcface_512x512_tf_efficientnet_b6.pt
    model_path = '../input/shopee-pytorch-models/arcface_512x512_nfnet_l0.pt'

    # --- ArcFace head ---------------------------------------------------
    classes = 11014     # output size of the ArcFace weight matrix
    scale = 30
    margin = 0.5

## Configure PyTorch seed

In [None]:
def configure_seed(seed=2000):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices),
    exports ``PYTHONHASHSEED`` and switches cuDNN to deterministic mode.

    Args:
        seed: Integer seed applied to all generators.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not only the current one.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may pick different kernels between runs, which
    # defeats the deterministic flag above, so turn it off as well.
    torch.backends.cudnn.benchmark = False
    
# Apply the seed from the inference config to all random number generators.
configure_seed(pretrained_model_config.seed)

## PyTorch test image transformation

In [None]:
def get_test_transforms():
    """Build the Albumentations pipeline applied to every test image.

    The pipeline resizes to a square of ``pretrained_model_config.img_size``,
    normalizes with the library's default arguments and converts the result
    to a tensor.

    Returns:
        An ``Albumentations.Compose`` object; call it as ``pipeline(image=img)``.
    """
    side = pretrained_model_config.img_size
    steps = [
        Albumentations.Resize(side, side, always_apply=True),
        Albumentations.Normalize(),
        ToTensorV2(p=1.0),
    ]
    return Albumentations.Compose(steps)

# Helper functions

## Read test meta-data & images

In [None]:
def get_testData():
    """Load the Shopee test metadata and derive the image file paths.

    Returns:
        A 3-tuple ``(df, df_cuda, df_img_path)``: the pandas DataFrame read
        from ``test.csv``, a cuDF copy of it, and a pandas Series holding the
        path of each row's image (directory prefix + ``image`` column).
    """
    frame = pd.read_csv('../input/shopee-product-matching/test.csv')
    image_files = '../input/shopee-product-matching/test_images/' + frame['image']
    gpu_frame = cudf.DataFrame(frame)
    return frame, gpu_frame, image_files

## Merge predictions using Product (Title, Image)

In [None]:
def mergePredictions(product):
    """Merge a row's image-based and title-based matches into one string.

    Args:
        product: Mapping/row exposing ``'image_predictions'`` and
            ``'text_predictions'`` array-likes of posting ids.

    Returns:
        The distinct ids (sorted by ``np.unique``) joined with single spaces.
    """
    by_image = product['image_predictions']
    by_title = product['text_predictions']
    distinct_ids = np.unique(np.concatenate((by_image, by_title)))
    return ' '.join(distinct_ids)

## Get similar products with image

In [None]:
def get_similar_products_by_image(df, image_embeddings,neighbour_threshold = 4.5, max_neighbours = 100):
    """Find, for every product, the products whose image embedding is close.

    A nearest-neighbour model is fitted on ``image_embeddings`` and queried
    with the same embeddings; a neighbour counts as a match when its distance
    is strictly below ``neighbour_threshold``.

    Args:
        df: DataFrame with a ``posting_id`` column, row-aligned with
            ``image_embeddings``.
        image_embeddings: 2-D array with one embedding per row of ``df``.
        neighbour_threshold: Maximum (exclusive) distance for a match.
        max_neighbours: Upper bound on the neighbours retrieved per product.

    Returns:
        A list with one array of matched ``posting_id`` values per product.
    """
    n_samples = image_embeddings.shape[0]
    if n_samples == 0:
        return []

    # The original picked 100 neighbours for any input larger than 3 rows,
    # which asks for more neighbours than there are samples on small inputs.
    # Never request more neighbours than fitted samples.
    KNN = min(max_neighbours, n_samples)

    model = NearestNeighbors(n_neighbors = KNN)
    model.fit(image_embeddings)
    distances, indices = model.kneighbors(image_embeddings)

    posting_ids = df['posting_id']
    predictions = []
    for row_distances, row_indices in tqdm(zip(distances, indices), total=n_samples):
        close_enough = np.where(row_distances < neighbour_threshold)[0]
        matched_rows = row_indices[close_enough]
        predictions.append(posting_ids.iloc[matched_rows].values)

    # Release the neighbour model and its result arrays before returning.
    del model, distances, indices
    gc.collect()

    return predictions

## Retrieve image embeddings

In [None]:
def get_image_embeddings(image_paths, model_name = pretrained_model_config.model_name):
    """Compute one embedding per image with the configured checkpoint.

    Builds ``Product_Image_Prediction_Model``, loads the weights from
    ``pretrained_model_config.model_path`` and runs the model over all images
    in batches without gradient tracking.

    Args:
        image_paths: Indexable collection of image file paths.
        model_name: timm backbone name; must match the checkpoint.

    Returns:
        A NumPy array of all batch outputs concatenated along the first axis.
    """
    device = pretrained_model_config.device

    model = Product_Image_Prediction_Model(model_name = model_name)
    # map_location lets the checkpoint load on whatever device is configured,
    # instead of requiring the device it was saved from.
    model.load_state_dict(torch.load(pretrained_model_config.model_path, map_location=device))
    model = model.to(device)
    model.eval()

    image_dataset = Product_Images(image_paths=image_paths,transforms=get_test_transforms())
    image_loader = torch.utils.data.DataLoader(
        image_dataset,
        batch_size=pretrained_model_config.batch_size,
        pin_memory=True,
        drop_last=False,
        num_workers=4
    )

    embeds = []
    with torch.no_grad():
        for img,label in tqdm(image_loader):
            # Use the configured device rather than a hard-coded .cuda().
            img = img.to(device)
            label = label.to(device)
            feat = model(img,label)
            embeds.append(feat.detach().cpu().numpy())

    # Drop the model before building the full embedding matrix.
    del model
    image_embeddings = np.concatenate(embeds)
    print(f'Our image embeddings shape is {image_embeddings.shape}')
    del embeds
    gc.collect()
    return image_embeddings

## Get similar products with title

In [None]:
def get_similar_products_by_title(df, max_features = 25_000, similarity_threshold = 0.75):
    """Find, for every product, the products with a similar title.

    Titles are vectorized on the GPU with a binary TF-IDF model and compared
    chunk by chunk through a matrix product of the TF-IDF rows; two products
    match when that product exceeds ``similarity_threshold``.

    Args:
        df: pandas DataFrame with ``title`` and ``posting_id`` columns.
        max_features: Vocabulary size limit passed to the vectorizer.
        similarity_threshold: Minimum (exclusive) similarity for a match.

    Returns:
        A list with one array of matched ``posting_id`` values per row of ``df``.
    """
    model = TfidfVectorizer(stop_words = 'english', binary = True, max_features = max_features)
    # Build the GPU title column from the argument itself; the original read
    # a module-level cuDF frame, silently coupling the function to a global.
    titles = cudf.from_pandas(df['title'])
    text_embeddings = model.fit_transform(titles).toarray()

    preds = []
    CHUNK_SIZE = 1024*4
    n_rows = len(df)
    posting_ids = df['posting_id'].values

    print('Finding similar titles...')
    # Process the similarity matrix CHUNK_SIZE rows at a time to bound memory.
    for a in range(0, n_rows, CHUNK_SIZE):
        b = min(a + CHUNK_SIZE, n_rows)
        print('chunk',a,'to',b)

        # Row k of cts holds the similarity of product a+k to every product.
        cts = cupy.matmul( text_embeddings, text_embeddings[a:b].T).T

        for k in range(b-a):
            IDX = cupy.where(cts[k,]>similarity_threshold)[0]
            preds.append(posting_ids[cupy.asnumpy(IDX)])

    del model,text_embeddings
    gc.collect()
    return preds

# Inference

**Note:** model config structure adapted from https://www.kaggle.com/parthdhameliya77/pytorch-efficientnet-b3-image-tfidf-inference

## Data Configuration

In [None]:
class Product_Images(Dataset):
    """Dataset that loads product images from disk for inference.

    Each item is ``(image, dummy_label)``: the image is read with OpenCV,
    converted from BGR to RGB and passed through the optional transform
    pipeline; the label is always ``torch.tensor(1)`` because no ground
    truth is used at inference time.
    """

    def __init__(self, image_paths, transforms=None):
        """Store the paths and the optional Albumentations-style transform.

        Args:
            image_paths: Indexable, sized collection of image file paths.
            transforms: Callable invoked as ``transforms(image=img)`` that
                returns a mapping with an ``'image'`` entry, or ``None``.
        """
        self.image_paths = image_paths
        self.augmentations = transforms

    def __len__(self):
        # len() works for NumPy arrays as well as plain lists/Series,
        # whereas .shape[0] only works for array-likes.
        return len(self.image_paths)

    def __getitem__(self, index):
        image_path = self.image_paths[index]

        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure by returning None; fail here with
            # the offending path instead of an opaque cvtColor error.
            raise FileNotFoundError(f'Could not read image: {image_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.augmentations:
            augmented = self.augmentations(image=image)
            image = augmented['image']

        return image,torch.tensor(1)

## ArcFace Margin loss function for improved large-scale recognition

In [None]:
class ImageRecog_MarginLoss_ArcFace(nn.Module):
    """ArcFace head: cosine logits with an additive angular margin.

    Holds a learnable ``(out_features, in_features)`` weight matrix. The
    forward pass returns scaled logits (not a reduced loss value): the target
    class uses ``cos(theta + margin)``, all other classes use ``cos(theta)``.
    """

    def __init__(self, in_features, out_features, scale=30.0, margin=0.50, easy_margin=False, ls_eps=0.0):
        """
        Args:
            in_features: Size of each input embedding.
            out_features: Number of classes.
            scale: Factor the final logits are multiplied by.
            margin: Angular margin in radians added to the target angle.
            easy_margin: If True, apply the margin only where ``cos(theta) > 0``.
            ls_eps: Label-smoothing epsilon; 0 disables smoothing.
        """
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.scale = scale
        self.margin = margin
        self.ls_eps = ls_eps  # label smoothing
        self.weight = nn.Parameter(torch.FloatTensor(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)

        self.easy_margin = easy_margin
        # Precomputed constants for cos(theta + margin) and its fallback.
        self.cos_m = math.cos(margin)
        self.sin_m = math.sin(margin)
        self.th = math.cos(math.pi - margin)
        self.mm = math.sin(math.pi - margin) * margin

    def forward(self, input, label):
        """Return scaled margin logits of shape ``(batch, out_features)``.

        Args:
            input: Embeddings of shape ``(batch, in_features)``.
            label: Integer class index per sample (``batch`` elements).
        """
        cosTheta = F.linear(F.normalize(input), F.normalize(self.weight))
        # Rounding can push cos^2 marginally above 1; clamp so sqrt never
        # yields NaN.
        sineTheta = torch.sqrt((1.0 - torch.pow(cosTheta, 2)).clamp(min=0.0))
        # cos(theta + m) = cos(theta)cos(m) - sin(theta)sin(m)
        phi_val = cosTheta * self.cos_m - sineTheta * self.sin_m
        if self.easy_margin:
            phi_val = torch.where(cosTheta > 0, phi_val, cosTheta)
        else:
            phi_val = torch.where(cosTheta > self.th, phi_val, cosTheta - self.mm)
        # Allocate on the input's device instead of a hard-coded 'cuda' so
        # the head also works on CPU or a non-default GPU.
        one_hot_encoding = torch.zeros(cosTheta.size(), device=cosTheta.device)
        one_hot_encoding.scatter_(1, label.view(-1, 1).long(), 1)
        if self.ls_eps > 0:
            one_hot_encoding = (1 - self.ls_eps) * one_hot_encoding + self.ls_eps / self.out_features
        # Margin logit for the target class, plain cosine for the others.
        loss = (one_hot_encoding * phi_val) + ((1.0 - one_hot_encoding) * cosTheta)
        loss *= self.scale

        return loss

## Prediction Model for the Shopee products

In [None]:
class Product_Image_Prediction_Model(nn.Module):
    """timm backbone with pooled-feature output, used as an embedding model.

    The backbone's own classifier and global pooling are replaced by
    ``nn.Identity`` so it emits a feature map, which is then average-pooled
    and flattened. With ``use_fc=True`` the features additionally pass
    through dropout, a linear layer and batch norm. ``forward`` returns the
    embedding only; the ArcFace head in ``self.final`` is constructed but
    not applied (presumably kept so trained checkpoints load with matching
    state_dict keys -- confirm against the training code).
    """

    def __init__(
        self,
        n_classes = pretrained_model_config.classes,
        model_name = pretrained_model_config.model_name,
        fc_dim = 512,
        margin = pretrained_model_config.margin,
        scale = pretrained_model_config.scale,
        use_fc = False,
        pretrained = False):
        """
        Args:
            n_classes: Number of classes for the ArcFace head.
            model_name: timm model name used to create the backbone.
            fc_dim: Output size of the optional linear layer.
            margin: ArcFace angular margin.
            scale: ArcFace logit scale.
            use_fc: Whether ``extract_feat`` applies dropout/fc/bn.
            pretrained: Passed to ``timm.create_model``.

        Raises:
            ValueError: If the backbone exposes no known classifier layer.
        """
        super().__init__()

        print('Compiling & Building SHOPEE Model Backbone for {} model'.format(model_name))

        self.backbone = timm.create_model(model_name, pretrained=pretrained)
        final_in_features = self._strip_classifier(self.backbone, model_name)

        self.pooling =  nn.AdaptiveAvgPool2d(1)

        self.use_fc = use_fc

        self.dropout = nn.Dropout(p=0.0)

        self.fc = nn.Linear(final_in_features, fc_dim)

        self.bn = nn.BatchNorm1d(fc_dim)

        self._init_params()

        # The head is always sized by fc_dim, as in the original code.
        self.final = ImageRecog_MarginLoss_ArcFace(
            fc_dim,
            n_classes,
            scale = scale,
            margin = margin,
            easy_margin = False,
            ls_eps = 0.0
        )

    @staticmethod
    def _strip_classifier(backbone, model_name):
        """Replace the backbone's classifier and pooling with Identity.

        The classifier layer lives under different attributes depending on
        the architecture, so probe for it rather than hard-coding one
        attribute per model name. The original per-name branches read
        ``backbone.fc`` for one EfficientNet variant but ``classifier`` for
        the others, and left the feature size undefined for any other name.

        Returns:
            The ``in_features`` of the removed classifier layer.
        """
        if hasattr(backbone, 'classifier'):
            holder, attr = backbone, 'classifier'
        elif hasattr(backbone, 'head') and hasattr(backbone.head, 'fc'):
            holder, attr = backbone.head, 'fc'
        elif hasattr(backbone, 'fc'):
            holder, attr = backbone, 'fc'
        else:
            raise ValueError(
                'Unsupported backbone {!r}: no classifier layer found'.format(model_name)
            )

        in_features = getattr(holder, attr).in_features
        setattr(holder, attr, nn.Identity())
        # Pooling is done by self.pooling, so disable the built-in one.
        holder.global_pool = nn.Identity()
        return in_features

    def _init_params(self):
        """Initialise the optional fc and batch-norm layers."""
        nn.init.xavier_normal_(self.fc.weight)
        nn.init.constant_(self.fc.bias, 0)
        nn.init.constant_(self.bn.weight, 1)
        nn.init.constant_(self.bn.bias, 0)

    def forward(self, image, label=None):
        """Return the embedding for ``image``; ``label`` is accepted but unused."""
        feature = self.extract_feat(image)
        return feature

    def extract_feat(self, x):
        """Run the backbone, average-pool and flatten to ``(batch, features)``."""
        batch_size = x.shape[0]
        x = self.backbone(x)
        x = self.pooling(x).view(batch_size, -1)

        if self.use_fc:
            x = self.dropout(x)
            x = self.fc(x)
            x = self.bn(x)
        return x

## 1. Read test data

In [None]:
# Load the test metadata (pandas frame plus cuDF copy) and the image paths,
# then preview the first rows.
df,df_cu,image_paths = get_testData()
df.head()

## 2. Retrieve image embeddings

In [None]:
# Pass the paths as a NumPy array (.values) to the embedding function.
image_embeddings = get_image_embeddings(image_paths.values)

## 3. Predict similar products by title & image

In [None]:
# One array of matched posting ids per product, from each modality.
image_predictions = get_similar_products_by_image(df, image_embeddings, neighbour_threshold = 4.5)
text_predictions = get_similar_products_by_title(df, max_features = 25_000)

## 4. Combine predictions & Save

In [None]:
# Attach both prediction lists to the frame, merge them row by row into a
# space-separated string of distinct ids, and write the submission file.
df['image_predictions'] = image_predictions
df['text_predictions'] = text_predictions
df['matches'] = df.apply(mergePredictions, axis = 1)
df[['posting_id', 'matches']].to_csv('submission.csv', index = False)

In [None]:
# Display the two columns that were written to submission.csv.
df[['posting_id', 'matches']]

# Inferences

Combined predictions using EfficientNetB6 along with the TF-IDF Vectorizer provided a better F1 score than EfficientNetB[3,5] / RESNET152 + TF-IDF predictions.

**LEARNING**

1) How to train pretrained EfficientNetB[3,5] on the SHOPEE dataset for more epochs.
2) How to train EfficientNetB[6] on the SHOPEE dataset from scratch and how to stay calm :) P.S B6 training took 34 hours.

**Step-4**

To replace the [EfficientNet] model with the [eca-nfnet-l0] model and make inferences on the SHOPEE dataset, to see if the current [F1] score can be improved.