In [1]:
!nvidia-smi -L

GPU 0: Tesla V100-SXM2-16GB (UUID: GPU-c34b6cb0-ef44-093e-a1d1-c21cfa79c5d0)


In [2]:
!pip install --upgrade --force-reinstall --no-deps kaggle

Collecting kaggle
[?25l  Downloading https://files.pythonhosted.org/packages/3a/e7/3bac01547d2ed3d308ac92a0878fbdb0ed0f3d41fb1906c319ccbba1bfbc/kaggle-1.5.12.tar.gz (58kB)
[K     |█████▋                          | 10kB 16.6MB/s eta 0:00:01[K     |███████████▏                    | 20kB 21.7MB/s eta 0:00:01[K     |████████████████▊               | 30kB 26.4MB/s eta 0:00:01[K     |██████████████████████▎         | 40kB 20.6MB/s eta 0:00:01[K     |███████████████████████████▉    | 51kB 18.0MB/s eta 0:00:01[K     |████████████████████████████████| 61kB 6.6MB/s 
[?25hBuilding wheels for collected packages: kaggle
  Building wheel for kaggle (setup.py) ... [?25l[?25hdone
  Created wheel for kaggle: filename=kaggle-1.5.12-cp37-none-any.whl size=73053 sha256=c63c9fcaa30274e844709f283fb021467c49b687debdb8897b2ab74cd6af0be1
  Stored in directory: /root/.cache/pip/wheels/a1/6a/26/d30b7499ff85a4a4593377a87ecf55f7d08af42f0de9b60303
Successfully built kaggle
Installing collected packa

In [3]:
import os

# SECURITY: Kaggle credentials must never be committed to a notebook. The API
# key that used to be hardcoded in this cell has been exposed and should be
# revoked and regenerated from the Kaggle account settings page.
#
# Credentials already present in the environment are reused; otherwise they are
# requested interactively (the key is read with getpass so it is not echoed into
# the cell output).
from getpass import getpass

if not os.environ.get("KAGGLE_USERNAME"):
    os.environ["KAGGLE_USERNAME"] = input("Kaggle username: ")
if not os.environ.get("KAGGLE_KEY"):
    os.environ["KAGGLE_KEY"] = getpass("Kaggle API key: ")

In [4]:
!kaggle datasets download -d kozodoi/timm-pytorch-image-models
!unzip /content/timm-pytorch-image-models.zip &> /dev/null

Downloading timm-pytorch-image-models.zip to /content
 35% 5.00M/14.1M [00:00<00:00, 17.2MB/s]
100% 14.1M/14.1M [00:00<00:00, 38.6MB/s]


In [5]:
import sys
sys.path.append('/content/pytorch-image-models-master')
import timm

In [8]:
!kaggle competitions download -c shopee-product-matching
!unzip /content/shopee-product-matching.zip &> /dev/null

shopee-product-matching.zip: Skipping, found more recently modified local copy (use --force to force download)


In [7]:
!pip install albumentations==0.5.2 &> /dev/null

In [None]:
import math
import os
import warnings

import albumentations
import cv2
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
from albumentations.pytorch.transforms import ToTensorV2
from sklearn.preprocessing import LabelEncoder
from torch import nn
from torch.cuda.amp import autocast, GradScaler
from torch.optim import Adam
from torch.optim.lr_scheduler import _LRScheduler
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, CosineAnnealingLR, ReduceLROnPlateau
from torch.optim.optimizer import Optimizer
from tqdm.notebook import tqdm

In [None]:
class Config:
    """Single namespace holding every tunable used by the cells below."""

    # --- data locations and fold selection ---
    DATA_DIR = '/content/train_images'
    TRAIN_CSV = '/content/train.csv'
    fold_to_train = 2  # rows with this fold id are used for validation

    # --- image preprocessing (consumed by the albumentations pipelines) ---
    IMG_SIZE = 512
    MEAN = [0.485, 0.456, 0.406]
    STD = [0.229, 0.224, 0.225]

    # --- training loop / data loading ---
    EPOCHS = 15
    BATCH_SIZE = 16
    NUM_WORKERS = 4
    DEVICE = 'cuda'

    # --- ArcFace head (defaults of ShopeeModel) ---
    CLASSES = 11014
    SCALE = 30
    MARGIN = 0.5

    # --- backbone / optimizer selection ---
    RANGER = False  # True: Ranger optimizer + SiLU->Mish swap, False: Adam
    MODEL_NAME = 'eca_nfnet_l1'  # alternatives tried: 'dm_nfnet_f0', vit_base_patch32_384, eca_nfnet_l0
    FC_DIM = 512

    # Keyword arguments forwarded verbatim to ShopeeScheduler.
    SCHEDULER_PARAMS = dict(
        lr_start=1e-5,
        lr_max=1e-5 * 32,
        lr_min=1e-6,
        lr_ramp_ep=5,
        lr_sus_ep=0,
        lr_decay=0.8,
    )

In [None]:
class ShopeeDataset(torch.utils.data.Dataset):
    """Map-style dataset returning one image and its label per dataframe row.

    Args:
        df: dataframe with an ``image`` column (file name, joined onto
            ``Config.DATA_DIR``) and a ``label_group`` column (converted to a
            long tensor).
        transform: optional callable invoked as ``transform(image=...)`` that
            returns a mapping with an ``'image'`` entry (albumentations style).
    """

    def __init__(self,df, transform = None):
        self.df = df
        self.root_dir = Config.DATA_DIR
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self,idx):
        """Load, colour-convert and optionally transform the image at ``idx``.

        Returns:
            dict with ``'image'`` (the transformed image, or the raw RGB array
            when no transform is set) and ``'label'`` (0-dim long tensor).

        Raises:
            FileNotFoundError: if the image file is missing or cannot be decoded.
        """
        row = self.df.iloc[idx]

        img_path = os.path.join(self.root_dir, row.image)
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising;
            # fail here with the offending path rather than inside cvtColor.
            raise FileNotFoundError(f"Could not read image file: {img_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR
        label = row.label_group

        if self.transform:
            image = self.transform(image=image)['image']

        return {
            'image' : image,
            'label' : torch.tensor(label).long()
        }

In [None]:
def get_train_transforms():
    """Build the albumentations pipeline applied to training images.

    Steps, in order: resize to (IMG_SIZE + 32) square, centre-crop back to
    IMG_SIZE, random horizontal flip (p=0.5), normalise with Config.MEAN /
    Config.STD, convert to a tensor.
    """
    crop_side = Config.IMG_SIZE
    resize_side = Config.IMG_SIZE+32

    steps = [
        albumentations.Resize(resize_side, resize_side, always_apply=True),
        albumentations.CenterCrop(crop_side, crop_side, always_apply=True),
        albumentations.HorizontalFlip(p=0.5),
        albumentations.Normalize(mean=Config.MEAN, std=Config.STD),
        ToTensorV2(p=1.0),
    ]
    return albumentations.Compose(steps)

def get_valid_transforms():
    """Build the albumentations pipeline applied to validation images.

    Same as the training pipeline minus the random flip: resize to
    (IMG_SIZE + 32) square, centre-crop to IMG_SIZE, normalise with
    Config.MEAN / Config.STD, convert to a tensor.
    """
    crop_side = Config.IMG_SIZE
    resize_side = Config.IMG_SIZE+32

    steps = [
        albumentations.Resize(resize_side, resize_side, always_apply=True),
        albumentations.CenterCrop(crop_side, crop_side, always_apply=True),
        albumentations.Normalize(mean=Config.MEAN, std=Config.STD),
        ToTensorV2(p=1.0),
    ]
    return albumentations.Compose(steps)

In [None]:
class ShopeeScheduler(_LRScheduler):
    """Ramp-up / sustain / exponential-decay learning-rate schedule.

    The learning rate is a function of ``last_epoch`` (see
    ``_compute_lr_from_epoch``):

    * ``last_epoch < lr_ramp_ep``: linear ramp from ``lr_start`` towards ``lr_max``
    * next ``lr_sus_ep`` epochs: held at ``lr_max``
    * afterwards: ``(lr_max - lr_min) * lr_decay ** n + lr_min`` where ``n`` is
      the number of epochs past the ramp and sustain phases

    The same value is applied to every parameter group.

    NOTE(review): ``get_lr`` advances ``self.last_epoch`` itself. The PyTorch
    base-class ``step()`` is expected to advance it as well, in which case the
    counter moves by two per ``scheduler.step()`` and the schedule runs about
    twice as fast as ``lr_ramp_ep`` suggests. The behaviour is kept unchanged
    here because existing training results depend on it — confirm whether this
    is intended before changing it.

    Requires the ``warnings`` module to be imported at notebook level.
    """

    def __init__(self, optimizer, lr_start=5e-6, lr_max=1e-5,
                 lr_min=1e-6, lr_ramp_ep=5, lr_sus_ep=0, lr_decay=0.8,
                 last_epoch=-1):
        # Schedule parameters must be set before the base-class constructor
        # runs, since get_lr() reads them.
        self.lr_start = lr_start
        self.lr_max = lr_max
        self.lr_min = lr_min
        self.lr_ramp_ep = lr_ramp_ep
        self.lr_sus_ep = lr_sus_ep
        self.lr_decay = lr_decay
        super(ShopeeScheduler, self).__init__(optimizer, last_epoch)

    def get_lr(self):
        """Return the learning rate for each param group and advance ``last_epoch``.

        Emits a ``UserWarning`` when called outside of ``step()``.
        """
        if not self._get_lr_called_within_step:
            warnings.warn("To get the last learning rate computed by the scheduler, "
                          "please use `get_last_lr()`.", UserWarning)

        n_groups = len(self.optimizer.param_groups)

        if self.last_epoch == 0:
            # Very first call: start exactly at lr_start.
            self.last_epoch += 1
            return [self.lr_start] * n_groups

        lr = self._compute_lr_from_epoch()
        self.last_epoch += 1  # see NOTE(review) in the class docstring

        return [lr] * n_groups

    def _get_closed_form_lr(self):
        return self.base_lrs

    def _compute_lr_from_epoch(self):
        """Evaluate the ramp / sustain / decay formula at ``self.last_epoch``."""
        if self.last_epoch < self.lr_ramp_ep:
            lr = ((self.lr_max - self.lr_start) /
                  self.lr_ramp_ep * self.last_epoch +
                  self.lr_start)

        elif self.last_epoch < self.lr_ramp_ep + self.lr_sus_ep:
            lr = self.lr_max

        else:
            lr = ((self.lr_max - self.lr_min) * self.lr_decay**
                  (self.last_epoch - self.lr_ramp_ep - self.lr_sus_ep) +
                  self.lr_min)
        return lr

In [None]:
def centralized_gradient(x, use_gc=True, gc_conv_only=False):
    """Subtract, in place, the mean over all dims but the first from ``x``.

    Args:
        x: tensor to centralize (modified in place).
        use_gc: when False the tensor is returned untouched.
        gc_conv_only: when True only tensors with more than 3 dims are
            centralized; otherwise any tensor with more than 1 dim is.

    Returns:
        The same tensor object ``x``.
    """
    if not use_gc:
        return x

    rank = len(list(x.size()))
    min_rank = 3 if gc_conv_only else 1
    if rank > min_rank:
        reduce_dims = tuple(range(1, rank))
        x.add_(-x.mean(dim=reduce_dims, keepdim=True))
    return x


class Ranger(Optimizer):
    """Adam-style optimizer with a rectified (RAdam) step size, integrated
    look-ahead ("slow") weights and optional gradient centralization (GC).

    Per parameter, ``step()``:
      1. optionally centralizes the gradient (``gc_loc=True``),
      2. updates the first/second moment moving averages,
      3. computes the rectified step size (cached in ``radam_buffer``),
      4. applies the update (optionally centralizing it when ``gc_loc=False``),
      5. every ``k`` steps interpolates the slow weights towards the fast
         weights and copies the result back into the parameter.
    """

    def __init__(self, params, lr=1e-3,                       # lr
                 alpha=0.5, k=5, N_sma_threshhold=5,           # Ranger options
                 betas=(.95, 0.999), eps=1e-5, weight_decay=0,  # Adam options
                 # Gradient centralization on or off, applied to conv layers only or conv + fc layers
                 use_gc=True, gc_conv_only=False, gc_loc=True
                 ):
        """
        Args:
            params: iterable of parameters or parameter-group dicts.
            lr: learning rate (must be > 0).
            alpha: look-ahead interpolation factor, in [0, 1].
            k: number of steps between look-ahead synchronisations (>= 1).
            N_sma_threshhold: threshold on N_sma above which the
                variance-rectified update is used.
            betas: coefficients for the first/second moment moving averages.
            eps: term added to the denominator (must be > 0).
            weight_decay: weight-decay coefficient; 0 disables it.
            use_gc: enable gradient centralization.
            gc_conv_only: restrict GC to tensors with more than 3 dims.
            gc_loc: True applies GC to the raw gradient, False applies it to
                the final update direction.

        Raises:
            ValueError: if alpha, k, lr or eps is out of range.
        """

        # parameter checks
        if not 0.0 <= alpha <= 1.0:
            raise ValueError(f'Invalid slow update rate: {alpha}')
        if not 1 <= k:
            raise ValueError(f'Invalid lookahead steps: {k}')
        if not lr > 0:
            raise ValueError(f'Invalid Learning Rate: {lr}')
        if not eps > 0:
            raise ValueError(f'Invalid eps: {eps}')

        # parameter comments:
        # beta1 (momentum) of .95 seems to work better than .90...
        # N_sma_threshold of 5 seems better in testing than 4.
        # In both cases, worth testing on your dataset (.90 vs .95, 4 vs 5) to make sure which works best for you.

        # prep defaults and init torch.optim base
        defaults = dict(lr=lr, alpha=alpha, k=k, step_counter=0, betas=betas,
                        N_sma_threshhold=N_sma_threshhold, eps=eps, weight_decay=weight_decay)
        super().__init__(params, defaults)

        # adjustable threshold
        self.N_sma_threshhold = N_sma_threshhold

        # look ahead params
        # NOTE: step() reads alpha from self.alpha but k from group['k'].

        self.alpha = alpha
        self.k = k

        # radam buffer for state: 10 slots of [step, N_sma, step_size], indexed
        # by step % 10 and shared by every parameter of the optimizer
        self.radam_buffer = [[None, None, None] for ind in range(10)]

        # gc on or off
        self.gc_loc = gc_loc
        self.use_gc = use_gc
        self.gc_conv_only = gc_conv_only
        # level of gradient centralization
        #self.gc_gradient_threshold = 3 if gc_conv_only else 1

        print(
            f"Ranger optimizer loaded. \nGradient Centralization usage = {self.use_gc}")
        if (self.use_gc and self.gc_conv_only == False):
            print(f"GC applied to both conv and fc layers")
        elif (self.use_gc and self.gc_conv_only == True):
            print(f"GC applied to conv layers only")

    def __setstate__(self, state):
        """Restore optimizer state (prints a trace message when invoked)."""
        print("set state called")
        super(Ranger, self).__setstate__(state)

    def step(self, closure=None):
        """Perform a single optimization step over all parameter groups.

        Args:
            closure: accepted for API compatibility but never called.

        Returns:
            Always ``None`` (``loss`` is never assigned a value).

        Raises:
            RuntimeError: if a parameter has a sparse gradient.
        """
        loss = None
        # note - below is commented out b/c I have other work that passes back the loss as a float, and thus not a callable closure.
        # Uncomment if you need to use the actual closure...

        # if closure is not None:
        #loss = closure()

        # Evaluate averages and grad, update param tensors
        for group in self.param_groups:

            for p in group['params']:
                if p.grad is None:
                    continue
                # work on a float32 copy of the gradient
                grad = p.grad.data.float()

                if grad.is_sparse:
                    raise RuntimeError(
                        'Ranger optimizer does not support sparse gradients')

                # float32 working copy of the weights; written back to p below
                p_data_fp32 = p.data.float()

                state = self.state[p]  # get state dict for this param

                if len(state) == 0:  # if first time to run...init dictionary with our desired entries
                    # if self.first_run_check==0:
                    # self.first_run_check=1
                    #print("Initializing slow buffer...should not see this at load from saved model!")
                    state['step'] = 0
                    state['exp_avg'] = torch.zeros_like(p_data_fp32)
                    state['exp_avg_sq'] = torch.zeros_like(p_data_fp32)

                    # look ahead weight storage now in state dict
                    state['slow_buffer'] = torch.empty_like(p.data)
                    state['slow_buffer'].copy_(p.data)

                else:
                    # keep the moment buffers in the same type as the working copy
                    state['exp_avg'] = state['exp_avg'].type_as(p_data_fp32)
                    state['exp_avg_sq'] = state['exp_avg_sq'].type_as(
                        p_data_fp32)

                # begin computations
                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                beta1, beta2 = group['betas']

                # GC operation for Conv layers and FC layers
                # if grad.dim() > self.gc_gradient_threshold:
                #    grad.add_(-grad.mean(dim=tuple(range(1, grad.dim())), keepdim=True))
                if self.gc_loc:
                    grad = centralized_gradient(grad, use_gc=self.use_gc, gc_conv_only=self.gc_conv_only)

                state['step'] += 1

                # compute variance mov avg
                exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)

                # compute mean moving avg
                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)

                # Rectification terms depend only on the step count (and betas),
                # so they are cached per step in the shared buffer.
                buffered = self.radam_buffer[int(state['step'] % 10)]

                if state['step'] == buffered[0]:
                    N_sma, step_size = buffered[1], buffered[2]
                else:
                    buffered[0] = state['step']
                    beta2_t = beta2 ** state['step']
                    N_sma_max = 2 / (1 - beta2) - 1
                    N_sma = N_sma_max - 2 * \
                        state['step'] * beta2_t / (1 - beta2_t)
                    buffered[1] = N_sma
                    if N_sma > self.N_sma_threshhold:
                        # variance-rectified step size with bias correction
                        step_size = math.sqrt((1 - beta2_t) * (N_sma - 4) / (N_sma_max - 4) * (
                            N_sma - 2) / N_sma * N_sma_max / (N_sma_max - 2)) / (1 - beta1 ** state['step'])
                    else:
                        # below the threshold: bias-corrected momentum only
                        step_size = 1.0 / (1 - beta1 ** state['step'])
                    buffered[2] = step_size

                # if group['weight_decay'] != 0:
                #    p_data_fp32.add_(-group['weight_decay']
                #                     * group['lr'], p_data_fp32)

                # apply lr
                if N_sma > self.N_sma_threshhold:
                    # adaptive (Adam-like) direction
                    denom = exp_avg_sq.sqrt().add_(group['eps'])
                    G_grad = exp_avg / denom
                else:
                    # momentum-only direction; NOTE: G_grad aliases exp_avg here,
                    # so the in-place ops below also modify the stored average
                    G_grad = exp_avg

                # weight decay is folded into the update direction
                if group['weight_decay'] != 0:
                    G_grad.add_(p_data_fp32, alpha=group['weight_decay'])
                # GC operation
                if self.gc_loc == False:
                    G_grad = centralized_gradient(G_grad, use_gc=self.use_gc, gc_conv_only=self.gc_conv_only)

                p_data_fp32.add_(G_grad, alpha=-step_size * group['lr'])
                p.data.copy_(p_data_fp32)

                # integrated look ahead...
                # we do it at the param level instead of group level
                if state['step'] % group['k'] == 0:
                    # get access to slow param tensor
                    slow_p = state['slow_buffer']
                    # (fast weights - slow weights) * alpha
                    slow_p.add_(p.data - slow_p, alpha=self.alpha)
                    # copy interpolated weights to RAdam param tensor
                    p.data.copy_(slow_p)

        return loss

In [None]:
#credit : https://github.com/tyunist/memory_efficient_mish_swish/blob/master/mish.py

''' I wanted to understand how to implement an activation with a custom backward pass in PyTorch, so I chose this approach.
    You can also simply use the plain module below instead.

class Mish(nn.Module):
    def __init__(self):
        super(Mish, self).__init__()

    def forward(self, input):
        return input * (torch.tanh(F.softplus(input)))
'''

class Mish_func(torch.autograd.Function):
    """Mish activation, ``x * tanh(softplus(x))``, with a hand-written backward.

    Only the input is saved for the backward pass; the intermediates are
    recomputed there.
    """

    @staticmethod
    def forward(ctx, i):
        ctx.save_for_backward(i)
        return i * torch.tanh(F.softplus(i))

    @staticmethod
    def backward(ctx, grad_output):
        (x,) = ctx.saved_tensors

        # h = softplus(x), written out as log(1 + exp(x))
        softplus_x = (1. + x.exp()).log()

        # d tanh(h) / dh = 1 / cosh(h)^2
        dtanh_dh = 1./softplus_x.cosh().pow_(2)

        # d softplus(x) / dx = sigmoid(x)   (i.e. exp(x) / (1 + exp(x)))
        dh_dx = x.sigmoid()

        # chain rule: d tanh(softplus(x)) / dx
        dtanh_dx = dtanh_dh * dh_dx

        # product rule on x * tanh(softplus(x))
        local_grad = torch.tanh(F.softplus(x)) + x * dtanh_dx

        return grad_output * local_grad


class Mish(nn.Module):
    """Module wrapper around ``Mish_func`` so it can replace other activations."""

    def __init__(self, **kwargs):
        # Keyword arguments are accepted and ignored.
        super(Mish, self).__init__()
        print("Mish initialized")

    def forward(self, input_tensor):
        activated = Mish_func.apply(input_tensor)
        return activated

In [None]:
def replace_activations(model, existing_layer, new_layer):
    """Recursively replace every sub-module of type ``existing_layer``.

    Args:
        model: module whose ``_modules`` tree is rewritten in place.
        existing_layer: class to look for; the match is on the exact type
            (subclasses are not replaced).
        new_layer: module *instance* installed at every matching position
            (the same object is shared by all of them).

    Returns:
        ``model`` itself, after modification.
    """
    # Snapshot the items so reassigning entries cannot disturb the iteration.
    for name, module in reversed(list(model._modules.items())):
        if module is None:
            # add_module(name, None) leaves placeholder entries; skip them.
            continue

        if len(list(module.children())) > 0:
            model._modules[name] = replace_activations(module, existing_layer, new_layer)

        if type(module) == existing_layer:
            model._modules[name] = new_layer
    return model

In [None]:
class ArcMarginProduct(nn.Module):
    """ArcFace-style additive angular margin head that returns the loss.

    ``forward`` computes the cosine between L2-normalised inputs and
    L2-normalised class weights, adds the angular ``margin`` to the target
    class, scales by ``scale`` and returns the cross-entropy against ``label``.

    Args:
        in_features: size of each input embedding.
        out_features: number of classes.
        scale: multiplier applied to the margin-adjusted cosines.
        margin: angular margin (radians) added to the target-class angle.
        easy_margin: if True the margin is applied only where cosine > 0;
            otherwise ``cosine - sin(pi - margin) * margin`` is used where
            cosine <= cos(pi - margin).
        ls_eps: label-smoothing factor applied to the one-hot mask (0 = off).
        class_weights: optional per-class weights for the cross-entropy,
            indexed by class id. When omitted, a module-level variable named
            ``normedWeights`` is used if it exists (legacy behaviour of this
            notebook); otherwise the loss is unweighted.
    """

    def __init__(self, in_features, out_features, scale=30.0, margin=0.50, easy_margin=False, ls_eps=0.0,
                 class_weights=None):
        super(ArcMarginProduct, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.scale = scale
        self.margin = margin
        self.ls_eps = ls_eps  # label smoothing
        self.weight = nn.Parameter(torch.FloatTensor(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)

        self.easy_margin = easy_margin
        self.cos_m = math.cos(margin)
        self.sin_m = math.sin(margin)
        self.th = math.cos(math.pi - margin)
        self.mm = math.sin(math.pi - margin) * margin

        if class_weights is not None:
            class_weights = torch.as_tensor(class_weights, dtype=torch.float)
        # Non-persistent: follows .to(device) but leaves state_dict keys unchanged.
        self.register_buffer('class_weights', class_weights, persistent=False)

    def forward(self, input, label):
        """Return the scalar margin-adjusted cross-entropy loss for a batch."""
        # --------------------------- cos(theta) & phi(theta) ---------------------------
        cosine = F.linear(F.normalize(input), F.normalize(self.weight))
        # Rounding can push cosine^2 marginally above 1; clamp so sqrt never yields NaN.
        sine = torch.sqrt((1.0 - torch.pow(cosine, 2)).clamp(0, 1))
        phi = cosine * self.cos_m - sine * self.sin_m  # cos(theta + margin)
        if self.easy_margin:
            phi = torch.where(cosine > 0, phi, cosine)
        else:
            phi = torch.where(cosine > self.th, phi, cosine - self.mm)
        # --------------------------- convert label to one-hot ---------------------------
        # Allocate on the same device as the activations instead of assuming CUDA.
        one_hot = torch.zeros(cosine.size(), device=cosine.device)
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        if self.ls_eps > 0:
            one_hot = (1 - self.ls_eps) * one_hot + self.ls_eps / self.out_features
        # Use phi for the target class and the plain cosine everywhere else.
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output *= self.scale

        class_weights = self.class_weights
        if class_weights is None:
            # Backward compatibility with the notebook-global weights.
            class_weights = globals().get('normedWeights')
        if class_weights is not None:
            class_weights = class_weights.to(output.device)

        return F.cross_entropy(output, label, weight=class_weights)

class ShopeeModel(nn.Module):
    """timm backbone + global average pooling (+ optional FC neck) + ArcFace head.

    Args:
        n_classes: number of classes for the ArcMarginProduct head.
        model_name: timm model name. Supported: ``'resnext50_32x4d'`` and any
            name containing ``'efficientnet'``, ``'nfnet'`` or ``'vit'``.
        fc_dim: output size of the optional FC neck.
        margin: angular margin passed to ArcMarginProduct.
        scale: scale passed to ArcMarginProduct.
        use_fc: if True, insert Linear/BatchNorm/ReLU layers between the pooled
            backbone features and the head.
        pretrained: forwarded to ``timm.create_model``.

    Raises:
        ValueError: if ``model_name`` is not one of the supported families.
    """

    def __init__(
        self,
        n_classes = Config.CLASSES,
        model_name = Config.MODEL_NAME,
        fc_dim = Config.FC_DIM,
        margin = Config.MARGIN,
        scale = Config.SCALE,
        use_fc = False,
        pretrained = True):


        super(ShopeeModel,self).__init__()
        print('Building Model Backbone for {} model'.format(model_name))

        self.backbone = timm.create_model(model_name, pretrained=pretrained)
        # self.backbone.stages[1][0].downsample.pool = nn.AdaptiveMaxPool2d(1)
        # self.backbone.stages[2][0].downsample.pool = nn.AdaptiveMaxPool2d(1)
        # self.backbone.stages[3][0].downsample.pool = nn.AdaptiveMaxPool2d(1)

        # Strip the backbone's own classifier (and pooling where it has one) so
        # it outputs features; remember the feature width for the head.
        if model_name == 'resnext50_32x4d':
            final_in_features = self.backbone.fc.in_features
            self.backbone.fc = nn.Identity()
            self.backbone.global_pool = nn.Identity()

        elif 'efficientnet' in model_name:
            final_in_features = self.backbone.classifier.in_features
            self.backbone.classifier = nn.Identity()
            self.backbone.global_pool = nn.Identity()

        elif 'nfnet' in model_name:
            final_in_features = self.backbone.head.fc.in_features
            self.backbone.head.fc = nn.Identity()
            self.backbone.head.global_pool = nn.Identity()

        elif 'vit' in model_name:
            final_in_features = self.backbone.head.in_features
            self.backbone.head = nn.Identity()

        else:
            # Without this branch final_in_features would be unbound below.
            raise ValueError(
                "Unsupported model_name {!r}: expected 'resnext50_32x4d' or a name "
                "containing 'efficientnet', 'nfnet' or 'vit'".format(model_name))

        self.pooling =  nn.AdaptiveAvgPool2d(1)
        self.max_pooling = nn.AdaptiveMaxPool2d(1)  # not used in forward
        self.bn = nn.BatchNorm1d(final_in_features)  # replaced below when use_fc

        self.use_fc = use_fc

        if use_fc:
            self.dropout = nn.Dropout(p=0.0)
            self.fc1 = nn.Linear(final_in_features, fc_dim+512)
            self.bn1 = nn.BatchNorm1d(fc_dim+512)
            self.fc = nn.Linear(fc_dim+512, fc_dim)
            self.bn = nn.BatchNorm1d(fc_dim)
            self.relu = nn.ReLU()
            self._init_params()
            final_in_features = fc_dim

        self.final = ArcMarginProduct(
            final_in_features,
            n_classes,
            scale = scale,
            margin = margin,
            easy_margin = False,
            ls_eps = 0.0
        )

    def _init_params(self):
        """Initialise the last FC layer and its batch-norm (use_fc=True only)."""
        nn.init.xavier_normal_(self.fc.weight)
        nn.init.constant_(self.fc.bias, 0)
        nn.init.constant_(self.bn.weight, 1)
        nn.init.constant_(self.bn.bias, 0)

    def forward(self, image, label):
        """Return ``(feature, loss)``.

        The second element is named ``logits`` for historical reasons, but it
        is the value returned by the ArcMarginProduct head, i.e. the loss.
        """
        feature = self.extract_feat(image)
        logits = self.final(feature,label)
        return feature, logits

    def extract_feat(self, x):
        """Compute one flat embedding per image."""
        batch_size = x.shape[0]
        x = self.backbone(x)
        # Backbones that already return (batch, features) need no spatial
        # pooling; AdaptiveAvgPool2d would reject a 2-D input.
        if x.dim() > 2:
            x = self.pooling(x)
        x = x.view(batch_size, -1)

        if self.use_fc:
            #x = self.dropout(x)
            x = self.fc1(x)
            x = self.bn1(x)
            x = self.relu(x)
            x = self.fc(x)
            x = self.bn(x)
            x = self.relu(x)
        return x

In [None]:
def train_fn(model, data_loader, optimizer, scheduler, i):
    """Train ``model`` for one epoch and step the scheduler once at the end.

    Args:
        model: called as ``model(**batch)``; must return ``(_, loss)``.
        data_loader: yields dict batches whose values are tensors.
        optimizer: optimizer stepped after every batch.
        scheduler: LR scheduler stepped once after the last batch.
        i: zero-based epoch index, used only for the progress-bar label.

    Returns:
        Mean of the per-batch losses over the epoch.
    """
    model.train()
    running_loss = 0.0
    progress = tqdm(data_loader, desc = "Epoch" + " [TRAIN] " + str(i+1))

    for step, batch in enumerate(progress):
        # move every tensor of the batch onto the training device
        for key in batch:
            batch[key] = batch[key].to(Config.DEVICE)

        optimizer.zero_grad()
        _, loss = model(**batch)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        progress.set_postfix({'loss' : '%.6f' %float(running_loss/(step+1)), 'LR' : optimizer.param_groups[0]['lr']})

    scheduler.step()

    return running_loss / len(data_loader)

def eval_fn(model, data_loader, i):
    """Run ``model`` over ``data_loader`` without gradients.

    Args:
        model: called as ``model(**batch)``; must return ``(features, loss)``.
        data_loader: yields dict batches whose values are tensors.
        i: zero-based epoch index, used only for the progress-bar label.

    Returns:
        Tuple ``(mean per-batch loss, embeddings)`` where the embeddings are
        the per-batch feature arrays concatenated along the first axis.
    """
    model.eval()
    running_loss = 0.0
    collected = []
    progress = tqdm(data_loader, desc = "Epoch" + " [VALID] " + str(i+1))

    with torch.no_grad():
        for step, batch in enumerate(progress):
            # move every tensor of the batch onto the evaluation device
            for key in batch:
                batch[key] = batch[key].to(Config.DEVICE)

            features, loss = model(**batch)
            collected.append(features.detach().cpu().numpy())

            running_loss += loss.item()
            progress.set_postfix({'loss' : '%.6f' %float(running_loss/(step+1))})

    image_embeddings = np.concatenate(collected)
    print(f'Our image embeddings shape is {image_embeddings.shape}')

    return running_loss / len(data_loader), image_embeddings

In [None]:
def getMetric(col):
    """Return a row-wise F1 scorer comparing ``row.target`` with ``row[col]``.

    The returned function computes ``2 * |target ∩ pred| / (|target| + |pred|)``
    and is meant to be used with ``DataFrame.apply(..., axis=1)``.
    """
    def f1score(row):
        truth = row.target
        predicted = row[col]
        overlap = np.intersect1d(truth, predicted)
        return 2*len(overlap) / (len(truth)+len(predicted))
    return f1score

from sklearn.preprocessing import normalize
import gc

def get_cv(df, outs):
    """Sweep cosine-distance thresholds and report the best mean row-wise F1.

    For every threshold in ``np.arange(0.1, 0.7, 0.05)`` each row is matched
    with all rows whose cosine distance to it is below the threshold, and the
    resulting posting-id sets are scored against the ground-truth groups.

    Args:
        df: dataframe with ``label_group`` and ``posting_id`` columns, in the
            same row order as ``outs``. It is modified in place: the columns
            ``target``, ``preds`` and ``f1`` are added or overwritten (the last
            two hold the values of the final threshold tried).
        outs: 2-D numpy array of embeddings, one row per dataframe row.

    Returns:
        The best mean F1 score over all thresholds.
    """
    thresholds = list(np.arange(0.1, 0.7, 0.05))
    scores = []

    # Ground truth: every posting_id sharing the row's label_group.
    tmp = df.groupby('label_group').posting_id.agg('unique').to_dict()
    df['target'] = df.label_group.map(tmp)

    # L2-normalise so that a dot product is a cosine similarity.
    outsn = normalize(outs)

    # Use the GPU when there is one, but do not require it.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    outsn_torch = torch.from_numpy(outsn).to(device)

    # Pairwise cosine distances, converted to numpy once so the per-row
    # comparisons below do not go through a tensor conversion each time.
    distances = (1 - torch.matmul(outsn_torch, outsn_torch.T).cpu().T).numpy()

    for threshold in thresholds:
        cutoff = float(threshold)
        predictions = []
        for k in range(outs.shape[0]):
            idx = np.where(distances[k] < cutoff)[0]
            predictions.append(df.iloc[idx].posting_id.values)
        df["preds"] = predictions
        df['f1'] = df.apply(getMetric("preds"),axis=1)
        score = df['f1'].mean()
        print(f'Our f1 score for threshold {threshold} is {score}')
        scores.append(score)

    thresholds_scores = pd.DataFrame({'thresholds': thresholds, 'scores': scores})
    max_score = thresholds_scores[thresholds_scores['scores'] == thresholds_scores['scores'].max()]
    best_threshold = max_score['thresholds'].values[0]
    best_score = max_score['scores'].values[0]
    print(f'Our best score is {best_score} and has a threshold {best_threshold}')

    # Release the N x N matrices before training resumes.
    del distances, outsn_torch
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    return best_score

In [None]:
from sklearn.model_selection import GroupKFold

data = pd.read_csv(Config.TRAIN_CSV)

# Assign each row a validation fold; GroupKFold keeps all rows of one
# label_group in the same fold.
skf = GroupKFold(5)
data['fold'] = -1
for i, (train_idx, valid_idx) in enumerate(skf.split(X=data, groups=data['label_group'])):
    data.loc[valid_idx, 'fold'] = i

# Map the raw label_group ids onto contiguous class indices 0..n_classes-1.
labelencoder= LabelEncoder()
data['label_group'] = labelencoder.fit_transform(data['label_group'])

# Per-class loss weights, 1 - (class frequency). They are computed AFTER label
# encoding and indexed by encoded class id, because the cross-entropy applies
# weight[c] to class c. (value_counts() order is by frequency, which does not
# line up with the class ids.)
nSamples = np.bincount(data['label_group'].values, minlength=len(labelencoder.classes_))
normedWeights = 1 - (nSamples / nSamples.sum())
normedWeights = torch.FloatTensor(normedWeights).to(Config.DEVICE)

In [None]:
# image = cv2.imread(data['filepath'].values[19814])
# image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# image = cv2.resize(image, (512, 512))
# plt.imshow(image)

In [None]:
def run_training():
    """Train one fold end to end.

    Uses the module-level ``data`` frame: rows whose ``fold`` equals
    ``Config.fold_to_train`` form the validation set, all others the training
    set. After every epoch the model weights are saved to the working
    directory and the validation F1 is reported via ``get_cv``.
    """
    train = data[data['fold']!=Config.fold_to_train].reset_index(drop=True)
    valid = data[data['fold']==Config.fold_to_train].reset_index(drop=True)

    trainset = ShopeeDataset(train, transform = get_train_transforms())
    validset = ShopeeDataset(valid, transform = get_valid_transforms())

    trainloader = torch.utils.data.DataLoader(
        trainset,
        batch_size = Config.BATCH_SIZE,
        pin_memory = True,
        num_workers = Config.NUM_WORKERS,
        shuffle = True,
        drop_last = True
    )

    validloader = torch.utils.data.DataLoader(
        validset,
        batch_size = Config.BATCH_SIZE,
        pin_memory = True,
        num_workers = Config.NUM_WORKERS,
        shuffle = False,
        drop_last = False
    )

    model = ShopeeModel()
    model.to(Config.DEVICE)

    if Config.RANGER:
        # Swap every SiLU activation of the backbone for Mish when using Ranger.
        existing_layer = torch.nn.SiLU
        new_layer = Mish()
        model = replace_activations(model, existing_layer, new_layer)
        optimizer = Ranger(model.parameters(), lr = Config.SCHEDULER_PARAMS['lr_start'])
    else:
        optimizer = Adam(model.parameters(), lr = Config.SCHEDULER_PARAMS['lr_start'])

    scheduler = ShopeeScheduler(optimizer,**Config.SCHEDULER_PARAMS)

    valid_every = 1
    final_epoch = Config.EPOCHS - 1
    for epoch in range(Config.EPOCHS):

        train_fn(model, trainloader, optimizer, scheduler, epoch)
        # Pass the current epoch (not a notebook-level variable) so the
        # validation progress bar is labelled correctly.
        _, image_embeddings = eval_fn(model, validloader, epoch)

        # Name the checkpoint after the configuration actually being trained.
        torch.save(
            model.state_dict(),
            f'arcface_{Config.IMG_SIZE}x{Config.IMG_SIZE}_{Config.MODEL_NAME}'
            f'_epoch{epoch}_fold{Config.fold_to_train}.pth'
        )

        # Score every `valid_every` epochs (always including epoch 0) and on the last epoch.
        if (epoch % valid_every == 0) or (epoch == final_epoch):
            _ = get_cv(valid, image_embeddings)

In [None]:
run_training()

Building Model Backbone for eca_nfnet_l0 model


Downloading: "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/ecanfnet_l0_ra2-e3e9ac50.pth" to /root/.cache/torch/hub/checkpoints/ecanfnet_l0_ra2-e3e9ac50.pth


HBox(children=(FloatProgress(value=0.0, description='Epoch [TRAIN] 1', max=1712.0, style=ProgressStyle(descrip…




HBox(children=(FloatProgress(value=0.0, description='Epoch [VALID] 5', max=429.0, style=ProgressStyle(descript…


Our image embeddings shape is (6850, 2304)
Our f1 score for threshold 0.1 is 0.6489587905494529
Our f1 score for threshold 0.15000000000000002 is 0.681743525662333
Our f1 score for threshold 0.20000000000000004 is 0.7098306641801627
Our f1 score for threshold 0.25000000000000006 is 0.7284123843124848
Our f1 score for threshold 0.30000000000000004 is 0.7321959563194947
Our f1 score for threshold 0.3500000000000001 is 0.7118980610083098
Our f1 score for threshold 0.40000000000000013 is 0.6663090909283472
Our f1 score for threshold 0.45000000000000007 is 0.5988803108340941
Our f1 score for threshold 0.5000000000000001 is 0.5160990584036014
Our f1 score for threshold 0.5500000000000002 is 0.4168781722055954
Our f1 score for threshold 0.6000000000000002 is 0.30814921281676205
Our f1 score for threshold 0.6500000000000001 is 0.2016806747571833
Our best score is 0.7321959563194947 and has a threshold 0.30000000000000004


HBox(children=(FloatProgress(value=0.0, description='Epoch [TRAIN] 2', max=1712.0, style=ProgressStyle(descrip…




HBox(children=(FloatProgress(value=0.0, description='Epoch [VALID] 5', max=429.0, style=ProgressStyle(descript…


Our image embeddings shape is (6850, 2304)
Our f1 score for threshold 0.1 is 0.6563480793485832
Our f1 score for threshold 0.15000000000000002 is 0.6819957020742804
Our f1 score for threshold 0.20000000000000004 is 0.7080893400382262
Our f1 score for threshold 0.25000000000000006 is 0.7338174046028733
Our f1 score for threshold 0.30000000000000004 is 0.7566785078466594
Our f1 score for threshold 0.3500000000000001 is 0.7768774369046163
Our f1 score for threshold 0.40000000000000013 is 0.7861136719768606
Our f1 score for threshold 0.45000000000000007 is 0.7830937080926255
Our f1 score for threshold 0.5000000000000001 is 0.7559439677224955
Our f1 score for threshold 0.5500000000000002 is 0.6926407428851094
Our f1 score for threshold 0.6000000000000002 is 0.5827202998081421
Our f1 score for threshold 0.6500000000000001 is 0.44040305719917344
Our best score is 0.7861136719768606 and has a threshold 0.40000000000000013


HBox(children=(FloatProgress(value=0.0, description='Epoch [TRAIN] 3', max=1712.0, style=ProgressStyle(descrip…




HBox(children=(FloatProgress(value=0.0, description='Epoch [VALID] 5', max=429.0, style=ProgressStyle(descript…


Our image embeddings shape is (6850, 2304)
Our f1 score for threshold 0.1 is 0.6527550645771121
Our f1 score for threshold 0.15000000000000002 is 0.675865871168831
Our f1 score for threshold 0.20000000000000004 is 0.7011199608737333
Our f1 score for threshold 0.25000000000000006 is 0.7290427280411651
Our f1 score for threshold 0.30000000000000004 is 0.7519206900914858
Our f1 score for threshold 0.3500000000000001 is 0.7741249764667001
Our f1 score for threshold 0.40000000000000013 is 0.7948729748948288
Our f1 score for threshold 0.45000000000000007 is 0.8083614159114381
Our f1 score for threshold 0.5000000000000001 is 0.808001768003866
Our f1 score for threshold 0.5500000000000002 is 0.7787057865004651
Our f1 score for threshold 0.6000000000000002 is 0.702105726433735
Our f1 score for threshold 0.6500000000000001 is 0.5658057035455277
Our best score is 0.8083614159114381 and has a threshold 0.45000000000000007


HBox(children=(FloatProgress(value=0.0, description='Epoch [TRAIN] 4', max=1712.0, style=ProgressStyle(descrip…




HBox(children=(FloatProgress(value=0.0, description='Epoch [VALID] 5', max=429.0, style=ProgressStyle(descript…


Our image embeddings shape is (6850, 2304)
Our f1 score for threshold 0.1 is 0.6497733029909066
Our f1 score for threshold 0.15000000000000002 is 0.672670878577069
Our f1 score for threshold 0.20000000000000004 is 0.6935367158245654
Our f1 score for threshold 0.25000000000000006 is 0.7170372705802693
Our f1 score for threshold 0.30000000000000004 is 0.7390353261248209
Our f1 score for threshold 0.3500000000000001 is 0.7630940295376354
Our f1 score for threshold 0.40000000000000013 is 0.7840749494743823
Our f1 score for threshold 0.45000000000000007 is 0.802871037867982
Our f1 score for threshold 0.5000000000000001 is 0.8152236953181262
Our f1 score for threshold 0.5500000000000002 is 0.8206115836603193
Our f1 score for threshold 0.6000000000000002 is 0.7983230813782778
Our f1 score for threshold 0.6500000000000001 is 0.7314303125793272
Our best score is 0.8206115836603193 and has a threshold 0.5500000000000002


HBox(children=(FloatProgress(value=0.0, description='Epoch [TRAIN] 5', max=1712.0, style=ProgressStyle(descrip…




HBox(children=(FloatProgress(value=0.0, description='Epoch [VALID] 5', max=429.0, style=ProgressStyle(descript…


Our image embeddings shape is (6850, 2304)
Our f1 score for threshold 0.1 is 0.6462898801743306
Our f1 score for threshold 0.15000000000000002 is 0.6672869379491041
Our f1 score for threshold 0.20000000000000004 is 0.6861676093883281
Our f1 score for threshold 0.25000000000000006 is 0.7089685432412782
Our f1 score for threshold 0.30000000000000004 is 0.7343177390777164
Our f1 score for threshold 0.3500000000000001 is 0.7582675439602017
Our f1 score for threshold 0.40000000000000013 is 0.7807405108276818
Our f1 score for threshold 0.45000000000000007 is 0.8018221829203782
Our f1 score for threshold 0.5000000000000001 is 0.8161646458981792
Our f1 score for threshold 0.5500000000000002 is 0.8262019846851166
Our f1 score for threshold 0.6000000000000002 is 0.8142838895042809
Our f1 score for threshold 0.6500000000000001 is 0.7702878492623185
Our best score is 0.8262019846851166 and has a threshold 0.5500000000000002


HBox(children=(FloatProgress(value=0.0, description='Epoch [TRAIN] 6', max=1712.0, style=ProgressStyle(descrip…




HBox(children=(FloatProgress(value=0.0, description='Epoch [VALID] 5', max=429.0, style=ProgressStyle(descript…


Our image embeddings shape is (6850, 2304)
Our f1 score for threshold 0.1 is 0.6429369751921216
Our f1 score for threshold 0.15000000000000002 is 0.6640268753900271
Our f1 score for threshold 0.20000000000000004 is 0.682222727022098
Our f1 score for threshold 0.25000000000000006 is 0.7055840115813636
Our f1 score for threshold 0.30000000000000004 is 0.7279999597266669
Our f1 score for threshold 0.3500000000000001 is 0.7498380715419627
Our f1 score for threshold 0.40000000000000013 is 0.7734963028754445
Our f1 score for threshold 0.45000000000000007 is 0.7973056002213718
Our f1 score for threshold 0.5000000000000001 is 0.811502963900225
Our f1 score for threshold 0.5500000000000002 is 0.8265755783671115
Our f1 score for threshold 0.6000000000000002 is 0.8222639920549294
Our f1 score for threshold 0.6500000000000001 is 0.7968697320750328
Our best score is 0.8265755783671115 and has a threshold 0.5500000000000002


HBox(children=(FloatProgress(value=0.0, description='Epoch [TRAIN] 7', max=1712.0, style=ProgressStyle(descrip…




HBox(children=(FloatProgress(value=0.0, description='Epoch [VALID] 5', max=429.0, style=ProgressStyle(descript…


Our image embeddings shape is (6850, 2304)
Our f1 score for threshold 0.1 is 0.6404887310879387
Our f1 score for threshold 0.15000000000000002 is 0.6618192503129227
Our f1 score for threshold 0.20000000000000004 is 0.6801332248345693
Our f1 score for threshold 0.25000000000000006 is 0.7024285032242864
Our f1 score for threshold 0.30000000000000004 is 0.7233519488211719
Our f1 score for threshold 0.3500000000000001 is 0.7459332875754168
Our f1 score for threshold 0.40000000000000013 is 0.7708638272897426
Our f1 score for threshold 0.45000000000000007 is 0.7925064267373543
Our f1 score for threshold 0.5000000000000001 is 0.8099356060647466
Our f1 score for threshold 0.5500000000000002 is 0.8250286443368173
Our f1 score for threshold 0.6000000000000002 is 0.8256595095911657
Our f1 score for threshold 0.6500000000000001 is 0.8006813645401702
Our best score is 0.8256595095911657 and has a threshold 0.6000000000000002


HBox(children=(FloatProgress(value=0.0, description='Epoch [TRAIN] 8', max=1712.0, style=ProgressStyle(descrip…




HBox(children=(FloatProgress(value=0.0, description='Epoch [VALID] 5', max=429.0, style=ProgressStyle(descript…


Our image embeddings shape is (6850, 2304)
Our f1 score for threshold 0.1 is 0.6373556094607823
Our f1 score for threshold 0.15000000000000002 is 0.6609273648660147
Our f1 score for threshold 0.20000000000000004 is 0.678706504412043
Our f1 score for threshold 0.25000000000000006 is 0.7000106314000308
Our f1 score for threshold 0.30000000000000004 is 0.7219826813403191
Our f1 score for threshold 0.3500000000000001 is 0.7455355563072248
Our f1 score for threshold 0.40000000000000013 is 0.7690647124472711
Our f1 score for threshold 0.45000000000000007 is 0.7920114097274377
Our f1 score for threshold 0.5000000000000001 is 0.8095214053925595
Our f1 score for threshold 0.5500000000000002 is 0.82510618707253
Our f1 score for threshold 0.6000000000000002 is 0.8287397523359041
Our f1 score for threshold 0.6500000000000001 is 0.8102880064212309
Our best score is 0.8287397523359041 and has a threshold 0.6000000000000002


HBox(children=(FloatProgress(value=0.0, description='Epoch [TRAIN] 9', max=1712.0, style=ProgressStyle(descrip…




HBox(children=(FloatProgress(value=0.0, description='Epoch [VALID] 5', max=429.0, style=ProgressStyle(descript…


Our image embeddings shape is (6850, 2304)
Our f1 score for threshold 0.1 is 0.6362947262229907
Our f1 score for threshold 0.15000000000000002 is 0.6596644075966301
Our f1 score for threshold 0.20000000000000004 is 0.6773243923716108
Our f1 score for threshold 0.25000000000000006 is 0.6991037017562025
Our f1 score for threshold 0.30000000000000004 is 0.7210602620544113
Our f1 score for threshold 0.3500000000000001 is 0.7428782619414579
Our f1 score for threshold 0.40000000000000013 is 0.7673738535934878
Our f1 score for threshold 0.45000000000000007 is 0.7895891894489642
Our f1 score for threshold 0.5000000000000001 is 0.8079168461877367
Our f1 score for threshold 0.5500000000000002 is 0.8236126950433458
Our f1 score for threshold 0.6000000000000002 is 0.8279999293236515
Our f1 score for threshold 0.6500000000000001 is 0.8122413622161312
Our best score is 0.8279999293236515 and has a threshold 0.6000000000000002


HBox(children=(FloatProgress(value=0.0, description='Epoch [TRAIN] 10', max=1712.0, style=ProgressStyle(descri…




HBox(children=(FloatProgress(value=0.0, description='Epoch [VALID] 5', max=429.0, style=ProgressStyle(descript…


Our image embeddings shape is (6850, 2304)
Our f1 score for threshold 0.1 is 0.6340756302017122
Our f1 score for threshold 0.15000000000000002 is 0.659256722012263
Our f1 score for threshold 0.20000000000000004 is 0.6772746210769784
Our f1 score for threshold 0.25000000000000006 is 0.6980275978069377
Our f1 score for threshold 0.30000000000000004 is 0.7193955646004282
Our f1 score for threshold 0.3500000000000001 is 0.7424110591419437
Our f1 score for threshold 0.40000000000000013 is 0.7671482321577064
Our f1 score for threshold 0.45000000000000007 is 0.7888855562344224
Our f1 score for threshold 0.5000000000000001 is 0.8069792772531685
Our f1 score for threshold 0.5500000000000002 is 0.8221469039711962
Our f1 score for threshold 0.6000000000000002 is 0.8281037130795073
Our f1 score for threshold 0.6500000000000001 is 0.8134864898754883
Our best score is 0.8281037130795073 and has a threshold 0.6000000000000002


HBox(children=(FloatProgress(value=0.0, description='Epoch [TRAIN] 11', max=1712.0, style=ProgressStyle(descri…




HBox(children=(FloatProgress(value=0.0, description='Epoch [VALID] 5', max=429.0, style=ProgressStyle(descript…


Our image embeddings shape is (6850, 2304)
Our f1 score for threshold 0.1 is 0.6340219066253303
Our f1 score for threshold 0.15000000000000002 is 0.6588158763792592
Our f1 score for threshold 0.20000000000000004 is 0.6769147031770333
Our f1 score for threshold 0.25000000000000006 is 0.698687492781704
Our f1 score for threshold 0.30000000000000004 is 0.7192354588311753
Our f1 score for threshold 0.3500000000000001 is 0.7428786347266398
Our f1 score for threshold 0.40000000000000013 is 0.7674380885721183
Our f1 score for threshold 0.45000000000000007 is 0.7898510511127645
Our f1 score for threshold 0.5000000000000001 is 0.8074868447429435
Our f1 score for threshold 0.5500000000000002 is 0.8232335664644523
Our f1 score for threshold 0.6000000000000002 is 0.8283255579252079
Our f1 score for threshold 0.6500000000000001 is 0.8121529349045054
Our best score is 0.8283255579252079 and has a threshold 0.6000000000000002


HBox(children=(FloatProgress(value=0.0, description='Epoch [TRAIN] 12', max=1712.0, style=ProgressStyle(descri…




HBox(children=(FloatProgress(value=0.0, description='Epoch [VALID] 5', max=429.0, style=ProgressStyle(descript…


Our image embeddings shape is (6850, 2304)
Our f1 score for threshold 0.1 is 0.6337933533221755
Our f1 score for threshold 0.15000000000000002 is 0.6586333041520264
Our f1 score for threshold 0.20000000000000004 is 0.6765202425101665
Our f1 score for threshold 0.25000000000000006 is 0.6977113517922333
Our f1 score for threshold 0.30000000000000004 is 0.7194585815207225
Our f1 score for threshold 0.3500000000000001 is 0.7427462513328669
Our f1 score for threshold 0.40000000000000013 is 0.767611437824244
Our f1 score for threshold 0.45000000000000007 is 0.7894576760158363
Our f1 score for threshold 0.5000000000000001 is 0.8074352878671713
Our f1 score for threshold 0.5500000000000002 is 0.8233180902320641
Our f1 score for threshold 0.6000000000000002 is 0.8287389635212934
Our f1 score for threshold 0.6500000000000001 is 0.8135088454888479
Our best score is 0.8287389635212934 and has a threshold 0.6000000000000002


HBox(children=(FloatProgress(value=0.0, description='Epoch [TRAIN] 13', max=1712.0, style=ProgressStyle(descri…




HBox(children=(FloatProgress(value=0.0, description='Epoch [VALID] 5', max=429.0, style=ProgressStyle(descript…


Our image embeddings shape is (6850, 2304)
Our f1 score for threshold 0.1 is 0.6322403087313776
Our f1 score for threshold 0.15000000000000002 is 0.6580633887903609
Our f1 score for threshold 0.20000000000000004 is 0.6757665317926375
Our f1 score for threshold 0.25000000000000006 is 0.6970524791155922
Our f1 score for threshold 0.30000000000000004 is 0.7185449365028644
Our f1 score for threshold 0.3500000000000001 is 0.7418117834247597
Our f1 score for threshold 0.40000000000000013 is 0.7663014174510506
Our f1 score for threshold 0.45000000000000007 is 0.7883007339069318
Our f1 score for threshold 0.5000000000000001 is 0.8061128518525107
Our f1 score for threshold 0.5500000000000002 is 0.8227872866676855
Our f1 score for threshold 0.6000000000000002 is 0.8298231771228671
Our f1 score for threshold 0.6500000000000001 is 0.8160865928502746
Our best score is 0.8298231771228671 and has a threshold 0.6000000000000002


HBox(children=(FloatProgress(value=0.0, description='Epoch [TRAIN] 14', max=1712.0, style=ProgressStyle(descri…




HBox(children=(FloatProgress(value=0.0, description='Epoch [VALID] 5', max=429.0, style=ProgressStyle(descript…


Our image embeddings shape is (6850, 2304)
Our f1 score for threshold 0.1 is 0.6324911314205762
Our f1 score for threshold 0.15000000000000002 is 0.6580727377471023
Our f1 score for threshold 0.20000000000000004 is 0.675561782557396
Our f1 score for threshold 0.25000000000000006 is 0.6969732670590842
Our f1 score for threshold 0.30000000000000004 is 0.7185055692699605
Our f1 score for threshold 0.3500000000000001 is 0.741473906401356
Our f1 score for threshold 0.40000000000000013 is 0.7660363809450024
Our f1 score for threshold 0.45000000000000007 is 0.788373815963553
Our f1 score for threshold 0.5000000000000001 is 0.8060265925634501
Our f1 score for threshold 0.5500000000000002 is 0.821883452710975
Our f1 score for threshold 0.6000000000000002 is 0.8294464780341406
Our f1 score for threshold 0.6500000000000001 is 0.8158978469913425
Our best score is 0.8294464780341406 and has a threshold 0.6000000000000002


HBox(children=(FloatProgress(value=0.0, description='Epoch [TRAIN] 15', max=1712.0, style=ProgressStyle(descri…




HBox(children=(FloatProgress(value=0.0, description='Epoch [VALID] 5', max=429.0, style=ProgressStyle(descript…


Our image embeddings shape is (6850, 2304)
Our f1 score for threshold 0.1 is 0.6327366184889691
Our f1 score for threshold 0.15000000000000002 is 0.6579138097273066
Our f1 score for threshold 0.20000000000000004 is 0.6756721452639127
Our f1 score for threshold 0.25000000000000006 is 0.6969931514392561
Our f1 score for threshold 0.30000000000000004 is 0.7179999833908304
Our f1 score for threshold 0.3500000000000001 is 0.7410009774850518
Our f1 score for threshold 0.40000000000000013 is 0.7660818575211582
Our f1 score for threshold 0.45000000000000007 is 0.7882005950230411
Our f1 score for threshold 0.5000000000000001 is 0.80607834873303
Our f1 score for threshold 0.5500000000000002 is 0.8218336350668868
Our f1 score for threshold 0.6000000000000002 is 0.8287648295324488
Our f1 score for threshold 0.6500000000000001 is 0.8159598158675103
Our best score is 0.8287648295324488 and has a threshold 0.6000000000000002
