In [1]:
import os
# CUDA_LAUNCH_BLOCKING=1 asks CUDA to launch kernels synchronously, so a
# device-side error is reported at the call that caused it. It is set here, in
# the first cell, before any GPU library is imported.
# NOTE(review): this is a debugging aid and slows training; confirm it should
# stay enabled for real runs.
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [2]:
# import sys
# sys.path.append('../tools')

In [3]:
import timm
import pandas as pd
import numpy as np
# from tqdm.notebook import tqdm
from madgrad import MADGRAD
# from fastai.vision.all import *
from tools.train_utils import *
# from train_utils import *

In [4]:


# Hyper-parameters of this run, declared as class attributes on a ConfigClass
# subclass and instantiated once as `conf`.
class CONF(ConfigClass):
    # ArcFace margin and scale.
    # NOTE(review): not read by any cell visible in this notebook; presumably
    # consumed inside tools.train_utils -- confirm.
    arcface_m = 0.5
    arcface_s = 30.0

    # Base learning rate, LR multiplier and epoch count given to learn.fine_tune.
    lr = 1e-2
    lr_mult = 100.0
    n_epoch = 5

    # The two endpoints handed to MarginScheduler(start_m, end_m).
    start_m = 0.8
    end_m = 1.0

    # Batch size passed to get_dls.
    bs = 64
    # NOTE(review): never referenced in the visible cells (GradientClip is
    # passed to the Learner as a bare class) -- confirm it has an effect.
    gradient_clip = 1
    # Truthy selects the split_nfnet splitter, falsy selects split_2way.
    split_nfnet = 0
    # Width of the embedding produced by ArcFaceNet.after_conv.
    embedding_size = 1024
    # Printed when the run starts.
    experiment_id = 1
    val_k = 0
    OUTPUT_CLASSES = 11014


conf = CONF()

In [5]:
class ArcFaceNet(nn.Module):
    """Backbone plus embedding head, followed by an attachable classifier head.

    Sub-modules:

    * ``body``       -- ``create_body(arch, cut=-2)``.
    * ``after_conv`` -- ``AdaptiveConcatPool2d -> Flatten -> Linear -> BatchNorm1d``,
      producing ``embedding_size`` features per sample.
    * ``classifier`` -- callable applied to the embeddings. It may be ``None``
      at construction time and assigned later (the notebook does this so a
      checkpoint can be loaded before the head is attached).

    Set ``outputEmbs = True`` to make ``forward`` also return the embeddings.
    """

    def __init__(self, arch, embedding_size=512, classifier=None):
        """Build the network.

        Args:
            arch: model handed to ``create_body``.
            embedding_size: number of features produced by ``after_conv``.
            classifier: head applied to the embeddings, or ``None`` to attach
                one later via ``model.classifier = ...``.
        """
        super().__init__()
        self.body = create_body(arch, cut=-2)
        nf = num_features_model(nn.Sequential(*self.body.children()))
        # Attribute names and layer order are kept exactly as before so that
        # existing state dicts (keys ``body.*`` / ``after_conv.*``) still load.
        self.after_conv = nn.Sequential(
            AdaptiveConcatPool2d(),
            Flatten(),
            # The pooled feature vector is nf * 2 wide (presumably because the
            # concat pool stacks two poolings -- confirm against fastai).
            nn.Linear(nf * 2, embedding_size),
            nn.BatchNorm1d(embedding_size),
        )

        self.classifier = classifier
        self.outputEmbs = False

    def forward(self, x):
        """Return classifier output, or ``[classifier output, embeddings]``.

        Raises:
            TypeError: if no classifier head has been attached yet.
        """
        # Fail before the expensive backbone pass, with a message that names
        # the actual problem instead of "'NoneType' object is not callable".
        if self.classifier is None:
            raise TypeError(
                "ArcFaceNet.classifier is None; assign a classifier head "
                "before calling forward()"
            )
        embeddings = self.after_conv(self.body(x))
        logits = self.classifier(embeddings)
        if self.outputEmbs:
            return [logits, embeddings]
        return logits


In [6]:

# Root folder of the data set; train.csv is read from it.
PATH = Path('../data')
# PATH = Path('data')

# Read the training table and pass it through add_splits; the result carries
# an `is_valid` column, which the lines below rely on.
train_df = pd.read_csv(PATH/'train.csv')
train_df = add_splits(train_df)

# Keep a copy of the rows currently flagged as validation ...
valid_df = train_df.loc[train_df['is_valid'] == True].copy()

# ... then clear the flag on every row and append the copy. The flagged rows
# therefore appear twice: once with is_valid=False and once with is_valid=True.
# NOTE(review): this means validation rows are also trained on -- presumably
# intentional for this final fine-tune; confirm.
train_df['is_valid'] = False
train_df = pd.concat([train_df, valid_df])





In [7]:
# Tag for the pooling head; only used as part of the checkpoint name.
conv_layer = 'adaptconcat'
# Folder that holds the image-model checkpoints.
saved_folder = '../data/saved_models/image_model/'
# Both margins are encoded in tenths (0.8 -> "08", 1.0 -> "10"). The end margin
# used to be computed as int(conf.end_m)*10, which truncates before scaling and
# turned any fractional value (e.g. 0.9) into 0; it is now scaled first, like
# the start margin. For whole-number end margins the name is unchanged.
# NOTE(review): checkpoints written with a fractional end_m under the old
# formula carry the old suffix and would need renaming.
saved_name = (
    f'fine_tune_{conf.n_epoch}epoch_nfnetl0-bs-{conf.bs}-pre-mgrad-{conv_layer}'
    f'-embed-{conf.embedding_size}'
    f'-m{int(conf.start_m * 10):02d}-{int(conf.end_m * 10)}'
)

In [9]:
# image_getter = ImageFileGetter(PATH)
# The backbone is created with pretrained=False because its weights are
# restored from the checkpoint loaded below.
arch = timm.create_model("eca_nfnet_l0", pretrained=False)
classifier = ArcFaceClassifier(in_features=conf.embedding_size)

# The network is built without a head first, the checkpoint is loaded into it,
# and only then is the freshly initialised classifier attached (presumably
# because the checkpoint was written without classifier weights -- confirm).
model = ArcFaceNet(arch=arch, embedding_size=conf.embedding_size, classifier=None)
# map_location='cpu' keeps the load independent of the device the checkpoint
# was saved from: it works on CPU-only machines and avoids a second copy of the
# weights on the GPU. load_state_dict copies the values into the model's
# own parameters.
model.load_state_dict(torch.load(saved_folder + saved_name, map_location='cpu'))
model.classifier = classifier

In [None]:
# arch = timm.create_model("eca_nfnet_l0", pretrained = True)
# classifier = ArcFaceClassifier(in_features=conf.embedding_size)
# model = ArcFaceNet(arch = arch, embedding_size=conf.embedding_size, classifier=classifier)

In [10]:
# Training ingredients handed to the Learner: loss, optimizer factory
# (MADGRAD wrapped in OptimWrapper) and the parameter-group splitter.
loss_func = ArcFaceLoss()
opt_func = partial(OptimWrapper, opt=MADGRAD)
# conf.split_nfnet switches between the two available splitters.
split_func = split_nfnet if conf.split_nfnet else split_2way

# Tracks the 'F1 embeddings' value, where larger is better (np.greater).
f1_tracker = TrackerCallback(monitor='F1 embeddings', comp=np.greater)

In [11]:
# Log the experiment id and the full configuration so the cell output records
# which settings this run used.
print('Starting', conf.experiment_id)
print(conf)


Starting 1
{'OUTPUT_CLASSES': 11014, 'arcface_m': 0.5, 'arcface_s': 30.0, 'bs': 64, 'embedding_size': 1024, 'end_m': 1.0, 'experiment_id': 1, 'gradient_clip': 1, 'lr': 0.01, 'lr_mult': 100.0, 'n_epoch': 5, 'split_nfnet': 0, 'start_m': 0.8, 'val_k': 0}


In [13]:

# opt_func=Adam

# Assemble the Learner from the data loaders, the model and the training
# ingredients defined above.
# NOTE(review): 224 is presumably the image size expected by get_dls; confirm.
# NOTE(review): GradientClip is passed as a bare class, so conf.gradient_clip
# is not forwarded to it here; confirm the default is what is wanted.
learn = Learner(
    get_dls(train_df, 224, conf.bs),
    model,
    splitter=split_func,
    opt_func=opt_func,
    loss_func=loss_func,
    cbs=[
        CutMix,
        GradientClip,
        MixedPrecision,
        MarginScheduler(conf.start_m, conf.end_m),
        F1FromEmbs,
        f1_tracker,
    ],
    metrics=[F1EmbedMetric(), accuracy],
)



In [14]:
# Display the module tree of the assembled network as a quick sanity check.
learn.model


ArcFaceNet(
  (body): Sequential(
    (0): Sequential(
      (conv1): ScaledStdConv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (act2): SiLU(inplace=True)
      (conv2): ScaledStdConv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (act3): SiLU(inplace=True)
      (conv3): ScaledStdConv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (act4): SiLU(inplace=True)
      (conv4): ScaledStdConv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    )
    (1): Sequential(
      (0): Sequential(
        (0): NormFreeBlock(
          (downsample): DownsampleAvg(
            (pool): Identity()
            (conv): ScaledStdConv2d(128, 256, kernel_size=(1, 1), stride=(1, 1))
          )
          (act1): SiLU()
          (conv1): ScaledStdConv2d(128, 64, kernel_size=(1, 1), stride=(1, 1))
          (act2): SiLU(inplace=True)
          (conv2): ScaledStdConv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          

In [None]:
# learn.load('stage1-fine_tune_3epoch_nfnetl1-bs-64-pre-mgrad-gdconv-embed-1024-m03-10')

In [None]:
# Fine-tune for conf.n_epoch epochs at base learning rate conf.lr, with
# freeze_epochs=1 and lr_mult=conf.lr_mult.
# NOTE(review): assuming this is fastai's Learner.fine_tune, one extra epoch is
# run with the early parameter groups frozen before the conf.n_epoch unfrozen
# epochs -- confirm against the Learner exported by tools.train_utils.
learn.fine_tune(conf.n_epoch,conf.lr, freeze_epochs = 1, lr_mult=conf.lr_mult)
# learn.fit_one_cycle(3)


In [None]:
# %debug

In [None]:
# Checkpoint the learner under the run name, prefixed with 'stage1-'.
learn.save('stage1-'+ saved_name)

In [None]:

# Export the trained network via save_without_classifier.
# NOTE(review): the target path is the same saved_folder + saved_name that the
# starting weights were loaded from, so that file is overwritten -- confirm
# this is intended.
print('saving', 'model')
save_without_classifier(learn.model, saved_folder + saved_name)