## Обучение FastViT + GeM Pooling + MLP
В данной тетрадке расположен код обучения и инференса модели FastViT. Перед запуском убедитесь, что все необходимые библиотеки установлены, а ваш ключ API wandb указан в `CFG.wandb_key`. Для инференса рекомендуется использовать чекпойнт с наименьшим RMSE на валидации.

Вывод: данный эксперимент показывает, что модели FastViT, несмотря на очень быстрый инференс, показывают SOTA-результаты среди трансформеров.

In [1]:
from IPython.display import clear_output

#!pip install lightning timm opendatasets albumentations catboost gdown open_clip_torch wandb
#clear_output()

In [5]:
import os
import pandas as pd
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import pytorch_lightning as pl
from sklearn.model_selection import train_test_split
import cv2
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from tqdm.auto import tqdm
from math import sin,cos,pi
from sklearn.metrics import accuracy_score,f1_score,balanced_accuracy_score
import albumentations as A
import open_clip
from albumentations.pytorch.transforms import ToTensorV2
from catboost import CatBoostClassifier,Pool,cv
from copy import deepcopy
import wandb
from transformers import get_cosine_schedule_with_warmup
import timm 
pl.seed_everything(56)

Seed set to 56


56

# Конфиги:
- использована модель `fastvit_ma36` с весами `apple_dist_in1k`
- функция активации [PReLU в MLP](../experiments/CLIP_train/experiments.ipynb)
- [Cosine Scheduler with warmup](../experiments/CLIP_train/experiments.ipynb)
- GeM Pooling
- под валидационную выборку отведено 20% обучающих данных (такое разделение оказалось оптимальным)

<div>
<img src="images/FastViT.avif" width="800"/>
</div>

In [7]:
class CFG:
    """Central experiment configuration: file locations, loader and model settings."""

    class data:
        # CSV with training scores and CSV listing the test images.
        train_data = './aiijc23-4/train_scores.csv'
        test_data = './simple_sub.csv'
        # Directories prepended to the image file names read from the CSVs.
        train_path = './aiijc23-4/train/train/'
        test_path = './aiijc23-4/test/test/'
        # DataLoader settings.
        batch_size = 32
        num_workers = 4
        # Fraction of training rows held out for validation, and the split's random state.
        val_split_size = 0.2
        seed = 56

    class model:
        # timm model name passed to timm.create_model.
        model = 'fastvit_ma36.apple_dist_in1k'
        pretrained = True
        num_labels = 1
        scheduler = True
        # Passed to the Trainer as max_epochs.
        max_epoches = 4
        # AdamW settings.
        lr = 2e-4
        eps = 1e-6
        weight_decay = 0.02
        betas = (0.9, 0.999)
        # Initial exponent of the GeM pooling layers.
        p = 3
        # Cosine schedule: it spans `warmup_epoch` epochs of steps, and
        # `warmup_step` is the share of those steps spent on warmup.
        warmup_step = 0.1
        warmup_epoch = 4
        num_cycles = 0.55

    # Placeholder; replace with a real Weights & Biases API key before running.
    wandb_key = "your_key"
    seed = 56

# Предобработка датасета

In [9]:
def make_df(path, root_path=CFG.data.train_path):
    """Read the CSV at ``path`` and return a frame with ``image`` and ``label`` columns.

    ``image`` is ``root_path`` concatenated with each ``IMAGE`` value;
    ``label`` is the ``SCORE`` column.
    """
    raw = pd.read_csv(path)
    image_paths = raw['IMAGE'].apply(lambda name: root_path + name)
    return pd.DataFrame({'image': image_paths, 'label': raw['SCORE']})

In [10]:
class PLDataset(Dataset):
    """Map-style dataset yielding ``(preprocessed image, label)`` pairs.

    Rows are taken from the ``image`` (file path) and ``label`` columns of ``df``;
    ``preprocess`` is applied to every image after it is opened as RGB.
    """

    def __init__(self, df, preprocess):
        super().__init__()
        self.cfg = CFG.data
        # One [path, label] row per sample, addressed by position.
        self.data = df[['image', 'label']].values
        self.preprocess = preprocess

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        row = self.data[index]
        rgb = Image.open(row[0]).convert('RGB')
        return self.preprocess(rgb), row[1]

In [11]:
class PLDataModule(pl.LightningDataModule):
    """Data module building train / validation / predict loaders from ``CFG.data``.

    Args:
        preprocess: Callable applied by ``PLDataset`` to every opened image.
    """

    def __init__(self, preprocess):
        super().__init__()
        self.cfg = CFG.data
        self.train_dataset_path = self.cfg.train_data
        # CSV listing the test images. This attribute used to hold the image
        # directory (``test_path``) and was never read by prepare_data.
        self.test_dataset_path = self.cfg.test_data
        self.train_images_path = self.cfg.train_path
        self.test_images_path = self.cfg.test_path
        self.val_split_size = self.cfg.val_split_size
        self.batch_size = self.cfg.batch_size
        self.num_workers = self.cfg.num_workers
        self.is_setup = False
        self.preprocess = preprocess

    def prepare_data(self):
        """Load the train and test CSVs into ``train_df`` / ``test_df``.

        Does nothing once ``setup`` has completed, so a repeated call (for
        example by a Trainer after a manual ``prepare_data()`` / ``setup()``)
        cannot replace the already split ``train_df`` with the full frame.
        """
        if self.is_setup:
            return
        self.train_df = make_df(self.train_dataset_path,
                                root_path=self.train_images_path)
        self.test_df = make_df(self.test_dataset_path,
                               root_path=self.test_images_path)

    def setup(self, stage=None):
        """Split off the validation rows and build the datasets; runs only once.

        ``stage`` is accepted for Lightning compatibility and is not used.
        Requires ``prepare_data`` to have been called first.
        """
        if self.is_setup:
            return None
        self.train_df, self.val_df = train_test_split(
            self.train_df,
            test_size=self.val_split_size,
            random_state=self.cfg.seed,
        )
        self.train_dataset = PLDataset(self.train_df, self.preprocess)
        self.val_dataset = PLDataset(self.val_df, self.preprocess)
        self.test_dataset = PLDataset(self.test_df, self.preprocess)
        self.is_setup = True

    def _make_loader(self, dataset, shuffle=False):
        # Single place for the loader settings shared by all three splits.
        return DataLoader(dataset,
                          batch_size=self.batch_size,
                          num_workers=self.num_workers,
                          shuffle=shuffle)

    def train_dataloader(self):
        """Return the shuffled loader over the training split."""
        return self._make_loader(self.train_dataset, shuffle=True)

    def val_dataloader(self):
        """Return the loader over the validation split (original order)."""
        return self._make_loader(self.val_dataset)

    def predict_dataloader(self):
        """Return the loader over the test images (original order)."""
        return self._make_loader(self.test_dataset)

# Метрики и лосс
- были использованы различные метрики для создания новых гипотез
- в качестве функции потерь и основной метрики был выбран RMSE, потому что он показал себя лучше всего среди опробованных регрессионных метрик
$$ RMSE = \sqrt{\frac{\sum_{i=1}^n(pred_i-act_i)^2}{n}}$$

In [None]:
class RMSELoss(nn.Module):
    """Root-mean-squared-error loss: ``sqrt(MSE(yhat, y) + eps)``.

    ``eps`` is added under the square root before it is taken.
    """

    def __init__(self, eps=1e-6):
        super().__init__()
        self.eps = eps
        self.mse = nn.MSELoss()

    def forward(self, yhat, y):
        mean_squared = self.mse(yhat, y)
        return torch.sqrt(mean_squared + self.eps)

# GeM Pooling
<div>
<img src="images/GeM-Pooling.png" width="600"/>
</div>


In [13]:
class GeM(nn.Module):
    """Generalized-mean pooling over the last two dimensions.

    Computes ``mean(clamp(x, min=eps) ** p) ** (1 / p)``. ``p`` is stored as a
    one-element parameter, trainable when ``requires_grad`` is true.
    """

    def __init__(self, p=3, eps=1e-6, requires_grad=True):
        super().__init__()
        self.eps = eps
        self.p = nn.Parameter(torch.ones(1) * p, requires_grad=requires_grad)

    def forward(self, x: torch.Tensor):
        clamped = torch.clamp(x, min=self.eps)
        averaged = torch.pow(clamped, self.p).mean(dim=(-2, -1))
        return torch.pow(averaged, 1.0 / self.p)

# Инициализация модели и алгоритм обучения

In [14]:
class PLModule(pl.LightningModule):
    """Score regressor: multi-scale backbone + GeM pooling + MLP head.

    Every feature map returned by ``model`` is pooled by its own ``GeM`` layer,
    the pooled vectors are concatenated and an MLP maps them to one score per
    image. Training and validation use ``RMSELoss``.

    Args:
        model: Backbone whose forward returns an iterable of feature maps
            (created in this file with timm's ``features_only=True``).
    """

    # Values used when the backbone does not expose ``feature_info``; they
    # match the fastvit_ma36 setup this notebook was written for.
    DEFAULT_IN_FEATURES = 1140
    DEFAULT_NUM_STAGES = 4

    def __init__(self, model):
        super().__init__()
        self.cfg = CFG.model
        self.model = model

        # Derive the head size from the backbone instead of hard-coding it, so
        # another features-only backbone works without editing this class.
        info = getattr(model, 'feature_info', None)
        if info is not None and hasattr(info, 'channels'):
            stage_channels = list(info.channels())
            in_features = sum(stage_channels)
            num_stages = len(stage_channels)
        else:
            in_features = self.DEFAULT_IN_FEATURES
            num_stages = self.DEFAULT_NUM_STAGES
        hidden = in_features // 2

        self.global_pools = nn.ModuleList(
            [GeM(p=self.cfg.p) for _ in range(num_stages)]
        )
        self.mlp = nn.Sequential(
            nn.Linear(in_features, hidden),
            nn.PReLU(),
            nn.LayerNorm(hidden),
            nn.Linear(hidden, 1),
        )
        self.criterion = RMSELoss()
        self.last_loss = 0
        self.losses = []

    def forward(self, x):
        """Return one predicted score per input image, shape ``(batch,)``."""
        feature_maps = self.model(x)
        pooled = [pool(fmap)
                  for fmap, pool in zip(feature_maps, self.global_pools)]
        scores = self.mlp(torch.cat(pooled, dim=1))
        # Drop only the trailing singleton dimension. A bare squeeze() would
        # also remove the batch dimension for a batch of one, producing a 0-d
        # tensor that cannot be concatenated with the other batches.
        return scores.squeeze(-1)

    def training_step(self, batch, i):
        """Compute and log the RMSE loss for one training batch."""
        x, targets = batch
        x, targets = x.float(), targets.float()
        loss = self.criterion(self(x), targets)
        self.last_loss = loss.item()
        self.log_dict({'train_loss': self.last_loss})
        return loss

    def predict_step(self, batch, i):
        """Return the predicted scores for one batch; labels are ignored."""
        x, _ = batch
        return self(x.float())

    def validation_step(self, batch, _):
        """Compute and log the RMSE loss for one validation batch."""
        x, targets = batch
        x, targets = x.float(), targets.float()
        loss = self.criterion(self(x), targets)
        self.last_loss = loss.item()
        self.log_dict({'val_loss': self.last_loss})

    def on_validation_epoch_end(self):
        """Reset the per-epoch loss buffer."""
        self.losses = []

    def configure_optimizers(self):
        """Create AdamW and a per-step cosine schedule with warmup.

        Reads the module-level global ``TRAIN_STEPS`` (batches per training
        epoch), which must be defined before training starts. The schedule
        spans ``warmup_epoch`` epochs and ``warmup_step`` is the fraction of
        those steps used for warmup.
        """
        optim = torch.optim.AdamW(self.parameters(),
                                  lr=self.cfg.lr,
                                  betas=self.cfg.betas,
                                  eps=self.cfg.eps,
                                  weight_decay=self.cfg.weight_decay)

        total_steps = TRAIN_STEPS * self.cfg.warmup_epoch
        scheduler = get_cosine_schedule_with_warmup(
            optim,
            num_warmup_steps=total_steps * self.cfg.warmup_step,
            num_training_steps=total_steps,
            num_cycles=self.cfg.num_cycles,
        )
        scheduler = {'scheduler': scheduler,
                     'interval': 'step',
                     'frequency': 1}
        return [optim], [scheduler]

In [15]:
# Build the image transform that is passed as `preprocess` to every dataset.
# NOTE(review): the model *name* (a string) is passed here, while timm documents
# this helper as taking a model instance; the resolved values may therefore be
# timm's generic defaults rather than this checkpoint's pretrained config — confirm.
data_config = timm.data.resolve_model_data_config(CFG.model.model)
# Override the resolved input size with 3x224x224.
data_config['input_size'] = (3,224,224)
# is_training=False: the same non-training transform is used for all splits.
processor = timm.data.create_transform(**data_config, is_training=False)

In [16]:
# Backbone for PLModule: created without a classifier (num_classes=0) and with
# features_only=True, so that its forward output can be iterated as a sequence
# of feature maps in PLModule.forward.
model = timm.create_model(CFG.model.model,
                          pretrained=CFG.model.pretrained,
                          num_classes=0,
                          features_only=True)

In [17]:
# Load the CSVs and build the datasets eagerly, so the number of training
# batches is known before the LR schedule is configured.
dm = PLDataModule(processor)
dm.prepare_data()
# The stage argument is not used by PLDataModule.setup.
dm.setup(0)

In [18]:
# Batches per training epoch; read as a global by PLModule.configure_optimizers.
TRAIN_STEPS = len(dm.train_dataloader())

In [19]:
# Wrap the backbone with the pooling layers, MLP head and training logic.
pl_model = PLModule(model)

In [20]:
# Authenticate with Weights & Biases using CFG.wandb_key (a placeholder by
# default — set a real key first) and start the run that will receive the logs.
wandb.login(key=CFG.wandb_key)
os.environ['WANDB_API_KEY'] = CFG.wandb_key
wandb.init(project='AIIJC',name='fastvit_ma36_gempooling')

[34m[1mwandb[0m: Currently logged in as: [33mandrey20007[0m ([33mandrey2007[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [26]:
# Log the learning rate once per epoch.
lr_monitor = pl.callbacks.LearningRateMonitor(logging_interval='epoch')
# Monitor 'val_loss' (lower is better) and write checkpoints to ./outputs_fastvit/;
# save_last=True additionally keeps the most recent checkpoint.
checkpoint_cb = pl.callbacks.ModelCheckpoint(
    dirpath='./outputs_fastvit/',
    filename='model_{epoch:02d}-{val_loss:.4f}',
    monitor='val_loss',
    mode='min',
    save_last=True
)

# Single-GPU (device 0), full-precision training with validation after every
# epoch; the epoch budget comes from CFG.model.max_epoches.
trainer = pl.Trainer(
    accelerator="gpu",
    precision=32,
    callbacks = [lr_monitor,checkpoint_cb],
    logger = pl.loggers.WandbLogger(),
    min_epochs=1,
    devices=[0],
    check_val_every_n_epoch=1,
    max_epochs=CFG.model.max_epoches
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [None]:
# Train and validate using the loaders provided by the data module.
trainer.fit(pl_model,datamodule=dm)

# Предсказание

In [None]:
# Load a checkpoint from the directory ModelCheckpoint writes to
# ('./outputs_fastvit/'). The file name embeds the epoch and val_loss of one
# particular run, so change it to the checkpoint you want to evaluate.
preds = trainer.predict(pl_model,datamodule=dm,ckpt_path='./outputs_fastvit/model_epoch=02-val_loss=0.4845.ckpt')

Restoring states from the checkpoint path at outputs/model_epoch=02-val_loss=0.4845.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at outputs/model_epoch=02-val_loss=0.4845.ckpt


Predicting: |          | 0/? [00:00<?, ?it/s]

In [29]:
# Reload the CSV that lists the test images; predictions are written into it below.
test_df = pd.read_csv('simple_sub.csv')

In [30]:
# Join the per-batch predictions into one array. The predict loader is not
# shuffled, so the values are expected to line up with the CSV rows.
test_df['SCORE'] = np.concatenate(preds)

In [33]:
# Write the submission file without the pandas index.
# NOTE(review): the file name says "fastfit_ma32" while the model is fastvit_ma36 — confirm the intended name.
test_df.to_csv('fastfit_ma32.csv',index=False)

In [32]:
test_df

Unnamed: 0,IMAGE,SCORE
0,86cc6e863c9b6bb2a0e0db114c9775aa.jpg,8.019901
1,da71671681d9cef5b60727801bf95ef8.jpg,5.779160
2,821a9ff5df6e581c68c0371dc6b1eb90.jpg,7.971606
3,ed842bb42c39fe257ac459b544bb7ba8.jpg,8.025720
4,6eed69b8f6d62f28b809d9cbafcaab0b.jpg,5.954120
...,...,...
9983,46aa642bbcfdbed5eed380ba10fdbbbc.jpg,5.189587
9984,dc737dc9a19844943540816e4488b425.jpg,4.447919
9985,8038a60dac57ba5d445b99978a36a1d5.jpg,5.550060
9986,5381dd99c37acb63db903eac293fbe4c.jpg,4.402526
