In [1]:
from IPython.display import clear_output

!pip install lightning timm opendatasets albumentations catboost gdown open_clip_torch wandb
clear_output()

In [1]:
# Show the GPU(s) visible to this runtime before training.
!nvidia-smi

Thu Aug 24 19:34:51 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.116.04   Driver Version: 525.116.04   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA A100 80G...  Off  | 00000000:00:05.0 Off |                    0 |
| N/A   42C    P0    46W / 300W |      0MiB / 81920MiB |      0%      Default |
|                               |                      |             Disabled |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [5]:
import opendatasets as od
# Download the competition dataset from Kaggle. The recorded output shows this call
# prompting for Kaggle credentials and saving into ./aiijc23-4, the directory that the
# CFG.data paths point into.
od.download('https://www.kaggle.com/datasets/andronhl/aiijc23-4')

Please provide your Kaggle credentials to download this dataset. Learn more: http://bit.ly/kaggle-creds
Your Kaggle username:

  bulbazavril


Your Kaggle Key:

  ········


Downloading aiijc23-4.zip to ./aiijc23-4


100%|██████████| 4.81G/4.81G [00:56<00:00, 91.3MB/s]





In [2]:
import os
import pandas as pd
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import pytorch_lightning as pl
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from tqdm.auto import tqdm
from math import sin,cos,pi
from sklearn.metrics import accuracy_score,f1_score,balanced_accuracy_score
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score
import albumentations as A
import open_clip
from albumentations.pytorch.transforms import ToTensorV2
from catboost import CatBoostClassifier,Pool,cv
from copy import deepcopy
import wandb
import timm
# Set the global random seed; 56 is the same value stored in CFG.seed / CFG.data.seed.
pl.seed_everything(56)

Global seed set to 56


56

In [1]:
class CFG:
    """Central configuration namespace for the notebook (plain class attributes)."""

    class data:
        """Dataset locations and DataLoader / split settings."""

        # CSV files read by make_df (columns IMAGE and SCORE).
        train_data = './aiijc23-4/train_scores.csv'
        test_data = './convnext_fc_large.csv'
        # Directory prefixes prepended to the IMAGE column.
        train_path = './aiijc23-4/train/train/'
        test_path = './aiijc23-4/test/test/'
        num_workers = 4
        # Fraction of the training frame held out for validation.
        val_split_size = 0.2
        batch_size = 32
        # random_state for the train/validation split.
        seed = 56

    class model:
        """Encoder choice and optimisation hyper-parameters."""

        # open_clip architecture name and pretrained tag.
        model = 'EVA02-L-14'
        pretrained = 'merged2b_s4b_b131k'
        num_labels = 1
        # NOTE(review): `scheduler` and `warnap` are not read anywhere in the visible
        # code; `warnap` is presumably a misspelling of "warmup" - confirm before renaming.
        scheduler = True
        warnap = False
        max_epoches = 4
        # Separate learning rates / weight decays for the MLP head and the encoder.
        mlp_lr = 2e-4
        encoder_lr = 5e-6
        mlp_weights_decay = 2e-5
        encoder_weights_decay = 0
        # AdamW numerical-stability term and moment coefficients.
        eps = 1e-6
        betas = (0.9, 0.999)

    seed = 56

In [4]:
def make_df(path, root_path=CFG.data.train_path):
    """Read a score CSV and return a frame with `image` and `label` columns.

    Args:
        path: CSV file with an `IMAGE` column (file names) and a `SCORE` column.
        root_path: String prefix concatenated in front of every file name
            (plain concatenation, so it must already end with a separator).

    Returns:
        DataFrame with `image` (prefixed file name) and `label` (the SCORE values).
    """
    raw = pd.read_csv(path)

    def _with_root(file_name):
        return root_path + file_name

    return pd.DataFrame({
        'image': raw['IMAGE'].apply(_with_root),
        'label': raw['SCORE'],
    })

In [5]:
class PLDataset(Dataset):
    """Map-style dataset yielding `(preprocessed_image, label)` pairs.

    Args:
        df: Frame with an `image` column (file paths) and a `label` column.
        preprocess: Callable applied to the opened PIL image; its return value
            is what `__getitem__` yields as the image.
    """

    def __init__(self, df, preprocess):
        super().__init__()
        self.cfg = CFG.data
        # Rows are stored as an array of [path, label] pairs.
        self.data = df[['image', 'label']].values
        self.preprocess = preprocess

    def __getitem__(self, index):
        """Load, preprocess and return the sample at `index` as `(image, label)`."""
        path, label = self.data[index][0], self.data[index][1]
        # Use the transform handed to this instance rather than a notebook-level
        # global, and close the file handle once the pixels have been consumed.
        with Image.open(path) as image:
            tensor = self.preprocess(image)
        return tensor, label

    def __len__(self):
        """Number of samples."""
        return len(self.data)

In [6]:
class PLDataModule(pl.LightningDataModule):
    """Lightning data module serving train / validation / predict loaders.

    Args:
        preprocess: Image transform forwarded to every `PLDataset`.
    """

    def __init__(self, preprocess):
        super().__init__()
        self.cfg = CFG.data
        self.train_dataset_path = self.cfg.train_data
        # Kept for backward compatibility; note this holds the test image
        # directory, not the test CSV (which is read from cfg.test_data).
        self.test_dataset_path = self.cfg.test_path
        self.val_split_size = self.cfg.val_split_size
        self.batch_size = self.cfg.batch_size
        self.num_workers = self.cfg.num_workers
        self.is_setup = False
        self.preprocess = preprocess
        self.train_df = None
        self.test_df = None

    def prepare_data(self):
        """Read the train and test CSVs into `train_df` / `test_df`.

        Does nothing once `setup` has completed, so a later call (for example
        from `Trainer.fit`) cannot replace the already-split `train_df` with
        the full frame again.
        """
        if self.is_setup:
            return
        self.train_df = make_df(self.train_dataset_path)
        self.test_df = make_df(self.cfg.test_data,
                               root_path=self.cfg.test_path)

    def setup(self, stage=None):
        """Split train/validation and build the datasets (runs only once).

        Args:
            stage: Accepted for the Lightning hook signature; not used.
        """
        if self.is_setup:
            return None
        # Load the frames here if prepare_data has not run in this process,
        # instead of failing with an AttributeError.
        if self.train_df is None or self.test_df is None:
            self.prepare_data()
        self.train_df, self.val_df = train_test_split(
            self.train_df,
            test_size=self.val_split_size,
            random_state=self.cfg.seed,
        )
        self.train_dataset = PLDataset(self.train_df, self.preprocess)
        self.val_dataset = PLDataset(self.val_df, self.preprocess)
        self.test_dataset = PLDataset(self.test_df, self.preprocess)
        self.is_setup = True

    def train_dataloader(self):
        """Shuffled loader over the training split."""
        return DataLoader(self.train_dataset,
                          batch_size=self.batch_size,
                          num_workers=self.num_workers,
                          shuffle=True)

    def val_dataloader(self):
        """Loader over the validation split."""
        return DataLoader(self.val_dataset,
                          batch_size=self.batch_size,
                          num_workers=self.num_workers)

    def predict_dataloader(self):
        """Loader over the test frame, in row order."""
        return DataLoader(self.test_dataset,
                          batch_size=self.batch_size,
                          num_workers=self.num_workers)

In [7]:
class LitProgressBar(pl.callbacks.TQDMProgressBar):
    """TQDM progress bar that shows the module's `last_loss` while predicting."""

    def on_predict_batch_end(self, trainer, pl_module, outputs, batch, batch_idx, *args, **kwargs):
        """Advance the predict bar and label it with `pl_module.last_loss`.

        Extra positional/keyword arguments (such as a dataloader index) are
        forwarded unchanged to the parent hook.

        Returns:
            The predict progress bar that was updated.
        """
        # Let the parent advance the existing bar; creating a fresh validation
        # bar on every batch would spawn a new tqdm instance each time and
        # never move the real one.
        super().on_predict_batch_end(trainer, pl_module, outputs, batch, batch_idx, *args, **kwargs)
        bar = self.predict_progress_bar
        bar.set_description(f"loss: {pl_module.last_loss:.4f}")
        return bar

In [8]:
class AverageMeter():
    """Accumulates targets and predictions, then scores them in one pass."""

    def __init__(self):
        self.labels = []
        self.preds = []

    def reset(self):
        """Drop everything accumulated so far."""
        self.labels, self.preds = [], []

    def update(self, labels, preds):
        """Append one batch of targets and predictions (both iterables)."""
        self.labels.extend(labels)
        self.preds.extend(preds)

    def calc_metrics(self):
        """Return a dict of regression and rounded-classification metrics.

        Regression metrics use the raw predictions; the classification metrics
        (f1, accuracy, balanced accuracy) use predictions passed through
        the built-in `round`.
        """
        targets = pd.Series(self.labels)
        scores = pd.Series(self.preds)
        rounded = scores.map(round)

        # A mean-squared-log-error entry ('val_msle') was disabled in the original.
        return {
            'val_rmse': (mean_squared_error(targets, scores)) ** 0.5,
            'val_mae': mean_absolute_error(targets, scores),
            'mape': mean_absolute_percentage_error(targets, scores),
            'val_r2': r2_score(targets, scores),
            'val_f1': f1_score(targets, rounded, average='macro'),
            'val_acc': accuracy_score(targets, rounded),
            'val_w_acc': balanced_accuracy_score(targets, rounded),
        }

In [9]:
class RMSELoss(nn.Module):
    """Root-mean-squared-error loss: sqrt(MSE + eps).

    Args:
        eps: Constant added under the square root before it is taken.
    """

    def __init__(self, eps=1e-6):
        super().__init__()
        self.mse = nn.MSELoss()
        self.eps = eps

    def forward(self, yhat, y):
        """Return sqrt(mean((yhat - y)^2) + eps) as a scalar tensor."""
        mean_sq_err = self.mse(yhat, y)
        return torch.sqrt(mean_sq_err + self.eps)
    
class PLModule(pl.LightningModule):
    """Image-score regressor: a CLIP image encoder followed by an MLP head.

    Args:
        clip: Model exposing `encode_image(x)`; its output is fed to a
            `Linear(768, ...)` layer, so the embedding width must be 768.
    """

    def __init__(self, clip):
        super().__init__()
        self.cfg = CFG.model
        self.clip = clip
        self.mlp = nn.Sequential(nn.Linear(768, 768 * 2),
                                 nn.ReLU(),
                                 nn.LayerNorm(768 * 2),
                                 nn.Linear(768 * 2, 1))
        self.criterion = RMSELoss()
        self.avg_meter = AverageMeter()
        self.last_loss = 0
        self.losses = []

    def forward(self, x):
        """Return one score per image in the batch.

        Only the trailing size-1 dimension of the head output is squeezed, so
        a batch of one still yields a 1-element tensor instead of a scalar.
        """
        features = self.clip.encode_image(x)
        scores = self.mlp(features)
        return scores.squeeze(-1)

    def _predict_and_score(self, batch):
        """Run the model on a batch; return `(predictions, targets, loss)`."""
        x, targets = batch
        x, targets = x.float(), targets.float()
        preds = self(x)
        # RMSELoss.forward is declared as (yhat, y): predictions go first.
        loss = self.criterion(preds, targets)
        return preds, targets, loss

    def training_step(self, batch, i):
        """Compute and log the training loss for one batch."""
        _, _, loss = self._predict_and_score(batch)
        self.log_dict({'train_loss': loss.item()})
        self.last_loss = loss.item()
        return loss

    def predict_step(self, batch, i):
        """Return the batch predictions as a Python list of floats."""
        x, _ = batch
        return self(x.float()).tolist()

    def validation_step(self, batch, _):
        """Log the validation loss and accumulate targets/predictions."""
        preds, targets, loss = self._predict_and_score(batch)
        self.log_dict({'val_loss': loss.item()})
        self.avg_meter.update(targets.cpu().detach().tolist(),
                              preds.cpu().detach().tolist())

    def on_validation_epoch_end(self):
        """Log the epoch-level metrics and clear the accumulator."""
        self.log_dict(self.avg_meter.calc_metrics())
        self.avg_meter.reset()

    def configure_optimizers(self):
        """Build AdamW with separate lr / weight decay for encoder and head."""
        # The per-group option must be spelled `weight_decay`; an unrecognised
        # key is ignored and the optimizer-wide default is used instead.
        grouped_parameters = [
            {'params': self.clip.parameters(),
             'lr': self.cfg.encoder_lr,
             'weight_decay': self.cfg.encoder_weights_decay},
            {'params': self.mlp.parameters(),
             'lr': self.cfg.mlp_lr,
             'weight_decay': self.cfg.mlp_weights_decay},
        ]
        optim = torch.optim.AdamW(grouped_parameters,
                                  betas=self.cfg.betas,
                                  eps=self.cfg.eps)
        return optim

In [10]:
# Build the encoder named in CFG.model with its pretrained weights. The call returns three
# values; the second is discarded and the third is kept as the image transform `preprocess`.
# NOTE(review): presumably the discarded value is the training-time transform and the kept
# one the evaluation transform - confirm against the open_clip documentation.
model, _, preprocess = open_clip.create_model_and_transforms(CFG.model.model,
                                                             pretrained=CFG.model.pretrained)

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


In [11]:
# Build the data module and run its hooks by hand so the datasets exist before training.
dm = PLDataModule(preprocess)
dm.prepare_data()
# `setup` does not read its `stage` argument, so the placeholder 0 has no effect.
dm.setup(0)

In [12]:
# Wrap the encoder in the Lightning module that adds the regression head.
pl_model = PLModule(model)

In [13]:
# Never store the API key in the notebook: it is read from the WANDB_API_KEY environment
# variable. When the variable is unset, `key=None` lets wandb fall back to its own
# credential lookup / interactive prompt.
# NOTE(review): a key was previously committed in this cell - revoke it in the W&B settings.
wandb.login(key=os.environ.get('WANDB_API_KEY'))
wandb.init(project='AIIJC',name='eva_vit_l')

[34m[1mwandb[0m: Currently logged in as: [33mandrey20007[0m ([33mandrey2007[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [14]:
# Callbacks: learning-rate logging once per epoch, and checkpointing on the lowest 'val_loss'
# (plus the last epoch) into ./outputs_convnext/.
# NOTE(review): lr_monitor, checkpoint_cb and bar are constructed here but none of them is
# passed to the Trainer below (the `callbacks` argument is commented out), so no checkpoint
# is written by these objects.
lr_monitor = pl.callbacks.LearningRateMonitor(logging_interval='epoch')
checkpoint_cb = pl.callbacks.ModelCheckpoint(
    dirpath='./outputs_convnext/',
    filename='model_{epoch:02d}-{val_loss:.4f}',
    monitor='val_loss',
    mode='min',
    save_last=True
)
bar = LitProgressBar()

# Single-GPU (device 0), full-precision trainer that logs to Weights & Biases, validates
# every epoch and stops after CFG.model.max_epoches epochs.
trainer = pl.Trainer(
    accelerator="gpu",
    precision=32,
    #callbacks = [lr_monitor,checkpoint_cb],
    logger = pl.loggers.WandbLogger(),
    min_epochs=1,
    devices=[0],
    check_val_every_n_epoch=1,
    max_epochs=CFG.model.max_epoches
)

  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [15]:
# Train the encoder and head on the train split, validating on the held-out split.
trainer.fit(pl_model,datamodule=dm)

You are using a CUDA device ('NVIDIA A100 80GB PCIe') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type           | Params
---------------------------------------------
0 | clip      | CustomTextCLIP | 149 M 
1 | mlp       | Sequential     | 528 K 
2 | criterion | RMSELoss       | 0     
---------------------------------------------
150 M     Trainable params
0         Non-trainable params
150 M     Total params
600.878   Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]



Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=4` reached.
wandb: Network error (ReadTimeout), entering retry loop.


In [15]:
# Run inference over dm.predict_dataloader(); `preds` holds one list of scores per batch
# (predict_step returns `logits.tolist()`).
preds = trainer.predict(pl_model,datamodule=dm)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

In [16]:
# Load an existing submission file to reuse its IMAGE column.
# NOTE(review): this is an absolute, machine-specific path and a different file from
# CFG.data.test_data (which the predictions were computed from); the rows are assumed to be
# in the same order - confirm.
test_df = pd.read_csv('/notebooks/simple_solution_nocliptrained.csv')

In [17]:
# Flatten the per-batch prediction lists into one array and store it as the SCORE column.
test_df['SCORE'] = np.concatenate(preds)

In [21]:
# Write the submission file without the pandas index.
# NOTE(review): in notebook order this cell comes before the rounding cell, but its execution
# count (21) is later than the rounding cell's (19), so the saved file presumably contains
# rounded scores - confirm, and reorder the cells so a top-to-bottom run reproduces it.
test_df.to_csv('convnextv8.csv',index=False)

In [19]:
# Round every predicted score with the built-in `round` (overwrites the column in place).
test_df['SCORE'] = test_df['SCORE'].map(round)

In [20]:
# Display the resulting frame as the cell output.
test_df

Unnamed: 0,IMAGE,SCORE
0,86cc6e863c9b6bb2a0e0db114c9775aa.jpg,8
1,da71671681d9cef5b60727801bf95ef8.jpg,6
2,821a9ff5df6e581c68c0371dc6b1eb90.jpg,8
3,ed842bb42c39fe257ac459b544bb7ba8.jpg,8
4,6eed69b8f6d62f28b809d9cbafcaab0b.jpg,6
...,...,...
9983,46aa642bbcfdbed5eed380ba10fdbbbc.jpg,5
9984,dc737dc9a19844943540816e4488b425.jpg,4
9985,8038a60dac57ba5d445b99978a36a1d5.jpg,6
9986,5381dd99c37acb63db903eac293fbe4c.jpg,4
