In [1]:
import os
import gc
import cv2
import time
import random

# For data manipulation
import numpy as np
import pandas as pd

# Pytorch Imports
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torch.cuda import amp
from pytorch_toolbelt import losses as L

# Utils
from tqdm.auto import tqdm

# For Image Models
import timm

# Albumentations for augmentations
import albumentations as A
from albumentations.pytorch import ToTensorV2

## using gpu:1
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

def seed_everything(seed=123):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
seed_everything()

In [2]:
class Customize_Model(nn.Module):
    def __init__(self, model_name, cls):
        super().__init__()
        self.model = timm.create_model(model_name, 
                                       pretrained=True, 
                                       num_classes=cls, 
                                       drop_rate= CFG['drop_out'], 
                                       drop_path_rate= CFG['drop_path'])
        
    def forward(self, image):
        x = self.model(image)
        return x

In [3]:
def get_train_transform(img_size):
    return A.Compose([
        A.SmallestMaxSize(max_size=img_size, interpolation=3, p=1),
#         A.Resize(img_size, img_size),
        
        A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5),
#         A.HorizontalFlip(p=0.5),
#         A.VerticalFlip(p=0.5),
#         A.Blur(blur_limit= 3, p=0.3), 
        A.GaussNoise(p=0.3),
        A.OneOf([
                A.Cutout(max_h_size=10, max_w_size=16),
                A.CoarseDropout(max_holes=4),
            ], p=0.5),
        A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.15, rotate_limit= 0,
                                        interpolation=cv2.INTER_LINEAR, border_mode=0, p=0.7),
        ToTensorV2(p=1.0),
    ])


def get_test_transform(img_size):
    return A.Compose([
        A.SmallestMaxSize(max_size=img_size, interpolation=3, p=1),
#         A.Resize(img_size, img_size),
        ToTensorV2(p=1.0),
    ])

In [4]:
from toolbox.audio_aug import *

class Customize_Dataset(Dataset):
    def __init__(self, df, transforms=None, mixup=0):
        self.df = df
        self.transforms = transforms
        self.mixup= mixup
        
    def mixup_aug(self, img_1, mask_1, 
                        img_2, mask_2):
        """
        img: numpy array of shape (height, width,channel)
        mask: numpy array of shape (height, width,channel)
        """
        ## mixup
        weight= np.random.beta(a=0.4, b=0.4)
        img= img_1*weight + img_2*(1-weight)
        mask= mask_1*weight + mask_2*(1-weight)
        return img.astype(np.uint8), mask
    
    def read_data(self, data):
        img = cv2.imread(data['image_path'])
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        
        label= [0]*CFG['num_classes']
        cls = data['label']
        label[cls]= 1
        label= np.array(label)
        return img, label
    
    def __getitem__(self, index):
        data = self.df.loc[index]
        img, label= self.read_data(data)
        
        # use mixup
        if self.mixup and np.random.rand() >= (1-self.mixup):
            img_1= img
            label_1= np.array(label)
            while True:
                indx= np.random.randint(len(self.df))
                data= self.df.loc[indx]
                img_2, label_2= self.read_data(data)
                if label_1.argmax(0)!=label_2.argmax(0): break
            img, label= self.mixup_aug(img_1, label_1, 
                                       img_2, label_2)
        
        if self.transforms:
            img = self.transforms(image=img)["image"]
            
        return {
            'image': torch.tensor(img/255, dtype=torch.float32),
            'label': torch.tensor(label, dtype=torch.float32),
        }
    
    def __len__(self):
        return len(self.df)

In [5]:
class Customize_loss(nn.Module):
    def  __init__(self):
        super().__init__()
        self.CrossEntropy= nn.CrossEntropyLoss(weight= cls_weight, label_smoothing=0.25)
        self.FocalCosineLoss= L.FocalCosineLoss()
        self.soft_ce= L.SoftCrossEntropyLoss(smooth_factor=0.25)
        self.bi_temp= L.BiTemperedLogisticLoss(t1=0.8, t2=1.2)
    
    def forward(self, y_pred, y_true):
        loss= 1.0 * self.CrossEntropy(y_pred, y_true)
        return loss

In [6]:
def train_epoch(dataloader, model, criterion, optimizer):
    scaler= amp.GradScaler()
    model.train()

    ep_loss= []
    for i, data in enumerate(tqdm(dataloader)):

        imgs= data['image'].to('cuda')
        labels= data['label'].to('cuda')
        
        with amp.autocast():
            preds= model(imgs)
            loss= criterion(preds, labels)
            ep_loss.append(loss.item())
            loss/= CFG['gradient_accumulation']
            scaler.scale(loss).backward()
            
            if (i+1) % CFG['gradient_accumulation']== 0:
                scaler.step(optimizer)
                scaler.update()
                optimizer.zero_grad()
                
    return np.mean(ep_loss)

In [7]:
from metrics import *

def valid_epoch(dataloader, model, criterion):
    model.eval()
    
    ep_loss= []
    all_pred= []
    all_label= []
    for i, data in enumerate(tqdm(dataloader)):

        imgs= data['image'].to('cuda')
        labels= data['label'].to('cuda')
        all_label.extend(labels.cpu().numpy())
        
        with torch.no_grad():
            preds= model(imgs)
            loss= criterion(preds, labels)
            ep_loss.append(loss.item())
        all_pred.extend(preds.cpu().softmax(dim=-1).numpy())
        
    
    ## caculate metrics
    all_label= np.array(all_label).argmax(1)
    all_pred= np.array(all_pred)
    
    acc= Accuracy(all_pred, all_label)
    print(f'accuracy: {acc}')
    recall= Mean_Recall(all_pred, all_label)
    print(f'mean_recall: {recall}')
    
    score= recall
    return np.mean(ep_loss), score

# CFG

In [8]:
timm.list_models(pretrained=True)

['adv_inception_v3',
 'bat_resnext26ts.ch_in1k',
 'beit_base_patch16_224.in22k_ft_in22k',
 'beit_base_patch16_224.in22k_ft_in22k_in1k',
 'beit_base_patch16_384.in22k_ft_in22k_in1k',
 'beit_large_patch16_224.in22k_ft_in22k',
 'beit_large_patch16_224.in22k_ft_in22k_in1k',
 'beit_large_patch16_384.in22k_ft_in22k_in1k',
 'beit_large_patch16_512.in22k_ft_in22k_in1k',
 'beitv2_base_patch16_224.in1k_ft_in22k',
 'beitv2_base_patch16_224.in1k_ft_in22k_in1k',
 'beitv2_large_patch16_224.in1k_ft_in22k',
 'beitv2_large_patch16_224.in1k_ft_in22k_in1k',
 'botnet26t_256',
 'cait_m36_384',
 'cait_m48_448',
 'cait_s24_224',
 'cait_s24_384',
 'cait_s36_384',
 'cait_xs24_384',
 'cait_xxs24_224',
 'cait_xxs24_384',
 'cait_xxs36_224',
 'cait_xxs36_384',
 'coat_lite_mini',
 'coat_lite_small',
 'coat_lite_tiny',
 'coat_mini',
 'coat_tiny',
 'coatnet_0_rw_224.sw_in1k',
 'coatnet_1_rw_224.sw_in1k',
 'coatnet_2_rw_224.sw_in12k',
 'coatnet_2_rw_224.sw_in12k_ft_in1k',
 'coatnet_3_rw_224.sw_in12k',
 'coatnet_bn_0_r

In [9]:
CFG= {
    'fold': 0,
    'epoch': 30,
    'model_name': 'poolformer_s24',
    'finetune': False,
    
    'img_size': 128,
    'batch_size': 64,
    'gradient_accumulation': 1,
    'gradient_checkpoint': False,
    'drop_out': 0.3,
    'drop_path': 0.2,
    
    'lr': 1e-4,
    'weight_decay': 0,
    
    'num_classes': 506, 
    'load_model': False, 
    'save_model': './train_model_copy'
}

if CFG['finetune']:
    CFG['lr']= 5e-5
    CFG['load_model']= f"./train_model_copy/cv{CFG['fold']}_best.pth"
CFG

{'fold': 0,
 'epoch': 30,
 'model_name': 'poolformer_s24',
 'finetune': False,
 'img_size': 128,
 'batch_size': 64,
 'gradient_accumulation': 1,
 'gradient_checkpoint': False,
 'drop_out': 0.3,
 'drop_path': 0.2,
 'lr': 0.0001,
 'weight_decay': 0,
 'num_classes': 506,
 'load_model': False,
 'save_model': './train_model_copy'}

# Prepare Dataset

In [10]:
df_1= pd.read_csv('Data/train_ex2020.csv')
df_2= pd.read_csv('Data/train_ex2021.csv')
df_3= pd.read_csv('Data/train_ex2022.csv')
df_4= pd.read_csv('Data/train_ex1.csv')
df= pd.concat([df_2, df_3], axis=0).reset_index(drop=True)
cmp_df= pd.read_csv('Data/train.csv')
cmp_name= list(cmp_df['group'].unique())
df= df[~df['group'].isin(cmp_name)].reset_index(drop=True)
label_name= df['label_name'].unique().tolist()
print(f'num_classes: {len(label_name)}')
label_dict= {}
for name in label_name: label_dict[name]= label_name.index(name)
df['label_name']= df['label_name'].map(label_dict)
df['label']= df['label_name']

train_dataset= df[df['fold']!=CFG['fold']].reset_index(drop=True)
label= train_dataset['label'].unique().tolist()

from sklearn.utils.class_weight import compute_class_weight
cls_weight= compute_class_weight(class_weight='balanced', classes=list(range(CFG['num_classes'])), y=train_dataset['label'].values)
cls_weight= torch.tensor(cls_weight).cuda()

valid_dataset= df[df['fold']==CFG['fold']].reset_index(drop=True)
print(f'train dataset: {len(train_dataset)}')
print(f'valid dataset: {len(valid_dataset)}')

train_dataset= Customize_Dataset(train_dataset, get_train_transform(CFG['img_size']), mixup=0.5)
valid_dataset= Customize_Dataset(valid_dataset, get_test_transform(CFG['img_size']), mixup=False)

train_loader= DataLoader(train_dataset, batch_size= CFG['batch_size'], shuffle=True, num_workers=0)
valid_loader= DataLoader(valid_dataset, batch_size=32, shuffle=False, num_workers=0)
df.head()

num_classes: 506
train dataset: 639564
valid dataset: 160401




Unnamed: 0,image_path,label,label_name,group,fold
0,Data/train_img_ex2021\acafly\XC109605._0.png,0,0,XC109605.,3.0
1,Data/train_img_ex2021\acafly\XC109605._1.png,0,0,XC109605.,3.0
2,Data/train_img_ex2021\acafly\XC109605._10.png,0,0,XC109605.,3.0
3,Data/train_img_ex2021\acafly\XC109605._11.png,0,0,XC109605.,3.0
4,Data/train_img_ex2021\acafly\XC109605._2.png,0,0,XC109605.,3.0


# Train

In [11]:
## create model
if CFG['load_model']:
    print(f"load_model: {CFG['load_model']}")
    model= torch.load(CFG['load_model'], map_location= 'cuda')
else:
    model= Customize_Model(CFG['model_name'], CFG['num_classes'])
    
if CFG['gradient_checkpoint']: 
    print('use gradient checkpoint')
    model.model.set_grad_checkpointing(enable=True)
model.to('cuda')
    
## hyperparameter
criterion= Customize_loss()
optimizer= optim.AdamW(model.parameters(), lr= CFG['lr'], weight_decay= CFG['weight_decay'])

## start training
best_score= 0
for ep in range(1, CFG['epoch']+1):
    print(f'\nep: {ep}')
    
    train_loss= train_epoch(train_loader, model, criterion, optimizer)
    valid_loss, valid_acc= valid_epoch(valid_loader, model, criterion)
    print(f'train loss: {round(train_loss, 5)}')
    print(f'valid loss: {round(valid_loss, 5)}, valid_acc: {round(valid_acc, 5)}')
    
    if valid_acc >= best_score:
        best_score= valid_acc
        torch.save(model, f"{CFG['save_model']}/cv{CFG['fold']}_best.pth")
        print(f'model save at score: {round(best_score, 5)}')
        
    ## save model every epoch
#     torch.save(model, f"{CFG['save_model']}/cv{CFG['fold']}_ep{ep}.pth")

Downloading: "https://github.com/sail-sg/poolformer/releases/download/v1.0/poolformer_s24.pth.tar" to C:\Users\User/.cache\torch\hub\checkpoints\poolformer_s24.pth.tar



ep: 1


  0%|          | 0/9994 [00:00<?, ?it/s]

  'image': torch.tensor(img/255, dtype=torch.float32),


  0%|          | 0/5013 [00:00<?, ?it/s]

accuracy: 0.24962437890038092
mean_recall: 0.2872241089485388
train loss: 13.22691
valid loss: 12.69125, valid_acc: 0.28722
model save at score: 0.28722

ep: 2


  _warn_prf(average, modifier, msg_start, len(result))


  0%|          | 0/9994 [00:00<?, ?it/s]

  'image': torch.tensor(img/255, dtype=torch.float32),


  0%|          | 0/5013 [00:00<?, ?it/s]

accuracy: 0.34106395845412435
mean_recall: 0.3948924841113419
train loss: 12.31575
valid loss: 12.39872, valid_acc: 0.39489
model save at score: 0.39489

ep: 3


  _warn_prf(average, modifier, msg_start, len(result))


  0%|          | 0/9994 [00:00<?, ?it/s]

  'image': torch.tensor(img/255, dtype=torch.float32),


  0%|          | 0/5013 [00:00<?, ?it/s]

accuracy: 0.3739752245933629
mean_recall: 0.43716848587756507
train loss: 11.99201
valid loss: 12.19058, valid_acc: 0.43717
model save at score: 0.43717

ep: 4


  _warn_prf(average, modifier, msg_start, len(result))


  0%|          | 0/9994 [00:00<?, ?it/s]

  'image': torch.tensor(img/255, dtype=torch.float32),


  0%|          | 0/5013 [00:00<?, ?it/s]

accuracy: 0.3985823031028485
mean_recall: 0.46229167792150555
train loss: 11.8059
valid loss: 12.10011, valid_acc: 0.46229
model save at score: 0.46229

ep: 5


  _warn_prf(average, modifier, msg_start, len(result))


  0%|          | 0/9994 [00:00<?, ?it/s]

  'image': torch.tensor(img/255, dtype=torch.float32),


  0%|          | 0/5013 [00:00<?, ?it/s]

accuracy: 0.4177405377771959
mean_recall: 0.4815871376643952
train loss: 11.67123
valid loss: 12.03154, valid_acc: 0.48159
model save at score: 0.48159

ep: 6


  _warn_prf(average, modifier, msg_start, len(result))


  0%|          | 0/9994 [00:00<?, ?it/s]

  'image': torch.tensor(img/255, dtype=torch.float32),


  0%|          | 0/5013 [00:00<?, ?it/s]

accuracy: 0.4239188034987313
mean_recall: 0.49168910655785075
train loss: 11.56997
valid loss: 11.96437, valid_acc: 0.49169
model save at score: 0.49169

ep: 7


  _warn_prf(average, modifier, msg_start, len(result))


  0%|          | 0/9994 [00:00<?, ?it/s]

  'image': torch.tensor(img/255, dtype=torch.float32),


  0%|          | 0/5013 [00:00<?, ?it/s]

accuracy: 0.43676161619940024
mean_recall: 0.503988229335552
train loss: 11.49331
valid loss: 11.94473, valid_acc: 0.50399
model save at score: 0.50399

ep: 8


  _warn_prf(average, modifier, msg_start, len(result))


  0%|          | 0/9994 [00:00<?, ?it/s]

  'image': torch.tensor(img/255, dtype=torch.float32),


  0%|          | 0/5013 [00:00<?, ?it/s]

accuracy: 0.4493051788953934
mean_recall: 0.5074956432578547
train loss: 11.42949
valid loss: 11.93471, valid_acc: 0.5075
model save at score: 0.5075

ep: 9


  _warn_prf(average, modifier, msg_start, len(result))


  0%|          | 0/9994 [00:00<?, ?it/s]

  'image': torch.tensor(img/255, dtype=torch.float32),


  0%|          | 0/5013 [00:00<?, ?it/s]

accuracy: 0.4372977724577777
mean_recall: 0.5083584567684056
train loss: 11.3669
valid loss: 11.87418, valid_acc: 0.50836
model save at score: 0.50836

ep: 10


  _warn_prf(average, modifier, msg_start, len(result))


  0%|          | 0/9994 [00:00<?, ?it/s]

  'image': torch.tensor(img/255, dtype=torch.float32),


  0%|          | 0/5013 [00:00<?, ?it/s]

accuracy: 0.445683006963797
mean_recall: 0.5113133207015257
train loss: 11.31773
valid loss: 11.86206, valid_acc: 0.51131
model save at score: 0.51131

ep: 11


  _warn_prf(average, modifier, msg_start, len(result))


  0%|          | 0/9994 [00:00<?, ?it/s]

  'image': torch.tensor(img/255, dtype=torch.float32),


  0%|          | 0/5013 [00:00<?, ?it/s]

accuracy: 0.46913672608026136
mean_recall: 0.5227692051300685
train loss: 11.27065
valid loss: 11.85429, valid_acc: 0.52277
model save at score: 0.52277

ep: 12


  _warn_prf(average, modifier, msg_start, len(result))


  0%|          | 0/9994 [00:00<?, ?it/s]

  'image': torch.tensor(img/255, dtype=torch.float32),


  0%|          | 0/5013 [00:00<?, ?it/s]

accuracy: 0.4571916633936197
mean_recall: 0.5228203323021017
train loss: 11.23187
valid loss: 11.85331, valid_acc: 0.52282
model save at score: 0.52282

ep: 13


  _warn_prf(average, modifier, msg_start, len(result))


  0%|          | 0/9994 [00:00<?, ?it/s]

  'image': torch.tensor(img/255, dtype=torch.float32),


  0%|          | 0/5013 [00:00<?, ?it/s]

accuracy: 0.450932350795818
mean_recall: 0.5146265292507333
train loss: 11.19708
valid loss: 11.85725, valid_acc: 0.51463

ep: 14


  _warn_prf(average, modifier, msg_start, len(result))


  0%|          | 0/9994 [00:00<?, ?it/s]

  'image': torch.tensor(img/255, dtype=torch.float32),


  0%|          | 0/5013 [00:00<?, ?it/s]

accuracy: 0.46311431973616124
mean_recall: 0.5273886682235172
train loss: 11.165
valid loss: 11.81632, valid_acc: 0.52739
model save at score: 0.52739

ep: 15


  _warn_prf(average, modifier, msg_start, len(result))


  0%|          | 0/9994 [00:00<?, ?it/s]

  'image': torch.tensor(img/255, dtype=torch.float32),


  0%|          | 0/5013 [00:00<?, ?it/s]

accuracy: 0.46554572602415195
mean_recall: 0.5335544783054046
train loss: 11.13949
valid loss: 11.81337, valid_acc: 0.53355
model save at score: 0.53355

ep: 16


  _warn_prf(average, modifier, msg_start, len(result))


  0%|          | 0/9994 [00:00<?, ?it/s]

  'image': torch.tensor(img/255, dtype=torch.float32),


  0%|          | 0/5013 [00:00<?, ?it/s]

accuracy: 0.45731011652047054
mean_recall: 0.5279031543781953
train loss: 11.11376
valid loss: 11.80297, valid_acc: 0.5279

ep: 17


  _warn_prf(average, modifier, msg_start, len(result))


  0%|          | 0/9994 [00:00<?, ?it/s]

  'image': torch.tensor(img/255, dtype=torch.float32),


  0%|          | 0/5013 [00:00<?, ?it/s]

accuracy: 0.45796472590569887
mean_recall: 0.5233933075088463
train loss: 11.09808
valid loss: 11.84375, valid_acc: 0.52339

ep: 18


  _warn_prf(average, modifier, msg_start, len(result))


  0%|          | 0/9994 [00:00<?, ?it/s]

  'image': torch.tensor(img/255, dtype=torch.float32),


  0%|          | 0/5013 [00:00<?, ?it/s]

accuracy: 0.4705456948522765
mean_recall: 0.5265434466951201
train loss: 11.0725
valid loss: 11.8154, valid_acc: 0.52654

ep: 19


  _warn_prf(average, modifier, msg_start, len(result))


  0%|          | 0/9994 [00:00<?, ?it/s]

  'image': torch.tensor(img/255, dtype=torch.float32),


  0%|          | 0/5013 [00:00<?, ?it/s]

accuracy: 0.4790306793598544
mean_recall: 0.5362391932832687
train loss: 11.05703
valid loss: 11.80325, valid_acc: 0.53624
model save at score: 0.53624

ep: 20


  _warn_prf(average, modifier, msg_start, len(result))


  0%|          | 0/9994 [00:00<?, ?it/s]

  'image': torch.tensor(img/255, dtype=torch.float32),


  0%|          | 0/5013 [00:00<?, ?it/s]

accuracy: 0.48698574198415223
mean_recall: 0.5407762644035399
train loss: 11.03984
valid loss: 11.80069, valid_acc: 0.54078
model save at score: 0.54078

ep: 21


  _warn_prf(average, modifier, msg_start, len(result))


  0%|          | 0/9994 [00:00<?, ?it/s]

  'image': torch.tensor(img/255, dtype=torch.float32),


  0%|          | 0/5013 [00:00<?, ?it/s]

accuracy: 0.4646853822607091
mean_recall: 0.5277459421709233
train loss: 11.0243
valid loss: 11.7814, valid_acc: 0.52775

ep: 22


  _warn_prf(average, modifier, msg_start, len(result))


  0%|          | 0/9994 [00:00<?, ?it/s]

  'image': torch.tensor(img/255, dtype=torch.float32),


  0%|          | 0/5013 [00:00<?, ?it/s]

accuracy: 0.47591972618624573
mean_recall: 0.5353359232331274
train loss: 11.00654
valid loss: 11.80723, valid_acc: 0.53534

ep: 23


  _warn_prf(average, modifier, msg_start, len(result))


  0%|          | 0/9994 [00:00<?, ?it/s]

  'image': torch.tensor(img/255, dtype=torch.float32),


KeyboardInterrupt: 