In [1]:
!nvidia-smi

Wed May 18 02:08:41 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 510.54       Driver Version: 510.54       CUDA Version: 11.6     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Quadro RTX 5000     Off  | 00000000:3B:00.0 Off |                  Off |
| 33%   29C    P0    48W / 230W |      0MiB / 16384MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  Quadro RTX 5000     Off  | 00000000:5E:00.0 Off |                  Off |
| 32%   31C    P0    39W / 230W |      0MiB / 16384MiB |      0%      Default |
|       

# Summary

* Augmentation      
    + 기본적으로 Albumentation 사용
    + Sobel filter를 이용한 증강을 추가(다른 filter 및 LBP 등등 사용해봤는데, Sobel filter가 가장 성능이 잘 나왔습니다)      
    + Train, Test 시에도 augmentation 수행      

* Train     
    + CosineAnnealingWarmUpRestarts - Scheduler(warmup을 하지 않고 학습)      
    + LabelSmoothing_with_CrossEntropy - Loss  
    + AdamW - optimizer
    + amp.GradScaler        
    + EarlyStopping(Loss, Acc 동시에 적용 |, & 대신 -> and, or 사용)      

* Model     
    + EfficientNet_b7 사용
    + 5-fold 진행
    + Public 기준 상위 2개 모델 blending      
        -> EfficientNet_b7 (Public-LB: 0.89066, LR: 2e-4)       
        -> EfficientNet_b7 (Public-LB: 0.88658, LR: 1.5e-4)     
    + TTA
        -> tta.HorizontalFlip()     
        -> tta.VerticalFlip()       
        -> tta.Rotate90(angles=[0, 90, 180, 270])       

# Library import

In [3]:
import os 
import cv2
import copy
import math
import random
import argparse
import datetime
import warnings
import numpy as np
import pandas as pd
import ttach as tta 
import albumentations
from albumentations.core.transforms_interface import ImageOnlyTransform

from glob import glob
from tqdm import tqdm
from sklearn.metrics import f1_score
from sklearn.model_selection import StratifiedKFold

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms

from torch.nn.modules.loss import _WeightedLoss
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import _LRScheduler
from efficientnet_pytorch import EfficientNet

# Suppress all warnings for the remainder of the session.
warnings.filterwarnings(action='ignore')


# Hyperparameter definition
seed = 10
# Run tag: the current clock shifted by nine hours, formatted as yymmdd_HHMM
# (presumably converting a UTC server clock to KST -- confirm).
suffix = (datetime.timedelta(hours=9) + datetime.datetime.now()).strftime("%y%m%d_%H%M")

config = dict(
    # Model parameters
    model='efficientnet_b7',
    batch_size=32,
    pretrain=True,

    # Optimizer parameters
    optimizer='AdamW',
    lr=2e-4,
    lr_t=15,
    lr_scheduler='CosineAnnealingWarmUpRestarts',
    gamma=0.524,
    loss_function='CE_with_Lb',
    patience=10,
    weight_decay=0.002157,
    label_smoothing=0.8283,

    # Training parameters
    epochs=200,
    n_fold=5,
    num_workers=16,
    text="A",
    device='0,1,2,3',
)


# Checkpoint path prefix; the fold number and ".pt" are appended when saving.
# It encodes the hyperparameters as they are at this point, so later changes to
# `config` are not reflected in the name.
model_save_name = (
    f"./RESULTS/{config['text']}_{suffix}("
    f"{config['model']}_{config['batch_size']}_{config['pretrain']}__"
    f"{config['optimizer']}_{config['lr']}_{config['lr_t']}_{config['lr_scheduler']}_"
    f"{config['gamma']}_{config['loss_function']}_{config['patience']}_"
    f"{config['weight_decay']}_{config['label_smoothing']})_fold_"
)

config['model_save_name'] = model_save_name
# -------------------------------------------------------------------------------------------

# Seed every random number generator used by the notebook.
os.environ["PYTHONHASHSEED"] = str(seed)
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)  # if use multi-GPU
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Select the GPUs before the first CUDA query below.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # Arrange GPU devices starting from 0
os.environ["CUDA_VISIBLE_DEVICES"] = config['device']

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f'Device: {device}')
if device.type == 'cuda' or torch.cuda.device_count() > 1:
    print(f'GPU activate --> Count of using GPUs: {torch.cuda.device_count()}')
# From here on config['device'] holds the torch.device, no longer the GPU id string.
config['device'] = device

# -------------------------------------------------------------------------------------------

Device: cuda
GPU activate --> Count of using GPUs: 4


* Augmentation 시 albumentation을 사용하였으며, 추가적으로 Sobel filter를 이용하여 edge를 추출 후 unsharp image와 합친 Augmentation을 만들어 사용

* train 시 Sharpen, HueSaturationValue, FancyPCA, Emboss의 augmentation을 통해 이미지를 선명하게 만든 후 다른 augmentation을 수행

* test 시에는 Sharpen, HueSaturationValue, FancyPCA, Emboss의 augmentation만 수행            

### Definition functions

In [4]:
def img_load(path, size=(512, 512)):
    """Read an image file and return it resized, with the channel axis reversed.

    Args:
        path: Path of the image file, handed to ``cv2.imread``.
        size: Target size passed to ``cv2.resize``. Defaults to ``(512, 512)``,
            the value that used to be hard-coded here.

    Returns:
        The resized image array. The last axis is reversed relative to what
        ``cv2.imread`` returned (OpenCV decodes to BGR, so the result is RGB).

    Raises:
        FileNotFoundError: If ``cv2.imread`` could not read ``path``.
    """
    img = cv2.imread(path)
    if img is None:
        # cv2.imread reports a missing or undecodable file by returning None;
        # slicing that would raise an unhelpful TypeError.
        raise FileNotFoundError(f"cv2.imread could not read image: {path}")
    img = img[:, :, ::-1]
    return cv2.resize(img, size)


def score_function(real, pred):
    """Return the macro-averaged F1 score of ``pred`` measured against ``real``."""
    return f1_score(real, pred, average="macro")


class Magnitude_Sobel32f_Unsharp_compose(ImageOnlyTransform):
    """Image-only augmentation that adds a Sobel gradient magnitude to an unsharp-masked image.

    Args:
        dx, dy: Derivative orders for the first Sobel call; the second call uses
            them swapped.
        ksize: Sobel kernel size.
        blur_limit: Passed unchanged to ``cv2.GaussianBlur`` as its kernel size.
        sigmaX: Gaussian sigma, also used as the weight of the source image in
            the unsharp blend.
        always_apply, p: Forwarded to ``ImageOnlyTransform``.
    """

    def __init__(self, dx=1, dy=0, ksize=3, blur_limit=(1,5), sigmaX=2.0, always_apply=False, p=0.5):
        super().__init__(always_apply=always_apply, p=p)
        self.dx, self.dy = dx, dy
        self.ksize = ksize
        self.blur_limit = blur_limit
        self.sigmaX = sigmaX

    def apply(self, img, **params):
        """Return the unsharp-masked ``img`` plus its clipped gradient magnitude."""
        # Two float32 Sobel responses with the derivative orders swapped.
        grad_first = cv2.Sobel(img, cv2.CV_32F, self.dx, self.dy, ksize=self.ksize)
        grad_second = cv2.Sobel(img, cv2.CV_32F, self.dy, self.dx, ksize=self.ksize)
        edges = np.clip(cv2.magnitude(grad_first, grad_second), 0, 255).astype(np.uint8)

        # NOTE(review): blur_limit is used as a fixed kernel size here, not as a
        # range to sample from, despite its name.
        blurred = cv2.GaussianBlur(img, self.blur_limit, self.sigmaX)
        # NOTE(review): sigmaX doubles as the blend weight; the result is
        # sigmaX * img - blurred, so the weights sum to 1 only when sigmaX == 2.0.
        sharpened = cv2.addWeighted(img, self.sigmaX, blurred, -1.0, 0)

        return cv2.add(sharpened, edges)


class Custom_dataset(Dataset):
    """Dataset that returns an augmented image, the untouched image and its label.

    Args:
        img_paths: Indexable collection of images. Despite the name, each
            element is passed directly to albumentations as ``image=``, so it
            holds image arrays rather than file paths.
        labels: Indexable collection of labels, aligned with ``img_paths``.
        mode: ``'train'`` selects the full training pipeline; any other value
            selects the lighter test pipeline.
    """

    def __init__(self, img_paths, labels, mode='train'):
        # Function-scope import on purpose: the inference cell rebinds the
        # notebook-level name `transforms` to a ttach pipeline, so looking up
        # `transforms.ToTensor` through the global name at item-fetch time
        # fails once that cell has run. Binding the converter here avoids
        # depending on that global.
        from torchvision.transforms import ToTensor

        self.img_paths = img_paths
        self.labels = labels
        self.mode = mode
        self._to_tensor = ToTensor()

        # Training pipeline: sharpening/colour transforms first, then
        # geometric, contrast, noise and distortion transforms.
        self.augmentation = albumentations.Compose([
            albumentations.Sharpen(p=0.7),
            albumentations.HueSaturationValue(hue_shift_limit=5, sat_shift_limit=5, val_shift_limit=5, p=0.3),
            albumentations.FancyPCA(alpha=0.1, p=0.3),
            albumentations.Emboss(p=0.5),
            Magnitude_Sobel32f_Unsharp_compose(dx=1, dy=0, ksize=1, blur_limit=(1,5), sigmaX=2.0, p=0.3),

            albumentations.Transpose(p=0.3),
            albumentations.HorizontalFlip(p=0.5),
            albumentations.RandomRotate90(p=0.5),
            albumentations.VerticalFlip(p=0.5),

            albumentations.CLAHE(clip_limit=5, p=0.4),
            albumentations.ElasticTransform(alpha_affine=30, p=0.4),
            albumentations.Posterize(p=0.5),

            albumentations.GaussNoise(p=0.3),
            albumentations.GaussianBlur(blur_limit=(1, 5), p=0.3),
            albumentations.GlassBlur(sigma=0.1, max_delta=2, iterations=1, p=0.2),
            albumentations.GridDistortion(num_steps=20, distort_limit=0.3, border_mode=1, p=0.2),
            ])

        # Test pipeline: only the sharpening/colour transforms. These are
        # still applied with probability < 1, so test-mode items are random.
        self.test_augmentation = albumentations.Compose([
            albumentations.Sharpen(p=0.7),
            albumentations.HueSaturationValue(hue_shift_limit=5, sat_shift_limit=5, val_shift_limit=5, p=0.3),
            albumentations.FancyPCA(alpha=0.1, p=0.3),
            albumentations.Emboss(p=0.5),
            ])

    def __len__(self):
        """Return the number of stored images."""
        return len(self.img_paths)

    def __getitem__(self, idx):
        """Return a dict with keys ``'img'``, ``'raw_img'`` and ``'label'`` for item ``idx``.

        ``'img'`` is the augmented image and ``'raw_img'`` an unaugmented copy,
        both converted with ``ToTensor``. ``'label'`` is ``labels[idx]`` as is.
        """
        img = self.img_paths[idx]
        # Copy before augmenting so the raw image cannot be affected by it.
        raw_img = copy.deepcopy(img)

        pipeline = self.augmentation if self.mode == 'train' else self.test_augmentation
        img = pipeline(image=img)['image']

        return {'img' : self._to_tensor(img),
                'raw_img' : self._to_tensor(raw_img),
                'label' : self.labels[idx]}


class CosineAnnealingWarmUpRestarts(_LRScheduler):
    """Cosine-annealing learning-rate schedule with optional linear warm-up and restarts.

    Within a cycle the learning rate first ramps linearly from each group's
    base lr to ``eta_max`` over ``T_up`` steps, then follows a half cosine from
    ``eta_max`` back towards the base lr. At each restart ``eta_max`` is
    multiplied by ``gamma``, and the non-warm-up part of the cycle length by
    ``T_mult``.

    Args:
        optimizer: Wrapped optimizer; its configured lr is the floor of the schedule.
        T_0: Length of the first cycle, a positive integer.
        T_mult: Integer cycle-length multiplier, at least 1.
        eta_max: Peak learning rate of the first cycle.
        T_up: Number of warm-up steps per cycle, a non-negative integer.
        gamma: Factor applied to the peak learning rate at every restart.
        last_epoch: Index of the last epoch, forwarded to the base class.

    Raises:
        ValueError: If ``T_0``, ``T_mult`` or ``T_up`` fails its check.
    """

    def __init__(self, optimizer, T_0, T_mult=1, eta_max=0.1, T_up=0, gamma=1., last_epoch=-1):
        if T_0 <= 0 or not isinstance(T_0, int):
            raise ValueError(f"Expected positive integer T_0, but got {T_0}")
        if T_mult < 1 or not isinstance(T_mult, int):
            raise ValueError(f"Expected integer T_mult >= 1, but got {T_mult}")
        if T_up < 0 or not isinstance(T_up, int):
            raise ValueError(f"Expected positive integer T_up, but got {T_up}")

        # Static schedule parameters.
        self.T_0, self.T_mult, self.T_up = T_0, T_mult, T_up
        self.base_eta_max = eta_max
        self.gamma = gamma

        # Mutable state. It must be in place before the base-class constructor
        # runs, because that constructor reads it through this class's methods.
        self.eta_max = eta_max
        self.T_i = T_0
        self.cycle = 0
        self.T_cur = last_epoch
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        """Return the learning rate of every parameter group for the current position."""
        if self.T_cur == -1:
            return self.base_lrs

        if self.T_cur < self.T_up:
            # Linear warm-up from base_lr to eta_max.
            return [(self.eta_max - base_lr)*self.T_cur / self.T_up + base_lr
                    for base_lr in self.base_lrs]

        # Cosine decay over the remainder of the cycle.
        cosine_term = 1 + math.cos(math.pi * (self.T_cur-self.T_up) / (self.T_i - self.T_up))
        return [base_lr + (self.eta_max - base_lr) * cosine_term / 2
                for base_lr in self.base_lrs]

    def step(self, epoch=None):
        """Advance the schedule by one step, or jump to ``epoch``, and update the optimizer."""
        if epoch is None:
            epoch = self.last_epoch + 1
            self.T_cur += 1
            if self.T_cur >= self.T_i:
                # Cycle finished: restart and stretch the next cycle.
                self.cycle += 1
                self.T_cur -= self.T_i
                self.T_i = (self.T_i - self.T_up) * self.T_mult + self.T_up
        elif epoch >= self.T_0 and self.T_mult == 1:
            # Equal-length cycles: position and cycle index by division.
            self.T_cur = epoch % self.T_0
            self.cycle = epoch // self.T_0
        elif epoch >= self.T_0:
            # Geometrically growing cycles: find the cycle index from the epoch.
            n = int(math.log((epoch / self.T_0 * (self.T_mult - 1) + 1), self.T_mult))
            self.cycle = n
            self.T_cur = epoch - self.T_0 * (self.T_mult ** n - 1) / (self.T_mult - 1)
            self.T_i = self.T_0 * self.T_mult ** (n)
        else:
            # Still inside the first cycle.
            self.T_i = self.T_0
            self.T_cur = epoch

        self.eta_max = self.base_eta_max * (self.gamma**self.cycle)
        self.last_epoch = math.floor(epoch)
        for group, new_lr in zip(self.optimizer.param_groups, self.get_lr()):
            group['lr'] = new_lr


class SmoothCrossEntropyLoss(_WeightedLoss):
    """Cross-entropy loss computed against label-smoothed one-hot targets.

    Args:
        weight: Optional per-class weights multiplied into the log-probabilities.
        reduction: ``'mean'`` or ``'sum'``; any other value returns per-sample losses.
        smoothing: Probability mass moved off the true class and spread evenly
            over the remaining classes. Must satisfy ``0 <= smoothing < 1``.
    """

    def __init__(self, weight=None, reduction='mean', smoothing=0.0):
        super().__init__(weight=weight, reduction=reduction)
        self.smoothing = smoothing
        self.weight = weight
        self.reduction = reduction

    @staticmethod
    def _smooth_one_hot(targets:torch.Tensor, n_classes:int, smoothing=0.0):
        """Return an ``(N, n_classes)`` tensor of smoothed one-hot rows for class indices ``targets``."""
        assert 0 <= smoothing < 1
        off_value = smoothing / (n_classes - 1)
        on_value = 1. - smoothing
        with torch.no_grad():
            smoothed = torch.full((targets.size(0), n_classes), off_value,
                                  device=targets.device)
            smoothed.scatter_(1, targets.unsqueeze(1), on_value)
        return smoothed

    def forward(self, inputs, targets):
        """Return the loss of logits ``inputs`` against integer class ``targets``."""
        soft_targets = SmoothCrossEntropyLoss._smooth_one_hot(
            targets, inputs.size(-1), self.smoothing)
        log_probs = F.log_softmax(inputs, -1)

        if self.weight is not None:
            log_probs = log_probs * self.weight.unsqueeze(0)

        per_sample = -(soft_targets * log_probs).sum(-1)

        if self.reduction == 'sum':
            return per_sample.sum()
        if self.reduction == 'mean':
            return per_sample.mean()
        return per_sample


class EarlyStopping(object):
    """Signal when a monitored metric has stopped improving.

    Args:
        mode: ``'min'`` if lower is better, ``'max'`` if higher is better.
        min_delta: Minimum change that counts as an improvement. It is an
            absolute amount, or a percentage of the best value when
            ``percentage`` is true.
        patience: Number of consecutive non-improving steps after which
            ``step`` returns ``True``. ``0`` disables early stopping.
        percentage: Interpret ``min_delta`` as a percentage of the best value.

    Raises:
        ValueError: If ``mode`` is neither ``'min'`` nor ``'max'``.
    """

    def __init__(self, mode='min', min_delta=0, patience=10, percentage=False):
        self.mode = mode
        self.min_delta = min_delta
        self.patience = patience
        self.best = None
        self.num_bad_epochs = 0
        self.is_better = None
        self._init_is_better(mode, min_delta, percentage)

        if patience == 0:
            # Disabled: every value counts as better and step never stops.
            self.is_better = lambda a, b: True
            self.step = lambda a: False

    @staticmethod
    def _is_nan(value):
        """Return True if ``value`` is NaN; works for floats and one-element tensors."""
        # NaN is the only value not equal to itself. Unlike torch.isnan this
        # does not require a tensor argument.
        return bool(value != value)

    def step(self, metrics):
        """Record one metric value and return ``True`` if training should stop.

        A NaN metric stops immediately, including on the first call, so NaN is
        never stored as the best value.
        """
        if self._is_nan(metrics):
            return True

        if self.best is None:
            self.best = metrics
            return False

        if self.is_better(metrics, self.best):
            self.num_bad_epochs = 0
            self.best = metrics
        else:
            self.num_bad_epochs += 1

        return self.num_bad_epochs >= self.patience

    def _init_is_better(self, mode, min_delta, percentage):
        """Install the ``is_better(value, best)`` comparison for the given settings."""
        if mode not in {'min', 'max'}:
            raise ValueError('mode ' + mode + ' is unknown!')
        if not percentage:
            if mode == 'min':
                self.is_better = lambda a, best: a < best - min_delta
            if mode == 'max':
                self.is_better = lambda a, best: a > best + min_delta
        else:
            if mode == 'min':
                self.is_better = lambda a, best: a < best - (
                            best * min_delta / 100)
            if mode == 'max':
                self.is_better = lambda a, best: a > best + (
                            best * min_delta / 100)

# Modeling & Prediction

In [5]:
class Network(torch.nn.Module):
    """Classifier wrapping an EfficientNet-B7 backbone.

    Args:
        config: Mapping that provides
            ``'model'`` (only ``'efficientnet_b7'`` is supported),
            ``'class_num'`` (number of output classes) and
            ``'pretrain'`` (load pretrained weights when true).

    Raises:
        ValueError: If ``config['model']`` names an unsupported architecture.
    """

    def __init__(self, config):
        super(Network, self).__init__()
        model_name = config['model']
        self.class_num = config['class_num']
        self.pretrain = config['pretrain']

        if model_name != 'efficientnet_b7':
            # Fail at construction instead of leaving self.model as a string
            # that would only fail later, inside forward().
            raise ValueError('Unsupported model: %s' % model_name)

        if self.pretrain:
            self.model = EfficientNet.from_pretrained('efficientnet-b7', num_classes=self.class_num)
        else:
            # Honour pretrain=False by building the architecture without
            # loading pretrained weights.
            # NOTE(review): passing num_classes as a keyword assumes the
            # keyword-override form of from_name -- confirm against the
            # installed efficientnet_pytorch version.
            self.model = EfficientNet.from_name('efficientnet-b7', num_classes=self.class_num)

    def forward(self, x):
        """Return the backbone's output for input batch ``x``."""
        return self.model(x)

### Data load

In [6]:
# Dataload
train_png = sorted(glob('./open/train/*.png'))
train_y = pd.read_csv("./open/train_df.csv")

# Map every distinct label value to an integer id, assigned in sorted label order,
# then encode the label column with it.
label_unique = {name: idx for idx, name in enumerate(sorted(np.unique(train_y["label"])))}
train_labels = [label_unique[k] for k in train_y["label"]]
config['class_num'] = len(label_unique)

# NOTE(review): images are taken in sorted file-name order and labels in CSV row
# order; this assumes the two orders agree -- confirm against train_df.csv.
train_imgs = [img_load(m) for m in tqdm(train_png)]

100%|██████████| 4277/4277 [01:39<00:00, 42.92it/s]


### Train

In [7]:
def train_func(train_imgs, train_labels, config):
    """Train one model per stratified fold and save a checkpoint for each fold.

    For every fold the validation loss and macro F1 are tracked. A checkpoint
    is written to ``config['model_save_name'] + <fold number> + '.pt'`` whenever
    the validation F1 reaches its best value so far OR the validation loss
    does, so the saved epoch is the latest one that improved either metric.

    Args:
        train_imgs: Sequence of training images.
        train_labels: Sequence of integer class labels aligned with ``train_imgs``.
        config: Hyperparameter mapping. Keys read here: ``model_save_name``,
            ``n_fold``, ``batch_size``, ``num_workers``, ``device``,
            ``lr_scheduler``, ``lr``, ``lr_t``, ``gamma``, ``weight_decay``,
            ``label_smoothing``, ``patience`` and ``epochs``.

    Returns:
        The last path component of ``config['model_save_name']``, i.e. the
        checkpoint file-name prefix.

    Raises:
        ValueError: If ``config['lr_scheduler']`` is not a supported name.

    Note:
        The fold split uses the notebook-level ``seed`` as its random state.
    """
    save_prefix = config['model_save_name']
    run_name = save_prefix.split("/")[-1]
    device = config['device']
    n_fold = config['n_fold']
    epochs = config['epochs']

    print('model_save_name: '+run_name)

    # torch.save does not create missing directories.
    save_dir = os.path.dirname(save_prefix)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    # Cross Validation
    kfold = StratifiedKFold(n_splits=n_fold, shuffle=True, random_state=seed)
    fold_valid_loss, fold_valid_f1 = [], []

    for fold, (train_idx, valid_idx) in enumerate(kfold.split(train_imgs, train_labels)):

        Train_set = [train_imgs[i] for i in train_idx]
        Valid_set = [train_imgs[i] for i in valid_idx]
        Train_label_set = [train_labels[i] for i in train_idx]
        Valid_label_set = [train_labels[i] for i in valid_idx]

        # NOTE(review): prefetch_factor counts batches per worker, so
        # batch_size*2 queues up a very large number of batches -- confirm
        # this is intended.
        # Train
        Train_dataset = Custom_dataset(np.array(Train_set), np.array(Train_label_set), mode='train')
        Train_loader = DataLoader(Train_dataset, batch_size=config['batch_size'], pin_memory=True,
                                num_workers=config['num_workers'], prefetch_factor=config['batch_size']*2,
                                shuffle=True)

        # Valid
        Valid_dataset = Custom_dataset(np.array(Valid_set), np.array(Valid_label_set), mode='test')
        Valid_loader = DataLoader(Valid_dataset, batch_size=config['batch_size'], pin_memory=True,
                                num_workers=config['num_workers'], prefetch_factor=config['batch_size']*2,
                                shuffle=True)

        model = Network(config).to(device)
        model = nn.DataParallel(model).to(device)

        if config['lr_scheduler'] == 'CosineAnnealingLR':
            optimizer = torch.optim.AdamW(model.parameters(), lr=config['lr'], weight_decay=config['weight_decay'])
            scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=config['lr_t'], eta_min=0)

        elif config['lr_scheduler'] == 'CosineAnnealingWarmUpRestarts':
            # The optimizer lr of 0 is the floor of the schedule; the peak is eta_max.
            optimizer = torch.optim.AdamW(model.parameters(), lr=0, weight_decay=config['weight_decay'])
            scheduler = CosineAnnealingWarmUpRestarts(optimizer, T_0=config['lr_t'], eta_max=config['lr'], gamma=config['gamma'], T_mult=1, T_up=0)

        else:
            raise ValueError('Unsupported lr_scheduler: %s' % config['lr_scheduler'])

        criterion = SmoothCrossEntropyLoss(smoothing=config['label_smoothing']).to(device)
        scaler = torch.cuda.amp.GradScaler()
        early_stopping = EarlyStopping(patience=config['patience'], mode='max')
        early_stopping_loss = EarlyStopping(patience=config['patience'], mode='min')

        best = 0.5
        best_loss = 100
        # Metrics of the most recently saved checkpoint. Initialised per fold
        # so they are always defined and never carried over from another fold.
        saved_f1, saved_loss = float('nan'), float('nan')

        for epoch in range(epochs):
            train_loss, train_pred, train_real = 0, [], []
            valid_loss, valid_pred, valid_real = 0, [], []

            model.train()
            for batch in tqdm(Train_loader, total=len(Train_loader)):

                optimizer.zero_grad()
                x = batch['img'].to(device, dtype=torch.float32)
                y = batch['label'].to(device, dtype=torch.long)

                # Forward pass and loss both run under autocast, so the
                # log-softmax is not evaluated on half-precision logits.
                with torch.cuda.amp.autocast():
                    pred = model(x)
                    loss = criterion(pred, y)

                scaler.scale(loss).backward()
                scaler.step(optimizer)
                scaler.update()

                train_loss += loss.item()
                train_pred += pred.argmax(1).detach().cpu().numpy().tolist()
                train_real += y.detach().cpu().numpy().tolist()
            train_loss = train_loss/len(Train_loader)
            train_f1 = score_function(train_real, train_pred)
            scheduler.step()

            model.eval()
            with torch.no_grad():
                for val_batch in tqdm(Valid_loader, total=len(Valid_loader)):
                    val_x = val_batch['img'].to(device, dtype=torch.float32)
                    val_y = val_batch['label'].to(device, dtype=torch.long)

                    val_pred = model(val_x)
                    val_loss = criterion(val_pred, val_y)

                    valid_loss += val_loss.item()
                    valid_pred += val_pred.argmax(1).cpu().numpy().tolist()
                    valid_real += val_y.cpu().numpy().tolist()

            valid_loss = valid_loss/len(Valid_loader)
            valid_f1 = score_function(valid_real, valid_pred)

            improved_f1 = valid_f1 >= best
            improved_loss = valid_loss <= best_loss
            best_msg = None
            if improved_f1 or improved_loss:
                difference = valid_f1 - best
                if improved_f1:
                    best = valid_f1
                if improved_loss:
                    best_loss = valid_loss

                saved_f1, saved_loss = valid_f1, valid_loss

                # Save the unwrapped module's weights so they can be loaded
                # with or without DataParallel.
                torch.save(model.module.state_dict(), save_prefix + str(fold+1) + ".pt")
                best_msg = '==> best model saved %d epoch / acc: %.5f  loss: %.5f  /  difference %.5f'%(epoch+1, saved_f1, saved_loss, difference)

            print(f'Fold : {fold+1}/{n_fold}    epoch : {epoch+1}/{epochs}')
            print(f'TRAIN_Loss : {train_loss:.5f}    TRAIN_F1 : {train_f1:.5f}')
            print(f'VALID_Loss : {valid_loss:.5f}    VALID_F1 : {valid_f1:.5f}    BEST : {saved_f1:.5f}    BEST_LOSS : {saved_loss:.5f}')
            if best_msg is None:
                print('\n')
            else:
                print(best_msg,'\n')

            # Step BOTH stoppers every epoch before combining them. A plain
            # `a.step(...) and b.step(...)` short-circuits, so the loss
            # stopper would only be updated once the F1 stopper had fired.
            stop_on_f1 = early_stopping.step(torch.tensor(valid_f1))
            stop_on_loss = early_stopping_loss.step(torch.tensor(valid_loss))
            if stop_on_f1 and stop_on_loss:
                break

        print("VALID Loss: ", saved_loss, ", VALID F1: ", saved_f1)

        fold_valid_loss.append(saved_loss)
        fold_valid_f1.append(saved_f1)

    print(run_name + ' is saved!')

    # Report every fold that actually ran, whatever n_fold is.
    for fold_no, (f_loss, f_f1) in enumerate(zip(fold_valid_loss, fold_valid_f1), start=1):
        print(f"{fold_no}Fold - VALID Loss: ", f_loss, f", {fold_no}Fold - VALID F1: ", f_f1)

    print("k-fold Valid Loss: ",np.mean(fold_valid_loss),", k-fold Valid F1: ",np.mean(fold_valid_f1))

    return run_name

In [None]:
# Train the first model with the current `config`; the returned value is the
# checkpoint file-name prefix used later for inference.
model_path_1 = train_func(train_imgs, train_labels, config)

# To blend two models, the code below must be run as well.
# NOTE(review): config['model_save_name'] was built before this point, so
# changing config['lr'] alone keeps the same checkpoint prefix; the second run
# would write to the same files as the first unless the name is rebuilt.
# config['lr'] = 1.5e-4
# model_path_2 = train_func(train_imgs, train_labels, config)


# Inference

In [None]:
config['model_save_list'] = [model_path_1] # Blending X
# config['model_save_list'] = [model_path_1, model_path_2] # Blending O
config['save_name'] = 'INFERNECE_SAMPLE'


# Dataload
# Relative path, consistent with './open/train' and './open/sample_submission.csv'
# used elsewhere in this notebook.
test_png = sorted(glob('./open/test/*.png'))
if not test_png:
    # An empty match would otherwise yield an empty loader and only fail much
    # later, when the predictions are written to the submission.
    raise FileNotFoundError("No test images matched './open/test/*.png'")
test_imgs = [img_load(n) for n in tqdm(test_png)]

# Test
# The labels are placeholders; the dataset requires one label per image.
Test_dataset = Custom_dataset(np.array(test_imgs), np.array(["tmp"]*len(test_imgs)), mode='test')
Test_loader = DataLoader(Test_dataset, batch_size=config['batch_size'], pin_memory=True,
                        num_workers=config['num_workers'], prefetch_factor=config['batch_size']*2,
                        shuffle=False)

# Test-time-augmentation pipeline: 2 flips x 2 flips x 4 rotations per image.
# NOTE: this rebinds the notebook-level name `transforms`, which until here
# referred to `torchvision.transforms`. Code that later resolves
# `transforms.ToTensor` through the global name will see this pipeline instead.
# The name is kept because the following cell reads it.
transforms = tta.Compose([
    tta.HorizontalFlip(),
    tta.VerticalFlip(),
    tta.Rotate90(angles=[0, 90, 180, 270]),
    ])

In [None]:
# Load every fold checkpoint of every listed run, plus a TTA wrapper for each.
models = []
tta_models = []

for model_name in config['model_save_list']:
    for fold in range(config['n_fold']):

        model_dict = torch.load('./RESULTS/'+model_name + str(fold+1) + ".pt",
                                map_location=config['device'])
        model = Network(config).to(config['device'])
        model = nn.DataParallel(model).to(config['device'])
        # The checkpoint holds the unwrapped module's weights, and the model is
        # always wrapped in DataParallel here, so always load into `.module`,
        # whatever the GPU count.
        model.module.load_state_dict(model_dict)

        tta_model = tta.ClassificationTTAWrapper(model, transforms, merge_mode='sum').to(config['device'])

        # Inference only: switch to eval mode once, not on every batch.
        model.eval()
        tta_model.eval()

        models.append(model)
        tta_models.append(tta_model)

# Predict: for each batch, sum the plain and the TTA outputs of all models.
results = []
with torch.no_grad():
    for batch in tqdm(Test_loader, total=len(Test_loader)):
        x = batch['img'].to(device, dtype=torch.float32)
        raw_x = batch['raw_img'].to(device, dtype=torch.float32)

        output = None
        with torch.cuda.amp.autocast():
            for model, tta_model in zip(models, tta_models):
                if output is None:
                    output = model(x)+tta_model(raw_x)
                else:
                    output = output+model(x)+tta_model(raw_x)

        # argmax is unchanged by positive scaling, so the summed scores are
        # used directly.
        results.extend(output.argmax(dim=-1).cpu().tolist())

# Decode class ids back to the original labels and write the submission.
label_decoder = {val:key for key, val in label_unique.items()}
results = [label_decoder[result] for result in results]
submission = pd.read_csv("./open/sample_submission.csv")
submission["label"] = results

submission.to_csv("./RESULTS/{}.csv".format(config['save_name']), index=False)