## libraries

In [None]:
import sys
import warnings
import sklearn.exceptions
warnings.filterwarnings("ignore")
#general
from sklearn.model_selection import StratifiedKFold
from sklearn.svm import SVR
from sklearn.preprocessing import RobustScaler
import pickle
from tqdm.auto import tqdm
from collections import defaultdict
import os
import numpy as np
import pandas as pd
import random
import gc
import cv2
gc.enable()
import glob
pd.set_option('display.max_columns', None) 
from sklearn.linear_model import RidgeCV

# visualization
import matplotlib.pyplot as plt
%matplotlib inline

# augmentation
from albumentations.pytorch import ToTensorV2
import albumentations as A

# deep learning
import timm
from torch.cuda.amp import autocast, GradScaler
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, OneCycleLR, CosineAnnealingLR, ReduceLROnPlateau, StepLR, LambdaLR
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import imageio
from PIL import Image
from tqdm.notebook import tqdm
tqdm.pandas()

# metrics
from sklearn.metrics import mean_squared_error
from torch.nn import DataParallel

## Config

In [None]:
class Config:
    """Experiment-wide settings, read as class attributes throughout the notebook.

    Nothing is instantiated: every consumer accesses ``Config.<name>`` directly.
    Attributes that depend on earlier ones (paths, ``mixup_epoch``, ``tta_beta``)
    are computed once, while the class body executes.
    """

    # ---- run / hardware -------------------------------------------------
    expid = "010923"  # experiment id, used as a prefix for saved files
    num_workers = 10  # DataLoader worker count
    inp_channels = 3  # input channels
    gpu_parallel = False  # wrap the model in DataParallel over all visible GPUs
    batch_size = 16

    out_features = 1  # output dim
    epoch_step_valid = 3  # mid-epoch validation only starts from this epoch on
    data_dir = "/home/xm/workspace/petfinder-pawpularity-score/"  # dataset root
    output_dir = f"/home/xm/workspace/output"  # where checkpoints / OOF csv files go
    img_train_dir = os.path.join(data_dir, "train")  # training images
    img_test_dir = os.path.join(data_dir, "test")  # test images
    random_seed = 42  # seed

    # ---- training schedule ----------------------------------------------
    n_epoch = 5
    n_fold = 10  # number of stratified CV folds
    steps_per_epoch = 31  # run a validation pass every this many training batches
    # timm model name; alternatives noted by the author:
    # swin_large_patch4_window12_384, swin_large_patch4_window7_224
    model_path = "swin_large_patch4_window7_224"
    pretrained = True  # start from pretrained weights
    im_size = 224  # image size

    # ---- optimizer (AdamW) ----------------------------------------------
    lr = 2e-5
    opt_wd_non_norm_bias = 0.01  # weight decay for ordinary weights
    opt_wd_norm_bias = 0  # weight decay for norm-layer and bias parameters
    opt_beta1 = 0.9  # adam
    opt_beta2 = 0.99  # adam
    opt_eps = 1e-5  # passed to AdamW as eps (not a learning-rate floor)

    # ---- learning-rate scheduler ----------------------------------------
    scheduler_name = "OneCycleLR"  # which scheduler get_scheduler builds
    reduce_lr_factor = 0.6  # ReduceLROnPlateau: multiplicative lr reduction
    reduce_lr_patience = 1  # ReduceLROnPlateau: patience
    T_0 = 4  # CosineAnnealingWarmRestarts period
    T_max = 4  # CosineAnnealingLR period
    T_mult = 1  # restart period multiplier (not forwarded by get_scheduler)
    min_lr = 1e-7  # eta_min for the cosine schedulers
    max_lr = 2e-5  # OneCycleLR peak learning rate

    # ---- test-time augmentation -----------------------------------------
    tta = True  # calculate cv score in case TTA is executed
    tta_times = 4  # number of TTA passes
    tta_beta = 1 / tta_times  # weight of the un-augmented pass in the TTA blend

    # ---- mixup ----------------------------------------------------------
    mixup = False  # enable mixup augmentation
    mixup_epoch = n_epoch if mixup else 0  # mixup is applied while epoch <= mixup_epoch
    mixup_alpha = 0.2  # presumably consumed by mixup_data (not defined in this notebook)

    # ---- debug overrides ------------------------------------------------
    is_debug = False  # debug mode: tiny run for smoke-testing
    if is_debug:
        n_epoch = 1
        n_fold = 2
        n_sample_debug = 500
        tta_times = 2
        tta_beta = 1 / tta_times

In [None]:
def seed_torch(seed=42):
    """Seed every random source used by the notebook and pin cuDNN behaviour.

    Args:
        seed: integer applied to Python's ``random``, ``PYTHONHASHSEED``,
            NumPy and PyTorch (CPU, current CUDA device and all CUDA devices).
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Python / NumPy generators.
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch generators; the *_all variant covers multi-GPU runs.
    for torch_seeder in (torch.manual_seed, torch.cuda.manual_seed, torch.cuda.manual_seed_all):
        torch_seeder(seed)

    # Deterministic kernels on, autotuner off.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
# Seed everything once, before any model or DataLoader is created.
seed_torch(seed=Config.random_seed)

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')

def divice_norm_bias(model): 
    """Split a model's parameters into two lists by parameter name.

    A parameter goes into the first list when its name contains ``'norm'`` or
    ``'.bias'``; everything else goes into the second. The training loop gives
    the two lists different weight-decay values.

    Args:
        model: any object exposing ``named_parameters()`` yielding
            ``(name, parameter)`` pairs.

    Returns:
        ``(norm_bias_params, non_norm_bias_params)`` — two lists, each in the
        order the parameters were yielded.
    """
    except_wd_layers = ('norm', '.bias')
    buckets = {True: [], False: []}
    for param_name, param in model.named_parameters():
        matches_exception = any(tag in param_name for tag in except_wd_layers)
        buckets[matches_exception].append(param)
    return buckets[True], buckets[False]

def usr_rmse_score(output, target):
    """Batch RMSE on the 0-100 scale.

    The RMSE is computed directly with NumPy instead of
    ``mean_squared_error(..., squared=False)``: the ``squared`` keyword was
    deprecated in scikit-learn 1.4 and removed in 1.6, so the old call raises
    ``TypeError`` on current installs.

    Args:
        output: raw model logits (any shape); sigmoid is applied here.
        target: targets in [0, 1] with the same number of elements as
            ``output``.

    Returns:
        float: root-mean-squared error between ``sigmoid(output) * 100`` and
        ``target * 100``. Both tensors are flattened first, so this assumes a
        single regression output per sample.
    """
    # .float() first: under autocast the logits can be half precision, and the
    # metric should not inherit that rounding.
    y_pred = torch.sigmoid(output.detach().float()).cpu().numpy().astype(np.float64).reshape(-1) * 100
    y_true = target.detach().float().cpu().numpy().astype(np.float64).reshape(-1) * 100

    return float(np.sqrt(np.mean((y_true - y_pred) ** 2)))

def rmse_oof(_oof_df, fold=None):
    """Print and return the out-of-fold RMSE, overall or for a single fold.

    Computed with NumPy rather than ``mean_squared_error(..., squared=False)``,
    whose ``squared`` keyword was removed in scikit-learn 1.6.

    Args:
        _oof_df: DataFrame with ``'Pawpularity'``, ``'pred'`` and ``'fold'``
            columns. It is only read, never modified.
        fold: fold id to restrict the score to; ``None`` scores every row.

    Returns:
        float: the RMSE that was printed. (Earlier versions returned ``None``
        when ``fold`` was given; callers that ignore the value are unaffected.)

    Raises:
        ValueError: if no rows are selected, so there is nothing to score.
    """
    # Boolean indexing already yields a new frame, so no defensive copy is needed.
    oof_df = _oof_df if fold is None else _oof_df[_oof_df["fold"] == fold]
    target = oof_df['Pawpularity'].to_numpy(dtype=np.float64)
    y_pred = oof_df['pred'].to_numpy(dtype=np.float64)
    if target.size == 0:
        raise ValueError(f'no out-of-fold rows to score (fold={fold})')

    rmse = float(np.sqrt(np.mean((target - y_pred) ** 2)))
    if fold is not None:
        print(f'fold {fold}: {rmse}')
    else:
        print(f'overall: {rmse}')
    return rmse

class MetricMonitor:
    """Keeps a running mean for any number of named metrics.

    ``metrics`` maps a metric name to a dict with three keys: ``"val"`` (sum
    of all values seen), ``"count"`` (number of updates) and ``"avg"``
    (``val / count``). ``str(monitor)`` renders ``"name: avg"`` pairs joined
    by ``" | "``.
    """

    def __init__(self, float_precision=3):
        # Number of decimals used when the averages are rendered.
        self.float_precision = float_precision
        self.reset()

    @staticmethod
    def _blank_entry():
        """Fresh accumulator for a metric that has not been seen yet."""
        return {"val": 0, "count": 0, "avg": 0}

    def reset(self):
        """Forget every metric recorded so far."""
        self.metrics = defaultdict(self._blank_entry)

    def update(self, metric_name, val):
        """Fold ``val`` into the running statistics of ``metric_name``."""
        entry = self.metrics[metric_name]

        entry["val"] += val
        entry["count"] += 1
        entry["avg"] = entry["val"] / entry["count"]

    def __str__(self):
        rendered = []
        for name, entry in self.metrics.items():
            rendered.append(f"{name}: {entry['avg']:.{self.float_precision}f}")
        return " | ".join(rendered)
    
def get_scheduler(optimizer):
    """Build the learning-rate scheduler selected by ``Config.scheduler_name``.

    Args:
        optimizer: the optimizer the scheduler will drive.

    Returns:
        A scheduler instance, or ``None`` when ``Config.scheduler_name`` is not
        one of ``'CosineAnnealingWarmRestarts'``, ``'OneCycleLR'``,
        ``'CosineAnnealingLR'`` or ``'ReduceOnPlateauLR'``.

    Note:
        The OneCycleLR branch reads the module-level ``train_df`` to estimate
        how many batches one fold's training split produces per epoch.
    """
    name = Config.scheduler_name

    if name == 'CosineAnnealingWarmRestarts':
        return CosineAnnealingWarmRestarts(
            optimizer,
            T_0=Config.T_0,
            eta_min=Config.min_lr,
            last_epoch=-1,
        )

    if name == 'OneCycleLR':
        # (n_fold - 1) / n_fold of the rows train in each fold; divide by the
        # batch size and add one for the trailing partial batch.
        rows_times_train_folds = (Config.n_fold - 1) * train_df.shape[0]
        batches_per_epoch = int(rows_times_train_folds / (Config.n_fold * Config.batch_size)) + 1
        return OneCycleLR(
            optimizer,
            max_lr=Config.max_lr,
            pct_start=0.25,  # same as fastai, default 0.3
            steps_per_epoch=batches_per_epoch,
            epochs=Config.n_epoch,
        )

    if name == 'CosineAnnealingLR':
        return CosineAnnealingLR(
            optimizer,
            T_max=Config.T_max,
            eta_min=Config.min_lr,
            last_epoch=-1,
        )

    if name == 'ReduceOnPlateauLR':
        return ReduceLROnPlateau(
            optimizer,
            mode='min',
            factor=Config.reduce_lr_factor,
            patience=Config.reduce_lr_patience,
            verbose=True,
        )

    # Unknown name: the training loop treats None as "no scheduler".
    return None


from torch.nn.modules.loss import _WeightedLoss

class SmoothBCEwLogits(_WeightedLoss):
    """Binary cross-entropy on logits with label smoothing.

    Targets are pulled towards 0.5 before the loss is computed:
    ``t' = t * (1 - smoothing) + 0.5 * smoothing``.

    Args:
        weight: optional per-element rescaling weight forwarded to
            ``F.binary_cross_entropy_with_logits``.
        reduction: ``'mean'`` (default), ``'sum'``, or any other value to get
            the unreduced, element-wise loss.
        smoothing: smoothing strength in ``[0, 1)``.
        pos_weight: optional positive-class weight forwarded to the
            functional loss.
    """

    def __init__(self, weight = None, reduction = 'mean', smoothing = 0.0, pos_weight = None):
        # The base class already stores `weight` (as a buffer) and `reduction`.
        super().__init__(weight=weight, reduction=reduction)
        self.smoothing = smoothing
        self.pos_weight = pos_weight

    @staticmethod
    def _smooth(targets, n_labels, smoothing = 0.0):
        """Return smoothed targets. ``n_labels`` is accepted but not used."""
        assert 0 <= smoothing < 1
        with torch.no_grad():
            targets = targets * (1.0 - smoothing) + 0.5 * smoothing
        return targets

    def forward(self, inputs, targets):
        """Compute the smoothed BCE loss of ``inputs`` (logits) vs ``targets``."""
        targets = SmoothBCEwLogits._smooth(targets, inputs.size(-1), self.smoothing)
        # Ask for the element-wise loss. The functional default is
        # reduction='mean', which previously collapsed the loss to a scalar
        # before self.reduction was consulted, so 'sum' and 'none' silently
        # behaved like 'mean'.
        loss = F.binary_cross_entropy_with_logits(
            inputs, targets, self.weight, pos_weight=self.pos_weight, reduction='none'
        )
        if self.reduction == 'sum':
            return loss.sum()
        if self.reduction == 'mean':
            return loss.mean()
        return loss

In [None]:
def return_imgfilepath(name, folder=Config.img_train_dir):
    """Return the path of image ``name`` (an id without extension) inside ``folder``.

    ``folder`` defaults to the training-image directory from ``Config``; the
    file is assumed to be a ``.jpg``.
    """
    return os.path.join(folder, '{}.jpg'.format(name))

# Load the training table (one row per image: Id, metadata flags, Pawpularity).
train_df = pd.read_csv(os.path.join(Config.data_dir, 'train.csv'))

# set image filepath
train_df['file_path'] = train_df['Id'].progress_apply(return_imgfilepath)

# 27 image ids excluded from training (the reason is not recorded in this notebook).
del_list = ["b148cbea87c3dcc65a05b15f78910715", "9a0238499efb15551f06ad583a6fa951", "e359704524fa26d6a3dcd8bfeeaedd2e", "5a642ecc14e9c57a05b8e010414011f2", "bf8501acaeeedc2a421bac3d9af58bb7", 
            "01430d6ae02e79774b651175edd40842", "1feb99c2a4cac3f3c4f8a4510421d6f5", "6ae42b731c00756ddd291fa615c822a1", "13d215b4c71c3dc603cd13fc3ec80181", "3877f2981e502fe1812af38d4f511fd2", 
            "5ef7ba98fc97917aec56ded5d5c2b099", "988b31dd48a1bc867dbc9e14d21b05f6", "9b3267c1652691240d78b7b3d072baf3", "72b33c9c368d86648b756143ab19baeb", "2b737750362ef6b31068c4a4194909ed", 
            "b49ad3aac4296376d7520445a27726de", "9f5a457ce7e22eecd0992f4ea17b6107", "dd042410dc7f02e648162d7764b50900", "a9513f7f0c93e179b87c01be847b3e4c", "1059231cf2948216fcc2ac6afb4f8db8", 
            "87c6a8f85af93b84594a36f8ffd5d6b8", "8ffde3ae7ab3726cff7ca28697687a42", "dbc47155644aeb3edd1bd39dba9b6953", "38426ba3cbf5484555f2b5e9504a6b03", "54563ff51aa70ea8c6a9325c15f55399", 
            "fe47539e989df047507eaa60a16bc3fd", "78a02b3cb6ed38b2772215c0c0a7f78e"]

train_df = train_df[~train_df["Id"].isin(del_list)].reset_index(drop=True)

if Config.is_debug:
    # Honour Config.n_sample_debug (previously a hard-coded 500), never ask for
    # more rows than exist, and pin the draw to the experiment seed so debug
    # runs are repeatable regardless of cell execution order.
    n_debug_rows = min(getattr(Config, 'n_sample_debug', 500), len(train_df))
    train_df = train_df.sample(n_debug_rows, random_state=Config.random_seed).reset_index(drop=True)

# Targets rescaled to [0, 1] for the sigmoid/BCE head.
train_df['norm_score'] = train_df['Pawpularity'] / 100

# Discretise the score into equal-width bins purely so the regression target
# can be stratified across folds; the bin count grows with log2 of the row count.
num_bins = int(np.floor(1+(3.3)*(np.log2(len(train_df)))))
train_df['bins'] = pd.cut(train_df['norm_score'], bins=num_bins, labels=False)
target_bins = train_df['bins']
print("num_bins:", num_bins)

train_df['fold'] = -1
# Look the column position up by name instead of assuming 'fold' is the last column.
fold_col = train_df.columns.get_loc('fold')
skf = StratifiedKFold(n_splits = Config.n_fold, shuffle=True, random_state =Config.random_seed)
for i, (_, valid_index) in enumerate(skf.split(train_df.index, train_df['bins'])):
    # valid_index holds row positions, hence iloc.
    train_df.iloc[valid_index, fold_col] = i
    
train_df['fold'] = train_df['fold'].astype('int')

train_df

In [None]:
class PetDataset(Dataset):
    """Dataset yielding ``(image, target)`` pairs as float tensors.

    Args:
        image_filepaths: indexable collection of image file paths.
        targets: indexable collection of targets, aligned with the paths.
        transform: optional callable invoked as ``transform(image=array)`` and
            expected to return a mapping with an ``"image"`` entry (the
            albumentations calling convention).
    """

    def __init__(self, image_filepaths, targets, transform=None):
        self.image_filepaths = image_filepaths
        self.targets = targets
        self.transform = transform
    
    def __len__(self):
        return len(self.image_filepaths)

    def __getitem__(self, idx):
        # Decode while the file handle is still open; convert() forces RGB.
        with open(self.image_filepaths[idx], 'rb') as handle:
            pixels = np.array(Image.open(handle).convert('RGB'))

        if self.transform is not None:
            pixels = self.transform(image=pixels)["image"]

        # Scale to 0-1, then move channels first: (H, W, C) -> (C, H, W).
        chw = np.transpose(pixels / 255, (2, 0, 1)).astype(np.float32)
        label = self.targets[idx]

        return torch.tensor(chw, dtype=torch.float), torch.tensor(label, dtype=torch.float)

In [None]:
# ImageNet channel statistics (RGB). Only referenced by the disabled
# A.Normalize step; PetDataset rescales pixels to 0-1 instead.
IMAGENET_MEAN = [0.485, 0.456, 0.406]  # RGB
IMAGENET_STD = [0.229, 0.224, 0.225]  # RGB
def get_train_transforms(epoch, dim = Config.im_size):
    """Build the training-time augmentation pipeline.

    Args:
        epoch: accepted for call-site compatibility; not used.
        dim: side length of the square crop fed to the model.

    Returns:
        An ``A.Compose`` that resizes the shorter side to ``dim``, takes a
        random ``dim`` x ``dim`` crop, then flips vertically and horizontally
        with probability 0.5 each.
    """
    # resize like Resize in fastai
    resize_and_crop = [
        A.SmallestMaxSize(max_size=dim, p=1.0),
        A.RandomCrop(height=dim, width=dim, p=1.0),
    ]
    random_flips = [
        A.VerticalFlip(p=0.5),
        A.HorizontalFlip(p=0.5),
    ]
    # A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD) is intentionally left out.
    return A.Compose(resize_and_crop + random_flips)

def get_inference_fixed_transforms(mode=0, dim = Config.im_size):
    """Build one of the deterministic inference / TTA pipelines.

    Every mode resizes the shorter side to ``dim`` and centre-crops to
    ``dim`` x ``dim``; modes 1-3 then apply one always-on geometric change:

    * ``0`` - nothing extra (baseline view)
    * ``1`` - vertical flip
    * ``2`` - horizontal flip
    * ``3`` - transpose

    Args:
        mode: which view to build (0-3).
        dim: side length of the square crop fed to the model.

    Returns:
        The corresponding ``A.Compose`` pipeline.

    Raises:
        ValueError: for any other ``mode``. Previously the function fell
            through and returned ``None``, which made the dataset skip
            resizing altogether and only surfaced later as a collation error.
    """
    extra_step_by_mode = {
        0: None,
        1: A.VerticalFlip,
        2: A.HorizontalFlip,
        3: A.Transpose,
    }
    if mode not in extra_step_by_mode:
        raise ValueError(
            f'unsupported TTA mode {mode!r}; expected one of {sorted(extra_step_by_mode)}'
        )

    steps = [
        A.SmallestMaxSize(max_size=dim, p=1.0),
        A.CenterCrop(height=dim, width=dim, p=1.0),
        # A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD) is intentionally left out.
    ]
    extra_step = extra_step_by_mode[mode]
    if extra_step is not None:
        steps.append(extra_step(p=1.0))
    return A.Compose(steps, p=1.0)
        
def get_inference_random_transforms(mode=0, dim = Config.im_size):
    """Build a randomised inference pipeline.

    Args:
        mode: ``0`` gives the plain resize + centre-crop view; any other value
            adds vertical and horizontal flips, each with probability 0.5.
        dim: side length of the square crop fed to the model.

    Returns:
        An ``A.Compose`` pipeline.
    """
    resize_and_crop = [
        A.SmallestMaxSize(max_size=dim, p=1.0),
        A.CenterCrop(height=dim, width=dim, p=1.0),
    ]
    # A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD) is intentionally left out.
    if mode == 0:
        return A.Compose(resize_and_crop, p=1.0)

    coin_flips = [A.VerticalFlip(p=0.5), A.HorizontalFlip(p=0.5)]
    return A.Compose(resize_and_crop + coin_flips)

In [None]:
# train_dataset = PetDataset(
#     image_filepaths = train_df['file_path'].values,
#     targets = train_df['Pawpularity'].values /100,
#     transform = get_train_transforms(0)
# )

# print(train_dataset[0][0].shape)
# train_dataset[0]

## model

In [None]:
class PetNet(nn.Module):
    """timm backbone followed by a small regression head.

    The backbone's classifier is swapped for a 128-unit linear layer; a
    dropout -> 128->64 linear -> ReLU -> 64->1 linear stack then produces a
    single logit per image.

    Args:
        model_name: timm architecture name.
        out_features: forwarded to timm as ``num_classes``; the head built
            here always ends in one output regardless.
        inp_channels: number of input image channels.
        pretrained: whether timm should load pretrained weights.
    """

    def __init__(
        self,
        model_name = Config.model_path,
        out_features = Config.out_features,
        inp_channels=Config.inp_channels,
        pretrained=Config.pretrained
    ):
        super().__init__()
        backbone = timm.create_model(
            model_name,
            pretrained=pretrained,
            in_chans=inp_channels,
            num_classes=out_features,
        )
        feature_dim = backbone.head.in_features  # author's note: 1536
        print("self.model.head.in_features:", feature_dim)
        # Replace the classifier with a 128-d embedding layer.
        backbone.head = nn.Linear(feature_dim, 128)
        self.model = backbone
        self.dropout1 = nn.Dropout(0.1)
        self.dense1 = nn.Linear(128, 64)
        self.relu = nn.ReLU()
        self.dense2 = nn.Linear(64, 1)

    def forward(self, image):
        """Return ``(logit, features)``.

        ``logit`` is ``[bs, 1]``; ``features`` is the logit concatenated with
        the 128-d embedding, ``[bs, 129]``.
        """
        embedding = self.model(image)                        # [bs, 128]
        hidden = self.relu(self.dense1(self.dropout1(embedding)))  # [bs, 64]
        logit = self.dense2(hidden)                          # [bs, 1]
        features = torch.cat([logit, embedding], dim=1)      # [bs, 129]
        return logit, features

## helper function

In [None]:
def valid_fn(y_valid, X_valid_paths, model, criterion, epoch):
    """Run one validation pass without augmentation.

    Args:
        y_valid: validation targets on the 0-1 scale.
        X_valid_paths: image paths aligned with ``y_valid``.
        model: network returning ``(logits, features)``; put into eval mode here.
        criterion: loss used only for the progress-bar readout.
        epoch: current epoch number, shown in the progress bar.

    Returns:
        ``(targets, preds)`` as 1-D NumPy arrays on the 0-100 scale, in loader
        order (the loader is not shuffled).
    """
    model.eval()
    #model = layer_freeze(model)
    loader = DataLoader(
        PetDataset(
            image_filepaths=X_valid_paths,
            targets=y_valid,
            transform=get_inference_fixed_transforms(0),
        ),
        batch_size=Config.batch_size,
        shuffle=False,
        num_workers=Config.num_workers,
        pin_memory=False,
    )
    monitor = MetricMonitor()
    progress = tqdm(loader)
    collected_targets, collected_preds = [], []
    for images, target in progress:
        images = images.to(device, non_blocking=True).float()
        target = target.to(device, non_blocking=True).float().view(-1, 1)
        with torch.no_grad():
            output, _ = model(images)

        # Running loss / RMSE are for display only.
        monitor.update('Loss', criterion(output, target).item())
        monitor.update('RMSE', usr_rmse_score(output, target))
        progress.set_description(f"Epoch: {epoch:02}. Valid. {monitor}")

        # Back to the 0-100 scale for the returned arrays.
        batch_targets = target.detach().cpu().numpy() * 100
        batch_preds = torch.sigmoid(output).detach().cpu().numpy() * 100
        collected_preds.extend(batch_preds.ravel().tolist())
        collected_targets.extend(batch_targets.ravel().tolist())

    test_preds = np.array(collected_preds)
    test_targets = np.array(collected_targets)
    # Drop the last references to loader and GPU tensors before clearing the cache.
    del loader, target, output
    gc.collect()
    torch.cuda.empty_cache()
    return test_targets, test_preds

In [None]:
def tta_fn(y_valid, X_valid_paths, model, criterion, epoch):
    """Predict the validation set with test-time augmentation (TTA).

    The set is scored ``Config.tta_times`` times, once per fixed transform
    mode (mode 0 is the un-augmented view). The final prediction is
    ``tta_beta * preds[mode 0] + (1 - tta_beta) * mean(preds[other modes])``.

    Args:
        y_valid: validation targets on the 0-1 scale.
        X_valid_paths: image paths aligned with ``y_valid``.
        model: network returning ``(logits, features)``; put into eval mode here.
        criterion: loss used only for the progress-bar readout.
        epoch: label shown in the progress bar.

    Returns:
        ``(targets, blended_preds)`` as 1-D NumPy arrays on the 0-100 scale.

    Raises:
        ValueError: if ``Config.tta_times`` is less than 1.
    """
    if Config.tta_times < 1:
        raise ValueError('Config.tta_times must be at least 1')

    model.eval()
    #model = layer_freeze(model)
    test_targets = []
    test_preds = []
    for tta_mode in range(Config.tta_times):
        print(f'tta mode:{tta_mode}')
        valid_dataset = PetDataset(
          image_filepaths = X_valid_paths,
          targets = y_valid,
          transform = get_inference_fixed_transforms(tta_mode)
        )
        valid_loader = DataLoader(
          valid_dataset,
          batch_size = Config.batch_size,
          shuffle = False,
          num_workers = Config.num_workers,
          pin_memory = False
        )
        metric_monitor = MetricMonitor()
        stream = tqdm(valid_loader)
        tta_preds = []
        for images, target in stream:
            images = images.to(device, non_blocking = True).float()
            target = target.to(device, non_blocking = True).float().view(-1, 1)
            with torch.no_grad():
                output, _ = model(images)

            targets = (target.detach().cpu().numpy() * 100).ravel().tolist()
            pred = (torch.sigmoid(output).detach().cpu().numpy() * 100).ravel().tolist()
            loss = criterion(output, target)
            rmse_score = usr_rmse_score(output, target)
            metric_monitor.update('Loss', loss.item())
            metric_monitor.update('RMSE', rmse_score)
            stream.set_description(f"Epoch: {epoch:02}. Valid. {metric_monitor}")

            tta_preds.extend(pred)
            # Targets are identical for every mode; collect them once.
            if tta_mode == 0:
                test_targets.extend(targets)
        test_preds.append(tta_preds)
    test_preds = np.array(test_preds)
    # default preds * tta_beta + aug_preds mean * ( 1 - tta_beta)
    if len(test_preds) == 1:
        # Only the baseline view exists; averaging the empty "augmented" slice
        # would produce NaN, so return the baseline predictions as-is.
        final_preds = test_preds[0]
    else:
        final_preds = Config.tta_beta * test_preds[0] + ( 1 - Config.tta_beta) * np.mean(test_preds[1:], axis =0)
    test_targets = np.array(test_targets)
    # Release loader and GPU tensors before emptying the cache. Rebinding to
    # None (rather than `del`) is safe even if the loader produced no batches.
    valid_loader = valid_dataset = images = target = output = loss = None
    gc.collect()
    torch.cuda.empty_cache()
    return test_targets, final_preds

## training loop

In [None]:
# Final checkpoint path of every trained fold, in fold order (read by tta_loop).
new_filename_list = []
def training_loop(filepaths, targets):
    """Train one model per CV fold and collect out-of-fold predictions.

    For each fold a fresh ``PetNet`` is trained for ``Config.n_epoch`` epochs
    with mixed precision. Validation runs at the end of every epoch and, from
    epoch ``Config.epoch_step_valid`` on, every ``Config.steps_per_epoch``
    batches. Whenever the validation RMSE improves, the weights are saved and
    that pass's predictions become the fold's out-of-fold predictions. After
    the last epoch the checkpoint is renamed to include the best RMSE and its
    path is appended to the module-level ``new_filename_list``.

    Relies on module-level ``skf``, ``target_bins``, ``ids`` and ``device``.

    Args:
        filepaths: NumPy array of image paths.
        targets: NumPy array of targets on the 0-1 scale, aligned with
            ``filepaths``.

    Returns:
        DataFrame with columns ``Id``, ``pred``, ``fold``, ``Pawpularity``
        (predictions and targets on the 0-100 scale), sorted by ``Id``.

    Raises:
        RuntimeError: if a fold never produced an improving validation score
            (for example because the RMSE was NaN), so no checkpoint exists.
    """
    oof_df = pd.DataFrame()
    # torch.save does not create missing directories.
    os.makedirs(Config.output_dir, exist_ok=True)
    for i_fold, (train_idx, valid_idx) in enumerate(skf.split(filepaths, target_bins)):
        print(f'=== fold {i_fold}: training ===')
        # --- separate train/valid data ---
        X_train_paths = filepaths[train_idx]
        y_train = targets[train_idx]
        X_valid_paths = filepaths[valid_idx]
        y_valid = targets[valid_idx]
        valid_ids = ids[valid_idx]

        train_dataset = PetDataset(image_filepaths = X_train_paths, targets = y_train, transform = get_train_transforms(0))
        train_loader = DataLoader(train_dataset, batch_size=Config.batch_size, shuffle=True, num_workers=Config.num_workers, pin_memory=False)

        # --- instantiate model, cost function and optimizer ---
        model = PetNet()

        if Config.gpu_parallel: # optionally spread batches over every visible GPU
            num_gpu = torch.cuda.device_count()
            model = DataParallel(model, device_ids=range(num_gpu))

        model = model.to(device)
        criterion = SmoothBCEwLogits(smoothing=0.10) # nn.BCEWithLogitsLoss() # SmoothBCEwLogits(smoothing=0.10)  
        norm_bias_params, non_norm_bias_params = divice_norm_bias(model)
        #print(f"norm bias params: {len(norm_bias_params)}, non norm bias params: {len(non_norm_bias_params)}")
        optimizer = torch.optim.AdamW(
            [
              {'params': norm_bias_params, 'weight_decay': Config.opt_wd_norm_bias},
              {'params': non_norm_bias_params, 'weight_decay': Config.opt_wd_non_norm_bias},
          ],
          betas=(Config.opt_beta1, Config.opt_beta2),
          eps=Config.opt_eps,
          lr = Config.lr,
          amsgrad = False
        )
        scheduler = get_scheduler(optimizer)

        # --- train / valid loop ---
        best_rmse = np.inf
        # Defined up front so the rename below never depends on state left
        # behind by a previous fold.
        model_name = Config.model_path
        old_filename = f'{Config.output_dir}/{Config.expid}_{model_name}_fold{i_fold}.pth'
        _oof_df = None
        scaler = GradScaler()
        for epoch in range(1, Config.n_epoch + 1):
            print(f'=== fold:{i_fold} epoch: {epoch}: training ===')
            
            metric_monitor = MetricMonitor()
            stream = tqdm(train_loader)
            # Mixup is active for the first Config.mixup_epoch epochs (0 = never).
            # NOTE(review): mixup_data / mixup_criterion are not defined in this
            # notebook; enabling Config.mixup requires providing them.
            use_mixup = Config.mixup_epoch >= epoch

            for batch_idx, (images, target) in enumerate(stream, start = 1):
                # valid_fn switches the model to eval mode mid-epoch, so train
                # mode has to be re-enabled on every batch.
                model.train()
                if use_mixup:
                    images, target_a, target_b, lam = mixup_data(images, target.view(-1 ,1))
                    images = images.to(device, dtype = torch.float)
                    target_a = target_a.to(device, dtype = torch.float)
                    target_b = target_b.to(device, dtype = torch.float)
                else:
                    images = images.to(device, non_blocking = True).float()
                    target = target.to(device, non_blocking = True).float().view(-1, 1)
                optimizer.zero_grad()
                with autocast(): # mixed precision
                    output, _ = model(images)
                    if use_mixup:
                        loss = mixup_criterion(criterion, output, target_a, target_b, lam)
                    else:
                        loss = criterion(output, target)

                rmse_score = usr_rmse_score(output, target)
                metric_monitor.update('Loss', loss.item())
                metric_monitor.update('RMSE', rmse_score)
                stream.set_description(f'Epoch: {epoch:02}. Train. {metric_monitor}, Lr:{optimizer.param_groups[0]["lr"]:.1e}')
                scaler.scale(loss).backward()
                scaler.step(optimizer)
                scaler.update()

                # Per-batch schedulers step here; ReduceLROnPlateau steps on the
                # validation score below.
                if scheduler is not None and Config.scheduler_name != 'ReduceOnPlateauLR':
                    scheduler.step()
            
                is_last_batch = batch_idx == len(train_loader)
                is_periodic_check = (batch_idx % Config.steps_per_epoch == 0) and (epoch >= Config.epoch_step_valid)
                if is_periodic_check or is_last_batch:
                    valid_targets, preds = valid_fn(y_valid, X_valid_paths, model, criterion, epoch)
                    # sqrt(MSE) instead of squared=False, which scikit-learn 1.6 removed.
                    valid_rmse = round(float(np.sqrt(mean_squared_error(valid_targets, preds))), 3)
                    print(f'epoch: {epoch}, batch: {batch_idx}/{len(train_loader)}, valid rmse: {valid_rmse}')
                    if Config.scheduler_name == 'ReduceOnPlateauLR':
                        scheduler.step(valid_rmse)

                    if valid_rmse < best_rmse:
                        best_rmse = valid_rmse
                        # Save the bare network's weights: a DataParallel wrapper
                        # would prefix every key with 'module.', and tta_loop
                        # loads the checkpoint into an unwrapped PetNet.
                        net = model.module if isinstance(model, DataParallel) else model
                        torch.save(net.state_dict(), old_filename)
                        print("saved model.")
                        _oof_df = pd.DataFrame(data={'Id': valid_ids, 'pred':preds, 'fold': i_fold, 'Pawpularity':valid_targets}, index=valid_idx)

        if _oof_df is None:
            raise RuntimeError(
                f'fold {i_fold}: no validation pass improved on the initial score, '
                'so no checkpoint or out-of-fold predictions were produced'
            )

        # Embed the best score (RMSE x 1000) in the checkpoint name.
        new_filename = f'{Config.output_dir}/{Config.expid}_{model_name}_fold{i_fold}_cv{best_rmse*1000:.0f}.pth'
        os.rename(old_filename,  new_filename)
        new_filename_list.append(new_filename)

        # The optimizer and scheduler hold references to the parameters, so
        # they must go too or the old model stays on the GPU while the next
        # fold's model is built.
        del model, optimizer, scheduler, scaler, train_loader, train_dataset
        images = target = output = loss = None
        gc.collect()
        
        torch.cuda.empty_cache()
        oof_df = pd.concat([oof_df, _oof_df])
    return oof_df.sort_values('Id')

In [None]:
# Flatten the training table into aligned NumPy arrays. training_loop takes
# paths and targets as arguments and reads `ids` as a module-level global.
filepaths = train_df['file_path'].values
ids = train_df['Id'].values
targets = train_df['Pawpularity'].values / 100  # 0-100 score rescaled to 0-1

# Train all folds; the result holds the out-of-fold predictions.
oof_df = training_loop(filepaths, targets)

## TTA

In [None]:
def tta_loop(filepaths, targets):
    """Re-score every fold's validation split with test-time augmentation.

    Uses the same ``skf`` / ``target_bins`` split as training and loads, for
    fold ``i``, the checkpoint stored at ``new_filename_list[i]``.

    Args:
        filepaths: NumPy array of image paths.
        targets: NumPy array of targets on the 0-1 scale, aligned with
            ``filepaths``.

    Returns:
        DataFrame with columns ``Id``, ``pred``, ``fold``, ``Pawpularity``
        (predictions and targets on the 0-100 scale), sorted by ``Id``.
    """
    oof_df = pd.DataFrame()
    for i_fold, (_, valid_idx) in enumerate(skf.split(filepaths, target_bins)):
        print(f'=== fold {i_fold}: validation ===')
        # --- separate valid data ---
        X_valid_paths = filepaths[valid_idx]
        y_valid = targets[valid_idx]
        valid_ids = ids[valid_idx]

        # --- rebuild the network and restore the fold's best weights ---
        # pretrained=False: every weight is overwritten by the checkpoint, so
        # fetching the pretrained backbone again would be wasted work.
        model = PetNet(pretrained=False)
        checkpoint_path = new_filename_list[i_fold] # f'{Config.output_dir}/{Config.expid}_{model_name}_fold{i_fold}.pth'
        # map_location lets a checkpoint written on GPU load on a CPU-only machine.
        state_dict = torch.load(checkpoint_path, map_location=device)
        # Checkpoints saved from a DataParallel-wrapped model prefix every key
        # with 'module.'; strip it so they load into the bare network.
        prefix = 'module.'
        state_dict = {
            (key[len(prefix):] if key.startswith(prefix) else key): value
            for key, value in state_dict.items()
        }
        model.load_state_dict(state_dict)
        model = model.to(device)
        criterion = nn.BCEWithLogitsLoss()
        epoch = 0  # only used as a progress-bar label by tta_fn
        valid_targets, preds = tta_fn(y_valid, X_valid_paths, model, criterion, epoch)
        _oof_df = pd.DataFrame(data={'Id': valid_ids, 'pred':preds, 'fold': i_fold, 'Pawpularity':valid_targets}, index=valid_idx)
        del model
        gc.collect()
        
        torch.cuda.empty_cache()
        oof_df = pd.concat([oof_df, _oof_df])
    return oof_df.sort_values('Id')

In [None]:
# Re-score every fold's saved checkpoint with test-time augmentation.
# oof_tta_df is only defined when Config.tta is enabled.
if Config.tta:
    oof_tta_df = tta_loop(filepaths, targets)

## cv score

### without TTA

In [None]:
# no TTA 
for i in range(Config.n_fold):
    rmse_oof(oof_df, i) 
oof_df_overall_rmse = rmse_oof(oof_df)
oof_df.to_csv(f'{Config.output_dir}/{Config.expid}_oof_{oof_df_overall_rmse*1000:.0f}.csv', index=False)

In [None]:
# Overlay the target distribution and the out-of-fold prediction distribution.
plt.hist(oof_df['Pawpularity'].values, alpha = 0.4, color = 'b', label = 'target', bins = 50)
# Roughly one bin per two score points; at least one bin, because a prediction
# range narrower than 2 would otherwise ask matplotlib for zero bins.
pred_bins = max(1, int((np.max(oof_df['pred'].values) - np.min(oof_df['pred'].values)) //2))
plt.hist(oof_df['pred'].values, alpha = 0.4, color = 'g', label = 'prediction', bins = pred_bins)
plt.legend()
plt.show()

### with TTA

In [None]:
# with TTA
for i in range(Config.n_fold):
    rmse_oof(oof_tta_df, i)
oof_tta_df_overall_rmse = rmse_oof(oof_tta_df)
oof_tta_df.to_csv(f'{Config.output_dir}/{Config.expid}_oof_tta_{oof_tta_df_overall_rmse*1000:.0f}.csv', index=False)

In [None]:
# Overlay the target distribution and the TTA prediction distribution.
# Guarded because oof_tta_df only exists when Config.tta is enabled.
if Config.tta:
    plt.hist(oof_tta_df['Pawpularity'].values, alpha = 0.4, color = 'b', label = 'target', bins = 50)
    # Roughly one bin per two score points; never fewer than one bin.
    tta_pred_bins = max(1, int((np.max(oof_tta_df['pred'].values) - np.min(oof_tta_df['pred'].values)) //2))
    # Plot the TTA predictions; the earlier version drew oof_df['pred'] here,
    # i.e. the non-TTA predictions, by copy-paste mistake.
    plt.hist(oof_tta_df['pred'].values, alpha = 0.4, color = 'g', label = 'prediction', bins = tta_pred_bins)
    plt.legend()
    plt.show()