# Implement the Swin large transformer (use Pytorch to reproduce fastai)

*Reference:*[pytorch baseline(try to reproduce fastai function)](https://www.kaggle.com/code/chunyuwang19990210/pytorch-baseline-try-to-reproduce-fastai-notebook/edit) 

In [1]:
# # Clear output folder
# # 注意:不能在local computer 執行此程式，否則電腦的檔案會全被刪光
# import os

# def remove_folder_contents(folder):
#     for the_file in os.listdir(folder):
#         file_path = os.path.join(folder, the_file)
#         try:
#             if os.path.isfile(file_path):
#                 os.unlink(file_path)
#             elif os.path.isdir(file_path):
#                 remove_folder_contents(file_path)
#                 os.rmdir(file_path)
#         except Exception as e:
#             print(e)

# folder_path = '../'
# remove_folder_contents(folder_path)
# # os.rmdir(folder_path)

## Libraries

In [2]:
# !pip install albumentations
!pip install timm
!pip install optuna

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting timm
  Downloading timm-0.9.2-py3-none-any.whl (2.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m75.7 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub (from timm)
  Downloading huggingface_hub-0.15.1-py3-none-any.whl (236 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m236.8/236.8 kB[0m [31m30.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors (from timm)
  Downloading safetensors-0.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m80.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: safetensors, huggingface-hub, timm
Successfully installed huggingface-hub-0.15.1 safetensors-0.3.1 timm-0.9.2
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/

In [3]:
import sys
sys.path.append('../input/timm-pytorch-image-models/pytorch-image-models-master')
import os
import numpy as np
import pandas as pd
import random
import cv2
import gc
import glob
from sklearn.linear_model import RidgeCV
from sklearn.model_selection import StratifiedKFold
from sklearn.svm import SVR
from sklearn.preprocessing import RobustScaler
import pickle
from tqdm.auto import tqdm
from collections import defaultdict

# augmenatation
from albumentations.pytorch import ToTensorV2
import albumentations as A

# deep learning
import timm
from torch.cuda.amp import autocast, GradScaler
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, OneCycleLR, CosineAnnealingLR, \
    ReduceLROnPlateau, StepLR, LambdaLR
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from PIL import Image
from tqdm import tqdm
from sklearn.metrics import mean_squared_error

In [4]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [5]:
if not os.path.exists('/root/.cache/torch/hub/checkpoints/'):
    os.makedirs('/root/.cache/torch/hub/checkpoints/')
!cp '../input/swin-transformer/swin_large_patch4_window7_224_22kto1k.pth' '/root/.cache/torch/hub/checkpoints/swin_large_patch4_window7_224_22kto1k.pth'

cp: cannot stat '../input/swin-transformer/swin_large_patch4_window7_224_22kto1k.pth': No such file or directory


## utils function

In [6]:
def seed_everything(seed):
    #os.environ['PYTHONSEED'] = str(seed)
    np.random.seed(seed%(2**32-1))
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic =True
    torch.backends.cudnn.benchmark = False

def get_imgfilepath(ID_name, img_train_path):
    path = os.path.join(img_train_path, f'{ID_name}.jpg')
    return  path

## Dataset & Strait-Kfold

In [15]:
def StratifiedKFold_process(train_df, n_fold, random_seed, debug=False):
    if debug:
        train_df = train_df.sample(128).reset_index(drop=True)
    train_df['norm_score'] = train_df['Pawpularity'] / 100

    num_bins = int(np.floor(1+(3.3)*(np.log2(len(train_df)))))
    train_df['bins'] = pd.cut(train_df['norm_score'], bins=num_bins, labels=False)
    train_df['fold'] = -1

    skf = StratifiedKFold(n_splits=n_fold, shuffle=True,
                          random_state=random_seed)

    for i, (_, train_index) in enumerate(skf.split(train_df.index, train_df['bins'])):
        train_df.iloc[train_index, -1] = i

    train_df['fold'] = train_df['fold'].astype('int')

    return train_df


def get_transform():
    ''' Data augmentation definition '''
    return A.Compose([
        A.Resize(height=224, width=224),
        A.Normalize(
            mean = [0.485, 0.456, 0.406],
            std = [0.229, 0.224, 0.225] ),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.Rotate(limit=180, p=0.7),
        A.ShiftScaleRotate(
            shift_limit=0.1, scale_limit=0.1, 
            rotate_limit=4.5, p=0.5 ),
        A.HueSaturationValue(
            hue_shift_limit=0.2, sat_shift_limit=0.2,
            val_shift_limit=0.2, p=0.5 ),
        A.RandomBrightnessContrast(
            brightness_limit=(-0.1, 0.1),
            contrast_limit=(-0.1, 0.1),
            p=0.5 )
    ])

def get_valid_transform():
  return A.Compose([
      A.Resize(height=224, width=224),
      A.Normalize(
            mean = [0.485, 0.456, 0.406],
            std = [0.229, 0.224, 0.225] )
  ])

class PetDataset(Dataset):
    def __init__(self, img_filepaths, targets=None, transform=None):
        self.img_filepaths = img_filepaths
        self.targets = targets
        self.transform = transform

    def __len__(self):
        return len(self.img_filepaths)

    def __getitem__(self, idx):
        img_filepath = self.img_filepaths[idx]
        img = cv2.imread(img_filepath)
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = np.array(img_rgb) / 255
        if self.transform is not None:
            img = self.transform(image = img)['image']

        img = np.transpose(img, (2,0,1)).astype(np.float32)
        img = torch.tensor(img, dtype=torch.float)
        if self.targets is not None:
            target = self.targets[idx]
            target = torch.tensor(target, dtype=torch.float)
            return img, target
        else:
            return img

## Model architecture

In [16]:
class PetNet(nn.Module):
    def __init__(self, backbone_name, out_features,
                 inp_channels, pretrained, backbone_path):
        super(PetNet, self).__init__()
        self.backbone = timm.create_model(model_name=backbone_name, pretrained=pretrained,
                      # pretrained_cfg_overlay=dict(file=backbone_path),
                      in_chans=inp_channels, num_classes=out_features)
        
        self.embeded_size = self.backbone.head.in_features
        self.fc1 = nn.Linear(out_features, int(out_features/2))
        self.fc2 = nn.Linear(int(out_features/2), int(out_features/4))
        self.fc3 = nn.Linear(int(out_features/4), 1)
        
    def forward(self, X_data):
        embeded_feature = self.backbone(X_data)
        output = self.fc1(embeded_feature)
        output = self.fc2(output)
        output = self.fc3(output)
        return {"output":output, "embeded":embeded_feature}

## Training & Validation process

In [17]:
def divide_norm_bias(model):
    norm_bias_params = []
    non_norm_bias_params = []
    except_wd_layers = ['norm', '.bias']
    for n, p in model.named_parameters():
        if any([nd in n for nd in except_wd_layers]):
            norm_bias_params.append(p)
        else:
            non_norm_bias_params.append(p)
    return norm_bias_params, non_norm_bias_params

##########################################################################
def training(train_loader, model, criterion, epoch, config):
    norm_bias_params, non_norm_bias_params = divide_norm_bias(model=model)
    optimizer = torch.optim.AdamW(
                    [{'params': norm_bias_params, 'weight_decay': 0},
                    {'params': non_norm_bias_params, 'weight_decay': 0.01}],
                      betas=(0.9, 0.99),
                      eps=1e-5,
                      lr=config.lr,
                      amsgrad=False)

    metric_monitor = MetricMonitor()
    model.train()
    grad_scaler = GradScaler()
    stream = tqdm(train_loader)

    for batch_idx, (img, target) in enumerate(stream, start=1):
        img = img.to(device, non_blocking=True).float()
        target = target.to(device, non_blocking=True).float().view(-1,1)

        with autocast():
            output = model(img)

        loss = criterion(output["output"], target)
        rmse_score = usr_rmse_score(output["output"], target)

        metric_monitor.update('Loss', loss.item())
        metric_monitor.update('RMSE', rmse_score)

        grad_scaler.scale(loss).backward()
        grad_scaler.step(optimizer)
        grad_scaler.update()
        optimizer.zero_grad()
        stream.set_description(f'Epoch: {epoch}. Train. {metric_monitor}')

        
##################################################################################        
def validation(valid_loader, model, criterion, epoch):
    metric_monitor = MetricMonitor()
    model.eval()
    stream = tqdm(valid_loader)
    final_targets = []
    final_preds = []
    for i, (img, target) in enumerate(stream, start=1):
        img = img.to(device, non_blocking=True).float()
        target = target.to(device, non_blocking=True).float().view(-1, 1)

        with torch.no_grad():
            output = model(img)

        loss = criterion(output['output'], target)
        rmse_score = usr_rmse_score(output['output'], target)
        metric_monitor.update('Loss', loss.item())
        metric_monitor.update('RMSE', rmse_score)
        stream.set_description(f'Epoch: {epoch}. Validation. {metric_monitor}')

        target = (target.detach().cpu().numpy()*100).ravel().tolist()
        pred = (torch.sigmoid(output['output']).detach().cpu().numpy() * 100).ravel().tolist()
        final_targets.extend(target)
        final_preds.extend(pred)

    final_preds = np.array(final_preds)
    final_targets = np.array(final_targets)
    del valid_loader, target, output, img
    gc.collect() # 釋放記憶體
    torch.cuda.empty_cache() # 釋放顯卡記憶體
    return final_targets, final_preds

## Metrics Definition

In [18]:
''' Metrics '''
def usr_rmse_score(output, target):
    y_pred = torch.sigmoid(output).cpu()
    y_pred = y_pred.detach().numpy()*100
    target = target.cpu()*100

    return mean_squared_error(target, y_pred, squared=False)

def rmse_oof(_oof_df, fold=None):
    oof_df = _oof_df.copy()
    if fold is not None:
        oof_df = oof_df[oof_df["fold"] == fold]
    target = oof_df['Pawpularity'].values
    y_pred = oof_df['pred'].values
    if fold is not None:
        print(f'fold {fold}: {mean_squared_error(target, y_pred, squared=False)}')
    else:
        print(f'overall: {mean_squared_error(target, y_pred, squared=False)}')
class MetricMonitor:
    def __init__(self, float_precision=3):
        self.float_precision = float_precision
        self.reset()

    def reset(self):
        self.metrics = defaultdict(lambda: {"val": 0, "count": 0, "avg": 0})

    def update(self, metric_name, val):
        metric = self.metrics[metric_name]

        metric["val"] += val
        metric["count"] += 1
        metric["avg"] = metric["val"] / metric["count"]

    def __str__(self):
        return " | ".join( [
                "{metric_name}: {avg:.{float_precision}f}".format(
                    metric_name=metric_name, avg=metric["avg"],
                    float_precision=self.float_precision
                )
                for (metric_name, metric) in self.metrics.items() ] )

## Parameter Configuration

In [19]:
''' params '''
class Config:
    random_seed = 555
    ## dataloader and strait-kfold
    n_fold = 10
    batch = 32
    num_workers = 8

    ## model definition
    backbone_name = "swin_large_patch4_window7_224"
    backbone_path = "../input/swin-transformer/swin_large_patch4_window7_224_22kto1k.pth"
    out_features = 1024
    inp_channels = 3
    pretrained = True

    # training setting
    n_epoch = 5
    lr = 2e-5
    debug = True
    if debug:
        batch = 8
        num_workers = 2
        n_epoch = 3
        n_fold = 3
        n_sample_debug = 500

## Main Part (training backcone model)

In [20]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(f'Using device:{device}')

seed_everything(seed=Config.random_seed)

data_path = "../content/drive/MyDrive/Tsai/Pawpularity/Dataset"
# data_path = "../input/petfinder-pawpularity-score"
img_train_path = os.path.join(data_path,'train')
meta_train_path = os.path.join(data_path, 'train.csv')

train_df = pd.read_csv(meta_train_path)
train_df['img_file_path'] = train_df['Id'].map(lambda x: get_imgfilepath(x, img_train_path))
# 調整路徑的斜槓方向 (在 Windows 的 local 上跑再把註解去掉)
# for i in range(len(train_df['img_file_path'])):
#     train_df['img_file_path'][i] = train_df['img_file_path'][i].replace("\\", "/")
#     train_df['img_file_path'][i] = train_df['img_file_path'][i].replace("//", "/")

train_df_stage1 = StratifiedKFold_process(train_df=train_df, n_fold=Config.n_fold,
                      random_seed=555, debug=Config.debug)

oof_df = pd.DataFrame()
# cross-validation training and evaluation
for fold in range(Config.n_fold):
    print(f'=============== fold {fold}: training ===============\n')
    train = train_df_stage1[train_df_stage1['fold'] != fold]
    valid = train_df_stage1[train_df_stage1['fold'] == fold]
    valid_idx = valid.index

    X_train_paths = train['img_file_path'].values
    y_train = train['norm_score'].values
    X_valid_paths = valid['img_file_path'].values
    y_valid = valid['norm_score'].values

    train_dataset = PetDataset(
        img_filepaths = X_train_paths,
        targets = y_train,
        transform = get_transform()
    )

    train_loader = DataLoader(
        train_dataset,
        batch_size = Config.batch,
        shuffle = True,
        num_workers = Config.num_workers,
        pin_memory = True  # 加快 tensor 從記憶體轉移到 GPU 記憶體的速度
    )

    valid_dataset = PetDataset(
        img_filepaths=X_valid_paths,
        targets=y_valid,
        transform=get_valid_transform()
    )

    valid_loader = DataLoader(
        valid_dataset,
        batch_size=Config.batch,
        shuffle=True,
        num_workers=Config.num_workers,
        pin_memory=True
    )

    model = PetNet(backbone_name=Config.backbone_name, out_features=Config.out_features,
                   inp_channels=Config.inp_channels, pretrained=Config.pretrained, 
                  backbone_path=Config.backbone_path)
    model = model.to(device)
    criterion = nn.BCEWithLogitsLoss()

    best_rmse = np.inf
    for epoch in range(1, Config.n_epoch + 1):
        print(f'\n============== epoch{epoch}: training ==============')
        training(train_loader=train_loader, model=model, criterion=criterion,
                 epoch=epoch, config=Config)
        valid_target, pred = validation(valid_loader=valid_loader, model=model,
                                        criterion=criterion, epoch=epoch)
        valid_rmse = round(mean_squared_error(valid_target, pred, squared=False), 3)
        print(f'epoch {epoch}: RMSE= {valid_rmse}')

        if valid_rmse < best_rmse:
            best_rmse = valid_rmse
            best_epoch = epoch
            best_model = model
            _oof_df = pd.DataFrame(data={'Pawpularity':valid_target, 'pred':pred, 'fold':fold},
                                   index=valid_idx)
    
    torch.save(model.state_dict(), f'./swinlarge_transformer{fold}.pth')
    print(f"The best training is in epoch {best_epoch}")
    print("save model!!")

    del train_dataset, train_loader, valid_dataset, valid_loader
    gc.collect()
    torch.cuda.empty_cache()
    oof_df = pd.concat([oof_df, _oof_df])




Using device:cuda:0








Epoch: 1. Train. Loss: 0.679 | RMSE: 22.269: 100%|██████████| 11/11 [00:27<00:00,  2.46s/it]
Epoch: 1. Validation. Loss: 0.679 | RMSE: 21.546: 100%|██████████| 6/6 [00:17<00:00,  2.83s/it]


epoch 1: RMSE= 22.148



Epoch: 2. Train. Loss: 0.659 | RMSE: 19.392: 100%|██████████| 11/11 [00:03<00:00,  3.46it/s]
Epoch: 2. Validation. Loss: 0.677 | RMSE: 21.236: 100%|██████████| 6/6 [00:01<00:00,  4.67it/s]


epoch 2: RMSE= 21.767



Epoch: 3. Train. Loss: 0.661 | RMSE: 19.061: 100%|██████████| 11/11 [00:03<00:00,  3.45it/s]
Epoch: 3. Validation. Loss: 0.674 | RMSE: 19.794: 100%|██████████| 6/6 [00:01<00:00,  4.53it/s]


epoch 3: RMSE= 22.052
The best training is in epoch 2
save model!!




Epoch: 1. Train. Loss: 0.674 | RMSE: 20.553: 100%|██████████| 11/11 [00:03<00:00,  3.34it/s]
Epoch: 1. Validation. Loss: 0.664 | RMSE: 19.780: 100%|██████████| 6/6 [00:01<00:00,  4.33it/s]


epoch 1: RMSE= 21.249



Epoch: 2. Train. Loss: 0.668 | RMSE: 20.341: 100%|██████████| 11/11 [00:03<00:00,  3.40it/s]
Epoch: 2. Validation. Loss: 0.662 | RMSE: 20.467: 100%|██████████| 6/6 [00:01<00:00,  4.21it/s]


epoch 2: RMSE= 21.022



Epoch: 3. Train. Loss: 0.669 | RMSE: 20.134: 100%|██████████| 11/11 [00:03<00:00,  3.37it/s]
Epoch: 3. Validation. Loss: 0.667 | RMSE: 19.990: 100%|██████████| 6/6 [00:01<00:00,  4.18it/s]


epoch 3: RMSE= 20.365
The best training is in epoch 3
save model!!




Epoch: 1. Train. Loss: 0.672 | RMSE: 20.906: 100%|██████████| 11/11 [00:03<00:00,  3.25it/s]
Epoch: 1. Validation. Loss: 0.662 | RMSE: 19.478: 100%|██████████| 6/6 [00:01<00:00,  4.19it/s]


epoch 1: RMSE= 21.036



Epoch: 2. Train. Loss: 0.673 | RMSE: 20.798: 100%|██████████| 11/11 [00:03<00:00,  3.35it/s]
Epoch: 2. Validation. Loss: 0.668 | RMSE: 20.685: 100%|██████████| 6/6 [00:01<00:00,  4.16it/s]


epoch 2: RMSE= 21.645



Epoch: 3. Train. Loss: 0.673 | RMSE: 20.888: 100%|██████████| 11/11 [00:03<00:00,  3.28it/s]
Epoch: 3. Validation. Loss: 0.661 | RMSE: 18.461: 100%|██████████| 6/6 [00:01<00:00,  4.08it/s]


epoch 3: RMSE= 20.086
The best training is in epoch 3
save model!!


In [21]:
oof_df

Unnamed: 0,Pawpularity,pred,fold
1,55.000000,42.901863,0
3,12.000000,42.577667,0
8,32.000000,43.407162,0
9,28.000000,43.334011,0
12,22.000000,42.296066,0
...,...,...,...
103,48.000000,41.293545,2
107,31.000000,41.088795,2
113,18.000000,41.325878,2
114,37.000000,41.196579,2


In [22]:
for i in range(Config.n_fold):
    rmse_oof(oof_df, i)
rmse_oof(oof_df)

fold 0: 21.767203812336813
fold 1: 20.364530065895195
fold 2: 20.08592342139373
overall: 20.75738705695441


## Training with other machine learning model

In [23]:
from sklearn.svm import SVR
import xgboost as xgb
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import VotingRegressor
import optuna


train_df_stage2 = StratifiedKFold_process(train_df=train_df, n_fold=Config.n_fold,
                      random_seed=666, debug=Config.debug)

X_valid_paths_all = np.array([])
y_valid_all = np.array([])

for fold in range(Config.n_fold):

    backbone_model = PetNet(backbone_name=Config.backbone_name, out_features=Config.out_features,
               inp_channels=Config.inp_channels, pretrained=Config.pretrained,
                       backbone_path=Config.backbone_path)

    backbone_model.load_state_dict(torch.load(f'./swinlarge_transformer{fold}.pth'))


    print("="*20 + f"{fold}-fold training" + "="*20)
    
    train = train_df_stage2[train_df_stage2['fold'] != fold]
    valid = train_df_stage2[train_df_stage2['fold'] == fold]
    
    X_train_paths = train['img_file_path'].values
    y_train = train['norm_score'].values
    X_valid_paths = valid['img_file_path'].values
    y_valid = valid['norm_score'].values

    X_valid_paths_all = np.concatenate([X_valid_paths_all, X_valid_paths], axis=0)
    y_valid_all = np.concatenate([y_valid_all, y_valid], axis=0)
    
    train_dataset = PetDataset(
        img_filepaths = X_train_paths,
        targets = y_train,
        transform = get_transform()
    )

    train_loader = DataLoader(
        train_dataset,
        batch_size = 1,
        shuffle = True,
        num_workers = Config.num_workers,
        pin_memory = True  
    )
    
    
    backbone_model = backbone_model.to(device)
    backbone_model.eval()

    stream_train = tqdm(train_loader)
    # stream_valid = tqdm(valid_loader)

    ########## training additional head model ##########
    train_embeded = []
    train_target = []
    for i, (img, target) in enumerate(stream_train, start=1):
        img = img.to(device, non_blocking=True).float()
        target = target.to(device, non_blocking=True).float().view(-1, 1)
    ## stage 1: 先將 training data 輸入 backbone 輸出特徵
        with torch.no_grad():
            output = backbone_model(img)
        embeded_feature = output["embeded"].detach().cpu().numpy()
        train_embeded.append(embeded_feature)
        target = target.detach().cpu().numpy()*100
        train_target.append(target)
        del embeded_feature, output

    ## train_embeded、train_target 分別就是要丟給其他 machine learning model 訓練的 x,y
    train_embeded = np.array(train_embeded)
    train_embeded = train_embeded.reshape((train_embeded.shape[0], train_embeded.shape[2]))
    train_target = np.array(train_target)
    train_target = train_target.reshape((train_target.shape[0], train_target.shape[2]))
    
    ## stage 2: 將 backbone 輸出的特徵向量分別輸入不同演算法訓練
    
    print('Fitting SVR')
    svr = SVR(C=20.0)
    svr.fit(train_embeded, train_target)
    
    print('Fitting XGBOOST')
    XGB = xgb.XGBRegressor(max_depth=5)
    XGB.fit(train_embeded, train_target)
    
    print('Fitting MLP')
    mlp = MLPRegressor(hidden_layer_sizes=(2048,1024,512),
                      max_iter=5000,
                      activation='relu',
                      solver='adam')
    mlp.fit(train_embeded, train_target)

    print("Save various head model!!")
    pickle.dump(svr, open(f"./{fold}_fold_SVR_head.pkl","wb"))
    pickle.dump(XGB, open(f"./{fold}_fold_XGB_head.pkl","wb"))
    pickle.dump(mlp, open(f"./{fold}_fold_MLP_head.pkl","wb"))

del train_dataset, train_loader, train_embeded, train_target, backbone_model, target, img





100%|██████████| 85/85 [00:27<00:00,  3.13it/s]
  y = column_or_1d(y, warn=True)


Fitting SVR
Fitting XGBOOST
Fitting MLP


  y = column_or_1d(y, warn=True)


Save various head model!!


100%|██████████| 85/85 [00:16<00:00,  5.29it/s]
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Fitting SVR
Fitting XGBOOST
Fitting MLP
Save various head model!!


100%|██████████| 86/86 [00:03<00:00, 27.73it/s]
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Fitting SVR
Fitting XGBOOST
Fitting MLP
Save various head model!!


### evaluation & determine best weight

In [24]:
###################### evaluation and determine the weight configuration ######################
valid_dataset = PetDataset(
        img_filepaths=X_valid_paths_all,
        targets=y_valid_all,
        transform=get_valid_transform()
    )

valid_loader = DataLoader(
    valid_dataset,
    batch_size=1,
    shuffle=True,
    num_workers=Config.num_workers,
    pin_memory=True
)

stream_valid = tqdm(valid_loader)

pred_svr_all = []
pred_xgb_all = []
pred_mlp_all = []

for fold in range(Config.n_fold):
  print("="*20 + f"Load {fold}-fold model" + "="*20)
  print("Load backbone model!!")
  backbone_model = PetNet(backbone_name=Config.backbone_name, out_features=Config.out_features,
               inp_channels=Config.inp_channels, pretrained=Config.pretrained,
                       backbone_path=Config.backbone_path)

  backbone_model.load_state_dict(torch.load(f'./swinlarge_transformer{fold}.pth'))
  backbone_model = backbone_model.to(device)
  backbone_model.eval()
  
  print("Load other head model!!")
  svr_head = pickle.load(open(f"./{fold}_fold_SVR_head.pkl","rb"))
  XGB_head = pickle.load(open(f"./{fold}_fold_XGB_head.pkl","rb"))
  mlp_head = pickle.load(open(f"./{fold}_fold_MLP_head.pkl","rb"))

  valid_embeded = []
  valid_target = []

  for i, (img, target) in enumerate(stream_valid, start=1):
    img = img.to(device, non_blocking=True).float()
    target = target.to(device, non_blocking=True).float().view(-1, 1)
    with torch.no_grad():
        output = backbone_model(img)
    embeded_feature = output["embeded"].detach().cpu().numpy()
    valid_embeded.append(embeded_feature)
    target = target.detach().cpu().numpy()*100
    valid_target.append(target)
    del embeded_feature, output

  valid_embeded = np.array(valid_embeded)
  valid_embeded = valid_embeded.reshape((valid_embeded.shape[0], valid_embeded.shape[2]))
  valid_target = np.array(valid_target)
  valid_target = valid_target.reshape((valid_target.shape[0], valid_target.shape[2]))
  
  pred_svr = svr_head.predict(valid_embeded)
  pred_xgb = XGB_head.predict(valid_embeded)
  pred_mlp = mlp_head.predict(valid_embeded)

  pred_svr_all.append(pred_svr)
  pred_xgb_all.append(pred_xgb) 
  pred_mlp_all.append(pred_mlp)

pred_svr_all = np.array(pred_svr_all)
pred_xgb_all = np.array(pred_xgb_all)
pred_mlp_all = np.array(pred_mlp_all)

pred_svr_mean = np.mean(pred_svr_all, axis=0)
pred_xgb_mean = np.mean(pred_xgb_all, axis=0)
pred_mlp_mean = np.mean(pred_mlp_all, axis=0)

RMSE_svr = mean_squared_error(valid_target, pred_svr_mean, squared=False)
RMSE_xgb = mean_squared_error(valid_target, pred_xgb_mean, squared=False)
RMSE_mlp = mean_squared_error(valid_target, pred_mlp_mean, squared=False)


def objective(trial):
  w_svr = trial.suggest_float('w_svr', 0, 1.0)
  w_xgb = trial.suggest_float('w_xgb', 0, 1.0 - w_svr)
  w_mlp = 1.0 - w_svr - w_xgb

  final_pred = pred_svr_mean*w_svr + pred_xgb_mean*w_xgb + pred_mlp*w_mlp

  return mean_squared_error(valid_target, final_pred, squared=False)

study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=1000, n_jobs=-1)
best_weight = dict()
weight = study.best_params
best_weight['svr'] = weight['w_svr']
best_weight['xgb'] = weight['w_xgb']
best_weight['mlp'] = 1.0 - weight['w_svr'] - weight['w_xgb']

print(f"The RMSE of SVR: {RMSE_svr}")
print(f"The RMSE of XGBOOST: {RMSE_xgb}")
print(f"The RMSE of MLP: {RMSE_mlp}")

print(f"The weight configuration:\n")
print(f"SVR:{best_weight['svr']}")
print(f"xgboost:{best_weight['xgb']}")
print(f"MLP:{best_weight['mlp']}")

pickle.dump(best_weight, open(f"./best_weight.pkl","wb"))
del valid_dataset, valid_loader, target, img, valid_embeded, valid_target, best_weight, backbone_model
gc.collect() 
torch.cuda.empty_cache()


  0%|          | 0/128 [00:00<?, ?it/s]

Load backbone model!!
Load other head model!!


100%|██████████| 128/128 [00:09<00:00, 13.96it/s]


Load backbone model!!
Load other head model!!
Load backbone model!!
Load other head model!!


[I 2023-06-01 16:51:15,140] A new study created in memory with name: no-name-b6244063-17e8-464c-9662-439cea682eef
[I 2023-06-01 16:51:15,147] Trial 0 finished with value: 19.438646268463877 and parameters: {'w_svr': 0.8367845868082465, 'w_xgb': 0.07156308730346823}. Best is trial 0 with value: 19.438646268463877.
[I 2023-06-01 16:51:15,153] Trial 1 finished with value: 19.447560837329025 and parameters: {'w_svr': 0.6178519321495002, 'w_xgb': 0.0686683108029308}. Best is trial 0 with value: 19.438646268463877.
[I 2023-06-01 16:51:15,157] Trial 2 finished with value: 19.38597195272023 and parameters: {'w_svr': 0.814987845352003, 'w_xgb': 0.014269388900741756}. Best is trial 2 with value: 19.38597195272023.
[I 2023-06-01 16:51:15,162] Trial 3 finished with value: 19.57064468386802 and parameters: {'w_svr': 0.7459290283768861, 'w_xgb': 0.19088438770538893}. Best is trial 2 with value: 19.38597195272023.
[I 2023-06-01 16:51:15,165] Trial 4 finished with value: 20.44610248319125 and paramete

The RMSE of SVR: 19.369577215336918
The RMSE of XGBOOST: 21.034469604492188
The RMSE of MLP: 20.831392288208008
The weight configuration:

SVR:0.9772379335645612
xgboost:6.268547533056033e-06
MLP:0.02275579788790571


# Testing phase

In [25]:
img_test_path = os.path.join(data_path,'test')
meta_test_path = os.path.join(data_path, 'test.csv')
test_df = pd.read_csv(meta_test_path)

test_df['img_file_path'] = test_df['Id'].map(lambda x: get_imgfilepath(x, img_test_path))


test_dataset = PetDataset(
              img_filepaths = test_df['img_file_path'],
              transform = get_valid_transform()
)

test_loader = DataLoader(
              test_dataset,
              batch_size = 1,
              shuffle = False,
              num_workers = Config.num_workers,
              pin_memory = True
)



In [26]:
test_df

Unnamed: 0,Id,Subject Focus,Eyes,Face,Near,Action,Accessory,Group,Collage,Human,Occlusion,Info,Blur,img_file_path
0,4128bae22183829d2b5fea10effdb0c3,1,0,1,0,0,1,1,0,0,1,0,1,../content/drive/MyDrive/Tsai/Pawpularity/Data...
1,43a2262d7738e3d420d453815151079e,0,1,0,0,0,0,1,1,0,0,0,0,../content/drive/MyDrive/Tsai/Pawpularity/Data...
2,4e429cead1848a298432a0acad014c9d,0,0,0,1,0,1,1,1,0,1,1,1,../content/drive/MyDrive/Tsai/Pawpularity/Data...
3,80bc3ccafcc51b66303c2c263aa38486,1,0,1,0,0,0,0,0,0,0,1,0,../content/drive/MyDrive/Tsai/Pawpularity/Data...
4,8f49844c382931444e68dffbe20228f4,1,1,1,0,1,1,0,1,0,1,1,0,../content/drive/MyDrive/Tsai/Pawpularity/Data...
5,b03f7041962238a7c9d6537e22f9b017,0,0,1,1,1,1,1,1,1,0,1,0,../content/drive/MyDrive/Tsai/Pawpularity/Data...
6,c978013571258ed6d4637f6e8cc9d6a3,1,0,0,0,1,1,0,1,0,1,1,1,../content/drive/MyDrive/Tsai/Pawpularity/Data...
7,e0de453c1bffc20c22b072b34b54e50f,1,0,1,0,0,0,0,0,1,0,0,1,../content/drive/MyDrive/Tsai/Pawpularity/Data...


## predict

In [27]:
test_pred_svr_all = []
test_pred_xgb_all = []
test_pred_mlp_all = []

for fold in range(Config.n_fold):
  print("="*20 + f"Load {fold}-fold model" + "="*20)
  print("Load backbone model!!")
  backbone_model = PetNet(backbone_name=Config.backbone_name, out_features=Config.out_features,
               inp_channels=Config.inp_channels, pretrained=Config.pretrained,
                       backbone_path=Config.backbone_path)

  backbone_model.load_state_dict(torch.load(f'./swinlarge_transformer{fold}.pth'))
  backbone_model = backbone_model.to(device)
  backbone_model.eval()
  
  print("Load other head model!!")
  svr_head = pickle.load(open(f"./{fold}_fold_SVR_head.pkl","rb"))
  XGB_head = pickle.load(open(f"./{fold}_fold_XGB_head.pkl","rb"))
  mlp_head = pickle.load(open(f"./{fold}_fold_MLP_head.pkl","rb"))

  stream_test = tqdm(test_loader)
  
  test_embeded = []
  test_target = []

  for i, img in enumerate(stream_test, start=1):
    img = img.to(device, non_blocking=True).float()
    with torch.no_grad():
        output = backbone_model(img)
    embeded_feature = output["embeded"].detach().cpu().numpy()
    test_embeded.append(embeded_feature)
    del embeded_feature, output

  test_embeded = np.array(test_embeded)
  test_embeded = test_embeded.reshape((test_embeded.shape[0], test_embeded.shape[2]))

  test_pred_svr = svr_head.predict(test_embeded)
  test_pred_xgb = XGB_head.predict(test_embeded)
  test_pred_mlp = mlp_head.predict(test_embeded)

  test_pred_svr_all.append(test_pred_svr)
  test_pred_xgb_all.append(test_pred_xgb) 
  test_pred_mlp_all.append(test_pred_mlp)

test_pred_svr_all = np.array(test_pred_svr_all)
test_pred_xgb_all = np.array(test_pred_xgb_all)
test_pred_mlp_all = np.array(test_pred_mlp_all)

test_pred_svr_mean = np.mean(test_pred_svr_all, axis=0)
test_pred_xgb_mean = np.mean(test_pred_xgb_all, axis=0)
test_pred_mlp_mean = np.mean(test_pred_mlp_all, axis=0)

best_weight = pickle.load(open(f"./best_weight.pkl","rb"))

final_pred = test_pred_svr_mean*best_weight['svr'] + test_pred_xgb_mean*best_weight['xgb'] + test_pred_mlp_mean*best_weight['mlp']

gc.collect()
torch.cuda.empty_cache()

Load backbone model!!
Load other head model!!


100%|██████████| 8/8 [00:04<00:00,  1.96it/s]


Load backbone model!!
Load other head model!!


100%|██████████| 8/8 [00:00<00:00, 16.27it/s]


Load backbone model!!
Load other head model!!


100%|██████████| 8/8 [00:00<00:00, 15.83it/s]


In [28]:
print(best_weight['svr'])
print(best_weight['xgb'])
print(best_weight['mlp'])

0.9772379335645612
6.268547533056033e-06
0.02275579788790571


In [29]:
test_df["Pawpularity"] = final_pred
test_df[["Id","Pawpularity"]].to_csv("submission.csv", index=False)
test_df[["Id","Pawpularity"]]

Unnamed: 0,Id,Pawpularity
0,4128bae22183829d2b5fea10effdb0c3,35.991383
1,43a2262d7738e3d420d453815151079e,35.88176
2,4e429cead1848a298432a0acad014c9d,35.721538
3,80bc3ccafcc51b66303c2c263aa38486,36.211495
4,8f49844c382931444e68dffbe20228f4,36.132379
5,b03f7041962238a7c9d6537e22f9b017,36.007298
6,c978013571258ed6d4637f6e8cc9d6a3,35.768847
7,e0de453c1bffc20c22b072b34b54e50f,35.539432
