In [None]:
!pip install ../input/hydracore105/omegaconf-2.0.5-py3-none-any.whl
from omegaconf import OmegaConf


In [None]:
import os
import sys
import warnings
from pprint import pprint
from glob import glob
from tqdm import tqdm
import gc

import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torchvision.transforms as T

sys.path.append('/kaggle/input/pytorch-image-models/pytorch-image-models-master')
import timm
from sklearn.model_selection import StratifiedKFold
from torchvision.io import read_image
from torch.utils.data import DataLoader, Dataset

import pytorch_lightning as pl
from pytorch_lightning.utilities.seed import seed_everything
from pytorch_lightning import callbacks
from pytorch_lightning.callbacks.progress import ProgressBarBase
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning import LightningDataModule, LightningModule

warnings.filterwarnings("ignore")

In [None]:
class PetfinderDataset(Dataset):
    """Image dataset driven by a dataframe of file paths.

    ``df["Id"]`` supplies the image path of every sample.  If the frame also
    has a ``"Pawpularity"`` column, its values are returned as labels next to
    the image; otherwise only the image is returned.
    """

    def __init__(self, df, transform, image_size=224):
        # ``image_size`` is accepted but not used in this class; any resizing
        # is left to ``transform``.
        super().__init__()
        self._X = df["Id"].values
        self._y = df["Pawpularity"].values if "Pawpularity" in df.keys() else None
        self._transform = transform

    def __len__(self):
        return len(self._X)

    def __getitem__(self, idx):
        # Decode the file, then apply the transform pipeline.
        image = self._transform(read_image(self._X[idx]))
        if self._y is None:
            return image
        return image, self._y[idx]

In [None]:
root_dir = "../input/petfinder-pawpularity-score"

# Training metadata: replace each bare image id with the path of its .jpg file.
train_df = pd.read_csv("../input/petfinder-pawpularity-score/train.csv")
train_df["Id"] = train_df["Id"].map(
    lambda image_id: os.path.join(root_dir, "train", image_id + ".jpg")
)

# Test metadata gets the same treatment, pointing at the "test" folder.
test_df = pd.read_csv("../input/petfinder-pawpularity-score/test.csv")
test_df["Id"] = test_df["Id"].map(
    lambda image_id: os.path.join(root_dir, "test", image_id + ".jpg")
)

In [None]:
def test_fn(model, test_loader, device):
    model.eval()
    preds_all = []
    pbar = tqdm(enumerate(test_loader), total=len(test_loader))
    for step, (imgs) in pbar:
        imgs = imgs.to(device).float()
        logits = model(imgs).squeeze(1)  # output = model(input)
        pred = logits.sigmoid().detach().cpu().numpy() * 100.
        preds_all.append(pred)
    preds_all = np.concatenate(preds_all)
    return preds_all

In [None]:
# Collects test-set predictions: the inference cells add one column per
# (checkpoint, TTA pass), and get_pred() later averages groups of columns.
predict_df = pd.DataFrame()

In [None]:
class BaseModel(nn.Module):
    """A timm backbone followed by a dropout + linear head with a single output."""

    def __init__(self, config):
        """Create the network named by ``config.model.name`` (no pretrained weights)."""
        super().__init__()
        # num_classes=0 requests the backbone without timm's own classifier.
        backbone = timm.create_model(
            config.model.name, pretrained=False, num_classes=0, in_chans=3
        )
        head = nn.Sequential(nn.Dropout(0.5), nn.Linear(backbone.num_features, 1))
        # Attribute names determine the state_dict keys the checkpoints must match.
        self.backbone = backbone
        self.fc = head

    def forward(self, x):
        """Return the head's output for the backbone features of ``x``."""
        return self.fc(self.backbone(x))

def get_transforms():
    """Build the deterministic inference pipeline: resize, float conversion, normalisation.

    ``image_size``, ``IMAGENET_MEAN`` and ``IMAGENET_STD`` are module-level
    names looked up when this function is called, so the result depends on
    which configuration cell ran last.
    """
    steps = [
        T.Resize([image_size, image_size]),
        T.ConvertImageDtype(torch.float),
        T.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ]
    return T.Compose(steps)

# Per-channel statistics passed to T.Normalize inside get_transforms().
IMAGENET_MEAN = [0.485, 0.456, 0.406]  # RGB
IMAGENET_STD = [0.229, 0.224, 0.225]  # RGB

## EfficientNet_model

In [None]:
# Settings for the tf_efficientnet_b7_ns checkpoints.  ``tta_fold`` is the
# number of inference passes run per checkpoint.
config = OmegaConf.create(
    {
        "root": "/kaggle/input/petfinder-pawpularity-score/",
        "model": {"name": "tf_efficientnet_b7_ns", "output_dim": 1},
        "tta_fold": 1,
    }
)
# Read by get_transforms() at call time and embedded in prediction column names.
image_size = 600

In [None]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
test_dataset = PetfinderDataset(test_df, get_transforms())
test_loader = DataLoader(
    test_dataset,
    batch_size=1,
    shuffle=False,
    num_workers=1,
    pin_memory=False,
    drop_last=False)

state_dict_paths = sorted(glob('../input/tf-efficientnet-rmse/rmse_tf_efficientnet_b7_ns_600/checkpoints/*'))
print(len(state_dict_paths))

for index, path in enumerate(state_dict_paths):
    # Load each checkpoint once; only the forward passes repeat per TTA pass.
    # map_location="cpu" keeps checkpoints saved on a GPU loadable when
    # `device` has fallen back to 'cpu'; the model is moved to `device` below.
    state_dict = torch.load(path, map_location="cpu")["state_dict"]
    model = BaseModel(config)
    model.load_state_dict(state_dict)
    model.eval().to(device)
    for fold in range(config.tta_fold):
        preds = test_fn(model, test_loader, device)
        # One column per (checkpoint, pass); get_pred("rmse", 600) averages them.
        predict_df[f"rmse_efficientnet_b7_{index}_{fold}_{image_size}"] = preds

In [None]:
# Settings for the tf_efficientnet_b4 checkpoints.  ``tta_fold`` is the number
# of inference passes run per checkpoint.
config = OmegaConf.create(
    {
        "root": "/kaggle/input/petfinder-pawpularity-score/",
        "model": {"name": "tf_efficientnet_b4", "output_dim": 1},
        "tta_fold": 1,
    }
)
# Read by get_transforms() at call time and embedded in prediction column names.
image_size = 480

In [None]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
test_dataset = PetfinderDataset(test_df, get_transforms())
test_loader = DataLoader(
    test_dataset,
    batch_size=2,
    shuffle=False,
    num_workers=1,
    pin_memory=False,
    drop_last=False)

state_dict_paths = sorted(glob('../input/tf-efficientnet-rmse/rmse_tf_efficientnet_b4_10fold/checkpoints/*'))
print(len(state_dict_paths))

for index, path in enumerate(state_dict_paths):
    # Load each checkpoint once; only the forward passes repeat per TTA pass.
    # map_location="cpu" keeps checkpoints saved on a GPU loadable when
    # `device` has fallen back to 'cpu'; the model is moved to `device` below.
    state_dict = torch.load(path, map_location="cpu")["state_dict"]
    model = BaseModel(config)
    model.load_state_dict(state_dict)
    model.eval().to(device)
    for fold in range(config.tta_fold):
        preds = test_fn(model, test_loader, device)
        # One column per (checkpoint, pass); get_pred("rmse", 480) averages them.
        predict_df[f"rmse_tf_efficientnet_b4_{index}_{fold}_{image_size}"] = preds

## Separate Model

In [None]:
import albumentations as A
from albumentations.pytorch import ToTensorV2
import cv2

class CatDogDataset(Dataset):
    """Dataset for the cat/dog classifier, reading images with OpenCV.

    Args:
        df: frame whose ``"Id"`` column holds image paths and, unless
            ``type_ == "test"``, whose ``"label"`` column holds the targets.
        transforms: callable invoked as ``transforms(image=...)`` that returns
            a mapping with an ``"image"`` entry (albumentations-style).
        type_: ``"test"`` to yield images only; any other value to yield
            ``(image, label)`` pairs.
    """

    def __init__(self, df, transforms, type_):
        self.type = type_
        self.df = df
        self.transforms = transforms
        self.file_names = df['Id'].values
        if type_ != "test":
            self.labels = df["label"].values

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        path = self.file_names[index]
        image = cv2.imread(path)
        if image is None:
            # cv2.imread signals an unreadable file by returning None; fail
            # here with the offending path instead of deep inside cvtColor.
            raise FileNotFoundError(f"could not read image: {path}")
        # OpenCV decodes to BGR; the normalisation statistics are given in RGB order.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = self.transforms(image=image)['image'].float()
        if self.type == "test":
            return image
        label = torch.tensor(self.labels[index]).long()
        return image, label

def valid_transforms():
    """Return the albumentations pipeline used for classifier inference.

    Resizes to the module-level ``image_size`` (looked up at call time),
    normalises with the given per-channel mean/std and converts to a tensor.
    """
    return A.Compose([
        A.Resize(image_size, image_size),
        A.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
            max_pixel_value=255.0,
            p=1.0,
        ),
        ToTensorV2(),
    ])

class Custom2DCNN(nn.Module):
    """timm backbone with its classifier removed, plus a single-output linear head.

    Args:
        model_name: timm architecture name.  Defaults to
            ``"tf_efficientnet_b1"``, the value that used to be hard-coded, so
            ``Custom2DCNN()`` builds the same network as before.

    The attribute is spelled ``backborn`` on purpose: it determines the
    state_dict keys, so renaming it would break loading existing weights.
    """

    def __init__(self, model_name="tf_efficientnet_b1"):
        super().__init__()
        backborn = timm.create_model(model_name, pretrained=False, in_chans=3)
        if 'efficientnet' in model_name:
            # These models expose the head as `.classifier`; swap it for a
            # pass-through so the backbone returns features.
            n_features = backborn.classifier.in_features
            backborn.classifier = nn.Identity()
            self.backborn = backborn
        else:
            # Generic fallback: treat the last child module as the classifier.
            layers = list(backborn.children())
            n_features = layers[-1].in_features
            self.backborn = nn.Sequential(*layers[:-1])
        self.fc = nn.Linear(n_features, 1)

    def forward(self, x):
        """Return one raw score per sample with shape ``(batch, 1)``."""
        x = self.backborn(x)
        x = self.fc(x)
        return x
    
def dog_test_fn(model, test_loader, device):
    """Classify every image in ``test_loader`` as 0 or 1.

    Args:
        model: module expected to return one score per sample with shape
            ``(batch, 1)``; axis 1 is squeezed away.
        test_loader: sized iterable yielding image batches only (no labels).
        device: device every batch is moved to before the forward pass.

    Returns:
        1-D integer numpy array in loader order: 0 where the model output is
        below 0.5, otherwise 1.  An empty loader yields an empty array.
    """
    model.eval()
    image_preds_all = []
    # Pure inference: no_grad() avoids recording an autograd graph per batch.
    with torch.no_grad():
        for imgs in tqdm(test_loader, total=len(test_loader)):
            imgs = imgs.to(device).float()
            image_preds = model(imgs).squeeze(1)
            # NOTE(review): the 0.5 threshold is applied to the raw model
            # output (no sigmoid here) - confirm this matches how the
            # classifier was trained.
            image_preds_all.append(
                np.where(image_preds.detach().cpu().numpy() < 0.5, 0, 1)
            )
    if not image_preds_all:
        # np.concatenate raises on an empty list.
        return np.zeros(0, dtype=int)
    return np.concatenate(image_preds_all)

# map_location="cpu" keeps weights saved on a GPU loadable when `device`
# falls back to 'cpu'; load_state_dict copies them into the model's own tensors.
state_dict = torch.load("../input/dog-cat-classifier/fold_0_7.pth", map_location="cpu")

device = 'cuda' if torch.cuda.is_available() else 'cpu'
# NOTE(review): valid_transforms() reads the global `image_size`, which at
# this point still holds the value set by the previous model's config cell -
# confirm that is the resolution the classifier expects.
test_dataset = CatDogDataset(test_df, valid_transforms(), 'test')
test_loader = DataLoader(
    test_dataset,
    batch_size=1,
    shuffle=False,
    num_workers=1,
    pin_memory=False,
    drop_last=False)
model = Custom2DCNN().to(device)
model.load_state_dict(state_dict)

test_preds = dog_test_fn(model, test_loader, device)

test_df["type"] = test_preds
# Independent copies: both subsets get a "preds" column added later, which
# must not be an ambiguous write to a slice of test_df.
test_df_cat = test_df[test_df["type"] == 1].copy()
test_df_dog = test_df[test_df["type"] == 0].copy()

In [None]:
# Drop the references to the classifier, its loader and weights so they can
# be garbage-collected before the next models are loaded.
del model, test_dataset, test_loader, state_dict
gc.collect()

## 犬の推論

In [None]:
config = {'root': '/kaggle/input/petfinder-pawpularity-score/',
          'model':{
              'name': 'swin_large_patch4_window7_224',
              'output_dim': 1
          },
          'tta_fold': 1
}
config = OmegaConf.create(config)
image_size = 224

test_dataset = PetfinderDataset(test_df_dog, get_transforms())
test_loader = DataLoader(
    test_dataset,
    batch_size=2,
    shuffle=False,
    num_workers=1,
    pin_memory=False,
    drop_last=False)

# Kept separate from `state_dict` so one name does not hold a path first and
# the loaded weights afterwards.
dog_ckpt_path = "../input/cat-dog-separate/dog_swin_large_patch4_window7_224/checkpoints/best_loss-003.ckpt"

# Skip inference when nothing was classified as a dog (mirrors the guard in
# the cat cell); test_fn would otherwise be called with an empty loader.
if len(test_df_dog) != 0:
    # map_location="cpu" keeps GPU-saved checkpoints loadable on a CPU-only run.
    state_dict = torch.load(dog_ckpt_path, map_location="cpu")["state_dict"]
    model = BaseModel(config)
    model.load_state_dict(state_dict)
    model.eval().to(device)
    preds = test_fn(model, test_loader, device)
    # assign() returns a new frame, so this never writes into a slice of test_df.
    test_df_dog = test_df_dog.assign(preds=preds)

## 猫の推論

In [None]:
test_dataset = PetfinderDataset(test_df_cat, get_transforms())
test_loader = DataLoader(
    test_dataset,
    batch_size=2,
    shuffle=False,
    num_workers=1,
    pin_memory=False,
    drop_last=False)

# NOTE(review): this path says "chekpoints" while the dog checkpoint path uses
# "checkpoints".  Left unchanged because it may match the real directory name
# in the dataset - verify.
cat_ckpt_path = "../input/cat-dog-separate/cat_swin_large_patch4_window7_224/chekpoints/best_loss-003.ckpt"

# The cat subset may be empty; skip loading and inference in that case.
if len(test_df_cat) != 0:
    # map_location="cpu" keeps GPU-saved checkpoints loadable on a CPU-only run.
    state_dict = torch.load(cat_ckpt_path, map_location="cpu")["state_dict"]
    model = BaseModel(config)
    model.load_state_dict(state_dict)
    model.eval().to(device)
    preds = test_fn(model, test_loader, device)
    # assign() returns a new frame, so this never writes into a slice of test_df.
    test_df_cat = test_df_cat.assign(preds=preds)

# Despite its name, `test_df_dog` now holds both subsets, put back into the
# original row order by sorting on the index.
test_df_dog = pd.concat([test_df_dog, test_df_cat]).sort_index()

In [None]:
# Assigning a Series aligns on index labels - assumes predict_df carries the
# same default 0..n-1 index as test_df; TODO confirm.
predict_df["cat_dog"] = test_df_dog["preds"]

## 384 model

In [None]:
# Settings for the swin_large_patch4_window12_384 checkpoints.
config = OmegaConf.create(
    {
        "root": "/kaggle/input/petfinder-pawpularity-score/",
        "model": {"name": "swin_large_patch4_window12_384", "output_dim": 1},
        # Two inference passes per checkpoint.
        "tta_fold": 2,
    }
)

# Read by get_transforms() at call time.
image_size = 384
IMAGENET_MEAN = [0.485, 0.456, 0.406]  # RGB
IMAGENET_STD = [0.229, 0.224, 0.225]  # RGB

def get_transforms():
    """Build the test-time-augmentation pipeline; replaces the earlier ``get_transforms``.

    Besides resize / float conversion / normalisation it applies a random
    horizontal flip and colour jitter, so repeated passes over the same image
    can differ.  ``image_size`` and the ImageNet statistics are module-level
    names looked up at call time.
    """
    resize = T.Resize([image_size, image_size])
    random_augmentations = [
        T.RandomHorizontalFlip(),
        T.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3),
    ]
    to_normalised_float = [
        T.ConvertImageDtype(torch.float),
        T.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ]
    return T.Compose([resize, *random_augmentations, *to_normalised_float])

In [None]:
class Model384(nn.Module):
    """A timm backbone followed by a dropout + linear head with a single output."""

    def __init__(self, config):
        """Create the network named by ``config.model.name`` (no pretrained weights)."""
        super().__init__()
        # num_classes=0 requests the backbone without timm's own classifier.
        feature_extractor = timm.create_model(
            config.model.name, pretrained=False, num_classes=0, in_chans=3
        )
        regressor = nn.Sequential(
            nn.Dropout(0.5), nn.Linear(feature_extractor.num_features, 1)
        )
        # Attribute names determine the state_dict keys the checkpoints must match.
        self.backbone = feature_extractor
        self.fc = regressor

    def forward(self, x):
        """Return the head's output for the backbone features of ``x``."""
        features = self.backbone(x)
        return self.fc(features)


In [None]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Rebuilt here so the dataset uses whichever get_transforms() is currently defined.
test_dataset = PetfinderDataset(test_df, get_transforms())
# Sequential, complete pass over the test set.
test_loader = DataLoader(
    test_dataset, batch_size=2, shuffle=False,
    num_workers=1, pin_memory=False, drop_last=False,
)

In [None]:
from glob import glob
# Checkpoints for the 384 model under the "bce" directory; sorted so the
# checkpoint index used in column names is stable across runs.
state_dict_paths = sorted(glob('../input/swin-384/bce/*.ckpt'))
len(state_dict_paths)

In [None]:
for index, path in enumerate(state_dict_paths):
    # Load each checkpoint once; with tta_fold > 1 only the forward passes
    # repeat, instead of re-reading the file and rebuilding the model each time.
    # map_location="cpu" keeps GPU-saved checkpoints loadable on a CPU-only run.
    state_dict = torch.load(path, map_location="cpu")["state_dict"]
    model = Model384(config)
    model.load_state_dict(state_dict)
    model.eval().to(device)
    for fold in range(config.tta_fold):
        preds = test_fn(model, test_loader, device)
        # One column per (checkpoint, pass); get_pred("BCE", 384) averages them.
        predict_df[f"BCE_{index}_{fold}_{image_size}"] = preds

In [None]:
# Sorted so the checkpoint index used in column names is stable across runs.
state_dict_paths = sorted(glob('../input/swin-384/rmse/*.ckpt'))

for index, path in enumerate(state_dict_paths):
    # Load each checkpoint once; with tta_fold > 1 only the forward passes
    # repeat, instead of re-reading the file and rebuilding the model each time.
    # map_location="cpu" keeps GPU-saved checkpoints loadable on a CPU-only run.
    state_dict = torch.load(path, map_location="cpu")["state_dict"]
    model = Model384(config)
    model.load_state_dict(state_dict)
    model.eval().to(device)
    for fold in range(config.tta_fold):
        preds = test_fn(model, test_loader, device)
        # One column per (checkpoint, pass); get_pred("RMSE", 384) averages them.
        predict_df[f"RMSE_{index}_{fold}_{image_size}"] = preds

## 224 model

In [None]:
# Settings for the swin_large_patch4_window7_224 checkpoints.
config = OmegaConf.create(
    {
        "root": "/kaggle/input/petfinder-pawpularity-score/",
        "model": {"name": "swin_large_patch4_window7_224", "output_dim": 1},
        # A single inference pass per checkpoint.
        "tta_fold": 1,
    }
)

# Read by get_transforms() at call time.
image_size = 224
IMAGENET_MEAN = [0.485, 0.456, 0.406]  # RGB
IMAGENET_STD = [0.229, 0.224, 0.225]  # RGB

def get_transforms():
    """Build the deterministic inference pipeline: resize, float conversion, normalisation.

    Replaces the earlier ``get_transforms``; no random flip or colour jitter
    is applied here.  ``image_size`` and the ImageNet statistics are
    module-level names looked up at call time.
    """
    pipeline = []
    pipeline.append(T.Resize([image_size, image_size]))
    pipeline.append(T.ConvertImageDtype(torch.float))
    pipeline.append(T.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD))
    return T.Compose(pipeline)

# Rebuilt here so the dataset uses whichever get_transforms() is currently defined.
test_dataset = PetfinderDataset(test_df, get_transforms())
# Sequential, complete pass over the test set.
test_loader = DataLoader(
    test_dataset, batch_size=2, shuffle=False,
    num_workers=1, pin_memory=False, drop_last=False,
)

In [None]:
# Sorted so the checkpoint index used in column names is stable across runs.
state_dict_paths = sorted(glob('../input/swin-224-10/bce/*.ckpt'))

for index, path in enumerate(state_dict_paths):
    # Load each checkpoint once; only the forward passes repeat per TTA pass.
    # map_location="cpu" keeps GPU-saved checkpoints loadable on a CPU-only run.
    state_dict = torch.load(path, map_location="cpu")["state_dict"]
    # Model384 is the generic backbone + head wrapper; the architecture comes
    # from config.model.name.
    model = Model384(config)
    model.load_state_dict(state_dict)
    model.eval().to(device)
    for fold in range(config.tta_fold):
        preds = test_fn(model, test_loader, device)
        # One column per (checkpoint, pass); get_pred("BCE", 224) averages them.
        predict_df[f"BCE_{index}_{fold}_{image_size}"] = preds

In [None]:
# Sorted so the checkpoint index used in column names is stable across runs.
state_dict_paths = sorted(glob('../input/swin-224-10/rmse/*.ckpt'))

for index, path in enumerate(state_dict_paths):
    # Load each checkpoint once; only the forward passes repeat per TTA pass.
    # map_location="cpu" keeps GPU-saved checkpoints loadable on a CPU-only run.
    state_dict = torch.load(path, map_location="cpu")["state_dict"]
    # Model384 is the generic backbone + head wrapper; the architecture comes
    # from config.model.name.
    model = Model384(config)
    model.load_state_dict(state_dict)
    model.eval().to(device)
    for fold in range(config.tta_fold):
        preds = test_fn(model, test_loader, device)
        # One column per (checkpoint, pass); get_pred("RMSE", 224) averages them.
        predict_df[f"RMSE_{index}_{fold}_{image_size}"] = preds

In [None]:
# Sorted so the checkpoint index used in column names is stable across runs.
state_dict_paths = sorted(glob('../input/down-sample-swin/down_sample_swin_large_patch4_window7_224/checkpoints/*.ckpt'))

for index, path in enumerate(state_dict_paths):
    # Load each checkpoint once; only the forward passes repeat per TTA pass.
    # map_location="cpu" keeps GPU-saved checkpoints loadable on a CPU-only run.
    state_dict = torch.load(path, map_location="cpu")["state_dict"]
    # Model384 is the generic backbone + head wrapper; the architecture comes
    # from config.model.name.
    model = Model384(config)
    model.load_state_dict(state_dict)
    model.eval().to(device)
    for fold in range(config.tta_fold):
        preds = test_fn(model, test_loader, device)
        # One column per (checkpoint, pass); get_pred("DOWN", 224) averages them.
        predict_df[f"DOWN_SAMPLE_{index}_{fold}_{image_size}"] = preds

In [None]:
import gc
# All image models have been run; drop the last model and loader so their
# memory can be reclaimed before the blending stage.
del model, test_dataset, test_loader
gc.collect()

## OOF・予測値を追加

In [None]:
def change_id(x):
    """Return the last '/'-separated component of ``x`` with every '.jpg' removed."""
    file_name = x.rsplit('/', 1)[-1]
    return file_name.replace('.jpg', '')

train = pd.read_csv('../input/petfinder-pawpularity-score/train.csv')
dog = pd.read_csv('../input/cat-dog-separate/dog_swin_large_patch4_window7_224/oof_swin_large_patch4_window7_224_dog_bce.csv')
cat = pd.read_csv('../input/cat-dog-separate/cat_swin_large_patch4_window7_224/oof_swin_large_patch4_window7_224_cat_bce.csv')

# The OOF files store full image paths; reduce them to bare ids so they can
# be joined against train.csv.
dog["Id"] = dog["Id"].map(change_id)
cat["Id"] = cat["Id"].map(change_id)

# Stack both OOF sets and join them onto the training rows by id; the result
# is stored under the name `dog` even though it covers both files.
dog = train.merge(pd.concat([dog, cat]), on='Id')

In [None]:
# Attach each model's out-of-fold prediction to train_df as a feature column.
# NOTE(review): every column is copied from a freshly read CSV (or the merged
# `dog` frame) by index position, so this assumes each file lists the training
# images in the same order as train.csv - confirm.
train_df["rmse_384"] = pd.read_csv('../input/swin-384/rmse/oof_swin_large_patch4_window12_384_rmse.csv')["oof_pred"]
train_df["bce_384"] = pd.read_csv('../input/swin-384/bce/oof_swin_large_patch4_window12_384_bce.csv')["oof_pred"]
train_df["rmse_224"] = pd.read_csv('../input/swin-224-10/rmse/oof_swin_large_patch4_window7_224_10_rmse.csv')["oof_pred"]
train_df["bce_224"] = pd.read_csv('../input/swin-224-10/bce/oof_swin_large_patch4_window7_224_10_bce.csv')["oof_pred"]

train_df["efficientnet_b7"] = pd.read_csv('../input/tf-efficientnet-rmse/rmse_tf_efficientnet_b7_ns_600/oof_tf_efficientnet_b7_ns_rmse.csv')["oof_pred"]
train_df["tf_efficientnet_b4"] = pd.read_csv('../input/tf-efficientnet-rmse/rmse_tf_efficientnet_b4_10fold/oof_tf_efficientnet_b4_10_rmse.csv')["oof_pred"]
train_df["dog_cat"] = dog["oof_pred"]
train_df["down_sample"] = pd.read_csv('../input/get-down-oof/oof_down-sample-swin.csv')["oof_pred"]

# The helper frames are no longer needed once their columns are copied over.
del dog, cat, train

In [None]:
def get_pred(type, image_size):
    """Average the ``predict_df`` columns belonging to one model family.

    A column is selected when its name contains both ``type`` and
    ``str(image_size)`` as substrings.

    Args:
        type: substring identifying the family, e.g. ``"RMSE"`` or ``"BCE"``.
            (The name shadows the builtin but is kept for existing callers.)
        image_size: input resolution embedded in the column names.

    Returns:
        Series with the row-wise mean of the selected columns.

    Raises:
        ValueError: if no column matches.  Averaging zero columns would hand
            an all-NaN feature to the blender without any warning.
    """
    size_tag = str(image_size)
    use_columns = [
        col for col in predict_df.columns if type in col and size_tag in col
    ]
    if not use_columns:
        raise ValueError(
            f"no prediction columns match type={type!r} and image_size={image_size!r}"
        )
    return predict_df[use_columns].mean(axis=1)

In [None]:
# predict_df

In [None]:
# Build the test-set counterpart of each OOF feature: the per-checkpoint
# predictions in predict_df are averaged per model family, under the same
# column names that were added to train_df.
test_df["rmse_384"] = get_pred("RMSE", 384)
# test_df["Pawpularity"] = get_pred("RMSE", 384)
# test_df["Pawpularity"] = test_df["Pawpularity"].astype('int32')

test_df["bce_384"] = get_pred("BCE", 384)
test_df["rmse_224"] = get_pred("RMSE", 224)
test_df["bce_224"] = get_pred("BCE", 224)

# Lower-case "rmse" plus the 600 / 480 size selects the EfficientNet columns.
test_df["efficientnet_b7"] = get_pred("rmse", 600)
test_df["tf_efficientnet_b4"] = get_pred("rmse", 480)
# The cat/dog-specific predictions are one ready-made column, not an average.
test_df["dog_cat"] = predict_df["cat_dog"]
test_df["down_sample"] = get_pred("DOWN", 224)

In [None]:
# train_df["std"] = np.std([train_df["rmse_384"], train_df["bce_384"], train_df["rmse_224"], train_df["bce_224"]], axis=0)
# test_df["std"] = np.std([test_df["rmse_384"], test_df["bce_384"], test_df["rmse_224"], test_df["bce_224"]], axis=0)

# train_df["mean"] = np.mean([train_df["rmse_384"], train_df["bce_384"], train_df["rmse_224"], train_df["bce_224"]], axis=0)
# test_df["mean"] = np.mean([test_df["rmse_384"], test_df["bce_384"], test_df["rmse_224"], test_df["bce_224"]], axis=0)

In [None]:
# average = (test_df["rmse_384"] + test_df["bce_384"] + test_df["bce_224"] + test_df["rmse_224"]) / 4

# submission = pd.read_csv("../input/petfinder-pawpularity-score/sample_submission.csv")
# submission['Pawpularity'] = average
# submission[['Id', 'Pawpularity']].to_csv('submission.csv', index=False)

In [None]:
# Master switch: the blending cell and the submission write-out only run when True.
use_blend = True
# Second-stage model to fit on the stacked features.
blend_model = "ridge"  # alternative: "lgbm"

## Blending

In [None]:
# NOTE(review): "pred_label" is copied by index position, which assumes
# train_with_type.csv lists the images in the same order as train.csv - confirm.
cat_dog = pd.read_csv('../input/cat-vs-dog/train_with_type.csv')
train_df["cat_dog"] = cat_dog["pred_label"]

def trans_cat_dog_label(x):
    """Map label 0 to an offset of 0 and any other label to an offset of 200."""
    return 0 if x == 0 else 200

# Turn the label into an offset (0 or 200) and add it to the target; the sum
# is used as the stratification key when folds are assigned.
# Presumably Pawpularity stays below 200, so the two label groups never share
# a fold_value - confirm.
train_df["cat_dog"] = train_df["cat_dog"].apply(trans_cat_dog_label)
train_df["fold_value"] = train_df["Pawpularity"] + train_df["cat_dog"]

In [None]:
# Columns that must not be fed to the blender: identifiers, the target and
# the fold bookkeeping.  Every other train_df column becomes a feature.
delete_columns = ["file_path", "Pawpularity", "Id", "fold", "fold_value", "cat_dog"]
features = [col for col in train_df.columns if col not in delete_columns]

train_df["fold"] = 0

# NOTE(review): a test set with fewer than 10 rows is duplicated; the
# submission cell truncates predictions back to len(submission).  The purpose
# is not evident from this cell - confirm.
if len(test_df) < 10:
    test_df = pd.concat([test_df, test_df]).reset_index(drop=True)

skf = StratifiedKFold(
    n_splits=5, shuffle=True, random_state=2021
)

# skf.split yields positional indices while .loc is label-based, so this
# relies on train_df still having its default 0..n-1 index.
for fold, (trn_ind, val_ind) in enumerate(skf.split(train_df, train_df["fold_value"])):
    train_df.loc[val_ind, 'fold'] = int(fold)


In [None]:
if use_blend:
    # Single source of truth for the fold count: the splitter that assigned
    # train_df["fold"], instead of a literal 5 repeated in every branch.
    n_folds = skf.get_n_splits()
    test_predictions = np.zeros(test_df.shape[0])
    train_predictions = np.zeros(train_df.shape[0])
    x_test = test_df[features]

    def _fold_data(fold):
        """Return (x_train, y_train, x_val, y_val, val_index) for one CV fold."""
        train_part = train_df[train_df["fold"] != fold]
        val_part = train_df[train_df["fold"] == fold]
        # Remember the original row labels before resetting the index so the
        # out-of-fold predictions can be written back to the right rows.
        val_index = val_part.index
        train_part = train_part.reset_index(drop=True)
        val_part = val_part.reset_index(drop=True)
        return (
            train_part[features],
            train_part["Pawpularity"],
            val_part[features],
            val_part["Pawpularity"],
            val_index,
        )

    if blend_model == "lgbm":
        import lightgbm as lgb

        params = {
            'objective': 'rmse',
            'boosting_type': 'gbdt',
            'max_depth': -1,
            'max_bin':100,
            'learning_rate': 0.003,
            'n_jobs':-1,
            'verbose': -1}

        for fold in range(n_folds):
            x_train, y_train, x_val, y_val, val_index = _fold_data(fold)

            train_dataset = lgb.Dataset(x_train, y_train)
            val_dataset = lgb.Dataset(x_val, y_val)
            # NOTE(review): verbose_eval / early_stopping_rounds are keyword
            # arguments of older LightGBM releases; newer ones expect
            # callbacks - confirm against the installed version.
            model = lgb.train(params = params,
                              num_boost_round=1000,
                              train_set = train_dataset,
                              valid_sets = [train_dataset, val_dataset],
                              verbose_eval = 250,
                              early_stopping_rounds=50)

            # Test prediction is the mean over the fold models.
            test_predictions += model.predict(x_test) / n_folds
            train_predictions[val_index] = model.predict(x_val)

        # Importance plot of the last fold's model only.
        lgb.plot_importance(model,max_num_features=20)

    elif blend_model == "ridge":
        from sklearn.linear_model import Ridge

        for fold in range(n_folds):
            x_train, y_train, x_val, y_val, val_index = _fold_data(fold)

            clf = Ridge(alpha=1.0)
            clf.fit(x_train, y_train)

            # Test prediction is the mean over the fold models.
            test_predictions += clf.predict(x_test) / n_folds
            train_predictions[val_index] = clf.predict(x_val)

    else:
        # Previously an unknown name fell through silently and only surfaced
        # later as a NameError on test_predictions.
        raise ValueError(f"unknown blend_model: {blend_model!r} (expected 'lgbm' or 'ridge')")

In [None]:
from sklearn.metrics import mean_squared_error
def rmse(y_true, y_pred):
    """Return the root of ``mean_squared_error(y_true, y_pred)``."""
    mse = mean_squared_error(y_true, y_pred)
    return np.sqrt(mse)

# Out-of-fold RMSE of each first-stage feature against the target (rmse_384 is
# not printed here).
print("efficientnet_b7:", rmse(train_df["efficientnet_b7"].values, train_df["Pawpularity"].values))
print("tf_efficientnet_b4:", rmse(train_df["tf_efficientnet_b4"].values, train_df["Pawpularity"].values))
print("dog_cat:", rmse(train_df["dog_cat"].values, train_df["Pawpularity"].values))
print("bce_384:", rmse(train_df["bce_384"].values, train_df["Pawpularity"].values))
print("rmse_224:", rmse(train_df["rmse_224"].values, train_df["Pawpularity"].values))
print("bce_224:", rmse(train_df["bce_224"].values, train_df["Pawpularity"].values))
print("down:", rmse(train_df["down_sample"].values, train_df["Pawpularity"].values))

if use_blend:
    # Score of the blender's out-of-fold predictions, comparable to the lines above.
    print("blend_score:", rmse(train_predictions, train_df["Pawpularity"].values))
    submission = pd.read_csv("../input/petfinder-pawpularity-score/sample_submission.csv")
    # Truncate to the submission length: test_df may have been duplicated
    # when it had fewer than 10 rows.
    submission['Pawpularity'] = test_predictions[:len(submission)]
    submission[['Id', 'Pawpularity']].to_csv('submission.csv', index=False)

In [None]:
# submission = pd.read_csv("../input/petfinder-pawpularity-score/sample_submission.csv")
# submission['Pawpularity'] = (test_df["rmse_1"] + test_df["bce_1"]) / 2
# submission[['Id', 'Pawpularity']].to_csv('submission_simple.csv', index=False)

In [None]:
# sort = train_df.sort_values('down_sample', ascending=True).reset_index(drop=True)
# # 移動平均を算出
# def get_rolling_mean(df: pd.DataFrame, windows: list, feature: str):
#     for window in windows:
#         df[feature + "_rolling_" + str(window)] = df[feature].rolling(window=window).mean()
#     return df

# sort = get_rolling_mean(sort, [64], "down_sample")
# sort = get_rolling_mean(sort, [64], "Pawpularity")

In [None]:
# print(train_df["Pawpularity"].describe())
# fig, ax = plt.subplots(figsize=(20,4))
# ax.plot(sort["Pawpularity"], label="target")
# ax.plot(sort["Pawpularity_rolling_64"], label="target_rolling")
# # ax.plot(sort["ens"], label="ens")
# # ax.plot(sort["efficientnet_b7"], label="efficientnet_b7")
# ax.plot(sort["down_sample"], label="down_sample")
# ax.grid()
# # ax.plot(sort_2["rmse_224_rolling_100"], label="rmse_224")
# plt.show()

In [None]:
# up_value = 1
# for i in range(len(sort)):
#     row = sort.loc[i]
#     if (row["efficientnet_b7"] - row["bce_384"]) > up_value:
#         sort.loc[i, "efficientnet_b7"] = row["efficientnet_b7"] + up_value * 10
#     elif (row["efficientnet_b7"] - row["bce_384"]) < -up_value:
#         sort.loc[i, "efficientnet_b7"] = row["efficientnet_b7"] - up_value * 10

In [None]:
# rmse(sort["efficientnet_b7"].values, sort["Pawpularity"].values)

In [None]:
# for i in range(1, 11):
#     print(f"score_{i*1000}:", rmse(sort["bce_384_rolling_3"][3:i*1000].values, sort["Pawpularity"][3:i*1000].values), rmse(sort["bce_384_rolling_3"][3 + (i-1)*1000:i*1000].values, sort["Pawpularity"][3 + (i-1)*1000:i*1000].values))

In [None]:
# for i in range(1, 11):
#     print(f"score_{i*1000}:", rmse(sort["bce_384_rolling_3"][3:i*1000].values, sort["Pawpularity"][3:i*1000].values), rmse(sort["bce_384"][3 + (i-1)*1000:i*1000].values, sort["Pawpularity"][3 + (i-1)*1000:i*1000].values))

In [None]:
# for i in range(len(sort)):
#     if i > 9000:
#         sort.loc[i, "bce_384"] = sort["Pawpularity"][9000:].mean()

In [None]:
# sort["bce_384"][8000:].describe()

In [None]:
# rmse(sort["bce_384"].values, sort["Pawpularity"].values)