In [1]:
# Extend the module search path with the U-2-Net checkout under ../input and
# with the parent directory of this notebook.
# NOTE(review): the parent directory is presumably where the `utils` package
# imported further down lives — confirm.
import sys
sys.path.append('../input/U-2-Net/')
sys.path.append('../')

In [2]:
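# One-off preprocessing step, kept disabled for reference.
# NOTE(review): presumably this is what writes the masks that the `mask_path`
# columns read from `train_U2NET` / `test_U2NET` — confirm before re-enabling.
# from u2net_test import extract
# extract('../input/petfinder-pawpularity-score/train', '../input/petfinder-pawpularity-score/train_U2NET')
# extract('../input/petfinder-pawpularity-score/test', '../input/petfinder-pawpularity-score/test_U2NET')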

In [3]:
import numpy as np, pandas as pd
from glob import glob
import shutil, os
import pickle
import matplotlib.pyplot as plt
from sklearn.model_selection import GroupKFold
from sklearn.decomposition import IncrementalPCA
from tqdm.notebook import tqdm
import torch.nn.functional as F
from albumentations.pytorch.transforms import ToTensorV2
import seaborn as sns
import PIL.Image as Image
import torch
from torch.utils.data import Dataset, DataLoader
import cv2
from cuml.svm import SVR
import time
import pandas_profiling as pdp
from pathlib import Path
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedKFold
from utils.util import *
from utils.losses import *
import torch.nn as nn
import math
import transformers as T
import albumentations
import pandas as pd
import cv2
import numpy as np
import timm
import torch.nn as nn
from utils.util import EarlyStopping
from sklearn import metrics
import torch
from tqdm import tqdm
import math
import albumentations
import tez
import torch.optim as optim
import warnings
warnings.simplefilter('ignore')

In [4]:
class CONFIG:
    """Static settings shared by the cells of this notebook."""
    # Filesystem locations: competition data, notebook outputs, fold checkpoints.
    DATA_PATH = Path('../input/petfinder-pawpularity-score')
    OUTPUT_DIR = Path('../output/11')
    MODEL_PATH = Path('../input/pretrained_models/swin_large_patch4_window12_384_add_petcategory_segmentation/')
    # Architecture name handed to timm; also the prefix of the checkpoint
    # files loaded per fold ("<MODEL_NAME>_<fold>_best.pth").
    MODEL_NAME = 'swin_large_patch4_window12_384'
    batch_size = 10
    # Number of cross-validation folds.
    fold=5
    # Side length (pixels) that images are resized to.
    image_size = 384
    device='cuda'
    SEED=42
    # main() branches on this flag: True fits (and pickles) an SVR head per
    # fold, False expects to load previously saved SVR heads. It was read but
    # never defined, which raised AttributeError on a fresh kernel.
    training_step = True
# Make sure the output directory exists before anything writes into it.
os.makedirs(CONFIG.OUTPUT_DIR, exist_ok=True)

# Logger and RNG seeding are provided by the project's utility helpers.
LOGGER = init_logger(OUTPUT_DIR=CONFIG.OUTPUT_DIR)
fix_seed(CONFIG.SEED)

# The 37 breed labels (12 capitalised names followed by 25 lower-case names).
pet_category = [
    'Abyssinian',
    'Bengal',
    'Birman',
    'Bombay',
    'British_Shorthair',
    'Egyptian_Mau',
    'Maine_Coon',
    'Persian',
    'Ragdoll',
    'Russian_Blue',
    'Siamese',
    'Sphynx',
    'american_bulldog',
    'american_pit_bull_terrier',
    'basset_hound',
    'beagle',
    'boxer',
    'chihuahua',
    'english_cocker_spaniel',
    'english_setter',
    'german_shorthaired',
    'great_pyrenees',
    'havanese',
    'japanese_chin',
    'keeshond',
    'leonberger',
    'miniature_pinscher',
    'newfoundland',
    'pomeranian',
    'pug',
    'saint_bernard',
    'samoyed',
    'scottish_terrier',
    'shiba_inu',
    'staffordshire_bull_terrier',
    'wheaten_terrier',
    'yorkshire_terrier',
]

In [5]:
def _image_size(image_path):
    """Return the ``.size`` tuple of the image stored at ``image_path``.

    The file is opened inside a ``with`` block so its handle is released right
    away instead of lingering until garbage collection (the previous
    ``Image.open(p).size`` left one open handle per image).
    """
    with Image.open(image_path) as img:
        return img.size


def _add_image_columns(df, split):
    """Add path and image-size columns to a competition frame, in place.

    Args:
        df: frame with an ``Id`` column.
        split: sub-directory name under ``CONFIG.DATA_PATH`` (``'train'`` or
            ``'test'``); masks are looked up under ``'<split>_U2NET'``.

    Returns:
        The same frame with ``path``, ``mask_path``, ``image_size``, ``width``
        and ``height`` columns added.
    """
    df['path'] = df['Id'].map(lambda x: str(CONFIG.DATA_PATH / split / x) + '.jpg')
    df['mask_path'] = df['Id'].map(lambda x: str(CONFIG.DATA_PATH / f'{split}_U2NET' / x) + '.jpg')
    df['image_size'] = df['path'].apply(_image_size)
    # image_size is stored as a tuple; element 0 is the width, element 1 the height.
    df['width'] = df['image_size'].apply(lambda x: x[0])
    df['height'] = df['image_size'].apply(lambda x: x[1])
    return df


train_df = _add_image_columns(pd.read_csv(CONFIG.DATA_PATH / 'train.csv'), 'train')

test_df = _add_image_columns(pd.read_csv(CONFIG.DATA_PATH / 'test.csv'), 'test')

In [6]:
# Split the training frame into CONFIG.fold folds using the Pawpularity target.
# NOTE(review): get_train_data is not defined in this notebook (presumably it
# comes from the `utils` star imports) and presumably adds the `fold` column
# that main() filters on — confirm against the helper.
# Re-running this cell feeds the already-processed frame back into the helper.
train_df = get_train_data(train_df, train_df['Pawpularity'], n_splits = CONFIG.fold, regression=True)

In [7]:
# Channel-wise normalisation statistics used by both pipelines.
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]


def _resize_step():
    """Resize to the square input resolution configured in CONFIG."""
    return albumentations.Resize(CONFIG.image_size, CONFIG.image_size, p=1)


def _normalize_step():
    """Scale 0-255 pixel values and standardise each channel."""
    return albumentations.Normalize(
        mean=list(_NORM_MEAN),
        std=list(_NORM_STD),
        max_pixel_value=255.0,
        p=1.0,
    )


# Training pipeline: resize, light colour jitter, normalise.
# Flips are currently disabled:
#     albumentations.VerticalFlip(p=0.5),
#     albumentations.HorizontalFlip(p=0.5),
# NOTE(review): the HueSaturationValue limits of 0.2 look very small next to
# the library's integer-scale defaults — confirm they are intended.
_train_steps = [
    _resize_step(),
    albumentations.HueSaturationValue(
        hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2, p=0.5
    ),
    albumentations.RandomBrightnessContrast(
        brightness_limit=(-0.1, 0.1), contrast_limit=(-0.1, 0.1), p=0.5
    ),
    _normalize_step(),
]
train_aug = albumentations.Compose(_train_steps, p=1.0)

# Evaluation pipeline: deterministic resize + normalise only.
_test_steps = [_resize_step(), _normalize_step()]
test_aug = albumentations.Compose(_test_steps, p=1.0)

# train_aug = albumentations.Compose(
#     [albumentations.Resize(CONFIG.image_size, CONFIG.image_size, p=1),
#     albumentations.VerticalFlip(p=0.5),
#     albumentations.HorizontalFlip(p=0.5)], p=1.0)

In [8]:
class PawpularDataset:
    """Map-style dataset that crops each image to its foreground mask.

    Every item is a dict with the keys ``image`` (channel-first float tensor),
    ``mask`` (float tensor), ``features`` (row of ``dense_features``) and
    ``targets``.

    Args:
        df: frame providing the ``path`` and ``mask_path`` columns.
        dense_features: 2-D array indexed as ``dense_features[item, :]``.
        targets: per-row targets, or ``None`` to use a dummy target of 1.0
            for every row (e.g. at inference time).
        augmentations: callable invoked as ``augmentations(image=, mask=)`` and
            returning a dict with ``image`` and ``mask``; ``None`` to skip.
    """

    def __init__(self, df, dense_features, targets, augmentations):
        self.image_paths = df['path'].tolist()
        self.mask_paths = df['mask_path'].tolist()
        self.dense_features = dense_features
        self.targets = targets
        if self.targets is None:
            # No labels available: use placeholders so items keep the same keys.
            self.targets = torch.ones(len(self.image_paths))
        self.augmentations = augmentations

    def __len__(self):
        return len(self.image_paths)

    @staticmethod
    def _read(path):
        """Read ``path`` with OpenCV, failing loudly when it cannot be decoded."""
        data = cv2.imread(path)
        if data is None:
            # cv2.imread signals failure by returning None; without this check
            # the error only surfaces later as an opaque cvtColor assertion.
            raise FileNotFoundError(f"Could not read image file: {path}")
        return data

    @staticmethod
    def _crop_to_mask(image, mask):
        """Crop ``image`` and ``mask`` to the bounding box of ``mask > 127.5``.

        The end indices are exclusive (the last foreground row/column is not
        part of the crop); this is kept as-is so crops stay identical to what
        the code produced before. When the mask has no foreground pixel, or the
        box would be empty (foreground one pixel thick), the inputs are
        returned uncropped instead of crashing downstream.
        """
        rows, cols = np.where(mask > 127.5)
        if rows.size == 0:
            return image, mask
        top, bottom = rows.min(), rows.max()
        left, right = cols.min(), cols.max()
        if bottom == top or right == left:
            return image, mask
        return image[top:bottom, left:right, :], mask[top:bottom, left:right]

    def __getitem__(self, item):
        image = cv2.cvtColor(self._read(self.image_paths[item]), cv2.COLOR_BGR2RGB)
        mask = cv2.cvtColor(self._read(self.mask_paths[item]), cv2.COLOR_BGR2GRAY)

        image, mask = self._crop_to_mask(image, mask)

        if self.augmentations is not None:
            augmented = self.augmentations(image=image, mask=mask)
            image = augmented["image"]
            mask = augmented["mask"]

        targets = self.targets[item]
        # Height-width-channel -> channel-height-width.
        image = np.transpose(image, (2, 0, 1)).astype(np.float32)
        features = self.dense_features[item, :]
        return {
            "image": torch.tensor(image, dtype=torch.float),
            "mask" : torch.tensor(mask, dtype=torch.float),
            "features": torch.tensor(features, dtype=torch.float),
            "targets": torch.tensor(targets, dtype=torch.float)
        }

In [9]:
class PawpularModel(nn.Module):
    """Backbone plus small regression head, fed with breed probabilities.

    Args:
        pet_classify_model: module mapping an image batch to class logits;
            its parameters are frozen here.
        model_name: timm architecture name for the image backbone.
    """

    def __init__(self, pet_classify_model, model_name):
        super().__init__()
        self.pet_classify_model = pet_classify_model
        # Freeze the classifier. Assigning `module.requires_grad = False` only
        # creates a plain attribute and leaves every parameter trainable;
        # `requires_grad_` is the call that actually updates the parameters.
        self.pet_classify_model.requires_grad_(False)
        self.model = timm.create_model(model_name, pretrained=False, in_chans=3)
#         self.model.patch_embed.proj=nn.Conv2d(4, 96, kernel_size=(4, 4), stride=(4, 4))
        self.model.head = nn.Linear(self.model.head.in_features, 128)
        self.dropout = nn.Dropout(0.1)
        # 177 = 128 backbone outputs + width of `features` + width of `p`
        # (i.e. the latter two must add up to 49 columns).
        self.dense1 = nn.Linear(177, 64)
        self.dense2 = nn.Linear(64, 1)

    def forward(self, image, features, mask=None):
        """Return ``[prediction | backbone embedding | features | class probs]``.

        Args:
            image: image batch, channel-first.
            features: tabular features, concatenated along dim 1.
            mask: optional per-pixel mask without a channel dim; pixels with
                ``mask <= 0.5`` are zeroed before the backbone. ``None`` feeds
                the image unmasked (previously ``None`` raised a TypeError
                even though it is the declared default).

        Returns:
            Tensor whose column 0 is the ``dense2`` output and whose remaining
            columns are the backbone embedding, ``features`` and the softmax
            class probabilities, in that order.
        """
        # The classifier always sees the unmasked image pooled to 224x224.
        p = self.pet_classify_model(F.adaptive_avg_pool2d(image, (224,224)))
        p = torch.softmax(p, dim=1)
        if mask is not None:
            backbone_input = image * (mask>0.5).unsqueeze(1)
        else:
            backbone_input = image
        x1 = self.model(backbone_input)
        x = self.dropout(x1)
        x = torch.cat([x, features, p], dim=1)
        x = self.dense1(x)
        x = self.dense2(x)
        return torch.cat([x, x1, features, p], dim=1)
    
class pet_categor_extract_model(nn.Module):
    """EfficientNet-B0 image classifier returning raw class logits.

    Args:
        class_num: number of output classes.
    """

    def __init__(self, class_num):
        # The parameter used to be spelled `clasCONFIGnum` while the body and
        # the caller both use `class_num`, so construction could never succeed.
        super().__init__()
        self.model = timm.create_model('efficientnet_b0', pretrained=False, in_chans=3)
        self.model.classifier = nn.Linear(self.model.classifier.in_features, 128)
        self.dropout = nn.Dropout(0.1)
        self.dense = nn.Linear(128, class_num)

    def forward(self, image):
        """Map an image batch to logits of width ``class_num``."""
        x = self.model(image)
        x = self.dropout(x)
        x = self.dense(x)
        # squeeze(1) only drops the class dimension when class_num == 1.
        return x.squeeze(1)

In [10]:
def extract_feature(model, df):
    """Run ``model`` over every row of ``df`` and collect its outputs.

    Args:
        model: module called as ``model(image, features, mask)`` that returns
            one row per sample.
        df: frame with the columns ``PawpularDataset`` needs plus the twelve
            metadata columns listed below.

    Returns:
        ``(predictions, embeddings)`` where ``predictions`` is a Python list
        holding column 0 of the model output and ``embeddings`` is a NumPy
        array with all remaining columns, both in the row order of ``df``.
    """
    model.eval()

    dense_features = [
        'Subject Focus', 'Eyes', 'Face', 'Near', 'Action', 'Accessory',
        'Group', 'Collage', 'Human', 'Occlusion', 'Info', 'Blur'
    ]
    df_folds = df.reset_index(drop=True)

    # targets=None: the dataset fills in placeholder targets, which are not
    # used here.
    dataset = PawpularDataset(
        df_folds,
        dense_features=df_folds[dense_features].values, targets=None,
        augmentations=test_aug
    )

    # shuffle=False / drop_last=False keep outputs aligned with the rows of df.
    loader = DataLoader(
        dataset,
        batch_size=CONFIG.batch_size,
        shuffle=False,
        pin_memory=True,
        num_workers=4,
        drop_last=False,
    )

    batch_outputs = []
    with torch.no_grad():
        for data in tqdm(loader, total=len(loader)):
            img = data['image'].to(CONFIG.device)
            mask = data['mask'].to(CONFIG.device)
            feature = data['features'].to(CONFIG.device)
            batch_outputs.append(model(img, feature, mask).detach().cpu())

    embedding_feature = torch.cat(batch_outputs).numpy()

    return embedding_feature[:,:1].ravel().tolist(), embedding_feature[:,1:]

In [11]:
def sigmoid(x):
    """Return the logistic function ``1 / (1 + e**-x)`` of a scalar ``x``.

    For very negative ``x`` the intermediate ``math.exp(-x)`` overflows a
    float; the limit value 0.0 is returned in that case instead of letting
    ``OverflowError`` propagate.
    """
    try:
        return 1 / (1 + math.exp(-x))
    except OverflowError:
        return 0.0
def _rmse(y_true, y_pred):
    """Root-mean-squared error between two equally long 1-D sequences."""
    return np.sqrt(np.mean((np.asarray(y_true) - np.asarray(y_pred))**2.0))


def main():
    """Extract per-fold embeddings, fit/load an SVR head and write a submission.

    For every fold the matching checkpoint is loaded and the test set is
    embedded. When training is enabled, an SVR is fitted on the training-fold
    embeddings against the network's own rescaled predictions, pickled to
    ``CONFIG.OUTPUT_DIR`` and scored on the validation fold; otherwise the
    pickled SVR for that fold is loaded from the same location. Finally the
    fold-averaged SVR and network predictions are blended and saved as
    ``submission.csv``.
    """
    # CONFIG may not define the flag; default to the training path, which is
    # the one that produces the SVR pickles the other path needs.
    training_step = getattr(CONFIG, 'training_step', True)
    # Weight of the network head in the 50/50 cross-validation report. Bound
    # here so it exists after the loop regardless of the branch taken.
    w = 0.5

    super_final_predictions = []
    super_final_predictions2 = []
    super_final_oof_predictions = []
    super_final_oof_predictions2 = []
    super_final_oof_true = []
    test_folds = test_df.reset_index(drop=True)
    for fold in range(CONFIG.fold):
        LOGGER.info(f"========== fold: {fold} extract ==========")

        # ====================================================
        # Model
        # ====================================================
        cl_model = pet_categor_extract_model(class_num=37)
        cl_model.to(CONFIG.device)
        model = PawpularModel(cl_model, model_name=CONFIG.MODEL_NAME)
        model.to(CONFIG.device)

        state_dict = torch.load(CONFIG.MODEL_PATH / f"{CONFIG.MODEL_NAME}_{fold}_best.pth")["model"]
        if torch.cuda.device_count()>1:
            model=nn.DataParallel(model)
            model.load_state_dict(state_dict)
        else:
            # Single device: the checkpoint keys go through the project's
            # fix_model_state_dict helper before loading.
            model.load_state_dict(fix_model_state_dict(state_dict))

        # One SVR head per fold, written by the training path and read back
        # by the loading path.
        svr_path = CONFIG.OUTPUT_DIR / f"SVR_fold_{fold}.pkl"

        # Prepare the data for training.
        if training_step:
            train_idx = train_df[train_df.fold!=fold].index
            val_idx = train_df[train_df.fold ==fold].index
            train_folds = train_df.loc[train_idx].reset_index(drop=True)
            valid_folds = train_df.loc[val_idx].reset_index(drop=True)
            preds_train, embed_train = extract_feature(model, train_folds)
            preds_val, embed_val = extract_feature(model, valid_folds)
            preds_test, embed_test = extract_feature(model, test_folds)

            ##fit SVR to train data
            print('Fitting SVR...')
            clf = SVR(C =20.0)
            # The regression target is the network's own prediction rescaled
            # to 0-100, not the ground-truth score.
            clf.fit(embed_train.astype('float32'), np.array([sigmoid(x) * 100 for x in preds_train]))
            with open(svr_path, "wb") as fh:
                pickle.dump(clf, fh)

            ##predict the out-of-fold data with both heads
            oof_pred_SVR = clf.predict(embed_val.astype('float32'))
            oof_pred_NN  =[sigmoid(x) * 100 for x in preds_val]
            final_oof_true = valid_folds.Pawpularity.values.astype('int32')

            super_final_oof_predictions.append(oof_pred_SVR)
            super_final_oof_predictions2.append(oof_pred_NN)
            super_final_oof_true.append(final_oof_true)

            rsme_svr = _rmse(final_oof_true, oof_pred_SVR)
            print('SVR RSME =',rsme_svr,'\n')

            rsme_nn = _rmse(final_oof_true, oof_pred_NN)
            print('NN RSME =',rsme_nn,'\n')

            oof2 = (1-w)*np.array(oof_pred_SVR) + w*np.array(oof_pred_NN)
            rsme_en = _rmse(final_oof_true, oof2)
            print('Ensemble RSME =',rsme_en,'\n')

        else:
            # This branch used to reference undefined names
            # (LOAD_SVR_FROM_PATH, name, embed_test, preds_test); it now reads
            # the pickle the training branch writes and embeds the test set.
            print('Loading SVR...',svr_path)
            # pickle can execute arbitrary code: only load files you produced.
            with open(svr_path, "rb") as fh:
                clf = pickle.load(fh)
            preds_test, embed_test = extract_feature(model, test_folds)

        ##predict the test data with both heads
        test_pred_SVR = clf.predict(embed_test.astype('float32'))
        test_pred_NN  =[sigmoid(x) * 100 for x in preds_test]

        super_final_predictions.append(test_pred_SVR)
        super_final_predictions2.append(test_pred_NN)

    # Cross-validation summary; only available when folds were trained here.
    if super_final_oof_true:
        true = np.hstack(super_final_oof_true)

        oof = np.hstack(super_final_oof_predictions)
        rsme = _rmse(true, oof)
        print('Overall CV SVR head RSME =',rsme)

        oof2 = np.hstack(super_final_oof_predictions2)
        rsme = _rmse(true, oof2)
        print('Overall CV NN head RSME =',rsme)

        oof3 = (1-w)*oof + w*oof2
        rsme = _rmse(true, oof3)
        print('Overall CV Ensemble heads RSME with 50% NN and 50% SVR =',rsme)

    # submission
    submission = test_df.copy()

    # Weight of the network head in the submitted blend (the SVR head gets
    # the remaining 1 - best_w).
    best_w = 0.8
    # Average each head over the folds: one column per fold, mean across columns.
    super_final_predictions = np.mean(np.column_stack(super_final_predictions), axis=1)
    super_final_predictions2 = np.mean(np.column_stack(super_final_predictions2), axis=1)
    submission["Pawpularity"] = (1-best_w)*super_final_predictions + best_w*super_final_predictions2
    submission = submission[["Id", "Pawpularity"]]
    submission.to_csv(CONFIG.OUTPUT_DIR / "submission.csv", index=False)

In [12]:
if __name__ == "__main__":
    main()

100%|██████████████████████████████████████████████████████████████████████████████████████████| 793/793 [02:51<00:00,  4.62it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████| 199/199 [00:42<00:00,  4.67it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.26it/s]


Fitting SVR...




SVR RSME = 17.75708127566837 

NN RSME = 17.744409344399877 

Ensemble RSME = 17.747457161839513 



100%|██████████████████████████████████████████████████████████████████████████████████████████| 793/793 [02:49<00:00,  4.67it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████| 199/199 [00:42<00:00,  4.64it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.19it/s]


Fitting SVR...




SVR RSME = 17.27069369277742 

NN RSME = 17.27123801437023 

Ensemble RSME = 17.267747362884954 



100%|██████████████████████████████████████████████████████████████████████████████████████████| 793/793 [02:49<00:00,  4.67it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████| 199/199 [00:42<00:00,  4.64it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.13it/s]


Fitting SVR...




SVR RSME = 17.453362808638097 

NN RSME = 17.443244035397836 

Ensemble RSME = 17.444948400263165 



100%|██████████████████████████████████████████████████████████████████████████████████████████| 793/793 [02:50<00:00,  4.65it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████| 199/199 [00:43<00:00,  4.62it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.15it/s]


Fitting SVR...




SVR RSME = 17.771742762200873 

NN RSME = 17.791827023214633 

Ensemble RSME = 17.778658705257882 



100%|██████████████████████████████████████████████████████████████████████████████████████████| 793/793 [02:49<00:00,  4.67it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████| 199/199 [00:42<00:00,  4.65it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.14it/s]


Fitting SVR...
SVR RSME = 17.46876840266088 

NN RSME = 17.48061201798339 

Ensemble RSME = 17.471256485359678 

Overall CV SVR head RSME = 17.545383011744185
Overall CV NN head RSME = 17.54734197498838
Overall CV Ensemble heads RSME with 50% NN and 50% SVR = 17.543077673235477
