In [1]:
# # based on the post here: https://www.kaggle.com/c/petfinder-pawpularity-score/discussion/275094
import sys
sys.path.append("../")
sys.path.append('../input/U-2-Net/')
sys.path.append("../input/tez-lib/")
sys.path.append("../input/timmmaster/")
sys.path.append("../input/matsuda-utils")

In [2]:
import numpy as np, pandas as pd
from glob import glob
import shutil, os
import pickle
import matplotlib.pyplot as plt
from sklearn.model_selection import GroupKFold
from sklearn.decomposition import IncrementalPCA
from tqdm.notebook import tqdm
import torch.nn.functional as F
from albumentations.pytorch.transforms import ToTensorV2
import seaborn as sns
import PIL.Image as Image
import torch
from torch.utils.data import Dataset, DataLoader
import cv2
import time
import pandas_profiling as pdp
from pathlib import Path
from sklearn.model_selection import StratifiedKFold
from utils.util import *
from utils.losses import *
import torch.nn as nn
import transformers as T
import albumentations
import pandas as pd
import cv2
import numpy as np
import timm
import torch.nn as nn
from utils.util import EarlyStopping
from sklearn import metrics
import torch
from tqdm import tqdm
import math
import albumentations
import tez
import torch.optim as optim
import warnings
warnings.simplefilter('ignore')

In [3]:
from u2net_test import extract
extract('../input/petfinder-pawpularity-score/test', '../input/petfinder-pawpularity-score/test_U2NET')

...load U2NEP---4.7 MB


100%|██████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00, 20.63it/s]


In [4]:
class CONFIG:
    DATA_PATH = Path('../input/petfinder-pawpularity-score')
    MODEL_NAME = 'swin_large_patch4_window12_384'
    MODEL_PATH = Path(f'../input/pretrained_models/swin_large_patch4_window12_384_add_petcategory_segmentation/')
    batch_size = 32
    image_size = 384
    fold = 5
    device='cuda'

In [5]:
test_df = pd.read_csv(CONFIG.DATA_PATH / 'test.csv')
test_df['path'] = test_df['Id'].map(lambda x:str(CONFIG.DATA_PATH/'test'/x)+'.jpg')
test_df['mask_path'] = test_df['Id'].map(lambda x:str(CONFIG.DATA_PATH/'test_U2NET'/x)+'.jpg')
test_df['image_size'] = test_df['path'].apply(lambda image_id : Image.open(image_id).size)
test_df['width'] = test_df['image_size'].apply(lambda x: x[0])
test_df['height'] = test_df['image_size'].apply(lambda x: x[1])

In [6]:
test_aug = albumentations.Compose(
    [
        albumentations.Resize(CONFIG.image_size, CONFIG.image_size, p=1),
        albumentations.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
            max_pixel_value=255.0,
            p=1.0,
        ),
    ],
    p=1.0,
)

In [7]:
class PawpularDataset:
    def __init__(self, df, dense_features, targets, augmentations, mask=True):
        self.image_paths = df['path'].tolist()
        self.mask = mask
        if mask==True:
            self.mask_paths = df['mask_path'].tolist()
        self.dense_features = dense_features
        self.targets = targets
        if self.targets is None:
            self.targets = torch.ones(len(self.image_paths))
        self.augmentations = augmentations
        
    def __len__(self):
        return len(self.image_paths)
    
    def __getitem__(self, item):
        image = cv2.imread(self.image_paths[item])
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.mask:
            mask = cv2.imread(self.mask_paths[item])
            mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
            idx = np.where(mask > 127.5)
            h_max = idx[0].min()
            h_min = idx[0].max()
            w_max = idx[1].min()
            w_min = idx[1].max()
        
            image = image[h_max:h_min, w_max:w_min,:]
            mask = mask[h_max:h_min, w_max:w_min]
        
            if self.augmentations is not None:
                augmented = self.augmentations(image=image, mask=mask)
                image = augmented["image"]
                mask = augmented["mask"]
        else:
            augmented = self.augmentations(image=image)
            image = augmented["image"]
            mask=np.zeros((64,64))
        targets = self.targets[item]
        image = np.transpose(image, (2, 0, 1)).astype(np.float32)
        features = self.dense_features[item, :]
        return {
            "image": torch.tensor(image, dtype=torch.float),
            "mask" : torch.tensor(mask, dtype=torch.float),
            "features": torch.tensor(features, dtype=torch.float),
            "targets": torch.tensor(targets, dtype=torch.float)
        }

In [8]:
class PawpularModel(nn.Module):
    def __init__(self, pet_classify_model, model_name):
        super().__init__()
        self.pet_classify_model = pet_classify_model
        self.pet_classify_model.requires_grad = False
        self.model = timm.create_model(model_name, pretrained=False, in_chans=3)
#         self.model.patch_embed.proj=nn.Conv2d(4, 96, kernel_size=(4, 4), stride=(4, 4))
        self.model.head = nn.Linear(self.model.head.in_features, 128)
        self.dropout = nn.Dropout(0.1)
        self.dense1 = nn.Linear(177, 64)
        self.dense2 = nn.Linear(64, 1)

    def forward(self, image, features, mask=None):
        p = self.pet_classify_model(F.adaptive_avg_pool2d(image, (224,224)))
        p = torch.softmax(p, dim=1)
        if mask!=None:
            x = self.model(image * (mask>0.5).unsqueeze(1))
        else:
            x = self.model(image)
        x = self.dropout(x)
        x = torch.cat([x, features, p], dim=1)
        x = self.dense1(x)
        x = self.dense2(x)
        return torch.sigmoid(x.squeeze(1))
    
class pet_categor_extract_model(nn.Module):
    def __init__(self,class_num):
        super().__init__()
        self.model = timm.create_model('efficientnet_b0', pretrained=False, in_chans=3)
        self.model.classifier = nn.Linear(self.model.classifier.in_features, 128)
        self.dropout = nn.Dropout(0.1)
        self.dense = nn.Linear(128
                               , class_num)

    def forward(self, image):
        x = self.model(image)
        x = self.dropout(x)
        x = self.dense(x)
        return x.squeeze(1)

In [9]:
def inference():
    predictions = []
    dense_features = [
        'Subject Focus', 'Eyes', 'Face', 'Near', 'Action', 'Accessory',
        'Group', 'Collage', 'Human', 'Occlusion', 'Info', 'Blur'
    ]
    test_dataset = PawpularDataset(
        test_df, 
        dense_features=test_df[dense_features].values, targets=None,
        augmentations=test_aug
    )
    test_loader = DataLoader(
        test_dataset, batch_size=16, shuffle=False, num_workers=4, pin_memory=True
    )

    for fold in range(5):
        cl_model = pet_categor_extract_model(class_num=37)
        cl_model.to(CONFIG.device)
        cl_model.load_state_dict(fix_model_state_dict(torch.load('../input/pretrained_models/efficientnet_b0_Oxford_classifier_size_224.pth')["model"]))

        model = PawpularModel(cl_model, model_name=CONFIG.MODEL_NAME)
        model.to(CONFIG.device)
        if torch.cuda.device_count()>1:
            model=nn.DataParallel(model)
            model.load_state_dict(torch.load(CONFIG.MODEL_PATH / f"{CONFIG.MODEL_NAME}_{fold}_best.pth")["model"])
        else:
            model.load_state_dict(fix_model_state_dict(torch.load(CONFIG.MODEL_PATH / f"{CONFIG.MODEL_NAME}_{fold}_best.pth")["model"]))
        model.eval()
        preds = []
        for i, data in tqdm(enumerate(test_loader), total=len(test_loader)):
            img, mask, feature ,target = data['image'], data['mask'], data['features'], data['targets']
            img = img.to(CONFIG.device)
            mask= mask.to(CONFIG.device)
            feature = feature.to(CONFIG.device)
            target = target.to(CONFIG.device)
            with torch.no_grad():
                y_preds = model(img, feature, mask)
            preds.append(y_preds.to("cpu").numpy())
        preds = np.concatenate(preds)
        predictions.append(preds)
    predictions = np.mean(predictions, axis=0)
    return predictions * 100

In [10]:
# Inference
predictions = inference()
# submission
train_df1 = pd.read_csv(CONFIG.MODEL_PATH / "oof_df.csv")
submission1 = test_df.copy()
submission1["Pawpularity"] = predictions
submission1= submission1[["Id", "Pawpularity"]]

100%|██████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.18s/it]
100%|██████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.40it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.40it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.49it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.11it/s]


In [11]:
class CONFIG:
    DATA_PATH = Path('../input/petfinder-pawpularity-score')
    MODEL_NAME = 'swin_large_patch4_window12_384'
    MODEL_PATH = Path(f'../input/pretrained_models/swin_large_patch4_window12_384_add_petcategory/')
    batch_size = 32
    image_size = 384
    fold = 5
    device='cuda'

In [12]:
def inference():
    predictions = []
    dense_features = [
        'Subject Focus', 'Eyes', 'Face', 'Near', 'Action', 'Accessory',
        'Group', 'Collage', 'Human', 'Occlusion', 'Info', 'Blur'
    ]
    test_dataset = PawpularDataset(
        test_df, 
        dense_features=test_df[dense_features].values, targets=None,
        augmentations=test_aug, mask=False
    )
    test_loader = DataLoader(
        test_dataset, batch_size=16, shuffle=False, num_workers=4, pin_memory=True
    )

    for fold in range(5):
        cl_model = pet_categor_extract_model(class_num=37)
        cl_model.to(CONFIG.device)
        cl_model.load_state_dict(fix_model_state_dict(torch.load('../input/pretrained_models/efficientnet_b0_Oxford_classifier_size_224.pth')["model"]))

        model = PawpularModel(cl_model, model_name=CONFIG.MODEL_NAME)
        model.to(CONFIG.device)
        if torch.cuda.device_count()>1:
            model=nn.DataParallel(model)
            model.load_state_dict(torch.load(CONFIG.MODEL_PATH / f"{CONFIG.MODEL_NAME}_{fold}_best.pth")["model"])
        else:
            model.load_state_dict(fix_model_state_dict(torch.load(CONFIG.MODEL_PATH / f"{CONFIG.MODEL_NAME}_{fold}_best.pth")["model"]))
        model.eval()
        preds = []
        for i, data in tqdm(enumerate(test_loader), total=len(test_loader)):
            img, _, feature ,target = data['image'], data['mask'], data['features'], data['targets']
            img = img.to(CONFIG.device)
            feature = feature.to(CONFIG.device)
            target = target.to(CONFIG.device)
            with torch.no_grad():
                y_preds = model(img, feature)
            preds.append(y_preds.to("cpu").numpy())
        preds = np.concatenate(preds)
        predictions.append(preds)
    predictions = np.mean(predictions, axis=0)
    return predictions * 100

In [13]:
# Inference
predictions = inference()
train_df2 = pd.read_csv(CONFIG.MODEL_PATH / "oof_df.csv")
# submission
submission2 = test_df.copy()
submission2["Pawpularity"] = predictions
submission2= submission1[["Id", "Pawpularity"]]

100%|██████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.18it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.66it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.61it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.55it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.32it/s]


In [14]:
ensemble_test_df = pd.merge(submission1, submission2, on="Id").loc[:,["Id", "Pawpularity_x", "Pawpularity_y"]]

In [15]:
from sklearn.linear_model import LinearRegression
ensemble_train_df = pd.merge(train_df1.loc[:,["Id", "preds", "Pawpularity", "fold"]] , train_df2.loc[:,["Id", "preds", "Pawpularity", "fold"]], on=["Id", "Pawpularity", "fold"])
CV_score=[]
preds = []
test_preds = []
for fold in range(5):
    df_=ensemble_train_df.drop(columns=["Id"])
    tr_x, tr_y = df_[df_.fold!=fold].drop(columns=["Pawpularity", "fold"]), df_[df_.fold!=fold].Pawpularity
    val_x, val_y =  df_[df_.fold==fold].drop(columns=["Pawpularity", "fold"]), df_[df_.fold==fold].Pawpularity
    lr = LinearRegression()
    lr.fit(tr_x, tr_y)
    pred = lr.predict(val_x)
    preds.append(pred)
    score = np.sqrt(np.mean((np.array(pred)- np.array(val_y))**2))
    CV_score.append(score)
    print(f"fold{fold}:{score}")
    test_pred = lr.predict(ensemble_test_df.drop(columns=["Id"]))
    test_preds.append(test_pred)
print(f"CV:{sum(CV_score)/len(CV_score)}")

submission = test_df.copy()
submission["Pawpularity"] = np.array(test_preds).mean(0)
submission = submission[["Id", "Pawpularity"]]
submission.to_csv("submission.csv", index=False)

fold0:17.480799245332985
fold1:17.127684855891257
fold2:17.172028125421228
fold3:17.582936426462247
fold4:17.223195700178255
CV:17.317328870657196


In [17]:
submission

Unnamed: 0,Id,Pawpularity
0,4128bae22183829d2b5fea10effdb0c3,41.596434
1,43a2262d7738e3d420d453815151079e,37.259383
2,4e429cead1848a298432a0acad014c9d,41.711307
3,80bc3ccafcc51b66303c2c263aa38486,39.401631
4,8f49844c382931444e68dffbe20228f4,38.809348
5,b03f7041962238a7c9d6537e22f9b017,41.344594
6,c978013571258ed6d4637f6e8cc9d6a3,40.616053
7,e0de453c1bffc20c22b072b34b54e50f,43.781392
