# Load Libraries

In [1]:
# based on the post here: https://www.kaggle.com/c/petfinder-pawpularity-score/discussion/275094

import sys
sys.path.append("../input/tez-lib/")
sys.path.append("../input/timmmaster/")

import tez
import albumentations
import pandas as pd
import cv2
import numpy as np
import timm
import torch.nn as nn
from sklearn import metrics
import torch
from tez.callbacks import EarlyStopping
from tqdm import tqdm
import math
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
import os
import albumentations as A
from PIL import Image

class args:
    """Settings holder whose `image_size` is read by the `test_aug` pipeline."""
    # No visible cell reads this value; the DataLoaders use Config.batch_size.
    batch_size = 16
    # Height and width passed to albumentations.Resize in `test_aug`.
    image_size = 384
    
def sigmoid(x):
    """Return the logistic function 1 / (1 + e^-x) of a scalar.

    Args:
        x: a real number (anything `math.exp` accepts).

    Returns:
        A float in [0, 1].

    The two branches are algebraically identical; each one only ever calls
    `math.exp` on a non-positive argument, so very negative inputs yield 0.0
    instead of raising OverflowError.
    """
    if x >= 0:
        return 1 / (1 + math.exp(-x))
    # For x < 0, exp(-x) can exceed the float range; exp(x) merely underflows to 0.
    z = math.exp(x)
    return z / (1 + z)

class Config:
    """Paths and inference settings used by the PetNet model, transforms and DataLoaders."""
    # Root of the competition data; the train/test image folders are derived from it.
    data_dir = "../input/petfinder-pawpularity-score/"
    output_dir = "."
    img_train_dir = os.path.join(data_dir, "train")
    img_test_dir = os.path.join(data_dir, "test")
    random_seed = 42
    # Number of test-time-augmentation passes (1 = no TTA).
    # NOTE(review): no visible cell reads tta_times or tta_beta.
    tta_times = 4 # 1: no TTA ####
    tta_beta = 1 / tta_times
    # timm model name; default `model_name` of PetNet.
    model_path = "swin_large_patch4_window7_224"
    # Defaults of PetNet's `pretrained` and `inp_channels` arguments.
    pretrained = False
    inp_channels = 3
    # Default `dim` (square crop side) of the get_*_transforms functions.
    im_size =  224
    # Passed to every DataLoader built in the fold loop.
    batch_size = 32
    num_workers = 0 # >0: OS Error
    # Default `out_features` of PetNet (forwarded to timm as num_classes).
    out_features = 1
    # NOTE(review): not read by any visible cell; PetNet hard-codes its dropout rate.
    dropout = 0
    
# Device that each fold's model and every image batch are moved to in the fold loop.
device = "cuda"

# Define Swin Model and Swin Dataset

In [2]:
class PawpularDataset:
    """Map-style dataset returning an image, its dense features and its target.

    Args:
        image_paths: sequence of image file paths read with `cv2.imread`.
        dense_features: per-image feature table indexed as `[item, :]`
            (e.g. a 2-D NumPy array) — one row per image.
        targets: per-image target values, indexed by position.
        augmentations: optional callable invoked as `augmentations(image=...)`
            that returns a mapping with an "image" entry; `None` skips it.
    """

    def __init__(self, image_paths, dense_features, targets, augmentations):
        self.image_paths = image_paths
        self.dense_features = dense_features
        self.targets = targets
        self.augmentations = augmentations

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.image_paths)

    def __getitem__(self, item):
        """Load sample `item` and return it as a dict of float tensors.

        Returns:
            dict with keys "image" (channels-first), "features" and "targets".

        Raises:
            FileNotFoundError: if the image cannot be read from disk.
        """
        path = self.image_paths[item]
        image = cv2.imread(path)
        # cv2.imread signals failure by returning None rather than raising;
        # fail here with the offending path instead of deep inside cvtColor.
        if image is None:
            raise FileNotFoundError(f"cv2.imread could not read image: {path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.augmentations is not None:
            augmented = self.augmentations(image=image)
            image = augmented["image"]

        # HWC -> CHW, the layout the model expects.
        image = np.transpose(image, (2, 0, 1)).astype(np.float32)

        features = self.dense_features[item, :]
        targets = self.targets[item]

        return {
            "image": torch.tensor(image, dtype=torch.float),
            "features": torch.tensor(features, dtype=torch.float),
            "targets": torch.tensor(targets, dtype=torch.float),
        }
    
# Deterministic pipeline: resize to a square of side args.image_size, then
# normalise each channel with the mean/std constants below.
# NOTE(review): no visible cell uses test_aug; the fold loop relies on
# get_inference_fixed_transforms instead.
test_aug = albumentations.Compose(
    [
        albumentations.Resize(args.image_size, args.image_size, p=1),
        albumentations.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
            max_pixel_value=255.0,
            p=1.0,
        ),
    ],
    p=1.0,
)
    
    
class PetDataset(Dataset):
    """Dataset of (image, target) pairs read from image files.

    Args:
        image_filepaths: sequence of image file paths.
        targets: per-image target values, indexed by position.
        transform: optional callable invoked as `transform(image=array)` that
            returns a mapping with an "image" entry; `None` leaves the image as is.
    """

    def __init__(self, image_filepaths, targets, transform=None):
        self.image_filepaths = image_filepaths
        self.targets = targets
        self.transform = transform

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.image_filepaths)

    def __getitem__(self, idx):
        """Return `(image, target)` for position `idx` as float tensors.

        The image is converted to RGB, optionally transformed, divided by 255
        and laid out channels-first.
        """
        path = self.image_filepaths[idx]
        with open(path, 'rb') as handle:
            pixels = np.array(Image.open(handle).convert('RGB'))

        if self.transform is not None:
            pixels = self.transform(image=pixels)["image"]

        # Scale to 0-1, then HWC -> CHW.
        chw = np.transpose(pixels / 255, (2, 0, 1)).astype(np.float32)

        image_tensor = torch.tensor(chw, dtype=torch.float)
        target_tensor = torch.tensor(self.targets[idx], dtype=torch.float)
        return image_tensor, target_tensor
    
    
# ImageNet channel statistics in RGB order; only referenced by the disabled
# Normalize step noted below.
IMAGENET_MEAN = [0.485, 0.456, 0.406]  # RGB
IMAGENET_STD = [0.229, 0.224, 0.225]  # RGB
def get_train_transforms(epoch, dim = Config.im_size):
    """Build the random training augmentation pipeline.

    Args:
        epoch: accepted but not used.
        dim: side length of the square random crop (also the target size of
            the shorter image side).

    Returns:
        An `A.Compose` applying, in order: shorter-side resize (like fastai's
        Resize), random square crop, vertical flip and horizontal flip, each
        flip with probability 0.5.
    """
    steps = [
        A.SmallestMaxSize(max_size=dim, p=1.0),
        A.RandomCrop(height=dim, width=dim, p=1.0),
        A.VerticalFlip(p = 0.5),
        A.HorizontalFlip(p = 0.5),
        # Normalisation intentionally left out:
        # A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD)
    ]
    return A.Compose(steps)

def get_inference_fixed_transforms(mode=0, dim = Config.im_size):
    """Build one of four deterministic inference pipelines.

    Every mode resizes the shorter side to `dim` and centre-crops a
    `dim` x `dim` square; modes 1-3 then apply one fixed geometric change.

    Args:
        mode: 0 = no further change (original aspect of the crop, colours and
            orientation are kept), 1 = vertical flip, 2 = horizontal flip,
            3 = transpose.
        dim: side length of the square crop.

    Returns:
        An `A.Compose` pipeline (always applied, p=1.0).

    Raises:
        ValueError: if `mode` is not one of 0, 1, 2, 3. Without this check an
            unknown mode would fall through and yield `None`, which a dataset
            interprets as "no transform at all".
    """
    # Extra always-on transform appended after the shared resize + crop.
    extra_by_mode = {
        0: None,
        1: A.VerticalFlip,
        2: A.HorizontalFlip,
        3: A.Transpose,
    }
    if mode not in extra_by_mode:
        raise ValueError(
            f"Unsupported inference transform mode {mode!r}; expected one of {sorted(extra_by_mode)}"
        )

    steps = [
        A.SmallestMaxSize(max_size=dim, p=1.0),
        A.CenterCrop(height=dim, width=dim, p=1.0),
        # Normalisation intentionally left out:
        # A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD)
    ]
    extra = extra_by_mode[mode]
    if extra is not None:
        steps.append(extra(p=1.0))
    return A.Compose(steps, p=1.0)
        
def get_inference_random_transforms(mode=0, dim = Config.im_size):
    """Build an inference pipeline with optional random flips.

    Args:
        mode: 0 returns the deterministic resize + centre-crop pipeline; any
            other value adds a vertical and a horizontal flip, each applied
            with probability 0.5.
        dim: side length of the square centre crop.

    Returns:
        An `A.Compose` pipeline.
    """
    steps = [
        A.SmallestMaxSize(max_size=dim, p=1.0),
        A.CenterCrop(height=dim, width=dim, p=1.0),
        # Normalisation intentionally left out:
        # A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD)
    ]
    if mode == 0:
        # Keep the centre crop untouched: no flips.
        return A.Compose(steps, p=1.0)

    steps.append(A.VerticalFlip(p = 0.5))
    steps.append(A.HorizontalFlip(p = 0.5))
    return A.Compose(steps)

In [3]:
class PawpularModel(tez.Model):
    """timm backbone with a 128-d head, combined with dense features.

    The backbone output (128 values) is concatenated with `features` before
    `dense1`, whose input width is 140; `features` must therefore have
    12 columns.
    """

    def __init__(self, model_name):
        super().__init__()
        backbone = timm.create_model(model_name, pretrained=False, in_chans=3)
        # Swap the classifier for a 128-d projection used as the image embedding.
        backbone.head = nn.Linear(backbone.head.in_features, 128)
        self.model = backbone
        self.dropout = nn.Dropout(0.1)
        self.dense1 = nn.Linear(140, 64)
        self.dense2 = nn.Linear(64, 1)

    def forward(self, image, features, targets=None):
        """Return `(output, 0, {})`.

        `output` concatenates, along dim 1, the 1-d prediction, the 128-d
        backbone embedding (before dropout) and the input `features`.
        `targets` is accepted but not used.
        """
        embedding = self.model(image)
        hidden = torch.cat([self.dropout(embedding), features], dim=1)
        prediction = self.dense2(self.dense1(hidden))

        combined = torch.cat([prediction, embedding, features], dim=1)
        return combined, 0, {}
    
    
class PetNet(nn.Module):
    """timm backbone with a 128-d embedding head and a small regression MLP.

    Args:
        model_name: timm model name.
        out_features: forwarded to timm as `num_classes` (the created head is
            replaced right afterwards).
        inp_channels: number of input image channels.
        pretrained: whether timm should load pretrained weights.
    """

    def __init__(
        self,
        model_name = Config.model_path,
        out_features = Config.out_features,
        inp_channels=Config.inp_channels,
        pretrained=Config.pretrained
    ):
        super().__init__()
        self.model = timm.create_model(
            model_name,
            pretrained=pretrained,
            in_chans=inp_channels,
            num_classes = out_features,
        )
        head_width = self.model.head.in_features
        print("self.model.head.in_features:", head_width)
        # Replace the classifier with a 128-d projection (the embedding).
        self.model.head = nn.Linear(head_width, 128)
        self.dropout1 = nn.Dropout(0.1)
        self.dense1 = nn.Linear(128, 64)
        self.relu = nn.ReLU()
        self.dense2 = nn.Linear(64, 1)

    def forward(self, image):
        """Return `(prediction, prediction_and_embedding)`.

        `prediction` is [bs, 1]; the second output is [bs, 129]: the prediction
        in column 0 followed by the 128-d embedding taken before dropout.
        """
        embedding = self.model(image)                                  # [bs, 128]
        hidden = self.relu(self.dense1(self.dropout1(embedding)))     # [bs, 64]
        prediction = self.dense2(hidden)                               # [bs, 1]
        return prediction, torch.cat([prediction, embedding], dim=1)  # [bs, 1], [bs, 129]

# Import RAPIDS

In [None]:
# cuML supplies the SVR fitted on image embeddings in the fold loop;
# pickle saves and restores the fitted SVR of each fold.
import cuml, pickle
from cuml.svm import SVR
print('RAPIDS version',cuml.__version__,'\n')

# None: the fold loop extracts train embeddings, fits one SVR per fold and
# saves it as SVR_fold_<k>.pkl. Otherwise: a path prefix that is concatenated
# directly with that file name to load previously saved SVRs.
LOAD_SVR_FROM_PATH = None

# Training table; the fold loop reads its `Id`, `fold` and `Pawpularity` columns.
df = pd.read_csv('../input/petfindermodel/train_df_010524.csv') 
# Alternative table kept for reference:
# '../input/petfindermodel2/train_df_10fold_010617.csv'

print('Train shape:', df.shape )
df.head()

In [5]:
####
# One checkpoint path per fold; entry k is the checkpoint loaded for fold k.
# Each file name carries the fold index and a per-fold `cv` suffix
# (presumably that fold's CV score — verify against the training run).
model_weight = [
    f'../input/petfindermodel4/010816_swin_large_patch4_window7_224_fold{fold}_cv{cv}.pth'
    for fold, cv in enumerate((17702, 17327, 17635, 17401, 17302))
]

# model_fold_idx_list = [2,3,4,5,7,8,12,13,16,17]

# Infer Test and OOF
In version 1 of this notebook, we extract train embeddings and train RAPIDS SVR heads. (Click version 1 to see this). In later versions and during Kaggle submit, we load these saved RAPIDS SVR fold models and just infer data (without training anything).

In [None]:
def _extract_outputs(model, image_paths, targets):
    """Run `model` over the images and split its second output.

    Images go through PetDataset with the un-augmented mode-0 inference
    transform and are batched in their given order (shuffle=False), so row k
    of the result belongs to `image_paths[k]`.

    Args:
        model: a PetNet already on `device` and in eval mode.
        image_paths: list of image file paths.
        targets: per-image targets required by PetDataset; not used here.

    Returns:
        (logits, embeddings): `logits` is a list of Python floats taken from
        column 0 of the model's second output (the NN-head prediction before
        the sigmoid); `embeddings` is a float32 array holding the remaining
        columns (128 per image for PetNet).
    """
    dataset = PetDataset(
        image_filepaths=image_paths,
        targets=targets,
        transform=get_inference_fixed_transforms(0),
    )
    loader = DataLoader(
        dataset,
        batch_size=Config.batch_size,
        shuffle=False,
        num_workers=Config.num_workers,
        pin_memory=True,
    )

    batches = []
    with torch.no_grad():
        for images, _ in tqdm(loader):
            images = images.to(device, non_blocking=True).float()
            _, output = model(images)
            batches.append(output.detach().cpu().numpy())

    if not batches:
        # No images: keep the (0, 128) embedding shape the SVR expects.
        return [], np.empty((0, 128), dtype=np.float32)

    # Single concatenation instead of growing an array batch by batch.
    outputs = np.concatenate(batches, axis=0)
    return outputs[:, 0].tolist(), outputs[:, 1:]


def _rmse(y_true, y_pred):
    """Root-mean-square error between two equally long 1-D sequences."""
    return np.sqrt(np.mean((y_true - np.array(y_pred)) ** 2.0))


# Per-fold results, one entry appended per fold:
super_final_predictions = []        # NN-head test predictions (0-100 scale)
super_final_predictions2 = []       # SVR-head test predictions
super_final_oof_predictions = []    # NN-head out-of-fold predictions (0-100 scale)
super_final_oof_predictions2 = []   # SVR-head out-of-fold predictions
super_final_oof_true = []           # true Pawpularity of the validation rows

# SVR weight of the per-fold ensemble score printed below; the overall CV cell
# further down reads this name as well.
w = 0.2

# The test set is identical for every fold, so it is read once.
df_test = pd.read_csv("../input/petfinder-pawpularity-score/test.csv")
test_img_paths = [f"../input/petfinder-pawpularity-score/test/{x}.jpg" for x in df_test["Id"].values]

for fold_, weight_path in enumerate(model_weight):
    print('#'*25)
    print('### FOLD',fold_)
    print('#'*25)

    model = PetNet()
    model.load_state_dict(torch.load(weight_path))
    model = model.to(device)
    model.eval()

    df_valid = df[df.fold == fold_].reset_index(drop=True)#.iloc[:160]
    valid_img_paths = [f"../input/petfinder-pawpularity-score/train/{x}.jpg" for x in df_valid["Id"].values]

    name = f"SVR_fold_{fold_}.pkl"
    if LOAD_SVR_FROM_PATH is None:
        # EXTRACT TRAIN EMBEDDINGS
        df_train = df[df.fold != fold_].reset_index(drop=True)#.iloc[:320]
        train_img_paths = [f"../input/petfinder-pawpularity-score/train/{x}.jpg" for x in df_train["Id"].values]

        print('Extracting train embedding...')
        _, embed = _extract_outputs(model, train_img_paths, df_train['Pawpularity'].values/100)

        # FIT RAPIDS SVR
        print('Fitting SVR...')
        clf = SVR(C=20.0)
        clf.fit(embed.astype('float32'), df_train.Pawpularity.values.astype('int32'))

        # SAVE RAPIDS SVR (context manager so the file is flushed and closed)
        with open(name, "wb") as svr_file:
            pickle.dump(clf, svr_file)

    else:
        # LOAD RAPIDS SVR
        # NOTE(security): pickle.load executes code from the file; only point
        # LOAD_SVR_FROM_PATH at SVR files you created yourself.
        print('Loading SVR...',LOAD_SVR_FROM_PATH+name)
        with open(LOAD_SVR_FROM_PATH+name, "rb") as svr_file:
            clf = pickle.load(svr_file)

    # ************ TEST PREDICTIONS ************
    print('Predicting test...')
    test_logits, embed = _extract_outputs(model, test_img_paths, np.zeros(len(test_img_paths)))

    final_test_predictions = [sigmoid(x) * 100 for x in test_logits]
    final_test_predictions2 = clf.predict(embed)
    super_final_predictions.append(final_test_predictions)
    super_final_predictions2.append(final_test_predictions2)

    # ************ OOF PREDICTIONS ************
    print('Predicting oof...')
    oof_logits, embed = _extract_outputs(model, valid_img_paths, df_valid['Pawpularity'].values/100)

    final_oof_predictions = [sigmoid(x) * 100 for x in oof_logits]
    final_oof_predictions2 = clf.predict(embed)
    super_final_oof_predictions.append(final_oof_predictions)
    super_final_oof_predictions2.append(final_oof_predictions2)

    final_oof_true = df_valid['Pawpularity'].values
    super_final_oof_true.append(final_oof_true)

    # COMPUTE RSME
    rsme = _rmse(final_oof_true, final_oof_predictions)
    print('NN RSME =',rsme,'\n')
    rsme = _rmse(final_oof_true, final_oof_predictions2)
    print('SVR RSME =',rsme,'\n')

    oof2 = (1-w)*np.array(final_oof_predictions) + w*np.array(final_oof_predictions2)
    rsme = _rmse(final_oof_true, oof2)
    print('Ensemble RSME =',rsme,'\n')

# Compute CV Score
Below we compute the overall CV RMSE scores of just the NN head, just the SVR head, and an ensemble of the two heads that uses the SVR weight `w` set in the fold loop above (`w = 0.2`, i.e. 80% NN head and 20% SVR head). Then we plot all ensemble weights to find the optimal weights for the NN head and the SVR head.

In [None]:
# Stack the per-fold arrays into one vector per quantity; folds are stacked in
# the same order everywhere, so rows stay aligned.
true = np.hstack(super_final_oof_true)

oof = np.hstack(super_final_oof_predictions)
rsme = np.sqrt( np.mean( (oof - true)**2.0 ))
print('Overall CV NN head RSME =',rsme)

oof2 = np.hstack(super_final_oof_predictions2)
rsme = np.sqrt( np.mean( (oof2 - true)**2.0 ))
print('Overall CV SVR head RSME =',rsme)

# `w` is the SVR weight defined in the fold loop; the message reports the
# weights actually used instead of a fixed "50% / 50%".
oof3 = (1-w)*oof + w*oof2
rsme = np.sqrt( np.mean( (oof3 - true)**2.0 ))
print(f'Overall CV Ensemble heads RSME with {1-w:.0%} NN and {w:.0%} SVR =',rsme)

In [None]:
import matplotlib.pyplot as plt

# Candidate SVR weights 0.00, 0.05, ..., 1.00. One linspace grid drives the
# sweep, the plot's x-axis and best_w, so the three cannot drift apart (a
# float-step arange does not guarantee its element count).
svr_weights = np.linspace(0, 1, 21)

# Overall CV RMSE of the blended out-of-fold predictions for each weight.
score = [
    np.sqrt( np.mean( ((1-ww)*oof + ww*oof2 - true)**2.0 ))
    for ww in svr_weights
]
best_idx = int(np.argmin(score))
best_w = svr_weights[best_idx]


plt.figure(figsize=(20,5))
plt.plot(svr_weights,score,'-o')
# Highlight the weight with the lowest RMSE.
plt.plot([best_w],[score[best_idx]],'o',color='black',markersize=15)
plt.title(f'Best Overall CV RSME={score[best_idx]:.4} with SVR Ensemble Weight={best_w:.2}',size=16)
plt.ylabel('Overall Ensemble RSME',size=14)
plt.xlabel('SVR Weight',size=14)
plt.show()

# Trust CV or LB?
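Above we see that using 50% NN head and 50% SVR head achieves the best overall CV score. However, our RAPIDS SVR head isn't helping public LB much. We also notice that our RAPIDS SVR head helped folds `1, 2, 4, 5, 7, 8, 9, 10` but did not help folds `3, 6` (note: these fold numbers go up to 10 while the loop above runs 5 folds, so they appear to come from an earlier version of this analysis). So is public test data just a "bad fold"? Will our RAPIDS SVR head help private LB? The cell below can force the weight of the SVR head to a lower value in order to achieve a slight public LB boost; the override line is currently commented out (and set to `0.2`, i.e. 20%), so the `best_w` found above is used as is. But maybe for the final submission we should use 50%?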

In [None]:
# Optional manual override of the SVR weight to help public LB.
# It is disabled: while the next line stays commented out, best_w keeps the
# value found by the weight sweep above.
# best_w = 0.2
print(f"best_w:{best_w}")

# Make Submission CSV
We make a submission csv using an ensemble of both heads. We use the optimal ensemble weights that we discovered above.

In [None]:
# Average each head's test predictions over the folds (one column per fold).
# Results go to new names so the per-fold lists and df_test stay untouched and
# this cell can be re-run safely.
nn_test_predictions = np.mean(np.column_stack(super_final_predictions), axis=1)
svr_test_predictions = np.mean(np.column_stack(super_final_predictions2), axis=1)

# Blend the two heads with the SVR weight best_w.
submission = df_test[["Id"]].assign(
    Pawpularity=(1-best_w)*nn_test_predictions + best_w*svr_test_predictions
)
submission.to_csv("submission.csv", index=False)
submission.head()