In [None]:
import os
import gc
import cv2
import math
import copy
import time
import random
from collections import OrderedDict


# For data manipulation
import numpy as np
import pandas as pd

# Pytorch Imports
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.cuda import amp

# Utils
import joblib
from tqdm import tqdm
from collections import defaultdict

# Sklearn Imports
from sklearn.preprocessing import LabelEncoder, normalize
from sklearn.model_selection import StratifiedKFold

# For Image Models
import timm

# Albumentations for augmentations
import albumentations as A
from albumentations.pytorch import ToTensorV2

import warnings
warnings.filterwarnings("ignore")

# For descriptive error messages
# os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [None]:
# Global run configuration read by the cells below.
CONFIG = dict(
    seed=42,                               # passed to seed_everything
    img_size=768,                          # side length used by the Resize transform
    model_name="tf_efficientnet_b6_ns",    # tf_efficientnet_b6_ns, tf_efficientnetv2_l_in21k, eca_nfnet_l2
    num_classes=15587,                     # output size of the ArcFace head
    embedding_size=512,
    train_batch_size=64,
    valid_batch_size=64,
    n_fold=5,                              # number of StratifiedKFold splits
    device=torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
    gpu_parallel=True,                     # checkpoints carry a 7-character key prefix to strip on load
    image_data="fullbody",                 # selects which image directories are used
    debug=True,
    num_workers=10,                        # DataLoader workers
    s=30.0,                                # ArcFace scale
    m=0.30,                                # ArcFace margin
    ls_eps=0.0,                            # ArcFace label smoothing
    easy_margin=False,
    KNN=850,                               # neighbours retrieved per query embedding
)

# Debug mode overrides the image size, backbone name and batch sizes.
if CONFIG["debug"]:
    CONFIG.update(
        img_size=512,
        model_name="tf_efficientnet_b0_ns",
        train_batch_size=32,
        valid_batch_size=64,
    )

In [None]:
def seed_everything(seed):
    """Seed the Python, NumPy and PyTorch RNGs and pin cuDNN to deterministic mode.

    Args:
        seed: integer seed applied to every generator.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Each of these takes the seed as its only argument.
    for apply_seed in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        apply_seed(seed)

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False  # set True to be faster
# Seed every RNG once with the seed stored in CONFIG.
seed_everything(CONFIG['seed'])

In [None]:
# Workspace root (checkpoints are read from here) and dataset root.
BASE_DIR = '/home/workspace'
ROOT_DIR = '/home/workspace/happy-whale-and-dolphin'

# (train, test) image directories for each known CONFIG["image_data"] value;
# any other value falls back to the plain train_images / test_images folders.
_IMAGE_DIRS = {
    "backfins": (f'{ROOT_DIR}/train_backfins_images', f'{ROOT_DIR}/test_backfins_images'),
    "fullbody": (f'{ROOT_DIR}/train_fullbody_images', f'{ROOT_DIR}/test_fullbody_images'),
}
TRAIN_DIR, TEST_DIR = _IMAGE_DIRS.get(
    CONFIG["image_data"],
    (f'{ROOT_DIR}/train_images', f'{ROOT_DIR}/test_images'),
)

In [None]:
def get_train_file_path(id):
    """Return the path of the training image named ``id`` inside ``TRAIN_DIR``."""
    image_path = f"{TRAIN_DIR}/{id}"
    return image_path

# Read the training metadata and attach the on-disk path of every image.
train_csv_path = f"{ROOT_DIR}/train.csv"
df = pd.read_csv(train_csv_path)
df['file_path'] = df['image'].map(get_train_file_path)
df.head()

In [None]:
# Load the persisted label encoder and use it to turn `individual_id` into
# integer class indices. (The original created a fresh LabelEncoder() first,
# which was overwritten immediately and never used.)
# NOTE(review): joblib.load unpickles arbitrary objects — only load le.pkl
# from a trusted source. Presumably this is the encoder saved by the training
# run; confirm, since the class indices must match the checkpoints.
with open(f'{ROOT_DIR}/le.pkl', "rb") as fp:
    encoder = joblib.load(fp)

# NOTE: re-running this cell without reloading `df` would try to transform
# already-encoded integers.
df['individual_id'] = encoder.transform(df['individual_id'])

In [None]:
# Tag every row with the index of the fold in which it is a validation row.
# StratifiedKFold is used without shuffling, so the split is deterministic.
skf = StratifiedKFold(n_splits=CONFIG['n_fold'])
fold_splits = skf.split(X=df, y=df.individual_id)
for fold, (_, valid_rows) in enumerate(fold_splits):
    # split() yields positional indices; .loc works here presumably because
    # df still has the default RangeIndex from read_csv — verify if df is re-indexed.
    df.loc[valid_rows, "kfold"] = fold

In [None]:
class HappyWhaleDataset(Dataset):
    """Dataset yielding an image, its encoded label and its image id.

    Args:
        df: DataFrame with ``image``, ``file_path`` and ``individual_id`` columns.
        transforms: optional callable invoked as ``transforms(image=img)``; the
            ``"image"`` entry of its result replaces the raw image.
    """

    def __init__(self, df, transforms=None):
        self.df = df
        self.ids = df['image'].values
        self.file_names = df['file_path'].values
        self.labels = df['individual_id'].values
        self.transforms = transforms

    def __len__(self):
        """Number of rows in the backing DataFrame."""
        return len(self.df)

    def __getitem__(self, index):
        """Load one sample.

        Returns:
            dict with ``image`` (transformed image, or the RGB array when no
            transform is set), ``label`` (long tensor) and ``id`` (image name).

        Raises:
            FileNotFoundError: if the image file cannot be read.
        """
        img_path = self.file_names[index]
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals a missing/unreadable file by returning None;
            # fail here with the path instead of an opaque cvtColor error.
            raise FileNotFoundError(f"Could not read image: {img_path}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        if self.transforms:
            img = self.transforms(image=img)["image"]

        return {
            'image': img,
            'label': torch.tensor(self.labels[index], dtype=torch.long),
            'id': self.ids[index],
        }

In [None]:
# Data augmentation
def _resize_normalize_pipeline():
    """Build a Resize -> Normalize -> ToTensorV2 pipeline for CONFIG['img_size']."""
    side = CONFIG['img_size']
    steps = [
        A.Resize(side, side),
        A.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
            max_pixel_value=255.0,
            p=1.0,
        ),
        ToTensorV2(),
    ]
    return A.Compose(steps, p=1.)


# "train" and "valid" use the same steps; each gets its own Compose instance.
data_transforms = {split: _resize_normalize_pipeline() for split in ("train", "valid")}

In [None]:
class GeM(nn.Module):
    """Generalized-mean pooling over the last two dimensions.

    Args:
        p: initial value of the learnable exponent.
        eps: lower clamp applied to the input before exponentiation.
    """

    def __init__(self, p=3, eps=1e-6):
        super().__init__()
        self.eps = eps
        self.p = nn.Parameter(torch.ones(1) * p)

    def forward(self, x):
        return self.gem(x, p=self.p, eps=self.eps)

    def gem(self, x, p=3, eps=1e-6):
        """Clamp, raise to ``p``, average over the full spatial window, take the p-th root."""
        window = (x.size(-2), x.size(-1))
        powered = x.clamp(min=eps).pow(p)
        pooled = F.avg_pool2d(powered, window)
        return pooled.pow(1. / p)

    def __repr__(self):
        exponent = self.p.data.tolist()[0]
        return f"{self.__class__.__name__}(p={exponent:.4f}, eps={self.eps})"

In [None]:
# Arcface
class ArcMarginProduct(nn.Module):
    r"""Additive angular margin head: target logit becomes ``s * cos(theta + m)``.

    Args:
        in_features: size of each input embedding.
        out_features: number of classes (rows of the weight matrix).
        s: scale multiplied into the returned logits.
        m: angular margin added to the target-class angle.
        easy_margin: if True, apply the margin only where ``cos(theta) > 0``.
        ls_eps: label-smoothing factor applied to the one-hot target mask.
    """
    def __init__(self, in_features, out_features, s=30.0,
                 m=0.50, easy_margin=False, ls_eps=0.0):
        super(ArcMarginProduct, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.s = s
        self.m = m
        self.ls_eps = ls_eps
        self.weight = nn.Parameter(torch.FloatTensor(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)

        self.easy_margin = easy_margin
        self.cos_m = math.cos(m)  # cos margin
        self.sin_m = math.sin(m)  # sin margin
        self.threshold = math.cos(math.pi - m)  # cos(pi - m) = -cos(m)
        self.mm = math.sin(math.pi - m) * m  # sin(pi - m)*m = sin(m)*m

    def forward(self, input, label):
        """Return scaled logits of shape ``(batch, out_features)``.

        Args:
            input: embeddings, one row per sample.
            label: integer class index per sample.
        """
        # --------------------------- cos(theta) & phi(theta) ---------------------
        cosine = F.linear(F.normalize(input), F.normalize(self.weight))
        # Rounding can push |cosine| slightly above 1; clamp so sqrt never sees
        # a negative value (which would yield NaN).
        sine = torch.sqrt((1.0 - torch.pow(cosine, 2)).clamp(min=0.0, max=1.0))
        phi = cosine * self.cos_m - sine * self.sin_m  # cos(theta)cos(m) - sin(theta)sin(m) = cos(theta + m)
        phi = phi.float()
        cosine = cosine.float()
        if self.easy_margin:
            phi = torch.where(cosine > 0, phi, cosine)
        else:
            # cos(theta) > cos(pi - m) means theta + m < pi, so phi = cos(theta + m) is used.
            # Otherwise theta + m >= pi and the linear fallback cos(theta) - m*sin(m) is used.
            phi = torch.where(cosine > self.threshold, phi, cosine - self.mm)
        # https://github.com/ronghuaiyang/arcface-pytorch/issues/48
        # --------------------------- convert label to one-hot ---------------------
        # Allocate on the same device/dtype as the logits rather than on a global device.
        one_hot = torch.zeros_like(cosine)
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        # label smoothing
        if self.ls_eps > 0:
            one_hot = (1 - self.ls_eps) * one_hot + self.ls_eps / self.out_features
        # Blend: margin logit where one_hot is set, plain cosine elsewhere.
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output *= self.s

        return output

In [None]:
class HappyWhaleModel(nn.Module):
    """timm backbone -> GeM pooling -> BatchNorm + Linear embedding -> ArcFace head.

    Args:
        model_name: timm model name; must contain ``'efficientnet'`` or ``'nfnet'``.
        embedding_size: output size of the embedding layer.
        pretrained: forwarded to ``timm.create_model``.

    Raises:
        ValueError: if ``model_name`` matches neither supported backbone family.
    """
    def __init__(self, model_name, embedding_size, pretrained=True):
        super(HappyWhaleModel, self).__init__()
        self.model = timm.create_model(model_name, pretrained=pretrained)
        # Replace the backbone's classifier and pooling with identities so it
        # returns its feature map; pooling is done by GeM below.
        if 'efficientnet' in model_name:
            in_features = self.model.classifier.in_features
            self.model.classifier = nn.Identity()
            self.model.global_pool = nn.Identity()
        elif 'nfnet' in model_name:
            in_features = self.model.head.fc.in_features
            self.model.head.fc = nn.Identity()
            self.model.head.global_pool = nn.Identity()
        else:
            # Previously this fell through and failed later with an unbound
            # `in_features`; fail early with an explicit message instead.
            raise ValueError(
                f"Unsupported model_name '{model_name}': expected an "
                "'efficientnet' or 'nfnet' backbone"
            )

        self.pooling = GeM()
        self.embedding = nn.Sequential(
                            nn.BatchNorm1d(in_features),
                            nn.Linear(in_features, embedding_size)
                            )
        # arcface
        self.fc = ArcMarginProduct(embedding_size,
                                   CONFIG["num_classes"],
                                   s=CONFIG["s"],
                                   m=CONFIG["m"],
                                   easy_margin=CONFIG["easy_margin"],
                                   ls_eps=CONFIG["ls_eps"])

    def forward(self, images, labels):
        """Return ArcFace logits for ``images`` given their ``labels``."""
        embedding = self.extract(images)
        return self.fc(embedding, labels)  # arcface

    def extract(self, images):
        """Return the embedding for ``images`` (no ArcFace head)."""
        features = self.model(images)
        pooled_features = self.pooling(features).flatten(1)  # gem pooling
        return self.embedding(pooled_features)  # embedding

In [None]:
@torch.inference_mode()
def get_embeddings(model, dataloader, device):
    """Run ``model.extract`` over ``dataloader`` and collect the results.

    Args:
        model: module exposing ``extract(images)``; moved to ``device`` and put in eval mode.
        dataloader: yields dicts with ``image``, ``label`` and ``id`` entries.
        device: device used for the model and for every batch.

    Returns:
        ``(EMBEDS, LABELS, IDS)`` — the stacked embeddings, the concatenated
        labels and the concatenated ids, all as NumPy arrays in loader order.
    """
    # Use the caller-supplied device (the original ignored it in favour of
    # CONFIG['device'], which would mismatch the batches if they differed).
    model.to(device)
    model.eval()

    LABELS = []
    EMBEDS = []
    IDS = []

    for data in tqdm(dataloader, total=len(dataloader)):
        images = data['image'].to(device, dtype=torch.float)
        labels = data['label'].to(device, dtype=torch.long)

        outputs = model.extract(images)

        LABELS.append(labels.cpu().numpy())
        EMBEDS.append(outputs.cpu().numpy())
        IDS.append(data['id'])

    EMBEDS = np.vstack(EMBEDS)
    LABELS = np.concatenate(LABELS)
    IDS = np.concatenate(IDS)

    return EMBEDS, LABELS, IDS

In [None]:
def prepare_loaders(df, fold):
    """Build un-shuffled train/valid DataLoaders for one fold.

    Rows whose ``kfold`` equals ``fold`` form the validation set; all other
    rows form the training set.

    Returns:
        ``(train_loader, valid_loader)``
    """
    def _make_loader(frame, split, batch_key):
        # One dataset + loader for the given subset, transform split and batch-size key.
        dataset = HappyWhaleDataset(frame.reset_index(drop=True),
                                    transforms=data_transforms[split])
        return DataLoader(
            dataset,
            batch_size=CONFIG[batch_key],
            num_workers=CONFIG["num_workers"],
            shuffle=False,
            pin_memory=False,
        )

    train_loader = _make_loader(df[df.kfold != fold], "train", 'train_batch_size')
    valid_loader = _make_loader(df[df.kfold == fold], "valid", 'valid_batch_size')
    return train_loader, valid_loader

In [None]:
# Build the test frame with the same columns the Dataset reads.
# NOTE(review): file names are listed from ROOT_DIR/test_images while paths
# point into TEST_DIR — this assumes both directories contain identically
# named files; confirm.
test_image_names = os.listdir(f"{ROOT_DIR}/test_images")
test_df = pd.DataFrame({"image": test_image_names})
test_df["file_path"] = [f"{TEST_DIR}/{x}" for x in test_df["image"]]
test_df["individual_id"] = -1  # placeholder label; test identities are unknown

In [None]:
# Loaders for fold 0 plus an un-shuffled loader over the test images.
# NOTE(review): validation is fixed to fold 0 while the ensemble below loads
# checkpoints named for several folds — some may have seen fold-0 rows during
# training, which would inflate the validation score; confirm.
train_loader, valid_loader = prepare_loaders(df, fold=0)
test_dataset = HappyWhaleDataset(test_df, transforms=data_transforms["valid"])
test_loader = DataLoader(
    test_dataset,
    batch_size=CONFIG['valid_batch_size'],
    num_workers=CONFIG["num_workers"],
    shuffle=False,
    pin_memory=False,
)  # test dataloader

In [None]:
# Ensemble definition: three parallel lists, one entry per checkpoint.
model_weight_dict = {
    "weights_dir":[f"{BASE_DIR}/tf_efficientnet_b6_ns_fold1.pth",
                   f"{BASE_DIR}/tf_efficientnet_b6_ns_fold3.pth",
                   f"{BASE_DIR}/tf_efficientnetv2_l_in21k_fold0.pth",
                   f"{BASE_DIR}/tf_efficientnetv2_l_in21k_fold1.pth",
                   f"{BASE_DIR}/tf_efficientnetv2_l_in21k_fold2.pth",
                   f"{BASE_DIR}/tf_efficientnetv2_l_in21k_fold4.pth",
                   f"{BASE_DIR}/eca_nfnet_l2_fold0.pth",
                   f"{BASE_DIR}/eca_nfnet_l2_fold1.pth",
                   f"{BASE_DIR}/eca_nfnet_l2_fold4.pth",
                   ],
    "model_name": ["tf_efficientnet_b6_ns",
                   "tf_efficientnet_b6_ns",
                   "tf_efficientnetv2_l_in21k",
                   "tf_efficientnetv2_l_in21k",
                   "tf_efficientnetv2_l_in21k",
                   "tf_efficientnetv2_l_in21k",
                   "eca_nfnet_l2",
                   "eca_nfnet_l2",
                   "eca_nfnet_l2",
                   ],
    "embedding_size":[512, 512, 512, 512, 512, 512, 512, 512, 512],
}

train_embeds_list = []
valid_embeds_list = []
test_embeds_list = []
train_labels_list = []
valid_labels_list = []
train_ids_list = []
valid_ids_list = []
test_ids_list = []

# The three lists are consumed in lock-step; catch a mismatched edit early.
_n_models = len(model_weight_dict["weights_dir"])
assert all(len(entries) == _n_models for entries in model_weight_dict.values()), \
    "model_weight_dict lists must all have the same length"

_PARALLEL_PREFIX = "module."  # 7-character key prefix stripped when CONFIG['gpu_parallel'] is set

for idx, (weights_dir, model_name, embedding_size) in enumerate(zip(
        model_weight_dict["weights_dir"],
        model_weight_dict["model_name"],
        model_weight_dict["embedding_size"])):

    # Every parameter is overwritten by the strict load_state_dict below, so
    # skip fetching timm's pretrained weights.
    model = HappyWhaleModel(model_name, embedding_size, pretrained=False)
    # map_location lets checkpoints saved on a GPU load on a CPU-only machine;
    # the model is moved to CONFIG['device'] after loading.
    state = torch.load(weights_dir, map_location="cpu")

    if CONFIG['gpu_parallel']:
        # Strip the prefix only from keys that actually carry it, instead of
        # blindly dropping the first 7 characters of every key.
        new_state_dict = OrderedDict(
            (k[len(_PARALLEL_PREFIX):] if k.startswith(_PARALLEL_PREFIX) else k, v)
            for k, v in state.items()
        )
        model.load_state_dict(new_state_dict)
    else:
        model.load_state_dict(state)
    model.to(CONFIG['device'])

    # Labels and ids are identical for every model (same un-shuffled loaders),
    # so the values from the last iteration are the ones used downstream.
    train_embeds, train_labels, train_ids = get_embeddings(model, train_loader, CONFIG['device'])
    valid_embeds, valid_labels, valid_ids = get_embeddings(model, valid_loader, CONFIG['device'])
    test_embeds, _, test_ids = get_embeddings(model, test_loader, CONFIG['device'])

    train_embeds_list.append(train_embeds)
    valid_embeds_list.append(valid_embeds)
    test_embeds_list.append(test_embeds)

    torch.cuda.empty_cache()
    _ = gc.collect()

# Concatenate per-model embeddings along the feature axis.
train_embeds = np.concatenate(train_embeds_list,axis=1)
valid_embeds = np.concatenate(valid_embeds_list,axis=1)
test_embeds = np.concatenate(test_embeds_list,axis=1)


In [None]:
from sklearn.neighbors import NearestNeighbors

# Index the training embeddings and fetch the CONFIG["KNN"] nearest training
# neighbours (cosine distance) of every validation embedding.
neigh = NearestNeighbors(n_neighbors=CONFIG["KNN"], metric='cosine').fit(train_embeds)
valid_distances, valid_idxs = neigh.kneighbors(
    valid_embeds,
    n_neighbors=CONFIG["KNN"],
    return_distance=True,
)

In [None]:
# Decode the integer labels back to the original individual ids.
train_allowed_labels, valid_allowed_labels = (
    encoder.inverse_transform(encoded) for encoded in (train_labels, valid_labels)
)

# Identities present in the training fold.
train_allowed_labels_set = set(train_allowed_labels)

In [None]:
# Ground truth per validation image; identities absent from the training fold
# are relabelled 'new_individual'.
val_targets_df = pd.DataFrame(
    np.column_stack([valid_ids, valid_allowed_labels]),
    columns=['image','target'],
)
unseen_mask = ~val_targets_df['target'].isin(train_allowed_labels_set)
val_targets_df.loc[unseen_mask, 'target'] = 'new_individual'

In [None]:
# Long-format neighbour table: one row per (validation image, neighbour).
# Built in one vectorised step instead of one small DataFrame per image.
assert len(valid_ids) == len(valid_idxs) == len(valid_distances)
neighbors_per_image = valid_idxs.shape[1]
valid_df = pd.DataFrame({
    'target': train_labels[valid_idxs].ravel(),           # encoded label of each neighbour
    'distances': valid_distances.ravel(),                 # cosine distance to that neighbour
    'image': np.repeat(valid_ids, neighbors_per_image),   # query image, repeated per neighbour
})
valid_df['confidence'] = 1-valid_df['distances']
# Keep the best confidence per (image, candidate identity), highest first.
valid_df = valid_df.groupby(['image','target']).confidence.max().reset_index()
valid_df = valid_df.sort_values('confidence',ascending=False).reset_index(drop=True)
valid_df['target'] = encoder.inverse_transform(valid_df['target'].astype("int").to_list())
valid_df.to_csv('val_neighbors.csv')
# Distribution of the number of distinct candidate identities per image.
valid_df.image.value_counts().value_counts()

In [None]:
# Fallback ids used to pad any prediction list shorter than five entries.
sample_list = ['938b7e931166', '5bf17305f073', '7593d2aee842', '7362d7a01d00','956562ff2888']

def get_predictions(test_df, threshold=0.2):
    """Turn a neighbour table into up to five ranked predictions per image.

    Rows are consumed in the order given, so the caller is expected to pass a
    frame already sorted by descending ``confidence``.

    For each image the first row decides where ``'new_individual'`` goes: after
    the first target if its confidence exceeds ``threshold``, before it
    otherwise. Following rows append their targets until five entries are
    reached; shorter lists are padded from ``sample_list``.

    Args:
        test_df: DataFrame with ``image``, ``target`` and ``confidence`` columns.
        threshold: confidence above which the top match outranks ``'new_individual'``.

    Returns:
        dict mapping image -> list of five predictions.
    """
    max_preds = 5
    # The first row yields two entries, so at most max_preds - 1 rows per image
    # can contribute; drop the rest up front (order is preserved). With the
    # input this small, per-row progress bars are no longer needed.
    top_rows = test_df.groupby('image', sort=False).head(max_preds - 1)

    predictions = {}
    for image, target, confidence in zip(top_rows['image'], top_rows['target'], top_rows['confidence']):
        if image in predictions:
            if len(predictions[image]) == max_preds:
                continue
            predictions[image].append(target)
        elif confidence > threshold:
            predictions[image] = [target, 'new_individual']
        else:
            predictions[image] = ['new_individual', target]

    for image, preds in predictions.items():
        if len(preds) < max_preds:
            # Pad only with ids not already predicted for THIS image. (The
            # original tested membership in the dict of images, so it never
            # filtered anything and could emit duplicates.)
            remaining = [y for y in sample_list if y not in preds]
            predictions[image] = (preds + remaining)[:max_preds]

    return predictions

In [None]:
def map_per_image(label, predictions):
    """Score one image by the reciprocal rank of its true label.

    Parameters
    ----------
    label : string
            The true label of the image
    predictions : list
            Ranked predictions; only the first five are considered

    Returns
    -------
    score : double
            ``1 / rank`` of ``label`` within the first five predictions
            (rank starting at 1), or ``0.0`` if it is not among them.
    """
    top_five = predictions[:5]
    if label not in top_five:
        return 0.0
    rank = top_five.index(label) + 1
    return 1 / rank

In [None]:
# Sweep the 'new_individual' threshold over 0.0, 0.1, ..., 1.0 and keep the
# value with the best mean score on the validation fold.
best_th = 0
best_cv = 0
for th in [0.1*x for x in range(11)]:
    print("threshold:", th)
    all_preds = get_predictions(valid_df,threshold=th)
    # Score every validation image and store the scores in a column keyed by
    # the threshold value (one column assignment instead of row-by-row writes).
    val_targets_df[th] = [
        map_per_image(target, all_preds[image])
        for image, target in zip(val_targets_df['image'], val_targets_df['target'])
    ]
    cv = val_targets_df[th].mean()
    print(f"CV at threshold {th}: {cv}")
    if cv>best_cv:
        best_th = th
        best_cv = cv

In [None]:
# Report the best threshold and score found by the sweep, then summarise the
# numeric columns of val_targets_df.
print("Best threshold",best_th)
print("Best cv",best_cv) 
val_targets_df.describe() 

In [None]:
# Preview the first rows of the per-image validation table.
val_targets_df.head()

In [None]:
# Re-weight the validation score between new and known individuals.
# NOTE(review): the 0.15 / 0.85 weights are presumably the expected share of
# new vs. known individuals in the test set — confirm the source.
NEW_INDIVIDUAL_WEIGHT = 0.15
KNOWN_INDIVIDUAL_WEIGHT = 0.85

val_targets_df['is_new_individual'] = val_targets_df.target=='new_individual'
print(val_targets_df.is_new_individual.value_counts().to_dict())
# numeric_only=True keeps just the per-threshold score columns; without it,
# pandas >= 2.0 raises on the string 'image' / 'target' columns.
val_scores = val_targets_df.groupby('is_new_individual').mean(numeric_only=True).T
val_scores['adjusted_cv'] = (val_scores[True]*NEW_INDIVIDUAL_WEIGHT
                             + val_scores[False]*KNOWN_INDIVIDUAL_WEIGHT)
best_threshold_adjusted = val_scores['adjusted_cv'].idxmax()
print("best_threshold",best_threshold_adjusted)
val_scores

# Inference

In [None]:
# For test-time retrieval the gallery is train + validation combined.
all_embeds = np.concatenate((train_embeds, valid_embeds), axis=0)
all_labels = np.concatenate((train_labels, valid_labels), axis=0)
print(all_embeds.shape, all_labels.shape)

from sklearn.neighbors import NearestNeighbors

# Fetch the CONFIG["KNN"] nearest gallery neighbours (cosine distance) of every test embedding.
neigh = NearestNeighbors(n_neighbors=CONFIG["KNN"], metric='cosine').fit(all_embeds)
test_distances, test_idxs = neigh.kneighbors(
    test_embeds,
    n_neighbors=CONFIG["KNN"],
    return_distance=True,
)

In [None]:
sample_submission = pd.read_csv(f'{ROOT_DIR}/sample_submission.csv', index_col='image')
print("test_ids len:",len(test_ids), "sample_submission len:",len(sample_submission))

# Long-format neighbour table: one row per (test image, neighbour).
# Built in one vectorised step instead of one small DataFrame per image.
# NOTE: from here on `test_df` is this neighbour table, not the file listing.
assert len(test_ids) == len(test_idxs) == len(test_distances)
neighbors_per_image = test_idxs.shape[1]
test_df = pd.DataFrame({
    'target': all_labels[test_idxs].ravel(),              # encoded label of each neighbour
    'distances': test_distances.ravel(),                  # cosine distance to that neighbour
    'image': np.repeat(test_ids, neighbors_per_image),    # query image, repeated per neighbour
})
test_df['confidence'] = 1-test_df['distances']
# Keep the best confidence per (image, candidate identity), highest first.
test_df = test_df.groupby(['image','target']).confidence.max().reset_index()
test_df = test_df.sort_values('confidence',ascending=False).reset_index(drop=True)
test_df['target'] = encoder.inverse_transform(test_df['target'].astype("int").to_list())
test_df.to_csv('test_neighbors.csv')
# Distribution of the number of distinct candidate identities per image.
test_df.image.value_counts().value_counts()

In [None]:
# Build the submission: up to five ranked predictions per test image, using
# the threshold chosen on the validation fold to place 'new_individual'.
# test_df is sorted by descending confidence, and the first row of an image
# yields two entries, so only its first four rows can contribute.
top_rows = test_df.groupby('image', sort=False).head(4)

predictions = {}
for image, target, confidence in tqdm(
        zip(top_rows['image'], top_rows['target'], top_rows['confidence']),
        total=len(top_rows)):
    if image in predictions:
        if len(predictions[image])==5:
            continue
        predictions[image].append(target)
    elif confidence>best_threshold_adjusted:
        predictions[image] = [target,'new_individual']
    else:
        predictions[image] = ['new_individual',target]

for x in tqdm(predictions):
    if len(predictions[x])<5:
        # Pad only with ids not already predicted for THIS image. (The original
        # tested membership in the dict of images, so it never filtered
        # anything and could emit duplicate ids.)
        remaining = [y for y in sample_list if y not in predictions[x]]
        predictions[x] = (predictions[x]+remaining)[:5]
    predictions[x] = ' '.join(predictions[x])

predictions = pd.Series(predictions).reset_index()
predictions.columns = ['image','predictions']
predictions.to_csv('submission.csv',index=False)
predictions.head()