# 导入依赖

In [2]:
import math
from typing import Callable
from typing import Dict
from typing import Optional
from typing import Tuple
from pathlib import Path

import faiss
import numpy as np
import pandas as pd
import pytorch_lightning as pl
import timm
import torch
import torch.nn as nn
import torch.nn.functional as F
from PIL import Image
from pytorch_lightning.callbacks.model_checkpoint import ModelCheckpoint
from timm.data.transforms_factory import create_transform
from timm.optim import create_optimizer_v2
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from tqdm.notebook import tqdm
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import normalize
from sklearn.preprocessing import LabelEncoder

In [3]:
# From https://github.com/lyakaap/Landmark2019-1st-and-3rd-Place-Solution/blob/master/src/modeling/metric_learning.py
# Added type annotations, device, and 16bit support
class ArcMarginProduct(nn.Module):
    r"""Implement of large margin arc distance: :
    Args:
        in_features: size of each input sample
        out_features: size of each output sample
        s: norm of input feature
        m: margin
        cos(theta + m)
    """

    def __init__(
        self,
        in_features: int,
        out_features: int,
        s: float,
        m: float,
        easy_margin: bool,
        ls_eps: float,
    ):
        super(ArcMarginProduct, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.s = s
        self.m = m
        self.ls_eps = ls_eps  # label smoothing
        self.weight = nn.Parameter(torch.FloatTensor(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)

        self.easy_margin = easy_margin
        self.cos_m = math.cos(m)
        self.sin_m = math.sin(m)
        self.th = math.cos(math.pi - m)
        self.mm = math.sin(math.pi - m) * m

    def forward(self, input: torch.Tensor, label: torch.Tensor, device: str = "cuda") -> torch.Tensor:
        # --------------------------- cos(theta) & phi(theta) ---------------------
        cosine = F.linear(F.normalize(input), F.normalize(self.weight))
        # Enable 16 bit precision
        cosine = cosine.to(torch.float32)

        sine = torch.sqrt(1.0 - torch.pow(cosine, 2))
        phi = cosine * self.cos_m - sine * self.sin_m
        if self.easy_margin:
            phi = torch.where(cosine > 0, phi, cosine)
        else:
            phi = torch.where(cosine > self.th, phi, cosine - self.mm)
        # --------------------------- convert label to one-hot ---------------------
        # one_hot = torch.zeros(cosine.size(), requires_grad=True, device='cuda')
        one_hot = torch.zeros(cosine.size(), device=device)
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        if self.ls_eps > 0:
            one_hot = (1 - self.ls_eps) * one_hot + self.ls_eps / self.out_features
        # -------------torch.where(out_i = {x_i if condition_i else y_i) ------------
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output *= self.s

        return output

In [4]:
class LitModule(pl.LightningModule):
    def __init__(
        self,
        model_name: str,
        pretrained: bool,
        drop_rate: float,
        embedding_size: int,
        num_classes: int,
        arc_s: float,
        arc_m: float,
        arc_easy_margin: bool,
        arc_ls_eps: float,
        optimizer: str,
        learning_rate: float,
        weight_decay: float,
        len_train_dl: int,
        epochs:int
    ):
        super().__init__()

        self.save_hyperparameters()

        self.model = timm.create_model(model_name, pretrained=pretrained, drop_rate=drop_rate)
        self.embedding = nn.Linear(self.model.get_classifier().in_features, embedding_size)
        self.model.reset_classifier(num_classes=0, global_pool="avg")

        self.arc = ArcMarginProduct(
            in_features=embedding_size,
            out_features=num_classes,
            s=arc_s,
            m=arc_m,
            easy_margin=arc_easy_margin,
            ls_eps=arc_ls_eps,
        )

        self.loss_fn = F.cross_entropy

    def forward(self, images: torch.Tensor) -> torch.Tensor:
        features = self.model(images)
        embeddings = self.embedding(features)

        return embeddings

    def configure_optimizers(self):
        optimizer = create_optimizer_v2(
            self.parameters(),
            opt=self.hparams.optimizer,
            lr=self.hparams.learning_rate,
            weight_decay=self.hparams.weight_decay,
        )
        
        scheduler = torch.optim.lr_scheduler.OneCycleLR(
            optimizer,
            self.hparams.learning_rate,
            steps_per_epoch=self.hparams.len_train_dl,
            epochs=self.hparams.epochs,
        )
        scheduler = {"scheduler": scheduler, "interval": "step"}

        return [optimizer], [scheduler]

    def training_step(self, batch: Dict[str, torch.Tensor], batch_idx: int) -> torch.Tensor:
        return self._step(batch, "train")

    def validation_step(self, batch: Dict[str, torch.Tensor], batch_idx: int) -> torch.Tensor:
        return self._step(batch, "val")

    def _step(self, batch: Dict[str, torch.Tensor], step: str) -> torch.Tensor:
        images, targets = batch["image"], batch["target"]

        embeddings = self(images)
        outputs = self.arc(embeddings, targets, self.device)

        loss = self.loss_fn(outputs, targets)
        
        self.log(f"{step}_loss", loss)

        return loss

In [5]:
def load_eval_module(checkpoint_path: str, device: torch.device) -> LitModule:
    module = LitModule.load_from_checkpoint(checkpoint_path)
    module.to(device)
    module.eval()

    return module

def load_dataloaders(
    train_csv_encoded_folded: str,
    test_csv: str,
    val_fold: float,
    image_size: int,
    batch_size: int,
    num_workers: int,
) -> Tuple[DataLoader, DataLoader, DataLoader]:

    datamodule = LitDataModule(
        train_csv_encoded_folded=train_csv_encoded_folded,
        test_csv=test_csv,
        val_fold=val_fold,
        image_size=image_size,
        batch_size=batch_size,
        num_workers=num_workers,
    )

    datamodule.setup()

    train_dl = datamodule.train_dataloader()
    val_dl = datamodule.val_dataloader()
    test_dl = datamodule.test_dataloader()

    return train_dl, val_dl, test_dl


def load_encoder() -> LabelEncoder:
    encoder = LabelEncoder()
    encoder.classes_ = np.load(ENCODER_CLASSES_PATH, allow_pickle=True)

    return encoder


@torch.no_grad()
def get_embeddings(
    module: pl.LightningModule, dataloader: DataLoader, encoder: LabelEncoder, stage: str
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:

    all_image_names = []
    all_embeddings = []
    all_targets = []

    for batch in tqdm(dataloader, desc=f"Creating {stage} embeddings"):
        image_names = batch["image_name"]
        images = batch["image"].to(module.device)
        targets = batch["target"].to(module.device)

        embeddings = module(images)

        all_image_names.append(image_names)
        all_embeddings.append(embeddings.cpu().numpy())
        all_targets.append(targets.cpu().numpy())
        
        # if DEBUG:
        #     break

    all_image_names = np.concatenate(all_image_names)
    all_embeddings = np.vstack(all_embeddings)
    all_targets = np.concatenate(all_targets)

    all_embeddings = normalize(all_embeddings, axis=1, norm="l2")
    all_targets = encoder.inverse_transform(all_targets)

    return all_image_names, all_embeddings, all_targets


def create_and_search_index(embedding_size: int, train_embeddings: np.ndarray, val_embeddings: np.ndarray, k: int):
    index = faiss.IndexFlatIP(embedding_size)
    index.add(train_embeddings)
    D, I = index.search(val_embeddings, k=k)  # noqa: E741

    return D, I


def create_val_targets_df(
    train_targets: np.ndarray, val_image_names: np.ndarray, val_targets: np.ndarray
) -> pd.DataFrame:

    allowed_targets = np.unique(train_targets)
    val_targets_df = pd.DataFrame(np.stack([val_image_names, val_targets], axis=1), columns=["image", "target"])
    val_targets_df.loc[~val_targets_df.target.isin(allowed_targets), "target"] = "new_individual"

    return val_targets_df


def create_distances_df(
    image_names: np.ndarray, targets: np.ndarray, D: np.ndarray, I: np.ndarray, stage: str  # noqa: E741
) -> pd.DataFrame:

    distances_df = []
    for i, image_name in tqdm(enumerate(image_names), desc=f"Creating {stage}_df"):
        target = targets[I[i]]
        distances = D[i]
        subset_preds = pd.DataFrame(np.stack([target, distances], axis=1), columns=["target", "distances"])
        subset_preds["image"] = image_name
        distances_df.append(subset_preds)

    distances_df = pd.concat(distances_df).reset_index(drop=True)
    distances_df = distances_df.groupby(["image", "target"]).distances.max().reset_index()
    distances_df = distances_df.sort_values("distances", ascending=False).reset_index(drop=True)

    return distances_df


def get_cv(val_targets_df: pd.DataFrame, all_preds) -> Tuple[float, float]:
    # all_preds = get_predictions(valid_df, threshold=0.5)
    cv = 0
    for i, row in val_targets_df.iterrows():
        target = row.target
        preds = all_preds[row.image]
        val_targets_df.loc[i, 0.5] = map_per_image(target, preds)

    cv = val_targets_df[0.5].mean()

    # Adjustment: Since Public lb has nearly 10% 'new_individual' (Be Careful for private LB)
    val_targets_df["is_new_individual"] = val_targets_df.target == "new_individual"
    val_scores = val_targets_df.groupby("is_new_individual").mean().T
    val_scores["adjusted_cv"] = val_scores[True] * 0.1 + val_scores[False] * 0.9
    best_th = val_scores["adjusted_cv"].idxmax()

    return cv



def get_best_threshold(val_targets_df: pd.DataFrame, valid_df: pd.DataFrame) -> Tuple[float, float]:
    best_th = 0
    best_cv = 0
    for th in [0.01 * x for x in range(40, 60, 2)]:
        all_preds = get_predictions(valid_df, threshold=th)

        cv = 0
        for i, row in val_targets_df.iterrows():
            target = row.target
            preds = all_preds[row.image]
            val_targets_df.loc[i, th] = map_per_image(target, preds)

        cv = val_targets_df[th].mean()

        print(f"th={th} cv={cv}")

        if cv > best_cv:
            best_th = th
            best_cv = cv

    print(f"best_th={best_th}")
    print(f"best_cv={best_cv}")

    # Adjustment: Since Public lb has nearly 10% 'new_individual' (Be Careful for private LB)
    val_targets_df["is_new_individual"] = val_targets_df.target == "new_individual"
    val_scores = val_targets_df.groupby("is_new_individual").mean().T
    val_scores["adjusted_cv"] = val_scores[True] * 0.1 + val_scores[False] * 0.9
    best_th = val_scores["adjusted_cv"].idxmax()
    print(f"best_th_adjusted={best_th}")

    return best_th, best_cv


def get_predictions(df: pd.DataFrame, threshold: float = 0.2):
    sample_list = ["938b7e931166", "5bf17305f073", "7593d2aee842", "7362d7a01d00", "956562ff2888"]

    predictions = {}
    for i, row in tqdm(df.iterrows(), total=len(df), desc=f"Creating predictions for threshold={threshold}"):
        if row.image in predictions:
            if len(predictions[row.image]) == 5:
                continue
            predictions[row.image].append(row.target)
        elif row.distances > threshold:
            predictions[row.image] = [row.target, "new_individual"]
        else:
            predictions[row.image] = ["new_individual", row.target]

    for x in tqdm(predictions):
        if len(predictions[x]) < 5:
            remaining = [y for y in sample_list if y not in predictions]
            predictions[x] = predictions[x] + remaining
            predictions[x] = predictions[x][:5]

    return predictions


# TODO: add types
def map_per_image(label, predictions):
    """Computes the precision score of one image.

    Parameters
    ----------
    label : string
            The true label of the image
    predictions : list
            A list of predicted elements (order does matter, 5 predictions allowed per image)

    Returns
    -------
    score : double
    """
    try:
        return 1 / (predictions[:5].index(label) + 1)
    except ValueError:
        return 0.0


def create_predictions_df(test_df: pd.DataFrame, best_th: float) -> pd.DataFrame:
    predictions = get_predictions(test_df, best_th)

    predictions = pd.Series(predictions).reset_index()
    predictions.columns = ["image", "predictions"]
    predictions["predictions"] = predictions["predictions"].apply(lambda x: " ".join(x))

    return predictions

# 找到合适的加权

In [40]:
import torch

convnext_data = torch.load("./cache/convnext/convnext_large_384_in22ft1k_384_4.pth")
print("convnext: ", convnext_data['train']['train_image_names'])
print("convnext: ", convnext_data['train']['train_targets'])
print("convnext: ", convnext_data)

convnext_base_data = torch.load("./cache/convnext/convnext_base_384_in22ft1k_512_4.pth")
print("convnext: ", convnext_base_data['train']['train_image_names'])
print("convnext: ", convnext_base_data['train']['train_targets'])


efficientnet_data = torch.load("./cache/efficientnet/tf_efficientnet_b7_ns_512_4.pth")
print("efficientnet: ", efficientnet_data['train']['train_image_names'])
print("efficientnet: ", efficientnet_data['train']['train_targets'])

efficientnet_b5_data = torch.load("./cache/efficientnet/tf_efficientnet_b5_ns_512_4.pth")
print("efficientnet: ", efficientnet_b5_data['train']['train_image_names'])
print("efficientnet: ", efficientnet_b5_data['train']['train_targets'])

efficientnet_b6_data = torch.load("./cache/efficientnet/tf_efficientnet_b6_ns_512_4.pth")
print("efficientnet: ", efficientnet_b6_data['train']['train_image_names'])
print("efficientnet: ", efficientnet_b6_data['train']['train_targets'])

convnext:  ['00021adfb725ed.jpg' '000562241d384d.jpg' '0007c33415ce37.jpg' ...
 'ffae18d2939ffc.jpg' 'ffaf6c062ccdef.jpg' 'ffb1ddd9a59530.jpg']
convnext:  ['cadddb1636b9' '1a71fbb72250' '60008f293a2b' ... '1a71fbb72250'
 '938167f9ea20' 'f92fcbf8f42d']
convnext:  {'train': {'train_image_names': array(['00021adfb725ed.jpg', '000562241d384d.jpg', '0007c33415ce37.jpg',
       ..., 'ffae18d2939ffc.jpg', 'ffaf6c062ccdef.jpg',
       'ffb1ddd9a59530.jpg'], dtype='<U18'), 'train_embeddings': array([[ 0.03500184,  0.02693112, -0.09545888, ...,  0.0084056 ,
         0.04649101,  0.02836194],
       [ 0.07109264,  0.02076571,  0.04391202, ...,  0.0821337 ,
        -0.03508933, -0.00155115],
       [ 0.01291985, -0.01428259, -0.04355193, ...,  0.07608046,
         0.02878092, -0.0575893 ],
       ...,
       [ 0.08326291, -0.02447291,  0.03061166, ...,  0.0741178 ,
        -0.06301463,  0.02398695],
       [ 0.0423771 ,  0.02825621, -0.06514937, ..., -0.06134047,
         0.05141734,  0.09020478],

In [20]:
train_image_names = convnext_data['train']['train_image_names']
val_image_names = convnext_data['val']['val_image_names']


train_embeddings = convnext_data['train']['train_embeddings']
val_embeddings = convnext_data['val']['val_embeddings']

train_targets = convnext_data['train']['train_targets']
val_targets = convnext_data['val']['val_targets']

D, I = create_and_search_index(512, train_embeddings, val_embeddings, 50)  # noqa: E741
print("Created index with train_embeddings")

val_targets_df = create_val_targets_df(train_targets, val_image_names, val_targets)
val_df = create_distances_df(val_image_names, train_targets, D, I, "val")
all_pred =  get_predictions(val_df, threshold=0.5)

cv = get_cv(val_targets_df, all_pred)
print(cv)


predictions = create_predictions_df(val_df, 0.5)
print(f"predictions.head()={predictions.head()}")
predictions = predictions.drop_duplicates()
predictions.to_csv(f"./cache/convnext/convnext_large_384_in22ft1k_384_0_{cv:.4f}_submission.csv", index=False)

Created index with train_embeddings


Creating val_df: 0it [00:00, ?it/s]

Creating predictions for threshold=0.5:   0%|          | 0/170315 [00:00<?, ?it/s]

  0%|          | 0/8315 [00:00<?, ?it/s]

0.8216235718580877


Creating predictions for threshold=0.5:   0%|          | 0/170315 [00:00<?, ?it/s]

  0%|          | 0/8315 [00:00<?, ?it/s]

predictions.head()=                image                                        predictions
0  27eed76d08871e.jpg  62ec6fea7ad5 new_individual 9ba53b18ffea 76f62...
1  10b11b3fe7cdef.jpg  938b7e931166 new_individual 938b7e931166 5bf17...
2  05b9a41635a275.jpg  ca69a5d7c122 new_individual 9bb90a97f325 0d73b...
3  28c85429214706.jpg  938b7e931166 new_individual 938b7e931166 5bf17...
4  2d6c2725c91aed.jpg  783f357cf2f9 new_individual 2280b5fcc6c2 444dd...


In [41]:
train_image_names = convnext_base_data['train']['train_image_names']
val_image_names = convnext_base_data['val']['val_image_names']


train_embeddings = convnext_base_data['train']['train_embeddings']
val_embeddings = convnext_base_data['val']['val_embeddings']

train_targets = convnext_base_data['train']['train_targets']
val_targets = convnext_base_data['val']['val_targets']

D, I = create_and_search_index(512, train_embeddings, val_embeddings, 50)  # noqa: E741
print("Created index with train_embeddings")

val_targets_df = create_val_targets_df(train_targets, val_image_names, val_targets)
val_df = create_distances_df(val_image_names, train_targets, D, I, "val")
all_pred =  get_predictions(val_df, threshold=0.5)

cv = get_cv(val_targets_df, all_pred)
print(cv)


predictions = create_predictions_df(val_df, 0.5)
print(f"predictions.head()={predictions.head()}")
predictions = predictions.drop_duplicates()
predictions.to_csv(f"./cache/convnext/convnext_base_384_in22ft1k_512_4_{cv:.4f}_submission.csv", index=False)

Created index with train_embeddings


Creating val_df: 0it [00:00, ?it/s]

Creating predictions for threshold=0.5:   0%|          | 0/157547 [00:00<?, ?it/s]

  0%|          | 0/8314 [00:00<?, ?it/s]

0.8203812845802261


Creating predictions for threshold=0.5:   0%|          | 0/157547 [00:00<?, ?it/s]

  0%|          | 0/8314 [00:00<?, ?it/s]

predictions.head()=                image                                        predictions
0  fbb4bd3b4fa175.jpg  938b7e931166 new_individual 938b7e931166 5bf17...
1  eba89d4f35d15c.jpg  19fbb960f07d new_individual 938b7e931166 5bf17...
2  eae5feb67dfe9f.jpg  37c7aba965a5 new_individual 938b7e931166 5bf17...
3  fc49ab1c7ce8f9.jpg  938b7e931166 new_individual 938b7e931166 5bf17...
4  e2284759a75a9e.jpg  37c7aba965a5 new_individual 938b7e931166 5bf17...


In [None]:
train_image_names = efficientnet_data['train']['train_image_names']
val_image_names = efficientnet_data['val']['val_image_names']


train_embeddings = efficientnet_data['train']['train_embeddings']
val_embeddings = efficientnet_data['val']['val_embeddings']

train_targets = efficientnet_data['train']['train_targets']
val_targets = efficientnet_data['val']['val_targets']

D, I = create_and_search_index(512, train_embeddings, val_embeddings, 50)  # noqa: E741
print("Created index with train_embeddings")

val_targets_df = create_val_targets_df(train_targets, val_image_names, val_targets)
val_df = create_distances_df(val_image_names, train_targets, D, I, "val")
all_pred =  get_predictions(val_df, threshold=0.5)

cv = get_cv(val_targets_df, all_pred)
print(cv)

predictions = create_predictions_df(val_df, 0.5)
print(f"predictions.head()={predictions.head()}")
predictions = predictions.drop_duplicates()
predictions.to_csv(f"./cache/efficientnet/tf_efficientnet_b7_ns_512_0_{cv:.4f}_submission.csv", index=False)

In [74]:
train_image_names = efficientnet_b5_data['train']['train_image_names']
val_image_names = efficientnet_b5_data['val']['val_image_names']


train_embeddings = efficientnet_b5_data['train']['train_embeddings']
val_embeddings = efficientnet_b5_data['val']['val_embeddings']

train_targets = efficientnet_b5_data['train']['train_targets']
val_targets = efficientnet_b5_data['val']['val_targets']

D, I = create_and_search_index(512, train_embeddings, val_embeddings, 50)  # noqa: E741
print("Created index with train_embeddings")

val_targets_df = create_val_targets_df(train_targets, val_image_names, val_targets)
val_df = create_distances_df(val_image_names, train_targets, D, I, "val")
all_pred =  get_predictions(val_df, threshold=0.5)

cv = get_cv(val_targets_df, all_pred)
print(cv)

predictions = create_predictions_df(val_df, 0.5)
print(f"predictions.head()={predictions.head()}")
predictions = predictions.drop_duplicates()
predictions.to_csv(f"./cache/efficientnet/tf_efficientnet_b5_ns_512_0_{cv:.4f}_submission.csv", index=False)

Created index with train_embeddings


Creating val_df: 0it [00:00, ?it/s]

Creating predictions for threshold=0.5:   0%|          | 0/165764 [00:00<?, ?it/s]

  0%|          | 0/8315 [00:00<?, ?it/s]

0.8371236720785727


Creating predictions for threshold=0.5:   0%|          | 0/165764 [00:00<?, ?it/s]

  0%|          | 0/8315 [00:00<?, ?it/s]

predictions.head()=                image                                        predictions
0  27eed76d08871e.jpg  62ec6fea7ad5 new_individual 0dbe0c61bb2f bf2e0...
1  05b9a41635a275.jpg  ca69a5d7c122 new_individual 435526bea29f 73549...
2  18b27f84a0ab8f.jpg  bbeac4b2964e new_individual 197df8b4dfd4 938b7...
3  0b84e4c05bb67a.jpg  ce6e37904aa4 new_individual 938b7e931166 5bf17...
4  1b3495ba16b398.jpg  bbeac4b2964e new_individual 197df8b4dfd4 b61c1...


In [31]:
train_image_names = efficientnet_b6_data['train']['train_image_names']
val_image_names = efficientnet_b6_data['val']['val_image_names']


train_embeddings = efficientnet_b6_data['train']['train_embeddings']
val_embeddings = efficientnet_b6_data['val']['val_embeddings']

train_targets = efficientnet_b6_data['train']['train_targets']
val_targets = efficientnet_b6_data['val']['val_targets']

D, I = create_and_search_index(512, train_embeddings, val_embeddings, 50)  # noqa: E741
print("Created index with train_embeddings")

val_targets_df = create_val_targets_df(train_targets, val_image_names, val_targets)
val_df = create_distances_df(val_image_names, train_targets, D, I, "val")
all_pred =  get_predictions(val_df, threshold=0.5)

cv = get_cv(val_targets_df, all_pred)
print(cv)

predictions = create_predictions_df(val_df, 0.5)
print(f"predictions.head()={predictions.head()}")
predictions = predictions.drop_duplicates()
predictions.to_csv(f"./cache/efficientnet/tf_efficientnet_b6_ns_512_4_{cv:.4f}_submission.csv", index=False)

Created index with train_embeddings


Creating val_df: 0it [00:00, ?it/s]

Creating predictions for threshold=0.5:   0%|          | 0/158604 [00:00<?, ?it/s]

  0%|          | 0/8314 [00:00<?, ?it/s]

0.841650629460348


Creating predictions for threshold=0.5:   0%|          | 0/158604 [00:00<?, ?it/s]

  0%|          | 0/8314 [00:00<?, ?it/s]

predictions.head()=                image                                        predictions
0  d2ae3b7448d1d4.jpg  1a20c92ffe68 new_individual 8ee65de044e7 078dd...
1  e9d46ff35fe37f.jpg  1a20c92ffe68 new_individual 8ee65de044e7 078dd...
2  b43b6b20162cc7.jpg  bbeac4b2964e new_individual 83afdba5e8e5 1d4dc...
3  c64ffcd14442e7.jpg  1a20c92ffe68 new_individual 8ee65de044e7 078dd...
4  dd4a36261a3c97.jpg  dad9b2cc8452 new_individual a0eabaf2a660 64e52...


In [24]:
import csv
import pandas as pd 

# sub_files = [
#                  './pl_convnext_large_384_in22ft1k_384_0_submission.csv',
#                  './pl_convnext_large_384_in22ft1k_384_1_submission.csv',
#                  './pl_convnext_large_384_in22ft1k_384_2_submission.csv',
#                  './pl_convnext_large_384_in22ft1k_384_3_submission.csv',
#                  './pl_convnext_large_384_in22ft1k_384_4_submission.csv'
# ]

# sub_files = [
#                  './pl_tf_efficientnet_b7_ns_512_0_submission.csv',
#                  './pl_tf_efficientnet_b7_ns_512_1_submission.csv',
#                  './pl_tf_efficientnet_b7_ns_512_2_submission.csv',
#                  './pl_tf_efficientnet_b7_ns_512_3_submission.csv',
#                  './pl_tf_efficientnet_b7_ns_512_4_submission.csv'
# ]

# sub_files = [
#                  './pl_tf_efficientnet_b7_ns_512_ensemble_submission.csv',
#                  './pl_convnext_large_384_in22ft1k_384_ensemble_submission.csv',
#                  './swin_tf_764.csv',
# ]

sub_files = [
                 './cache/convnext/convnext_large_384_in22ft1k_384_0_0.8216_submission.csv',
                 './cache/efficientnet/tf_efficientnet_b5_ns_512_0_0.8371_submission.csv',
                 './cache/efficientnet/tf_efficientnet_b6_ns_512_0_0.8428_submission.csv',
                 './cache/efficientnet/tf_efficientnet_b7_ns_512_0_0.8414_submission.csv',
]


# Weights of the individual subs
# sub_weight = [
#                 0.748**2,
#                 0.742**2,
#                 0.764**2,
#             ]

In [None]:
efficientnet_b6_data = torch.load("./cache/efficientnet/tf_efficientnet_b6_ns_512_0.pth")
print("efficientnet: ", efficientnet_b6_data['train']['train_image_names'])
print("efficientnet: ", efficientnet_b6_data['train']['train_targets'])


train_image_names = efficientnet_b6_data['train']['train_image_names']
val_image_names = efficientnet_b6_data['val']['val_image_names']


train_embeddings = efficientnet_b6_data['train']['train_embeddings']
val_embeddings = efficientnet_b6_data['val']['val_embeddings']

train_targets = efficientnet_b6_data['train']['train_targets']
val_targets = efficientnet_b6_data['val']['val_targets']

D, I = create_and_search_index(512, train_embeddings, val_embeddings, 50)  # noqa: E741
print("Created index with train_embeddings")

val_targets_df = create_val_targets_df(train_targets, val_image_names, val_targets)
val_df = create_distances_df(val_image_names, train_targets, D, I, "val")
all_pred =  get_predictions(val_df, threshold=0.5)

best_cv = 0
for w1 in range(0, 110, 10):
    for w2 in range(0, 110-w1, 10):
        for w3 in range(0, 110-w1-w2, 10):
            w4 = 110-w1-w2-w3 
            sub_weight = [w1/100, w2/100, w3/100, w4/100]
    # w2 = 105-w1
    # sub_weight = [w1/100, w2/100]
            Hlabel = 'image' 
            Htarget = 'predictions'
            npt = 6
            place_weights = {}
            for i in range(npt):
                place_weights[i] = (1 / (i + 1))

            print(place_weights)

            lg = len(sub_files)
            sub = [None]*lg
            for i, file in enumerate( sub_files ):   
                # print("Reading {}: w={} - {}". format(i, sub_weight[i], file))
                reader = csv.DictReader(open(file,"r"))
                sub[i] = sorted(reader, key=lambda d: str(d[Hlabel]))

            # out = open("pl_convnext_large_384_in22ft1k_384_ensemble_submission.csv", "w", newline='')
            # out = open("pl_ensemble_v20220409_submission.csv", "w", newline='')
            # writer = csv.writer(out)
            # writer.writerow([Hlabel,Htarget])

            all_pred = {}

            for p, row in enumerate(sub[0]):
                target_weight = {}
                for s in range(lg):
                    row1 = sub[s][p]
                    for ind, trgt in enumerate(row1[Htarget].split(' ')):
                        target_weight[trgt] = target_weight.get(trgt,0) + (place_weights[ind]*sub_weight[s])
                tops_trgt = sorted(target_weight, key=target_weight.get, reverse=True)[:npt]
                all_pred[row1[Hlabel]] = tops_trgt
                # writer.writerow([row1[Hlabel], " ".join(tops_trgt)])
            # out.close()
            cv = get_cv(val_targets_df, all_pred)
            print(f"w1: {w1}, w2: {w2}, w3: {w3}, w4: {w4}, cv: {cv}")
            if cv > best_cv:
                best_cv = cv
        
print(f"best_cv: {best_cv}")

# Embedding Ensemble 

In [42]:
import torch

convnext_data = torch.load("./cache/convnext/convnext_large_384_in22ft1k_384_4.pth")
print("convnext: ", convnext_data['train']['train_image_names'])
print("convnext: ", convnext_data['train']['train_targets'])
print(convnext_data['train']['train_image_names'].shape)

convnext_base_data = torch.load("./cache/convnext/convnext_base_384_in22ft1k_512_4.pth")
print("convnext: ", convnext_base_data['train']['train_image_names'])
print("convnext: ", convnext_base_data['train']['train_targets'])


efficientnet_data = torch.load("./cache/efficientnet/tf_efficientnet_b7_ns_512_4.pth")
print("efficientnet: ", efficientnet_data['train']['train_image_names'])
print("efficientnet: ", efficientnet_data['train']['train_targets'])
print(efficientnet_data['train']['train_image_names'].shape)
print(efficientnet_data['val']['val_image_names'].shape)


efficientnet_b5_data = torch.load("./cache/efficientnet/tf_efficientnet_b5_ns_512_4.pth")
print("efficientnet: ", efficientnet_b5_data['train']['train_image_names'])
print("efficientnet: ", efficientnet_b5_data['train']['train_targets'])
print(efficientnet_b5_data['train']['train_image_names'].shape)
print(efficientnet_b5_data['val']['val_image_names'].shape)

efficientnet_b6_data = torch.load("./cache/efficientnet/tf_efficientnet_b6_ns_512_4.pth")
print("efficientnet: ", efficientnet_b6_data['train']['train_image_names'])
print("efficientnet: ", efficientnet_b6_data['train']['train_targets'])
print(efficientnet_b6_data['train']['train_image_names'].shape)
print(efficientnet_b6_data['val']['val_image_names'].shape)


convnext:  ['00021adfb725ed.jpg' '000562241d384d.jpg' '0007c33415ce37.jpg' ...
 'ffae18d2939ffc.jpg' 'ffaf6c062ccdef.jpg' 'ffb1ddd9a59530.jpg']
convnext:  ['cadddb1636b9' '1a71fbb72250' '60008f293a2b' ... '1a71fbb72250'
 '938167f9ea20' 'f92fcbf8f42d']
(33252,)
convnext:  ['00021adfb725ed.jpg' '000562241d384d.jpg' '0007c33415ce37.jpg' ...
 'ffae18d2939ffc.jpg' 'ffaf6c062ccdef.jpg' 'ffb1ddd9a59530.jpg']
convnext:  ['cadddb1636b9' '1a71fbb72250' '60008f293a2b' ... '1a71fbb72250'
 '938167f9ea20' 'f92fcbf8f42d']
efficientnet:  ['00021adfb725ed.jpg' '000562241d384d.jpg' '0007c33415ce37.jpg' ...
 'ffae18d2939ffc.jpg' 'ffaf6c062ccdef.jpg' 'ffb1ddd9a59530.jpg']
efficientnet:  ['cadddb1636b9' '1a71fbb72250' '60008f293a2b' ... '1a71fbb72250'
 '938167f9ea20' 'f92fcbf8f42d']
(33252,)
(8314,)
efficientnet:  ['00021adfb725ed.jpg' '000562241d384d.jpg' '0007c33415ce37.jpg' ...
 'ffae18d2939ffc.jpg' 'ffaf6c062ccdef.jpg' 'ffb1ddd9a59530.jpg']
efficientnet:  ['cadddb1636b9' '1a71fbb72250' '60008f293a2b' .

In [43]:
NORM = True

train_image_names = convnext_data['train']['train_image_names']
val_image_names = convnext_data['val']['val_image_names']


train_embeddings_conv = convnext_data['train']['train_embeddings']
val_embeddings_conv = convnext_data['val']['val_embeddings']

train_embeddings_conv_base = convnext_base_data['train']['train_embeddings']
val_embeddings_conv_base = convnext_base_data['val']['val_embeddings']

train_embeddings_eff = efficientnet_data['train']['train_embeddings']
val_embeddings_eff = efficientnet_data['val']['val_embeddings']

train_embeddings_eff_b5 = efficientnet_b5_data['train']['train_embeddings']
val_embeddings_eff_b5 = efficientnet_b5_data['val']['val_embeddings']

train_embeddings_eff_b6 = efficientnet_b6_data['train']['train_embeddings']
val_embeddings_eff_b6 = efficientnet_b6_data['val']['val_embeddings']

if NORM:
    train_embeddings_conv = train_embeddings_conv / np.linalg.norm(train_embeddings_conv, axis=1)[:, None]
    train_embeddings_conv_base = train_embeddings_conv_base / np.linalg.norm(train_embeddings_conv_base, axis=1)[:, None]
    train_embeddings_eff = train_embeddings_eff / np.linalg.norm(train_embeddings_eff, axis=1)[:, None]
    train_embeddings_eff_b5 = train_embeddings_eff_b5 / np.linalg.norm(train_embeddings_eff_b5, axis=1)[:, None]
    train_embeddings_eff_b6 = train_embeddings_eff_b6 / np.linalg.norm(train_embeddings_eff_b6, axis=1)[:, None]
    val_embeddings_conv = val_embeddings_conv / np.linalg.norm(val_embeddings_conv, axis=1)[:, None]
    val_embeddings_conv_base = val_embeddings_conv_base / np.linalg.norm(val_embeddings_conv_base, axis=1)[:, None]
    val_embeddings_eff = val_embeddings_eff / np.linalg.norm(val_embeddings_eff, axis=1)[:, None]
    val_embeddings_eff_b5 = val_embeddings_eff_b5 / np.linalg.norm(val_embeddings_eff_b5, axis=1)[:, None]
    val_embeddings_eff_b6 = val_embeddings_eff_b6 / np.linalg.norm(val_embeddings_eff_b6, axis=1)[:, None]

train_concat_list = [train_embeddings_conv, train_embeddings_conv_base, train_embeddings_eff, train_embeddings_eff_b5, train_embeddings_eff_b6]
val_concat_list = [val_embeddings_conv, val_embeddings_conv_base, val_embeddings_eff, val_embeddings_eff_b5, val_embeddings_eff_b6]
train_embeddings = np.concatenate(train_concat_list, axis=1)
val_embeddings = np.concatenate(val_concat_list, axis=1)

if NORM:
    train_embeddings = train_embeddings / np.linalg.norm(train_embeddings, axis=1)[:, None]
    val_embeddings = val_embeddings / np.linalg.norm(val_embeddings, axis=1)[:, None]

train_targets = efficientnet_data['train']['train_targets']
val_targets = efficientnet_data['val']['val_targets']

D, I = create_and_search_index(512*len(train_concat_list), train_embeddings, val_embeddings, 50)  # noqa: E741
print("Created index with train_embeddings")

val_targets_df = create_val_targets_df(train_targets, val_image_names, val_targets)
val_df = create_distances_df(val_image_names, train_targets, D, I, "val")
# all_pred =  get_predictions(val_df, threshold=0.5)

# cv = get_cv(val_targets_df, all_pred)
# print(cv)
best_th, best_cv = get_best_threshold(val_targets_df, val_df)

# predictions = create_predictions_df(val_df, 0.5)
# print(f"predictions.head()={predictions.head()}")
# predictions = predictions.drop_duplicates()


Created index with train_embeddings


Creating val_df: 0it [00:00, ?it/s]

Creating predictions for threshold=0.4:   0%|          | 0/159187 [00:00<?, ?it/s]

  0%|          | 0/8314 [00:00<?, ?it/s]

th=0.4 cv=0.8626172720712052


Creating predictions for threshold=0.42:   0%|          | 0/159187 [00:00<?, ?it/s]

  0%|          | 0/8314 [00:00<?, ?it/s]

th=0.42 cv=0.8671878758720231


Creating predictions for threshold=0.44:   0%|          | 0/159187 [00:00<?, ?it/s]

  0%|          | 0/8314 [00:00<?, ?it/s]

th=0.44 cv=0.8675487130141929


Creating predictions for threshold=0.46:   0%|          | 0/159187 [00:00<?, ?it/s]

  0%|          | 0/8314 [00:00<?, ?it/s]

th=0.46 cv=0.865323550637479


Creating predictions for threshold=0.48:   0%|          | 0/159187 [00:00<?, ?it/s]

  0%|          | 0/8314 [00:00<?, ?it/s]

th=0.48 cv=0.8618955977868655


Creating predictions for threshold=0.5:   0%|          | 0/159187 [00:00<?, ?it/s]

  0%|          | 0/8314 [00:00<?, ?it/s]

th=0.5 cv=0.8580466682703873


Creating predictions for threshold=0.52:   0%|          | 0/159187 [00:00<?, ?it/s]

  0%|          | 0/8314 [00:00<?, ?it/s]

th=0.52 cv=0.85227327399567


Creating predictions for threshold=0.54:   0%|          | 0/159187 [00:00<?, ?it/s]

  0%|          | 0/8314 [00:00<?, ?it/s]

th=0.54 cv=0.8473418330526823


Creating predictions for threshold=0.56:   0%|          | 0/159187 [00:00<?, ?it/s]

  0%|          | 0/8314 [00:00<?, ?it/s]

th=0.56 cv=0.8410271830647101


Creating predictions for threshold=0.58:   0%|          | 0/159187 [00:00<?, ?it/s]

  0%|          | 0/8314 [00:00<?, ?it/s]

th=0.58 cv=0.8359153235506375
best_th=0.44
best_cv=0.8675487130141929
best_th_adjusted=0.4


# Embedding Ensemble Inference

In [49]:
import torch

convnext_data = torch.load("./cache/convnext/convnext_large_384_in22ft1k_384_0.pth")
print("convnext: ", convnext_data['train']['train_image_names'])
print("convnext: ", convnext_data['train']['train_targets'])
print(convnext_data['train']['train_image_names'].shape)

convnext_base_data = torch.load("./cache/convnext/convnext_base_384_in22ft1k_512_0.pth")
print("convnext: ", convnext_base_data['train']['train_image_names'])
print("convnext: ", convnext_base_data['train']['train_targets'])


efficientnet_data = torch.load("./cache/efficientnet/tf_efficientnet_b7_ns_512_0.pth")
print("efficientnet: ", efficientnet_data['train']['train_image_names'])
print("efficientnet: ", efficientnet_data['train']['train_targets'])
print(efficientnet_data['train']['train_image_names'].shape)
print(efficientnet_data['val']['val_image_names'].shape)


efficientnet_b5_data = torch.load("./cache/efficientnet/tf_efficientnet_b5_ns_512_0.pth")
print("efficientnet: ", efficientnet_b5_data['train']['train_image_names'])
print("efficientnet: ", efficientnet_b5_data['train']['train_targets'])
print(efficientnet_b5_data['train']['train_image_names'].shape)
print(efficientnet_b5_data['val']['val_image_names'].shape)

efficientnet_b6_data = torch.load("./cache/efficientnet/tf_efficientnet_b6_ns_512_0.pth")
print("efficientnet: ", efficientnet_b6_data['train']['train_image_names'])
print("efficientnet: ", efficientnet_b6_data['train']['train_targets'])
print(efficientnet_b6_data['train']['train_image_names'].shape)
print(efficientnet_b6_data['val']['val_image_names'].shape)


convnext:  ['000562241d384d.jpg' '001b0900f56e89.jpg' '0029d877c29ab4.jpg' ...
 'ffecd4a6afc94a.jpg' 'ffeed309a3081e.jpg' 'fff219829b3c68.jpg']
convnext:  ['1a71fbb72250' 'bc14b5054353' 'c27f0a7f4e5a' ... '424d49bef76e'
 'afe8b7c1b1c6' 'ada9e2afe2bd']
(33252,)
convnext:  ['000562241d384d.jpg' '001b0900f56e89.jpg' '0029d877c29ab4.jpg' ...
 'ffecd4a6afc94a.jpg' 'ffeed309a3081e.jpg' 'fff219829b3c68.jpg']
convnext:  ['1a71fbb72250' 'bc14b5054353' 'c27f0a7f4e5a' ... '424d49bef76e'
 'afe8b7c1b1c6' 'ada9e2afe2bd']
efficientnet:  ['000562241d384d.jpg' '001b0900f56e89.jpg' '0029d877c29ab4.jpg' ...
 'ffecd4a6afc94a.jpg' 'ffeed309a3081e.jpg' 'fff219829b3c68.jpg']
efficientnet:  ['1a71fbb72250' 'bc14b5054353' 'c27f0a7f4e5a' ... '424d49bef76e'
 'afe8b7c1b1c6' 'ada9e2afe2bd']
(33252,)
(8315,)
efficientnet:  ['000562241d384d.jpg' '001b0900f56e89.jpg' '0029d877c29ab4.jpg' ...
 'ffecd4a6afc94a.jpg' 'ffeed309a3081e.jpg' 'fff219829b3c68.jpg']
efficientnet:  ['1a71fbb72250' 'bc14b5054353' 'c27f0a7f4e5a' .

In [50]:
from pathlib import Path
import numpy as np
import pandas as pd

INPUT_DIR = Path('../datasets/kaggle/happy-whale-and-dolphin/')
OUTPUT_DIR = Path("./")

DATA_ROOT_DIR = INPUT_DIR / "happy-whale-and-dolphin-backfin"
TRAIN_DIR = DATA_ROOT_DIR / "train_images"
TEST_DIR = DATA_ROOT_DIR / "test_images"

PUBLIC_SUBMISSION_CSV_PATH = INPUT_DIR / "720" / "submission.csv"
IDS_WITHOUT_BACKFIN_PATH = INPUT_DIR / "backfin" / "ids_without_backfin.npy"

NORM = True

train_image_names = convnext_data['train']['train_image_names']
val_image_names = convnext_data['val']['val_image_names']
test_image_names = convnext_data['test']['test_image_names']


train_embeddings_conv = convnext_data['train']['train_embeddings']
val_embeddings_conv = convnext_data['val']['val_embeddings']
test_embeddings_conv = convnext_data['test']['test_embeddings']

train_embeddings_conv_base = convnext_base_data['train']['train_embeddings']
val_embeddings_conv_base = convnext_base_data['val']['val_embeddings']
test_embeddings_conv_base = convnext_base_data['test']['test_embeddings']

train_embeddings_eff = efficientnet_data['train']['train_embeddings']
val_embeddings_eff = efficientnet_data['val']['val_embeddings']
test_embeddings_eff = efficientnet_data['test']['test_embeddings']

train_embeddings_eff_b5 = efficientnet_b5_data['train']['train_embeddings']
val_embeddings_eff_b5 = efficientnet_b5_data['val']['val_embeddings']
test_embeddings_eff_b5 = efficientnet_b5_data['test']['test_embeddings']

train_embeddings_eff_b6 = efficientnet_b6_data['train']['train_embeddings']
val_embeddings_eff_b6 = efficientnet_b6_data['val']['val_embeddings']
test_embeddings_eff_b6 = efficientnet_b6_data['test']['test_embeddings']


if NORM:
    train_embeddings_conv = train_embeddings_conv / np.linalg.norm(train_embeddings_conv, axis=1)[:, None]
    train_embeddings_conv_base = train_embeddings_conv_base / np.linalg.norm(train_embeddings_conv_base, axis=1)[:, None]
    train_embeddings_eff = train_embeddings_eff / np.linalg.norm(train_embeddings_eff, axis=1)[:, None]
    train_embeddings_eff_b5 = train_embeddings_eff_b5 / np.linalg.norm(train_embeddings_eff_b5, axis=1)[:, None]
    train_embeddings_eff_b6 = train_embeddings_eff_b6 / np.linalg.norm(train_embeddings_eff_b6, axis=1)[:, None]
    val_embeddings_conv = val_embeddings_conv / np.linalg.norm(val_embeddings_conv, axis=1)[:, None]
    val_embeddings_conv_base = val_embeddings_conv_base / np.linalg.norm(val_embeddings_conv_base, axis=1)[:, None]
    val_embeddings_eff = val_embeddings_eff / np.linalg.norm(val_embeddings_eff, axis=1)[:, None]
    val_embeddings_eff_b5 = val_embeddings_eff_b5 / np.linalg.norm(val_embeddings_eff_b5, axis=1)[:, None]
    val_embeddings_eff_b6 = val_embeddings_eff_b6 / np.linalg.norm(val_embeddings_eff_b6, axis=1)[:, None]
    test_embeddings_conv = test_embeddings_conv / np.linalg.norm(test_embeddings_conv, axis=1)[:, None]
    test_embeddings_conv_base = test_embeddings_conv_base / np.linalg.norm(test_embeddings_conv_base, axis=1)[:, None]
    test_embeddings_eff = test_embeddings_eff / np.linalg.norm(test_embeddings_eff, axis=1)[:, None]
    test_embeddings_eff_b5 = test_embeddings_eff_b5 / np.linalg.norm(test_embeddings_eff_b5, axis=1)[:, None]
    test_embeddings_eff_b6 = test_embeddings_eff_b6 / np.linalg.norm(test_embeddings_eff_b6, axis=1)[:, None]

    
train_concat_list = [train_embeddings_conv, train_embeddings_conv_base, train_embeddings_eff, train_embeddings_eff_b5, train_embeddings_eff_b6]
val_concat_list = [val_embeddings_conv, val_embeddings_conv_base, val_embeddings_eff, val_embeddings_eff_b5, val_embeddings_eff_b6]
test_concat_list = [test_embeddings_conv, test_embeddings_conv_base, test_embeddings_eff, test_embeddings_eff_b5, test_embeddings_eff_b6]

train_embeddings = np.concatenate(train_concat_list, axis=1)
val_embeddings = np.concatenate(val_concat_list, axis=1)
test_embeddings = np.concatenate(test_concat_list, axis=1)

if NORM:
    train_embeddings = train_embeddings / np.linalg.norm(train_embeddings, axis=1)[:, None]
    val_embeddings = val_embeddings / np.linalg.norm(val_embeddings, axis=1)[:, None]
    test_embeddings = test_embeddings / np.linalg.norm(test_embeddings, axis=1)[:, None]

train_targets = convnext_data['train']['train_targets']
val_targets = convnext_data['val']['val_targets']
    
train_embeddings = np.concatenate([train_embeddings, val_embeddings])
train_targets = np.concatenate([train_targets, val_targets])
print("Updated train_embeddings and train_targets with val data")

D, I = create_and_search_index(512 * len(train_concat_list), train_embeddings, test_embeddings, 50)  # noqa: E741
print("Created index with train_embeddings")

test_df = create_distances_df(test_image_names, train_targets, D, I, "test")
print(f"test_df=\n{test_df.head()}")

predictions = create_predictions_df(test_df, 0.4)
print(f"predictions.head()={predictions.head()}")

# Fix missing predictions
# From https://www.kaggle.com/code/jpbremer/backfins-arcface-tpu-effnet/notebook
public_predictions = pd.read_csv(PUBLIC_SUBMISSION_CSV_PATH)
ids_without_backfin = np.load(IDS_WITHOUT_BACKFIN_PATH, allow_pickle=True)

ids2 = public_predictions["image"][~public_predictions["image"].isin(predictions["image"])]

predictions = pd.concat(
[
    predictions[~(predictions["image"].isin(ids_without_backfin))],
    public_predictions[public_predictions["image"].isin(ids_without_backfin)],
    public_predictions[public_predictions["image"].isin(ids2)],
]
)
predictions = predictions.drop_duplicates()

predictions.to_csv("embd_convnext_large_384_in22ft1k_384_0+convnext_base_384_in22ft1k_512_0+tf_efficientnet_b7_ns_512_0+tf_efficientnet_b5_ns_512_0+tf_efficientnet_b6_ns_512_0_norm_submission.csv", index=False)
    

Updated train_embeddings and train_targets with val data
Created index with train_embeddings


Creating test_df: 0it [00:00, ?it/s]

test_df=
                image        target  distances
0  a3a9c424ef9f06.jpg  0ed88187dcb5   0.993922
1  e9abb76a5bed89.jpg  35f898e6595e   0.993084
2  5fb61ff7e07076.jpg  2e0b381d3467   0.992452
3  7a785b700b0339.jpg  c93996835aa8   0.991386
4  0d61c514065cef.jpg  2dd8974deb39   0.991109


Creating predictions for threshold=0.4:   0%|          | 0/644752 [00:00<?, ?it/s]

  0%|          | 0/27942 [00:00<?, ?it/s]

predictions.head()=                image                                        predictions
0  a3a9c424ef9f06.jpg  0ed88187dcb5 new_individual 3608e0cc1c01 b9a19...
1  e9abb76a5bed89.jpg  35f898e6595e new_individual 938b7e931166 5bf17...
2  5fb61ff7e07076.jpg  2e0b381d3467 new_individual 938b7e931166 5bf17...
3  7a785b700b0339.jpg  c93996835aa8 new_individual 958e201f3f4d 862ad...
4  0d61c514065cef.jpg  2dd8974deb39 new_individual c77668767d5c 7aa63...


# 加权测试集

In [None]:
import csv
import pandas as pd 

sub_files = [
                 './pl_convnext_large_384_in22ft1k_384_0_submission.csv',
                 './pl_tf_efficientnet_b7_ns_512_0_submission.csv',
]

# Weights of the individual subs
sub_weight = [
                0.4,
                0.6
             ]

In [None]:
Hlabel = 'image' 
Htarget = 'predictions'
npt = 6
place_weights = {}
for i in range(npt):
    place_weights[i] = ( 1 / (i + 1) )
    
print(place_weights)

lg = len(sub_files)
sub = [None]*lg
for i, file in enumerate( sub_files ):
   
    print("Reading {}: w={} - {}". format(i, sub_weight[i], file))
    reader = csv.DictReader(open(file,"r"))
    sub[i] = sorted(reader, key=lambda d: str(d[Hlabel]))

out = open("convnext_large_384_in22ft1k_384_1+tf_efficientnet_b7_ns_512_1_submission.csv", "w", newline='')
writer = csv.writer(out)
writer.writerow([Hlabel,Htarget])

for p, row in enumerate(sub[0]):
    target_weight = {}
    for s in range(lg):
        row1 = sub[s][p]
        for ind, trgt in enumerate(row1[Htarget].split(' ')):
            target_weight[trgt] = target_weight.get(trgt,0) + (place_weights[ind]*sub_weight[s])
    tops_trgt = sorted(target_weight, key=target_weight.get, reverse=True)[:npt]
    writer.writerow([row1[Hlabel], " ".join(tops_trgt)])
out.close()

In [None]:
INPUT_DIR = Path('../datasets/kaggle/happy-whale-and-dolphin/')
OUTPUT_DIR = Path("./")

DATA_ROOT_DIR = INPUT_DIR / "happy-whale-and-dolphin-backfin"
TRAIN_DIR = DATA_ROOT_DIR / "train_images"
TEST_DIR = DATA_ROOT_DIR / "test_images"

PUBLIC_SUBMISSION_CSV_PATH = INPUT_DIR / "720" / "submission.csv"
IDS_WITHOUT_BACKFIN_PATH = INPUT_DIR / "backfin" / "ids_without_backfin.npy"

train_image_names = convnext_data['train']['train_image_names']
val_image_names = convnext_data['val']['val_image_names']
test_image_names = convnext_data['test']['test_image_names']

train_embeddings = convnext_data['train']['train_embeddings']
val_embeddings = convnext_data['val']['val_embeddings']
test_embeddings = convnext_data['test']['test_embeddings']

train_targets = convnext_data['train']['train_targets']
val_targets = convnext_data['val']['val_targets']
test_targets = convnext_data['test']['test_targets']


train_embeddings = np.concatenate([train_embeddings, val_embeddings])
train_targets = np.concatenate([train_targets, val_targets])
print("Updated train_embeddings and train_targets with val data")

D, I = create_and_search_index(512, train_embeddings, test_embeddings, 50)  # noqa: E741
print("Created index with train_embeddings")

test_df = create_distances_df(test_image_names, train_targets, D, I, "test")
print(f"test_df=\n{test_df.head()}")

predictions = create_predictions_df(test_df, 0.5)
print(f"predictions.head()={predictions.head()}")

# Fix missing predictions
# From https://www.kaggle.com/code/jpbremer/backfins-arcface-tpu-effnet/notebook
public_predictions = pd.read_csv(PUBLIC_SUBMISSION_CSV_PATH)
# public_predictions = pd.read_csv("./swin_tf_764.csv")
ids_without_backfin = np.load(IDS_WITHOUT_BACKFIN_PATH, allow_pickle=True)

ids2 = public_predictions["image"][~public_predictions["image"].isin(predictions["image"])]
print(ids2)
print(public_predictions[public_predictions["image"].isin(ids_without_backfin)])

predictions = pd.concat(
    [
        predictions[~(predictions["image"].isin(ids_without_backfin))],
        public_predictions[public_predictions["image"].isin(ids_without_backfin)],
        public_predictions[public_predictions["image"].isin(ids2)],
    ]
)
predictions = predictions.drop_duplicates()

# predictions.to_csv(SUBMISSION_CSV_PATH, index=False)
    

# 二次 Ensemble

In [51]:
import csv
import pandas as pd 

sub_files = [
                 './embd_convnext_large_384_in22ft1k_384_0+convnext_base_384_in22ft1k_512_0+tf_efficientnet_b7_ns_512_0+tf_efficientnet_b5_ns_512_0+tf_efficientnet_b6_ns_512_0_norm_submission.csv',
                 './embd_convnext_large_384_in22ft1k_384_1+convnext_base_384_in22ft1k_512_1+tf_efficientnet_b7_ns_512_1+tf_efficientnet_b5_ns_512_1+tf_efficientnet_b6_ns_512_1_norm_submission.csv',
                 './embd_convnext_large_384_in22ft1k_384_2+convnext_base_384_in22ft1k_512_2+tf_efficientnet_b7_ns_512_2+tf_efficientnet_b5_ns_512_2+tf_efficientnet_b6_ns_512_2_norm_submission.csv',
                 './embd_convnext_large_384_in22ft1k_384_3+convnext_base_384_in22ft1k_512_3+tf_efficientnet_b7_ns_512_3+tf_efficientnet_b5_ns_512_3+tf_efficientnet_b6_ns_512_3_norm_submission.csv',
                 './embd_convnext_large_384_in22ft1k_384_4+convnext_base_384_in22ft1k_512_4+tf_efficientnet_b7_ns_512_4+tf_efficientnet_b5_ns_512_4+tf_efficientnet_b6_ns_512_4_norm_submission.csv'
            ]

# sub_files = [    './embd_convnext_large_384_in22ft1k_384+tf_efficientnet_b7_ns_512+tf_efficientnet_b5_ns_512_norm_submission.csv',
#                  './tf_swin_effb7_effb5_777.csv',
#                  './tf_effv2_762.csv'

#             ]

# Weights of the individual subs
sub_weight = [
                 0.2,
                 0.2,
                 0.2,
                 0.2,
                 0.2,
             ]

# Weights of the individual subs
# N_wight=0.5
# Base_wight=1.5

# l1 = [0.774, 0.777, 0.762]
# l2 = [Base_wight+N_wight*i for i in range(len(l1))]
# sub_weight = list(map(lambda x,y: x**y ,l1,l2))

In [52]:
Hlabel = 'image' 
Htarget = 'predictions'
npt = 6
place_weights = {}
for i in range(npt):
    place_weights[i] = ( 1 / (i + 1) )
    
print(place_weights)

lg = len(sub_files)
sub = [None]*lg
for i, file in enumerate( sub_files ):
   
    print("Reading {}: w={} - {}". format(i, sub_weight[i], file))
    reader = csv.DictReader(open(file,"r"))
    sub[i] = sorted(reader, key=lambda d: str(d[Hlabel]))

out = open("embd_convnext_large_384_in22ft1k_384+convnext_base_384_in22ft1k_512+tf_efficientnet_b7_ns_512+tf_efficientnet_b5_ns_512+tf_efficientnet_b6_ns_512_norm_submission.csv", "w", newline='')
writer = csv.writer(out)
writer.writerow([Hlabel,Htarget])

for p, row in enumerate(sub[0]):
    target_weight = {}
    for s in range(lg):
        row1 = sub[s][p]
        for ind, trgt in enumerate(row1[Htarget].split(' ')):
            target_weight[trgt] = target_weight.get(trgt,0) + (place_weights[ind]*sub_weight[s])
    tops_trgt = sorted(target_weight, key=target_weight.get, reverse=True)[:npt]
    writer.writerow([row1[Hlabel], " ".join(tops_trgt)])
out.close()

{0: 1.0, 1: 0.5, 2: 0.3333333333333333, 3: 0.25, 4: 0.2, 5: 0.16666666666666666}
Reading 0: w=0.2 - ./embd_convnext_large_384_in22ft1k_384_0+convnext_base_384_in22ft1k_512_0+tf_efficientnet_b7_ns_512_0+tf_efficientnet_b5_ns_512_0+tf_efficientnet_b6_ns_512_0_norm_submission.csv
Reading 1: w=0.2 - ./embd_convnext_large_384_in22ft1k_384_1+convnext_base_384_in22ft1k_512_1+tf_efficientnet_b7_ns_512_1+tf_efficientnet_b5_ns_512_1+tf_efficientnet_b6_ns_512_1_norm_submission.csv
Reading 2: w=0.2 - ./embd_convnext_large_384_in22ft1k_384_2+convnext_base_384_in22ft1k_512_2+tf_efficientnet_b7_ns_512_2+tf_efficientnet_b5_ns_512_2+tf_efficientnet_b6_ns_512_2_norm_submission.csv
Reading 3: w=0.2 - ./embd_convnext_large_384_in22ft1k_384_3+convnext_base_384_in22ft1k_512_3+tf_efficientnet_b7_ns_512_3+tf_efficientnet_b5_ns_512_3+tf_efficientnet_b6_ns_512_3_norm_submission.csv
Reading 4: w=0.2 - ./embd_convnext_large_384_in22ft1k_384_4+convnext_base_384_in22ft1k_512_4+tf_efficientnet_b7_ns_512_4+tf_efficie

# Results 

## CSV ensemble

fold4: convnext_large_384_in22ft1k_384_4_0.8233 * 0.35 + tf_efficientnet_b7_ns_512_4_0.8387 * 0.65 = 0.8436

fold3: convnext_large_384_in22ft1k_384_3_0.8200 * 0.4 + tf_efficientnet_b7_ns_512_3_0.8397 * 0.6 = 0.8443

fold2: convnext_large_384_in22ft1k_384_2_0.8182 * 0.4 + tf_efficientnet_b7_ns_512_2_0.8390 * 0.6 = 0.8445

fold1: convnext_large_384_in22ft1k_384_1_0.8169 * 0.4 + tf_efficientnet_b7_ns_512_1_0.8311 * 0.6 = 0.8373

fold0: 
convnext_large_384_in22ft1k_384_0_0.8216 * 0.4 +
tf_efficientnet_b7_ns_512_0_0.8414 * 0.6 = 0.8463

## Emebdding ensemble

fold4: 

convnext_large_384_in22ft1k_384_4_0.8233 + tf_efficientnet_b7_ns_512_4_0.8387

best_th=0.9
best_cv=0.8559798733060701
best_th_adjusted=0.8

convnext_large_384_in22ft1k_384_4_0.8233 + 
tf_efficientnet_b7_ns_512_4_0.8387 + 
tf_efficientnet_b5_ns_512_4_0.8400 + NORM

best_th=0.44
best_cv=0.8645016438136476
best_th_adjusted=0.4


convnext_large_384_in22ft1k_384_4_0.8233 + 
tf_efficientnet_b7_ns_512_4_0.8387 + 
tf_efficientnet_b6_ns_512_4_0.8417 +
tf_efficientnet_b5_ns_512_4_0.8400 + NORM

best_th=0.44
best_cv=0.8657306073361394
best_th_adjusted=0.4

tf_efficientnet_b7_ns_512_4_0.8387 + 
tf_efficientnet_b5_ns_512_4_0.8400

best_th=0.9
best_cv=0.8587302541897202
best_th_adjusted=0.9

tf_efficientnet_b7_ns_512_4_0.8387 + 
tf_efficientnet_b5_ns_512_4_0.8400 + NORM

best_th=0.5
best_cv=0.8539792318178173
best_th_adjusted=0.4


convnext_large_384_in22ft1k_384_4_0.8233 + 
convnext_base_384_in22ft1k_512_4_0.8204 +
tf_efficientnet_b7_ns_512_4_0.8387 + 
tf_efficientnet_b6_ns_512_4_0.8417 +
tf_efficientnet_b5_ns_512_4_0.8400 + NORM

best_th=0.44
best_cv=0.8675487130141929
best_th_adjusted=0.4

fold3:

convnext_large_384_in22ft1k_384_3_0.8200 + tf_efficientnet_b7_ns_512_3_0.8397

best_th=0.9
best_cv=0.8532491481258769
best_th_adjusted=0.8

convnext_large_384_in22ft1k_384_3_0.8200 + 
tf_efficientnet_b7_ns_512_3_0.8397 + NORM

best_th=0.5
best_cv=0.8474764481860092
best_th_adjusted=0.4

tf_efficientnet_b5_ns_512_3_0.8384 + 
tf_efficientnet_b7_ns_512_3_0.8391

best_th=0.9
best_cv=0.8577330126277812
best_th_adjusted=0.9

convnext_large_384_in22ft1k_384_3_0.8200 + 
tf_efficientnet_b5_ns_512_3_0.8384 + 
tf_efficientnet_b7_ns_512_3_0.8391

best_th=1.0
best_cv=0.8112146722790137
best_th_adjusted=1.

convnext_large_384_in22ft1k_384_3_0.8200 + 
tf_efficientnet_b5_ns_512_3_0.8384 + 
tf_efficientnet_b7_ns_512_3_0.8391 + Norm

best_th=0.44
best_cv=0.8615453998797354
best_th_adjusted=0.42

convnext_large_384_in22ft1k_384_3_0.8200 + 
tf_efficientnet_b5_ns_512_3_0.8384 + 
tf_efficientnet_b6_ns_512_3_0.8418 +
tf_efficientnet_b7_ns_512_3_0.8391 + Norm


best_th=0.44
best_cv=0.8657306073361394
best_th_adjusted=0.4

tf_efficientnet_b5_ns_512_3_0.8384 + 
tf_efficientnet_b7_ns_512_3_0.8391 + Norm

best_th=0.5
best_cv=0.8524413710162358
best_th_adjusted=0.4

convnext_large_384_in22ft1k_384_3_0.8200 + 
convnext_base_384_in22ft1k_512_3_0.8266 +
tf_efficientnet_b5_ns_512_3_0.8384 + 
tf_efficientnet_b6_ns_512_3_0.8418 +
tf_efficientnet_b7_ns_512_3_0.8391 + Norm

best_th=0.44
best_cv=0.8663359390659452
best_th_adjusted=0.4

fold2:

tf_efficientnet_b7_ns_512_2_0.8390 +
tf_efficientnet_b5_ns_512_2_0.8403

best_th=0.9
best_cv=0.8569112046502304
best_th_adjusted=0.9


convnext_large_384_in22ft1k_384_2_0.8182 +
tf_efficientnet_b7_ns_512_2_0.8390 +
tf_efficientnet_b5_ns_512_2_0.8403 + NORM

best_th=0.44
best_cv=0.8607095610342753
best_th_adjusted=0.42

convnext_large_384_in22ft1k_384_2_0.8182 +
tf_efficientnet_b7_ns_512_2_0.8390 +
tf_efficientnet_b6_ns_512_2_0.8439 +
tf_efficientnet_b5_ns_512_2_0.8403 + NORM

best_th=0.44
best_cv=0.866738825415915
best_th_adjusted=0.4


convnext_large_384_in22ft1k_384_2_0.8182 +
convnext_base_384_in22ft1k_512_2_0.8163 +
tf_efficientnet_b7_ns_512_2_0.8390 +
tf_efficientnet_b6_ns_512_2_0.8439 +
tf_efficientnet_b5_ns_512_2_0.8403 + NORM

best_th=0.44
best_cv=0.8642513529765485
best_th_adjusted=0.4

fold1:

convnext_large_384_in22ft1k_384_1_0.8169 +
tf_efficientnet_b5_ns_512_1_0.8351 +
tf_efficientnet_b7_ns_512_1_0.8311 + NORM

best_th=0.42
best_cv=0.857147724994989
best_th_adjusted=0.4

convnext_large_384_in22ft1k_384_1_0.8169 +
tf_efficientnet_b5_ns_512_1_0.8351 +
tf_efficientnet_b6_ns_512_1_0.8385 +
tf_efficientnet_b7_ns_512_1_0.8311 + NORM

best_th=0.44
best_cv=0.861824012828222
best_th_adjusted=0.4


tf_efficientnet_b5_ns_512_1_0.8351 +
tf_efficientnet_b7_ns_512_1_0.8311

best_th=0.9
best_cv=0.8526618560833834
best_th_adjusted=0.8


convnext_large_384_in22ft1k_384_1_0.8169 +
convnext_base_384_in22ft1k_512_1_0.8059 +
tf_efficientnet_b5_ns_512_1_0.8351 +
tf_efficientnet_b6_ns_512_1_0.8385 +
tf_efficientnet_b7_ns_512_1_0.8311 + NORM

best_th=0.44
best_cv=0.8610964121066347
best_th_adjusted=0.4


fold0:

convnext_large_384_in22ft1k_384_0_0.8216  +
tf_efficientnet_b7_ns_512_0_0.8414 + NORM

best_th=0.5
best_cv=0.8509440769693325
best_th_adjusted=0.4

convnext_large_384_in22ft1k_384_0_0.8216  +
tf_efficientnet_b7_ns_512_0_0.8414 

best_th=0.9
best_cv=0.8561755862898376
best_th_adjusted=0.8

tf_efficientnet_b5_ns_512_0_0.8371 +
tf_efficientnet_b7_ns_512_0_0.8414

best_th=0.9
best_cv=0.8584606133493686
best_th_adjusted=0.8

convnext_large_384_in22ft1k_384_0_0.8216  +
tf_efficientnet_b5_ns_512_0_0.8371 +
tf_efficientnet_b7_ns_512_0_0.8414 + NORM

best_th=0.44
best_cv=0.8632812186810984
best_th_adjusted=0.42

convnext_large_384_in22ft1k_384_0_0.8216  +
tf_efficientnet_b5_ns_512_0_0.8371 +
tf_efficientnet_b6_ns_512_0_0.8428 +
tf_efficientnet_b7_ns_512_0_0.8414 + NORM

best_th=0.44
best_cv=0.867971537382241
best_th_adjusted=0.42

convnext_large_384_in22ft1k_384_0_0.8216  +
convnext_base_384_in22ft1k_512_0_0.8149 + 
tf_efficientnet_b5_ns_512_0_0.8371 +
tf_efficientnet_b6_ns_512_0_0.8428 +
tf_efficientnet_b7_ns_512_0_0.8414 + NORM

best_th=0.44
best_cv=0.8668610944076969
best_th_adjusted=0.4
