In [None]:
from src.train import SphereClassifier, WhaleDataModule
from src.dataset import load_df
import torch
from torch.utils.data import DataLoader
import torch.nn.functional as F
from tqdm import tqdm
import numpy as np
from config.config import Config, load_config

cuda = torch.device("cuda:0")

## Load model

In [None]:
model = SphereClassifier.load_from_checkpoint(
    checkpoint_path="/app/sandbox/happy_whale/kaggle-happywhale-1st-place/result/b6_new/1/last.ckpt"
)
model.to(cuda)
model.eval()


# image = torch.rand(1, 3, 528, 528).to(cuda)
# logits_ids, logits_species = model(image)

In [5]:
from torchviz import make_dot

image = torch.rand(1, 3, 528, 528).to(cuda)
yhat = model(image)
make_dot(yhat, params=dict(list(model.named_parameters()))).render(
    "b6_torchviz", "b6.png"
)

'b6.png/rnn_torchviz.pdf'

In [7]:
model

SphereClassifier(
  (backbone): EfficientNetFeatures(
    (conv_stem): Conv2dSame(3, 56, kernel_size=(3, 3), stride=(2, 2), bias=False)
    (bn1): BatchNormAct2d(
      56, eps=0.001, momentum=0.1, affine=True, track_running_stats=True
      (drop): Identity()
      (act): SiLU(inplace=True)
    )
    (blocks): Sequential(
      (0): Sequential(
        (0): DepthwiseSeparableConv(
          (conv_dw): Conv2d(56, 56, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=56, bias=False)
          (bn1): BatchNormAct2d(
            56, eps=0.001, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): SiLU(inplace=True)
          )
          (se): SqueezeExcite(
            (conv_reduce): Conv2d(56, 14, kernel_size=(1, 1), stride=(1, 1))
            (act1): SiLU(inplace=True)
            (conv_expand): Conv2d(14, 56, kernel_size=(1, 1), stride=(1, 1))
            (gate): Sigmoid()
          )
          (conv_pw): Conv2d(56, 32, kernel_siz

In [9]:
backbone_out = model.backbone(image)

In [11]:
backbone_out[0].shape, backbone_out[1].shape

(torch.Size([1, 200, 33, 33]), torch.Size([1, 576, 17, 17]))

In [6]:
from torchsummary import summary

summary(model, input_size=(3, 528, 528))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
        Conv2dSame-1         [-1, 56, 264, 264]           1,512
          Identity-2         [-1, 56, 264, 264]               0
              SiLU-3         [-1, 56, 264, 264]               0
    BatchNormAct2d-4         [-1, 56, 264, 264]             112
            Conv2d-5         [-1, 56, 264, 264]             504
          Identity-6         [-1, 56, 264, 264]               0
              SiLU-7         [-1, 56, 264, 264]               0
    BatchNormAct2d-8         [-1, 56, 264, 264]             112
            Conv2d-9             [-1, 14, 1, 1]             798
             SiLU-10             [-1, 14, 1, 1]               0
           Conv2d-11             [-1, 56, 1, 1]             840
          Sigmoid-12             [-1, 56, 1, 1]               0
    SqueezeExcite-13         [-1, 56, 264, 264]               0
           Conv2d-14         [-1, 32, 2

## Compute embs on train ds

In [None]:
cfg = load_config("config/efficientnet_b6_new.yaml", "config/default.yaml")

In [None]:
df = load_df("input", cfg, "train.csv", True)
data_module = WhaleDataModule(
    df,
    cfg,
    f"input/train_images",
    cfg.val_bbox,
    -1,
)

In [None]:
train_dataset = data_module.get_dataset(df, False)
train_loader = DataLoader(
    train_dataset,
    batch_size=8,
    shuffle=False,
    num_workers=32,
    pin_memory=True,
    drop_last=False,
)

In [None]:
# predictions = []
# model.eval()

# for batch in tqdm(train_loader):
#     images = batch['image'].to(cuda)
#     feats = F.normalize(model.get_feat(images), p=2.0, dim=1)
#     predictions.append(feats.detach().cpu())
# embs = torch.cat(predictions, axis=0).numpy()
# #np.savez(f"whale_train_emb.npz", embs=embs)

## Create Whale Train OSFR protocol

In [None]:
train_dataset.ids  # defines unique id of image. id \in [0, 15587)
a = np.load("whale_train_emb.npz")

In [None]:
train_dataset.x_paths

In [None]:
a["embs"].shape

In [None]:
unique_ids, count_ids = np.unique(train_dataset.ids, return_counts=True)

out_of_gallery_ids = unique_ids[count_ids == 1]  # single image ids
in_gallery_ids = unique_ids[count_ids > 1]
assert len(out_of_gallery_ids) + len(in_gallery_ids) == 15587

In [None]:
np.unique(train_dataset.x_paths).shape

In [None]:
from pathlib import Path

# construct gallery and probe temlates
image_path_to_template_id = {}
image_path_to_subject_id = {}

gallery_templates = []
known_probe_templates = []
subject_id = 0
gallery_template_id = 0
probe_template_id = 10000
for subject in in_gallery_ids:
    subject_images_paths = train_dataset.x_paths[train_dataset.ids == subject]
    image_count = len(subject_images_paths)
    for i, image_path in enumerate(subject_images_paths):
        image_path_to_subject_id[image_path] = subject_id
        if i < image_count // 2:
            image_path_to_template_id[image_path] = gallery_template_id
        if i >= image_count // 2:
            image_path_to_template_id[image_path] = probe_template_id

    gallery_templates.append(
        (subject_images_paths[: image_count // 2], gallery_template_id, subject_id)
    )
    known_probe_templates.append(
        (subject_images_paths[image_count // 2 :], probe_template_id, subject_id)
    )
    gallery_template_id += 1
    probe_template_id += 1
    subject_id += 1

assert gallery_template_id < 10000
unknown_probe_templates = []

for probe_subject in out_of_gallery_ids:
    probe_images_paths = train_dataset.x_paths[train_dataset.ids == probe_subject]
    for image_path in probe_images_paths:
        image_path = str(image_path)
        image_path_to_subject_id[image_path] = subject_id
        image_path_to_template_id[image_path] = probe_template_id
    unknown_probe_templates.append((probe_images_paths, probe_template_id, subject_id))
    probe_template_id += 1
    subject_id += 1

In [None]:
assert len(image_path_to_template_id) == len(train_dataset.x_paths)
assert len(image_path_to_subject_id) == len(train_dataset.x_paths)
assert len(set(image_path_to_subject_id.values())) == len(unique_ids)
assert len(set(image_path_to_template_id.values())) == len(unique_ids) + len(
    in_gallery_ids
)

In [None]:
len(gallery_templates), len(known_probe_templates), len(unknown_probe_templates)

In [None]:
len(unknown_probe_templates) / (
    len(known_probe_templates) + len(unknown_probe_templates)
)

In [None]:
len(known_probe_templates) + len(unknown_probe_templates)

In [None]:
import pandas as pd

ds_name = "whale"
# create meta files
# tid mid
identification_ds_path = Path("/app/datasets/whale_train")
identification_ds_path.mkdir(exist_ok=True)
meta_path = identification_ds_path / "meta"
meta_path.mkdir(exist_ok=True)
img_names = train_dataset.x_paths
# names = [x.split("/")[-1] for x in img_names]
names = img_names
mids = np.arange(len(img_names))
tids = []
sids = []

for image_path in img_names:
    tids.append(image_path_to_template_id[image_path])
    sids.append(image_path_to_subject_id[image_path])

out_file_tid_mid = meta_path / Path(f"{ds_name}_face_tid_mid.txt")
with open(out_file_tid_mid, "w") as fd:
    for name, tid, sid, mid in zip(names, tids, sids, mids):
        fd.write(f"{name} {tid} {mid} {sid}\n")

out_file_probe = meta_path / Path(f"{ds_name}_1N_probe_mixed.csv")
out_file_gallery = meta_path / Path(f"{ds_name}_1N_gallery_G1.csv")

tids_probe = []
sids_probe = []
names_probe = []
for probe_meta in known_probe_templates + unknown_probe_templates:
    tids_probe.extend([probe_meta[1]] * len(probe_meta[0]))
    sids_probe.extend([probe_meta[2]] * len(probe_meta[0]))
    names_probe.extend([x.split("/")[-1] for x in probe_meta[0]])

tids_gallery = []
sids_gallery = []
names_gallery = []

for gallery_meta in gallery_templates:
    tids_gallery.extend([gallery_meta[1]] * len(gallery_meta[0]))
    sids_gallery.extend([gallery_meta[2]] * len(gallery_meta[0]))
    names_gallery.extend([x.split("/")[-1] for x in gallery_meta[0]])

assert len(tids_gallery) + len(tids_probe) == len(img_names)
probe = pd.DataFrame(
    {
        "TEMPLATE_ID": tids_probe,
        "SUBJECT_ID": sids_probe,
        "FILENAME": names_probe,
    }
)
gallery = pd.DataFrame(
    {
        "TEMPLATE_ID": tids_gallery,
        "SUBJECT_ID": sids_gallery,
        "FILENAME": names_gallery,
    }
)

probe.to_csv(out_file_probe, sep=",", index=False)
gallery.to_csv(out_file_gallery, sep=",", index=False)

In [None]:
emb_dir = identification_ds_path / "embeddings"
emb_dir.mkdir(exist_ok=True)
np.savez(emb_dir / "b6_embs_whale.npz", **a, unc=np.ones((a["embs"].shape[0], 1)) * 30)

In [None]:
np.ones((a["embs"].shape[0], 1)).shape