In [1]:
import os
import zarr
import timm
import random
import json
import warnings
import numpy as np
import pandas as pd
import torch.nn as nn
from pathlib import Path
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, Dataset
from collections import defaultdict
import sys
import torch

# import torchvision.transforms.functional as F
import random

warnings.filterwarnings("ignore")
sys.path.append("./src/")

from src.config import CFG
from src.dataloader import (
    read_zarr,
    read_info_json,
    scale_coordinates,
    create_dataset,
    create_segmentation_map,
    EziiDataset,
    drop_padding,
)
from src.network import Unet3D
from src.utils import save_images, PadToSize
from src.metric import (
    score,
    create_cls_pos,
    create_cls_pos_sikii,
    create_df,
    SegmentationLoss,
    DiceLoss,
)
from metric import visualize_epoch_results
from src.utils import save_images
from src.metric import score, create_cls_pos, create_cls_pos_sikii, create_df
from src.inference import inference, inference2pos
from src.kaggle_notebook_metric import compute_lb

# Load the sample submission CSV; the relative path is resolved against the
# notebook's working directory.
sample_submission = pd.read_csv("../../inputs/sample_submission.csv")

In [2]:
def create_gt_df(base_dir, exp_names):
    """Collect the ground-truth particle coordinates of several experiments.

    For every experiment in ``exp_names`` and every particle name in
    ``CFG.particles_name``, the coordinates returned by ``read_info_json`` are
    turned into rows tagged with the experiment and particle type. All rows
    are stacked into a single frame.

    Args:
        base_dir: Directory handed through to ``read_info_json``.
        exp_names: Iterable of experiment names to collect.

    Returns:
        pandas.DataFrame with the columns
        ``index, experiment, particle_type, x, y, z`` where ``index`` is a
        running row number starting at 0. When nothing is collected (no
        experiments or no particle names) an empty frame with the same
        columns is returned.
    """
    columns = ["index", "experiment", "particle_type", "x", "y", "z"]

    # Gather the per-particle frames and concatenate once at the end; growing
    # a frame with pd.concat inside the loop copies all earlier rows each time.
    frames = []
    for exp_name in exp_names:
        for particle in CFG.particles_name:
            np_corrds = read_info_json(
                base_dir=base_dir, exp_name=exp_name, particle_name=particle
            )  # expected shape (n, 3), columns in z, y, x order
            # Tag every row with its experiment name and particle type.
            particle_df = pd.DataFrame(np_corrds, columns=["z", "y", "x"])
            particle_df["experiment"] = exp_name
            particle_df["particle_type"] = particle
            frames.append(particle_df)

    if not frames:
        return pd.DataFrame(columns=columns)

    # reset_index() materialises the running row number as the "index" column.
    result_df = pd.concat(frames, axis=0, ignore_index=True).reset_index()
    return result_df[columns]


# Ground truth for the experiments listed in CFG.train_exp_names.
# NOTE: `gt_df` is rebound later in the notebook for the validation experiments.
gt_df = create_gt_df("../../inputs/train/overlay/ExperimentRuns/", CFG.train_exp_names)
gt_df

Unnamed: 0,index,experiment,particle_type,x,y,z
0,0,TS_4,apo-ferritin,3045.036742,919.139280,421.270403
1,1,TS_4,apo-ferritin,2969.078552,1027.114255,440.085721
2,2,TS_4,apo-ferritin,2839.792769,1069.080767,425.839468
3,3,TS_4,apo-ferritin,2875.180486,1077.907940,298.254286
4,4,TS_4,apo-ferritin,2765.950544,1019.336833,322.072039
...,...,...,...,...,...,...
14400,14400,TS_6_6,virus-like-particle,2609.876000,4569.876000,1169.759000
14401,14401,TS_6_6,virus-like-particle,2213.287000,4135.017000,1286.851000
14402,14402,TS_6_6,virus-like-particle,3303.905000,5697.825000,789.744000
14403,14403,TS_6_6,virus-like-particle,1008.748000,5949.213000,1077.303000


In [3]:
import timm

# Use the GPU when one is visible and fall back to the CPU otherwise, so the
# cell does not fail outright on a machine without CUDA.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# timm backbone used as the encoder of the 3D U-Net. `features_only=True`
# makes it return intermediate feature maps instead of classification logits.
encoder = timm.create_model(
    model_name=CFG.model_name,
    pretrained=True,
    in_chans=3,
    num_classes=0,
    global_pool="",
    features_only=True,
)
model = Unet3D(encoder=encoder, num_domains=5).to(DEVICE)
# map_location remaps tensors that were saved from a GPU process onto DEVICE;
# without it a CUDA-saved checkpoint cannot be loaded on a CPU-only machine.
# The call stays last so its result is still shown as the cell output.
model.load_state_dict(torch.load("./best_model.pth", map_location=DEVICE))

# inferenced_array = inference(model, exp_name, train=False)
# 0.7303962244998289

<All keys matched successfully>

In [4]:
# Experiments to run inference on (the validation split from the config).
exp_names = CFG.valid_exp_names  # ["TS_6_4", "TS_5_4", "TS_69_2"]

import gc
from tqdm import tqdm


# One value shared by every particle type. "sikii" is presumably romanised
# Japanese for "threshold" - confirm against inference2pos. In this cell
# `constant` and `sikii` are referenced only by the commented-out code below.
constant = 0.25
sikii = {
    "apo-ferritin": constant,
    "beta-amylase": constant,
    "beta-galactosidase": constant,
    "ribosome": constant,
    "thyroglobulin": constant,
    "virus-like-particle": constant,
}

# experiment name -> first value returned by inference() for that experiment;
# cached so later cells can post-process it without re-running the model.
pred_dict = {}

# for exp_name in tqdm(CFG.train_exp_names):
for exp_name in tqdm(exp_names):  # try it on 5 datasets
    # inferenced_array = inference(model, exp_name, train=False)
    # n_tomogram and segmentation_map are returned but not used in this cell.
    inferenced_array, n_tomogram, segmentation_map = inference(
        model,
        exp_name,
        train=False,
        base_dir="../../inputs/train/",
    )
    pred_dict[exp_name] = inferenced_array
    # pred_df = inference2pos(
    #     pred_segmask=inferenced_array, exp_name=exp_name, sikii_dict=sikii
    # )

    # all_pred.append(pred_df)

    # Run a garbage-collection pass after each experiment.
    gc.collect()

100%|██████████| 2/2 [00:31<00:00, 15.93s/it]


In [5]:
# pred_df = pd.concat(all_pred, axis=0).reset_index(drop=True)
# pred_df = pred_df[pred_df["particle_type"] != "beta-amylase"]
# pred_df = pred_df.drop_duplicates(
#     subset=["experiment", "x", "y", "z"], keep="first"
# ).reset_index(drop=True)
# pred_df = pred_df.reset_index().rename(columns={"index": "id"})
# pred_df

In [6]:
# Rebuild the ground truth for the experiments in `exp_names` only; this
# rebinds the `gt_df` created earlier from CFG.train_exp_names.
gt_df = create_gt_df("../../inputs/train/overlay/ExperimentRuns/", exp_names)
gt_df

Unnamed: 0,index,experiment,particle_type,x,y,z
0,0,TS_69_2,apo-ferritin,770.625,1111.161,1088.795
1,1,TS_69_2,apo-ferritin,828.291,1201.673,1153.745
2,2,TS_69_2,apo-ferritin,668.986,1041.449,1102.246
3,3,TS_69_2,apo-ferritin,834.049,592.958,698.099
4,4,TS_69_2,apo-ferritin,81.893,2152.929,543.179
...,...,...,...,...,...,...
363,363,TS_86_3,virus-like-particle,4683.103,1546.998,899.626
364,364,TS_86_3,virus-like-particle,3563.170,2650.076,660.386
365,365,TS_86_3,virus-like-particle,3994.606,2797.533,839.291
366,366,TS_86_3,virus-like-particle,3829.926,2129.623,859.549


In [7]:
# Sweep one shared value over all particle types, convert the cached
# inference outputs in `pred_dict` to point predictions and score each setting.
first_df = None  # predictions of the first sweep value
last_df = None  # predictions of the most recently completed sweep value

# The ground truth does not depend on the sweep value, so build it once
# instead of re-reading the pick files on every iteration.
gt_df = create_gt_df(
    base_dir="../../inputs/train/overlay/ExperimentRuns/", exp_names=exp_names
)

# (sweep value, lb_score) for every completed iteration. Kept in a list so the
# results survive an interrupted run instead of existing only as printed text.
sweep_scores = []

for i, constant in enumerate(np.linspace(0.02, 0.9, 20)):
    initial_sikii = {
        "apo-ferritin": constant,
        "beta-amylase": constant,
        "beta-galactosidase": constant,
        "ribosome": constant,
        "thyroglobulin": constant,
        "virus-like-particle": constant,
    }

    all_pred = []

    for exp_name in exp_names:
        pred_original_df = inference2pos(
            pred_segmask=pred_dict[exp_name],
            exp_name=exp_name,
            sikii_dict=initial_sikii,
        )
        all_pred.append(pred_original_df)

    pred_df = pd.concat(all_pred, axis=0).reset_index(drop=True)

    # s = score(
    #     pred_df,
    #     gt_df,
    #     row_id_column_name="index",
    #     distance_multiplier=0.5,
    #     beta=4,
    # )
    # print(constant, s)

    # Score against the same experiments the predictions were generated for.
    result_df, lb_score = compute_lb(
        pred_df, "../../inputs/train/overlay/ExperimentRuns/", exp_names
    )
    print(constant, lb_score)
    sweep_scores.append((constant, lb_score))

    if i == 0:
        first_df = pred_df

    last_df = pred_df

 TS_86_3 virus-like-particle
0.02 0.5664270441475798
 TS_86_3 virus-like-particle
0.06631578947368422 0.6445891164907284
 TS_86_3 virus-like-particle
0.11263157894736843 0.6800997598444924
 TS_86_3 virus-like-particle
0.15894736842105264 0.7057515813429501
 TS_86_3 virus-like-particle
0.20526315789473684 0.7204784749646016
 TS_86_3 virus-like-particle
0.25157894736842107 0.719137291229705
 TS_86_3 virus-like-particle
0.2978947368421053 0.7075448653211996
 TS_86_3 virus-like-particle
0.3442105263157895 0.7115590318080408
 TS_86_3 virus-like-particle
0.3905263157894737 0.7081116073689306
 TS_86_3 virus-like-particle
0.4368421052631579 0.6937799960532344
 TS_86_3 virus-like-particle
0.4831578947368421 0.6837696882299481
 TS_86_3 virus-like-particle
0.5294736842105263 0.6659781719577131
 TS_86_3 virus-like-particle
0.5757894736842106 0.6686465403737107
 TS_86_3 virus-like-particle
0.6221052631578948 0.6590559191225472
 TS_86_3 virus-like-particle
0.668421052631579 0.6311363033346684
 TS_86

KeyboardInterrupt: 

In [None]:
# Predictions produced by the first sweep iteration (i == 0).
first_df

In [None]:
# Scratch note: this bare string is only echoed as the cell output; nothing is
# read from or written to the path here. Presumably the directory where model
# files are kept - confirm before relying on it.
"../../../../../../../../mnt/d/kaggle-tmp-models/czii2024/"

In [None]:
# Predictions of the most recently completed sweep iteration.
last_df

In [None]:
# `pred_df` as left behind by the sweep loop (the last one it built).
pred_df

In [None]:
# Ground-truth frame most recently built with create_gt_df for `exp_names`.
gt_df

In [None]:
# Pasted log kept as a bare string: two numbers per line, in the same layout
# as the `print(constant, lb_score)` output of the sweep loop. The values do
# not match the sweep output shown in this notebook, so they presumably come
# from a different run - provenance not recorded.
"""
0.2 0.42886866017326397
0.2368421052631579 0.44763846995979895
0.2736842105263158 0.4804059446122441
0.31052631578947365 0.4843158713629149
0.34736842105263155 0.5199510011648549
0.38421052631578945 0.5525385551828039
0.42105263157894735 0.5790230089865157
0.45789473684210524 0.6270203806695409
0.49473684210526314 0.6630851348008636
0.531578947368421 0.6898782365235326
0.5684210526315789 0.7254659045615721
0.6052631578947368 0.7185365400137919
0.6421052631578947 0.7466747413107093
0.6789473684210525 0.7474254728226583
0.7157894736842105 0.7565872909933272
0.7526315789473683 0.7802748413991157
0.7894736842105263 0.782614114024948
0.8263157894736841 0.7965023875014008
0.8631578947368421 0.7649020542074689
0.9 0.8140205630560696
"""

In [None]:
# Number of predicted points per particle type.
pred_df["particle_type"].value_counts()

In [None]:
# Number of ground-truth points per particle type, for comparison with the
# prediction counts.
gt_df["particle_type"].value_counts()

In [53]:
import pandas as pd
import numpy as np
import json
import zarr
from scipy.optimize import linear_sum_assignment

from timeit import default_timer as timer


def time_to_str(t, mode="min"):
    """Format a duration given in seconds as a short fixed-width string.

    Args:
        t: Duration in seconds; truncated to a whole number with ``int``.
        mode: ``"min"`` renders hours and minutes (``" 1 hr 01 min"``),
            ``"sec"`` renders minutes and seconds (``" 2 min 05 sec"``).

    Returns:
        The formatted string.

    Raises:
        NotImplementedError: If ``mode`` is neither ``"min"`` nor ``"sec"``.
    """
    if mode == "min":
        # Whole minutes first, then split them into hours and leftover minutes.
        hours, minutes = divmod(int(t) // 60, 60)
        return "%2d hr %02d min" % (hours, minutes)

    if mode == "sec":
        minutes, seconds = divmod(int(t), 60)
        return "%2d min %02d sec" % (minutes, seconds)

    raise NotImplementedError


class dotdict(dict):
    """A ``dict`` whose items can also be read, set and deleted as attributes.

    ``d.key`` is equivalent to ``d["key"]``. Reading a missing key through
    attribute access raises ``AttributeError`` (not ``KeyError``), so
    ``hasattr``/``getattr`` with a default behave as usual.
    """

    def __getattr__(self, name):
        # Only called when normal attribute lookup fails; fall back to the
        # mapping and translate a missing key into the attribute protocol.
        try:
            value = self[name]
        except KeyError:
            raise AttributeError(name)
        return value

    # Attribute assignment / deletion map straight onto item operations.
    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__


# Per-particle metadata table. Within this notebook only "name", "color" and
# "radius" are read: compute_lb uses half of "radius" as the matching distance
# threshold. The other fields are carried along unchanged.
PARTICLE = [
    {
        "name": "apo-ferritin",
        "difficulty": "easy",
        "pdb_id": "4V1W",
        "label": 1,
        "color": [0, 255, 0, 0],
        "radius": 60,
        "map_threshold": 0.0418,
    },
    {
        "name": "beta-amylase",
        "difficulty": "ignore",
        "pdb_id": "1FA2",
        "label": 2,
        "color": [0, 0, 255, 255],
        "radius": 65,
        "map_threshold": 0.035,
    },
    {
        "name": "beta-galactosidase",
        "difficulty": "hard",
        "pdb_id": "6X1Q",
        "label": 3,
        "color": [0, 255, 0, 255],
        "radius": 90,
        "map_threshold": 0.0578,
    },
    {
        "name": "ribosome",
        "difficulty": "easy",
        "pdb_id": "6EK0",
        "label": 4,
        "color": [0, 0, 255, 0],
        "radius": 150,
        "map_threshold": 0.0374,
    },
    {
        "name": "thyroglobulin",
        "difficulty": "hard",
        "pdb_id": "6SCJ",
        "label": 5,
        "color": [0, 255, 255, 0],
        "radius": 130,
        "map_threshold": 0.0278,
    },
    {
        "name": "virus-like-particle",
        "difficulty": "easy",
        "pdb_id": "6N4V",
        "label": 6,
        "color": [0, 0, 0, 255],
        "radius": 135,
        "map_threshold": 0.201,
    },
]

# Lookup lists with a leading background entry at position 0, followed by one
# entry per PARTICLE row in table order. Derived by iterating the table itself
# rather than a hard-coded count, so they stay in sync if rows are added.
# The first component of each "color" is dropped, leaving three values.
PARTICLE_COLOR = [[0, 0, 0]] + [particle["color"][1:] for particle in PARTICLE]
PARTICLE_NAME = ["none"] + [particle["name"] for particle in PARTICLE]

# Bare string used as a note; it has no effect at runtime. Presumably the
# array shapes of a volume at successive resolution levels - TODO confirm.
"""
(184, 630, 630)  
(92, 315, 315)  
(46, 158, 158)  
"""


def read_one_data(id, static_dir):
    """Load one denoised volume and min-max rescale it to the [0, 1] range.

    Args:
        id: Experiment identifier; used as a directory name under
            ``static_dir``.
        static_dir: Directory that contains one sub-directory per experiment.

    Returns:
        The first array of ``denoised.zarr`` (index ``0``), rescaled with
        ``(v - min) / (max - min)`` and cast to ``numpy.float16``.
    """
    zarr_file = f"{static_dir}/{id}/VoxelSpacing10.000/denoised.zarr"
    store = zarr.open(zarr_file, mode="r")

    # Index 0 selects the first array in the store; [:] reads it into memory.
    raw = store[0][:]

    highest = raw.max()
    lowest = raw.min()
    scaled = (raw - lowest) / (highest - lowest)
    return scaled.astype(np.float16)


def read_one_truth(id, overlay_dir):
    """Read the ground-truth pick locations of one experiment.

    For every particle name in ``PARTICLE_NAME[1:]`` (the leading "none" entry
    is skipped) the file ``{overlay_dir}/{id}/Picks/{name}.json`` is loaded
    and the ``location`` of each entry in its ``points`` list is collected.

    Args:
        id: Experiment identifier (directory name under ``overlay_dir``).
        overlay_dir: Directory that contains one sub-directory per experiment.

    Returns:
        dict mapping particle name to a float array of shape ``(n, 3)`` whose
        columns are x, y, z. A particle without picks yields shape ``(0, 3)``.

    Raises:
        FileNotFoundError: If a pick file is missing.
        KeyError: If a point lacks an ``x``, ``y`` or ``z`` location entry.
    """
    location = {}

    json_dir = f"{overlay_dir}/{id}/Picks"
    for p in PARTICLE_NAME[1:]:
        with open(f"{json_dir}/{p}.json", "r") as f:
            points = json.load(f)["points"]

        # Pick the coordinates by key instead of relying on the order in which
        # the keys happen to appear in the JSON file: compute_lb compares these
        # rows against the ["x", "y", "z"] columns of the submission.
        loc = np.array(
            [[point["location"][axis] for axis in ("x", "y", "z")] for point in points],
            dtype=float,
        ).reshape(-1, 3)  # keeps a well-formed (0, 3) array when there are no picks
        location[p] = loc

    return location


def do_one_eval(truth, predict, threshold):
    """Match predicted points to ground-truth points and count the outcome.

    Predictions and truths are paired one-to-one by the assignment that
    minimises the total Euclidean distance (``linear_sum_assignment``); a pair
    counts as a hit only when its distance is ``<= threshold``.

    Args:
        truth: Array of ground-truth points, reshaped to ``(T, 3)``.
        predict: Array of predicted points, reshaped to ``(P, 3)``.
        threshold: Largest distance at which a pair is still a hit.

    Returns:
        Tuple ``(hit, fp, miss, metric)``:
        ``hit`` is ``[prediction indices, truth indices]`` of the matched pairs,
        ``fp`` lists the unmatched prediction indices,
        ``miss`` lists the unmatched truth indices, and
        ``metric`` is ``[P, T, n_hit, n_miss, n_fp]``.
    """
    P, T = len(predict), len(truth)

    def _pack(hit, fp, miss):
        # Attach the count summary used for the F-beta computation.
        return hit, fp, miss, [P, T, len(hit[0]), len(miss), len(fp)]

    # Nothing predicted: every truth point is missed.
    if P == 0:
        return _pack([[], []], [], list(range(T)))

    # Nothing to find: every prediction is a false positive.
    if T == 0:
        return _pack([[], []], list(range(P)), [])

    # Pairwise Euclidean distances, shape (P, T).
    delta = predict.reshape(P, 1, 3) - truth.reshape(1, T, 3)
    distance = np.sqrt((delta**2).sum(axis=2))

    pred_idx, truth_idx = linear_sum_assignment(distance)

    # Keep only the assigned pairs that are close enough.
    close_enough = distance[pred_idx, truth_idx] <= threshold
    pred_idx, truth_idx = pred_idx[close_enough], truth_idx[close_enough]

    miss = np.setdiff1d(np.arange(T), truth_idx).tolist()
    fp = np.setdiff1d(np.arange(P), pred_idx).tolist()
    return _pack([pred_idx.tolist(), truth_idx.tolist()], fp, miss)


def compute_lb(submit_df, overlay_dir, valid_id):
    """Score a submission frame against the ground-truth picks.

    NOTE: defining this function in the notebook shadows the ``compute_lb``
    imported from ``src.kaggle_notebook_metric`` in the first cell.

    For every experiment in ``valid_id`` and every entry of ``PARTICLE`` the
    predicted points are matched to the truth with ``do_one_eval`` (distance
    threshold: half of the particle's ``radius``). The counts are summed per
    particle type, turned into precision / recall / F-beta (beta = 4), and
    combined into a weighted mean.

    Args:
        submit_df: DataFrame with at least the columns
            ``experiment, particle_type, x, y, z``.
        overlay_dir: Directory handed to ``read_one_truth``.
        valid_id: Iterable of experiment identifiers to evaluate.

    Returns:
        Tuple ``(gb, lb_score)``: ``gb`` has one row per particle type with
        the columns ``particle_type, P, T, hit, miss, fp, precision, recall,
        f-beta4, weight``; ``lb_score`` is the weighted mean of ``f-beta4``.
    """
    # Weight of each particle type in the final score, keyed by name so the
    # assignment does not depend on the row order of the aggregated frame.
    # https://www.kaggle.com/competitions/czii-cryo-et-object-identification/discussion/544895
    weight_by_type = {
        "apo-ferritin": 1,
        "beta-amylase": 0,
        "beta-galactosidase": 2,
        "ribosome": 1,
        "thyroglobulin": 2,
        "virus-like-particle": 1,
    }
    count_columns = ["P", "T", "hit", "miss", "fp"]

    print(valid_id)

    rows = []
    for exp_id in valid_id:
        truth = read_one_truth(exp_id, overlay_dir)
        exp_df = submit_df[submit_df["experiment"] == exp_id]
        for particle in PARTICLE:
            name = particle["name"]
            # "\r" rewrites the same console line as a lightweight progress display.
            print("\r", exp_id, name, end="", flush=True)
            xyz_truth = truth[name]
            xyz_predict = exp_df[exp_df["particle_type"] == name][
                ["x", "y", "z"]
            ].values
            _, _, _, metric = do_one_eval(
                xyz_truth, xyz_predict, particle["radius"] * 0.5
            )
            row = {"id": exp_id, "particle_type": name}
            row.update(zip(count_columns, metric))
            rows.append(row)
    print("")

    eval_df = pd.DataFrame(rows)
    # Sum only the count columns; aggregating the string "id" column and then
    # dropping it behaves differently across pandas versions.
    gb = eval_df.groupby("particle_type")[count_columns].sum()

    # 0/0 yields NaN (nothing predicted / nothing to find); score those as 0.
    gb["precision"] = (gb["hit"] / gb["P"]).fillna(0)
    gb["recall"] = (gb["hit"] / gb["T"]).fillna(0)
    # F-beta with beta = 4: (1 + 16) * p * r / (16 * p + r).
    gb["f-beta4"] = (
        17 * gb["precision"] * gb["recall"] / (16 * gb["precision"] + gb["recall"])
    ).fillna(0)

    gb = gb.sort_values("particle_type").reset_index(drop=False)
    gb["weight"] = gb["particle_type"].map(weight_by_type)
    lb_score = (gb["f-beta4"] * gb["weight"]).sum() / gb["weight"].sum()
    return gb, lb_score

In [None]:
# Score the current `pred_df` on the validation experiments with the
# compute_lb defined in this notebook; `result_df` holds the per-particle
# breakdown and `lb_score` the weighted overall score.
result_df, lb_score = compute_lb(
    pred_df, "../../inputs/train/overlay/ExperimentRuns/", CFG.valid_exp_names
)


result_df

In [8]:
# Per-particle-type breakdown returned by the most recent compute_lb call.
result_df

Unnamed: 0,particle_type,P,T,hit,miss,fp,precision,recall,f-beta4,weight
0,apo-ferritin,117,99,87,12,30,0.74359,0.878788,0.869489,1
1,beta-amylase,1,21,0,21,1,0.0,0.0,0.0,0
2,beta-galactosidase,12,39,6,33,6,0.5,0.153846,0.160377,2
3,ribosome,107,92,78,14,29,0.728972,0.847826,0.839772,1
4,thyroglobulin,74,79,29,50,45,0.391892,0.367089,0.36846,2
5,virus-like-particle,37,38,37,1,0,1.0,0.973684,0.975194,1


In [11]:
def _rows_for_particle(particle_type):
    """Return the rows of ``result_df`` whose particle_type equals the argument."""
    return result_df[result_df["particle_type"] == particle_type]


# One single-row frame per particle type, taken from the score breakdown.
apo_ferritin = _rows_for_particle("apo-ferritin")
beta_amylase = _rows_for_particle("beta-amylase")
beta_galactosidase = _rows_for_particle("beta-galactosidase")
ribosome = _rows_for_particle("ribosome")
thyroglobulin = _rows_for_particle("thyroglobulin")
virus_like_particle = _rows_for_particle("virus-like-particle")

In [15]:
def _recall_precision_fbeta(rows):
    """Return ``(recall, precision, f-beta4)`` read from the first row of ``rows``."""
    return tuple(
        rows[column].values[0] for column in ("recall", "precision", "f-beta4")
    )


# Unpack the three headline metrics of every particle type into scalars
# (suffixes: _r = recall, _p = precision, _f4 = F-beta with beta 4).
apo_ferritin_r, apo_ferritin_p, apo_ferritin_f4 = _recall_precision_fbeta(
    apo_ferritin
)

beta_amylase_r, beta_amylase_p, beta_amylase_f4 = _recall_precision_fbeta(
    beta_amylase
)

(
    beta_galactosidase_r,
    beta_galactosidase_p,
    beta_galactosidase_f4,
) = _recall_precision_fbeta(beta_galactosidase)

ribosome_r, ribosome_p, ribosome_f4 = _recall_precision_fbeta(ribosome)

thyroglobulin_r, thyroglobulin_p, thyroglobulin_f4 = _recall_precision_fbeta(
    thyroglobulin
)

(
    virus_like_particle_r,
    virus_like_particle_p,
    virus_like_particle_f4,
) = _recall_precision_fbeta(virus_like_particle)