In [1]:
import os
import zarr
import random
import json
import warnings
import numpy as np
import pandas as pd
import torch.nn as nn
from pathlib import Path
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, Dataset
import torch
import torchvision.transforms.functional as F
import random
import sys
from collections import defaultdict

warnings.filterwarnings("ignore")
sys.path.append("./src/")

from src.config import CFG
from src.dataloader import (
    read_zarr,
    read_info_json,
    scale_coordinates,
    create_dataset,
    create_segmentation_map,
    EziiDataset,
    drop_padding,
)
from src.network import UNet_2D, aug
from src.utils import save_images
from src.metric import score, create_cls_pos, create_cls_pos_sikii, create_df

sample_submission = pd.read_csv("../../inputs/sample_submission.csv")

In [2]:
valid_dataset = EziiDataset(
    exp_names=CFG.valid_exp_names,
    # exp_names=CFG.train_exp_names,
    base_dir="../../inputs/train",
    particles_name=CFG.particles_name,
    resolution=CFG.resolution,
    zarr_type=CFG.valid_zarr_types,
    train=True,
)

from tqdm import tqdm

# train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=1, shuffle=False)

for row in tqdm(valid_loader):
    normalized_tomogram = row["normalized_tomogram"]
    break

[('TS_86_3', 'denoised'), ('TS_6_6', 'denoised')]


  0%|          | 0/2 [00:00<?, ?it/s]


In [3]:
model = UNet_2D().to("cuda")
model.eval()
model.load_state_dict(torch.load("./best_model.pth"))


optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
criterion = nn.CrossEntropyLoss(
    weight=torch.tensor([0.5, 32, 32, 32, 32, 32, 32]).to("cuda")
)
# criterion = DiceLoss()

best_model = None
best_loss = np.inf
batch_size = 4

valid_loss = []
valid_pred_tomogram = defaultdict(list)
valid_gt_tomogram = defaultdict(list)
model.eval()
tq = tqdm(range(len(valid_loader) * normalized_tomogram.shape[0]))
for data in valid_loader:
    exp_name = data["exp_name"][0]
    tomogram = data["normalized_tomogram"].to("cuda")
    segmentation_map = data["segmentation_map"].to("cuda").long()

    for i in range(tomogram.shape[1]):
        input_ = tomogram[:, i].unsqueeze(0)
        gt = segmentation_map[:, i]

        output = model(input_)
        loss = criterion(output, gt)

        valid_loss.append(loss.item())
        tq.set_description(f"Loss: {np.mean(valid_loss)}")
        tq.update(1)

        valid_pred_tomogram[exp_name].append(output.cpu().detach().numpy())
        valid_gt_tomogram[exp_name].append(gt.cpu().detach().numpy())
tq.close()

Loss: 0.6418150775134563: : 92it [00:04, 21.02it/s]                     


In [4]:
valid_pred_tomogram.keys()

dict_keys(['TS_86_3', 'TS_6_6'])

In [5]:
def create_gt_df(base_dir, exp_names):
    result_df = None
    particle_names = CFG.particles_name

    for exp_name in exp_names:
        for particle in particle_names:
            np_corrds = read_info_json(
                base_dir=base_dir, exp_name=exp_name, particle_name=particle
            )  # (n, 3)
            # 各行にexp_nameとparticle_name追加
            particle_df = pd.DataFrame(np_corrds, columns=["z", "y", "x"])
            particle_df["experiment"] = exp_name
            particle_df["particle_type"] = particle

            if result_df is None:
                result_df = particle_df
            else:
                result_df = pd.concat([result_df, particle_df], axis=0).reset_index(
                    drop=True
                )

    result_df = result_df.reset_index()  # index	experiment	particle_type	x	y	z
    result_df = result_df[["index", "experiment", "particle_type", "x", "y", "z"]]

    return result_df


create_gt_df("../../inputs/train/overlay/ExperimentRuns/", CFG.valid_exp_names)

Unnamed: 0,index,experiment,particle_type,x,y,z
0,0,TS_86_3,apo-ferritin,3870.343,4952.714,1261.600
1,1,TS_86_3,apo-ferritin,4130.897,5422.292,501.860
2,2,TS_86_3,apo-ferritin,2735.000,4668.447,520.291
3,3,TS_86_3,apo-ferritin,2649.615,4690.615,600.923
4,4,TS_86_3,apo-ferritin,2665.353,4810.641,612.019
...,...,...,...,...,...,...
363,363,TS_6_6,virus-like-particle,2609.876,4569.876,1169.759
364,364,TS_6_6,virus-like-particle,2213.287,4135.017,1286.851
365,365,TS_6_6,virus-like-particle,3303.905,5697.825,789.744
366,366,TS_6_6,virus-like-particle,1008.748,5949.213,1077.303


In [6]:
# 各実験のスコアを計算
cv_scores = []

experiments = list(valid_pred_tomogram.keys())
print(f"experiments: {experiments}")

for exp_name in experiments:
    print(
        f"####################### valid-experiments: {exp_name} #######################"
    )
    # pred
    # pred_tomogram = np.array(valid_pred_tomogram[exp_name]).argmax(2).squeeze(1)
    # pred_tomogram = drop_padding(pred_tomogram, CFG.resolution)
    # pred_cls_pos, pred_Ascale_pos = create_cls_pos(pred_tomogram)
    # pred_df = create_df(pred_Ascale_pos, exp_name)
    # pred_df = pred_df.reset_index()

    sikii_dict = {
        "apo-ferritin": 0.38,
        "beta-amylase": 0.38,
        "beta-galactosidase": 0.38,
        "ribosome": 0.38,
        "thyroglobulin": 0.38,
        "virus-like-particle": 0.38,
    }

    # multi-cls-pred
    pred_tomogram = np.array(valid_pred_tomogram[exp_name]).squeeze(1)
    pred_tomogram = drop_padding(pred_tomogram, CFG.resolution)
    pred_tomogram = np.exp(pred_tomogram) / np.exp(pred_tomogram).sum(1)[:, None]
    pred_cls_pos, pred_Ascale_pos = create_cls_pos_sikii(
        pred_tomogram, sikii_dict=sikii_dict
    )
    pred_df = create_df(pred_Ascale_pos, exp_name)
    pred_df = pred_df.drop_duplicates(subset=["x", "y", "z"], keep="first")
    pred_df = pred_df.reset_index()

    # gt
    # gt_tomogram = np.array(valid_gt_tomogram[exp_name]).squeeze(1)
    # gt_tomogram = drop_padding(gt_tomogram, CFG.resolution)
    # gt_cls_pos, gt_Ascale_pos = create_cls_pos(gt_tomogram)
    # gt_df = create_df(gt_Ascale_pos, exp_name)

    # gt_df = gt_df.reset_index()
    gt_df = create_gt_df("../../inputs/train/overlay/ExperimentRuns/", [exp_name])

    score_ = score(
        solution=pred_df,
        submission=gt_df,
        row_id_column_name="index",
        distance_multiplier=1,
        beta=4,
    )
    print(f"experiments: {exp_name}, score: {score_}")
    cv_scores.append(score_)

print(f"CV: {np.mean(cv_scores)}")

experiments: ['TS_86_3', 'TS_6_6']
####################### valid-experiments: TS_86_3 #######################
experiments: TS_86_3, score: 0.005421305679549
####################### valid-experiments: TS_6_6 #######################
experiments: TS_6_6, score: 0.000821805232159978
CV: 0.003121555455854489


In [7]:
np.array(pred_cls_pos)[:, 1].min()

0.0

In [8]:
pred_tomogram = np.array(valid_pred_tomogram[exp_name]).squeeze(1)
pred_tomogram = drop_padding(pred_tomogram, CFG.resolution)
pred_tomogram.shape

(46, 7, 158, 158)

In [10]:
gt_df

Unnamed: 0,index,experiment,particle_type,x,y,z
0,0,TS_6_6,apo-ferritin,1916.830,3311.797,754.673
1,1,TS_6_6,apo-ferritin,1996.861,3231.277,803.577
2,2,TS_6_6,apo-ferritin,2206.512,2975.302,1179.674
3,3,TS_6_6,apo-ferritin,285.292,1379.331,417.577
4,4,TS_6_6,apo-ferritin,753.781,2633.219,973.094
...,...,...,...,...,...,...
138,138,TS_6_6,virus-like-particle,2609.876,4569.876,1169.759
139,139,TS_6_6,virus-like-particle,2213.287,4135.017,1286.851
140,140,TS_6_6,virus-like-particle,3303.905,5697.825,789.744
141,141,TS_6_6,virus-like-particle,1008.748,5949.213,1077.303
