In [1]:
import os
import zarr
import random
import json
import warnings
import numpy as np
import pandas as pd
import torch.nn as nn
from pathlib import Path
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, Dataset
import torch
import torchvision.transforms.functional as F
import random
import sys
from collections import defaultdict

warnings.filterwarnings("ignore")
sys.path.append("./src/")

from src.config import CFG
from src.dataloader import (
    read_zarr,
    read_info_json,
    scale_coordinates,
    create_dataset,
    create_segmentation_map,
    EziiDataset,
    drop_padding,
)
from src.network import UNet_2D, aug
from src.utils import save_images
from src.metric import score, create_cls_pos, create_cls_pos_sikii, create_df

sample_submission = pd.read_csv("../../inputs/sample_submission.csv")

In [2]:
valid_dataset = EziiDataset(
    exp_names=CFG.valid_exp_names,
    # exp_names=CFG.train_exp_names,
    base_dir="../../inputs/train/static",
    particles_name=CFG.particles_name,
    resolution=CFG.resolution,
    zarr_type=CFG.valid_zarr_types,
)

from tqdm import tqdm

# train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=1, shuffle=False)

for row in tqdm(valid_loader):
    normalized_tomogram = row["normalized_tomogram"]
    break

[('TS_86_3', 'denoised'), ('TS_6_6', 'denoised')]


  0%|          | 0/2 [00:00<?, ?it/s]


In [3]:
model = UNet_2D().to("cuda")
model.eval()
model.load_state_dict(torch.load("./best_model.pth"))


optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
criterion = nn.CrossEntropyLoss(
    weight=torch.tensor([0.5, 32, 32, 32, 32, 32, 32]).to("cuda")
)
# criterion = DiceLoss()

best_model = None
best_loss = np.inf
batch_size = 4

valid_loss = []
valid_pred_tomogram = defaultdict(list)
valid_gt_tomogram = defaultdict(list)
model.eval()
tq = tqdm(range(len(valid_loader) * normalized_tomogram.shape[0]))
for data in valid_loader:
    exp_name = data["exp_name"][0]
    tomogram = data["normalized_tomogram"].to("cuda")
    segmentation_map = data["segmentation_map"].to("cuda").long()

    for i in range(tomogram.shape[1]):
        input_ = tomogram[:, i].unsqueeze(0)
        gt = segmentation_map[:, i]

        output = model(input_)
        loss = criterion(output, gt)

        valid_loss.append(loss.item())
        tq.set_description(f"Loss: {np.mean(valid_loss)}")
        tq.update(1)

        valid_pred_tomogram[exp_name].append(output.cpu().detach().numpy())
        valid_gt_tomogram[exp_name].append(gt.cpu().detach().numpy())
tq.close()

Loss: 0.36468520154670364: : 184it [00:09, 20.13it/s]                    


In [4]:
valid_pred_tomogram.keys()

dict_keys(['TS_86_3', 'TS_6_6'])

In [6]:
# 各実験のスコアを計算
cv_scores = []

experiments = list(valid_pred_tomogram.keys())
print(f"experiments: {experiments}")

for exp_name in experiments:
    print(
        f"####################### valid-experiments: {exp_name} #######################"
    )
    # pred
    # pred_tomogram = np.array(valid_pred_tomogram[exp_name]).argmax(2).squeeze(1)
    # pred_tomogram = drop_padding(pred_tomogram, CFG.resolution)
    # pred_cls_pos, pred_Ascale_pos = create_cls_pos(pred_tomogram)
    # pred_df = create_df(pred_Ascale_pos, exp_name)
    # pred_df = pred_df.reset_index()

    constant = 0.5
    sikii_dict = {
        "apo-ferritin": constant,
        "beta-amylase": constant,
        "beta-galactosidase": constant,
        "ribosome": constant,
        "thyroglobulin": constant,
        "virus-like-particle": constant,
    }

    # multi-cls-pred
    pred_tomogram = np.array(valid_pred_tomogram[exp_name]).squeeze(1)
    pred_tomogram = drop_padding(pred_tomogram, CFG.resolution)
    pred_tomogram = np.exp(pred_tomogram) / np.exp(pred_tomogram).sum(1)[:, None]
    pred_cls_pos, pred_Ascale_pos = create_cls_pos_sikii(
        pred_tomogram, sikii_dict=sikii_dict
    )
    pred_df = create_df(pred_Ascale_pos, exp_name)
    pred_df = pred_df.drop_duplicates(subset=["x", "y", "z"], keep="first")
    pred_df = pred_df.reset_index()

    # gt
    gt_tomogram = np.array(valid_gt_tomogram[exp_name]).squeeze(1)
    gt_tomogram = drop_padding(gt_tomogram, CFG.resolution)
    gt_cls_pos, gt_Ascale_pos = create_cls_pos(gt_tomogram)
    gt_df = create_df(gt_Ascale_pos, exp_name)

    gt_df = gt_df.reset_index()

    score_ = score(
        solution=pred_df,
        submission=gt_df,
        row_id_column_name="index",
        distance_multiplier=0.5,
        beta=4,
    )
    print(f"experiments: {exp_name}, score: {score_}")
    cv_scores.append(score_)

print(f"CV: {np.mean(cv_scores)}")

experiments: ['TS_86_3', 'TS_6_6']
####################### valid-experiments: TS_86_3 #######################
experiments: TS_86_3, score: 0.14467114162413539
####################### valid-experiments: TS_6_6 #######################
experiments: TS_6_6, score: 0.21106372794782077
CV: 0.17786743478597808
