In [None]:
from tqdm import tqdm
import importlib
from pathlib import Path
import pandas as pd

import numpy as np
import functools
from collections import defaultdict

import cv2
import os

from torch.utils.data import DataLoader
import albumentations as albu
import torch

# Mount Google Drive and make the project folder the working directory, so the
# relative paths used later ("experiments/...", "data/...") resolve against it.
from google.colab import drive
drive.mount('/content/drive')
os.chdir('/content/drive/My Drive/Courses/comp540/project')

# Project-local modules -- presumably located in the project folder entered
# above, which is why they are imported only after the chdir.
from dataset import PneumoDataset
from helpers import load_yaml
# IPython autoreload in mode 2: re-import edited modules before running a cell.
%load_ext autoreload
%autoreload 2

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
def inference_model(model, loader, device):
    """Run `model` over every batch of `loader` and collect sigmoid masks.

    Args:
        model: network invoked as ``model(images)``; put into eval mode here.
        loader: iterable yielding ``(image_ids, images)`` batches.
        device: target handed to ``images.to(...)``.

    Returns:
        dict mapping each image id to a ``np.float32`` array holding the
        sigmoid of the model output for that image, with dimension 1 of the
        batch output squeezed (only removed when it has size 1).
    """
    model.eval()
    predictions = {}
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        for batch_ids, batch_images in tqdm(loader):
            logits = model(batch_images.to(device))
            probabilities = torch.sigmoid(logits).squeeze(1)
            batch_masks = probabilities.cpu().detach().numpy()
            predictions.update(
                (image_id, image_mask.astype(np.float32))
                for image_id, image_mask in zip(batch_ids, batch_masks)
            )
    return predictions

def eval_binarizer_triplet(mask_dict, label_path, binarizer_fn, eval_fn, device):
    """Score every binarization threshold against the stored label masks.

    For each predicted mask the matching ``<name>.png`` label is read from
    `label_path`, the prediction is binarized once per threshold, and the
    per-threshold score is folded into a running mean over all images.

    Args:
        mask_dict: mapping of image name -> predicted mask (array-like that
            ``torch.tensor`` accepts).
        label_path: directory containing one ``<name>.png`` label per image.
        binarizer_fn: object exposing ``thresholds`` (iterable of iterables)
            and ``transform(mask)``, which is assumed to yield one binarized
            mask per threshold in the same order as ``thresholds``.
        eval_fn: callable ``eval_fn(binary_mask, label)`` whose result
            supports ``.item()``.
        device: device the mask and label tensors are moved to.

    Returns:
        ``(metrics, best_metric)`` where ``metrics`` maps each threshold
        (as a tuple) to its mean score and ``best_metric`` is the highest of
        those means.

    Raises:
        ValueError: if `mask_dict` is empty or no threshold was evaluated.
        FileNotFoundError: if a label image cannot be read.
    """
    # Fail early with a clear message; previously an empty input surfaced as
    # an UnboundLocalError on `best_metric` at the return statement.
    if not mask_dict:
        raise ValueError("mask_dict is empty: there are no predictions to evaluate")

    used_thresholds = binarizer_fn.thresholds
    metrics = defaultdict(float)
    for idx, (name, mask) in enumerate(tqdm(mask_dict.items())):
        mask = torch.tensor(mask).to(torch.float32)
        mask = mask.to(device)

        label_file = os.path.join(label_path, f"{name}.png")
        # Flag 0 reads the image as single-channel grayscale.
        label = cv2.imread(label_file, 0)
        if label is None:
            # cv2.imread signals a missing/unreadable file by returning None
            # instead of raising, which would otherwise fail on the division.
            raise FileNotFoundError(f"Could not read label mask: {label_file}")
        label = label / 255.
        label = torch.tensor(label).unsqueeze(0).to(torch.float32)
        label = label.to(device)

        mask_generator = binarizer_fn.transform(mask)
        for current_thr, current_mask in zip(used_thresholds, mask_generator):
            current_metric = eval_fn(current_mask, label).item()
            current_thr = tuple(current_thr)
            # Incremental mean over the images seen so far.
            metrics[current_thr] = (metrics[current_thr] * idx + current_metric) / (idx + 1)

    if not metrics:
        raise ValueError("no thresholds were evaluated: binarizer_fn produced no masks")

    # Only the final means matter, so pick the best threshold once, after the loop.
    best_threshold = max(metrics, key=metrics.get)
    best_metric = metrics[best_threshold]
    return metrics, best_metric

def process_summary(result_path, fold_size, best_metric, metrics):
    """Append one fold's scores as a row of the CSV at `result_path`.

    The row holds ``fold_size`` and ``best_metric`` followed by one column per
    key of `metrics`; every column name is converted with ``str``.

    Args:
        result_path: ``pathlib.Path`` of the CSV; created when it is not yet a
            file, otherwise re-read, extended by one row, and rewritten.
        fold_size: value stored in the ``fold_size`` column.
        best_metric: value stored in the ``best_metric`` column.
        metrics: mapping of threshold key -> score for this fold.
    """
    threshold_columns = list(metrics.keys())

    summary = pd.DataFrame([metrics])
    summary['fold_size'] = fold_size
    summary['best_metric'] = best_metric
    # Put the two bookkeeping columns first, then the per-threshold scores.
    summary = summary[['fold_size', 'best_metric'] + threshold_columns]
    summary.columns = [str(column) for column in summary.columns]

    if result_path.is_file():
        previous_rows = pd.read_csv(result_path)
        summary = pd.concat([previous_rows, summary], ignore_index=True)
    summary.to_csv(result_path, index=False)

In [None]:
# Load the YAML configuration that drives this inference run.
# Note: despite its name, `config_folder` is the path of the YAML file itself.
experiment_folder = Path("experiments")
config_folder = experiment_folder.joinpath("configs", "Inference_train.yaml")
inference_config = load_yaml(config_folder)
print(inference_config)

{'SEED': 42, 'NUM_WORKERS': 4, 'DEVICE': 'cuda', 'BATCH_SIZE': 2, 'MODEL': {'PY': 'model', 'CLASS': 'ResUNet', 'ARGS': {'pretrained': False}}, 'CHECKPOINTS': {'FULL_FOLDER': 'resunet_1024_3', 'PIPELINE_PATH': 'experiments/resunet', 'PIPELINE_NAME': 'resunet_1024'}, 'USEFOLDS': [0, 1, 2, 3, 4], 'SELECTED_CHECKPOINTS': {'fold0': [0, 1, 3], 'fold1': [3, 7, 9], 'fold2': [6, 7, 11], 'fold3': [7, 11, 17], 'fold4': [8, 9, 10]}, 'MASK_BINARIZER': {'PY': 'binarizer', 'CLASS': 'TripletMaskBinarization', 'ARGS': {'triplets': [[0.75, 2000, 0.3], [0.7, 2000, 0.3], [0.7, 2500, 0.3], [0.7, 3000, 0.3], [0.65, 2000, 0.3], [0.65, 2500, 0.3], [0.65, 3000, 0.3], [0.6, 2000, 0.3], [0.6, 2500, 0.3], [0.6, 3000, 0.3], [0.6, 2000, 0.35], [0.6, 2500, 0.35], [0.6, 3000, 0.35], [0.6, 2000, 0.4], [0.6, 2500, 0.4], [0.6, 3000, 0.4]]}}, 'EVALUATION_METRIC': {'PY': 'losses', 'CLASS': 'dice_metric', 'ARGS': {'per_image': True}}, 'RESULT': 'result_top3.csv'}


In [None]:
# --- Runtime settings -------------------------------------------------------
batch_size = inference_config['BATCH_SIZE']
device = inference_config['DEVICE']

# --- Model ------------------------------------------------------------------
# The model class is resolved by name from the module given in the config.
module = importlib.import_module(inference_config['MODEL']['PY'])
model_class = getattr(module, inference_config['MODEL']['CLASS'])
# `ARGS` is optional: a missing or null entry must become an empty kwargs dict,
# because `**None` raises TypeError.
model = model_class(**(inference_config['MODEL'].get('ARGS') or {})).to(device)
model.eval()

# --- Checkpoints ------------------------------------------------------------
# One path per (fold folder, epoch) pair listed under SELECTED_CHECKPOINTS:
#   <PIPELINE_PATH>/<FULL_FOLDER>/<fold>/<PIPELINE_NAME>_<fold>_epoch<N>.pth
pipeline_path = Path(inference_config['CHECKPOINTS']['PIPELINE_PATH'])
pipeline_name = inference_config['CHECKPOINTS']['PIPELINE_NAME']
checkpoints_list = []
folds_dict = inference_config['SELECTED_CHECKPOINTS']
for folder_name, epoch_list in folds_dict.items():
    checkpoint_folder = Path(
        pipeline_path,
        inference_config['CHECKPOINTS']['FULL_FOLDER'],
        folder_name)
    for epoch in epoch_list:
        checkpoint_path = Path(
            checkpoint_folder,
            f'{pipeline_name}_{folder_name}_epoch{epoch}.pth'
        )
        checkpoints_list.append(checkpoint_path)

# --- Mask binarizer ---------------------------------------------------------
binarizer_module = importlib.import_module(inference_config['MASK_BINARIZER']['PY'])
binarizer_class = getattr(binarizer_module, inference_config['MASK_BINARIZER']['CLASS'])
binarizer_fn = binarizer_class(**(inference_config['MASK_BINARIZER'].get('ARGS') or {}))

# --- Evaluation metric ------------------------------------------------------
# The configured keyword arguments are bound once so the metric can later be
# called with just (prediction, label).
eval_module = importlib.import_module(inference_config['EVALUATION_METRIC']['PY'])
eval_fn = getattr(eval_module, inference_config['EVALUATION_METRIC']['CLASS'])
eval_fn = functools.partial(eval_fn, **(inference_config['EVALUATION_METRIC'].get('ARGS') or {}))

# --- Output and data --------------------------------------------------------
result_path = Path(experiment_folder, inference_config['RESULT'])

test_transform = albu.Compose([
    albu.Resize(1024, 1024, always_apply=True),
    albu.Normalize()
])

num_workers = inference_config['NUM_WORKERS']
positive_names = np.load("data/2img_mask_npy/positive_imgs_names.npy")
negative_names = np.load("data/2img_mask_npy/negative_imgs_names.npy")
train_names = np.concatenate((positive_names, negative_names))
fold_labels = np.load("./data/2img_mask_npy/fold_labels.npy")
label_path = 'data/1img_mask/mask/'

In [None]:
# Evaluate each validation fold: average the sigmoid masks of all selected
# checkpoints, score every binarization threshold, and append one CSV row.
#
# The folds come from the config's USEFOLDS entry; the original hard-coded
# five folds remain the fallback when that key is absent.
#
# NOTE(review): checkpoints_list contains the checkpoints of every fold in
# SELECTED_CHECKPOINTS, so each validation fold is scored by an ensemble that
# presumably includes models trained on that fold's images -- confirm this is
# intended, since it would inflate the reported metric.
# NOTE(review): process_summary appends to the CSV, so re-running this cell
# adds duplicate rows to an existing result file.
for fold_id in inference_config.get('USEFOLDS', range(5)):

    print(f"Fold {fold_id}")

    dataset = PneumoDataset(
        mode='valtest',
        fold_index=fold_id,
        train_names=train_names,
        fold_labels=fold_labels,
        transform=test_transform,
    )
    dataloader = DataLoader(
        dataset=dataset,
        batch_size=batch_size,
        num_workers=num_workers,
        shuffle=False
    )

    fold_size = len(dataset)
    print(f"Data amount: {fold_size}")

    # defaultdict(int) supplies 0 as the starting value of each running mean.
    mask_dict = defaultdict(int)
    for pred_idx, checkpoint_path in enumerate(checkpoints_list):
        print(checkpoint_path)
        # map_location lets checkpoints saved on another device load here.
        # NOTE(review): torch.load unpickles the file; consider
        # weights_only=True if the checkpoints are plain state dicts.
        state_dict = torch.load(checkpoint_path, map_location=device)
        model.load_state_dict(state_dict)
        current_mask_dict = inference_model(model, dataloader, device)
        for name, mask in tqdm(current_mask_dict.items()):
            # Incremental mean of this image's mask over the checkpoints so far.
            mask_dict[name] = (mask_dict[name] * pred_idx + mask) / (pred_idx + 1)

    metrics, best_metric = eval_binarizer_triplet(mask_dict, label_path, binarizer_fn, eval_fn, device)

    process_summary(result_path, fold_size, best_metric, metrics)


Fold 0
Data amount: 2410
experiments/resunet/resunet_1024_3/fold0/resunet_1024_fold0_epoch0.pth


  model.load_state_dict(torch.load(checkpoint_path))
  8%|▊         | 94/1205 [01:33<06:26,  2.87it/s]