In [1]:
import os, json, io, torch
import numpy as np
import pandas as pd
# from models.load import TrainedModels

from utils.engine import get_iou_types, evaluate
from models.load import get_trained_model
from utils.print import print_title
from utils.init import reproducibility, clean_memory_get_device
from data.load import get_datasets, get_dataloaders
from data.paths import MIMIC_EYE_PATH
from tqdm import tqdm
from utils.train import  get_coco_eval_params
from utils.coco_eval import get_eval_params_dict
from data.strs import SourceStrs, TaskStrs
from IPython.display import clear_output
from coco_froc_analysis.froc.froc_curve import get_froc_curve, get_interpolate_froc
from utils import detect_utils

## Suppress the assignement warning from pandas.
pd.options.mode.chained_assignment = None  # default='warn

## Supress user warning
import warnings
warnings.filterwarnings("ignore")

from models.components.task_performers import MultiBinaryClassificationPerformer, ObjectDetectionPerformer, RegressionPerformer
from utils.engine import ClassificationEvaluator, RegressionEvaluator
from data.helpers import map_every_thing_to_device
import time, torch
from enum import Enum


%matplotlib inline

In [2]:
class TrainedModel(Enum):
    clinical_model = "val_gender-classification_accuracy_0_6963_test_gender-classification_accuracy_0_5809_epoch31_04-11-2023 01-53-53_clinical_predictions"
    gender_prediction = "val_gender-classification_accuracy_0_9556_test_gender-classification_accuracy_0_9338_epoch16_04-11-2023 13-54-11_clinical_predictions"
    age_prediction = "val_age-regression_r2_0_0652_test_age-regression_r2_0_0416_epoch40_04-11-2023 20-34-04_age_predictions"

In [3]:
select_model = TrainedModel.age_prediction

In [4]:
device = clean_memory_get_device()
reproducibility()

model, train_info, _, _ = get_trained_model(
            select_model,
            device,
        )
model = model.to(device)
model.eval()


setup = train_info.model_setup
iou_types = get_iou_types(model, setup)

dataset_params_dict = {
    "MIMIC_EYE_PATH": MIMIC_EYE_PATH,
    "labels_cols": setup.lesion_label_cols,
    "with_xrays_input": SourceStrs.XRAYS in setup.sources,
    "with_clincal_input": SourceStrs.CLINICAL in setup.sources,
    "with_fixations_input": SourceStrs.FIXATIONS in setup.sources,
    "fixations_mode_input": setup.fiaxtions_mode_input,
    "with_bboxes_label": TaskStrs.LESION_DETECTION in setup.tasks,
    "with_fixations_label": TaskStrs.FIXATION_GENERATION in setup.tasks,
    "fixations_mode_label": setup.fiaxtions_mode_label,
    "with_chexpert_label": TaskStrs.CHEXPERT_CLASSIFICATION in setup.tasks,
    "with_negbio_label": TaskStrs.NEGBIO_CLASSIFICATION in setup.tasks,
    "clinical_numerical_cols": setup.clinical_num,
    "clinical_categorical_cols": setup.clinical_cat,
    "image_size": setup.image_size,
    "image_mean": setup.image_mean,
    "image_std": setup.image_std,
    "with_clinical_label": setup.with_clinical_label,
}

detect_eval_dataset, train_dataset, val_dataset, test_dataset = get_datasets(
    dataset_params_dict=dataset_params_dict,
)

train_dataloader, val_dataloader, test_dataloader = get_dataloaders(
    train_dataset,
    val_dataset,
    test_dataset,
    batch_size=setup.batch_size,
)

train_coco = None
val_coco = None
test_coco = None
eval_params_dict = None

This notebook will running on device: [CUDA]
Using pretrained backbone. resnet50
Using SGD as optimizer with lr=0.001


In [5]:
data_loader = test_dataloader

In [6]:
n_threads = torch.get_num_threads()
# FIXME remove this and make paste_masks_in_image run on the GPU
torch.set_num_threads(1)

model.eval()
metric_logger = detect_utils.MetricLogger(delimiter="  ")
header = "Evaluation:"
# coco_evaluator = CocoEvaluator(coco, iou_types, params_dict)

evaluators = {}

all_prs = []
all_gts = []

for k, v in model.task_performers.items():
    if isinstance(v, MultiBinaryClassificationPerformer):
        evaluators[k] = ClassificationEvaluator()
    elif isinstance(v, RegressionPerformer):
            evaluators[k] = RegressionEvaluator()
    else:
        raise ValueError(f"Task-{k} doesn't have an evaluator.")

for data in metric_logger.log_every(data_loader, 100, header):
    inputs, targets = data_loader.dataset.prepare_input_from_data(data)

    inputs = map_every_thing_to_device(inputs, device)
    targets = map_every_thing_to_device(targets, device)

    if torch.cuda.is_available():
        torch.cuda.synchronize()

    model_time = time.time()
    outputs = model(inputs, targets=targets)

    all_losses = {}
    for task in outputs.keys():
        all_losses.update(
            {
                f"{task}_{model.task_performers[task].name}_{k}": v
                for k, v in outputs[task]["losses"].items()
            }
        )

    loss_dict_reduced = detect_utils.reduce_dict(all_losses)
    losses_reduced = sum(loss for loss in loss_dict_reduced.values())

    for k in model.task_performers.keys():            
        evaluators[k].update(outputs[k]["outputs"], [t[k] for t in targets])

    gts = map_every_thing_to_device(targets, "cpu")
    prs = map_every_thing_to_device(outputs, "cpu") # test if this operation is possible.
    all_gts.append(gts)
    all_prs.append(prs)


# gather the stats from all processes
metric_logger.synchronize_between_processes()
print("Averaged stats:", metric_logger)

torch.set_num_threads(n_threads)

Evaluation:  [ 0/34]  eta: 0:00:58    time: 1.7209  data: 0.5283  max mem: 1925
Evaluation:  [33/34]  eta: 0:00:00    time: 0.5918  data: 0.5298  max mem: 2197
Evaluation: Total time: 0:00:20 (0.6074 s / it)
Averaged stats: 


In [7]:
processed_gts = []

for gts in all_gts:
    for gt in gts:
        dict_to_append = {}
        for k in gt.keys():
            clinical_attribute, task_name  = k.split("-")
            dict_to_append.update({clinical_attribute: gt[k][f"{task_name}s"].item()})
        processed_gts.append(dict_to_append)

        # processed_gts.append({
        #     "age": gt['age-regression']['regressions'].item(),
        #     "temperature": gt['temperature-regression']['regressions'].item(),
        #     "heartrate": gt['heartrate-regression']['regressions'].item(),
        #     "resprate": gt['resprate-regression']['regressions'].item(),
        #     "o2sat": gt['o2sat-regression']['regressions'].item(),
        #     "sbp": gt['sbp-regression']['regressions'].item(),
        #     "dpb": gt['dpb-regression']['regressions'].item(),
        #     "gender": gt['gender-classification']['classifications'].item()
        # })

In [8]:
processed_prs = []
for prs in all_prs:
    zip_list = [prs[k]["outputs"] for k in prs.keys()]
    for attrs in zip(*zip_list):
        dict_to_append = {}
        for k, attr in zip(list(prs.keys()), attrs):
            clinical_attribute, task_name  = k.split("-")
            dict_to_append.update({clinical_attribute : attr.item()})
        processed_prs.append(dict_to_append)

In [9]:
processed_gts[0], processed_prs[0]

({'age': 66,
  'temperature': 98.1,
  'heartrate': 85.6,
  'resprate': 18.2,
  'o2sat': 97.8,
  'sbp': 132.9,
  'dpb': 73.5,
  'acuity': 1.0,
  'gender': True},
 {'age': 63.2711067199707})

In [10]:
from sklearn.metrics import accuracy_score, r2_score
import pandas as pd

In [11]:
r2_score([ gt['age'] for gt in processed_gts], [ pr['age'] for pr in processed_prs])
# accuracy_score([ gt['gender'] for gt in processed_gts], [ pr['gender'] >= 0.5 for pr in processed_prs],)

0.04156145875590622

In [12]:
gt_df = pd.DataFrame(processed_gts)
gt_df.to_csv("age_gt.csv")
pr_df = pd.DataFrame(processed_prs)
pr_df.to_csv("age_pr.csv")

In [13]:
# gender, acc = 0.9338235294117647
# age, r2 =