In [1]:
# load the model first, run the evaluator on the model.

import pandas as pd
import numpy as np
import torch
import utils.print as print_f
import os

from utils.engine import evaluate, get_iou_types
from utils.plot import plot_losses, plot_performance

from models.utils import get_model_size_in_MB
from models.build import create_model_from_setup
from models.setup import ModelSetup
from models.train import TrainingInfo
from utils.save import check_best, end_train, get_data_from_metric_logger
from data.load import get_datasets, get_dataloaders
from IPython.display import clear_output
from utils.eval import get_ap_ar, get_ap_ar_for_train_val
from utils.train import get_optimiser, get_lr_scheduler, print_params_setup, get_coco_eval_params, get_dynamic_loss
from utils.init import reproducibility, clean_memory_get_device
from data.paths import MIMIC_EYE_PATH
# from datetime import datetime
# from models.dynamic_loss import DynamicWeightedLoss
from utils.engine import train_one_epoch, evaluate
from torch import optim
from data.paths import MIMIC_EYE_PATH
from models.load import get_trained_model
# from data.load import seed_worker, get_dataloader_g


## Suppress pandas' chained-assignment warning (SettingWithCopyWarning).
pd.options.mode.chained_assignment = None  # default='warn'

## Suppress ALL Python warnings (every category, not only UserWarning) for this session.
import warnings
warnings.filterwarnings("ignore")

%matplotlib inline

In [2]:
from enum import Enum

class TrainedModels(Enum):
    """Registry of trained lesion-detection checkpoints, one member per backbone/variant.

    Each value is an identifier string of the visible form
    ``val_<task>_ap_<..>_test_<task>_ap_<..>_epoch<N>_<date> <time>_<description>``.
    The ``epoch<N>`` token is what ``get_epoch_from_model`` parses. Members are
    passed to ``get_trained_model`` — presumably the string names the saved
    checkpoint on disk (confirm in ``models.load``).

    NOTE: the values are runtime identifiers. The spelling ``dsetection`` is part
    of the identifier and must not be "corrected" here.
    """

    mobilenet_baseline = "val_lesion-detection_ap_0_1655_test_lesion-detection_ap_0_1648_epoch50_03-15-2023 16-43-54_lesion_dsetection_baseline_mobilenet"  # mobilenet baseline
    mobilenet_with_fix = "val_lesion-detection_ap_0_1918_test_lesion-detection_ap_0_1903_epoch16_03-16-2023 11-34-10_lesion_dsetection_with_fixation_mobilenet"
    resnet18_baseline = "val_lesion-detection_ap_0_1973_test_lesion-detection_ap_0_2010_epoch22_03-16-2023 19-44-55_lesion_dsetection_baseline_resnet"
    resnet18_with_fix = "val_lesion-detection_ap_0_1951_test_lesion-detection_ap_0_2195_epoch12_03-17-2023 00-31-54_lesion_dsetection_with_fixation_resnet"
    densenet161_baseline = "val_lesion-detection_ap_0_1990_test_lesion-detection_ap_0_2085_epoch5_03-17-2023 08-53-33_lesion_dsetection_baseline_densenet161"
    densenet161_with_fix = "val_lesion-detection_ap_0_2120_test_lesion-detection_ap_0_2104_epoch12_03-17-2023 18-36-01_lesion_dsetection_with_fixation_densenet161"
    efficientnet_b5_baseline = "val_lesion-detection_ap_0_1898_test_lesion-detection_ap_0_2055_epoch5_03-17-2023 23-30-57_lesion_dsetection_baseline_efficientnet_b5"
    efficientnet_b5_with_fix = "val_lesion-detection_ap_0_2117_test_lesion-detection_ap_0_2190_epoch8_03-18-2023 12-29-20_lesion_dsetection_with_fixation_efficientnet_b5"
    efficientnet_b0_baseline = "val_lesion-detection_ap_0_1934_test_lesion-detection_ap_0_1858_epoch10_03-18-2023 23-50-47_lesion_dsetection_baseline_efficientnet_b0"
    efficientnet_b0_with_fix = "val_lesion-detection_ap_0_2191_test_lesion-detection_ap_0_2162_epoch10_03-18-2023 19-38-11_lesion_dsetection_with_fixation_efficientnet_b0"
    convnext_base_with_fix = "val_lesion-detection_ap_0_2610_test_lesion-detection_ap_0_2548_epoch22_03-22-2023 02-55-37_lesion_dsetection_with_fixation_convnext_base"
    convnext_base_baseline = "val_lesion-detection_ap_0_2426_test_lesion-detection_ap_0_2325_epoch20_03-22-2023 11-53-53_lesion_dsetection_baseline_convnext_base"
    vgg16_with_fix = "val_lesion-detection_ap_0_2301_test_lesion-detection_ap_0_2186_epoch22_03-20-2023 19-26-02_lesion_dsetection_with_fixation_vgg16"
    vgg16_baseline = "val_lesion-detection_ap_0_2113_test_lesion-detection_ap_0_2068_epoch12_03-21-2023 00-45-24_lesion_dsetection_baseline_vgg16"
    regnet_y_8gf_with_fix = "val_lesion-detection_ap_0_2267_test_lesion-detection_ap_0_2029_epoch12_03-21-2023 11-28-48_lesion_dsetection_with_fixation_regnet_y_8gf"
    regnet_y_8gf_baseline = "val_lesion-detection_ap_0_1883_test_lesion-detection_ap_0_1658_epoch13_03-21-2023 15-22-32_lesion_dsetection_baseline_regnet_y_8gf"

In [3]:
# Select the compute device used by every later cell.
# NOTE(review): the helper name suggests it also frees cached memory first — confirm in utils.init.
device = clean_memory_get_device()
# Presumably seeds the random number generators so the evaluation is repeatable — TODO confirm in utils.init.
reproducibility()

This notebook will running on device: [CUDA]


In [4]:
def get_epoch_from_model(select_model):
    """Return the epoch number encoded in a ``TrainedModels`` member's value.

    The value is split on ``"_"`` and the first token containing ``"epoch"``
    (e.g. ``"epoch50"``) is converted to an ``int`` after removing the
    ``"epoch"`` marker.

    Args:
        select_model: An enum member whose ``.value`` is the identifier string.

    Returns:
        The epoch as an ``int`` (``50`` for a value containing ``_epoch50_``).

    Raises:
        ValueError: If no token contains ``"epoch"``, or the remainder of the
            token is not an integer.
    """
    for token in select_model.value.split("_"):
        if "epoch" in token:
            return int(token.replace("epoch", ""))
    # Fail with a message that names the offending identifier instead of an
    # opaque "list index out of range".
    raise ValueError(
        f"No 'epoch<N>' token found in model identifier: {select_model.value!r}"
    )
def get_model_name(select_model):
    """Return the member name of an enum member (e.g. ``"densenet161_with_fix"``).

    Uses ``Enum.name`` directly rather than parsing ``str(member)``, whose
    format is not guaranteed to be ``"Class.member"`` for every enum flavour.

    Args:
        select_model: An ``Enum`` member such as ``TrainedModels.vgg16_baseline``.

    Returns:
        The member's name as a ``str``.
    """
    return select_model.name

In [5]:
# Checkpoint evaluated by this notebook; switch the member to evaluate a different model.
select_model = TrainedModels.densenet161_with_fix

In [6]:
# Restore the selected checkpoint. Of the four returned objects, only `model`
# and `train_info` are used by the cells visible in this notebook.
model, train_info, optimizer, dynamic_loss_weight = get_trained_model(
        select_model,
        device,
    )

model.to(device)
# Switch to evaluation mode (assumes `model` is a torch.nn.Module — confirm in models.load).
model.eval()

# End the cell with a bare print() so the long model repr returned by
# model.eval() is not displayed as the cell output.
print()

Using pretrained backbone. densenet161
Using pretrained backbone. densenet161
Using SGD as optimizer with lr=0.01



In [7]:
# Model/training configuration that was stored alongside the checkpoint.
setup = train_info.model_setup
# Presumably the evaluation types the evaluator should compute for this
# model/setup (e.g. "bbox", which is indexed later) — confirm in utils.engine.
iou_types = get_iou_types(model, setup)

In [8]:
from data.strs import SourceStrs, TaskStrs

# Keyword settings forwarded to get_datasets(); every entry is derived from the
# checkpoint's `setup` so the data matches what the model was trained with.
# NOTE(review): the spellings "with_clincal_input", `fiaxtions_mode_input` and
# `fiaxtions_mode_label` are runtime names that presumably match the project's
# dataset/setup definitions — confirm there before "fixing" them here.
dataset_params_dict = {
        "MIMIC_EYE_PATH": MIMIC_EYE_PATH,
        "labels_cols": setup.lesion_label_cols,
        # Input modalities: enabled when the corresponding source is in setup.sources.
        "with_xrays_input": SourceStrs.XRAYS in setup.sources,
        "with_clincal_input": SourceStrs.CLINICAL in setup.sources,
        "with_fixations_input": SourceStrs.FIXATIONS in setup.sources,
        "fixations_mode_input": setup.fiaxtions_mode_input,
        # Labels: enabled when the corresponding task is in setup.tasks.
        "with_bboxes_label": TaskStrs.LESION_DETECTION in setup.tasks,
        "with_fixations_label": TaskStrs.FIXATION_GENERATION in setup.tasks,
        "fixations_mode_label": setup.fiaxtions_mode_label,
        "with_chexpert_label": TaskStrs.CHEXPERT_CLASSIFICATION in setup.tasks,
        "with_negbio_label": TaskStrs.NEGBIO_CLASSIFICATION in setup.tasks,
        "clinical_numerical_cols": setup.clinical_num,
        "clinical_categorical_cols": setup.clinical_cat,
        # Image preprocessing parameters taken from the setup.
        "image_size": setup.image_size,
        "image_mean": setup.image_mean,
        "image_std": setup.image_std,
    }

In [9]:
# Build the datasets from the checkpoint-derived parameters.
detect_eval_dataset, train_dataset, val_dataset, test_dataset = get_datasets(
    dataset_params_dict=dataset_params_dict,
)

# Wrap the splits in dataloaders using the batch size stored in the setup.
train_dataloader, val_dataloader, test_dataloader = get_dataloaders(
    train_dataset, val_dataset, test_dataset, batch_size=setup.batch_size,
)

# COCO-style evaluation parameters for the lesion-detection task.
# (The former `train_coco = None` placeholder was removed: the unpacking below
# always assigns `train_coco`, so the placeholder had no effect.)
train_coco, val_coco, test_coco, eval_dict = get_coco_eval_params(
    source_name=SourceStrs.XRAYS,
    task_name=TaskStrs.LESION_DETECTION,
    train_dataloader=train_dataloader,
    val_dataloader=val_dataloader,
    test_dataloader=test_dataloader,
    detect_eval_dataset=detect_eval_dataset,
    # IoU thresholds 0.50 to 0.95 in steps of 0.05, kept as literals so the
    # values compare exactly equal wherever they are looked up.
    iou_thrs=np.array([0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95]),
    use_iobb=setup.use_iobb,
    maxDets=[1, 5, 10, 30, 100],
)

In [10]:
# Run the model over the test split. The first return value is kept (later
# cells index it by task name, e.g. test_evaluator["lesion-detection"]); the
# second return value is discarded.
test_evaluator, _ = evaluate(
    setup=setup,
    model=model,
    data_loader=test_dataloader,
    device=device,
    params_dict=eval_dict,
    coco=test_coco,
    iou_types=iou_types,
    # score_thres=score_thres,
)

Evaluation:  [ 0/57]  eta: 0:03:15  loss: 0.3649 (0.3649)  lesion-detection_performer-object_detection_loss_classifier: 0.1999 (0.1999)  lesion-detection_performer-object_detection_loss_box_reg: 0.1117 (0.1117)  lesion-detection_performer-object_detection_loss_objectness: 0.0490 (0.0490)  lesion-detection_performer-object_detection_loss_rpn_box_reg: 0.0043 (0.0043)  model_time: 1679757696.0000 (1679757653.3429)  evaluator_time: 0.0000 (0.0000)  time: 3.4286  data: 1.9727  max mem: 1137
Evaluation:  [56/57]  eta: 0:00:02  loss: 0.2141 (0.2027)  lesion-detection_performer-object_detection_loss_classifier: 0.1060 (0.1041)  lesion-detection_performer-object_detection_loss_box_reg: 0.0781 (0.0697)  lesion-detection_performer-object_detection_loss_objectness: 0.0244 (0.0253)  lesion-detection_performer-object_detection_loss_rpn_box_reg: 0.0038 (0.0036)  model_time: 1679757696.0000 (1679757715.3251)  evaluator_time: 0.0000 (0.0000)  time: 2.1903  data: 1.9306  max mem: 1139
Evaluation: Total 

In [11]:
def get_ap_ar_str(eval_dict, p):
    """Format one AP/AR statistic as a COCO-style summary line.

    Args:
        eval_dict: Mapping with keys ``'ap'`` (1 selects the precision labels,
            anything else the recall labels), ``'iouThr'`` (a single threshold,
            or ``None`` for the full range), ``'areaRng'`` (area label string),
            ``'maxDets'`` (int) and ``'mean_s'`` (the metric value).
        p: Object exposing ``iouThrs``; its first and last entries are printed
            when ``'iouThr'`` is ``None``.

    Returns:
        The formatted summary line as a ``str``.
    """
    is_precision = eval_dict['ap'] == 1
    iou_thr = eval_dict['iouThr']
    area_label = eval_dict['areaRng']
    max_dets = eval_dict['maxDets']
    metric_value = eval_dict['mean_s']

    if is_precision:
        title, tag = 'Average Precision', '(AP)'
    else:
        title, tag = 'Average Recall', '(AR)'

    # No single threshold means the value is averaged over the whole IoU range.
    if iou_thr is None:
        iou_label = '{:0.2f}:{:0.2f}'.format(p.iouThrs[0], p.iouThrs[-1])
    else:
        iou_label = '{:0.2f}'.format(iou_thr)

    template = ' {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}'
    return template.format(title, tag, iou_label, area_label, max_dets, metric_value)


In [12]:
# Build the AP/AR summary text, one line per entry in the bbox evaluator's stats.
# The evaluator lookup is done once and the lines are joined in a single pass
# instead of growing the string with += inside the loop.
bbox_coco_eval = test_evaluator['lesion-detection'].coco_eval['bbox']
printing_str = "".join(
    f"{get_ap_ar_str(state, bbox_coco_eval.params)}\n"
    for state in bbox_coco_eval.stats
)

In [13]:
print(printing_str)
# TODO: also store this summary string on the evaluator so it is kept with the results.

 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.123
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.079
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=  5 ] = 0.117
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.122
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets= 30 ] = 0.123
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.123
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets= 10 ] = 0.210
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets= 10 ] = 0.110
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets= 10 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets= 10 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets= 10 ] = 0.122
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.173
 Average Recall     (AR) @[ IoU=0.50:0

In [14]:
# evalImgs= test_evaluator['lesion-detection'].coco_eval['bbox'].evalImgs
# p = test_evaluator['lesion-detection'].coco_eval['bbox'].params
# self.evalImgs = [evaluateImg(imgId, catId, areaRng, maxDet)
#             for catId in catIds
#             for areaRng in p.areaRng
#             for imgId in p.imgIds
#         ]


In [15]:
def get_mean(s):
    """Average the entries of ``s`` that are greater than -1.

    Entries ``<= -1`` act as "no value" placeholders and are excluded.

    Args:
        s: Array supporting boolean-mask indexing (e.g. a NumPy array).

    Returns:
        ``-1`` when no entry is greater than -1, otherwise the mean of the
        remaining entries.
    """
    valid = s[s > -1]
    if len(valid) == 0:
        return -1
    return np.mean(valid)

In [16]:
# Per-image AP/AR for lesion detection.
#
# This mirrors the accumulation step of COCO evaluation, but accumulates the
# per-image evaluation records of ONE image at a time (area range "all",
# maxDet detections) so that every image gets its own AP and AR.
lesion_detection_evaluator = test_evaluator["lesion-detection"]
evalImgs = lesion_detection_evaluator.coco_eval["bbox"].evalImgs
p = lesion_detection_evaluator.coco_eval["bbox"].params
maxDet = 100
# Only records evaluated over this exact area range are used.
all_area_rng = [0, 10000000000.0]
image_id_precission_recall_dict = {}

# Sizes of the result arrays; they do not depend on the image.
R = len(p.recThrs)
T = len(p.iouThrs)
K = len(p.catIds) if p.useCats else 1

# Group the relevant evaluation records by image in a single pass instead of
# rescanning the whole evalImgs list for every image. Record order is kept.
evals_by_image = {}
for e in evalImgs:
    if e is not None and e["aRng"] == all_area_rng and e["maxDet"] == maxDet:
        evals_by_image.setdefault(e["image_id"], []).append(e)

for image_id in p.imgIds:
    # -1 marks "no value"; get_mean() ignores those entries.
    precision = -np.ones((T, R, K))
    recall = -np.ones((T, K))
    scores = -np.ones((T, R, K))

    all_E = evals_by_image.get(image_id, [])
    for k, k0 in enumerate(p.catIds):
        E = [e for e in all_E if e["category_id"] == k0]
        if len(E) == 0:
            continue

        dtScores = np.concatenate([e["dtScores"][0:maxDet] for e in E])

        # different sorting method generates slightly different results.
        # mergesort is used to be consistent as Matlab implementation.
        inds = np.argsort(-dtScores, kind="mergesort")
        dtScoresSorted = dtScores[inds]

        dtm = np.concatenate([e["dtMatches"][:, 0:maxDet] for e in E], axis=1)[:, inds]
        dtIg = np.concatenate([e["dtIgnore"][:, 0:maxDet] for e in E], axis=1)[:, inds]
        gtIg = np.concatenate([e["gtIgnore"] for e in E])
        npig = np.count_nonzero(gtIg == 0)

        if npig == 0:
            # No non-ignored ground truth for this category in this image:
            # leave the -1 placeholders in place.
            continue

        tps = np.logical_and(dtm, np.logical_not(dtIg))
        fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg))

        tp_sum = np.cumsum(tps, axis=1).astype(dtype=float)
        fp_sum = np.cumsum(fps, axis=1).astype(dtype=float)

        for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)):
            nd = len(tp)
            rc = tp / npig
            # Plain Python lists are faster than NumPy for the element-wise
            # access done below.
            pr = (tp / (fp + tp + np.spacing(1))).tolist()
            q = [0.0] * R
            ss = np.zeros((R,))

            # Final recall at this IoU threshold (0 when there are no detections).
            recall[t, k] = rc[-1] if nd else 0

            # Make precision monotonically non-increasing from right to left.
            for i in range(nd - 1, 0, -1):
                if pr[i] > pr[i - 1]:
                    pr[i - 1] = pr[i]

            # Index of the first detection reaching each recall threshold.
            rec_inds = np.searchsorted(rc, p.recThrs, side="left")
            try:
                for ri, pi in enumerate(rec_inds):
                    q[ri] = pr[pi]
                    ss[ri] = dtScoresSorted[pi]
            except IndexError:
                # A recall threshold beyond the achieved recall maps past the
                # last detection; the remaining entries intentionally stay 0.
                pass

            precision[t, :, k] = np.array(q)
            scores[t, :, k] = ss

    image_id_precission_recall_dict[image_id] = {
        "ap": get_mean(precision),
        "ar": get_mean(recall),
    }


In [17]:
from collections import OrderedDict

# Map image id -> AP, keep only images with AP strictly above 0 (this drops the
# -1 "no value" placeholders as well as exact zeros), ordered from best to worst.
image_id_precission_dict = OrderedDict(
    sorted(
        (
            (image_id, metrics['ap'])
            for image_id, metrics in image_id_precission_recall_dict.items()
            if metrics['ap'] > 0.0
        ),
        key=lambda pair: pair[1],
        reverse=True,
    )
)

In [18]:
# Image ids in the dict's order (highest AP first).
ap_rank_img_id = list(image_id_precission_dict)

In [19]:
# First five entries of the AP ranking. Despite the "idxs" name these are image ids.
best_5_idxs = ap_rank_img_id[:5]

In [20]:
# Last five entries of the AP ranking (image ids). The ranking only contains
# images with AP > 0, so images with zero AP are not represented here.
worst_5_idxs = ap_rank_img_id[-5:]

In [21]:
# Display the full image-id -> AP ranking (this prints every ranked image).
image_id_precission_dict

OrderedDict([(100, 1.0),
             (182, 1.0),
             (122, 0.9999999999999998),
             (134, 0.9999999999999998),
             (153, 0.9999999999999998),
             (176, 0.9999999999999998),
             (232, 0.9999999999999998),
             (265, 0.9999999999999998),
             (315, 0.9999999999999998),
             (390, 0.9999999999999998),
             (405, 0.9999999999999998),
             (416, 0.9999999999999998),
             (441, 0.9999999999999998),
             (135, 0.95),
             (123, 0.9333333333333332),
             (348, 0.9199999999999998),
             (422, 0.900990099009901),
             (0, 0.8999999999999999),
             (12, 0.8999999999999999),
             (26, 0.8999999999999999),
             (36, 0.8999999999999999),
             (192, 0.8999999999999999),
             (215, 0.8999999999999999),
             (328, 0.8834983498349834),
             (98, 0.8511551155115512),
             (4, 0.85),
             (137, 0.85),
 

In [25]:
# print(test_evaluator['lesion-detection'])

In [26]:
# all_len = [len(train_dataset[i][1]['lesion-detection']['boxes']) for i in  range(len(train_dataset))]

# max(all_len)
# np.mean(all_len)
# all_len.sort()
# all_len[::-1]