

<img src="https://dl.fbaipublicfiles.com/detectron2/Detectron2-Logo-Horz.png" width="500">


# Metrics extraction and model selection
Import detectron2 and other dependencies

In [None]:
import torch
import torchvision

# Report the installed PyTorch build and whether a CUDA device is usable.
print(torch.__version__)
print(torch.cuda.is_available())

In [None]:
# You may need to restart your runtime prior to this, to let your installation take effect
# Some basic setup
# Setup detectron2 logger
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import matplotlib.pyplot as plt
import numpy as np
import cv2
import os
import json
import yaml
from pathlib import Path
import pandas as pd
import random

# import some common detectron2 utilities
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.engine import DefaultTrainer
from detectron2 import model_zoo
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader
from detectron2.evaluation import PascalVOCDetectionEvaluator
from detectron2.data.datasets import register_coco_instances
from detectron2.utils.visualizer import ColorMode



# Paths and checks for the data


In [None]:
%env RGB=False
%env first_iter=False

In [None]:
%env DIR=/eos/jeodpp/data/projects/REFOCUS/data/swalim_v2


Set all the paths needed to run the evaluation stage and load the test or validation set.

In [None]:
# Resolve every input/output location from the notebook environment flags.
base_dir = os.getenv('DIR')
is_first_iter = os.getenv('first_iter') == 'True'
is_pancro = os.getenv('RGB') == 'False'

# (first_iter, panchromatic) -> (annotation dir, image dir, results dir), all relative to DIR
layout = {
    (True, True): ('inputs/cvat/pancro_first_iter/', 'inputs/pancro_first', 'outputs/first_iter/pancro/'),
    (True, False): ('inputs/cvat/rgb_first_iter/', 'inputs/rgb_first', 'outputs/first_iter/rgb/'),
    (False, True): ('inputs/cvat/pancro_astrid_300/', 'pancro_300', 'outputs/second_iter/pancro_300/'),
    (False, False): ('inputs/cvat/rgb_astrid_300/', 'RGB_300', 'outputs/second_iter/rgb/'),
}
ann_rel, img_rel, out_rel = layout[(is_first_iter, is_pancro)]
path_ann = '{}/{}'.format(base_dir, ann_rel)
path_imgs = '{}/{}'.format(base_dir, img_rel)
results_path = '{}/{}'.format(base_dir, out_rel)

# Both iterations evaluate on val.json; only the printed label differs.
# NOTE(review): the second-iteration label mentions "test" although val.json is loaded - confirm intent.
if is_first_iter:
    val_label = "Number of images for validation and model selection: {}, number of annotations:{}"
else:
    val_label = "Number of images for test and final metrics: {}, number of annotations:{}"

test = path_ann + 'val.json'
with open(test) as json_file:
    data = json.load(json_file)
aux, aux2 = data['images'], data['annotations']
print(val_label.format(len(aux), len(aux2)))

# Training annotations live next to the validation ones.
train = path_ann + 'train.json'
with open(train) as json_file:
    data = json.load(json_file)
aux, aux2 = data['images'], data['annotations']
print("Number of images for train: {}, number of annotations:{}".format(len(aux), len(aux2)))

# Evaluation
Register the dataset to detectron2, following the [detectron2 custom dataset tutorial](https://detectron2.readthedocs.io/tutorials/datasets.html).


In [None]:
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.data.datasets import register_coco_instances

# Drop any previous registration so this notebook can be re-run from here.
# Each name/catalog pair is removed independently: a missing entry must not
# prevent the remaining ones from being cleaned up. The metadata entries are
# removed as well so the datasets can be registered again with other paths.
for dataset_name in ("swalim_train", "swalim_val"):
    for catalog in (DatasetCatalog, MetadataCatalog):
        try:
            catalog.remove(dataset_name)
        except KeyError:
            # Not registered (yet): nothing to clean up.
            pass

In [None]:
# Register both splits as COCO-format datasets sharing the same image folder.
for split_name, annotation_file in (("swalim_train", train), ("swalim_val", test)):
    register_coco_instances(split_name, {}, annotation_file, path_imgs)

# Access the validation split once through both catalogs (results are not kept).
MetadataCatalog.get("swalim_val")
DatasetCatalog.get("swalim_val")

# Keep the training metadata and records for later cells.
swalim_metadata = MetadataCatalog.get("swalim_train")
dataset_dicts = DatasetCatalog.get("swalim_train")

Load the functions needed for the inference






In [None]:
def load_conf_file(path, score_thresh=0.5):
    """Build an inference config from a training run's config file.

    The trained weights are taken from ``model_final.pth`` located in the
    same directory as the config file.

    Args:
        path: Location of the run's ``config.yaml`` (``str`` or path-like).
        score_thresh: Value assigned to ``MODEL.ROI_HEADS.SCORE_THRESH_TEST``.

    Returns:
        The config node with the datasets, data-loader workers, weights and
        test score threshold set.
    """
    config_file_path = str(path)
    # Resolve the weights relative to the config's directory rather than by
    # splitting the path string on the file name.
    weights_path = os.path.join(os.path.dirname(config_file_path), "model_final.pth")
    print(weights_path)

    cfg = get_cfg()
    cfg.merge_from_file(config_file_path)

    # Use the dataset names registered in this notebook.
    cfg.DATASETS.TRAIN = ("swalim_train", )
    cfg.DATASETS.TEST = ("swalim_val", )
    cfg.DATALOADER.NUM_WORKERS = 4
    cfg.MODEL.WEIGHTS = weights_path
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = score_thresh

    return cfg

In [None]:
def Evaluator(cfg, predictor, dataset_dicts, IoUThresh, dataset):
    """Run COCO evaluation plus a box-matching precision/recall pass.

    Args:
        cfg: Config used to build the test data loader.
        predictor: Callable predictor exposing ``.model``; called on each
            image as read by ``cv2.imread``.
        dataset_dicts: Dataset records with ``annotations``, ``width``,
            ``height``, ``file_name`` and ``image_id`` entries.
        IoUThresh: IoU above which a prediction may match a ground-truth box.
        dataset: Name of the registered dataset to evaluate.

    Returns:
        list: ``[first task entry of the COCO results, TP, FP, FN, recall,
        precision, f1]``.

    Raises:
        FileNotFoundError: If an image of ``dataset_dicts`` cannot be read.
    """
    evaluator = COCOEvaluator(dataset)
    val_loader = build_detection_test_loader(cfg, dataset)
    AP_res = inference_on_dataset(predictor.model, val_loader, evaluator)

    # Precision, recall and F1-score at an IoU of IoUThresh
    img_res = {}
    for d in dataset_dicts:
        # annotations_to_instances expects image_size as (height, width).
        inst = detectron2.data.detection_utils.annotations_to_instances(
            d["annotations"], (d['height'], d['width']), mask_format='polygon')
        bboxes_gt = inst.gt_boxes.tensor.cpu().numpy()

        im = cv2.imread(d['file_name'])
        if im is None:
            # cv2.imread signals an unreadable file by returning None.
            raise FileNotFoundError("Could not read image: {}".format(d['file_name']))
        outputs = predictor(im)
        bboxes_dt = outputs["instances"].pred_boxes.tensor.cpu().numpy()

        img_res[d["image_id"]] = get_single_image_results(bboxes_gt, bboxes_dt, IoUThresh)

    pr_rc = calc_precision_recall(img_res)
    first_task_metrics = list(AP_res.items())[0][1]
    return [first_task_metrics, pr_rc['true_positive'], pr_rc['false_positive'],
            pr_rc['false_negative'], pr_rc['recall'], pr_rc['precision'], pr_rc['f1']]


def calc_iou(gt_bbox, pred_bbox):
    """Return the intersection-over-union of two boxes.

    Both boxes are given as ``(x_topleft, y_topleft, x_bottomright,
    y_bottomright)``. Widths and heights are computed as ``max - min + 1``.

    Args:
        gt_bbox: Ground-truth box.
        pred_bbox: Predicted box.

    Returns:
        The IoU ratio, or ``0.0`` when the boxes do not overlap.

    Raises:
        AssertionError: If either box has its top-left corner beyond its
            bottom-right corner.
    """
    x_topleft_gt, y_topleft_gt, x_bottomright_gt, y_bottomright_gt = gt_bbox
    x_topleft_p, y_topleft_p, x_bottomright_p, y_bottomright_p = pred_bbox

    if (x_topleft_gt > x_bottomright_gt) or (y_topleft_gt > y_bottomright_gt):
        raise AssertionError("Ground Truth Bounding Box is not correct")
    if (x_topleft_p > x_bottomright_p) or (y_topleft_p > y_bottomright_p):
        # Report the coordinates of the offending predicted box.
        raise AssertionError("Predicted Bounding Box is not correct", x_topleft_p, x_bottomright_p, y_topleft_p,
                             y_bottomright_p)

    # The boxes are disjoint when one lies entirely to the left/right of, or
    # entirely above/below, the other.
    if x_bottomright_gt < x_topleft_p or x_topleft_gt > x_bottomright_p:
        return 0.0
    if y_bottomright_gt < y_topleft_p or y_topleft_gt > y_bottomright_p:
        return 0.0

    GT_bbox_area = (x_bottomright_gt - x_topleft_gt + 1) * (y_bottomright_gt - y_topleft_gt + 1)
    Pred_bbox_area = (x_bottomright_p - x_topleft_p + 1) * (y_bottomright_p - y_topleft_p + 1)

    # Corners of the overlapping rectangle.
    x_top_left = max(x_topleft_gt, x_topleft_p)
    y_top_left = max(y_topleft_gt, y_topleft_p)
    x_bottom_right = min(x_bottomright_gt, x_bottomright_p)
    y_bottom_right = min(y_bottomright_gt, y_bottomright_p)

    intersection_area = (x_bottom_right - x_top_left + 1) * (y_bottom_right - y_top_left + 1)
    union_area = GT_bbox_area + Pred_bbox_area - intersection_area

    return intersection_area / union_area

def get_single_image_results(gt_boxes, pred_boxes, iou_thr):
    """Count true positives, false positives and false negatives for one image.

    Every (prediction, ground truth) pair whose IoU exceeds ``iou_thr`` is a
    candidate match. Candidates are accepted from highest to lowest IoU, and
    each box takes part in at most one match. Unmatched predictions count as
    false positives and unmatched ground truths as false negatives, including
    when one of the two inputs is empty.

    Args:
        gt_boxes: Sequence of ground-truth boxes as [xmin, ymin, xmax, ymax].
        pred_boxes: Sequence of predicted boxes, formatted like ``gt_boxes``.
        iou_thr (float): IoU a pair must exceed to be considered a match.

    Returns:
        dict: ``{'true_positive': int, 'false_positive': int,
        'false_negative': int}``
    """
    # Collect every pair that overlaps enough to be a potential match.
    candidates = []
    for ipb, pred_box in enumerate(pred_boxes):
        for igb, gt_box in enumerate(gt_boxes):
            iou = calc_iou(gt_box, pred_box)
            if iou > iou_thr:
                candidates.append((iou, igb, ipb))

    # Best overlaps first, so a box is paired with its strongest candidate.
    candidates.sort(key=lambda candidate: candidate[0], reverse=True)

    matched_gt = set()
    matched_pred = set()
    for _, igb, ipb in candidates:
        if igb not in matched_gt and ipb not in matched_pred:
            matched_gt.add(igb)
            matched_pred.add(ipb)

    tp = len(matched_gt)
    fp = len(pred_boxes) - len(matched_pred)
    fn = len(gt_boxes) - len(matched_gt)
    return {'true_positive': tp, 'false_positive': fp, 'false_negative': fn}


def calc_precision_recall(image_results):
    """Aggregate per-image counts into precision, recall and F1-score.

    Args:
        image_results (dict): dictionary formatted like:
            {
                'img_id1': {'true_positive': int, 'false_positive': int, 'false_negative': int},
                'img_id2': ...
                ...
            }

    Returns:
        dict: ``{'true_positive', 'false_positive', 'false_negative',
        'precision', 'recall', 'f1'}``. A ratio whose denominator is zero
        (including the case of empty ``image_results``) is reported as 0.0.
    """
    true_positive = 0
    false_positive = 0
    false_negative = 0
    for res in image_results.values():
        true_positive += res['true_positive']
        false_positive += res['false_positive']
        false_negative += res['false_negative']

    # The ratios are computed once, after all images have been accumulated.
    predicted_total = true_positive + false_positive
    actual_total = true_positive + false_negative
    precision = true_positive / predicted_total if predicted_total else 0.0
    recall = true_positive / actual_total if actual_total else 0.0
    if precision + recall:
        f1_score = 2 * ((precision * recall) / (precision + recall))
    else:
        f1_score = 0.0

    return {'true_positive': true_positive, 'false_positive': false_positive,
            'false_negative': false_negative, 'precision': precision,
            'recall': recall, 'f1': f1_score}

Set up the predictor to work on the test set.

Loop through the output directory to get the list of trained models




In [None]:
#We set the IoU at 0.5
IoUThresh = 0.5

models = []
dataset_dicts = DatasetCatalog.get("swalim_val")
result_columns = ['model', 'LR', 'momentum',
                  'RBF', 'AP', 'AP50', 'AP75',
                  'TP', 'FP', 'FN', 'Precision',
                  'Recall', 'F_score', 'Path']

# Rows are collected in a list and turned into a DataFrame once at the end;
# DataFrame.append is not available in pandas >= 2.0.
result_rows = []

for path in Path(results_path).rglob('*.yaml'):
    models.append(path)
    aux = "{}model_final.pth".format(str(path).split('config.yaml')[0])

    # Skip runs without final weights before loading their config.
    if not os.path.exists(aux):
        print('Path to the model: {} not found'.format(aux))
        continue

    cfg = load_conf_file(str(path))
    predictor = DefaultPredictor(cfg)
    # res = [AP dict, TP, FP, FN, recall, precision, f1]
    res = Evaluator(cfg, predictor, dataset_dicts, IoUThresh, "swalim_val")
    mod = 'Faster_rcnn101X'
    result_rows.append({
        'model': mod,
        'LR': cfg.SOLVER.BASE_LR,
        'momentum': cfg.SOLVER.MOMENTUM,
        'RBF': cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE,
        'AP': res[0]['AP'],
        'AP50': res[0]['AP50'],
        'AP75': res[0]['AP75'],
        'TP': res[1],
        'FP': res[2],
        'FN': res[3],
        'Precision': res[5],
        'Recall': res[4],
        'F_score': res[6],
        # Stored as text so it can be written to CSV and reused as a config path.
        'Path': str(path),
    })

full_results = pd.DataFrame(result_rows, columns=result_columns)


Save the results of the evaluation metrics

In [None]:
# Show the complete rows ranked by F-score, best first.
ranked_results = full_results.dropna().sort_values(by=['F_score'], ascending=False)
print(ranked_results)

# Keep only the columns whose name does not start with "Unnamed".
unnamed_columns = full_results.columns.str.contains('^Unnamed')
full_results = full_results.loc[:, ~unnamed_columns]

# Write the table into the results directory.
csv_path = '{}final_results.csv'.format(results_path)
full_results.to_csv(csv_path)


Plot the accuracies on the hypertune space


In [None]:
import plotly
import plotly.express as px

# Axis titles shown in the parallel-coordinates figure.
axis_labels = {
    "F_score": "F1-Score",
    "model": "Model",
    "LR": "Learning rate",
    "momentum": "Momentum",
    "RBF": "Batch Size",
    "AP50": "Average precision at .5",
    "AP75": "Average precision at .75",
}

# Only complete rows are plotted; lines are coloured by F-score.
df = full_results.dropna()
fig = px.parallel_coordinates(df, color="F_score", labels=axis_labels)
fig.show()

# Keep an interactive copy next to the evaluation results.
fig.write_html("{}/hypertune-plot.html".format(results_path))

Then, we randomly select several samples to visualize the prediction results.

TODO: add the plots from https://towardsdatascience.com/evaluating-performance-of-an-object-detection-model-137a349c517b

In [None]:
# Select the model to visualise: the complete row with the highest F-score.
# NOTE(review): `best_model` was not defined anywhere before this cell;
# ranking by F_score mirrors the table printed above - confirm this is the
# intended selection criterion.
ranked_models = full_results.dropna().sort_values(by='F_score', ascending=False)
if ranked_models.empty:
    raise RuntimeError("No evaluated model available to select from")
best_model = ranked_models.iloc[0]

# The config path is passed on as text.
config = str(best_model['Path'])
plt.rcParams["figure.figsize"] = (30,30)

In [None]:
cfg = load_conf_file(config)
predictor = DefaultPredictor(cfg)

# Never ask for more samples than there are images available.
n_samples = min(5, len(dataset_dicts))
for d in random.sample(dataset_dicts, n_samples):
    print(d["file_name"])
    im = cv2.imread(d["file_name"])
    outputs = predictor(im)  # format is documented at https://detectron2.readthedocs.io/tutorials/models.html#model-output-format
    # OpenCV loads BGR; the Visualizer is given the image with channels reversed (RGB).
    visualizer = Visualizer(im[:, :, ::-1], metadata=swalim_metadata, scale=0.5)
    # Draw the ground truth first, then the predictions on the same canvas.
    visualizer.draw_dataset_dict(d)
    out = visualizer.draw_instance_predictions(outputs["instances"].to("cpu"))
    # get_image() is already RGB, which is what matplotlib expects; flipping
    # the channels again would display swapped colours.
    rgb_image = out.get_image()
    print(rgb_image.shape)
    plt.imshow(rgb_image)
    plt.show()
