In [1]:
import argparse
from pathlib import Path

import numpy as np

from super_gradients.training import models
from super_gradients.training.transforms.utils import _rescale_and_pad_to_size, _rescale_xyxy_bboxes

from super_gradients.training.datasets.detection_datasets.coco_format_detection import COCOFormatDetectionDataset
from super_gradients.training.datasets.detection_datasets.doclaynet_detection import DocLayNetDetectionDataset

%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

[2024-02-08 11:24:34] INFO - crash_tips_setup.py - Crash tips is enabled. You can set your environment variable to CRASH_HANDLER=FALSE to disable it


The console stream is logged into /home/marianna.parzych/sg_logs/console.log




In [2]:
def box_iou_calc(boxes1, boxes2):
    # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
    """
    Return intersection-over-union (Jaccard index) of boxes.
    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
    Arguments:
        boxes1 (Array[N, 4])
        boxes2 (Array[M, 4])
    Returns:
        iou (Array[N, M]): the NxM matrix containing the pairwise
            IoU values for every element in boxes1 and boxes2

    This implementation is taken from the above link and changed so that it only uses numpy..
    """

    def box_area(box):
        # box = 4xn
        return (box[2] - box[0]) * (box[3] - box[1])

    area1 = box_area(boxes1.T)
    area2 = box_area(boxes2.T)

    lt = np.maximum(boxes1[:, None, :2], boxes2[:, :2])  # [N,M,2]
    rb = np.minimum(boxes1[:, None, 2:], boxes2[:, 2:])  # [N,M,2]

    inter = np.prod(np.clip(rb - lt, a_min=0, a_max=None), 2)
    return inter / (area1[:, None] + area2 - inter)  # iou = inter / (area1 + area2 - inter)

In [3]:
class Metrices:
    def __init__(self, classes_list: int, CONF_THRESHOLD=0.3, IOU_THRESHOLD=0.5):
        self.classes_list = classes_list
        self.num_classes = len(classes_list)
        self.CONF_THRESHOLD = CONF_THRESHOLD
        self.IOU_THRESHOLD = IOU_THRESHOLD
        self.metrices = {}
        self.metrices_per_image = {}

    @staticmethod
    def update_metrices(metrices: dict):
        try:    
            metrices["precision"] = metrices["TP"] / (metrices["TP"]+metrices["FP"])
        except ZeroDivisionError:
            metrices["precision"] = 0
        try:
            metrices["recall"] = metrices["TP"] / (metrices["TP"]+metrices["FN"])
        except ZeroDivisionError:
            metrices["recall"] = 0
        try:
            metrices["f1"] = 2 * (metrices["precision"]*metrices["recall"]) / (metrices["precision"]+metrices["recall"])
        except ZeroDivisionError:
            metrices["f1"] = 0
        return metrices

    def process_images(self, list_of_detections, list_of_labels):
        self.metrices = {
            "TP": 0,
            "FP": 0,
            "FN": 0,
            "precision": 0,
            "recall": 0,
            "f1": 0,
        }

        for detections, labels in zip(list_of_detections, list_of_labels):
            self.process_image('dummy_name', detections, labels)
            self.metrices["TP"] += self.metrices_per_image['dummy_name']["TP"]
            self.metrices["FP"] += self.metrices_per_image['dummy_name']["FP"]
            self.metrices["FN"] += self.metrices_per_image['dummy_name']["FN"]
        self.metrices = self.update_metrices(self.metrices)

    def process_image(self, image_name, detections, labels: np.ndarray):
        """
        Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
        Arguments:
            detections (Array[N, 6]), x1, y1, x2, y2, conf, class
            labels (Array[M, 5]), class, x1, y1, x2, y2
        Returns:
            None, updates metrices accordingly
        """
        
        key_image_name = str(Path(image_name).name)
        self.metrices_per_image[key_image_name] = {
            "TP": 0,
            "FP": 0,
            "FN": 0,
            "precision": 0,
            "recall": 0,
            "f1": 0,
        }
        
        gt_classes = labels[:, 0].astype(np.int16)

        try:
            detections = detections[detections[:, 4] > self.CONF_THRESHOLD]
        except IndexError or TypeError:
            # detections are empty, end of process
            self.metrices_per_image[key_image_name]["FN"] += len(class_labels)
            return

        for class_id in range(self.num_classes):
            
            class_detections = detections[detections[:, -1] == class_id]
            class_labels = labels[labels[:, 0] == class_id]
            
            detection_classes = class_detections[:, 5].astype(np.int16)
    
            all_ious = box_iou_calc(class_labels[:, 1:], class_detections[:, :4])
            want_idx = np.where(all_ious > self.IOU_THRESHOLD)
    
            all_matches = [[want_idx[0][i], want_idx[1][i], all_ious[want_idx[0][i], want_idx[1][i]]]
                           for i in range(want_idx[0].shape[0])]
    
            all_matches = np.array(all_matches)
            if all_matches.shape[0] > 0:  # if there is match
                all_matches = all_matches[all_matches[:, 2].argsort()[::-1]]
                true_matches = all_matches[np.unique(all_matches[:, 1], return_index=True)[1]]
                true_matches = all_matches[all_matches[:, 2].argsort()[::-1]]
                true_matches = all_matches[np.unique(all_matches[:, 0], return_index=True)[1]]

                # Convert arrays to tuples to use setdiff1d
                all_matches = [tuple(row) for row in all_matches]
                true_matches = [tuple(row) for row in true_matches]
                false_matches = np.setdiff1d(all_matches, true_matches, assume_unique=True)
                all_matches = np.array(all_matches)
                true_matches = np.array(true_matches)
                false_matches = np.array(false_matches)
            else:
                self.metrices_per_image[key_image_name]["FN"] += len(class_labels)
                self.metrices_per_image[key_image_name]["FP"] += len(class_detections)
                continue

            for i, label in enumerate(class_labels):
                if true_matches[true_matches[:, 0] == i].shape[0] == 1:  #todo this should be 0 or 1?
                    self.metrices_per_image[key_image_name]["TP"] += 1
                else:
                    self.metrices_per_image[key_image_name]["FN"] += 1
    
            self.metrices_per_image[key_image_name]["FP"] += len(false_matches)

        self.metrices_per_image[key_image_name] = self.update_metrices(self.metrices_per_image[key_image_name])

In [6]:
model_type = "yolox_l"
num_classes = 17
checkpoint_path = "/mnt/ml-team/unstructured/Marianna/Checkpoints/Model_Jan2024_1/ckpt_best.pth"
# /mnt/ml-team/unstructured/Marianna/Checkpoints/Model_Jan2024_2/
data_dir = "/mnt/ml-team/homes/marianna.parzych/Unstructured/MiniHolistic"
images_dir = "PNG"
split_info_pth = "COCO/test_unstructured_classes.json"
# split_info_pth = "COCO/val.json"
res = 512

In [7]:
model = models.get(model_type, num_classes=num_classes, checkpoint_path=checkpoint_path)

Please update your code to use the `arch_params.in_channels` instead of `arch_params.channels_in`.
[2024-02-08 11:30:43] INFO - checkpoint_utils.py - Successfully loaded model weights from /mnt/ml-team/unstructured/Marianna/Checkpoints/Model_Jan2024_1/ckpt_best.pth EMA checkpoint.


In [8]:
dataset = COCOFormatDetectionDataset(
    data_dir=data_dir,
    json_annotation_file=split_info_pth,
    images_dir=images_dir,
)

[2024-02-08 11:30:49] INFO - detection_dataset.py - Dataset Initialization in progress. `cache_annotations=True` causes the process to take longer due to full dataset indexing.


loading annotations into memory...
Done (t=0.01s)
creating index...
index created!


Indexing dataset annotations: 100%|██████████| 252/252 [00:00<00:00, 3394.04it/s]


In [9]:
class_names = ["image", "page_number", "paraprgaphs_in_image", "paraprgaph", "subheading", "page_header", "formulas", "other", "table", "page_footer", "title", "form", "paraprgaphs_in_form", "checkbox_checked", "checkbox", "radio_button", "radio_button_checked"]

In [10]:
all_predictions = []
all_labels = []
all_names = []

In [11]:
#for image, target, _ in dataset:
for indx in range(len(dataset)):

    image, target, name = dataset.get_named_item(indx)
    all_names.append(name)

    gt_bboxes = target[:,:4]
    gt_labels = target[:,-1]
        
    if res:
        image_input, r = _rescale_and_pad_to_size(image, (res, res))
    else:
        image_input = image
        r = None

    output = model.predict(image_input, iou=0.65, conf=0.01)
    
    labels = output.prediction.labels
    scores = output.prediction.confidence
    if r:
        bboxes = _rescale_xyxy_bboxes(output.prediction.bboxes_xyxy, 1 / r) 
    else:
        bboxes = output.prediction.bboxes_xyxy
    
    prediction = np.c_[bboxes, scores, labels]
    all_predictions.append(prediction)
    
    annos = np.c_[gt_labels, gt_bboxes]
    all_labels.append(annos)

[2024-02-08 11:30:59] INFO - pipelines.py - Fusing some of the model's layers. If this takes too much memory, you can deactivate it by setting `fuse_model=False`
[2024-02-08 11:31:00] INFO - pipelines.py - Fusing some of the model's layers. If this takes too much memory, you can deactivate it by setting `fuse_model=False`
[2024-02-08 11:31:00] INFO - pipelines.py - Fusing some of the model's layers. If this takes too much memory, you can deactivate it by setting `fuse_model=False`
[2024-02-08 11:31:01] INFO - pipelines.py - Fusing some of the model's layers. If this takes too much memory, you can deactivate it by setting `fuse_model=False`
[2024-02-08 11:31:01] INFO - pipelines.py - Fusing some of the model's layers. If this takes too much memory, you can deactivate it by setting `fuse_model=False`
[2024-02-08 11:31:02] INFO - pipelines.py - Fusing some of the model's layers. If this takes too much memory, you can deactivate it by setting `fuse_model=False`
[2024-02-08 11:31:02] INFO -

In [12]:
# np.save("miniholistic_predictions.npy", all_predictions, allow_pickle=True, fix_imports=True)
# np.save("miniholistic_annos.npy", all_labels, allow_pickle=True, fix_imports=True)
# np.save("miniholistic_names.npy", all_names, allow_pickle=True, fix_imports=True)

  arr = np.asanyarray(arr)


In [12]:
# all_predictions = np.load("predictions.npy", allow_pickle=True, fix_imports=True)
# all_labels = np.load("annos.npy", allow_pickle=True, fix_imports=True)
# all_names = np.load("names.npy", allow_pickle=True, fix_imports=True)

In [13]:
# Classes merging
classes_after_merging = ["image", "paraprgaph", "formulas", "other", "table", "page_footer", "title", "form", "checkbox_checked", "checkbox", "radio_button", "radio_button_checked"]

In [38]:
iou_thresh = 0.3
conf_thresh = 0.25
metrices = Metrices(classes_list = class_names, CONF_THRESHOLD = conf_thresh, IOU_THRESHOLD = iou_thresh)
metrices.process_images(all_predictions, all_labels)

In [39]:
all_metrices = metrices.metrices

In [40]:
mean_precision = all_metrices['precision']
mean_recall = all_metrices['recall']
mean_f1 = all_metrices['f1']

In [41]:
print(f"Metrics for split: {split_info_pth},\nfor {len(classes_after_merging)} classes: {classes_after_merging}")
print(f"Precision@iou{iou_thresh}&conf{conf_thresh}: {mean_precision}")
print(f"Recall@iou{iou_thresh}&conf{conf_thresh}: {mean_recall}")
print(f"F1@iou{iou_thresh}&conf{conf_thresh}: {mean_f1}")

Metrics for split: COCO/test_unstructured_classes.json,
for 12 classes: ['image', 'paraprgaph', 'formulas', 'other', 'table', 'page_footer', 'title', 'form', 'checkbox_checked', 'checkbox', 'radio_button', 'radio_button_checked']
Precision@iou0.3: 0.07061660351360662
Recall@iou0.3: 0.3934740882917466
F1@iou0.3: 0.11974299065420561


In [42]:
IOUS = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
CONFS = [0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]

In [47]:
F1 = {}
for iou_thresh in IOUS:
    for conf_thresh in CONFS:
        metrices = Metrices(classes_list = class_names, CONF_THRESHOLD = conf_thresh, IOU_THRESHOLD = iou_thresh)
        metrices.process_images(all_predictions, all_labels)
        mean_precision = metrices.metrices['precision']
        mean_recall = metrices.metrices['recall']
        mean_f1 = metrices.metrices['f1']
        print("------------------------------")
        print("------------------------------")
        print(f"Metrics for split: {split_info_pth},\nfor {len(classes_after_merging)} classes: {classes_after_merging}")
        print(f"Precision@iou{iou_thresh}&conf{conf_thresh}: {mean_precision}")
        print(f"Recall@iou{iou_thresh}&conf{conf_thresh}: {mean_recall}")
        print(f"F1@iou{iou_thresh}&conf{conf_thresh}: {mean_f1}")
        F1[f"iou:{iou_thresh},conf:{conf_thresh}"] = mean_f1

------------------------------
------------------------------
Metrics for split: COCO/test_unstructured_classes.json,
for 12 classes: ['image', 'paraprgaph', 'formulas', 'other', 'table', 'page_footer', 'title', 'form', 'checkbox_checked', 'checkbox', 'radio_button', 'radio_button_checked']
Precision@iou0.1&conf0.01: 0.034138309549945116
Recall@iou0.1&conf0.01: 0.5969289827255279
F1@iou0.1&conf0.01: 0.06458311701796283
------------------------------
------------------------------
Metrics for split: COCO/test_unstructured_classes.json,
for 12 classes: ['image', 'paraprgaph', 'formulas', 'other', 'table', 'page_footer', 'title', 'form', 'checkbox_checked', 'checkbox', 'radio_button', 'radio_button_checked']
Precision@iou0.1&conf0.1: 0.06290516206482594
Recall@iou0.1&conf0.1: 0.5028790786948176
F1@iou0.1&conf0.1: 0.11182244985061887
------------------------------
------------------------------
Metrics for split: COCO/test_unstructured_classes.json,
for 12 classes: ['image', 'paraprgaph', 

In [48]:
sorted_F1 = dict(sorted(F1.items(), key=lambda item: item[1]))

In [49]:
sorted_F1

{'iou:0.9,conf:0.01': 0.01452215696743193,
 'iou:0.9,conf:0.1': 0.02948943661971831,
 'iou:0.8,conf:0.01': 0.03011785246617198,
 'iou:0.9,conf:0.2': 0.03621399176954732,
 'iou:0.7,conf:0.01': 0.03806190528272996,
 'iou:0.9,conf:0.3': 0.04144427001569859,
 'iou:0.6,conf:0.01': 0.04238584225475202,
 'iou:0.5,conf:0.01': 0.044811833804655206,
 'iou:0.9,conf:0.4': 0.046393620877129386,
 'iou:0.4,conf:0.01': 0.04919624554968174,
 'iou:0.9,conf:0.5': 0.052347320315745745,
 'iou:0.3,conf:0.01': 0.05422715627668659,
 'iou:0.8,conf:0.1': 0.05846909172059807,
 'iou:0.2,conf:0.01': 0.05930146670887411,
 'iou:0.9,conf:0.6': 0.05954123962908736,
 'iou:0.1,conf:0.01': 0.06458311701796283,
 'iou:0.9,conf:0.7': 0.06850961538461539,
 'iou:0.8,conf:0.2': 0.06990291262135923,
 'iou:0.7,conf:0.1': 0.07230528899799152,
 'iou:0.8,conf:0.3': 0.07782347358430877,
 'iou:0.6,conf:0.1': 0.07796836711962575,
 'iou:0.5,conf:0.1': 0.08266666666666668,
 'iou:0.8,conf:0.4': 0.08315098468271334,
 'iou:0.9,conf:0.8': 0

In [105]:
classes_to_use = ["form"] # "table" "form"
classes_to_use_ind = [class_names.index(name) for name in classes_to_use]

In [106]:
classes_to_use_ind

[11]

In [107]:
# Get only predictions and labels with classes_to_use_ind
new_predictions = [array[np.isin(array[:, -1], classes_to_use_ind)] for array in all_predictions]
new_labels = [array[np.isin(array[:, 0], classes_to_use_ind)] for array in all_labels]

In [108]:
IOUS = [0.1]
CONFS = [0.01]
F1 = {}
for iou_thresh in IOUS:
    for conf_thresh in CONFS:
        metrices = Metrices(classes_list = class_names, CONF_THRESHOLD = conf_thresh, IOU_THRESHOLD = iou_thresh)
        metrices.process_images(new_predictions, new_labels)
        mean_precision = metrices.metrices['precision']
        mean_recall = metrices.metrices['recall']
        mean_f1 = metrices.metrices['f1']
        print("------------------------------")
        print("------------------------------")
        print(f"Metrics for split: {split_info_pth},\nfor {len(classes_after_merging)} classes: {classes_after_merging}")
        print(f"Precision@iou{iou_thresh}&conf{conf_thresh}: {mean_precision}")
        print(f"Recall@iou{iou_thresh}&conf{conf_thresh}: {mean_recall}")
        print(f"F1@iou{iou_thresh}&conf{conf_thresh}: {mean_f1}")
        F1[f"iou:{iou_thresh},conf:{conf_thresh}"] = mean_f1

------------------------------
------------------------------
Metrics for split: COCO/test_unstructured_classes.json,
for 12 classes: ['image', 'paraprgaph', 'formulas', 'other', 'table', 'page_footer', 'title', 'form', 'checkbox_checked', 'checkbox', 'radio_button', 'radio_button_checked']
Precision@iou0.1&conf0.01: 0.31521739130434784
Recall@iou0.1&conf0.01: 0.14285714285714285
F1@iou0.1&conf0.01: 0.19661016949152543


In [91]:
sorted_F1 = dict(sorted(F1.items(), key=lambda item: item[1]))

In [72]:
# F1_for_image_table_form_formula = sorted_F1

In [79]:
# F1_for_table_form = sorted_F1

In [86]:
# F1_for_form = sorted_F1

In [93]:
# F1_for_table = sorted_F1

In [100]:
F1_for_form

{'iou:0.1,conf:0.4': 0,
 'iou:0.1,conf:0.5': 0,
 'iou:0.1,conf:0.6': 0,
 'iou:0.1,conf:0.7': 0,
 'iou:0.1,conf:0.8': 0,
 'iou:0.2,conf:0.2': 0,
 'iou:0.2,conf:0.3': 0,
 'iou:0.2,conf:0.4': 0,
 'iou:0.2,conf:0.5': 0,
 'iou:0.2,conf:0.6': 0,
 'iou:0.2,conf:0.7': 0,
 'iou:0.2,conf:0.8': 0,
 'iou:0.3,conf:0.1': 0,
 'iou:0.3,conf:0.2': 0,
 'iou:0.3,conf:0.3': 0,
 'iou:0.3,conf:0.4': 0,
 'iou:0.3,conf:0.5': 0,
 'iou:0.3,conf:0.6': 0,
 'iou:0.3,conf:0.7': 0,
 'iou:0.3,conf:0.8': 0,
 'iou:0.4,conf:0.01': 0,
 'iou:0.4,conf:0.1': 0,
 'iou:0.4,conf:0.2': 0,
 'iou:0.4,conf:0.3': 0,
 'iou:0.4,conf:0.4': 0,
 'iou:0.4,conf:0.5': 0,
 'iou:0.4,conf:0.6': 0,
 'iou:0.4,conf:0.7': 0,
 'iou:0.4,conf:0.8': 0,
 'iou:0.5,conf:0.01': 0,
 'iou:0.5,conf:0.1': 0,
 'iou:0.5,conf:0.2': 0,
 'iou:0.5,conf:0.3': 0,
 'iou:0.5,conf:0.4': 0,
 'iou:0.5,conf:0.5': 0,
 'iou:0.5,conf:0.6': 0,
 'iou:0.5,conf:0.7': 0,
 'iou:0.5,conf:0.8': 0,
 'iou:0.6,conf:0.01': 0,
 'iou:0.6,conf:0.1': 0,
 'iou:0.6,conf:0.2': 0,
 'iou:0.6,con

In [98]:
F1_for_table['iou:0.2,conf:0.01']

0.47654784240150094

In [None]:
0.4316309719934102