In [None]:
import io
import json
import math
import os
import pickle
import shutil
import sys
import warnings
from copy import deepcopy
from pathlib import Path

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import PIL.Image
import torch
import torchvision
import xlsxwriter
from IPython.display import clear_output, Image, display
from scipy.optimize import linear_sum_assignment
from tqdm import tqdm
from ultralytics import YOLO

from laptq_pyutils.draw import cv2_putText, cv2_rectangle
from laptq_pyutils.objects import ListAligner
from laptq_pyutils.ops import box__miniou


# Absolute location of the notebooks directory; its parent is used as ROOT_DIR
HERE = Path('/mnt/hdd10tb/Users/laptq/laptq-prj-46/notebooks')
ROOT_DIR = HERE.parent
# Append ROOT_DIR to sys.path (presumably so project-local modules can be imported)
sys.path.append(str(ROOT_DIR))


def imshow(a, fmt='jpeg'):
    """Show an image array inline in the notebook.

    The last axis of `a` is reversed, the values are clipped to [0, 255] and cast
    to uint8, and the result is encoded with PIL using `fmt` and handed to
    IPython's `display`. The callers in this notebook pass arrays returned by
    `cv2.imread`.
    """
    pixels = np.clip(a[:, :, ::-1], 0, 255).astype(np.uint8)
    buffer = io.BytesIO()
    PIL.Image.fromarray(pixels).save(buffer, fmt)
    display(Image(data=buffer.getvalue()))


# Silence every Python warning for the rest of the session.
# NOTE(review): a blanket ignore also hides deprecation and pandas warnings that can point at real problems.
warnings.filterwarnings("ignore")
# Let pandas print up to 1000 characters per cell and up to 500 rows
pd.set_option('max_colwidth', 1000)
pd.set_option('display.max_rows', 500)

In [None]:
# Root directory of the source images; the postfix is appended to each "images" folder name
PATH__DIR__DATASETS__SOURCE__IMG="/mnt/hdd10tb/Users/laptq/laptq-prj-46/data/road-issues-detection"
POSTFIX__DIR__IMG="--20241128--phase-2--annotated-ver2--pot-man-drain--checked--crop-top50-side20-botom0"

# Root directory of the ground-truth label JSON files; the postfix is appended to each "labels" folder name
PATH__DIR__DATASETS__SOURCE__LBL_GT="/mnt/hdd10tb/Users/laptq/laptq-prj-46/outputs/20241208--true-labels--json"
POSTFIX__DIR__LBL_GT="--20241128--phase-2--annotated-ver2--pot-man-drain--checked--crop-top50-side20-botom0"

# Model name and training run; both are substituted into the prediction and output paths below
NAME__MODEL = 'yolo11m--960--crop-20'
VER__TRAIN = 'train3'

# Root directory of the prediction JSON files for the selected model / run
PATH__DIR__DATASETS__SOURCE__LBL_PRED="/mnt/hdd10tb/Users/laptq/laptq-prj-46/outputs/20241208--model-prediction--json--filterby-conf-miniou/{}/{}".format(NAME__MODEL, VER__TRAIN)
# Alternative prediction directory (judging by its name, filtered by confidence only); currently disabled
# PATH__DIR__DATASETS__SOURCE__LBL_PRED="/mnt/hdd10tb/Users/laptq/laptq-prj-46/outputs/20241208--model-prediction--json--filterby-conf/{}/{}".format(NAME__MODEL, VER__TRAIN)
POSTFIX__DIR__LBL_PRED="--20241128--phase-2--annotated-ver2--pot-man-drain--checked--crop-top50-side20-botom0"

# Dataset sub-directories to analyse; only the keys are read by the path-collection code of this notebook
MAP__SUBPATH_DIR__TO__={
    "Pothole_235/train": "",
    "Pothole_Maeda/first_shot_eval": "",
}
# Root directory for everything this analysis writes, separated per model and training run
PATH__DIR__OUTPUT="/mnt/hdd10tb/Users/laptq/laptq-prj-46/outputs/20241208--error--analysis/{}/{}".format(NAME__MODEL, VER__TRAIN)

In [None]:
# Output locations derived from PATH__DIR__OUTPUT:
# padded crops of GT boxes, padded crops of predicted boxes, and source images with all boxes drawn
PATH__DIR__OUTPUT__CROPS__GT = os.path.join(PATH__DIR__OUTPUT, 'crops--gt')
PATH__DIR__OUTPUT__CROPS__PRED = os.path.join(PATH__DIR__OUTPUT, 'crops--pred')
# NOTE(review): this pickle path is not referenced by the cells of this notebook visible here
PATH__FILE__INFO_CROPS__PRED = os.path.join(PATH__DIR__OUTPUT, 'info_crops__pred.pkl')
PATH__DIR__OUTPUT__SRC_IMG__VIS = os.path.join(PATH__DIR__OUTPUT, 'src-img--visualize')
# NOTE(review): this pickle path is not referenced by the cells of this notebook visible here
PATH__FILE__DF_PREPROCESSED = os.path.join(PATH__DIR__OUTPUT, 'df_preprocessed.pkl')
# Legacy output location from an earlier layout, kept commented out for reference; this line defines nothing
# DIR_OUTPUT_BINS = '/mnt/hdd10tb/Users/laptq/laptq-prj-46/outputs/20241031-cropped-object-{}/bins'.format(DATA_SUBSET)
# Directory where one Excel workbook per class is written for manual root-cause annotation
PATH__DIR__WORKSHEET = os.path.join(PATH__DIR__OUTPUT, 'worksheets')
# Legacy output locations from an earlier layout, kept commented out for reference; these lines define nothing
# DIR_FP_FN_ROOT_CAUSE_WORKSHEETS = '/mnt/hdd10tb/Users/laptq/laptq-prj-46/outputs/20241105--FP-FN-worksheets/{}'.format(
#     {
#         'train': 'train',
#         'Pothole_235': 'validation',
#         'Pothole_Maeda_firstshot_eval': 'test',
#     }[DATA_SUBSET]
# )
# DIR_CROP_BY_ROOT_CAUSE = '/mnt/hdd10tb/Users/laptq/laptq-prj-46/outputs/20241105--FP-FN-by-root-cause/{}'.format({
#         'train': 'train',
#         'Pothole_235': 'validation',
#         'Pothole_Maeda_firstshot_eval': 'test',
#     }[DATA_SUBSET]
# )
# Reference image size: box sizes are rescaled as if the longer image side were IMGSZ pixels
IMGSZ = 960
# Overlap measure used when matching GT to predictions, with one threshold per measure
MODE__IOU_EVAL = 'MINIOU' # 'NORMAL' 'MINIOU'
THRESH__IOU = 0.5
THRESH__MINIOU = 0.5
# 'one-one' uses linear_sum_assignment; 'many-many' keeps every pair that passes the threshold
MODE__EVAL__MATCHING = 'many-many' # 'one-one' 'many-many'
# Bin edges applied to sqrt(box area) at IMGSZ scale, and the '(lo, hi]' label of each bin
BINS = [0, 4, 8, 16, 32, 64, 128, 256, 512, 1024]
BINS_LABEL = ['({}, {}]'.format(BINS[_], BINS[_ + 1]) for _ in range(len(BINS) - 1)]
# Pixels of context kept around a box when a crop is saved
PAD__CROP = 80
# Font scale and line thickness used for every box / label drawn on images
FONTSCALE = 0.7
THICKNESS = 1

# Class id -> short class name used for labels, titles and file names
MAP__ID_CLASS__TO__NAME = {
    0: 'pot',
    1: 'man',
    2: 'drain'
}

# Maximum number of failed cases sampled into each worksheet
NUM__CROP__VERIFY = 500

# Collect every image together with its GT and prediction label files.
# The six lists stay index-aligned: entry i of each list describes the same image.
list__path__file__img = []
list__name__file__img = []
list__subpath__dir = []
list__name__file__lbl = []
list__path__file__lbl_gt = []
list__path__file__lbl_pred = []

for subpath__dir in sorted(MAP__SUBPATH_DIR__TO__):
    path__dir__img = os.path.join(PATH__DIR__DATASETS__SOURCE__IMG, subpath__dir, "images{}".format(POSTFIX__DIR__IMG))
    path__dir__lbl_gt = os.path.join(PATH__DIR__DATASETS__SOURCE__LBL_GT, subpath__dir, "labels{}".format(POSTFIX__DIR__LBL_GT))
    path__dir__lbl_pred = os.path.join(PATH__DIR__DATASETS__SOURCE__LBL_PRED, subpath__dir, "labels{}".format(POSTFIX__DIR__LBL_PRED))

    for name__file__img in sorted(os.listdir(path__dir__img)):
        path__file__img = os.path.join(path__dir__img, name__file__img)
        # label files share the image's base name, with a .json extension
        name__file__lbl = os.path.splitext(name__file__img)[0] + '.json'
        path__file__lbl_gt = os.path.join(path__dir__lbl_gt, name__file__lbl)
        path__file__lbl_pred = os.path.join(path__dir__lbl_pred, name__file__lbl)

        # both label files are mandatory; stop at the first one that is missing (GT is checked first)
        for path__file__required in (path__file__lbl_gt, path__file__lbl_pred):
            if not os.path.isfile(path__file__required):
                raise Exception("File not found: {}".format(path__file__required))

        list__path__file__img.append(path__file__img)
        list__name__file__img.append(name__file__img)
        list__subpath__dir.append(subpath__dir)
        list__name__file__lbl.append(name__file__lbl)
        list__path__file__lbl_gt.append(path__file__lbl_gt)
        list__path__file__lbl_pred.append(path__file__lbl_pred)

print(len(list__path__file__img), len(list__path__file__lbl_gt), len(list__path__file__lbl_pred))

# = Extract GT crops

In [None]:
# DANGER ZONE!!!
# Irreversibly deletes every previously extracted GT crop so the extraction below starts clean.
# shutil.rmtree replaces the shell `rm -r` so the path is never parsed by a shell;
# ignore_errors=True keeps the very first run (directory not created yet) from failing.
shutil.rmtree(PATH__DIR__OUTPUT__CROPS__GT, ignore_errors=True)

In [None]:
# load labels: one parsed JSON dict per image, in the same order as list__path__file__lbl_gt
list__label_gt = []
for path__lbl_gt in tqdm(list__path__file__lbl_gt):
    with open(path__lbl_gt, 'r') as f:
        dict_result__gt = json.load(f)
    list__label_gt.append(dict_result__gt)

# per-box records; one value is appended under every key for each crop that gets written
list_aliger__info__gt = ListAligner(
    list__key=[
        'list__box__cls__gt',
        'list__box__xcycwh__gt',
        'list__box__path_src_img__gt',  # store source image
        'list__box__index_lbl__gt',     # store index in the label file
        'list__box__path_crop__gt',     # store path of the crop image
        'list__box__subpath_dir__gt',   # store subpath of the crop image
        'list__box__src_img_W__gt',
        'list__box__src_img_H__gt',
    ]
)

# load boxes: draw each GT box on a copy of its image and save a padded crop around it
for (
    name__file__img,
    path__file__img,
    subpath__dir,
    lbl_gt,
) in tqdm(zip(
    list__name__file__img,
    list__path__file__img,
    list__subpath__dir,
    list__label_gt,
), total=len(list__label_gt)):
    img__org = cv2.imread(path__file__img)
    if img__org is None:
        # cv2.imread reports an unreadable file by returning None instead of raising
        raise IOError("Cannot read image: {}".format(path__file__img))
    H, W = img__org.shape[:2]

    # every crop of an image goes to the same folder, so it is created once per image
    path__dir__parent = os.path.join(PATH__DIR__OUTPUT__CROPS__GT, subpath__dir)
    os.makedirs(path__dir__parent, exist_ok=True)

    for i_b, (id_class, box) in enumerate(
        zip(lbl_gt['list__obj__id_class'], lbl_gt['list__obj__box_xcycwhn'])
    ):
        # box is (xc, yc, w, h) relative to the image size; scale it to integer pixels
        xcn, ycn, wn, hn = box
        xc = int(xcn * W)
        yc = int(ycn * H)
        w = int(wn * W)
        h = int(hn * H)
        x1 = int(xc - w / 2)
        y1 = int(yc - h / 2)
        x2 = x1 + w
        y2 = y1 + h

        # draw on a fresh copy so each crop shows only its own box
        img = img__org.copy()
        cv2_rectangle(img, (x1, y1), (x2, y2), color=(0, 255, 0), thickness=THICKNESS)
        cv2_putText(img, MAP__ID_CLASS__TO__NAME[id_class], (x1, y1 - THICKNESS - 5), color=(0, 255, 0), fontScale=FONTSCALE, thickness=THICKNESS)

        # keep PAD__CROP pixels of context; both slice bounds are clamped at 0 because
        # a negative bound would be interpreted as "counted from the end" of the image
        crop_y1 = max(0, y1 - PAD__CROP)
        crop_y2 = max(0, y2 + PAD__CROP)
        crop_x1 = max(0, x1 - PAD__CROP)
        crop_x2 = max(0, x2 + PAD__CROP)
        img__crop__padded = img[crop_y1:crop_y2, crop_x1:crop_x2]

        name__file__crop = "{}--crop-{}.jpg".format(os.path.splitext(name__file__img)[0], i_b)
        path_crop = os.path.join(path__dir__parent, name__file__crop)

        # a box lying outside the image yields an empty crop, which cv2.imwrite cannot encode
        if img__crop__padded.size == 0:
            print("Skipped empty crop: {} (box index {}, x1y1x2y2 = {})".format(path__file__img, i_b, [x1, y1, x2, y2]))
            continue
        try:
            cv2.imwrite(path_crop, img__crop__padded)
        except cv2.error as e:
            print("Failed to write crop {}: {}".format(path_crop, e))
            continue

        list_aliger__info__gt.append(
            {
                'list__box__cls__gt': id_class,
                'list__box__xcycwh__gt': [xc, yc, w, h],
                'list__box__path_src_img__gt': path__file__img,
                'list__box__index_lbl__gt': i_b,
                'list__box__path_crop__gt': path_crop,
                'list__box__subpath_dir__gt': subpath__dir,
                'list__box__src_img_W__gt': W,
                'list__box__src_img_H__gt': H,
            }
        )



In [None]:
# view some crops: display up to 3 randomly chosen GT crops with their pixel boxes
list__path_crop__gt = list_aliger__info__gt.get__key('list__box__path_crop__gt')
list__xcycwh__gt = list_aliger__info__gt.get__key('list__box__xcycwh__gt')
# np.random.choice with replace=False raises when asked for more samples than exist
num__sample = min(3, len(list__path_crop__gt))
list_indexes_sampled = np.random.choice(range(len(list__path_crop__gt)), size=num__sample, replace=False)
for i_sampled in list_indexes_sampled:
    xcycwh = list__xcycwh__gt[i_sampled]
    path_crop = list__path_crop__gt[i_sampled]
    crop_img = cv2.imread(path_crop)
    print('\n=== path_crop = {} ==='.format(path_crop))
    imshow(crop_img)
    print('xcycwh = ', xcycwh)
    print('crop_img.shape = ', crop_img.shape)

# = Extract Pred crops

In [None]:
# DANGER ZONE!!!
# Irreversibly deletes every previously extracted prediction crop so the extraction below starts clean.
# shutil.rmtree replaces the shell `rm -r` so the path is never parsed by a shell;
# ignore_errors=True keeps the very first run (directory not created yet) from failing.
shutil.rmtree(PATH__DIR__OUTPUT__CROPS__PRED, ignore_errors=True)

In [None]:
# load labels: one parsed JSON dict per image, in the same order as list__path__file__lbl_pred
list__label_pred = []
for path__lbl_pred in tqdm(list__path__file__lbl_pred):
    with open(path__lbl_pred, 'r') as f:
        dict_result__pred = json.load(f)
    list__label_pred.append(dict_result__pred)

# per-box records; one value is appended under every key for each crop that gets written
list_aliger__info__pred = ListAligner(
    list__key=[
        'list__box__cls__pred',
        'list__box__xcycwh__pred',
        'list__box__path_src_img__pred',
        'list__box__index_lbl__pred',
        'list__box__path_crop__pred',
        'list__box__conf__pred',
        'list__box__subpath_dir__pred',
        'list__box__src_img_W__pred',
        'list__box__src_img_H__pred',
    ]
)

# load boxes: draw each predicted box on a copy of its image and save a padded crop around it
for (
    name__file__img,
    path__file__img,
    subpath__dir,
    lbl_pred,
) in tqdm(zip(
    list__name__file__img,
    list__path__file__img,
    list__subpath__dir,
    list__label_pred,
), total=len(list__label_pred)):
    img__org = cv2.imread(path__file__img)
    if img__org is None:
        # cv2.imread reports an unreadable file by returning None instead of raising
        raise IOError("Cannot read image: {}".format(path__file__img))
    H, W = img__org.shape[:2]

    # every crop of an image goes to the same folder, so it is created once per image
    path__dir__parent = os.path.join(PATH__DIR__OUTPUT__CROPS__PRED, subpath__dir)
    os.makedirs(path__dir__parent, exist_ok=True)

    for i_b, (id_class, box, conf) in enumerate(
        zip(lbl_pred['list__obj__id_class'], lbl_pred['list__obj__box_xcycwhn'], lbl_pred['list__obj__box_conf'])
    ):
        # box is (xc, yc, w, h) relative to the image size; scale it to integer pixels
        xcn, ycn, wn, hn = box
        xc = int(xcn * W)
        yc = int(ycn * H)
        w = int(wn * W)
        h = int(hn * H)
        x1 = int(xc - w / 2)
        y1 = int(yc - h / 2)
        x2 = x1 + w
        y2 = y1 + h

        # draw on a fresh copy so each crop shows only its own box
        img = img__org.copy()
        cv2_rectangle(img, (x1, y1), (x2, y2), color=(255, 50, 255), thickness=THICKNESS)
        cv2_putText(img, "{} {:.2f}".format(MAP__ID_CLASS__TO__NAME[id_class], conf) , (x1, y1 - THICKNESS - 5), color=(255, 50, 255), fontScale=FONTSCALE, thickness=THICKNESS)

        # keep PAD__CROP pixels of context; both slice bounds are clamped at 0 because
        # a negative bound would be interpreted as "counted from the end" of the image
        crop_y1 = max(0, y1 - PAD__CROP)
        crop_y2 = max(0, y2 + PAD__CROP)
        crop_x1 = max(0, x1 - PAD__CROP)
        crop_x2 = max(0, x2 + PAD__CROP)
        img__crop__padded = img[crop_y1:crop_y2, crop_x1:crop_x2]

        name__file__crop = "{}--crop-{}.jpg".format(os.path.splitext(name__file__img)[0], i_b)
        path_crop = os.path.join(path__dir__parent, name__file__crop)

        # a box lying outside the image yields an empty crop, which cv2.imwrite cannot encode
        if img__crop__padded.size == 0:
            print("Skipped empty crop: {} (box index {}, x1y1x2y2 = {})".format(path__file__img, i_b, [x1, y1, x2, y2]))
            continue
        try:
            cv2.imwrite(path_crop, img__crop__padded)
        except cv2.error as e:
            print("Failed to write crop {}: {}".format(path_crop, e))
            continue

        list_aliger__info__pred.append(
            {
                'list__box__cls__pred': id_class,
                'list__box__xcycwh__pred': [xc, yc, w, h],
                'list__box__path_src_img__pred': path__file__img,
                'list__box__index_lbl__pred': i_b,
                'list__box__path_crop__pred': path_crop,
                'list__box__conf__pred': conf,
                'list__box__subpath_dir__pred': subpath__dir,
                'list__box__src_img_W__pred': W,
                'list__box__src_img_H__pred': H,
            }
        )



In [None]:
# view some crops: display up to 3 randomly chosen prediction crops with their pixel boxes
list__path_crop__pred = list_aliger__info__pred.get__key('list__box__path_crop__pred')
list__xcycwh__pred = list_aliger__info__pred.get__key('list__box__xcycwh__pred')
# np.random.choice with replace=False raises when asked for more samples than exist
num__sample = min(3, len(list__path_crop__pred))
list_indexes_sampled = np.random.choice(range(len(list__path_crop__pred)), size=num__sample, replace=False)
for i_sampled in list_indexes_sampled:
    xcycwh = list__xcycwh__pred[i_sampled]
    path_crop = list__path_crop__pred[i_sampled]
    crop_img = cv2.imread(path_crop)
    print('\n=== path_crop = {} ==='.format(path_crop))
    imshow(crop_img)
    print('xcycwh = ', xcycwh)
    print('crop_img.shape = ', crop_img.shape)

# = Preprocess and evaluate

In [None]:
# Build one table with a row per box: all GT boxes first, then all predicted boxes.
_get__gt = list_aliger__info__gt.get__key
_get__pred = list_aliger__info__pred.get__key
_num__gt = len(list_aliger__info__gt)
_num__pred = len(list_aliger__info__pred)

info_crops_all = {
    'list__box__cls': _get__gt('list__box__cls__gt') + _get__pred('list__box__cls__pred'),
    'list__box__xcycwh': _get__gt('list__box__xcycwh__gt') + _get__pred('list__box__xcycwh__pred'),
    'list__box__path_src_img': _get__gt('list__box__path_src_img__gt') + _get__pred('list__box__path_src_img__pred'),
    'list__box__index_lbl': _get__gt('list__box__index_lbl__gt') + _get__pred('list__box__index_lbl__pred'),
    'list__box__path_crop': _get__gt('list__box__path_crop__gt') + _get__pred('list__box__path_crop__pred'),
    # GT boxes carry no confidence; they get the placeholder value 1
    'list__box__conf': [1] * _num__gt + _get__pred('list__box__conf__pred'),
    'list__box__type': ['gt'] * _num__gt + ['pred'] * _num__pred,
    'list__box__subpath_dir': _get__gt('list__box__subpath_dir__gt') + _get__pred('list__box__subpath_dir__pred'),
    'list__box__src_img_W': _get__gt('list__box__src_img_W__gt') + _get__pred('list__box__src_img_W__pred'),
    'list__box__src_img_H': _get__gt('list__box__src_img_H__gt') + _get__pred('list__box__src_img_H__pred'),
}


def _xcycwh_to_x1y1x2y2(xcycwh):
    """Convert a pixel [xc, yc, w, h] box to [x1, y1, x2, y2] using integer half sizes."""
    xc, yc, w, h = xcycwh
    half_w = w // 2
    half_h = h // 2
    return [xc - half_w, yc - half_h, xc + half_w, yc + half_h]


def _scale_to_imgsz(row, name__col):
    """Rescale one pixel length as if the longer side of the source image were IMGSZ."""
    side_longer = max(row['list__box__src_img_W'], row['list__box__src_img_H'])
    return int(row[name__col] * IMGSZ / side_longer)


df = pd.DataFrame(info_crops_all)

# box size in source-image pixels
df['w'] = df['list__box__xcycwh'].map(lambda xcycwh: xcycwh[2])
df['h'] = df['list__box__xcycwh'].map(lambda xcycwh: xcycwh[3])
df['list__box__x1y1x2y2'] = df['list__box__xcycwh'].map(_xcycwh_to_x1y1x2y2)
df['area'] = df['w'] * df['h']
df['area_log'] = np.log(df['area'])
df['area_sqrt'] = np.sqrt(df['area'])

# box size at IMGSZ scale
df['w_IMGSZ'] = df.apply(lambda row: _scale_to_imgsz(row, 'w'), axis=1)
df['h_IMGSZ'] = df.apply(lambda row: _scale_to_imgsz(row, 'h'), axis=1)

df['area_IMGSZ'] = df['w_IMGSZ'] * df['h_IMGSZ']
df['area_IMGSZ_log'] = np.log(df['area_IMGSZ'])
df['area_IMGSZ_sqrt'] = np.sqrt(df['area_IMGSZ'])

len(df)

In [None]:
# evaluate boxes: per image, mark every GT box as TP / FN and every predicted box as TP / FP
df['list__box__eval'] = None
df['list__box__matched_to_pred__dfloc'] = None
df['list__box__iou_to_matched_pred'] = None

# validate the configuration once and pick the threshold that belongs to the overlap measure
if MODE__IOU_EVAL == 'NORMAL':
    thresh__match = THRESH__IOU
elif MODE__IOU_EVAL == 'MINIOU':
    thresh__match = THRESH__MINIOU
else:
    raise ValueError('Invalid MODE__IOU_EVAL: {}'.format(MODE__IOU_EVAL))
if MODE__EVAL__MATCHING not in ('one-one', 'many-many'):
    raise ValueError('Invalid MODE__EVAL__MATCHING: {}'.format(MODE__EVAL__MATCHING))

unique_paths_src_img = df['list__box__path_src_img'].unique()
for path_src_img in tqdm(unique_paths_src_img):
    df__of_img = df[df['list__box__path_src_img'] == path_src_img]
    df__of_img__gt = df__of_img[df__of_img['list__box__type'] == 'gt']
    df__of_img__pred = df__of_img[df__of_img['list__box__type'] == 'pred']
    # df index labels of this image's GT / Pred rows; matrix row i <-> indices__of_img__gt[i]
    indices__of_img__gt = df__of_img__gt.index.to_numpy()
    indices__of_img__pred = df__of_img__pred.index.to_numpy()

    # cost by iou: rows are GT boxes, columns are predicted boxes
    if MODE__IOU_EVAL == 'NORMAL':
        cost_iou = (1 - torchvision.ops.box_iou(
            torch.tensor(df__of_img__gt['list__box__x1y1x2y2'].tolist()).reshape(-1, 4),
            torch.tensor(df__of_img__pred['list__box__x1y1x2y2'].tolist()).reshape(-1, 4)
        )).numpy()
    else:
        cost_iou = 1 - box__miniou(
            np.array(df__of_img__gt['list__box__x1y1x2y2'].tolist()).reshape(-1, 4),
            np.array(df__of_img__pred['list__box__x1y1x2y2'].tolist()).reshape(-1, 4)
        )
    # pairs whose overlap does not exceed the threshold get the sentinel cost 1
    cost_iou = np.where(cost_iou < 1 - thresh__match, cost_iou, 1)
    # mask by class id: pairs of different classes also get the sentinel cost 1
    mask_cls = np.equal(
        np.expand_dims(df__of_img__gt['list__box__cls'].tolist(), axis=1),
        np.expand_dims(df__of_img__pred['list__box__cls'].tolist(), axis=0)
    )
    cost_iou = np.where(mask_cls, cost_iou, 1)

    # matching
    if MODE__EVAL__MATCHING == 'one-one':
        matched_gt, matched_pred = linear_sum_assignment(cost_iou)
        # the assignment always pairs min(n_gt, n_pred) boxes, even when the only pairing
        # left is an invalid one (sentinel cost); such pairs must not count as matches
        is_valid = cost_iou[matched_gt, matched_pred] < 1 - thresh__match
        matched_gt, matched_pred = matched_gt[is_valid], matched_pred[is_valid]
    else:
        matched_gt, matched_pred = np.where(cost_iou < 1 - thresh__match)

    set__matched_gt = set(matched_gt.tolist())
    set__matched_pred = set(matched_pred.tolist())
    unmatched_gt = [i for i in range(len(indices__of_img__gt)) if i not in set__matched_gt]
    unmatched_pred = [i for i in range(len(indices__of_img__pred)) if i not in set__matched_pred]

    # per matched GT box: df index labels of its matched predictions and the matching overlaps
    matched_pred_loc_set = {}
    matched_iou_set = {}
    for i_gt, i_pred in zip(matched_gt, matched_pred):
        loc_gt = indices__of_img__gt[i_gt]
        matched_pred_loc_set.setdefault(loc_gt, []).append(indices__of_img__pred[i_pred])
        matched_iou_set.setdefault(loc_gt, []).append(1 - cost_iou[i_gt, i_pred])

    # write through df.loc / df.at: the chained form df[col].loc[...] = value may assign to a
    # temporary copy and is a no-op once pandas copy-on-write is active
    for verdict, locs__verdict in (
        ('TP', np.unique(indices__of_img__gt[matched_gt])),
        ('TP', np.unique(indices__of_img__pred[matched_pred])),
        ('FN', indices__of_img__gt[unmatched_gt]),
        ('FP', indices__of_img__pred[unmatched_pred]),
    ):
        if len(locs__verdict):
            df.loc[locs__verdict, 'list__box__eval'] = verdict
    # the cell values are lists, so they are stored one cell at a time
    for loc_gt in matched_pred_loc_set:
        df.at[loc_gt, 'list__box__matched_to_pred__dfloc'] = matched_pred_loc_set[loc_gt]
        df.at[loc_gt, 'list__box__iou_to_matched_pred'] = matched_iou_set[loc_gt]

In [None]:
# Overall TP / FP / FN counts per class.
# A TP is counted once, through its GT row; FP rows are predictions and FN rows are GT boxes.
_is_tp = df['list__box__eval'] == 'TP'
_is_gt = df['list__box__type'] == 'gt'
pvtab_eval = (
    df[(_is_tp & _is_gt) | (df['list__box__eval'] != 'TP')]
    .groupby(['list__box__cls', 'list__box__eval']).size().reset_index(name='count')
    .pivot_table(index='list__box__cls', columns='list__box__eval', values='count', fill_value=0)
    # an outcome that never occurs produces no column; add it as zeros so the metrics below do not raise KeyError
    .reindex(columns=['FN', 'FP', 'TP'], fill_value=0)
)

pvtab_eval['precision'] = pvtab_eval['TP'] / (pvtab_eval['TP'] + pvtab_eval['FP'])
pvtab_eval['recall'] = pvtab_eval['TP'] / (pvtab_eval['TP'] + pvtab_eval['FN'])
pvtab_eval['f1'] = 2 * pvtab_eval['precision'] * pvtab_eval['recall'] / (pvtab_eval['precision'] + pvtab_eval['recall'])
pvtab_eval['Number-of-GT-boxes'] = pvtab_eval['TP'] + pvtab_eval['FN']
pvtab_eval['Percentage-of-GT-boxes'] = pvtab_eval['Number-of-GT-boxes'] / pvtab_eval['Number-of-GT-boxes'].sum()

pvtab_eval[
    [
        'precision',
        'recall',
        'f1',
        'Percentage-of-GT-boxes'
    ]
].plot.bar()
plt.grid(axis='y')
plt.legend(loc='lower right', bbox_to_anchor=(1.5, 0))
plt.show()

pvtab_eval


# = Visualize

In [None]:
# visualize GT and Pred boxes on the src_img
# Each output file stacks the untouched image on top of a copy with every box drawn on it.
# shutil.rmtree replaces the shell `rm -rf` so the path is never parsed by a shell.
shutil.rmtree(PATH__DIR__OUTPUT__SRC_IMG__VIS, ignore_errors=True)
os.makedirs(PATH__DIR__OUTPUT__SRC_IMG__VIS, exist_ok=True)
locs = []
paths_vis = []
for path_src_img, group in tqdm(df.groupby('list__box__path_src_img')):
    img = cv2.imread(path_src_img)
    if img is None:
        # cv2.imread reports an unreadable file by returning None instead of raising
        raise IOError("Cannot read image: {}".format(path_src_img))
    img_vis = img.copy()

    # all rows of a group describe the same image, hence the same dataset sub-directory
    subpath__dir = group['list__box__subpath_dir'].iloc[0]
    path__dir__parent = os.path.join(PATH__DIR__OUTPUT__SRC_IMG__VIS, subpath__dir)
    os.makedirs(path__dir__parent, exist_ok=True)
    path_vis = os.path.join(path__dir__parent, os.path.basename(path_src_img))

    for loc, row in group.iterrows():
        box_cls = row['list__box__cls']
        x1, y1, x2, y2 = row['list__box__x1y1x2y2']
        type_ = row['list__box__type']
        conf = row['list__box__conf']

        if type_ == 'gt':
            color = (50, 255, 0)
        else:
            color = (255, 50, 255)

        cv2_rectangle(
            img_vis, (x1, y1), (x2, y2),
            color=color,
            thickness=THICKNESS,
        )
        # GT labels sit above the box, prediction labels (with confidence) inside its bottom-left corner
        cv2_putText(
            img_vis,
            '{} {}'.format(MAP__ID_CLASS__TO__NAME[box_cls], '{:.2f}'.format(conf) if type_ == 'pred' else ''),
            (x1, y1 - THICKNESS - 5) if type_ == 'gt' else (x1 + 4, y2 - THICKNESS - 5),
            fontScale=FONTSCALE, color=color, thickness=THICKNESS
        )
        locs.append(loc)
        paths_vis.append(path_vis)
    cv2.imwrite(path_vis, np.concatenate([img, img_vis], axis=0))

# record, for every box, the visualisation of its source image.
# df.loc is used because the chained form df[col].loc[...] = value may assign to a temporary copy.
df['list__boxes__path_src_img__vis'] = None
if locs:
    df.loc[locs, 'list__boxes__path_src_img__vis'] = paths_vis

# = Create bins and pivot table

In [None]:
# split into bins
# The intervals are right-closed so that they agree with the '(lo, hi]' strings in BINS_LABEL
# (right=False produced [lo, hi) bins under '(lo, hi]' labels). include_lowest=True keeps
# boxes whose size at IMGSZ scale is 0 in the first bin instead of turning them into NaN.
df['list_boxes_bin__area_IMGSZ_sqrt'] = pd.cut(
    df['area_IMGSZ_sqrt'],
    bins=BINS,
    labels=BINS_LABEL,
    right=True,
    include_lowest=True,
)

In [None]:
# count TP, FP, FN
def get_pvtab_eval(box_cls):
    """Build the per-size-bin evaluation table of one class from the global `df`.

    A TP is counted once, through its GT row; FP rows are predictions and FN rows
    are GT boxes.

    Args:
        box_cls: class id to select in `df['list__box__cls']`.

    Returns:
        DataFrame indexed by size bin with the count columns 'FN', 'FP', 'TP'
        (always present, 0 where an outcome does not occur) and the derived
        columns 'precision', 'recall', 'f1', 'Number-of-GT-boxes' and
        'Percentage-of-GT-boxes'. Ratios with a zero denominator are NaN.
    """
    is_class = df['list__box__cls'] == box_cls
    is_tp = df['list__box__eval'] == 'TP'
    is_gt = df['list__box__type'] == 'gt'

    pvtab_eval = (
        df[is_class & ((is_tp & is_gt) | ~is_tp)]
        .groupby(['list_boxes_bin__area_IMGSZ_sqrt', 'list__box__eval']).size().reset_index(name='count')
        .pivot_table(index='list_boxes_bin__area_IMGSZ_sqrt', columns='list__box__eval', values='count', fill_value=0)
        # a class without any FP (or FN / TP) produces no such column; add it as zeros
        # so the metric computation below does not raise KeyError
        .reindex(columns=['FN', 'FP', 'TP'], fill_value=0)
    )

    pvtab_eval['precision'] = pvtab_eval['TP'] / (pvtab_eval['TP'] + pvtab_eval['FP'])
    pvtab_eval['recall'] = pvtab_eval['TP'] / (pvtab_eval['TP'] + pvtab_eval['FN'])
    pvtab_eval['f1'] = 2 * pvtab_eval['precision'] * pvtab_eval['recall'] / (pvtab_eval['precision'] + pvtab_eval['recall'])
    pvtab_eval['Number-of-GT-boxes'] = pvtab_eval['TP'] + pvtab_eval['FN']
    pvtab_eval['Percentage-of-GT-boxes'] = pvtab_eval['Number-of-GT-boxes'] / pvtab_eval['Number-of-GT-boxes'].sum()

    return pvtab_eval

# One per-size-bin evaluation table for every class id in MAP__ID_CLASS__TO__NAME
dict__pvtab__eval = {id_class: get_pvtab_eval(box_cls=id_class) for id_class in MAP__ID_CLASS__TO__NAME.keys()}


In [None]:
# Pothole: precision / recall / F1 and share of GT boxes per size bin, then the full table
id_class = 0

pvtab__shown = dict__pvtab__eval[id_class]
ax = pvtab__shown[['precision', 'recall', 'f1', 'Percentage-of-GT-boxes']].plot.bar()
ax.grid(axis='y')
ax.legend(loc='lower right', bbox_to_anchor=(1.5, 0))
ax.set_title(MAP__ID_CLASS__TO__NAME[id_class])
plt.show()

pvtab__shown

In [None]:
# Manhole: precision / recall / F1 and share of GT boxes per size bin, then the full table
id_class = 1

pvtab__shown = dict__pvtab__eval[id_class]
ax = pvtab__shown[['precision', 'recall', 'f1', 'Percentage-of-GT-boxes']].plot.bar()
ax.grid(axis='y')
ax.legend(loc='lower right', bbox_to_anchor=(1.5, 0))
ax.set_title(MAP__ID_CLASS__TO__NAME[id_class])
plt.show()

pvtab__shown

In [None]:
# Drainage: precision / recall / F1 and share of GT boxes per size bin, then the full table
id_class = 2

pvtab__shown = dict__pvtab__eval[id_class]
ax = pvtab__shown[['precision', 'recall', 'f1', 'Percentage-of-GT-boxes']].plot.bar()
ax.grid(axis='y')
ax.legend(loc='lower right', bbox_to_anchor=(1.5, 0))
ax.set_title(MAP__ID_CLASS__TO__NAME[id_class])
plt.show()

pvtab__shown

In [None]:
# mean conf of TP and FP predictions
def get_pvtab_conf(box_cls):
    """Mean prediction confidence per size bin and outcome for one class, from the global `df`.

    Only prediction rows are averaged. GT rows carry the placeholder confidence 1,
    so letting TP GT rows through (as the previous `!= 'FN'` filter did) inflated
    the TP mean. FN rows are GT boxes and therefore have no confidence to report.

    Args:
        box_cls: class id to select in `df['list__box__cls']`.

    Returns:
        DataFrame indexed by size bin with one column per outcome found among the
        predictions ('TP' and/or 'FP'); combinations without data are filled with 0.
    """
    is_selected = (df['list__box__cls'] == box_cls) & (df['list__box__type'] == 'pred')
    pvtab_conf = (
        df[is_selected]
        .groupby(['list_boxes_bin__area_IMGSZ_sqrt', 'list__box__eval'])['list__box__conf'].mean().reset_index(name='conf')
        .pivot_table(index='list_boxes_bin__area_IMGSZ_sqrt', columns='list__box__eval', values='conf', fill_value=0)
    )

    return pvtab_conf

# One per-size-bin mean-confidence table for every class id in MAP__ID_CLASS__TO__NAME
dict__pvtab__conf = {id_class: get_pvtab_conf(box_cls=id_class) for id_class in MAP__ID_CLASS__TO__NAME.keys()}

In [None]:
# Pothole: mean confidence per size bin and outcome, then the full table
id_class = 0

pvtab__shown = dict__pvtab__conf[id_class]
ax = pvtab__shown.plot.bar()
ax.grid(axis='y')
ax.legend(loc='lower right', bbox_to_anchor=(1.2, 0))
ax.set_ylabel('Mean confidence score')
ax.set_title(MAP__ID_CLASS__TO__NAME[id_class])
plt.show()

pvtab__shown

In [None]:
# Manhole: mean confidence per size bin and outcome, then the full table
id_class = 1

pvtab__shown = dict__pvtab__conf[id_class]
ax = pvtab__shown.plot.bar()
ax.grid(axis='y')
ax.legend(loc='lower right', bbox_to_anchor=(1.2, 0))
ax.set_ylabel('Mean confidence score')
ax.set_title(MAP__ID_CLASS__TO__NAME[id_class])
plt.show()

pvtab__shown

In [None]:
# Drainage: mean confidence per size bin and outcome, then the full table
id_class = 2

pvtab__shown = dict__pvtab__conf[id_class]
ax = pvtab__shown.plot.bar()
ax.grid(axis='y')
ax.legend(loc='lower right', bbox_to_anchor=(1.2, 0))
ax.set_ylabel('Mean confidence score')
ax.set_title(MAP__ID_CLASS__TO__NAME[id_class])
plt.show()

pvtab__shown

## == Determine root causes

### === Select NUM__CROP__VERIFY failed cases

In [None]:
# Write one workbook per class with one sheet per size bin. Each sheet lists up to
# NUM__CROP__VERIFY randomly sampled failed boxes (everything that is not a TP) next to their
# crop image, leaving the 'root-cause' column empty for manual annotation.
os.makedirs(PATH__DIR__WORKSHEET, exist_ok=True)

cols_select = ['list__box__conf', 'list__box__type', 'w', 'h', 'w_IMGSZ', 'h_IMGSZ', 'list__box__eval']
# sheet layout: loc | cols_select ... | crop_img | root-cause | source-image
col__crop_img = len(cols_select) + 1
col__root_cause = len(cols_select) + 2
col__src_img = len(cols_select) + 3
max_crop_H = 180   # taller crops are scaled down to this height (pixels)
offset = 5         # extra pixels added to row heights and to the image column width

for box_cls in MAP__ID_CLASS__TO__NAME:
    path__workbook = os.path.join(PATH__DIR__WORKSHEET, "{}.xlsx".format(MAP__ID_CLASS__TO__NAME[box_cls]))
    df__failed = df[(df['list__box__eval'] != 'TP') & (df['list__box__cls'] == box_cls)]
    # the context manager closes (and thereby writes) the workbook even if a sheet fails
    with xlsxwriter.Workbook(path__workbook) as workbook:
        for bin_, group in df__failed.groupby('list_boxes_bin__area_IMGSZ_sqrt'):
            worksheet = workbook.add_worksheet(bin_.strip('(').strip(']').replace(', ', '-'))
            worksheet.write_row(0, 0, ['loc'] + cols_select + ['crop_img', 'root-cause', 'source-image'])
            size = min(len(group), NUM__CROP__VERIFY)
            subset = group.sample(size, replace=False, random_state=42)
            max_crop_width = 80
            for row_count, (loc, row) in enumerate(subset.iterrows(), start=1):
                worksheet.write_row(row_count, 0, [loc] + row[cols_select].tolist())
                path_crop = row['list__box__path_crop']
                # only the size is needed; close the file handle right away
                with PIL.Image.open(path_crop) as img__crop:
                    crop_W, crop_H = img__crop.size
                crop_scale = min(1, max_crop_H / crop_H)
                max_crop_width = max(max_crop_width, int(crop_W * crop_scale))
                worksheet.insert_image(row_count, col__crop_img, path_crop, {'x_scale': crop_scale, 'y_scale': crop_scale})
                worksheet.set_row_pixels(row_count, max(25, min(crop_H, max_crop_H)) + offset)
                worksheet.write(row_count, col__src_img, row['list__boxes__path_src_img__vis'])
            worksheet.autofit()
            # widen the image column to the widest scaled crop and give the annotation column room
            worksheet.set_column_pixels(col__crop_img, col__crop_img, max_crop_width + offset)
            worksheet.set_column_pixels(col__root_cause, col__root_cause, 400)
    print("Saved: {}".format(path__workbook))

### === Root causes statistics

In [None]:
def get_pvtab_root_cause(box_cls):
    """Read the annotated worksheet of one class and summarise root causes per size bin.

    The workbook is read from PATH__DIR__WORKSHEET, the directory the worksheets of
    this notebook are written to (the previously referenced
    DIR_FP_FN_ROOT_CAUSE_WORKSHEETS is not defined anywhere). Every row of a sheet
    must have its 'root-cause' cell filled in with text.

    Args:
        box_cls: class id; the workbook name is taken from MAP__ID_CLASS__TO__NAME.

    Returns:
        (df_root_cause, pvtab_root_cause):
        df_root_cause has one row per (size bin, root cause, FP/FN) with 'count'
        and 'percentage' (share within the bin's sheet);
        pvtab_root_cause has size bins as rows, root causes as columns and the
        summed percentage as values.
    """
    def _normalise(text):
        # trim whitespace and trailing dots, drop non-ASCII characters, lower-case
        return text.strip().strip('.').strip().encode('ascii', 'ignore').decode('ascii').lower()

    path_excel = os.path.join(PATH__DIR__WORKSHEET, '{}.xlsx'.format(MAP__ID_CLASS__TO__NAME[box_cls]))
    df_root_cause = []
    # open the workbook once instead of re-parsing it for every sheet
    with pd.ExcelFile(path_excel) as excel_file:
        # dropna(): boxes outside BINS have a NaN bin, which has no sheet (and no .strip)
        for bin_ in df['list_boxes_bin__area_IMGSZ_sqrt'].dropna().unique().sort_values():
            name__sheet = bin_.strip('(').strip(']').replace(', ', '-')
            if name__sheet not in excel_file.sheet_names:
                # no sheet was written for this bin of this class
                continue
            sheet = pd.read_excel(excel_file, sheet_name=name__sheet, index_col=0)
            sheet['root-cause'] = sheet['root-cause'].apply(_normalise)
            sheet_count = sheet.groupby(['root-cause', 'list__box__eval']).size().reset_index(name='count').sort_values(by='count', ascending=False)
            sheet_count['percentage'] = sheet_count['count'] / sheet_count['count'].sum() * 100
            sheet_count['list_boxes_bin__area_IMGSZ_sqrt'] = bin_
            df_root_cause.append(sheet_count)

    df_root_cause = pd.concat(df_root_cause, axis=0)
    df_root_cause = df_root_cause.reset_index(drop=True)
    # FP and FN shares of the same root cause are added up; only 'percentage' is summed so
    # the text columns never take part in the aggregation
    pvtab_root_cause = (
        df_root_cause
        .groupby(['root-cause', 'list_boxes_bin__area_IMGSZ_sqrt'])['percentage'].sum().reset_index()
        .pivot_table(index='list_boxes_bin__area_IMGSZ_sqrt', columns='root-cause', values='percentage')
    )
    # restore the bin order of df_root_cause (the pivot sorts the labels as strings)
    pvtab_root_cause = pvtab_root_cause.loc[df_root_cause['list_boxes_bin__area_IMGSZ_sqrt'].unique()]

    return df_root_cause, pvtab_root_cause

def helper_aggregate_root_cause(df_root_cause, eval_):
    """Total the root-cause counts over all size bins and rank them by share.

    Args:
        df_root_cause: table with the columns 'root-cause', 'list__box__eval' and 'count'.
        eval_: 'FP' or 'FN' to keep only that outcome, or None to use every row.

    Returns:
        DataFrame with the columns 'root-cause', 'count' and 'percentage'
        (share of the selected rows, in percent), sorted by descending percentage.
        A heading naming the selection is printed first.
    """
    if eval_ is None:
        print('FP + FN:')
        df_selected = df_root_cause
    else:
        print(eval_)
        df_selected = df_root_cause[df_root_cause['list__box__eval'] == eval_]

    df_total = df_selected[['root-cause', 'count']].groupby('root-cause').sum()
    df_total['percentage'] = df_total['count'] / df_total['count'].sum() * 100
    return df_total.reset_index().sort_values(by='percentage', ascending=False)

# Root-cause tables for class ids 0 and 1 only; class id 2 is not processed here
df_root_cause_pothole, pvtab_root_cause_pothole = get_pvtab_root_cause(0)
df_root_cause_manhole, pvtab_root_cause_manhole = get_pvtab_root_cause(1)

In [None]:
# Class id 0: root-cause share per size bin as a bar chart, then the underlying table
ax = pvtab_root_cause_pothole.plot.bar(figsize=(15, 9))
ax.set_title(MAP__ID_CLASS__TO__NAME[0])
plt.show()

df_root_cause_pothole

In [None]:
# Class id 0: root causes ranked over FP and FN together
helper_aggregate_root_cause(df_root_cause_pothole, eval_=None)

In [None]:
# Class id 0: root causes ranked over FP only
helper_aggregate_root_cause(df_root_cause_pothole, eval_='FP')

In [None]:
# Class id 0: root causes ranked over FN only
helper_aggregate_root_cause(df_root_cause_pothole, eval_='FN')

In [None]:
# Class id 1: root-cause share per size bin as a bar chart, then the underlying table
ax = pvtab_root_cause_manhole.plot.bar(figsize=(15, 9))
ax.set_title(MAP__ID_CLASS__TO__NAME[1])
plt.show()

df_root_cause_manhole

In [None]:
# Class id 1: root causes ranked over FP and FN together
helper_aggregate_root_cause(df_root_cause_manhole, eval_=None)

In [None]:
# Class id 1: root causes ranked over FP only
helper_aggregate_root_cause(df_root_cause_manhole, eval_='FP')

In [None]:
# Class id 1: root causes ranked over FN only
helper_aggregate_root_cause(df_root_cause_manhole, eval_='FN')

### === Arrange failed crops into folders by root causes

In [None]:
# Copy every annotated failed case into <class>/<root cause>/<FP|FN>/{crop, source-image, source-image-visualized}.
# NOTE(review): the directory names this cell used before (DIR_FP_FN_ROOT_CAUSE_WORKSHEETS,
# DIR_CROP_BY_ROOT_CAUSE) are not defined in this notebook. The workbooks are read from
# PATH__DIR__WORKSHEET, where this notebook writes them; the output folder below is a new
# location under PATH__DIR__OUTPUT - confirm it is where the copies should go.
path__dir__crop_by_root_cause = os.path.join(PATH__DIR__OUTPUT, 'crops--by-root-cause')

for box_cls in MAP__ID_CLASS__TO__NAME:
    path_excel = os.path.join(PATH__DIR__WORKSHEET, '{}.xlsx'.format(MAP__ID_CLASS__TO__NAME[box_cls]))
    df_root_cause = []
    # open the workbook once instead of re-parsing it for every sheet
    with pd.ExcelFile(path_excel) as excel_file:
        # dropna(): boxes outside BINS have a NaN bin, which has no sheet (and no .strip)
        for bin_ in df['list_boxes_bin__area_IMGSZ_sqrt'].dropna().unique().sort_values():
            name__sheet = bin_.strip('(').strip(']').replace(', ', '-')
            if name__sheet not in excel_file.sheet_names:
                # no sheet was written for this bin of this class
                continue
            sheet = pd.read_excel(excel_file, sheet_name=name__sheet, index_col=0)
            sheet['root-cause'] = sheet['root-cause'].apply(lambda x: x.strip().strip('.').strip().encode('ascii', 'ignore').decode('ascii').lower())
            df_root_cause.append(sheet)

    df_root_cause = pd.concat(df_root_cause, axis=0)
    # the sheet index is the 'loc' column, i.e. the row label of the box in df
    for loc, row in tqdm(df_root_cause.iterrows(), total=len(df_root_cause)):
        row__box = df.loc[loc]
        # the root cause becomes a folder name: spaces and slashes -> '-', parentheses removed
        name__dir__root_cause = row['root-cause'].replace(' ', '-').replace('(', '').replace(')', '').replace('/', '-')
        dir_output = os.path.join(path__dir__crop_by_root_cause, MAP__ID_CLASS__TO__NAME[box_cls], name__dir__root_cause, row['list__box__eval'])
        for path__file__src, name__dir__sub in (
            (row__box['list__box__path_crop'], 'crop'),
            (row__box['list__box__path_src_img'], 'source-image'),
            (row__box['list__boxes__path_src_img__vis'], 'source-image-visualized'),
        ):
            dir_output__sub = os.path.join(dir_output, name__dir__sub)
            os.makedirs(dir_output__sub, exist_ok=True)
            # shutil.copy raises on a missing source file, whereas the shell `cp` failed silently
            shutil.copy(path__file__src, dir_output__sub)

## == Remove objects that are too small

In [None]:
# Show the distinct size-bin values present in the table
df['list_boxes_bin__area_IMGSZ_sqrt'].unique()

In [None]:
# Gather the crops of every box in the '(16, 32]' size bin into one flat folder for inspection.
dir_out = '/mnt/hdd10tb/Users/laptq/laptq-prj-46/outputs/20241115--small'
# shutil replaces the shell `rm -rf` / `cp` so paths are never parsed by a shell
shutil.rmtree(dir_out, ignore_errors=True)
os.makedirs(dir_out, exist_ok=True)

df__small = df[df['list_boxes_bin__area_IMGSZ_sqrt'] == '(16, 32]']
for loc, row in tqdm(df__small.iterrows(), total=len(df__small)):
    path_crop__padded = row['list__box__path_crop']
    # GT and prediction crops of one image share the same file name ("<image>--crop-<i>.jpg"),
    # so the box type is prefixed to keep one from overwriting the other in the flat folder
    name__file__out = '{}--{}'.format(row['list__box__type'], os.path.basename(path_crop__padded))
    shutil.copy(path_crop__padded, os.path.join(dir_out, name__file__out))

## == Drawing stuff

In [None]:
# show cases: print and display the first 11 boxes of one (size bin, class, outcome) combination
bin_ = '(64, 128]'
box_cls = 0
eval_ = 'FN'

df__case = df[
    (df['list_boxes_bin__area_IMGSZ_sqrt'] == bin_)
    & (df['list__box__eval'] == eval_)
    & (df['list__box__cls'] == box_cls)
]
for idx, row in df__case.head(11).iterrows():
    path_img = row['list__box__path_crop']
    # Use the visualisation path stored in df: the files are written below
    # <vis dir>/<dataset sub-directory>/, so rebuilding the path from the image's base name
    # alone pointed at a file that does not exist (cv2.imread then returns None).
    path_src_img = row['list__boxes__path_src_img__vis']

    print(row[['list__box__cls', 'list__box__conf', 'list__box__eval', 'w', 'h', 'w_IMGSZ', 'h_IMGSZ', 'area_IMGSZ_sqrt']])
    imshow(cv2.imread(path_img))
    imshow(cv2.imread(path_src_img))

In [None]:
# arrange crops into subfolders: <class>/<size bin>/<TP|FP|FN>/<crop name>-<conf>.jpg
# NOTE(review): DIR_OUTPUT_BINS was not defined anywhere in this notebook; it is placed under
# PATH__DIR__OUTPUT here - confirm this is the intended location.
DIR_OUTPUT_BINS = os.path.join(PATH__DIR__OUTPUT, 'bins')
# shutil replaces the shell `rm -rf` / `cp` so paths are never parsed by a shell
shutil.rmtree(DIR_OUTPUT_BINS, ignore_errors=True)
os.makedirs(DIR_OUTPUT_BINS, exist_ok=True)

for i_loc, row in tqdm(df.iterrows(), total=len(df)):
    box_cls = row['list__box__cls']
    path_crop = row['list__box__path_crop']
    conf = row['list__box__conf']
    type_ = row['list__box__type']
    eval_ = row['list__box__eval']
    bin_ = row['list_boxes_bin__area_IMGSZ_sqrt']
    # a TP is represented by its GT crop only
    if type_ == 'pred' and eval_ == 'TP':
        continue
    # boxes whose size falls outside BINS have no bin to be filed under
    if pd.isna(bin_):
        continue
    # take the folder name from the class map so class id 2 is no longer filed as 'manhole'
    _dir_output = os.path.join(DIR_OUTPUT_BINS, MAP__ID_CLASS__TO__NAME[box_cls], bin_.strip('(').strip(']').replace(', ','-'), eval_)
    _path_output = os.path.join(_dir_output, '{}-{}.jpg'.format(os.path.splitext(os.path.split(path_crop)[1])[0], conf))
    os.makedirs(_dir_output, exist_ok=True)
    shutil.copy(path_crop, _path_output)