In [1]:
import os
import cv2
import numpy as np
from ultralytics import YOLO

# ==== Configuration ====
# Folder of images to evaluate and the folder holding their label .txt files
# (one file per image, same base name).
IMAGE_FOLDER = r"C:\Users\jotir\Downloads\train-2\images"
LABEL_FOLDER = r"C:\Users\jotir\Downloads\train-2\labels"

# Weights file loaded for inference.
MODEL_PATH = "runs/detect/small_real/weights/last.pt"

# Same-class predictions whose IoU is strictly above this value are merged.
MERGE_IOU_THRESHOLD = 0.0
# Minimum IoU for a merged prediction to count as matching a ground-truth box.
IOU_THRESHOLD = 0.3

# Class id -> name shown in the printed report.
CLASS_NAMES = {
    0: "image",
    1: "text",
}

# ==== IOU + Merging ====
def box_iou(boxA, boxB):
    """Return the intersection-over-union of two boxes.

    Both boxes are indexed as ``[x1, y1, x2, y2]``. When the boxes do not
    overlap (or only touch along an edge) the result is ``0.0``.
    """
    # Width and height of the overlapping rectangle, clamped at zero.
    overlap_w = max(0, min(boxA[2], boxB[2]) - max(boxA[0], boxB[0]))
    overlap_h = max(0, min(boxA[3], boxB[3]) - max(boxA[1], boxB[1]))
    intersection = overlap_w * overlap_h
    if intersection == 0:
        return 0.0

    area_a = (boxA[2] - boxA[0]) * (boxA[3] - boxA[1])
    area_b = (boxB[2] - boxB[0]) * (boxB[3] - boxB[1])
    union = area_a + area_b - intersection
    return intersection / union

def merge_boxes_with_classes(boxes, classes, iou_thresh):
    """Fuse overlapping same-class boxes into enclosing boxes.

    Boxes are visited in order. Each box that has not been absorbed yet
    becomes a seed; every other unabsorbed box of the same class whose IoU
    with the seed is strictly greater than ``iou_thresh`` joins the seed's
    group. Candidates are compared against the seed only, not against the
    growing group.

    Returns a list of ``(class, [x1, y1, x2, y2])`` tuples, one per group,
    where the coordinates are the min/max over the group's members (NumPy
    scalars, since they come out of a NumPy reduction).
    """
    candidates = list(zip(boxes, classes))
    taken = [False] * len(boxes)
    fused = []

    for seed_idx, (seed_box, seed_cls) in enumerate(candidates):
        if taken[seed_idx]:
            continue
        taken[seed_idx] = True

        # Unabsorbed boxes of the seed's class that overlap the seed enough.
        partner_ids = [
            idx
            for idx, (other_box, other_cls) in enumerate(candidates)
            if not taken[idx]
            and seed_cls == other_cls
            and box_iou(seed_box, other_box) > iou_thresh
        ]
        for idx in partner_ids:
            taken[idx] = True

        stacked = np.array([seed_box] + [candidates[idx][0] for idx in partner_ids])
        x1, y1 = stacked[:, 0:2].min(axis=0)
        x2, y2 = stacked[:, 2:4].max(axis=0)
        fused.append((seed_cls, [x1, y1, x2, y2]))

    return fused

def load_gt_boxes(label_path, img_w, img_h):
    """Read a label file and return its boxes scaled by the image size.

    Each usable line holds exactly five whitespace-separated numbers:
    ``class x_center y_center width height``. The centre/size values are
    converted to ``[x1, y1, x2, y2]`` corners and multiplied by ``img_w`` /
    ``img_h``. Lines with any other number of fields are ignored.

    Returns ``(boxes, classes)``: a list of corner boxes and a parallel list
    of integer class ids. Both are empty when ``label_path`` does not exist.
    """
    gt_boxes, gt_labels = [], []
    if os.path.exists(label_path):
        with open(label_path, "r") as handle:
            for raw_line in handle:
                fields = raw_line.strip().split()
                if len(fields) == 5:
                    cls_id, cx, cy, bw, bh = (float(tok) for tok in fields)
                    half_w = bw / 2
                    half_h = bh / 2
                    gt_boxes.append([
                        (cx - half_w) * img_w,
                        (cy - half_h) * img_h,
                        (cx + half_w) * img_w,
                        (cy + half_h) * img_h,
                    ])
                    gt_labels.append(int(cls_id))
    return gt_boxes, gt_labels


# Per-class counters, one entry per class id in CLASS_NAMES.
# 'BG' is incremented together with 'FP' below (an unmatched prediction),
# so the two counters always hold the same value.
metrics = {
    cls_id: {'TP': 0, 'FP': 0, 'FN': 0, 'BG': 0}
    for cls_id in CLASS_NAMES
}

model = YOLO(MODEL_PATH)

for fname in os.listdir(IMAGE_FOLDER):
    if not fname.lower().endswith(('.jpg', '.png', '.jpeg')):
        continue

    image_path = os.path.join(IMAGE_FOLDER, fname)
    label_path = os.path.join(LABEL_FOLDER, os.path.splitext(fname)[0] + ".txt")

    # The image is only read here to obtain its size for scaling the labels.
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals an unreadable/corrupt file by returning None
        # instead of raising; skip it rather than crash on `.shape`.
        print(f"Skipping unreadable image: {image_path}")
        continue
    h, w = img.shape[:2]

    # Predictions
    results = model(image_path, conf=0.8, show=False, verbose=False)[0]
    if results.boxes is not None:
        preds_raw = results.boxes.xyxy.cpu().numpy()
        pred_classes = results.boxes.cls.cpu().numpy().astype(int)
    else:
        preds_raw = []
        pred_classes = []

    merged_preds = merge_boxes_with_classes(preds_raw, pred_classes, MERGE_IOU_THRESHOLD)

    # Ground truth
    gts, gt_classes = load_gt_boxes(label_path, w, h)
    gt_used = [False] * len(gts)
    pred_used = [False] * len(merged_preds)

    # Greedy matching: each prediction claims the first unused ground-truth
    # box of the same class that reaches IOU_THRESHOLD.
    for i, (pred_cls, pred_box) in enumerate(merged_preds):
        for j, (gt_box, gt_cls) in enumerate(zip(gts, gt_classes)):
            if pred_cls == gt_cls and not gt_used[j] and box_iou(pred_box, gt_box) >= IOU_THRESHOLD:
                metrics[pred_cls]['TP'] += 1
                gt_used[j] = True
                pred_used[i] = True
                break

    # Predictions that matched nothing are false positives.
    for i, (pred_cls, _) in enumerate(merged_preds):
        if not pred_used[i]:
            metrics[pred_cls]['FP'] += 1
            metrics[pred_cls]['BG'] += 1

    # Ground-truth boxes that no prediction claimed are false negatives.
    for j, gt_cls in enumerate(gt_classes):
        if not gt_used[j]:
            metrics[gt_cls]['FN'] += 1

# ==== OUTPUT ====
# One summary per class: raw counts followed by precision and recall.
print("\n📊 Class-wise Evaluation:")
for cls_id, name in CLASS_NAMES.items():
    counts = metrics[cls_id]
    TP, FP, FN, BG = counts['TP'], counts['FP'], counts['FN'], counts['BG']

    # The 1e-6 term keeps the ratios defined when a denominator would be zero.
    precision = TP / (TP + FP + 1e-6)
    recall = TP / (TP + FN + 1e-6)

    report_lines = [
        f"\nClass: {name.upper()}",
        f"True Positives (TP): {TP}",
        f"False Positives (FP): {FP}",
        f"False Negatives (FN): {FN}",
        f"Background Predictions: {BG}",
        f"Precision: {precision:.4f}",
        f"Recall: {recall:.4f}",
    ]
    print("\n".join(report_lines))


  from .autonotebook import tqdm as notebook_tqdm



📊 Class-wise Evaluation:

Class: IMAGE
True Positives (TP): 191
False Positives (FP): 45
False Negatives (FN): 116
Background Predictions: 45
Precision: 0.8093
Recall: 0.6221

Class: TEXT
True Positives (TP): 2455
False Positives (FP): 416
False Negatives (FN): 2390
Background Predictions: 416
Precision: 0.8551
Recall: 0.5067


In [None]:
from ultralytics import YOLO
import os
# Weights file and dataset YAML handed to evaluate_model.
MODEL_PATH = "runs/detect/train12/weights/last.pt"
DATA_YAML = "data_test.yaml"
# Confidence threshold forwarded to model.val().
CONF_THRESHOLD = 0.9
def evaluate_model(model_path, data_yaml, conf=None, iou=1):
    """Validate a YOLO checkpoint on the test split and print summary metrics.

    Args:
        model_path: Path of the weights file loaded with ``YOLO``.
        data_yaml: Dataset YAML passed to ``model.val`` as ``data``.
        conf: Confidence threshold passed to ``model.val``. ``None`` (the
            default) falls back to the module-level ``CONF_THRESHOLD``.
        iou: IoU value passed to ``model.val``. Defaults to ``1``, the value
            this function has always used.

    Returns:
        The metrics object returned by ``model.val``.
    """
    if conf is None:
        conf = CONF_THRESHOLD

    print(f"Evaluating model: {model_path}")
    model = YOLO(model_path)

    # `save_hybrid=True` is deliberately not passed: in Ultralytics validation
    # it appends the ground-truth labels to the predictions (an auto-labelling
    # aid), and the library itself warns that this produces incorrect mAP.
    # NOTE(review): Ultralytics documents `iou` as the NMS threshold, so a
    # value of 1 presumably leaves overlapping detections unsuppressed --
    # confirm this is intended.
    metrics = model.val(
        data=data_yaml,
        split='test',
        conf=conf,
        iou=iou,
        save_json=True,
    )

    print("\n---- RESULTS ----")
    print(f"mAP50: {metrics.box.map50:.4f}")
    print(f"mAP50-95: {metrics.box.map:.4f}")
    print(f"Precision: {metrics.box.mp:.4f}")
    print(f"Recall: {metrics.box.mr:.4f}")

    return metrics

# Run the evaluation when this code is executed directly, not when imported.
if __name__ == "__main__":
    evaluate_model(model_path=MODEL_PATH, data_yaml=DATA_YAML)