In [33]:
import sys
sys.path.append("../..")

from pathlib import Path

import cv2
import numpy as np
import matplotlib.pyplot as plt
import mediapipe as mp
from tqdm import tqdm
from sklearn.utils import resample
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

from src.config.path import SEGMENTATION_MP_PATH

In [34]:
def load_yolo_annotations(label_path, image_shape):
    """Read polygon annotations from a YOLO-style label file.

    Every line is split on whitespace; the first token (the class id) is
    dropped and the remaining tokens are read as alternating ``x y`` values.
    The values are assumed to be normalised to the image size, so they are
    scaled by the image width and height to obtain pixel coordinates.

    Args:
        label_path: Path (``str`` or ``Path``) of the label file.
        image_shape: Shape of the matching image; only the first two entries
            (height, width) are used.

    Returns:
        A list of ``float32`` arrays of shape ``(n_points, 2)`` holding
        ``(x, y)`` pixel coordinates, one array per polygon. The list is empty
        when the label file does not exist.

    Raises:
        ValueError: If a polygon line has an odd number of coordinate values
            or contains a token that is not a number.
    """
    h, w = image_shape[:2]

    label_path = Path(label_path)
    if not label_path.exists():
        return []

    polygons = []
    with open(label_path, 'r') as f:
        for line_no, line in enumerate(f, start=1):
            values = line.split()[1:]

            # A polygon needs at least three points (six values). Shorter rows
            # (blank lines, bounding-box rows, ...) are not polygons; skip them
            # instead of letting the reshape below fail on them.
            if len(values) < 6:
                continue

            if len(values) % 2:
                raise ValueError(
                    f"{label_path}:{line_no}: odd number of polygon "
                    f"coordinates ({len(values)})"
                )

            coords = np.array(list(map(float, values)), dtype=np.float32)
            coords = coords.reshape(-1, 2)

            # Scale x by the image width and y by the image height.
            coords[:, 0] *= w
            coords[:, 1] *= h

            polygons.append(coords)

    return polygons

In [35]:
def polygons_to_mask(polygons, shape):
    """Rasterise polygons into a binary mask.

    Args:
        polygons: Iterable of point arrays, one per polygon. Each is converted
            to ``int32`` before being drawn, which drops the fractional part
            of the coordinates.
        shape: Shape of the reference image; the mask takes ``shape[:2]``.

    Returns:
        A ``uint8`` array that is 1 inside every filled polygon and 0
        elsewhere.
    """
    canvas = np.zeros(shape[:2], dtype=np.uint8)

    for vertices in polygons:
        points = np.asarray(vertices).astype(np.int32)
        # Each polygon is drawn on its own, so overlapping polygons simply
        # stay filled.
        cv2.fillPoly(canvas, [points], 1)

    return canvas

In [36]:
def compute_iou(mask_pred, mask_gt):
    """Intersection over union of two masks.

    Any non-zero element counts as foreground. A small constant (1e-6) is
    added to the denominator so that two empty masks give 0.0 instead of a
    division by zero.

    Args:
        mask_pred: Predicted mask.
        mask_gt: Ground-truth mask of the same shape.

    Returns:
        The IoU as a float.
    """
    pred_fg = np.asarray(mask_pred).astype(bool)
    gt_fg = np.asarray(mask_gt).astype(bool)

    overlap = (pred_fg & gt_fg).sum()
    combined = (pred_fg | gt_fg).sum()

    return overlap / (combined + 1e-6)

In [37]:
def bootstrap_ci(iou_values, n_bootstrap=1000, ci=0.95, seed=None):
    """Mean of the values with a percentile-bootstrap confidence interval.

    The values are resampled with replacement ``n_bootstrap`` times, each
    resample having the same size as the input; the interval bounds are the
    ``(1 - ci) / 2`` and ``(1 + ci) / 2`` quantiles of the resampled means.

    Args:
        iou_values: One-dimensional sequence of numeric values.
        n_bootstrap: Number of bootstrap resamples.
        ci: Confidence level of the interval, e.g. ``0.95``.
        seed: Seed (or ``numpy.random.Generator``) for the resampling. Pass a
            fixed value to get a reproducible interval; ``None`` draws fresh
            entropy on every call.

    Returns:
        ``(mean, (lower, upper))`` where ``mean`` is the mean of the original
        values.

    Raises:
        ValueError: If ``iou_values`` is empty.
    """
    values = np.asarray(iou_values, dtype=float).ravel()
    if values.size == 0:
        raise ValueError("bootstrap_ci requires at least one value")

    rng = np.random.default_rng(seed)
    n = values.size

    # One resample at a time keeps memory at O(n) regardless of n_bootstrap.
    bootstrapped_means = [
        values[rng.integers(0, n, size=n)].mean()
        for _ in range(n_bootstrap)
    ]

    lower = np.percentile(bootstrapped_means, (1 - ci) / 2 * 100)
    upper = np.percentile(bootstrapped_means, (1 + ci) / 2 * 100)

    return values.mean(), (lower, upper)

In [38]:
# Build the MediaPipe image segmenter from the model file at SEGMENTATION_MP_PATH.
base_options = python.BaseOptions(
    model_asset_path=SEGMENTATION_MP_PATH,
)
options = vision.ImageSegmenterOptions(
    base_options=base_options,
    # The evaluation reads `category_mask` from each segmentation result.
    output_category_mask=True,
)
model = vision.ImageSegmenter.create_from_options(options)

In [39]:
# Test split location, relative to this notebook.
dataset_path = "../../datasets/merged/test"

# Images and their label files live in sibling sub-directories.
images_dir = Path(dataset_path, "images")
labels_dir = Path(dataset_path, "labels")

# JPEG files are listed first, then PNG files; label files are plain text.
images = [*images_dir.glob("*.jpg"), *images_dir.glob("*.png")]
labels = [*labels_dir.glob("*.txt")]

In [40]:
iou_values = []

# Each image is paired with the label file that shares its stem. Zipping the
# separately globbed `images` and `labels` lists would only pair correctly if
# both globs returned files in the same order and every image had a label;
# neither is guaranteed.
for image_path in tqdm(images, total=len(images)):

    label_path = str(labels_dir / f"{Path(image_path).stem}.txt")
    image_path = str(image_path)

    image = mp.Image.create_from_file(image_path)

    segmentation_result = model.segment(image)
    category_mask = segmentation_result.category_mask

    image_data = image.numpy_view()
    shape = image_data.shape

    # A missing label file yields no polygons, i.e. an all-zero ground truth.
    gt_polygons = load_yolo_annotations(label_path, shape)

    mask_gt = polygons_to_mask(gt_polygons, shape)
    mask_pred = category_mask.numpy_view()
    # Pixels whose category is 0 are the ones compared against the annotated
    # polygons.
    mask_pred = (mask_pred == 0).astype(np.uint8)

    iou = compute_iou(mask_pred, mask_gt)
    iou_values.append(iou)

mean_iou, (low, high) = bootstrap_ci(iou_values)

print(f"IoU [CI95]: {mean_iou:.4f} [{low:.4f}, {high:.4f}]")

100%|██████████| 4000/4000 [04:28<00:00, 14.89it/s]


IoU [CI95]: 0.9685 [0.9673, 0.9697]
