In [None]:
import os

import cv2
import numpy as np
from ultralytics import YOLO


In [None]:
# Paths to the stereo image pair and the output folder for the detection files.
# All three are empty placeholders and must be filled in before running.
# image_2_path is the image passed as the left view to the stereo matching
# further down; image_3_path is the other image of the pair.
# labels_output_path is handed to the detector as its output project folder.

image_2_path = r""
image_3_path = r""
labels_output_path = r""

In [None]:
# Load the detector from the "yolov8n.pt" weights; the same model instance is
# used for both images of the stereo pair.
model  = YOLO("yolov8n.pt")

In [None]:
# Run detection on both images of the stereo pair.
# save_txt / save_conf ask the detector to also write the detections (with
# their confidence) as text label files below project/name.
# NOTE(review): with exist_ok=False a second run is expected to go to a new,
# suffixed run folder rather than reusing "label_2" / "label_3", so the label
# paths configured later may need to be updated after a re-run — confirm
# against the installed ultralytics version.

results_2 = model.predict(
    source=image_2_path,
    save_txt=True,
    save_conf=True,
    project=labels_output_path,
    name="label_2",
    exist_ok=False,
)

# The results for the second image get their own name; they used to be
# assigned to results_2, which silently discarded the first image's results.
results_3 = model.predict(
    source=image_3_path,
    save_txt=True,
    save_conf=True,
    project=labels_output_path,
    name="label_3",
    exist_ok=False,
)


In [None]:
# Filter out detections smaller than min_w x min_h

def size_filter(image_path, label_path, min_w, min_h):
    """Remove detections smaller than ``min_w`` x ``min_h`` pixels from a label file.

    The label file is rewritten in place. Only lines whose box is at least
    ``min_w`` pixels wide and ``min_h`` pixels high are kept, and a running
    index (0, 1, 2, ...) is appended to every kept line as an extra column.
    If nothing is kept the file is left empty.

    Because of the appended column the function is not idempotent: calling it
    again on an already filtered file appends another index column.

    Args:
        image_path: Image the labels belong to. Only its size is used, to
            convert the normalised box sizes to pixels.
        label_path: Text file with one whitespace-separated detection per
            line; columns 3 and 4 hold the normalised box width and height.
        min_w: Minimum box width in pixels.
        min_h: Minimum box height in pixels.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising,
        # which would otherwise surface as an opaque AttributeError below.
        raise FileNotFoundError(f"Could not read image: {image_path!r}")
    img_h, img_w = img.shape[:2]

    kept_lines = []
    with open(label_path, "r") as f:
        for line in f:
            s = line.strip()
            if not s:
                continue
            parts = s.split()
            if len(parts) < 5:
                # Not a complete "class xc yc w h" record.
                continue
            w_px = float(parts[3]) * img_w
            h_px = float(parts[4]) * img_h
            if w_px >= min_w and h_px >= min_h:
                kept_lines.append(s)

    # Opening for writing truncates the file even when nothing was kept.
    with open(label_path, "w") as f:
        f.writelines(f"{text} {idx}\n" for idx, text in enumerate(kept_lines))

In [None]:
# Path to the YOLO generated detection files
# (empty placeholders: set them to the label text files written by the
# detection step for image_2_path and image_3_path respectively).

labels_2_txt_path = r""
labels_3_txt_path = r""

# Keep only detections of at least 100 x 100 pixels. size_filter rewrites the
# label files in place and appends an index column to every kept line, so this
# cell should be run once per set of freshly written label files.
size_filter(image_2_path, labels_2_txt_path, 100, 100)
size_filter(image_3_path, labels_3_txt_path, 100, 100)

In [None]:
# Draw bounding boxes on images

def draw_yolo_boxes(image_path, label_path, output_path=None):
    """Draw the boxes of a YOLO label file onto an image and save the result.

    Every box is drawn as a green rectangle with a small filled tag showing
    its index. The index is read from column 6 of the label line when that
    column exists; otherwise the running number of the box within the file is
    used.

    Args:
        image_path: Image to draw on.
        label_path: Text file with one whitespace-separated detection per
            line; columns 1-4 hold the normalised centre x, centre y, width
            and height.
        output_path: Where to write the annotated image. Defaults to the
            input path with "_boxed" inserted before the extension.

    Raises:
        FileNotFoundError: If the image cannot be read.
        OSError: If the annotated image cannot be written.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {image_path!r}")
    img_h, img_w = img.shape[:2]

    box_count = 0
    with open(label_path, "r") as f:
        for line in f:
            parts = line.strip().split()
            if len(parts) < 5:
                continue
            # Column 6 only exists on lines that carry an extra index column;
            # fall back to the position of the box within the file.
            box_id = int(float(parts[6])) if len(parts) > 6 else box_count
            box_count += 1

            xc, yc, w, h = map(float, parts[1:5])
            x1 = int((xc - w/2) * img_w)
            y1 = int((yc - h/2) * img_h)
            x2 = int((xc + w/2) * img_w)
            y2 = int((yc + h/2) * img_h)

            cv2.rectangle(img, (x1, y1), (x2, y2), (0,255,0), 2)
            label = str(box_id)

            # Filled background just above the box so the index stays readable.
            (tw, th), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
            y_text = max(0, y1 - 6)
            cv2.rectangle(img, (x1, y_text - th - 4), (x1 + tw + 4, y_text + 2), (0,255,0), -1)
            cv2.putText(img, label, (x1 + 2, y_text), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0,0,0), 2, cv2.LINE_AA)

    if output_path is None:
        base, ext = os.path.splitext(image_path)
        output_path = base + "_boxed" + ext
    if not cv2.imwrite(output_path, img):
        raise OSError(f"Could not write image: {output_path!r}")

In [None]:
# Path for output images

# Drawing is optional and disabled by default. To enable it, make sure
# draw_yolo_boxes is defined, fill in the two output paths and uncomment the
# lines below.
#
# output_image2_path = r""
# output_image3_path = r""
#
# draw_yolo_boxes(image_2_path, labels_2_txt_path, output_image2_path)
# draw_yolo_boxes(image_3_path, labels_3_txt_path, output_image3_path)

In [None]:
# Match detections between stereo images

def simple_stereo_match(left_img_path, left_label_path, right_label_path,
                        y_tolerance_pct, size_tolerance_pct):
    """Greedily pair detections of the left image with detections of the right image.

    For every left box, in file order, the first not yet used right box is
    taken that satisfies all of:

    * the vertical centre distance is at most ``y_tolerance_pct`` percent of
      the image height,
    * the disparity (left centre x minus right centre x) is positive,
    * width and height each differ by at most ``size_tolerance_pct`` percent,
      relative to the larger of the two values.

    Each right box is used at most once. Box indices count the label lines
    that have at least five columns, starting at 0.

    The boxes of both label files are converted to pixels with the size of
    the left image, i.e. both images are treated as having the same size.

    Args:
        left_img_path: Left image; only its size is used.
        left_label_path: Label file of the left image.
        right_label_path: Label file of the right image.
        y_tolerance_pct: Allowed vertical centre offset, in percent of the
            image height.
        size_tolerance_pct: Allowed relative width/height difference, in
            percent.

    Returns:
        List of ``(left_idx, right_idx, disparity_px)`` tuples.

    Raises:
        FileNotFoundError: If the left image cannot be read.
    """
    left_img = cv2.imread(left_img_path)
    if left_img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {left_img_path!r}")
    img_height, img_width = left_img.shape[:2]

    def load_yolo_labels(txt_path):
        """Return the boxes of a label file as [cx, cy, w, h] in pixels."""
        boxes = []
        try:
            with open(txt_path, "r") as f:
                for line in f:
                    parts = line.strip().split()
                    if len(parts) < 5:
                        continue
                    xc, yc, w, h = map(float, parts[1:5])
                    boxes.append([
                        xc * img_width,
                        yc * img_height,
                        w * img_width,
                        h * img_height
                    ])
        except FileNotFoundError:
            # Deliberate best effort: a missing label file is treated as
            # "no detections" rather than as an error.
            pass
        return boxes

    left_boxes  = load_yolo_labels(left_label_path)
    right_boxes = load_yolo_labels(right_label_path)

    used_right = [False] * len(right_boxes)
    matches = []

    max_vertical_diff = (y_tolerance_pct / 100.0) * img_height
    size_tolerance = size_tolerance_pct / 100.0

    for left_idx, (cxL, cyL, wL, hL) in enumerate(left_boxes):
        for right_idx, (cxR, cyR, wR, hR) in enumerate(right_boxes):
            if used_right[right_idx]:
                continue
            if abs(cyL - cyR) > max_vertical_diff:
                continue
            disparity_px = cxL - cxR
            if disparity_px <= 0:
                continue
            # The 1e-6 floor avoids a division by zero for degenerate boxes.
            if abs(wL - wR) / max(wL, wR, 1e-6) > size_tolerance:
                continue
            if abs(hL - hR) / max(hL, hR, 1e-6) > size_tolerance:
                continue

            # First acceptable candidate wins; stop searching for this left box.
            used_right[right_idx] = True
            matches.append((left_idx, right_idx, disparity_px))
            break

    return matches


In [None]:
# Match the detections of image 2 (used as the left view) against those of
# image 3, allowing a vertical centre offset of up to 5 % of the image height
# and a relative width/height difference of up to 20 %.
pairs = simple_stereo_match(image_2_path, labels_2_txt_path, labels_3_txt_path,  y_tolerance_pct=5, size_tolerance_pct=20)
# One line per matched pair: left index, right index and disparity in pixels.
for left_idx, right_idx, disp in pairs:
    print(f"L#{left_idx} match R#{right_idx}  | d = {disp:.2f}px")

In [None]:
# Reading calibration file

def read_kitti_fx_baseline(calib_path):
    """Read the focal length and stereo baseline from a calibration file.

    The file is scanned for lines starting with ``P2:`` and ``P3:``; the
    twelve numbers following each tag are read as a 3x4 matrix. If a tag
    occurs more than once, the last occurrence wins.

    Args:
        calib_path: Path to the calibration text file.

    Returns:
        Tuple ``(fx, baseline)`` where ``fx`` is element [0, 0] of P2 and
        ``baseline`` is ``(P2[0, 3] - P3[0, 3]) / fx`` (meters).

    Raises:
        ValueError: If the file has no ``P2:`` or no ``P3:`` line, or if one
            of those lines does not hold exactly twelve numbers.
    """
    wanted = ("P2", "P3")
    matrices = {}
    with open(calib_path, "r") as f:
        for line in f:
            for tag in wanted:
                if line.startswith(tag + ":"):
                    vals = [float(v) for v in line.split()[1:]]
                    matrices[tag] = np.array(vals, dtype=float).reshape(3, 4)

    missing = [tag for tag in wanted if tag not in matrices]
    if missing:
        # Without this check the indexing below fails on None with an
        # uninformative TypeError.
        raise ValueError(
            f"Calibration file {calib_path!r} has no {', '.join(missing)} entry"
        )

    P2, P3 = matrices["P2"], matrices["P3"]
    fx = P2[0, 0]
    baseline = (P2[0, 3] - P3[0, 3]) / fx  # meters
    return fx, baseline

In [None]:
# Calculating distance from disparity

def disparities_to_distances(pairs, fx, baseline, min_disp=0.1):
    """Turn matched-pair disparities into distances.

    Args:
        pairs: Iterable of ``(left_idx, right_idx, disparity_px)`` triples.
        fx: Focal length in pixels.
        baseline: Stereo baseline.
        min_disp: Disparities less than or equal to this value are reported
            as an infinite distance instead of being divided by.

    Returns:
        One dict per pair with the keys ``left_idx``, ``right_idx``,
        ``disparity_px`` and ``distance_m`` (``fx * baseline / disparity``).
    """
    def _record(left_idx, right_idx, disparity):
        # Depth is worked out first, then everything is packed into the record.
        depth = float("inf") if disparity <= min_disp else (fx * baseline) / disparity
        return {
            "left_idx": left_idx,
            "right_idx": right_idx,
            "disparity_px": float(disparity),
            "distance_m": float(depth),
        }

    return [_record(left_idx, right_idx, disparity)
            for left_idx, right_idx, disparity in pairs]

In [None]:
# Path to calibration file
# (empty placeholder: must point to the calibration text file of the image pair).
calibration_file_path = r""

# Focal length (pixels) and baseline (meters) taken from the P2/P3 entries.
fx, b = read_kitti_fx_baseline(calibration_file_path)
print(f"fx={fx:.3f} px, baseline={b:.3f} m")

# One distance estimate per matched pair; kept in `results` for the
# comparison against the ground truth further down.
results = disparities_to_distances(pairs, fx, b)

# Pairs whose disparity was too small carry an infinite distance and are
# printed without a unit.
for r in results:
    if np.isfinite(r["distance_m"]):
        print(f"L#{r['left_idx']}  R#{r['right_idx']}:  disparity={r['disparity_px']:.2f}px    Z={r['distance_m']:.2f} m")
    else:
        print(f"L#{r['left_idx']}  R#{r['right_idx']}:  disparity={r['disparity_px']:.2f}px    Z=inf")

In [None]:
# Loading YOLO bounding boxes for IOU calculation

def load_yolo_boxes_xyxy(label_path, W, H):
    """Read a label file and return its boxes as pixel corner coordinates.

    Args:
        label_path: Text file with one whitespace-separated detection per
            line; columns 1-4 hold the normalised centre x, centre y, width
            and height. Lines with fewer than five columns are skipped.
        W: Image width in pixels.
        H: Image height in pixels.

    Returns:
        List of ``(x1, y1, x2, y2)`` tuples, one per accepted line, in file
        order.
    """
    corners = []
    with open(label_path, "r") as label_file:
        for raw in label_file:
            fields = raw.split()
            if len(fields) >= 5:
                xc, yc, w, h = (float(value) for value in fields[1:5])
                # Scale to pixels, then step half the box size away from the centre.
                center_x, center_y = xc * W, yc * H
                half_w, half_h = (w * W) / 2, (h * H) / 2
                corners.append((
                    center_x - half_w,
                    center_y - half_h,
                    center_x + half_w,
                    center_y + half_h,
                ))
    return corners


In [None]:
# Loading KITTI boxes for IOU calculation

def load_kitti_gt_xyxy_z(gt_path):
    """Read ground-truth boxes and their z value from a KITTI label file.

    Only lines with at least 15 whitespace-separated columns whose first
    column is "Car", "Pedestrian" or "Cyclist" are used.

    Args:
        gt_path: Path to the ground-truth label text file.

    Returns:
        List of ``((x1, y1, x2, y2), z)`` entries in file order, where the
        box comes from columns 4-7 and ``z`` from column 13.
    """
    wanted_types = ("Car", "Pedestrian", "Cyclist")
    annotations = []
    with open(gt_path, "r") as gt_file:
        for raw in gt_file:
            fields = raw.split()
            if len(fields) < 15 or fields[0] not in wanted_types:
                continue
            bbox = tuple(float(value) for value in fields[4:8])
            annotations.append((bbox, float(fields[13])))
    return annotations


In [None]:
# IOU calculation

def _iou(a, b):
    ax1, ay1, ax2, ay2 = a; bx1, by1, bx2, by2 = b
    xx1, yy1 = max(ax1, bx1), max(ay1, by1)
    xx2, yy2 = min(ax2, bx2), min(ay2, by2)
    iw, ih = max(0.0, xx2-xx1), max(0.0, yy2-yy1)
    inter = iw*ih
    area_a = max(0.0, (ax2-ax1)*(ay2-ay1))
    area_b = max(0.0, (bx2-bx1)*(by2-by1))
    union = max(area_a + area_b - inter, 1e-6)
    return inter/union



In [None]:
# Calculating errors of estimation compared to groundtruth

def distance_errors_vs_kitti(left_image_path, left_yolo_txt, kitti_gt_txt, estimates, iou_thresh=0.5):
    """Compare estimated distances with the ground-truth z of overlapping boxes.

    For every estimate the detection box with the same index is looked up in
    the left label file and compared against all ground-truth boxes; the
    ground-truth box with the highest IoU is used if that IoU reaches
    ``iou_thresh``.

    Args:
        left_image_path: Left image; only its size is used, to convert the
            normalised detection boxes to pixels.
        left_yolo_txt: Label file of the left image.
        kitti_gt_txt: Ground-truth label file.
        estimates: Iterable of estimates, each either a dict with the keys
            ``left_idx`` and ``distance_m`` or a sequence
            ``(left_idx, distance)``.
        iou_thresh: Minimum IoU for a ground-truth box to count as a match.

    Returns:
        One dict per estimate with the keys ``left_idx``, ``Z_est``,
        ``Z_gt``, ``IoU`` and ``error_m``. ``error_m`` is
        ``Z_est - Z_gt``; ``Z_gt`` and ``error_m`` are ``None`` when no
        ground-truth box matched or the index has no detection box.

    Raises:
        FileNotFoundError: If the left image cannot be read.
    """
    left_img = cv2.imread(left_image_path)
    if left_img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {left_image_path!r}")
    H, W = left_img.shape[:2]
    det_boxes = load_yolo_boxes_xyxy(left_yolo_txt, W, H)
    gt_boxes  = load_kitti_gt_xyxy_z(kitti_gt_txt)

    out = []
    for e in estimates:
        if isinstance(e, dict):
            iL, Z_est = e["left_idx"], float(e["distance_m"])
        else:
            iL, Z_est = int(e[0]), float(e[1])

        if iL >= len(det_boxes):
            # The estimate refers to a detection that is not in the label file.
            out.append({"left_idx": iL, "Z_est": Z_est, "Z_gt": None, "IoU": 0.0, "error_m": None})
            continue

        det_box = det_boxes[iL]
        # Best overlapping ground-truth box; on equal IoU the earlier one stays.
        best_iou, best_z = 0.0, None
        for (gbox, z) in gt_boxes:
            i = _iou(det_box, gbox)
            if i > best_iou:
                best_iou, best_z = i, z

        if best_z is not None and best_iou >= iou_thresh:
            err = Z_est - best_z
            out.append({"left_idx": iL, "Z_est": Z_est, "Z_gt": best_z, "IoU": best_iou, "error_m": err})
        else:
            out.append({"left_idx": iL, "Z_est": Z_est, "Z_gt": None, "IoU": best_iou, "error_m": None})

    return out

In [None]:
# Printing errors

# Path to KITTI label
# (empty placeholder: must point to the ground-truth label file of the left image).
kitti_gt_path = r""

# Reduce every result to the (left index, distance) pair used for the comparison.
estimates = [(r["left_idx"], r["distance_m"]) for r in results]

# A ground-truth box only counts as a match from an IoU of 0.7 upwards here,
# which is stricter than the function's default of 0.5.
errors = distance_errors_vs_kitti(
    left_image_path=image_2_path,
    left_yolo_txt=labels_2_txt_path,
    kitti_gt_txt= kitti_gt_path,
    estimates=estimates,
    iou_thresh=0.7
)

# Print
# Matched estimates show the signed error (estimate minus ground truth);
# unmatched ones show the best IoU that was found.
for e in errors:
    if e["Z_gt"] is not None:
        print(f"L#{e['left_idx']} | IoU={e['IoU']:.2f} | Distance estimate={e['Z_est']:.2f} m | "
              f"Distance groundtruth={e['Z_gt']:.2f} m | error={e['error_m']:+.2f} m")
    else:
        print(f"L#{e['left_idx']} | no GT match (best IoU={e['IoU']:.2f}) | Z_est={e['Z_est']:.2f} m")
