In [214]:
import os
from ultralytics import YOLO

In [215]:
# CONFIG

# Test set to predict
DATASET_DIR = "../../datasets"
DATASET_NAME = "roboflow_augmented"
#DATASET_NAME = "roboflow1"
DATASET_SET_FOLDER = os.path.join(DATASET_DIR, DATASET_NAME, "test")

# Model weights to use
MODEL_WEIGHTS = "/home/andrea/work/AI-waste-detection/src/training/runs/cross_validation_balanced/yolo11s_fold_1/weights/best.pt"

# Output directory for prediction
PREDICT_DIR = os.path.join(DATASET_DIR, DATASET_NAME, "predict")

In [216]:
#remove previous predictions
if os.path.exists(PREDICT_DIR):
    for dir in os.listdir(PREDICT_DIR):
        dir_path = os.path.join(PREDICT_DIR, dir)
        if os.path.isdir(dir_path):
            # force dir removal
            os.system(f"rm -rf {dir_path}")


In [217]:
# Predict 
model = YOLO(MODEL_WEIGHTS)

model.predict(
    source=os.path.join(DATASET_SET_FOLDER, "images"),
    save=True,
    save_txt=True,
    save_conf=True,
    project=PREDICT_DIR,
    name="prediction",
    conf=0.25,  # Confidence threshold
    iou=0.45,   # IoU threshold
    device="cuda" if os.path.exists("/dev/nvidia0") else "cpu",
)


inference results will accumulate in RAM unless `stream=True` is passed, causing potential out-of-memory
errors for large sources or long-running streams and videos. See https://docs.ultralytics.com/modes/predict/ for help.

Example:
    results = model(source=..., stream=True)  # generator of Results objects
    for r in results:
        boxes = r.boxes  # Boxes object for bbox outputs
        masks = r.masks  # Masks object for segment masks outputs
        probs = r.probs  # Class probabilities for classification outputs

image 1/1099 /home/andrea/work/AI-waste-detection/src/inference/../../datasets/roboflow_augmented/test/images/005f0645-R_3968_jpg.rf.86b63d8b1aa85d796dbc5f3980491ea1.jpg: 256x320 1 glass, 8.3ms
image 2/1099 /home/andrea/work/AI-waste-detection/src/inference/../../datasets/roboflow_augmented/test/images/008ff725-R_7177_jpg.rf.ff2adb47789566ced47f89fa6dfb0476.jpg: 192x320 1 paper, 15.1ms
image 3/1099 /home/andrea/work/AI-waste-detection/src/inference/../../datasets/

[ultralytics.engine.results.Results object with attributes:
 
 boxes: ultralytics.engine.results.Boxes object
 keypoints: None
 masks: None
 names: {0: 'glass', 1: 'metal', 2: 'organic', 3: 'paper', 4: 'plastic'}
 obb: None
 orig_img: array([[[254, 254, 254],
         [254, 254, 254],
         [254, 254, 254],
         ...,
         [254, 254, 254],
         [254, 254, 254],
         [254, 254, 254]],
 
        [[254, 254, 254],
         [254, 254, 254],
         [254, 254, 254],
         ...,
         [254, 254, 254],
         [254, 254, 254],
         [254, 254, 254]],
 
        [[254, 254, 254],
         [254, 254, 254],
         [254, 254, 254],
         ...,
         [254, 254, 254],
         [254, 254, 254],
         [254, 254, 254]],
 
        ...,
 
        [[218, 218, 218],
         [216, 216, 216],
         [213, 213, 213],
         ...,
         [254, 254, 254],
         [254, 254, 254],
         [254, 254, 254]],
 
        [[216, 216, 216],
         [215, 215, 215],
       

In [218]:
# Evaluate model on the test set

results = model.val(
    data=os.path.join(DATASET_DIR, DATASET_NAME, "data.yaml"),
    split="test",
    project=PREDICT_DIR,
    name="eval",
    save=True,
)       

Ultralytics 8.3.167 🚀 Python-3.11.0rc1 torch-2.7.1+cu126 CUDA:0 (Tesla T4, 14914MiB)
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 847.0±621.5 MB/s, size: 43.1 KB)


[34m[1mval: [0mScanning /home/andrea/work/AI-waste-detection/datasets/roboflow_augmented/test/labels.cache... 1099 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1099/1099 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 69/69 [00:11<00:00,  5.78it/s]


                   all       1099       1622      0.884      0.833      0.873      0.675
                 glass         84        109      0.918      0.908      0.926      0.727
                 metal        377        489      0.928      0.896      0.924       0.67
               organic        128        163      0.847      0.748      0.821      0.645
                 paper        220        241      0.936      0.967      0.966      0.825
               plastic        296        620      0.792      0.645      0.729       0.51
Speed: 0.2ms preprocess, 3.1ms inference, 0.0ms loss, 3.1ms postprocess per image
Results saved to [1m../../datasets/roboflow_augmented/predict/eval[0m


In [219]:
# sorted list of dataset test labels files
data_labels_dir = os.path.join(DATASET_DIR, DATASET_NAME, "test", "labels")
# txt files in listdir
data_labels_files = sorted([f for f in os.listdir(data_labels_dir) if f.endswith('.txt')])

# sorted list of results labels files
results_labels_dir = os.path.join(DATASET_DIR, DATASET_NAME, "predict", "prediction", "labels")
# txt files in listdir
results_labels_files = sorted([f for f in os.listdir(results_labels_dir) if f.endswith('.txt')])

In [220]:
import os
import numpy as np
from sklearn.cluster import DBSCAN
from scipy.spatial.distance import squareform # Not directly used in DBSCAN with precomputed, but good to keep if needed elsewhere
from PIL import Image # Used for IMG_WIDTH/HEIGHT context, though not directly in the clustering logic

# Image size (needed to denormalize/normalize YOLO boxes)
IMG_WIDTH = 320
IMG_HEIGHT = 320

# --- Original function (for ground truth labels, no confidence) ---
def read_yolo_labels(file_path):
    """
    Reads a YOLO label file (ground truth) and returns list of (class_id, [x1, y1, x2, y2]) in absolute pixels.
    Format: class_id x_center y_center width height
    """
    boxes = []
    with open(file_path, 'r') as f:
        for line in f:
            parts = line.strip().split()
            class_id = int(parts[0])
            x_center, y_center, w, h = map(float, parts[1:5])
            # Convert from YOLO normalized format to [x1, y1, x2, y2] absolute pixels
            x1 = (x_center - w / 2) * IMG_WIDTH
            y1 = (y_center - h / 2) * IMG_HEIGHT
            x2 = (x_center + w / 2) * IMG_WIDTH
            y2 = (y_center + h / 2) * IMG_HEIGHT
            boxes.append((class_id, [x1, y1, x2, y2]))
    return boxes

# --- NEW function to read prediction files (includes confidence) ---
def read_yolo_predictions(file_path):
    """
    Reads a YOLO prediction file and returns list of (class_id, confidence, [x1, y1, x2, y2]) in absolute pixels.
    Format: class_id confidence x_center y_center width height
    """
    predictions = []
    with open(file_path, 'r') as f:
        for line in f:
            parts = line.strip().split()
            if len(parts) < 6: # Ensure enough parts for class, conf, and bbox
                print(f"Warning: Skipping malformed line in {file_path}: {line.strip()}")
                continue
            class_id = int(parts[0])
            confidence = float(parts[5])
            x_center, y_center, w, h = map(float, parts[1:5])
            # Convert from YOLO normalized format to [x1, y1, x2, y2] absolute pixels
            x1 = (x_center - w / 2) * IMG_WIDTH
            y1 = (y_center - h / 2) * IMG_HEIGHT
            x2 = (x_center + w / 2) * IMG_WIDTH
            y2 = (y_center + h / 2) * IMG_HEIGHT
            predictions.append((class_id, [x1, y1, x2, y2], confidence))
    return predictions

def get_centroid(box_coords):
    """Calculates the centroid of a bounding box [x1, y1, x2, y2]."""
    print(box_coords)
    x1, y1, x2, y2 = box_coords
    return [(x1 + x2) / 2, (y1 + y2) / 2]

# --- Modified function to get enclosing box AND mean confidence ---
def get_enclosing_box_and_conf(cluster_entries):
    """
    Calculates the enclosing bounding box and mean confidence for a cluster.
    cluster_entries: list of (box_coords, confidence) tuples, where box_coords is [x1, y1, x2, y2].
    Returns: [min_x1, min_y1, max_x2, max_y2, mean_conf]
    """
    if not cluster_entries:
        return None

    x1s, y1s, x2s, y2s, confs = [], [], [], [], []
    for box_coords, conf in cluster_entries:
        x1, y1, x2, y2 = box_coords
        x1s.append(x1)
        y1s.append(y1)
        x2s.append(x2)
        y2s.append(y2)
        if conf is not None: # Only append if confidence exists
            confs.append(conf)

    min_x1 = min(x1s)
    min_y1 = min(y1s)
    max_x2 = max(x2s)
    max_y2 = max(y2s)

    # Calculate mean confidence, default to 1.0 if no confs or empty
    mean_conf = np.mean(confs) if confs else 1.0

    return [min_x1, min_y1, max_x2, max_y2, mean_conf]

def compute_custom_dist_matrix(boxes_coords_only):
    """
    Computes a custom distance matrix between bounding boxes based on overlap.
    boxes_coords_only: list of [x1, y1, x2, y2]
    """
    n = len(boxes_coords_only)
    dist_matrix = np.zeros((n, n))

    for i in range(n):
        x1_c, y1_c = get_centroid(boxes_coords_only[i])
        w1 = boxes_coords_only[i][2] - boxes_coords_only[i][0]
        h1 = boxes_coords_only[i][3] - boxes_coords_only[i][1]
        for j in range(i + 1, n):
            x2_c, y2_c = get_centroid(boxes_coords_only[j])
            w2 = boxes_coords_only[j][2] - boxes_coords_only[j][0]
            h2 = boxes_coords_only[j][3] - boxes_coords_only[j][1]

            # This distance metric seems to be based on non-overlapping distance
            # If dx or dy are negative, it means there's overlap in that dimension.
            # max(0, dx, dy) ensures distance is 0 if boxes overlap or touch.
            dx = abs(x1_c - x2_c) - (w1 + w2) / 2
            dy = abs(y1_c - y2_c) - (h1 + h2) / 2
            distance_x = max(0, dx)
            distance_y = max(0, dy) # This is a custom distance, not IoU based
            distance = (distance_x * distance_x + distance_y * distance_y) ** 0.5  # Euclidean distance

            dist_matrix[i, j] = dist_matrix[j, i] = distance
    return dist_matrix

# --- Modified clustering function to handle confidence and be flexible for GT/Predictions ---
def cluster_boxes_per_image(file_path, eps=1, min_samples=1, is_prediction=False):
    """
    Clusters bounding boxes within a single image file based on a custom distance metric using DBSCAN.
    Can process either ground truth labels (no confidence) or predictions (with confidence).

    Args:
        file_path (str): Path to the YOLO label/prediction file.
        eps (float): The maximum distance between two samples for one to be considered as in the neighborhood of the other.
        min_samples (int): The number of samples (or total weight) in a neighborhood for a point to be considered as a core point.
        is_prediction (bool): If True, reads file as predictions (with confidence). Otherwise, as ground truth.

    Returns:
        dict: A dictionary where keys are class_ids and values are lists of
              [x1, y1, x2, y2, mean_confidence] for each cluster.
    """
    class_to_entries = {} # Stores (class_id, (box_coords, confidence))

    if is_prediction:
        entries = read_yolo_predictions(file_path)
        for class_id, box_coords, conf in entries:
            class_to_entries.setdefault(class_id, []).append((box_coords, conf))
    else:
        entries = read_yolo_labels(file_path)
        for class_id, box_coords in entries:
            class_to_entries.setdefault(class_id, []).append((box_coords, 1.0)) # Default confidence for GT

    clusters_per_class = {}

    for class_id, entries_for_class in class_to_entries.items():
        if not entries_for_class: # Skip if no boxes for this class
            continue
        # Separate box coordinates for distance calculation
        boxes_coords_only = [entry[0] for entry in entries_for_class]
        
        # Compute distance matrix only on coordinates
        dist_matrix = compute_custom_dist_matrix(boxes_coords_only)
        print("Dist matrix:", dist_matrix)
        
        # DBSCAN expects a 2D array, even for a single sample (though min_samples=1 handles this)
        if len(boxes_coords_only) == 1:
            # DBSCAN with precomputed metric needs a 1x1 matrix for a single point
            # If min_samples is 1, a single point is a cluster.
            labels = np.array([0]) # Assign to cluster 0
        else:
            db = DBSCAN(eps=eps, min_samples=min_samples, metric='precomputed')
            labels = db.fit_predict(dist_matrix)

        clusters_raw = {} # Stores raw entries (box_coords, conf) for each cluster label
        for lbl, entry in zip(labels, entries_for_class):
            if lbl == -1: # Noise points are ignored as per original code
                continue
            clusters_raw.setdefault(lbl, []).append(entry)

        # Process raw clusters to get enclosing box and mean confidence
        clusters_per_class[class_id] = [get_enclosing_box_and_conf(c) for c in clusters_raw.values()]

    return clusters_per_class

# --- Modified save_clusters function ---
def save_clusters(src_dir, save_cluster_labels_dir, is_prediction_data=False):
    """
    Processes files in src_dir, clusters bounding boxes, and saves the results.
    For prediction data, it includes the mean confidence of the cluster.

    Args:
        src_dir (str): Directory containing the source YOLO label/prediction files.
        save_cluster_labels_dir (str): Directory where clustered label files will be saved.
        is_prediction_data (bool): Set to True if src_dir contains prediction files
                                   (which have confidence scores).
    """
    set_clusters = {}
    print(f"Processing files in {src_dir}...")
    for fname in sorted(os.listdir(src_dir)):
        if not fname.endswith('.txt'):
            continue
        path = os.path.join(src_dir, fname)
        # Pass the is_prediction flag to the clustering function
        clusters = cluster_boxes_per_image(path, eps=50, min_samples=1, is_prediction=is_prediction_data)
        set_clusters[fname] = clusters

    # Print results (optional, for debugging/overview)
    print("\n--- Clustering Results Summary ---")
    for fname, clusters in set_clusters.items():
        print(f"File: {fname}")
        if not clusters:
            print("  No clusters found.")
            continue
        for class_id, boxes_with_conf in clusters.items():
            print(f"  Class ID: {class_id}")
            for box in boxes_with_conf:
                # box is [x1, y1, x2, y2, mean_conf]
                print(f"    Enclosing Box: [{box[0]:.2f}, {box[1]:.2f}, {box[2]:.2f}, {box[3]:.2f}], Mean Conf: {box[4]:.4f}")
        print()

    save_dir = save_cluster_labels_dir
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    print(f"Saving clustered results to {save_dir}...")

    # Save results with format of YOLO label files (class_id x_center y_center width height [confidence])
    for fname, clusters in set_clusters.items():
        output_path = os.path.join(save_dir, fname)
        with open(output_path, 'w') as f:
            # Optional: print if multiple classes for a file (as in your original code)
            # This logic is a bit odd as it prints to console, not file, and only if >1 class
            # if len(clusters) > 1:
            #     print(f"Multiple classes found in {fname}, writing to file.")
            #     for class_id, boxes in clusters.items():
            #         print(f"  Class ID: {class_id}, Enclosing Boxes: {len(boxes)}")
                        
            for class_id, boxes_with_conf in clusters.items():
                for box_data in boxes_with_conf:
                    # box_data is [x1, y1, x2, y2, mean_conf]
                    x1, y1, x2, y2, mean_conf = box_data
                    
                    x_center = (x1 + x2) / 2 / IMG_WIDTH
                    y_center = (y1 + y2) / 2 / IMG_HEIGHT
                    width = (x2 - x1) / IMG_WIDTH
                    height = (y2 - y1) / IMG_HEIGHT
                    
                    # Write class_id x_center y_center width height confidence
                    f.write(f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f} {mean_conf:.6f}\n")

# --- Example Usage (assuming DATASET_DIR and DATASET_NAME are defined) ---
# Example placeholder for DATASET_DIR and DATASET_NAME
# DATASET_DIR = "/path/to/your/dataset"
# DATASET_NAME = "my_yolo_dataset"

# # Example directories for demonstration
# data_labels_dir = os.path.join(DATASET_DIR, DATASET_NAME, "test", "labels")
# results_labels_dir = os.path.join(DATASET_DIR, DATASET_NAME, "predict", "prediction", "labels")

# Example calls:
# For ground truth labels (no confidence in input, default 1.0 for cluster conf)
save_clusters(data_labels_dir, os.path.join(DATASET_DIR, DATASET_NAME, "test", "labels_clusters"), is_prediction_data=False)

# For prediction results (with confidence in input, mean confidence for cluster conf)
save_clusters(results_labels_dir, os.path.join(DATASET_DIR, DATASET_NAME, "predict", "prediction", "labels_clusters"), is_prediction_data=True)

Processing files in ../../datasets/roboflow_augmented/test/labels...
[28.976377952755907, 8.080808080808097, 294.8031496062992, 320.0]
Dist matrix: [[          0]]
[5.517241379310338, 9.195402298850563, 320.0, 305.28735632183907]
Dist matrix: [[          0]]
[5.688888888888872, 41.24444444444446, 320.0, 292.97777777777776]
Dist matrix: [[          0]]
[7.111111111111104, 7.111111111111104, 320.0, 320.0]
Dist matrix: [[          0]]
[81.875, 5.8333333333333215, 233.125, 272.5]
Dist matrix: [[          0]]
[47.407407407407405, 1.0289389067524013, 268.641975308642, 320.0]
Dist matrix: [[          0]]
[0.0, 8.743169398907114, 299.05454545454546, 320.0]
Dist matrix: [[          0]]
[44.088888888888874, 24.177777777777774, 281.59999999999997, 297.2444444444444]
Dist matrix: [[          0]]
[29.652509652509657, 54.43298969072167, 163.08880308880308, 298.55670103092785]
Dist matrix: [[          0]]
[164.32432432432432, 117.11340206185568, 300.2316602316602, 293.60824742268045]
Dist matrix: [[ 

[196.25, 126.03221083455341, 248.75, 194.4363103953148]
Dist matrix: [[          0]]
[2.644628099173545, 7.692307692307683, 320.0, 320.0]
Dist matrix: [[          0]]
[13.486590038314166, 16.580310880829003, 320.0, 320.0]
Dist matrix: [[          0]]
[13.689839572192515, 40.296296296296305, 307.16577540106954, 310.5185185185185]
Dist matrix: [[          0]]
[6.597938144329891, 1.235521235521233, 320.0, 320.0]
Dist matrix: [[          0]]
[11.689497716894977, 6.95652173913043, 309.77168949771686, 315.82608695652175]
Dist matrix: [[          0]]
[5.245901639344268, 2.3272727272727423, 320.0, 320.0]
Dist matrix: [[          0]]
[0.0, 16.271186440677976, 320.0, 320.0]
Dist matrix: [[          0]]
[6.421404682274243, 0.0, 320.0, 320.0]
Dist matrix: [[          0]]
[3.2989690721649367, 2.471042471042466, 320.0, 320.0]
Dist matrix: [[          0]]
[4.266666666666659, 2.844444444444445, 304.35555555555555, 320.0]
Dist matrix: [[          0]]
[0.0, 0.0, 320.0, 320.0]
Dist matrix: [[          0]

In [221]:
import os
import numpy as np
from PIL import Image
from tqdm import tqdm
import matplotlib.pyplot as plt

DATASET_SET_FOLDER = os.path.join(DATASET_DIR, DATASET_NAME, "test")

# === CONFIG ===
IMAGE_DIR = os.path.join(DATASET_DIR, DATASET_NAME, "test", "images")

LABEL_DIR = os.path.join(DATASET_DIR, DATASET_NAME, "test", "labels_clusters")
PRED_DIR = os.path.join(DATASET_DIR, DATASET_NAME, "predict", "prediction", "labels_clusters")

def yolo_to_xyxy(box, img_w, img_h):
    cls, x_c, y_c, w, h = box[:5]
    x1 = (x_c - w / 2) * img_w
    y1 = (y_c - h / 2) * img_h
    x2 = (x_c + w / 2) * img_w
    y2 = (y_c + h / 2) * img_h
    return [int(cls), x1, y1, x2, y2]

def compute_iou(box1, box2):
    x1 = max(box1[0], box2[0])
    y1 = max(box1[1], box2[1])
    x2 = min(box1[2], box2[2])
    y2 = min(box1[3], box2[3])
    inter = max(0, x2 - x1) * max(0, y2 - y1)
    area1 = (box1[2]-box1[0]) * (box1[3]-box1[1])
    area2 = (box2[2]-box2[0]) * (box2[3]-box2[1])
    union = area1 + area2 - inter
    return inter / union if union > 0 else 0

def ap_per_class(tp, conf, pred_cls, target_cls):
    indices = np.argsort(-conf)
    tp, pred_cls = tp[indices], pred_cls[indices]
    unique_classes = np.unique(np.concatenate((pred_cls, target_cls)))
    ap = []

    for c in unique_classes:
        idx = pred_cls == c
        n_gt = (target_cls == c).sum()
        n_pred = idx.sum()

        if n_gt == 0 or n_pred == 0:
            ap.append(0)
            continue

        fpc = (1 - tp[idx]).cumsum()
        tpc = tp[idx].cumsum()
        recall = tpc / (n_gt + 1e-16)
        precision = tpc / (tpc + fpc + 1e-16)

        ap_c = np.trapezoid(precision, recall)
        ap.append(ap_c)

    return np.mean(ap)

def load_labels(file_path, img_w, img_h):
    boxes = []
    with open(file_path, 'r') as f:
        for line in f:
            vals = list(map(float, line.strip().split()))
            boxes.append(yolo_to_xyxy(vals, img_w, img_h))
    return boxes

def load_predictions(file_path, img_w, img_h):
    boxes = []
    with open(file_path, 'r') as f:
        for line in f:
            vals = list(map(float, line.strip().split()))
            cls, x1, y1, x2, y2 = yolo_to_xyxy(vals[:5], img_w, img_h)
            conf = vals[5]
            boxes.append([cls, x1, y1, x2, y2, conf])
    return boxes

def compute_map_from_folders(img_dir, label_dir, pred_dir, iou_thr=0.5):
    tp, confs, pred_cls, target_cls = [], [], [], []

    for img_file in tqdm(os.listdir(img_dir)):
        if not img_file.endswith('.jpg'):
            continue

        base = os.path.splitext(img_file)[0]
        img_path = os.path.join(img_dir, img_file)
        label_path = os.path.join(label_dir, base + '.txt')
        pred_path = os.path.join(pred_dir, base + '.txt')

        img = Image.open(img_path)
        w, h = img.size

        if not os.path.exists(label_path):
            continue
        gt = load_labels(label_path, w, h)
        preds = load_predictions(pred_path, w, h) if os.path.exists(pred_path) else []

        matched = []
        for pred in preds:
            cls_p, x1p, y1p, x2p, y2p, conf = pred
            best_iou = 0
            best_idx = -1
            for i, gt_box in enumerate(gt):
                cls_g, x1g, y1g, x2g, y2g = gt_box
                if cls_p != cls_g or i in matched:
                    continue
                iou = compute_iou([x1p, y1p, x2p, y2p], [x1g, y1g, x2g, y2g])
                if iou > best_iou:
                    best_iou = iou
                    best_idx = i

            if best_iou >= iou_thr:
                tp.append(1)
                matched.append(best_idx)
            else:
                tp.append(0)
            confs.append(conf)
            pred_cls.append(cls_p)

        for gt_box in gt:
            target_cls.append(gt_box[0])

    map50 = ap_per_class(np.array(tp), np.array(confs), np.array(pred_cls), np.array(target_cls))
    return map50

# === ESECUZIONE ===
map50 = compute_map_from_folders(IMAGE_DIR, LABEL_DIR, PRED_DIR, iou_thr=0.5)
print(f"\nmAP@0.5: {map50:.4f}")


100%|██████████| 1099/1099 [00:00<00:00, 8246.34it/s]


mAP@0.5: 0.8972



