In [None]:
!pip install ultralytics ensemble-boxes

Collecting ultralytics
  Using cached ultralytics-8.3.121-py3-none-any.whl.metadata (37 kB)
Collecting ensemble-boxes
  Using cached ensemble_boxes-1.0.9-py3-none-any.whl.metadata (728 bytes)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Using cached ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Using cached nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Using cached nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Using cached nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.8.0->ultralytics)
  Using cached nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.

In [None]:
import torch
from ultralytics import YOLO
from ensemble_boxes import weighted_boxes_fusion
import numpy as np
import cv2

# Function to normalize bounding boxes to [0, 1] range
def normalize_boxes(boxes, img_width, img_height):
    """Scale pixel-space boxes so their coordinates are fractions of the image size.

    Args:
        boxes: Array-like of rows ``[x_min, y_min, x_max, y_max, ...]``. Only the
            first four columns are scaled; any further columns are left as-is.
            An empty input is accepted.
        img_width: Image width in pixels; divides the x columns (0 and 2).
        img_height: Image height in pixels; divides the y columns (1 and 3).

    Returns:
        A new array (the input is never modified). Floating-point inputs keep
        their dtype; integer inputs are promoted to float64 so the division
        does not fail or truncate. Empty input yields an array of shape (0, 4).
    """
    normalized = np.array(boxes, copy=True)

    # In-place true division is not allowed on integer arrays, so promote first.
    if not np.issubdtype(normalized.dtype, np.floating):
        normalized = normalized.astype(np.float64)

    # A 1-D empty array (e.g. np.array([])) cannot be column-indexed; give it
    # the same (0, 4) layout a model with no detections would produce.
    if normalized.size == 0:
        return normalized.reshape(0, 4)

    # Build the divisor in the array's own dtype so float32 boxes stay float32.
    scale = np.array(
        [img_width, img_height, img_width, img_height], dtype=normalized.dtype
    )
    normalized[:, :4] /= scale
    return normalized

# Function to denormalize bounding boxes back to pixel values
def denormalize_boxes(boxes, img_width, img_height):
    """Convert boxes with fractional coordinates back to pixel coordinates.

    Args:
        boxes: 2-D NumPy array whose first four columns are
            ``[x_min, y_min, x_max, y_max]``.
        img_width: Factor applied to the x columns (0 and 2).
        img_height: Factor applied to the y columns (1 and 3).

    Returns:
        A copy of ``boxes`` with the first four columns scaled; the input
        array itself is left unchanged.
    """
    pixel_boxes = boxes.copy()
    # Columns alternate x, y, x, y, so the scale factors alternate as well.
    column_factors = (img_width, img_height, img_width, img_height)
    for column, factor in enumerate(column_factors):
        pixel_boxes[:, column] *= factor
    return pixel_boxes

# Main ensemble function
def ensemble_yolo_predictions(image_path, model_paths, conf_thres=0.25, iou_thres=0.45):
    """Run several YOLO models on one image and merge their detections with WBF.

    Each model in ``model_paths`` is loaded and run on ``image_path``. Its boxes
    are normalized by the image size, and the per-model predictions are then
    combined with ``weighted_boxes_fusion`` using equal model weights.

    Args:
        image_path: Path of the image to run inference on.
        model_paths: Iterable of weight-file paths, one per ensemble member.
        conf_thres: Confidence threshold passed to each model's ``predict``.
        iou_thres: IoU threshold; passed both to each model's ``predict`` and
            to WBF as ``iou_thr``.

    Returns:
        Tuple ``(boxes, scores, labels, img)``: ``boxes`` holds pixel-space
        ``[x_min, y_min, x_max, y_max]`` rows, ``scores`` and ``labels`` are the
        fused confidences and class labels, and ``img`` is the image as loaded
        by ``cv2.imread``. With no models, ``boxes`` has shape (0, 4) and the
        other two arrays are empty.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot load ``image_path``.
    """
    # cv2.imread signals failure by returning None instead of raising.
    img = cv2.imread(image_path)
    if img is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    img_height, img_width = img.shape[:2]

    # One entry per model, in the order of model_paths
    all_boxes = []
    all_scores = []
    all_labels = []

    for model_path in model_paths:
        model = YOLO(model_path)
        results = model.predict(image_path, conf=conf_thres, iou=iou_thres, verbose=False)

        # Keep the per-model lists aligned even if a model returns no result.
        if not results:
            all_boxes.append(np.empty((0, 4)))
            all_scores.append(np.empty((0,)))
            all_labels.append(np.empty((0,)))
            continue

        # A single image path is passed in, so only the first result is used.
        detections = results[0].boxes
        pred_boxes = detections.xyxy.cpu().numpy()   # [x_min, y_min, x_max, y_max]
        pred_scores = detections.conf.cpu().numpy()  # Confidence scores
        pred_labels = detections.cls.cpu().numpy()   # Class indices

        # WBF works on coordinates in [0, 1]; clip defensively in case a box
        # lies marginally outside the image.
        normalized_boxes = np.clip(
            normalize_boxes(pred_boxes, img_width, img_height), 0.0, 1.0
        )

        all_boxes.append(normalized_boxes)
        all_scores.append(pred_scores)
        all_labels.append(pred_labels)

    # No models supplied: return empty arrays shaped like a normal result.
    if not all_boxes:
        return np.empty((0, 4)), np.empty((0,)), np.empty((0,)), img

    boxes, scores, labels = weighted_boxes_fusion(
        all_boxes,
        all_scores,
        all_labels,
        weights=None,  # Equal weights for all models; adjust if needed
        iou_thr=iou_thres,
        skip_box_thr=0.0  # Boxes were already filtered by conf_thres at predict time
    )

    # Back to pixel coordinates for drawing / downstream use
    boxes = denormalize_boxes(boxes, img_width, img_height)
    return boxes, scores, labels, img

# Draw the fused detections on the image and save the annotated result
def visualize_results(boxes, scores, labels, img, class_names, output_path="ensemble_output.jpg"):
    """Draw labelled boxes on ``img`` and write the annotated image to disk.

    Args:
        boxes: Iterable of ``[x_min, y_min, x_max, y_max]`` pixel coordinates.
        scores: Confidence value for each box, in the same order as ``boxes``.
        labels: Class index for each box (converted with ``int``).
        img: Image array to draw on. It is modified in place.
        class_names: Sequence mapping a class index to a display name. Indices
            outside the sequence are rendered as ``class_<index>``.
        output_path: Where the annotated image is written. Defaults to
            ``"ensemble_output.jpg"``.

    Raises:
        OSError: If ``cv2.imwrite`` reports that the image was not written.
    """
    for box, score, label in zip(boxes, scores, labels):
        x_min, y_min, x_max, y_max = map(int, box)
        class_id = int(label)

        # Guard the lookup: a negative index would silently wrap around and a
        # too-large one would raise, e.g. when a model predicts extra classes.
        if 0 <= class_id < len(class_names):
            name = class_names[class_id]
        else:
            name = f"class_{class_id}"

        # Draw bounding box
        cv2.rectangle(img, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)

        # Put the caption above the box, or just inside it when the box is so
        # close to the top edge that the text would fall outside the image.
        text = f"{name}: {score:.2f}"
        text_y = y_min - 10 if y_min >= 20 else y_min + 15
        cv2.putText(img, text, (x_min, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    # cv2.imwrite returns False on failure; do not claim success in that case.
    if not cv2.imwrite(output_path, img):
        raise OSError(f"Failed to write image to {output_path}")
    print(f"Output saved as {output_path}")

if __name__ == "__main__":
    # Test image to run the ensemble on (replace with your own)
    image_path = "/content/photo_1.jpg"

    # Trained weights, one entry per ensemble member (replace with your own)
    model_paths = [
        "/content/best_heridal.pt",
        "/content/best_heridal_human.pt",
        "/content/best_human_dataset.pt",
    ]

    # Display names indexed by class id, e.g. ["person", "car", "dog"]
    class_names = ["human"]

    # Thresholds forwarded to ensemble_yolo_predictions
    conf_thres = 0.25  # Confidence threshold
    iou_thres = 0.45   # IoU threshold

    # Run every model and fuse the detections
    boxes, scores, labels, img = ensemble_yolo_predictions(
        image_path,
        model_paths,
        conf_thres=conf_thres,
        iou_thres=iou_thres,
    )

    # Nothing to draw when the fused result is empty
    if boxes.size == 0:
        print("No detections found.")
    else:
        visualize_results(boxes, scores, labels, img, class_names)

Output saved as ensemble_output.jpg


In [None]:
# model_paths = ["model1.pt", "model2.pt", "model3.pt"]

In [None]:
# class_names = ["person", "car", "dog", ...]