In [42]:
# from detectron2_wrapper import run_detectron
from detr_wrapper import run_detr
from yolo_wrapper import run_yolo

# Run every detector on the same image so their predictions can be compared and fused.
# The path is relative, so it is resolved against the notebook's working directory.
test_image = '../data/Images/image5.jpg'

# run_yolo yields a single result object; run_detr yields a (boxes, scores, labels) triple.
yolo_output = run_yolo(test_image)
detr_boxes, detr_scores, detr_labels = run_detr(test_image)
# Detectron2 is currently disabled (its import at the top of this cell is commented out too).
# detectron_output = run_detectron(test_image)


0: 448x640 1 person, 1 boat, 89.1ms
Speed: 3.5ms preprocess, 89.1ms inference, 1.0ms postprocess per image at shape (1, 3, 448, 640)


Using cache found in C:\Users\chowd/.cache\torch\hub\facebookresearch_detr_main


### Transform outputs

In [43]:
import numpy as np
import torch

# Function for YOLO output
def transform_yolo_output(yolo_output):
    """
    Convert a YOLO result object into the shared (boxes, scores, classes) triple.

    Arguments:
        yolo_output: Object exposing ``boxes.xyxy``, ``boxes.conf`` and ``boxes.cls`` tensors.

    Returns:
        Tuple ``(boxes, scores, classes)`` of numpy arrays; ``classes`` is cast to int.
    """
    def _copy_to_numpy(tensor, dtype=None):
        # np.array() takes an independent copy of the tensor's host-side data.
        return np.array(tensor.cpu().numpy(), dtype=dtype)

    detections = yolo_output.boxes
    return (
        _copy_to_numpy(detections.xyxy),             # Bounding boxes
        _copy_to_numpy(detections.conf),             # Confidence scores
        _copy_to_numpy(detections.cls, dtype=int),   # Class predictions
    )

# Function for DETR output
def transform_detr_output(detr_boxes, detr_scores, detr_classes):
    """
    Transforms DETR output into standardized format.

    Each argument may be a torch tensor or any array-like (list, tuple, ndarray).

    Arguments:
        detr_boxes: Bounding boxes.
        detr_scores: Confidence scores.
        detr_classes: Class predictions; cast to int in the result.

    Returns:
        Tuple ``(boxes, scores, classes)`` of numpy arrays.
    """
    def _as_array(values, dtype=None):
        if isinstance(values, torch.Tensor):
            # detach() first: Tensor.numpy() raises RuntimeError on a tensor that
            # still requires grad (e.g. when inference ran outside torch.no_grad()).
            array = values.detach().cpu().numpy()
            return array if dtype is None else array.astype(dtype)
        return np.array(values, dtype=dtype)

    return _as_array(detr_boxes), _as_array(detr_scores), _as_array(detr_classes, dtype=int)

# Function for Detectron2 output
def transform_detectron_output(detectron_output):
    """
    Convert a Detectron2 Instances-like object into the shared (boxes, scores, classes) triple.

    Arguments:
        detectron_output: Object exposing ``pred_boxes.tensor``, ``scores`` and
            ``pred_classes`` tensors.

    Returns:
        Tuple ``(boxes, scores, classes)`` of numpy arrays; ``classes`` is cast to int.
    """
    def _host_array(tensor):
        # Move to CPU before handing the data to numpy.
        return tensor.cpu().numpy()

    return (
        _host_array(detectron_output.pred_boxes.tensor),
        _host_array(detectron_output.scores),
        _host_array(detectron_output.pred_classes).astype(int),
    )


# Bring each model's raw output into the common (boxes, scores, classes) numpy format.
boxes_yolo, scores_yolo, classes_yolo = transform_yolo_output(yolo_output)
boxes_detr, scores_detr, classes_detr = transform_detr_output(detr_boxes, detr_scores, detr_labels)
# Print both triples for a quick visual comparison.
# NOTE(review): the class indices of the two models may follow different label
# conventions — confirm the wrappers map them to a common label space.
print(boxes_yolo, scores_yolo, classes_yolo)
print(boxes_detr, scores_detr, classes_detr)

# Detectron2 is disabled; re-enable together with the run_detectron call.
# boxes_detectron, scores_detectron, classes_detectron = transform_detectron_output(detectron_output)

### Ensemble models

In [44]:
import numpy as np
import torch
from torchvision.ops import box_iou

def aggregate_boxes(model_outputs, iou_threshold=0.5, verbose=True):
    """
    Fuses the detections of several models into one set of boxes.

    This is a greedy, class-agnostic simplification of Weighted Box Fusion: boxes are
    visited in input order, every not-yet-used box whose IoU with the current box exceeds
    ``iou_threshold`` joins its group, and the group is collapsed into one box.

    Arguments:
        model_outputs (list): List of tuples, where each tuple contains (boxes, scores, classes) arrays.
            A model with no detections may pass empty arrays of any shape.
        iou_threshold (float): IoU threshold to consider boxes as overlapping.
        verbose (bool): When True (default), print the IoU matrix and per-group debug information.

    Returns:
        Aggregated boxes, scores, and classes as numpy arrays. For each group:
          * box   - score-weighted average of the member boxes,
          * score - mean member score scaled by the fraction of models that agree,
                    capped at 1 so the result never exceeds the mean score,
          * class - class of the first box in the group.
        When there is nothing to aggregate the arrays have shapes (0, 4), (0,) and (0,).
    """
    def _empty_result():
        return np.zeros((0, 4)), np.zeros((0,)), np.zeros((0,), dtype=int)

    n_models = len(model_outputs)
    if n_models == 0:
        # Nothing to unpack; also avoids dividing by zero in the agreement factor.
        return _empty_result()

    # Unpack the model outputs into lists of boxes, scores, and classes
    boxes_list, scores_list, classes_list = zip(*model_outputs)

    # reshape() normalizes empty detections (e.g. np.array([]) with shape (0,)) so that
    # stacking them with (n, 4) boxes from other models does not fail.
    all_boxes = np.vstack([np.asarray(b).reshape(-1, 4) for b in boxes_list])
    all_scores = np.concatenate([np.asarray(s).reshape(-1) for s in scores_list])
    # The cast keeps class ids integral even if an empty float array was concatenated.
    all_classes = np.concatenate([np.asarray(c).reshape(-1) for c in classes_list]).astype(int)

    if len(all_boxes) == 0:
        return _empty_result()

    # Convert all boxes to a torch tensor for box_iou compatibility
    all_boxes_tensor = torch.tensor(all_boxes, dtype=torch.float32)

    # Calculate IoU matrix for all pairs of boxes
    iou_matrix = box_iou(all_boxes_tensor, all_boxes_tensor)

    if verbose:
        # Print the IoU matrix for debugging
        print("IoU Matrix:")
        for i in range(iou_matrix.shape[0]):
            for j in range(iou_matrix.shape[1]):
                print(f"IoU[{i}][{j}] = {iou_matrix[i, j]:.4f}", end="\t")
            print()  # Newline for readability

    # Lists to store final aggregated results
    final_boxes, final_scores, final_classes = [], [], []
    used_indices = set()

    # Loop through each box and aggregate based on IoU only
    for i, (box, score, cls) in enumerate(zip(all_boxes, all_scores, all_classes)):
        if i in used_indices:
            continue

        # Find all boxes that overlap with the current box, ignoring class.
        # A valid box always matches itself (IoU == 1), so the group normally contains i.
        overlaps = (iou_matrix[i] > iou_threshold).nonzero(as_tuple=False).flatten()
        group_indices = [j for j in overlaps.tolist() if j not in used_indices]

        if verbose:
            print(f"\nBox {i}: Initial box {box}, Score: {score}, Class: {cls}")
            print(f"Group indices (overlapping boxes with IoU > {iou_threshold}): {group_indices}")
            print(f"IoUs with group indices: {[iou_matrix[i][j].item() for j in group_indices]}")

        if not group_indices:
            # Reached when the box does not even match itself (degenerate box, or a
            # threshold >= 1); keep the box unchanged.
            final_boxes.append(box)
            final_scores.append(score)
            final_classes.append(cls)
            used_indices.add(i)
            continue

        group_boxes = all_boxes[group_indices]
        group_scores = all_scores[group_indices]

        # Score-weighted average for the final box; fall back to a plain mean when all
        # scores are zero so the result is not NaN.
        weight_sum = group_scores.sum()
        if weight_sum > 0:
            avg_box = (group_boxes * group_scores[:, None]).sum(axis=0) / weight_sum
        else:
            avg_box = group_boxes.mean(axis=0)

        # Scale by model agreement. The factor is capped at 1: a group can hold more
        # boxes than there are models (one model emitting several overlapping boxes),
        # which previously pushed the fused score above 1.0.
        agreement = min(len(group_indices), n_models) / n_models
        avg_score = group_scores.mean() * agreement

        final_boxes.append(avg_box)
        final_scores.append(avg_score)
        # Select the class of the first box in the group for simplicity
        final_classes.append(cls)
        used_indices.update(group_indices)  # Mark all grouped indices as used

    return np.array(final_boxes), np.array(final_scores), np.array(final_classes)

# Example model outputs for testing: one (boxes, scores, classes) tuple per model.
# The number of tuples is used by aggregate_boxes when scaling the fused scores.
model_outputs = [
    (boxes_yolo, scores_yolo, classes_yolo),
    (boxes_detr, scores_detr, classes_detr),
    # (boxes_detectron, scores_detectron, classes_detectron)
]

# Fuse the detections using the default IoU threshold of 0.5.
aggregated_boxes, aggregated_scores, aggregated_classes = aggregate_boxes(model_outputs)

print("Aggregated boxes:", aggregated_boxes)
print("Aggregated scores:", aggregated_scores)
print("Aggregated classes:", aggregated_classes)

IoU Matrix:
IoU[0][0] = 1.0000	IoU[0][1] = 0.2038	IoU[0][2] = 0.2647	IoU[0][3] = 0.9157	
IoU[1][0] = 0.2038	IoU[1][1] = 1.0000	IoU[1][2] = 0.6579	IoU[1][3] = 0.1920	
IoU[2][0] = 0.2647	IoU[2][1] = 0.6579	IoU[2][2] = 1.0000	IoU[2][3] = 0.2500	
IoU[3][0] = 0.9157	IoU[3][1] = 0.1920	IoU[3][2] = 0.2500	IoU[3][3] = 1.0000	

Box 0: Initial box [     166.19      130.83      621.33      343.04], Score: 0.8051950335502625, Class: 8
Group indices (overlapping boxes with IoU > 0.5): [0, 3]
IoUs with group indices: [1.0, 0.9156527519226074]

Box 1: Initial box [     250.78      67.302       413.7       264.6], Score: 0.7584846615791321, Class: 0
Group indices (overlapping boxes with IoU > 0.5): [1, 2]
IoUs with group indices: [1.0, 0.6579228043556213]
Aggregated boxes: [[     146.84      129.75      621.31      342.92]
 [     216.32      66.116      407.34      266.68]]
Aggregated scores: [    0.85298     0.86742]
Aggregated classes: [8 0]


In [45]:
import cv2
from tqdm import tqdm

def draw_aggregated_boxes(image_path, boxes, scores, classes, class_names=None, confidence_threshold=0.5,
                          output_path="aggregated_output.jpg"):
    """
    Draws aggregated bounding boxes, scores, and class labels on the image and saves the result.

    Arguments:
        image_path (str): Path to the input image.
        boxes (ndarray): Array of shape (num_boxes, 4) with bounding box coordinates [x_min, y_min, x_max, y_max].
        scores (ndarray): Array of shape (num_boxes,) with confidence scores.
        classes (ndarray): Array of shape (num_boxes,) with class labels.
        class_names (dict, optional): Dictionary mapping class indices to class names. Defaults to None,
            in which case (as for any class missing from the dict) the numeric class id is shown.
        confidence_threshold (float): Minimum confidence score to display a box.
        output_path (str): Where the annotated image is written. Defaults to "aggregated_output.jpg".

    Raises:
        ValueError: If the input image cannot be read.
        OSError: If the annotated image cannot be written.
    """
    # The documented default is None; normalize it so the .get() lookup below cannot fail.
    names = class_names if class_names is not None else {}

    # Load the image
    print("Loading image...")
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError(f"Image not found at {image_path}")
    print("Image loaded successfully.")

    # Process each box and draw if above confidence threshold
    for i, box in tqdm(enumerate(boxes), total=len(boxes), desc="Processing bounding boxes"):
        score = scores[i]

        # Skip boxes below the confidence threshold
        if score < confidence_threshold:
            continue

        print(f"Drawing box {i + 1} / {len(boxes)} with score: {score}")
        x_min, y_min, x_max, y_max = map(int, box)
        cls = classes[i]

        color = (0, 255, 0)  # Green for bounding box

        # Draw bounding box
        cv2.rectangle(image, (x_min, y_min), (x_max, y_max), color, 2)

        # Display label with class and confidence
        label = f"{names.get(cls, cls)}: {score:.2f}"
        label_size, _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
        # Clamp the label's bottom edge so it stays inside the image for boxes near the top.
        label_y = max(y_min, label_size[1] + 10)

        print(f"Adding label: {label} at position ({x_min}, {label_y - 7})")

        # Draw the label background and text. Both are anchored on the clamped label_y so
        # the background stays behind the text; for boxes away from the top edge
        # label_y == y_min and the placement is unchanged.
        cv2.rectangle(image, (x_min, label_y - label_size[1] - 10), (x_min + label_size[0], label_y), color, -1)
        cv2.putText(image, label, (x_min, label_y - 7), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

    # Save instead of displaying; imwrite signals failure through its return value.
    if not cv2.imwrite(output_path, image):
        raise OSError(f"Could not write image to {output_path}")
    print(f"Image saved to {output_path}")


# Example usage with class names and confidence threshold.
# The mapping below is a placeholder; a class id that is missing from it is labelled
# with its numeric id instead of a name.
class_names = {0: "Class0", 55: "Class55", 61: "Class61"}  # Example class names
# A threshold of 0.0 means no box with a non-negative score is filtered out.
draw_aggregated_boxes(test_image, aggregated_boxes, aggregated_scores, aggregated_classes, class_names, confidence_threshold=0.0)


Loading image...
Image loaded successfully.


Processing bounding boxes: 100%|██████████| 2/2 [00:00<00:00, 1988.29it/s]

Drawing box 1 / 2 with score: 0.8529750108718872
Adding label: 8: 0.85 at position (146, 122)
Drawing box 2 / 2 with score: 0.8674219846725464
Adding label: Class0: 0.87 at position (216, 59)
Image saved to aggregated_output.jpg



