In [1]:
# Import required libraries
import torch
import torchvision
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import numpy as np
import os
import json
from pathlib import Path

# Import DetectionMetrics components
from detectionmetrics.datasets.coco import CocoDataset
from detectionmetrics.models.torch_detection import TorchImageDetectionModel
from detectionmetrics.utils import conversion as uc

# Notebook-wide plotting defaults: larger figures and a readable base font size
plt.rcParams.update({
    'figure.figsize': (12, 8),
    'font.size': 10,
})

In [2]:
# Create the working directories used by the rest of the notebook.
# os.makedirs replaces the `!mkdir -p` shell escapes so the cell does not
# depend on a POSIX shell being available; exist_ok=True keeps it idempotent
# when the cell is re-run.
for data_dir in ("local/data/models", "local/outputs"):
    os.makedirs(data_dir, exist_ok=True)

In [3]:
# Initialize COCO dataset
# The dataset root can be overridden with the COCO_DIR environment variable so
# the notebook is not tied to one machine; the default keeps the original
# local layout (<root>/images/train2017 and <root>/annotations/...).
coco_root = os.environ.get("COCO_DIR", "/Users/sakprave/Downloads/Coco")
img_dir = os.path.join(coco_root, "images", "train2017")
ann_file = os.path.join(coco_root, "annotations", "instances_train2017.json")

# Check if files exist, and report exactly which ones are missing
missing_paths = [path for path in (img_dir, ann_file) if not os.path.exists(path)]
if missing_paths:
    print("COCO data not found. Please check the paths above.")
    for path in missing_paths:
        print(f"  missing: {path}")
    print("  (set the COCO_DIR environment variable to point at your COCO root)")
else:
    # Load dataset
    dataset = CocoDataset(annotation_file=ann_file, image_dir=img_dir)
    print(f"Dataset loaded with {len(dataset.dataset)} samples")
    print(f"Number of classes: {len(dataset.ontology)}")

loading annotations into memory...


Done (t=9.02s)
creating index...
index created!
Dataset loaded with 118287 samples
Number of classes: 80


In [4]:
# Destination of the serialized model; make sure its folder exists first
model_path = "local/data/models/maskrcnn_model.pt"
model_dir = os.path.dirname(model_path)
os.makedirs(model_dir, exist_ok=True)

# Build torchvision's Mask R-CNN detector with its default pre-trained weights
# and switch it to evaluation mode (eval() returns the module itself)
build_detector = torchvision.models.detection.maskrcnn_resnet50_fpn
model = build_detector(weights="DEFAULT").eval()

# Persist the whole module object (not only its state dict)
torch.save(model, model_path)
# Settings handed to the detection model wrapper through a JSON file.
# NOTE(review): torchvision detection models are understood to normalize their
# inputs internally — confirm the wrapper does not apply this normalization a
# second time.
config_path = "local/data/models/maskrcnn_config.json"
model_cfg = dict(
    resize=dict(height=480, width=640),
    normalization=dict(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
    batch_size=1,
    num_workers=0,
    confidence_threshold=0.5,
    nms_threshold=0.3,
)
with open(config_path, "w") as cfg_file:
    json.dump(model_cfg, cfg_file, indent=2)

# Save model ontology
# The 80 COCO object classes, listed in increasing order of their category id.
class_names = [
    "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat",
    "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
    "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
    "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
    "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange",
    "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant", "bed",
    "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven",
    "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"
]

# torchvision's COCO-pretrained detectors emit the ORIGINAL COCO category ids
# (1..90), which skip ten unused ids. Numbering the classes contiguously 1..80
# would therefore mislabel every prediction from id 12 upwards (e.g. label 13,
# "stop sign", would be reported as "parking meter") and leave ids 81..90
# without an entry.
COCO_UNUSED_CATEGORY_IDS = {12, 26, 29, 30, 45, 66, 68, 69, 71, 83}
coco_category_ids = [
    category_id for category_id in range(1, 91)
    if category_id not in COCO_UNUSED_CATEGORY_IDS
]
assert len(coco_category_ids) == len(class_names), "class list and COCO ids are out of sync"

# Keys are the class index as a string; "idx" holds the same index as an int
model_ontology = {
    str(category_id): {
        "idx": category_id,
        "name": name,
        "rgb": [0, 0, 0]
    }
    for category_id, name in zip(coco_category_ids, class_names)
}

ontology_path = "local/data/models/coco_model_ontology.json"
# Make sure the target folder exists even if this cell is run on its own
os.makedirs(os.path.dirname(ontology_path), exist_ok=True)
with open(ontology_path, "w") as f:
    json.dump(model_ontology, f, indent=2)

# Translate dataset classes to model classes by matching names.
# dataset.ontology is keyed by class name; model_ontology is keyed by index
# string, so each dataset name is mapped to the key of the first model entry
# carrying the same name. Dataset classes without a match are left out.
ontology_translation = {}
for name, _dataset_entry in dataset.ontology.items():
    matching_key = next(
        (idx for idx, model_data in model_ontology.items() if model_data["name"] == name),
        None,
    )
    if matching_key is not None:
        ontology_translation[name] = matching_key

# Write the translation table next to the other model files
translation_path = "local/data/models/ontology_translation.json"
with open(translation_path, "w") as translation_file:
    json.dump(ontology_translation, translation_file, indent=2)

print("Model and configuration saved!")

Model and configuration saved!


In [5]:
# Wrap the saved model, its JSON config and its ontology file (index strings
# as keys) in the DetectionMetrics model class
wrapper_kwargs = {
    "model": model_path,
    "model_cfg": config_path,
    "ontology_fname": ontology_path,
}
detection_model = TorchImageDetectionModel(**wrapper_kwargs)

# Explicitly pin the in-memory ontology and the class count on the wrapper
detection_model.ontology = model_ontology
detection_model.n_classes = len(model_ontology)

print("Detection model initialized!")

Model is not a TorchScript model. Loading as native PyTorch model.
Detection model initialized!


: 

In [None]:
from matplotlib import pyplot as plt
import numpy as np
from PIL import Image
import matplotlib.patches as patches

# Load a sample image from your dataset
# presumably make_fname_global() resolves the stored file names to absolute
# paths — verify against the dataset class
dataset.make_fname_global()
sample_idx = 0  # which dataset row to visualize
image_fname = dataset.dataset["image"].iloc[sample_idx]
# Force 3-channel RGB: COCO contains some grayscale images, which would not
# match the 3-channel normalization configured for the model
image = Image.open(image_fname).convert("RGB")

# Run everything on the CPU so the demo works without a GPU
detection_model.device = torch.device("cpu")
detection_model.model = detection_model.model.to(detection_model.device)
# Run inference
predictions = detection_model.inference(image)

# Get ground truth for comparison (same row as the image)
annotation_fname = dataset.dataset["annotation"].iloc[sample_idx]
ground_truth = dataset.read_annotation(annotation_fname)

# Side-by-side visualization: predictions (left) and ground truth (right)


def class_name_for(ontology, label):
    """Return a readable class name for ``label`` from ``ontology``.

    Two ontology layouts are supported:
    * keyed by the class index (as ``str`` or ``int``) with a ``"name"`` field;
    * keyed by the class name with an ``"idx"`` field holding the index.

    Falls back to ``"Class <label>"`` when no entry matches.
    """
    label_int = int(label.item()) if hasattr(label, 'item') else int(label)

    # Layout 1: index-keyed ontology
    for key in (str(label_int), label_int):
        if key in ontology:
            entry = ontology[key]
            if isinstance(entry, dict) and "name" in entry:
                return entry["name"]

    # Layout 2: name-keyed ontology, matched through the entry's "idx" field
    for key, entry in ontology.items():
        if isinstance(entry, dict) and entry.get("idx") == label_int:
            return entry.get("name", key)

    return f"Class {label}"


def draw_boxes(ax, boxes, captions, color, caption_below=False):
    """Draw ``(x1, y1, x2, y2)`` boxes on ``ax``, each with its caption.

    Captions are placed just above the box, or just below it when
    ``caption_below`` is true.
    """
    for (x1, y1, x2, y2), caption in zip(boxes, captions):
        width = x2 - x1
        height = y2 - y1
        ax.add_patch(patches.Rectangle(
            (x1, y1), width, height,
            linewidth=2, edgecolor=color, facecolor='none', alpha=0.7
        ))
        text_y = y1 + height + 5 if caption_below else y1 - 5
        ax.text(x1, text_y, caption, color=color, fontsize=8, weight='bold')


fig, axes = plt.subplots(1, 2, figsize=(15, 7))

# Both panels need the image as background; without it the ground-truth boxes
# would be drawn on an empty axes and fall outside its default limits
axes[0].imshow(image)
axes[0].set_title(f"Predictions ({len(predictions['boxes'])} detections)")
axes[1].imshow(image)
axes[1].set_title(f"Ground Truth ({len(ground_truth[0])} objects)")

# Draw prediction boxes
if len(predictions['boxes']) > 0:
    # detach() makes the conversion safe even if the tensors track gradients
    boxes = predictions['boxes'].detach().cpu().numpy()
    scores = predictions['scores'].detach().cpu().numpy()
    labels = predictions['labels'].detach().cpu().numpy()

    pred_captions = [
        f'{class_name_for(detection_model.ontology, label)}: {score:.2f}'
        for label, score in zip(labels, scores)
    ]
    draw_boxes(axes[0], boxes, pred_captions, color='red')

# Draw ground truth boxes
if len(ground_truth[0]) > 0:
    gt_boxes, gt_labels = ground_truth
    gt_captions = [
        f'GT: {class_name_for(dataset.ontology, label)}'
        for label in gt_labels
    ]
    draw_boxes(axes[1], gt_boxes, gt_captions, color='green', caption_below=True)

for ax in axes:
    ax.axis('off')

plt.tight_layout()
plt.show()

# Text summary: object counts for both sources plus the raw prediction scores
num_predicted = len(predictions['boxes'])
num_ground_truth = len(ground_truth[0])
score_array = predictions['scores'].cpu().numpy()

summary_lines = [
    " Detection Summary:",
    f"   Predictions: {num_predicted} objects",
    f"   Ground Truth: {num_ground_truth} objects",
    f"   Prediction scores: {score_array}",
]
print("\n".join(summary_lines))

In [None]:
# Run evaluation on a subset of the dataset
# For demonstration, we'll use a small subset
N_EVAL_SAMPLES = 5  # number of leading samples evaluated in this demo

# Start from a clean slate so a failed run never leaves stale results behind
results = None

if 'dataset' in locals():

    # Remember the state that is about to be overridden so it can be restored
    # exactly, whatever it was
    original_dataset = dataset.dataset
    original_dataset_dir = getattr(dataset, "dataset_dir", None)

    predictions_outdir = "local/outputs/detection_preds"

    # Path to ontology translation file
    ontology_translation_path = "local/data/models/ontology_translation.json"

    try:
        # Temporarily replace dataset with a small subset for faster evaluation
        dataset.dataset = original_dataset.head(N_EVAL_SAMPLES)

        # Point dataset_dir back at the dataset root, since make_fname_global()
        # was called earlier. The root is derived from img_dir
        # (<root>/images/train2017) instead of repeating a machine-specific path.
        dataset.dataset_dir = str(Path(img_dir).parents[1])

        # Make sure the output directory exists
        os.makedirs(predictions_outdir, exist_ok=True)

        # Run evaluation with ontology translation
        results = detection_model.eval(
            dataset=dataset,
            split="train",  # Use train split for demo
            ontology_translation=ontology_translation_path,  # Add ontology translation
            predictions_outdir=predictions_outdir,
            results_per_sample=True
        )

        print(" Evaluation completed!")

    except Exception as e:
        # Best-effort demo: report the failure with its traceback and carry on
        print(f" Evaluation failed: {e}")
        import traceback
        traceback.print_exc()

    finally:
        # Restore the dataset exactly as it was before this cell ran
        dataset.dataset = original_dataset
        dataset.dataset_dir = original_dataset_dir
else:
    print("  Dataset not loaded. Please check the data paths above.")


In [None]:
# Show the evaluation results. Guarded so that a fresh kernel or a failed
# evaluation run gives a clear message instead of a NameError.
if globals().get("results") is None:
    print("No evaluation results available. Run the evaluation cell above first.")
else:
    display(results)