In [None]:
!pip install -q transformers torch torchvision pillow timm

In [None]:
import torch
from transformers import DetrImageProcessor, DetrForObjectDetection
from PIL import Image
import matplotlib.pyplot as plt
# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [None]:
# Checkpoint shared by the image processor and the detection model; keeping it
# in one place guarantees both are loaded from the same source.
MODEL_NAME = "facebook/detr-resnet-50"

processor = DetrImageProcessor.from_pretrained(MODEL_NAME)
model = DetrForObjectDetection.from_pretrained(MODEL_NAME)
# Move the weights to the selected device and switch to inference mode.
# Both calls return the module itself; assigning the result keeps the notebook
# from printing the full module repr as this cell's output.
model = model.to(device).eval()

In [None]:
# Upload your image to Colab and update this path.
# The character before "PM" is a narrow no-break space (U+202F), as found in
# macOS screenshot file names. It is written as an explicit escape so that it
# survives editors and encoding round-trips (mis-decoded it shows up as "â€¯",
# which does not match the file on disk).
image_path = "/content/Screenshot 2025-12-13 at 12.52.59\u202fPM.png"
# Image.open is lazy; convert() reads the pixel data and returns a new image,
# so the underlying file can be released as soon as the RGB copy exists.
with Image.open(image_path) as source_image:
    image = source_image.convert("RGB")

In [None]:
# Preprocess the image into PyTorch tensors and place every tensor on the same
# device as the model, producing a plain dict ready for `model(**inputs)`.
inputs = {
    name: tensor.to(device)
    for name, tensor in processor(images=image, return_tensors="pt").items()
}

In [None]:
# Minimum confidence a detection needs in order to be kept and drawn.
SCORE_THRESHOLD = 0.7


def _draw_detections(image, detections, id2label, ax):
    """Show ``image`` on ``ax`` and overlay one labelled box per detection.

    Args:
        image: The picture to display; passed straight to ``ax.imshow``.
        detections: Mapping with ``"scores"``, ``"labels"`` and ``"boxes"``
            tensors. Each box is unpacked as (x_min, y_min, x_max, y_max).
        id2label: Mapping from integer label id to a class name.
        ax: Matplotlib axes to draw on.

    Returns:
        The number of detections drawn.
    """
    ax.imshow(image)

    # Convert each tensor to plain Python values once, instead of converting
    # element by element inside the loop.
    scores = detections["scores"].tolist()
    labels = detections["labels"].tolist()
    boxes = detections["boxes"].tolist()

    for score, label, (x_min, y_min, x_max, y_max) in zip(scores, labels, boxes):
        ax.add_patch(
            plt.Rectangle(
                (x_min, y_min),
                x_max - x_min,
                y_max - y_min,
                fill=False,
                color="red",
                linewidth=2,
            )
        )
        ax.text(
            x_min,
            y_min,
            f"{id2label[label]}: {score:.2f}",
            color="white",
            fontsize=12,
            bbox=dict(facecolor="red", alpha=0.7),
        )

    return len(scores)


# Inference only, so gradient tracking is switched off.
with torch.no_grad():
    outputs = model(**inputs)

# Post-processing
# image.size is reversed so the two dimensions are passed in the opposite
# order from how PIL reports them; the tensor is created directly on `device`.
target_sizes = torch.tensor([image.size[::-1]], device=device)
results = processor.post_process_object_detection(
    outputs,
    target_sizes=target_sizes,
    threshold=SCORE_THRESHOLD,
)[0]

# Visualize results
fig, ax = plt.subplots(figsize=(10, 10))
num_drawn = _draw_detections(image, results, model.config.id2label, ax)
ax.axis("off")
plt.show()

# Make an empty overlay distinguishable from a plotting problem.
if num_drawn == 0:
    print(f"No detections with score above {SCORE_THRESHOLD}.")
