In [None]:

# Install GroundingDINO
!git clone https://github.com/IDEA-Research/GroundingDINO.git /content/GroundingDINO
%cd /content/GroundingDINO
!pip install -e .
%cd /content

# Install Segment Anything
!git clone https://github.com/facebookresearch/segment-anything.git /content/segment-anything
%cd /content/segment-anything
!pip install -e .
%cd /content


In [None]:

import torch, cv2, numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from groundingdino.util.inference import load_image, load_model, predict
from groundingdino.util import box_ops
from segment_anything import sam_model_registry, SamPredictor


In [None]:

# Choose the compute device first so that both models are configured for the
# same one (GPU when available, otherwise CPU).
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load Grounding DINO.
# `load_model` defaults to device="cuda"; pass the detected device explicitly
# so the notebook also runs on CPU-only runtimes.
gd_config = "/content/GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py"
gd_ckpt = "/content/weights/groundingdino_swint_ogc.pth"
gd_model = load_model(gd_config, gd_ckpt, device=device).eval()

# Load SAM (ViT-H backbone) and wrap it in a predictor for box-prompted masks.
sam = sam_model_registry["vit_h"](checkpoint="/content/weights/sam_vit_h.pth")
sam.to(device)
sam_pred = SamPredictor(sam)


In [None]:

# Read image.
# `load_image` returns two views of the same file: the raw RGB array (used for
# SAM and for plotting) and the resized/normalised tensor that GroundingDINO's
# `predict` expects. Passing the raw numpy array to `predict` does not work.
image_path = "/content/your_image.jpg"  # Change this to your image path
img, img_tensor = load_image(image_path)
H, W, _ = img.shape

# Detect with prompt.
# GroundingDINO recommends separating category names with " . ". A free-form
# caption such as "helmet on person" can yield a single phrase that contains
# both keywords, which would put the same box into both lists below.
PROMPT = "person . helmet ."
boxes, logits, phrases = predict(
    gd_model,
    img_tensor,
    caption=PROMPT,
    box_threshold=0.35,
    text_threshold=0.25,
    device=device,  # `predict` defaults to "cuda"; reuse the detected device
)

# `predict` returns boxes as normalised (cx, cy, w, h); convert them to
# absolute pixel (x1, y1, x2, y2) corners.
xyxy = box_ops.box_cxcywh_to_xyxy(boxes) * torch.tensor([W, H, W, H])
xyxy = xyxy.cpu().numpy().astype(int)

# Clamp boxes to the image so they can be used safely as slice indices
# (negative indices would otherwise wrap around).
xyxy[:, [0, 2]] = xyxy[:, [0, 2]].clip(0, W)
xyxy[:, [1, 3]] = xyxy[:, [1, 3]].clip(0, H)

# Separate person and helmet boxes.
# The two lists are kept mutually exclusive: a phrase mentioning both keywords
# is treated as a helmet, so a box can never be matched against itself.
helmet_boxes = [b for b, p in zip(xyxy, phrases) if "helmet" in p]
person_boxes = [b for b, p in zip(xyxy, phrases) if "person" in p and "helmet" not in p]


In [None]:

# Visualization: draw every detected person in green (helmet found) or red
# (no helmet found) and print a summary of the counts.
fig, ax = plt.subplots(figsize=(10, 10))
ax.imshow(img)

helmeted = 0
nohelmet = 0

# The SAM image embedding depends only on the image, not on the box prompt,
# so compute it once instead of once per person.
if len(person_boxes) > 0:
    sam_pred.set_image(img)

for pbox in person_boxes:
    x1, y1, x2, y2 = pbox

    # Segment the person using the detection box as the prompt; with
    # multimask_output=False a single mask is returned.
    masks, _, _ = sam_pred.predict(box=np.asarray(pbox), multimask_output=False)
    mask = masks[0]

    # A person counts as wearing a helmet when any pixel of their mask falls
    # inside any helmet box. Slicing the mask directly is equivalent to
    # AND-ing it with a rasterised box, without allocating a full-size array.
    has_helmet = False
    for hx1, hy1, hx2, hy2 in helmet_boxes:
        # Guard against negative coordinates, which would wrap when slicing.
        hx1, hy1, hx2, hy2 = max(hx1, 0), max(hy1, 0), max(hx2, 0), max(hy2, 0)
        if mask[hy1:hy2, hx1:hx2].any():
            has_helmet = True
            break

    if has_helmet:
        helmeted += 1
        color = "green"
    else:
        nohelmet += 1
        color = "red"

    ax.add_patch(patches.Rectangle((x1, y1), x2 - x1, y2 - y1, edgecolor=color, linewidth=3, fill=False))

ax.axis("off")
plt.show()

# Print summary
print(f"👷 Total people: {len(person_boxes)}")
print(f"🪖 Wearing helmets: {helmeted}")
print(f"🙍 Without helmets: {nohelmet}")
print(f"🪖 Helmet present? {'Yes' if helmeted>0 else 'No'}")
