In [None]:
# Using YOLO-World for open-vocabulary detection
from ultralytics import YOLOWorld

# Inputs: the text prompt to detect and the image to run it on.
img_path = "Test_images/table.JPG"
desired_object = "red candle"

# Load the small YOLO-World checkpoint (yolov8m-world.pt / yolov8l-world.pt are the other sizes).
model = YOLOWorld("yolov8s-world.pt")

# Restrict the open-vocabulary detector to the single prompt defined above.
model.set_classes([desired_object])

# Run inference on the image and display the first result.
results = model.predict(source=img_path)
results[0].show()

In [None]:
# Object tracking with YOLO-World
from ultralytics import YOLOWorld

# NOTE: this cell reuses `model`, the YOLO-World instance created in the detection
# cell above. The Grounding DINO cell further down rebinds `model`, so re-run the
# detection cell first if this cell is executed after that one.
model.set_classes(["wine bottle"])

# stream=True makes track() return a generator that yields one result per frame
# instead of a list holding the results of the whole video, so memory use stays
# flat for long clips. The generator is consumed by the loop below; `results`
# cannot be indexed or iterated a second time afterwards.
results = model.track(
    source="Test_images/video_wine_bottle.mp4",
    tracker="bytetrack.yaml",
    stream=True,
    show=True,
    save=True,
)

for result in results:
    track_ids = result.boxes.id
    # `id` is None on frames where no box has been assigned a track ID.
    if track_ids is None:
        continue
    ids = track_ids.int().cpu().tolist()
    print(f"Tracking IDs in this frame: {ids}")

In [None]:
# Using Grounding DINO
import requests
import torch
from PIL import Image
from transformers import AutoProcessor, AutoModelForZeroShotObjectDetection
from accelerate import Accelerator
# Zero-shot (text-prompted) object detection with Grounding DINO.
# NOTE: this rebinds `model`, which earlier cells use for the YOLO-World detector.
model_id = "IDEA-Research/grounding-dino-tiny"
device = Accelerator().device

processor = AutoProcessor.from_pretrained(model_id)
model = AutoModelForZeroShotObjectDetection.from_pretrained(model_id).to(device)

image_path = "Test_images/table.JPG"
# Normalize to 3-channel RGB so grayscale / CMYK / RGBA files are handled the same way.
image = Image.open(image_path).convert("RGB")
# Text prompts to look for in the image (outer list: one entry per image).
text_labels = [["a remote control", "red candle", "wine bottle", "a pair of wool socks", "notebook"]]

inputs = processor(images=image, text=text_labels, return_tensors="pt").to(model.device)
with torch.no_grad():
    outputs = model(**inputs)

results = processor.post_process_grounded_object_detection(
    outputs,
    inputs.input_ids,
    threshold=0.3,
    text_threshold=0.3,
    # PIL reports size as (width, height); reversed here to (height, width).
    target_sizes=[image.size[::-1]],
)

result = results[0]
# Prefer the "text_labels" key for the matched prompt strings; the "labels" key is
# deprecated for that purpose in recent transformers releases. Fall back to
# "labels" when running a version that does not provide "text_labels".
label_key = "text_labels" if "text_labels" in result else "labels"
for box, score, labels in zip(result["boxes"], result["scores"], result[label_key]):
    box = [round(x, 2) for x in box.tolist()]
    print(f"Detected {labels} with confidence {round(score.item(), 3)} at location {box}")

In [None]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches

# Draw the Grounding DINO detections (`result`) on top of the source `image`,
# both produced by the previous cell.
fig, ax = plt.subplots(1, figsize=(12, 9))
ax.imshow(image)

# Prefer the "text_labels" key for the matched prompt strings; the "labels" key is
# deprecated for that purpose in recent transformers releases. Fall back to
# "labels" when running a version that does not provide "text_labels".
label_key = "text_labels" if "text_labels" in result else "labels"
for box, score, label in zip(result["boxes"], result["scores"], result[label_key]):
    # Boxes are unpacked as corner coordinates (xmin, ymin, xmax, ymax).
    xmin, ymin, xmax, ymax = box.tolist()

    # Rectangle takes the anchor corner plus width and height.
    rect = patches.Rectangle(
        (xmin, ymin), xmax - xmin, ymax - ymin,
        linewidth=2, edgecolor='r', facecolor='none'
    )
    ax.add_patch(rect)

    # Caption placed 5 px above the box's top edge; drawn on `ax` explicitly
    # rather than through the pyplot "current axes" state.
    ax.text(xmin, ymin - 5, f"{label}: {score.item():.2f}",
            color='white', fontsize=12, fontweight='bold',
            bbox=dict(facecolor='red', alpha=0.5))

ax.axis('off')
plt.show()