In [16]:
import requests

import torch
from PIL import Image
from transformers import AutoProcessor, AutoModelForZeroShotObjectDetection
import os
from pathlib import Path

# Hugging Face checkpoint to load for zero-shot object detection.
model_id = "IDEA-Research/grounding-dino-tiny"

# Prefer the GPU when one is present, but fall back to the CPU so the notebook
# still runs (more slowly) on machines without CUDA instead of failing in .to().
device = "cuda" if torch.cuda.is_available() else "cpu"

# The processor builds the model inputs and post-processes its outputs;
# the model itself is moved to the selected device once, here.
processor = AutoProcessor.from_pretrained(model_id)
model = AutoModelForZeroShotObjectDetection.from_pretrained(model_id).to(device)



In [20]:

# Folder holding the images to run detection on.
image_folder = Path("C:/Data/3lc-images")

# Collect JPEG files first, then WebP files. Each group is sorted because the
# order glob() yields entries in is filesystem-dependent, and this order decides
# the row order of everything derived from `image_files` / `images`.
image_files = [
    str(file)
    for pattern in ("*.jpg", "*.webp")
    for file in sorted(image_folder.glob(pattern))
]

# Fail early with a clear message rather than with an obscure error later on.
if not image_files:
    raise FileNotFoundError(f"No .jpg or .webp images found in {image_folder}")

# Image.open() is lazy and keeps the underlying file open. Converting inside a
# `with` block forces the pixel data to be read and releases the file handle.
# convert("RGB") also normalises RGBA / palette / greyscale / CMYK files to
# three channels; it does not change the image width or height.
images = []
for file in image_files:
    with Image.open(file) as opened:
        images.append(opened.convert("RGB"))


In [25]:
# Run inference

# Text prompts to look for in every image.
label_names = ["a person", "the boss", "some text"]

# Number of images per forward pass. Running the whole folder as one batch makes
# memory use grow with the number of images (and pads every image to the largest
# one), so the images are processed in fixed-size chunks instead.
BATCH_SIZE = 8

# Score cut-offs passed to the post-processing step.
BOX_THRESHOLD = 0.4
TEXT_THRESHOLD = 0.3

# One result dict per image, in the same order as `images`.
results = []

for start in range(0, len(images), BATCH_SIZE):
    batch_images = images[start:start + BATCH_SIZE]

    # The same list of candidate labels is supplied for every image in the batch.
    text_labels = [label_names] * len(batch_images)

    inputs = processor(images=batch_images, text=text_labels, return_tensors="pt").to(device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = model(**inputs)

    results.extend(
        processor.post_process_grounded_object_detection(
            outputs,
            inputs.input_ids,
            box_threshold=BOX_THRESHOLD,
            text_threshold=TEXT_THRESHOLD,
            # PIL reports (width, height); reversed here to (height, width).
            target_sizes=[image.size[::-1] for image in batch_images],
        )
    )

# NOTE: after the loop `inputs`, `outputs` and `text_labels` refer to the last
# batch only; `results` covers every image.

In [None]:

import tlc

def _resolve_label_index(phrase, names):
    """Map a predicted text phrase to an index into ``names``.

    The match is case- and surrounding-whitespace-insensitive. An exact match
    wins. Otherwise the phrase is accepted only if exactly one entry of
    ``names`` contains it or is contained in it, so a partial phrase such as
    "person" still resolves to "a person" while an ambiguous or merged phrase
    does not.

    Args:
        phrase: Predicted label text (a string).
        names: Sequence of candidate label strings.

    Returns:
        The index of the matching entry in ``names``, or ``None`` when the
        phrase is empty, matches nothing, or matches more than one entry.
    """
    wanted = phrase.strip().lower()
    if not wanted:
        return None

    normalised = [name.strip().lower() for name in names]
    if wanted in normalised:
        return normalised.index(wanted)

    candidates = [
        index
        for index, name in enumerate(normalised)
        if wanted in name or name in wanted
    ]
    return candidates[0] if len(candidates) == 1 else None


# Writer for a table with an image-path column and a column of predicted
# bounding boxes. The integer keys of the label map are positions in
# `label_names`, which is what each box's "label" field stores below.
table_writer = tlc.TableWriter(
    "grounded-dino-predictions",
    "GROUNDED_DINO_DEMO",
    "GROUNDED_DINO_DEMO",
    column_schemas={
        "image": tlc.ImagePath("image"),
        "predicted_boxes": tlc.BoundingBoxListSchema(
            {index: tlc.MapElement(name) for index, name in enumerate(label_names)},
            is_prediction=True,
            include_segmentation=False,
        ),
    },
    if_exists="rename",
)

# Rows are built by pairing files, images and results positionally; refuse to
# write anything if they are out of step (e.g. cells were re-run out of order).
if not (len(image_files) == len(images) == len(results)):
    raise ValueError(
        f"Mismatched lengths: {len(image_files)} files, "
        f"{len(images)} images, {len(results)} results"
    )

with table_writer:

    for image_file, image, result in zip(image_files, images, results):

        bb_list = []
        for box, score, label in zip(result["boxes"], result["scores"], result["labels"]):
            box = [round(x, 2) for x in box.tolist()]
            confidence = score.item()
            print(f"Detected {label} with confidence {round(confidence, 3)} at location {box}")

            # The predicted phrase is not guaranteed to be an exact entry of
            # `label_names`; list.index() would raise and abort the whole
            # write, so resolve it tolerantly and skip what cannot be mapped.
            label_index = _resolve_label_index(label, label_names)
            if label_index is None:
                print(f"Skipping box: phrase {label!r} does not map to a single entry of label_names")
                continue

            bb_list.append(
                {
                    "x0": box[0],
                    "y0": box[1],
                    "x1": box[2],
                    "y1": box[3],
                    "label": label_index,
                    "confidence": confidence,
                    # No ground truth is compared here, so IoU is a placeholder.
                    "iou": 0.0,
                }
            )

        # One row per image, even when no box survived (empty bb_list).
        table_writer.add_row(
            {
                "image": image_file,
                "predicted_boxes": {
                    "image_height": image.height,
                    "image_width": image.width,
                    "bb_list": bb_list,
                },
            }
        )

