## Before you start

Let's make sure that we have access to a GPU. We can use the `nvidia-smi` command to do that. If you are running in Google Colab and run into any problems, navigate to `Edit` -> `Notebook settings` -> `Hardware accelerator`, set it to `GPU`, and then click `Save`.

In [1]:
# Shell out to the NVIDIA driver tool to confirm that a GPU is visible from this kernel.
!nvidia-smi

Tue Feb 20 23:28:23 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 551.23                 Driver Version: 551.23         CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                     TCC/WDDM  | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce GTX 1660 ...  WDDM  |   00000000:01:00.0 Off |                  N/A |
| N/A   63C    P8             14W /   60W |       0MiB /   6144MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [2]:
import os

# Anchor every path in this notebook to the directory the kernel started in.
# Later cells change the working directory with `%cd`, so simply re-running
# `HOME = os.getcwd()` would silently re-anchor HOME to whatever directory is
# current at that moment. The guard keeps the first value instead.
# To re-anchor on purpose, run `del HOME` (or restart the kernel) first.
if "HOME" not in globals():
    HOME = os.getcwd()
print(HOME)

d:\Research\VisualGeneration\Scripts


## Install Grounding DINO 🦕

In [3]:
# One-time setup, left commented out (presumably because the repository is
# already cloned in this environment). On a fresh machine, uncomment the two
# lines below to clone GroundingDINO into HOME.
# %cd {HOME}
# !git clone https://github.com/IDEA-Research/GroundingDINO.git
# Enter the cloned repository; the editable install below targets the current directory (".").
%cd {HOME}/GroundingDINO
# One-time editable install of the package from the repository root.
# %pip install -e .

d:\Research\VisualGeneration\Scripts\GroundingDINO


In [4]:
# Model definition used by this notebook: the Swin-B config. To try the Swin-T
# variant instead, set CONFIG_NAME to "GroundingDINO_SwinT_OGC.py" (and use the
# matching Swin-T checkpoint).
CONFIG_NAME = "GroundingDINO_SwinB_cfg.py"
# Each path component is passed to os.path.join separately so the result uses
# the platform's separator throughout, instead of mixing "\" and "/" on Windows.
CONFIG_PATH = os.path.join(HOME, "GroundingDINO", "groundingdino", "config", CONFIG_NAME)
# Report whether the config file is actually present before it is loaded.
print(CONFIG_PATH, "; exist:", os.path.isfile(CONFIG_PATH))

d:\Research\VisualGeneration\Scripts\GroundingDINO/groundingdino/config/GroundingDINO_SwinB_cfg.py ; exist: True


## Download Grounding DINO Weights 🏋️

In [5]:
%cd {HOME}
!mkdir {HOME}\weights
%cd {HOME}/weights
WEIGHTS_FILE = os.path.join(HOME, "weights", 'groundingdino_swinb_cogcoor.pth')

import urllib.request

# !wget https://github.com/IDEA-Research/GroundingDINO/releases/download/v0.1.0-alpha/groundingdino_swint_ogc.pth -o groundingdino_swint_ogc.pth
# !wget  https://github.com/IDEA-Research/GroundingDINO/releases/download/v0.1.0-alpha2/groundingdino_swinb_cogcoor.pth -o groundingdino_swinb_cogcoor.pth

# urllib.request.urlretrieve("https://github.com/IDEA-Research/GroundingDINO/releases/download/v0.1.0-alpha2/groundingdino_swinb_cogcoor.pth", WEIGHTS_FILE)

d:\Research\VisualGeneration\Scripts
d:\Research\VisualGeneration\Scripts\weights


A subdirectory or file d:\Research\VisualGeneration\Scripts\weights already exists.


In [6]:
# Checkpoint file name and its expected location under <HOME>/weights.
WEIGHTS_NAME = "groundingdino_swinb_cogcoor.pth"
WEIGHTS_PATH = os.path.join(HOME, "weights", WEIGHTS_NAME)
# Show the resolved path together with whether the file is on disk.
print(
    WEIGHTS_PATH,
    "; exist:",
    os.path.isfile(WEIGHTS_PATH),
)

d:\Research\VisualGeneration\Scripts\weights\groundingdino_swinb_cogcoor.pth ; exist: True


## Load Grounding DINO Model

In [7]:
# Switch into the cloned repository before importing from `groundingdino`.
# NOTE(review): presumably this is what makes the package importable when the
# editable install was skipped — confirm.
%cd {HOME}/GroundingDINO

# Inference helpers: `load_model` is called right here; `load_image`, `predict`
# and `annotate` are used by the detection cells further down.
from groundingdino.util.inference import load_model, load_image, predict, annotate
# Build the model from the config file and checkpoint located in the earlier cells.
model = load_model(CONFIG_PATH, WEIGHTS_PATH)

d:\Research\VisualGeneration\Scripts\GroundingDINO


  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


final text_encoder_type: bert-base-uncased


## Object Detection with Grounding DINO

In [8]:
import matplotlib.pyplot as plt
import torch
import cv2
import time

In [9]:
# Image to analyse, looked up in <HOME>/data.
# Alternative sample: "HL_microwave_close.jpg"
IMAGE_NAME = "HL_temperature.jpg"
IMAGE_PATH = os.path.join(HOME, "data", IMAGE_NAME)

# load_image returns a pair: `image_source` supplies the image size below and is
# later passed to annotate; `image` is what gets passed to predict.
image_source, image = load_image(IMAGE_PATH)
# The first two shape entries are taken as height and width, in that order.
img_h, img_w = image_source.shape[:2]

In [10]:
begin = time.time()
# TEXT_PROMPT = "button"
# TEXT_PROMPT = "right side power button"
# TEXT_PROMPT = "center light switch"
# TEXT_PROMPT = "Left knob"
TEXT_PROMPT = "light switch . coffee machine power button . "
BOX_TRESHOLD = 0.1
TEXT_TRESHOLD = 0.1

# Tensor of found boxes (with confidence above box_threshold)
# Tensor of logits for text phrases
# List[str] of phrases from prompt found corresponding to boxes (with confidence above text_threshold)
boxes, logits, phrases = predict(
    model=model,
    image=image,
    caption=TEXT_PROMPT,
    box_threshold=BOX_TRESHOLD,
    text_threshold=TEXT_TRESHOLD
)

annotated_frame = annotate(image_source=image_source, boxes=boxes, logits=logits, phrases=phrases)
# print(phrases)

# Get box coordinates
scale_fct = torch.Tensor([img_w, img_h, img_w, img_h])
boxes = boxes * scale_fct

if(boxes.numel() == 0):
   print('No objects detected.')
# Output details for each detection:
for result in zip(boxes, logits, phrases):
    print(f'{result[2]}: confidence {result[1]}, box {result[0].tolist()}')
# print(boxes)
# print(logits)
# print(phrases)

for box in boxes:
  # Draw blue circle as center of each box (0, 0) is top-left of image
  annotated_frame = cv2.circle(annotated_frame, (int(box[0]), int(box[1])), 10, (255, 0, 0), -1)

%matplotlib inline
# sv.plot_image(annotated_frame, (16, 16))
annotated_frame = cv2.cvtColor(annotated_frame, cv2.COLOR_BGR2RGB)
plt.figure(figsize=(16, 16))
plt.imshow(annotated_frame)
plt.axis('off')
plt.show()
print(f'{time.time() - begin} sec')



NameError: name '_C' is not defined