In [None]:
import os
import torch
import warnings
import time
import numpy as np
import cv2
import shutil

warnings.filterwarnings("ignore")

from groundingdino.util.inference import load_model as load_grounding_model
from groundingdino.util.inference import predict, load_image

In [None]:
import logging
# Configure the root logger: INFO and above, each record prefixed with a
# timestamp and its level name.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

In [None]:
# --- Paths --------------------------------------------------------------
INPUT_DIR = "images"    # directory scanned for input images
LABELS_DIR = "labels"   # per-image "<stem>.txt" detection files go here
VIS_DIR = "vis"         # per-image "<stem>.png" visualisations go here

# --- Grounding DINO model files -----------------------------------------
CONFIG = "GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py"
CHECKPOINT = "weights/groundingdino_swint_ogc.pth"

# --- Inference settings -------------------------------------------------
BOX_THRESHOLD = 0.3     # forwarded to predict() as box_threshold
TEXT_THRESHOLD = 0.25   # forwarded to predict() as text_threshold
PROMPT = "head"         # forwarded to predict() as the text caption

# Backward-compatible aliases: the other cells still use the original
# (misspelled) names, so keep them bound to the same values.
LABDELS_DIR = LABELS_DIR
PROMT = PROMPT

In [None]:
# Prefer the GPU, but fall back to the CPU so the notebook still runs on
# machines without CUDA instead of failing when the model is loaded.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [None]:
# Create both output directories up front. The second directory must be
# VIS_DIR (ANNOTATION_DIR is not defined anywhere in this notebook): the
# processing loop saves its visualisations there, and cv2.imwrite does not
# create missing directories -- it only reports the failure via its return value.
for out_dir in (LABDELS_DIR, VIS_DIR):
    os.makedirs(out_dir, exist_ok=True)

In [None]:
# Collect the input images. The extension check is case-insensitive so files
# such as "photo.JPG" are not skipped, and the result is sorted because
# os.listdir returns entries in arbitrary order (keeps runs reproducible).
image_paths = sorted(
    os.path.join(INPUT_DIR, name)
    for name in os.listdir(INPUT_DIR)
    if name.lower().endswith((".jpg", ".jpeg", ".png"))
)
print(f"Найдено {len(image_paths)} изображений")

In [None]:
# Build the Grounding DINO model from CONFIG + CHECKPOINT on the selected
# device; the surrounding prints bracket this step in the cell output.
print("Загружаю Grounding DINO модель...")
model = load_grounding_model(CONFIG, CHECKPOINT, device=device)
print("✓ Модель загружена")

In [None]:
# Run detection on every image. For each one this writes:
#   * LABDELS_DIR/<stem>.txt -- one line per detection:
#       "<phrase> <score> <cx> <cy> <w> <h>" (box values exactly as returned
#       by predict())
#   * VIS_DIR/<stem>.png     -- the image with boxes and captions drawn on it
# A failure on one image is reported and the loop moves on to the next one.

# Make sure the output directories exist: open() raises and cv2.imwrite merely
# returns False when the target directory is missing.
os.makedirs(LABDELS_DIR, exist_ok=True)
os.makedirs(VIS_DIR, exist_ok=True)

for idx, image_path in enumerate(image_paths):
    stem = os.path.splitext(os.path.basename(image_path))[0]
    label_path = os.path.join(LABDELS_DIR, f"{stem}.txt")
    vis_path = os.path.join(VIS_DIR, f"{stem}.png")

    try:
        print(f"[{idx+1}/{len(image_paths)}] {stem}")
        image_rgb, image_tensor = load_image(image_path)
        boxes, logits, phrases = predict(
            model=model,
            image=image_tensor,
            caption=PROMT,
            box_threshold=BOX_THRESHOLD,
            text_threshold=TEXT_THRESHOLD,
            device=device
        )

        # Convert the tensors to plain Python numbers once; both the label
        # file and the drawing code below reuse these tuples.
        detections = [
            (phrase, float(logit), *box.tolist())
            for box, phrase, logit in zip(boxes, phrases, logits)
        ]

        with open(label_path, "w", encoding="utf-8") as f:
            for phrase, score, cx, cy, w, h in detections:
                f.write(f"{phrase} {score:.6f} {cx:.6f} {cy:.6f} {w:.6f} {h:.6f}\n")

        image_vis = image_rgb.copy()
        H, W = image_vis.shape[:2]

        if not detections:
            print("   → Объекты не найдены")

        for phrase, score, cx, cy, w, h in detections:
            # Centre/size values are scaled by the image width/height to
            # obtain the pixel coordinates of the two box corners.
            x1 = int((cx - w / 2) * W)
            y1 = int((cy - h / 2) * H)
            x2 = int((cx + w / 2) * W)
            y2 = int((cy + h / 2) * H)

            cv2.rectangle(image_vis, (x1, y1), (x2, y2), (0, 0, 255), 2)

            # Caption is drawn twice: a thick black pass as an outline, then
            # a thin white pass on top, so it stays readable on any background.
            label_text = f"{phrase}: {score:.2f}"
            text_origin = (x1, max(10, y1 - 10))
            cv2.putText(image_vis, label_text, text_origin,
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 3, cv2.LINE_AA)
            cv2.putText(image_vis, label_text, text_origin,
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1, cv2.LINE_AA)

        image_bgr = cv2.cvtColor(image_vis, cv2.COLOR_RGB2BGR)
        # cv2.imwrite signals failure through its return value instead of
        # raising; turn that into an error so the handler below reports it.
        if not cv2.imwrite(vis_path, image_bgr):
            raise OSError(f"не удалось сохранить визуализацию: {vis_path}")

    except Exception as e:
        # Best-effort batch run: report the failing image and continue.
        print(f"   ✗ Ошибка при обработке {stem}: {e}")

print("Готово!")