# ***Etiquetado de Imágenes con Grounding DINO***



1. Verificación de GPU Disponible

In [None]:
# Print the NVIDIA driver/GPU status table to confirm a GPU is attached to this runtime.
!nvidia-smi

2. Definición de la ruta principal del proyecto

In [None]:
cd/content

In [None]:
import os
# Remember the current working directory; later cells build the repository
# and weights paths relative to HOME.
HOME = os.getcwd()

3. Clonado del repositorio GroundingDINO

In [None]:
# Clone the GroundingDINO sources into HOME/GroundingDINO.
%cd {HOME}
!git clone https://github.com/IDEA-Research/GroundingDINO.git

4. Instalación del modelo como paquete editable

In [None]:
%cd {HOME}/GroundingDINO
!pip install -q -e .
!pip install supervision

5. Descarga de pesos preentrenados

In [None]:
!mkdir {HOME}/weights
%cd {HOME}/weights
!wget -q https://github.com/IDEA-Research/GroundingDINO/releases/download/v0.1.0-alpha/groundingdino_swint_ogc.pth

6. Carga del modelo

In [None]:
cd /content/GroundingDINO

%cd {HOME}/GroundingDINO
from groundingdino.util.inference import load_model, load_image, predict, annotate

model = load_model(f"{HOME}/GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py",
                   f"{HOME}/weights/groundingdino_swint_ogc.pth")

7. Predicción y visualización del objeto en la imagen


In [None]:
import os
import supervision as sv

IMAGE_NAME = "/content/data/images/moneda_7.jpg"
IMAGE_PATH = os.path.join(HOME, 'data', IMAGE_NAME)

TEXT_PROMPT = "coin"
BOX_THRESHOLD = 0.35
TEXT_THRESHOLD = 0.35

image_source, image = load_image(IMAGE_NAME)

boxes, logits, phrases = predict(
    model = model,
    image = image,
    caption = TEXT_PROMPT,
    box_threshold = BOX_THRESHOLD,
    text_threshold = TEXT_THRESHOLD,
    device = 'cuda'
)

annotated_frame = annotate(image_source=image_source, boxes=boxes, logits=logits, phrases=phrases)

%matplotlib inline
sv.plot_image(annotated_frame, (16,16))

8. Ajustes de entorno y limpieza de directorios temporales

In [None]:
import locale

# Force the reported preferred encoding to UTF-8 (presumably the usual
# workaround for shell commands failing under a non-UTF-8 locale - confirm).
# The replacement keeps the standard-library signature
# getpreferredencoding(do_setlocale=True), so callers that pass the argument,
# e.g. locale.getpreferredencoding(False), do not raise TypeError.
locale.getpreferredencoding = lambda do_setlocale=True: "UTF-8"

# Delete every ".ipynb_checkpoints" directory found under the images folder
# (presumably so it is not picked up when that folder is listed later).
!find /content/data/images -type d -name ".ipynb_checkpoints" -exec rm -r {} +

9. Importación de librerías para el procesamiento y visualización

In [None]:
import os
from time import time

import cv2
import torch
from PIL import Image

# Import through the `groundingdino` package installed with `pip install -e .`,
# the same path used when the model was first loaded. Going through
# `GroundingDINO.groundingdino` only works while the parent of the clone is on
# sys.path and loads a second copy of the same modules under another name.
from groundingdino.util.inference import load_model, predict, annotate
import groundingdino.datasets.transforms as T

10. Sistema de detección por lotes y generación automática de anotaciones

In [None]:
# Inference device: the first CUDA GPU when one is available, otherwise the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device('cuda:0')
else:
    DEVICE = torch.device('cpu')

def read_images_from_folder(folder_path):
    """Load every readable image found directly inside ``folder_path``.

    Entries are visited in sorted name order so repeated runs process the
    images in the same sequence. Entries for which ``cv2.imread`` returns
    ``None`` (sub-directories, non-image files, corrupt images) are reported
    and skipped, so the returned lists never contain ``None``.

    Args:
        folder_path: Directory to scan (not recursive).

    Returns:
        A tuple ``(images, clases)`` of two parallel lists: the images as
        returned by ``cv2.imread`` and the matching file names without their
        extension.
    """
    images = []
    clases = []

    for file_name in sorted(os.listdir(folder_path)):
        img_path = os.path.join(folder_path, file_name)
        img = cv2.imread(img_path)
        if img is None:
            # imread signals failure with None instead of raising.
            print(f"Skipping unreadable file: {img_path}")
            continue
        images.append(img)
        clases.append(os.path.splitext(file_name)[0])

    return images, clases

def save_results(image, boxes, class_id, out_folder):
    """Save an image and a label file with one line per detected box.

    The image is written as ``<timestamp>.jpg`` and the labels as
    ``<timestamp>.txt`` inside ``out_folder``, which is created when missing.
    Each label line has the form ``class_id xc yc w h``; the four box values
    are clamped to the range [0, 1]. Every row of ``boxes`` is written, not
    only the first one. An empty ``boxes`` produces an empty label file.

    Args:
        image: Image passed unchanged to ``cv2.imwrite``.
        boxes: Sequence of boxes, each indexable as (xc, yc, width, height).
            Assumed to be already normalized to [0, 1] - TODO confirm
            against the detector output.
        class_id: Class index written at the start of every label line.
        out_folder: Destination directory for both files.
    """
    # Without this, cv2.imwrite fails silently and open() raises when the
    # folder does not exist yet.
    os.makedirs(out_folder, exist_ok=True)

    # Norm: clamp every coordinate into [0, 1] and build one line per box.
    label_lines = []
    for box in boxes:
        xc, yc, an, al = (max(0.0, min(1.0, float(value))) for value in box[:4])
        label_lines.append(f"{class_id} {xc} {yc} {an} {al}")

    # Timestamp without the decimal point, shared by the image and its labels.
    time_now = str(time()).replace('.', '')
    cv2.imwrite(f"{out_folder}/{time_now}.jpg", image)

    with open(f"{out_folder}/{time_now}.txt", 'w') as f:
        f.write("".join(f"{line}\n" for line in label_lines))

def main():
    """Run Grounding DINO over a folder of images and save the detections.

    Reads the images from ``/content/data/images``, loads the Swin-T
    Grounding DINO model, runs ``predict`` on each image with the prompt
    ``'coin'`` and, for every image with at least one detection, calls
    ``save_results`` to store the image and its labels under
    ``/content/data/annotations`` using class id 2.
    """
    img_folder_path = '/content/data/images'
    out_folder_path = '/content/data/annotations'
    class_id = 2
    save_results_flag = True

    images, classes = read_images_from_folder(img_folder_path)
    num_images = len(images)

    print(f"Imagenes: {num_images}")
    print(f'Nombres: {classes}')

    # Config Path (already absolute, so it is used as-is)
    config_path = "/content/GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py"

    # CheckPoint Weights
    check_point_path = '/content/weights/groundingdino_swint_ogc.pth'

    # Model
    model = load_model(config_path, check_point_path)

    # Prompt
    text_prompt = 'coin'
    box_threshold = 0.35
    text_threshold = 0.35

    # The preprocessing pipeline is the same for every image: build it once.
    transform = T.Compose([
        T.RandomResize([800], max_size=1333),
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    for name, img in zip(classes, images):
        print("------------------//--------------------")
        print(f"Image: {name}")

        if img is None:
            # cv2.imread returns None for entries it cannot decode.
            print(f"Skipping unreadable image: {name}")
            continue

        # cv2.imread yields BGR arrays while PIL interprets arrays as RGB;
        # convert first so the model does not see swapped colour channels.
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_source = Image.fromarray(img_rgb).convert("RGB")
        img_transform, _ = transform(img_source, None)

        boxes, _logits, _phrases = predict(
            model=model,
            image=img_transform,
            caption=text_prompt,
            box_threshold=box_threshold,
            text_threshold=text_threshold,
            device=DEVICE)

        # Store the original (BGR) image, which is what cv2.imwrite expects.
        if save_results_flag and len(boxes) != 0:
            save_results(img, boxes, class_id, out_folder_path)

# Run the batch labelling when this cell/script is executed as the main module.
if __name__ == "__main__":
    main()

11. Compresión de resultados para exportación

In [None]:
# Recursively pack the annotations folder into /content/data/annotations.zip.
!zip -r "/content/data/annotations.zip" "/content/data/annotations"

12. Eliminación de directorios temporales o de entrenamiento previo

In [None]:
import os
from shutil import rmtree

# Remove the training images folder. Skip the call when the folder is not
# there (fresh runtime or cell re-run) instead of raising FileNotFoundError.
TRAIN_IMAGES_DIR = "/content/dataTrain/train/images"
if os.path.isdir(TRAIN_IMAGES_DIR):
    rmtree(TRAIN_IMAGES_DIR)

13. Instalación de Ultralytics y descarga del modelo YOLOv8

In [None]:
!pip install ultralytics

In [None]:
cd /content/models

In [None]:
!wget https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8n.pt

14. Entrenamiento del modelo YOLOv8 con datos personalizados


In [None]:
cd /content

In [None]:
from ultralytics import YOLO

In [None]:
# Create the YOLO model from the downloaded yolov8n.pt weights.
# NOTE(review): this rebinds `model`, which earlier held the Grounding DINO model.
model = YOLO('/content/models/yolov8n.pt')

In [None]:
# Train on the dataset described by data.yaml: 40 epochs, batch size 64,
# 640-pixel images, on the CUDA device.
model.train(
    data='/content/dataTrain/data.yaml',
    epochs=40,
    batch=64,
    imgsz=640,
    device='cuda',
)