# In [None]:
# ==============================
# GLIP-Large en COCO val2017 (vehículos) con thresholds bajos
# ==============================

import os
import torch
import json
from tqdm import tqdm
from PIL import Image, ImageDraw
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from transformers import GLIPProcessor, GLIPForObjectDetection
import matplotlib.pyplot as plt

# Run on the GPU when CUDA is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# ----- 1️⃣ COCO dataset -----
dataDir = "./COCO"
dataType = "val2017"
annFile = os.path.join(dataDir, "annotations", f"instances_{dataType}.json")
coco_gt = COCO(annFile)

# Vehicle class names used as the detection prompt, and their COCO category ids.
vehicle_classes = ['car', 'bus', 'truck', 'bicycle', 'motorcycle']
cat_ids = coco_gt.getCatIds(catNms=vehicle_classes)

# Collect the ids of all images that contain at least one vehicle category.
# Each category is queried on its own and the results are unioned, so an image
# qualifies as soon as it has any one of them.
img_ids_set = {
    img_id
    for cat_id in cat_ids
    for img_id in coco_gt.getImgIds(catIds=[cat_id])
}

img_ids = list(img_ids_set)
images = coco_gt.loadImgs(img_ids)

print(f"{len(images)} imágenes con al menos un vehículo encontradas.")

# ----- 2️⃣ Load GLIP-Large -----
# NOTE(review): confirm that the installed transformers release exports
# GLIPProcessor / GLIPForObjectDetection and that this checkpoint id exists.
model_id = "Salesforce/glip-large"
processor = GLIPProcessor.from_pretrained(model_id)

# Load the weights, move the network to the selected device and put it in
# evaluation mode.
model = GLIPForObjectDetection.from_pretrained(model_id)
model = model.to(device)
model.eval()

# ----- 3️⃣ Inference and conversion to COCO results format -----
coco_results = []

# Resolve every prompt class to its COCO category id once, instead of querying
# the annotation index for each detection. Names the annotations do not know
# are left out so that their detections are skipped below.
class_to_cat_id = {}
for class_name in vehicle_classes:
    matching_ids = coco_gt.getCatIds(catNms=[class_name])
    if matching_ids:
        class_to_cat_id[class_name] = matching_ids[0]

for img in tqdm(images, desc="Procesando imágenes"):
    img_path = os.path.join(dataDir, "images", dataType, img['file_name'])
    # Release the file handle as soon as the pixels are decoded; convert()
    # returns a separate in-memory image that outlives the `with` block.
    with Image.open(img_path) as source_image:
        image = source_image.convert("RGB")
    width, height = image.size

    # Prepare the model inputs (image + class names as the text prompt).
    inputs = processor(images=image, text=vehicle_classes, return_tensors="pt").to(device)

    with torch.no_grad():
        outputs = model(**inputs)

    # Post-process the detections. target_sizes is given as (height, width);
    # presumably the processor uses it to rescale normalized boxes to pixel
    # coordinates in (x1, y1, x2, y2) order — confirm against the processor docs.
    target_sizes = torch.tensor([[height, width]], device=device)
    results = processor.post_process_object_detection(outputs, threshold=0.25, target_sizes=target_sizes)[0]

    # Draw the detections and record them in COCO results format.
    draw = ImageDraw.Draw(image)
    for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
        label_idx = int(label)
        # Skip labels outside the prompt list: they have no COCO category, and
        # a negative index would silently wrap around to the wrong class.
        if not 0 <= label_idx < len(vehicle_classes):
            continue
        label_text = vehicle_classes[label_idx]
        category_id = class_to_cat_id.get(label_text)
        if category_id is None:
            continue

        score_value = float(score)
        x1, y1, x2, y2 = box.tolist()
        coco_results.append({
            "image_id": img['id'],
            "category_id": category_id,
            # COCO expects [x, y, width, height].
            "bbox": [x1, y1, x2 - x1, y2 - y1],
            "score": score_value,
        })
        # Draw the box and its caption just above the top-left corner.
        draw.rectangle([x1, y1, x2, y2], outline="red", width=2)
        draw.text((x1, y1 - 10), f"{label_text}: {score_value:.2f}", fill="red")

# ----- 4️⃣ Save results -----
# Write all detections as one JSON array; the evaluation step reads this file.
with open("glip_large_highrec.json", "w") as results_file:
    results_file.write(json.dumps(coco_results))

print(f"Resultados guardados: {len(coco_results)} detecciones")

# ----- 5️⃣ COCO evaluation -----
# loadRes cannot build a results index from an empty list, so evaluation only
# runs when at least one detection was produced.
if coco_results:
    coco_dt = coco_gt.loadRes("glip_large_highrec.json")
    coco_eval = COCOeval(coco_gt, coco_dt, iouType='bbox')
    # Score only the vehicle categories, and only the images that were actually
    # run through the model, rather than iterating over the whole split.
    coco_eval.params.catIds = cat_ids
    coco_eval.params.imgIds = img_ids
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()
else:
    print("No se detectaron objetos, ajusta thresholds o prompt.")

# Show the last processed image, with its detections drawn on it.
fig, ax = plt.subplots(figsize=(10, 8))
ax.imshow(image)
ax.axis("off")
plt.show()
