In [None]:
# Reinstalar PyTorch con CUDA (ejecutar solo si CUDA no funciona)
# Descomentar y ejecutar:

# !pip uninstall torch torchvision -y
# !pip install torch torchvision --index-url https://download.pytorch.org/whl/cu124

# Después reiniciar el kernel (Ctrl+Shift+P -> "Restart Kernel")

In [2]:
# Check whether PyTorch can see a CUDA device; if so, report the name and
# total memory of device 0.
import torch

cuda_ready = torch.cuda.is_available()
print(f"CUDA disponible: {cuda_ready}")
if cuda_ready:
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    total_bytes = torch.cuda.get_device_properties(0).total_memory
    print(f"Memoria: {total_bytes / 1e9:.1f} GB")

CUDA disponible: False


In [None]:
# Imports shared by the cells below (stdlib first, then third-party).
from pathlib import Path

import yaml
from ultralytics import YOLO

# Configuration
# Dataset root; the relative path is resolved against the kernel's working directory.
DATASET_PATH = Path("../dataset_keypoints")
# Nano pose checkpoint; the original note says it was picked as a light model for a 1070 GPU.
MODEL_NAME = "yolov8n-pose"

# Echo the effective configuration so it is visible in the cell output.
print(
    f"Dataset: {DATASET_PATH.absolute()}",
    f"Modelo base: {MODEL_NAME}",
    sep="\n",
)

In [None]:
# Verificar dataset
images = list((DATASET_PATH / "images").glob("*.jpg"))
labels = list((DATASET_PATH / "labels").glob("*.txt"))

print(f"Imágenes: {len(images)}")
print(f"Labels: {len(labels)}")

# Mostrar ejemplo de label
if labels:
    with open(labels[0]) as f:
        print(f"\nEjemplo label ({labels[0].name}):")
        print(f.read())

In [None]:
# Dividir en train/val (80/20)
import shutil
import random

train_img = DATASET_PATH / "images" / "train"
val_img = DATASET_PATH / "images" / "val"
train_lbl = DATASET_PATH / "labels" / "train"
val_lbl = DATASET_PATH / "labels" / "val"

# Crear directorios
for d in [train_img, val_img, train_lbl, val_lbl]:
    d.mkdir(exist_ok=True)

# Obtener imágenes en la raíz (no en subcarpetas)
root_images = [f for f in (DATASET_PATH / "images").glob("*.jpg") if f.is_file()]

if root_images:
    random.shuffle(root_images)
    split = int(len(root_images) * 0.8)
    
    for img in root_images[:split]:
        lbl = DATASET_PATH / "labels" / f"{img.stem}.txt"
        shutil.move(str(img), train_img / img.name)
        if lbl.exists():
            shutil.move(str(lbl), train_lbl / lbl.name)
    
    for img in root_images[split:]:
        lbl = DATASET_PATH / "labels" / f"{img.stem}.txt"
        shutil.move(str(img), val_img / img.name)
        if lbl.exists():
            shutil.move(str(lbl), val_lbl / lbl.name)
    
    print(f"Train: {len(list(train_img.glob('*.jpg')))}")
    print(f"Val: {len(list(val_img.glob('*.jpg')))}")
else:
    print("Ya están divididos o no hay imágenes en la raíz")
    print(f"Train: {len(list(train_img.glob('*.jpg')))}")
    print(f"Val: {len(list(val_img.glob('*.jpg')))}")

In [None]:
# Actualizar data.yaml con rutas correctas
yaml_content = f"""# Dataset YOLO-Pose para miniaturas
path: {DATASET_PATH.absolute()}
train: images/train
val: images/val

# 1 keypoint (frente), 3 valores: x, y, visibilidad
kpt_shape: [1, 3]

# Clases
names:
  0: miniature

# No flip horizontal (afecta orientación)
flip_idx: []
"""

with open(DATASET_PATH / "data.yaml", 'w') as f:
    f.write(yaml_content)
    
print("data.yaml actualizado")
print(yaml_content)

In [None]:
# Entrenar
model = YOLO(f"{MODEL_NAME}.pt")

results = model.train(
    data=str(DATASET_PATH / "data.yaml"),
    epochs=100,
    imgsz=640,
    batch=8,          # Ajustar según tu VRAM (8GB = batch 8-16)
    device=0,         # GPU
    workers=4,
    patience=20,      # Early stopping
    save=True,
    project="runs/pose",
    name="miniatures_pose",
    exist_ok=True,
    # Augmentations (cuidado con flip)
    fliplr=0.0,       # NO flip horizontal
    flipud=0.0,       # NO flip vertical
    mosaic=0.5,
    degrees=180,      # Rotación sí
    scale=0.3,
)

In [None]:
# Ver resultados
from IPython.display import Image, display

results_dir = Path("runs/pose/miniatures_pose")

# Mostrar curvas de entrenamiento
if (results_dir / "results.png").exists():
    display(Image(filename=str(results_dir / "results.png"), width=800))

In [None]:
# Probar el modelo
best_model = YOLO(str(results_dir / "weights" / "best.pt"))

# Validar
metrics = best_model.val()
print(f"\nmAP50: {metrics.box.map50:.3f}")
print(f"mAP50-95: {metrics.box.map:.3f}")

In [None]:
# Probar en imágenes de validación
import cv2
import matplotlib.pyplot as plt

val_images = list((DATASET_PATH / "images" / "val").glob("*.jpg"))[:4]

fig, axes = plt.subplots(2, 2, figsize=(12, 12))

for ax, img_path in zip(axes.flat, val_images):
    results = best_model(str(img_path), verbose=False)
    
    # Dibujar resultado
    img = cv2.imread(str(img_path))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    
    for r in results:
        if r.keypoints is not None:
            for i, (box, kpts) in enumerate(zip(r.boxes.xyxy, r.keypoints.xy)):
                x1, y1, x2, y2 = map(int, box)
                
                # Dibujar bbox
                cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
                
                # Centro
                cx, cy = (x1 + x2) // 2, (y1 + y2) // 2
                
                # Keypoint (frente)
                if len(kpts) > 0:
                    fx, fy = int(kpts[0][0]), int(kpts[0][1])
                    if fx > 0 and fy > 0:  # Válido
                        cv2.arrowedLine(img, (cx, cy), (fx, fy), (255, 255, 0), 2, tipLength=0.3)
                        cv2.circle(img, (fx, fy), 5, (255, 255, 0), -1)
    
    ax.imshow(img)
    ax.set_title(img_path.name)
    ax.axis('off')

plt.tight_layout()
plt.show()

In [None]:
# Copiar modelo final
import shutil

src = results_dir / "weights" / "best.pt"
dst = Path("..") / "miniatures_pose.pt"

shutil.copy(src, dst)
print(f"✅ Modelo copiado a: {dst.absolute()}")
print(f"   Tamaño: {dst.stat().st_size / 1e6:.1f} MB")