<a href="https://colab.research.google.com/github/Chumunaca/Chumunaca/blob/main/yolo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive; the paths under /content/drive/MyDrive
# used later in this notebook depend on this mount.
from google.colab import drive
drive.mount('/content/drive')

import pandas as pd
import numpy as np
import os
from ast import literal_eval
from tqdm import tqdm
import warnings

# Silence every Python warning for the rest of the session.
warnings.filterwarnings('ignore')

# --- 1. Copy and unzip the images ---
# Copy the image archive from the mounted Drive into /content, extract it there,
# then delete the copied archive.
print("Moviendo imágenes desde Drive a Colab...")
!cp '/content/drive/MyDrive/TESIS/imagenes/images_png.zip' /content/
print("Imágenes copiadas. Descomprimiendo...")
# -q: quiet output; -d: extraction directory.
!unzip -q /content/images_png.zip -d /content/
!rm /content/images_png.zip
# NOTE(review): the message below assumes the archive's top-level folder is
# "images_png" — confirm against the zip contents.
print("Imágenes descomprimidas en /content/images_png/")

# --- 2. Load the CSV ---
print("\n--- Iniciando Convertidor a YOLOv8 (con Agrupación de Clases) ---")
try:
    df = pd.read_csv('/content/drive/MyDrive/TESIS/finding_annotations.csv')
except Exception as load_error:
    # Explain the failure in the notebook output, then re-raise so the cell
    # stops instead of continuing without `df`.
    print(f"ERROR: No se pudo cargar 'finding_annotations.csv'. Asegúrate de que exista. Error: {load_error}")
    raise
else:
    print("CSV 'finding_annotations.csv' cargado.")

# --- 3. Prepare the directories ---
# YOLO layout: images/ and labels/ each hold a train/ and a val/ folder.
base_yolo_dir = '/content/yolo_dataset_vindr/'
for relative_dir in ('images/train', 'images/val', 'labels/train', 'labels/val'):
    os.makedirs(os.path.join(base_yolo_dir, relative_dir), exist_ok=True)
print(f"Directorios de YOLO creados en: {base_yolo_dir}")

# --- 4. Keep only the rows that carry an annotation ---
# A row counts as annotated when its 'xmin' value is not null.
df_annotations = df.loc[df['xmin'].notna()].copy()
print(f"Filas totales en CSV: {len(df)}")
print(f"Filas con anotaciones: {len(df_annotations)}")


def _parse_categories(raw_value):
    """Parse a stringified Python literal; any non-string value becomes []."""
    if isinstance(raw_value, str):
        return literal_eval(raw_value)
    return []


df_annotations['finding_categories'] = df_annotations['finding_categories'].apply(_parse_categories)

# --- 5. Define the grouped classes ---
# Per the original author, the mapping follows the dataset's class distribution:
# rare categories are merged into broader groups to reduce imbalance.
# Each finding category maps to the id of its group.
group_mapping = {
    'Mass': 0,
    'Suspicious Calcification': 1,
    # Group 2
    **dict.fromkeys(
        ('Asymmetry', 'Focal Asymmetry', 'Global Asymmetry', 'Architectural Distortion'),
        2,
    ),
    # Group 3
    **dict.fromkeys(
        ('Suspicious Lymph Node', 'Skin Thickening', 'Skin Retraction', 'Nipple Retraction'),
        3,
    ),
}

# Display name of each group; the position in the list is the group id.
group_names = [
    'Masa',
    'Calcificación Sospechosa',
    'Anomalías Estructurales',
    'Características Asociadas',
]

print("\n--- Mapeo de Clases Agrupadas para YOLO ---")
for class_id, label in enumerate(group_names):
    print(f"  {label}: {class_id}")
print("---------------------------------")

# --- 6. Función para Normalizar y Escribir .txt ---
def create_yolo_label_file(row, label_dir, source_image_dir):
    """Append the YOLO label line(s) of one annotation row to its image's label file.

    The pixel-corner box (xmin, ymin, xmax, ymax) is clamped to the image
    bounds and converted to the normalized ``class x_center y_center width
    height`` format. One line is produced per distinct group id that the row's
    categories map to through the module-level ``group_mapping``.

    Args:
        row: Mapping-like record (e.g. a pandas Series) with the keys
            ``image_id``, ``height``, ``width``, ``xmin``, ``ymin``, ``xmax``,
            ``ymax`` and ``finding_categories`` (an iterable of category names).
        label_dir: Directory in which ``<image_id>.txt`` is created or extended.
        source_image_dir: Unused; kept so existing callers keep working.

    Returns:
        True when the row was handled, including the case where none of its
        categories is in ``group_mapping`` and nothing is written. False when
        the row is skipped because of missing/invalid data or a file error;
        the reason is printed instead of being silently discarded.
    """
    import math  # local import: only this function needs it

    try:
        img_id = row['image_id']
        img_w = float(row['width'])
        img_h = float(row['height'])
        x1 = float(row['xmin'])
        y1 = float(row['ymin'])
        x2 = float(row['xmax'])
        y2 = float(row['ymax'])
        # dict.fromkeys de-duplicates while keeping first-seen order, so several
        # categories that share a group yield a single label line.
        group_ids = list(dict.fromkeys(
            group_mapping[cat] for cat in row['finding_categories'] if cat in group_mapping
        ))
    except (KeyError, TypeError, ValueError) as e:
        print(f"ADVERTENCIA: anotación omitida (datos faltantes o inválidos): {e!r}")
        return False

    # NaN/inf or a non-positive image size would turn into nan/inf label values.
    if not all(math.isfinite(v) for v in (img_w, img_h, x1, y1, x2, y2)) or img_w <= 0 or img_h <= 0:
        print(f"ADVERTENCIA: anotación omitida para '{img_id}' (dimensiones o coordenadas no válidas)")
        return False

    # Clamp the corners to the image so normalized values stay within [0, 1].
    x1 = min(max(x1, 0.0), img_w)
    x2 = min(max(x2, 0.0), img_w)
    y1 = min(max(y1, 0.0), img_h)
    y2 = min(max(y2, 0.0), img_h)

    box_w = x2 - x1
    box_h = y2 - y1
    if box_w <= 0 or box_h <= 0:
        print(f"ADVERTENCIA: anotación omitida para '{img_id}' (caja vacía o invertida)")
        return False

    x_center_norm = (x1 + box_w / 2) / img_w
    y_center_norm = (y1 + box_h / 2) / img_h
    width_norm = box_w / img_w
    height_norm = box_h / img_h

    yolo_lines = [
        f"{group_id} {x_center_norm:.6f} {y_center_norm:.6f} {width_norm:.6f} {height_norm:.6f}"
        for group_id in group_ids
    ]
    if not yolo_lines:
        return True

    label_path = os.path.join(label_dir, f"{img_id}.txt")
    try:
        # The file is shared by every annotation of the image, so it is opened
        # in append mode; lines already present are skipped so that re-running
        # the conversion does not duplicate labels.
        already_written = set()
        if os.path.exists(label_path):
            with open(label_path) as f:
                already_written = {line.strip() for line in f}
        new_lines = [line for line in yolo_lines if line not in already_written]
        if new_lines:
            with open(label_path, 'a') as f:
                f.write("\n".join(new_lines) + "\n")
    except OSError as e:
        print(f"ADVERTENCIA: no se pudo escribir '{label_path}': {e}")
        return False

    return True

# --- 7. Build the dataset ---
# For every annotated row: copy its image once into images/<split>/ and append
# its label line(s) to labels/<split>/<image_id>.txt.
import shutil  # stdlib copy; replaces shelling out to `cp` through os.system

print("\nGenerando archivos .txt de YOLO y copiando imágenes...")
processed_count = 0
images_copied = set()

for _, row in tqdm(df_annotations.iterrows(), total=len(df_annotations)):
    # Any 'split' value other than 'training' goes to the validation set.
    split = 'train' if row['split'] == 'training' else 'val'
    label_dir = os.path.join(base_yolo_dir, 'labels', split)
    image_dir = os.path.join(base_yolo_dir, 'images', split)

    img_id = row['image_id']
    image_source_path = f"/content/images_png/{row['study_id']}/{img_id}.png"
    image_dest_path = os.path.join(image_dir, f"{img_id}.png")

    if img_id not in images_copied and os.path.exists(image_source_path):
        try:
            shutil.copy(image_source_path, image_dest_path)
        except OSError as e:
            # Not recorded as copied, so a later row of the same image retries.
            print(f"ADVERTENCIA: no se pudo copiar '{image_source_path}': {e}")
        else:
            images_copied.add(img_id)

    if create_yolo_label_file(row, label_dir, "/content/images_png"):
        processed_count += 1

print(f"\n--- ¡Proceso Completado! ---")
print(f"Se procesaron {processed_count} anotaciones.")
print(f"Se copiaron {len(images_copied)} imágenes únicas.")
print(f"Dataset de YOLO listo en: {base_yolo_dir}")

# --- 8. Create the YAML file ---
# Dataset description: root path, image folders per split, and the
# class-id -> name table built from group_names.
yaml_content = f"""
# --- Configuración del Dataset VinDR-Mammo (Clases Agrupadas) ---
path: {base_yolo_dir}
train: images/train
val: images/val

names:
"""
yaml_content += "".join(f"  {i}: {name}\n" for i, name in enumerate(group_names))

yaml_path = os.path.join(base_yolo_dir, 'data_vindr.yaml')
# The class names contain non-ASCII characters, so the encoding is fixed to
# UTF-8 rather than left to the platform default.
with open(yaml_path, 'w', encoding='utf-8') as f:
    f.write(yaml_content)

print(f"\nArchivo de configuración YAML creado en: {yaml_path}")

# --- 9. Install Ultralytics and train ---
!pip install -q ultralytics

print("\n--- Entrenando YOLOv8l (Ajustado para Evitar OOM) ---")
# {yaml_path} is filled in by IPython from the Python variable of the same name.
# NOTE(review): the banner above says the settings avoid OOM, but batch=24 at
# imgsz=1280 with yolov8l looks memory-heavy — confirm on the target GPU.
!yolo task=detect mode=train model=yolov8l.pt data={yaml_path} epochs=300 imgsz=1280 batch=24 optimizer=auto lr0=0.001 patience=50 augment=True mosaic=0.0 rect=True

In [None]:
# --- 10. Save the trained model to Drive ---
import glob
import shutil

print("\nGuardando el modelo entrenado en Google Drive...")

# Training runs are written to numbered folders (train, train2, ...), so a
# hard-coded folder name breaks whenever the number of previous runs changes.
# Take the most recently written best.pt instead.
candidate_weights = glob.glob('/content/runs/detect/train*/weights/best.pt')
if not candidate_weights:
    raise FileNotFoundError(
        "No se encontró ningún 'best.pt' en /content/runs/detect/train*/weights/. "
        "¿Terminó el entrenamiento?"
    )
model_source_path = max(candidate_weights, key=os.path.getmtime)
drive_dest_dir = '/content/drive/MyDrive/TESIS/yolov8_models/'

# Create the destination directory on Drive if it does not exist yet.
os.makedirs(drive_dest_dir, exist_ok=True)

# Copy the weights; a failure raises here, so the success message below is
# printed only after a real copy.
shutil.copy(model_source_path, drive_dest_dir)

print(f"Modelo de origen: {model_source_path}")
print(f"Modelo 'best.pt' guardado en: {drive_dest_dir}")