In [None]:
from pathlib import Path
from sklearn.model_selection import KFold
import shutil
from ultralytics import YOLO
import pandas as pd


In [1]:
import torch
# Per the PyTorch docs, enabling this flag lets cuDNN benchmark several
# convolution algorithms and keep the fastest one for the shapes it sees.
# NOTE(review): the training library may set this flag on its own; confirm
# this global override is still needed.
torch.backends.cudnn.benchmark = True


In [None]:
from pathlib import Path

# Class names (the background class is not included).
class_names = [
    "Vehiculos",
    "Bodegas",
    "Caminos",
    "Rios",
    "Zonas de mineria ilegal"
]

# Write a dataset.yaml into each dataset root.
for dataset_name in ["modelo_yolov11_dataset_filtrado", "modelo_yolov11_dataset_completo"]:
    DATA_ROOT = Path(dataset_name)

    # Build the YAML one line at a time so every key starts at column 0.
    # An indented triple-quoted f-string followed by .strip() only removes the
    # indentation of the first line; the remaining keys would stay indented
    # and would no longer be top-level entries of the mapping.
    yaml_content = "\n".join([
        f"path: {DATA_ROOT.resolve()}",
        "train: train",
        "val: val",
        "test: test",
        f"nc: {len(class_names)}",
        f"names: {class_names}",
    ])

    yaml_path = DATA_ROOT / "dataset.yaml"
    with open(yaml_path, "w", encoding="utf-8") as f:
        f.write(yaml_content)

    print(f"✅ YAML generado en: {yaml_path}")


In [None]:
from pathlib import Path
from sklearn.model_selection import KFold
import shutil
from ultralytics import YOLO
import pandas as pd

def _fold_dataset_yaml(fold_dir, class_names):
    """Return the dataset YAML text for one fold, with every key at column 0."""
    return "\n".join([
        f"path: {fold_dir.resolve()}",
        "train: images/train",
        "val: images/val",
        f"nc: {len(class_names)}",
        f"names: {class_names}",
    ])


def ejecutar_kfold_yolov8(
    dataset_name: str,
    class_names: list,
    model_weights: str = "yolov8s.pt",
    num_folds: int = 5,
    epochs: int = 40,
    batch_size: int = 16,
    img_size: int = 640,
    patience: int = 5
):
    """Run K-fold cross-validation of a YOLO model over ``<dataset_name>/train``.

    For every fold, the ``*.jpg`` images and their ``.txt`` labels are copied
    into ``kfold_results_<dataset_name>/fold_<i>/{images,labels}/{train,val}``
    and a ``dataset.yaml`` is written into the fold directory. A model is then
    trained from ``model_weights``, or resumed when
    ``runs/detect/<dataset_name>_cv_fold_<i>/weights/last.pt`` exists, and
    validated on the fold. The per-fold metric dictionaries are written to
    ``kfold_results_<dataset_name>/cv_results_summary.csv``.

    Fold directories are not cleared before copying. Delete the results
    directory before re-running with a different ``num_folds`` or a changed
    dataset, otherwise files from the previous split stay in place.

    Args:
        dataset_name: Dataset root directory; also used to name outputs.
        class_names: Class names written to each fold's ``dataset.yaml``.
        model_weights: Weights passed to ``YOLO`` when training from scratch.
        num_folds: Number of folds (``n_splits`` of ``KFold``).
        epochs: Training epochs, passed in both the fresh and resume paths.
        batch_size: Batch size; only passed when training from scratch.
        img_size: Image size; only passed when training from scratch.
        patience: Patience value; only passed when training from scratch.

    Raises:
        ValueError: If no ``*.jpg`` image is found in the train images folder.
        AssertionError: If any image has no matching ``.txt`` label file.
    """
    DATA_ROOT = Path(dataset_name)
    IMAGES_DIR = DATA_ROOT / "train" / "images"
    LABELS_DIR = DATA_ROOT / "train" / "labels"
    CV_ROOT = Path(f"kfold_results_{dataset_name}")
    CV_ROOT.mkdir(parents=True, exist_ok=True)

    # Sorted so that the seeded split below is reproducible across runs.
    all_images = sorted(IMAGES_DIR.glob("*.jpg"))
    if not all_images:
        # Fail here with the folder name instead of a generic splitter error.
        raise ValueError(f"❌ No se encontraron imágenes .jpg en: {IMAGES_DIR}")
    all_labels = [LABELS_DIR / (img.stem + ".txt") for img in all_images]
    assert all(lbl.exists() for lbl in all_labels), "❌ Faltan archivos .txt para algunas imágenes."

    kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)
    results_summary = []

    for fold_idx, (train_idx, val_idx) in enumerate(kf.split(all_images)):
        print(f"\n🔁 Entrenando fold {fold_idx + 1}/{num_folds}...")

        fold_dir = CV_ROOT / f"fold_{fold_idx}"
        train_img_dir = fold_dir / "images" / "train"
        val_img_dir = fold_dir / "images" / "val"
        train_lbl_dir = fold_dir / "labels" / "train"
        val_lbl_dir = fold_dir / "labels" / "val"
        for d in [train_img_dir, val_img_dir, train_lbl_dir, val_lbl_dir]:
            d.mkdir(parents=True, exist_ok=True)

        for idx in train_idx:
            shutil.copy(all_images[idx], train_img_dir)
            shutil.copy(all_labels[idx], train_lbl_dir)
        for idx in val_idx:
            shutil.copy(all_images[idx], val_img_dir)
            shutil.copy(all_labels[idx], val_lbl_dir)

        # The YAML is built line by line: an indented triple-quoted f-string
        # would leave every key after the first one indented in the file.
        yaml_path = fold_dir / "dataset.yaml"
        with open(yaml_path, "w", encoding="utf-8") as f:
            f.write(_fold_dataset_yaml(fold_dir, class_names))

        run_name = f"{dataset_name}_cv_fold_{fold_idx}"
        project_dir = Path("runs/detect")
        last_ckpt = project_dir / run_name / "weights" / "last.pt"

        # NOTE(review): an existing last.pt is treated as an interrupted run.
        # Confirm how the training library reacts when that run had already
        # finished, and that runs are really saved under runs/detect.
        if last_ckpt.exists():
            print(f"🔄 Reanudando fold {fold_idx} desde checkpoint: {last_ckpt}")
            model = YOLO(str(last_ckpt))
            model.train(
                resume=True,
                epochs=epochs,
                name=run_name
            )
        else:
            print(f"🚀 Entrenando fold {fold_idx} desde cero...")
            model = YOLO(model_weights)
            model.train(
                data=str(yaml_path),
                epochs=epochs,
                imgsz=img_size,
                batch=batch_size,
                name=run_name,
                patience=patience
            )

        metrics = model.val(data=str(yaml_path)).results_dict
        metrics["fold"] = fold_idx
        results_summary.append(metrics)

    df_results = pd.DataFrame(results_summary)
    csv_output = CV_ROOT / "cv_results_summary.csv"
    df_results.to_csv(csv_output, index=False)
    print(f"\n✅ Cross-validation completa. Resultados guardados en: {csv_output}")

def entrenar_modelo_final(
    dataset_name: str,
    class_names: list,
    model_weights: str = "yolov8s.pt",
    epochs: int = 80,
    batch_size: int = 16,
    img_size: int = 640,
    patience: int = 10
):
    """Train (or resume) one model with ``<dataset_name>/dataset.yaml`` and save it.

    When ``runs/detect/<dataset_name>_final/weights/last.pt`` exists the run
    is resumed from it; otherwise training starts from ``model_weights``. The
    resulting model is saved to
    ``final_model_<dataset_name>/<dataset_name>_final_model.pt``.

    Args:
        dataset_name: Dataset root directory; also used to name outputs.
        class_names: Accepted for symmetry with the k-fold helper; it is not
            read by this function.
        model_weights: Weights passed to ``YOLO`` when training from scratch.
        epochs: Training epochs, passed in both the fresh and resume paths.
        batch_size: Batch size; only passed when training from scratch.
        img_size: Image size; only passed when training from scratch.
        patience: Patience value; only passed when training from scratch.
    """
    dataset_root = Path(dataset_name)
    output_dir = Path(f"final_model_{dataset_name}")
    output_dir.mkdir(parents=True, exist_ok=True)

    run_name = f"{dataset_name}_final"
    checkpoint = Path("runs/detect") / run_name / "weights" / "last.pt"

    # Pick the weights to load and the train() arguments for the chosen path.
    if checkpoint.exists():
        print(f"🔄 Reanudando modelo final desde checkpoint: {checkpoint}")
        weights_source = str(checkpoint)
        train_kwargs = {
            "resume": True,
            "epochs": epochs,
            "name": run_name,
        }
    else:
        print(f"🚀 Entrenando modelo final sobre todo el dataset: {dataset_name}...")
        weights_source = model_weights
        train_kwargs = {
            "data": str(dataset_root / "dataset.yaml"),
            "epochs": epochs,
            "imgsz": img_size,
            "batch": batch_size,
            "name": run_name,
            "patience": patience,
        }

    model = YOLO(weights_source)
    model.train(**train_kwargs)

    saved_to = output_dir / f"{dataset_name}_final_model.pt"
    model.save(str(saved_to))
    print(f"\n✅ Modelo final guardado en: {saved_to}")

# Entrenamiento del modelo multiclase para el dataset filtrado

In [None]:
# Reuse the notebook-level class_names list (the same one written into the
# dataset.yaml files) instead of re-typing it, so the two cannot drift apart.
ejecutar_kfold_yolov8(
    dataset_name="modelo_yolov11_dataset_filtrado",
    class_names=class_names
)

In [None]:
# Reuse the notebook-level class_names list (the same one written into the
# dataset.yaml files) instead of re-typing it, so the two cannot drift apart.
entrenar_modelo_final(
    dataset_name="modelo_yolov11_dataset_filtrado",
    class_names=class_names
)


# Entrenamiento del modelo multiclase para el dataset completo