# Entrenamiento_3 modelo Multiclase con ResNet18 


In [2]:

!pip -q install --upgrade pip
!pip -q install mlflow scikit-learn matplotlib pillow pandas tqdm

In [3]:
# Comprobar GPU y entorno
import torch, sys
print("Python:", sys.version)
print("PyTorch:", torch.__version__)
print("CUDA disponible:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU:", torch.cuda.get_device_name(0))

Python: 3.10.18 (main, Jun  5 2025, 08:13:51) [Clang 14.0.6 ]
PyTorch: 1.13.1
CUDA disponible: False


In [None]:
# Montar Google Drive
import os, pathlib
from google.colab import drive
drive.mount('/content/drive')

PROJECT_DIR = "/content/drive/MyDrive/ProyectoFinalKC"
pathlib.Path(PROJECT_DIR).mkdir(parents=True, exist_ok=True)
os.chdir(PROJECT_DIR)
print("Working dir:", os.getcwd())

## Guardar quality_df (parquet + csv) en Data/quality_df/

In [4]:
import pandas as pd
from pathlib import Path

# Verificación mínima
required_cols = ["filename", "brightness", "contrast",
                 "is_dark", "is_bright", "is_low_contrast", "is_high_contrast"]
missing = [c for c in required_cols if 'quality_df' not in globals() or c not in quality_df.columns]
if missing:
    raise ValueError(f"No está quality_df en memoria o faltan columnas: {missing}")

quality_df = quality_df.copy()
quality_df["filename"] = quality_df["filename"].astype(str)

# Guardamos lo esencial (flags + métricas básicas)
q_keep = ["filename", "brightness", "contrast", "is_dark", "is_low_contrast"]
quality_min = quality_df[q_keep].drop_duplicates(subset="filename")

out_dir = Path("Data/quality_df"); out_dir.mkdir(parents=True, exist_ok=True)
pq_path  = out_dir / "quality_df.parquet"
csv_path = out_dir / "quality_df.csv"

quality_min.to_parquet(pq_path, index=False)
quality_min.to_csv(csv_path, index=False)

print("Guardado quality_df en:")
print(" -", pq_path)
print(" -", csv_path)
print("Filas:", len(quality_min))

ValueError: No está quality_df en memoria o faltan columnas: ['filename', 'brightness', 'contrast', 'is_dark', 'is_bright', 'is_low_contrast', 'is_high_contrast']

## Unir flags de calidad al parquet base → dataset_meta_ojouni_quality.parquet

In [None]:
meta_path = Path("Data/parquet/dataset_meta_ojouni.parquet")
q_path    = Path("Data/quality_df/quality_df.parquet")

meta = pd.read_parquet(meta_path).reset_index(drop=True)
qdf  = pd.read_parquet(q_path).reset_index(drop=True)

meta["filename"] = meta["filename"].astype(str)
qdf["filename"]  = qdf["filename"].astype(str)

# Seleccionamos los flags de calidad (una fila por imagen)
q_flags = qdf[["filename", "is_dark", "is_low_contrast"]].drop_duplicates(subset="filename")

merged = meta.merge(q_flags, on="filename", how="left", validate="one_to_one")

# Relleno seguro (si alguna imagen no está en quality_df)
for c in ["is_dark", "is_low_contrast"]:
    if c not in merged.columns:
        merged[c] = False
    merged[c] = merged[c].fillna(False).astype(bool)

out_meta = Path("Data/parquet/dataset_meta_ojouni_quality.parquet")
merged.to_parquet(out_meta, index=False)

print("Parquet enriquecido guardado en:", out_meta)
print("is_dark True %:", round(merged["is_dark"].mean()*100, 2))
print("is_low_contrast True %:", round(merged["is_low_contrast"].mean()*100, 2))
print("Filas:", len(merged))

In [None]:
# Para confirmar si hy fugas de pacientes entre splits!!!!!!!!!
import pandas as pd
import numpy as np

df = pd.read_parquet("Data/parquet/dataset_meta_ojouni_quality.parquet").reset_index(drop=True)
# Extrae un ID de paciente del filename (ajusta si tu patrón difiere)
df["patient_id"] = df["filename"].str.extract(r"^(\d+)_").astype(str)

train_p = set(df.loc[tr_idx, "patient_id"])
val_p   = set(df.loc[va_idx, "patient_id"])
test_p  = set(df.loc[te_idx, "patient_id"])

print("Pacientes solapados train↔val:", len(train_p & val_p))
print("Pacientes solapados train↔test:", len(train_p & test_p))
print("Pacientes solapados val↔test:", len(val_p & test_p))

In [None]:
# Ejecutar el script que construye los .pt (subsets) en Data/dataset/
RUTA_CREATE = "General/create_split_dataset_Leti.py"  
assert os.path.exists(RUTA_CREATE), f"No se encuentra el script: {RUTA_CREATE}"
!python -u {RUTA_CREATE}

In [None]:
data_dir = os.path.join(os.getcwd(), "Data", "dataset")
os.makedirs(data_dir, exist_ok=True)
esperados = ["train_dataset.pt", "val_dataset.pt", "test_dataset.pt"]
print("Data/dataset:", os.listdir(data_dir) if os.path.isdir(data_dir) else "No existe")

faltan = [f for f in esperados if not os.path.exists(os.path.join(data_dir, f))]
assert not faltan, f"FALTAN archivos en Data/dataset: {faltan}"
print("Subsets OK:", esperados)

In [None]:
# Ejecutar script real de entrenamiento
RUTA_SCRIPT = "General/train_3_Leticia.py"  
assert os.path.exists(RUTA_SCRIPT), f"No se encuentra el script: {RUTA_SCRIPT}"

# Ejecutar el entrenamiento (tu script ya configura MLflow y guarda artefactos en mlruns)
!python -u {RUTA_SCRIPT}