# 03 - Prediccion con modelo entrenado

Este notebook carga un modelo de AutoGluon ya entrenado, genera predicciones (y opcionalmente probabilidades por clase) para un CSV de entrada y guarda el resultado en un CSV de salida.

In [None]:
from pathlib import Path

import pandas as pd
from autogluon.tabular import TabularPredictor

In [None]:
def find_project_root(start: Path) -> Path:
    """Return the closest directory that looks like the project root.

    The resolved ``start`` path and then each of its ancestors are inspected,
    nearest first. A directory qualifies when it contains both a ``data`` and
    a ``modeling`` entry.

    Args:
        start: Path from which the upward search begins.

    Returns:
        The first qualifying directory, as a resolved path.

    Raises:
        FileNotFoundError: If neither ``start`` nor any ancestor qualifies.
    """
    origin = start.resolve()
    lineage = (origin, *origin.parents)
    root = next(
        (
            folder
            for folder in lineage
            if (folder / "data").exists() and (folder / "modeling").exists()
        ),
        None,
    )
    if root is None:
        raise FileNotFoundError("No se encontro la raiz del proyecto.")
    return root


# Anchor every path on the detected project root rather than on the current
# working directory itself.
PROJECT_ROOT = find_project_root(Path.cwd())
MODEL_DIR = PROJECT_ROOT.joinpath("modeling", "artifacts", "autogluon_model")
INPUT_PATH = PROJECT_ROOT.joinpath(
    "modeling", "data", "processed", "transition_dataset.csv"
)
OUTPUT_PATH = PROJECT_ROOT.joinpath("modeling", "outputs", "predictions.csv")

# Columns removed from the input table before it is handed to the predictor.
DROP_COLUMNS = ["gene_id", "chromosome", "global_position", "local_position"]
# When True, one "proba_<class>" column per class is added to the output.
INCLUDE_PROBA = True

# Notebook display of the resolved model and input locations.
MODEL_DIR, INPUT_PATH

In [None]:
# Fail fast with an explicit message when the model or the input is missing.
if not MODEL_DIR.exists():
    raise FileNotFoundError(f"No existe el directorio del modelo: {MODEL_DIR}")
if not INPUT_PATH.exists():
    raise FileNotFoundError(f"No existe el archivo de entrada: {INPUT_PATH}")

predictor = TabularPredictor.load(str(MODEL_DIR))
raw_df = pd.read_csv(INPUT_PATH, low_memory=False)
if raw_df.empty:
    raise ValueError(f"El archivo de entrada esta vacio: {INPUT_PATH}")

# Only drop the configured columns that are actually present; the list is
# already filtered, so no errors="ignore" is needed on the drop itself.
columns_to_drop = [col for col in DROP_COLUMNS if col in raw_df.columns]
feature_df = raw_df.drop(columns=columns_to_drop)

# The input may still carry the label column; remove it so the predictor
# receives features only.
if predictor.label in feature_df.columns:
    feature_df = feature_df.drop(columns=[predictor.label])

predictions = predictor.predict(feature_df)

# Keep every original column in the output and attach results positionally
# (.values) so index alignment cannot reorder or introduce NaNs.
output_df = raw_df.copy()
output_df["prediction"] = predictions.values

if INCLUDE_PROBA:
    # Per-class probabilities do not exist for regression/quantile models:
    # predict_proba either raises or returns a Series without ``.columns``.
    # Skip them in that case instead of aborting before the CSV is written.
    if predictor.problem_type in ("regression", "quantile"):
        print(
            "Probabilidades omitidas: el modelo es de tipo "
            f"'{predictor.problem_type}'."
        )
    else:
        probabilities = predictor.predict_proba(feature_df)
        for class_name in probabilities.columns:
            output_df[f"proba_{class_name}"] = probabilities[class_name].values

OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)
output_df.to_csv(OUTPUT_PATH, index=False)

print(f"Predicciones guardadas en: {OUTPUT_PATH}")

In [None]:
# Notebook display: preview the first rows of the output table.
output_df.head()