In [None]:
# ====================================
# 🔹 Entrenamiento del modelo (pipeline)
# Archivo: 04_Entrenamiento.ipynb
# ====================================

# 1. Importaciones
from datetime import datetime
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder

# 2. Load data
# ROOT is the current working directory, so this must be run from the
# directory that contains "data/interim/" for the path below to resolve.
ROOT = Path.cwd()
INPUT_PATH = ROOT / "data" / "interim" / "feature_engineered_data.csv"

# pd.read_csv raises FileNotFoundError if INPUT_PATH does not exist.
df = pd.read_csv(INPUT_PATH)
print(f"✅ Datos cargados: {df.shape}")

# 3. Define X (features) and y (target)
target_col = "Fat_Percentage"
# Fail early with an explicit message rather than a KeyError from the
# drop()/indexing calls below.
if target_col not in df.columns:
    raise ValueError(f"❌ No se encontró la columna '{target_col}'.")

# X is every column except the target; y is the target column alone.
X = df.drop(columns=[target_col])
y = df[target_col]

# 4. Rename dataset columns so they line up with the names the backend uses.
# Mapping keys that are not present in X are skipped by DataFrame.rename
# (its default is errors="ignore").
_backend_column_names = {
    "Age": "age",
    "Weight (kg)": "weight_kg",
    "Height (m)": "height_m",
    "Max_BPM": "max_bpm",
    "Avg_BPM": "avg_bpm",
    "Resting_BPM": "resting_bpm",
    "Session_Duration (hours)": "session_duration_hours",
    "Gender": "gender",
    "Workout_Type": "workout_type",
}
X = X.rename(_backend_column_names, axis="columns")

# 5. Numeric and categorical columns
# Every non-numeric column (object, category, bool, string, ...) is treated as
# categorical. Listing only "object"/"category" would leave other dtypes in
# neither list, and the ColumnTransformer built from these lists silently
# drops any column it is not given (its default is remainder="drop").
num_features = X.select_dtypes(include=np.number).columns.tolist()
cat_features = X.select_dtypes(exclude=np.number).columns.tolist()

print("Numéricas:", num_features)
print("Categóricas:", cat_features)

# 6. Build the preprocessor: standardise the numeric columns and one-hot
# encode the categorical ones. handle_unknown="ignore" makes the encoder
# tolerate categories at transform time that were not seen during fit,
# instead of raising.
preprocessor = ColumnTransformer(
    transformers=[
        ("num", StandardScaler(), num_features),
        ("cat", OneHotEncoder(handle_unknown="ignore"), cat_features),
    ],
)

# 7. Full pipeline: preprocessing followed by the regressor.
# n_estimators=10 was flagged as the "fast" setting by the original author.
_regressor = RandomForestRegressor(random_state=42, n_estimators=10)
pipeline = Pipeline(
    steps=[
        ("preprocessing", preprocessor),
        ("model", _regressor),
    ],
)

# 8. Split, train and evaluate
# 80/20 split with a fixed seed so runs are reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
pipeline.fit(X_train, y_train)

print("✅ Entrenamiento completado")

# Score on the held-out split so the saved model comes with a quality signal.
# Pipeline.score delegates to the final estimator; for a regressor this is R^2.
test_r2 = pipeline.score(X_test, y_test)
print(f"R2 en test: {test_r2:.4f}")

# 9. Save the trained model
MODELS_DIR = ROOT / "models"
MODELS_DIR.mkdir(parents=True, exist_ok=True)

# Main copy: fixed file name, overwritten on every run.
pipeline_path = MODELS_DIR / "rf_pipeline.pkl"
joblib.dump(pipeline, pipeline_path)

# Dated copy: the file name carries today's date (YYYY-MM-DD), so a second
# run on the same day overwrites that day's file.
today = datetime.today().strftime("%Y-%m-%d")
versioned_path = MODELS_DIR / f"rf_pipeline_v1_{today}.pkl"
joblib.dump(pipeline, versioned_path)

print(f"✅ Pipeline guardado en: {pipeline_path}")
print(f"📦 Versión con fecha en: {versioned_path}")
