In [4]:
# ===============================
# 🔹 03-preprocesamiento
# ===============================
import sys
from pathlib import Path

# Resolve the project root (the parent of the current working directory)
# and make the 'src' directory importable.
ROOT = Path.cwd().parent
_SRC_DIR = str(ROOT / "src")
# Re-running this cell must not pile up duplicate entries in sys.path.
if _SRC_DIR not in sys.path:
    sys.path.append(_SRC_DIR)

import numpy as np
import pandas as pd

from features.preprocessing import create_preprocessor

# ===============================
# 🔹 2. Load the intermediate data
# ===============================
# Location of the feature-engineered CSV under data/interim.
INPUT_PATH = ROOT.joinpath("data", "interim", "feature_engineered_data.csv")

# Read the CSV when it is present; otherwise fail fast, naming the path.
if INPUT_PATH.exists():
    df = pd.read_csv(INPUT_PATH)
else:
    raise FileNotFoundError(f"❌ No se encuentra el archivo: {INPUT_PATH}")

# ===============================
# 🔹 3. Apply the preprocessor
# ===============================
# Build the preprocessor from the loaded frame, then fit it and transform
# the same frame in one step.
preprocessor = create_preprocessor(df)
transformed = preprocessor.fit_transform(df)

# The transform may hand back a SciPy sparse matrix (e.g. when a one-hot
# encoder produces sparse output); densify it so that pd.DataFrame receives
# a regular 2-D array to label.
if hasattr(transformed, "toarray"):
    transformed = transformed.toarray()

# Rebuild the column names: numeric columns followed by the one-hot encoded
# categorical columns.
# NOTE(review): this assumes the preprocessor emits the numeric columns first
# and then the output of its "cat" transformer, with no columns dropped or
# reordered — confirm against create_preprocessor.
num_features = df.select_dtypes(include="number").columns.tolist()
cat_features = df.select_dtypes(include=["object", "category"]).columns.tolist()
ohe_columns = preprocessor.named_transformers_["cat"].get_feature_names_out(cat_features)
columns = np.concatenate([num_features, ohe_columns])
df_processed = pd.DataFrame(transformed, columns=columns)

# ===============================
# 🔹 4. Save the result
# ===============================
OUTPUT_PATH = ROOT.joinpath("data", "processed", "preprocessed_data.csv")

# Create the destination folder (and any missing parents) before writing.
output_dir = OUTPUT_PATH.parent
output_dir.mkdir(parents=True, exist_ok=True)

# Write the frame to CSV without its index column.
df_processed.to_csv(OUTPUT_PATH, index=False)

print(f"✅ Datos preprocesados guardados en: {OUTPUT_PATH}")



✅ Datos preprocesados guardados en: c:\Users\alozano\OneDrive\Documentos\Workspace\Grasa_corporal\data\processed\preprocessed_data.csv
