In [5]:
# ===============================
# 🔹 1. Importaciones
# ===============================
import pandas as pd
import numpy as np
from pathlib import Path
from sklearn.preprocessing import StandardScaler, LabelEncoder

# ===============================
# 🔹 2. Load interim data
# ===============================
# The project root is taken to be the parent of the current working directory
# (presumably the notebook runs one level below the root — TODO confirm).
ROOT = Path.cwd().parent
INTERIM_PATH = ROOT.joinpath("data", "interim", "feature_engineered_data.csv")

# Read the feature-engineered CSV, or fail with an explicit message that
# includes the full path that was looked up.
if INTERIM_PATH.exists():
    df = pd.read_csv(INTERIM_PATH)
else:
    raise FileNotFoundError(f"No se encuentra el archivo: {INTERIM_PATH}")

# ===============================
# 🔹 3. Preprocessing
# ===============================

# Standardize every numeric column in place with a single StandardScaler.
# NOTE(review): this selects ALL numeric columns, so any numeric target or
# identifier column is scaled too, and the scaler is fitted on every row
# (before any train/test split) — confirm that this is intended.
numeric_columns = df.select_dtypes(include=[np.number]).columns
scaler = StandardScaler()
# StandardScaler rejects an input with zero features, so skip the scaling
# step when the frame has no numeric columns instead of crashing.
if not numeric_columns.empty:
    df[numeric_columns] = scaler.fit_transform(df[numeric_columns])

# Replace each object-dtype column with integer codes from its own
# LabelEncoder. The fitted encoders are kept, keyed by column name, so the
# codes can be mapped back to the original labels later.
categorical_columns = df.select_dtypes(include=["object"]).columns
label_encoders = {}

for col in categorical_columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# ===============================
# 🔹 4. Save preprocessed data
# ===============================
PROCESSED_PATH = ROOT.joinpath("data", "processed")
# Create the output directory, including missing parents; an existing
# directory is not an error.
PROCESSED_PATH.mkdir(exist_ok=True, parents=True)

# Write the transformed frame as CSV without the DataFrame index column.
output_file = PROCESSED_PATH.joinpath("preprocessed_data.csv")
df.to_csv(output_file, index=False)

print(f"✅ Datos preprocesados guardados en: {output_file}")



✅ Datos preprocesados guardados en: c:\Users\alozano\OneDrive\Documentos\Workspace\Grasa_corporal\data\processed\preprocessed_data.csv
