In [None]:
# Qué hace este notebook:
# Carga los datos crudos desde data/raw/gym_members_exercise_tracking.csv.
# Crea nuevas columnas: BMI y Log_Age.
# Guarda el dataset con features en data/interim/feature_engineered_data.csv.
# Actualiza el feature_store/user_fat_percentage/v1/features.csv.
# Guarda una versión con fecha, por si luego quieres entrenar con datos históricos.
# Compatible con VS Code o Colab, y mantiene la estructura del proyecto.


# ===============================
# 🔹 1. Importaciones
# ===============================
import pandas as pd
import numpy as np
import os
from pathlib import Path
from datetime import datetime

# ===============================
# 2. Locate the project root
# ===============================
# Walk from the current working directory up through its ancestors and take
# the first directory that contains a pyproject.toml marker.
# If no ancestor has the marker (for example when the notebook runs outside
# the repository), fall back to the working directory instead of silently
# ending up at the filesystem root, which would make every derived path
# point at e.g. /data/raw/... .
_start_dir = Path.cwd()
ROOT_DIR = next(
    (
        candidate
        for candidate in (_start_dir, *_start_dir.parents)
        if (candidate / "pyproject.toml").exists()
    ),
    _start_dir,
)

# ===============================
# 3. Load the raw data
# ===============================
# Location of the raw CSV inside the project tree.
RAW_PATH = ROOT_DIR.joinpath("data", "raw", "gym_members_exercise_tracking.csv")

# Read the file when it is present; otherwise stop with an explicit error
# that names the path that was looked up.
if RAW_PATH.exists():
    df = pd.read_csv(RAW_PATH)
    print(f"‚úÖ Datos cargados con forma: {df.shape}")
else:
    raise FileNotFoundError(f"‚ùå No se encontr√≥ el archivo en: {RAW_PATH}")

# ===============================
# 4. Feature engineering
# ===============================
# Fail early, with one message listing everything that is missing, if any
# input column is absent (same exception type as a failed column lookup).
required_cols = ['Weight (kg)', 'Height (m)', 'Age']
missing_cols = [col for col in required_cols if col not in df.columns]
if missing_cols:
    raise KeyError(f"Faltan columnas requeridas: {missing_cols}")

# A zero height makes the BMI division non-finite and np.log yields -inf/NaN
# for non-positive ages. Reject such rows here so that non-finite features
# are never written to the interim file or the feature store.
# Missing values (NaN) do not trip these checks and propagate as NaN, as before.
if (df['Height (m)'] <= 0).any():
    raise ValueError("Se encontraron valores <= 0 en 'Height (m)'; no se puede calcular BMI.")
if (df['Age'] <= 0).any():
    raise ValueError("Se encontraron valores <= 0 en 'Age'; no se puede calcular Log_Age.")

# BMI: weight divided by squared height (units as given by the column names).
df['BMI'] = df['Weight (kg)'] / (df['Height (m)'] ** 2)
# Natural logarithm of the age column.
df['Log_Age'] = np.log(df['Age'])

# ===============================
# 5. Save the interim dataset
# ===============================
# Target folder and file for the feature-engineered table.
INTERIM_PATH = ROOT_DIR.joinpath("data", "interim")
interim_file = INTERIM_PATH.joinpath("feature_engineered_data.csv")

# Create the folder (and any missing parents) before writing; an already
# existing folder is not an error.
INTERIM_PATH.mkdir(exist_ok=True, parents=True)

# Write without the DataFrame index column.
df.to_csv(path_or_buf=interim_file, index=False)
print(f"üíæ Guardado en datos intermedios: {interim_file}")

# ===============================
# 6. Save to the local feature store
# ===============================
# The v1 folder of the user_fat_percentage entity holds the current features.
FEATURE_STORE_PATH = ROOT_DIR.joinpath("feature_store", "user_fat_percentage", "v1")
fs_file = FEATURE_STORE_PATH.joinpath("features.csv")

# Make sure the folder hierarchy exists, then overwrite features.csv
# with the current DataFrame (index column omitted).
FEATURE_STORE_PATH.mkdir(exist_ok=True, parents=True)
df.to_csv(path_or_buf=fs_file, index=False)
print(f"üì¶ Feature store local actualizado: {fs_file}")

# ===============================
# 🔹 7. Versión con fecha (opcional)
# ===============================
def save_versioned_feature_store(
    df: pd.DataFrame,
    entity: str = "user_fat_percentage",
    version: str = "v1",
    root: "Path | None" = None,
) -> None:
    """Write a dated snapshot of ``df`` into the local feature store.

    The file is written to
    ``<root>/feature_store/<entity>/<version>_<YYYY-MM-DD>/features.csv``,
    where the date is today's local date. Missing folders are created.
    Calling the function again on the same day overwrites that day's file.

    Args:
        df: Table to persist; it is written as CSV without the index column.
        entity: Name of the feature-store entity folder.
        version: Prefix of the dated folder name (``"v1"`` by default, which
            reproduces the previously hard-coded layout).
        root: Directory that contains ``feature_store``. When ``None`` the
            module-level ``ROOT_DIR`` is used, as before.

    Returns:
        None. The destination path is reported on standard output.
    """
    # Resolve the base directory lazily so the global is only required when
    # the caller does not pass an explicit root.
    base_dir = ROOT_DIR if root is None else Path(root)

    today = datetime.today().strftime("%Y-%m-%d")
    version_path = base_dir / "feature_store" / entity / f"{version}_{today}"
    version_path.mkdir(parents=True, exist_ok=True)

    output_path = version_path / "features.csv"
    df.to_csv(output_path, index=False)
    print(f"üóÇÔ∏è Feature store versionado en: {output_path}")

# Write today's dated snapshot of the engineered DataFrame for the default entity.
save_versioned_feature_store(df)


‚úÖ Datos intermedios guardados
üì¶ Feature Store: c:\Users\alozano\OneDrive\Documentos\Workspace\Grasa_corporal\feature_store\user_fat_percentage\v1\features.csv
‚úÖ Feature store actualizado en: c:\Users\alozano\OneDrive\Documentos\Workspace\Grasa_corporal\feature_store\user_fat_percentage\v1_2025-06-08\features.csv


In [4]:
# 02_Feature Engineering

import pandas as pd
import numpy as np
from pathlib import Path
from datetime import datetime
import os

# ===============================
# 1. Define the project paths
# ===============================
# Walk from the current working directory up through its ancestors and take
# the first directory that contains a pyproject.toml marker.
# If no ancestor has the marker, fall back to the working directory instead
# of silently ending up at the filesystem root, which would make every path
# below point outside the project.
_start_dir = Path.cwd()
ROOT = next(
    (
        candidate
        for candidate in (_start_dir, *_start_dir.parents)
        if (candidate / "pyproject.toml").exists()
    ),
    _start_dir,
)

# Input CSV, interim output folder, and base folder of the feature-store entity.
RAW_PATH = ROOT / "data" / "raw" / "gym_members_exercise_tracking.csv"
INTERIM_PATH = ROOT / "data" / "interim"
FEATURE_STORE_BASE = ROOT / "feature_store" / "user_fat_percentage"

# ===============================
# 2. Load the raw data
# ===============================
# Announce and read the CSV when it is present; otherwise stop with an
# explicit error that names the path that was looked up.
if RAW_PATH.exists():
    print("üì• Cargando datos crudos...")
    df = pd.read_csv(RAW_PATH)
else:
    raise FileNotFoundError(f"‚ùå No se encuentra el archivo: {RAW_PATH}")

# ===============================
# 3. Create the new features
# ===============================
# Fail early, with one message listing everything that is missing, if any
# input column is absent (same exception type as a failed column lookup).
required_cols = ["Weight (kg)", "Height (m)", "Age"]
missing_cols = [col for col in required_cols if col not in df.columns]
if missing_cols:
    raise KeyError(f"Faltan columnas requeridas: {missing_cols}")

# A zero height makes the BMI division non-finite and np.log yields -inf/NaN
# for non-positive ages. Reject such rows here so that non-finite features
# are never written to the interim file or the feature store.
# Missing values (NaN) do not trip these checks and propagate as NaN, as before.
if (df["Height (m)"] <= 0).any():
    raise ValueError("Se encontraron valores <= 0 en 'Height (m)'; no se puede calcular BMI.")
if (df["Age"] <= 0).any():
    raise ValueError("Se encontraron valores <= 0 en 'Age'; no se puede calcular Log_Age.")

# BMI: weight divided by squared height (units as given by the column names).
df["BMI"] = df["Weight (kg)"] / (df["Height (m)"] ** 2)
# Natural logarithm of the age column.
df["Log_Age"] = np.log(df["Age"])

# ===============================
# 4. Save the interim dataset
# ===============================
# Destination file inside the interim folder.
interim_file = INTERIM_PATH.joinpath("feature_engineered_data.csv")

# Create the folder (and any missing parents) if needed, then write the
# table without the DataFrame index column.
INTERIM_PATH.mkdir(exist_ok=True, parents=True)
df.to_csv(path_or_buf=interim_file, index=False)
print(f"‚úÖ Guardado en: {interim_file}")

# ===============================
# 5. Save to the feature store
# ===============================
# --- Base copy: <entity>/v1/features.csv, overwritten on every run ---
FEATURE_STORE_V1 = FEATURE_STORE_BASE.joinpath("v1")
FEATURE_STORE_V1.mkdir(exist_ok=True, parents=True)

fs_file = FEATURE_STORE_V1.joinpath("features.csv")
df.to_csv(path_or_buf=fs_file, index=False)
print(f"üì¶ Feature Store base: {fs_file}")

# --- Dated copy: <entity>/v1_<YYYY-MM-DD>/features.csv ---
# The folder name carries today's local date, so there is one snapshot per day.
today = f"{datetime.today():%Y-%m-%d}"
FEATURE_STORE_VERSIONED = FEATURE_STORE_BASE.joinpath(f"v1_{today}")
FEATURE_STORE_VERSIONED.mkdir(exist_ok=True, parents=True)

fs_versioned_file = FEATURE_STORE_VERSIONED.joinpath("features.csv")
df.to_csv(path_or_buf=fs_versioned_file, index=False)
print(f"üóÇÔ∏è Feature Store versionado: {fs_versioned_file}")

# ===============================
# 6. Final confirmation
# ===============================
# Emit the completion banner followed by the row/column counts of the
# engineered DataFrame, one message per line.
print(
    "\n‚úÖ Feature engineering completado correctamente",
    f"üî¢ Filas: {df.shape[0]} | Columnas: {df.shape[1]}",
    sep="\n",
)


üì• Cargando datos crudos...
‚úÖ Guardado en: c:\TEMP\ALOZANO\GRASACORPORAL\data\interim\feature_engineered_data.csv
üì¶ Feature Store base: c:\TEMP\ALOZANO\GRASACORPORAL\feature_store\user_fat_percentage\v1\features.csv
üóÇÔ∏è Feature Store versionado: c:\TEMP\ALOZANO\GRASACORPORAL\feature_store\user_fat_percentage\v1_2025-10-26\features.csv

‚úÖ Feature engineering completado correctamente
üî¢ Filas: 973 | Columnas: 16
