In [6]:
import os
import sys

import joblib
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder

# Put the parent directory on sys.path before importing the project-local
# package below (presumably that is where `Utils` lives).
sys.path.append(os.path.abspath('..'))

from Utils.operators import Mapper, DateFeatureExtractor

In [7]:
# Load the raw sales workbook (sheet 'Base de Datos').
df = pd.read_excel('../data/raw/data_sales_forecasting.xlsx', sheet_name='Base de Datos')

# Separate the target column from the predictors.
X = df.drop(columns=['Venta_Neta_GTQ'])
y = df['Venta_Neta_GTQ']

# 70/30 hold-out split with a fixed seed so the partition is reproducible.
# `train_test_split` is imported in the notebook's import cell.
# NOTE(review): this split shuffles rows at random. The data carries a date
# column and the file is named for forecasting, so a chronological split may
# be more appropriate to avoid leaking future rows into training - confirm.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=2025)

print(f"Tamaño de X_train: {X_train.shape}")

Tamaño de X_train: (455960, 10)


In [8]:
# Date column(s) handed to the date feature extractor.
date_vars = [
    'Fecha',
]

# Categorical columns that (per the name) contain missing values and are
# imputed before encoding.
cat_vars_with_na = [
    'Codigo_Cupon',
    'Descripcion_Cupon',
]

# Numeric column(s).
# NOTE(review): not referenced by the visible cells - confirm whether it is still needed.
num_vars = [
    'Cantidad_Vendida',
]


In [9]:

# Date branch: run the project's DateFeatureExtractor over the date column(s).
date_pipeline = Pipeline(steps=[
    ('date_extractor', DateFeatureExtractor(variable=date_vars)),
])

# Categorical branch: replace missing values with the constant label
# 'Sin Cupon', then one-hot encode into a dense array. Categories that were
# not seen during fit are ignored at transform time instead of raising.
cat_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='constant', fill_value='Sin Cupon')),
    ('encoder', OneHotEncoder(handle_unknown='ignore', sparse_output=False)),
])

# Route each column group to its branch. Every column that is not listed
# here is passed through untouched (remainder='passthrough').
preprocessor = ColumnTransformer(
    transformers=[
        ('date_features', date_pipeline, date_vars),
        ('cat_features', cat_pipeline, cat_vars_with_na),
    ],
    remainder='passthrough',
)

# Final pipeline object; it currently holds the single 'preprocessor' step.
feature_engineering_pipeline = Pipeline(steps=[('preprocessor', preprocessor)])

In [10]:
# Fit the pipeline and transform the training set in one pass.
# Calling fit() and then transform() would run every transformer over
# X_train twice; fit_transform() yields the same fitted pipeline and output.
X_train_processed = feature_engineering_pipeline.fit_transform(X_train, y_train)

# Sanity check: preprocessing must neither add nor drop rows.
assert X_train_processed.shape[0] == X_train.shape[0], "row count changed during preprocessing"

print("Pipeline ajustado y probado exitosamente.")
print(f"Forma de los datos procesados: {X_train_processed.shape}")



Pipeline ajustado y probado exitosamente.
Forma de los datos procesados: (455960, 198)


In [11]:
output_path = '../models/feature_engineering_pipeline.pkl'

# Create the destination directory first: joblib.dump does not create
# missing parent directories and would fail on a fresh checkout.
os.makedirs(os.path.dirname(output_path), exist_ok=True)

# NOTE: the pickle stores the custom transformer by import path
# (Utils.operators.DateFeatureExtractor), so that module must be importable
# wherever this file is loaded.
joblib.dump(feature_engineering_pipeline, output_path)

print(f"Pipeline guardado exitosamente en: {output_path}")

Pipeline guardado exitosamente en: ../models/feature_engineering_pipeline.pkl
