In [73]:
# ----------------------------------------------
# 03_Model_LTV_180.ipynb
# Entrenamiento con split temporal para LTV_180
# ----------------------------------------------

# Inicialización
import os
import sys
import pandas as pd

# Añadir src al path para importar los scripts
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..', 'src')))

#  Imports del script
from train import (
    build_preprocessor,
    train_models,
    train_stacking_model,
    save_models
)

# Load the processed dataset and apply the temporal split.
# The target column and both cutoff dates are named once so that the row
# filters, the split and the feature/target selection cannot drift apart.
target = "LTV_180"
TRAIN_CUTOFF = "2018-01-01"  # first_session strictly before this date -> train
DATA_CUTOFF = "2018-07-01"   # first_session on or after this date is discarded

df = pd.read_csv("../data/processed/final_dataset.csv", parse_dates=["first_session"])
df = df[df["first_session"] < DATA_CUTOFF]
df = df[df[target].notna()]  # rows without a target value cannot be used

# Temporal split: `df` is already capped at DATA_CUTOFF, so the test set is
# simply every remaining row from TRAIN_CUTOFF onwards.
train_df = df[df["first_session"] < TRAIN_CUTOFF]
test_df = df[df["first_session"] >= TRAIN_CUTOFF]

# Fail fast if the split is not a clean partition or one side is empty,
# instead of training/evaluating silently on a degenerate set.
assert len(train_df) + len(test_df) == len(df), "temporal split must partition df"
assert not train_df.empty, f"no training rows before {TRAIN_CUTOFF}"
assert not test_df.empty, f"no test rows in [{TRAIN_CUTOFF}, {DATA_CUTOFF})"

# Features and target.
# Columns excluded from the feature matrix (the target itself is one of them);
# errors='ignore' tolerates any listed column that is absent from the dataset.
drop_cols = ['uid', 'first_session', 'last_session', 'first_order', 'last_order',
             target, 'CAC_source_30', 'ltv_cohort_avg', 'cac_cohort_avg', 'conversion_rate_cohort']

X_train = train_df.drop(columns=drop_cols, errors='ignore')
y_train = train_df[target]

X_test = test_df.drop(columns=drop_cols, errors='ignore')
y_test = test_df[target]

# Preprocessing: the preprocessor is built from the training features only.
preprocessor = build_preprocessor(X_train)



In [74]:
# Stage 1: fit the base and advanced models on the training split.
# NOTE(review): `modelos` is assumed to be a dict-like keyed by model name,
# since it is item-assigned below — confirm against train.train_models.
print("Entrenando modelos...")
modelos = train_models(X_train, y_train, preprocessor)

# Stage 2: fit the stacking ensemble. It receives the models from stage 1
# (before the ensemble itself is registered) and is then stored next to them
# under the "stacking" key.
print("Entrenando modelo stacking...")
stacked_model = train_stacking_model(
    X_train,
    y_train,
    preprocessor,
    modelos,
)
modelos["stacking"] = stacked_model


Entrenando modelos...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001387 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2299
[LightGBM] [Info] Number of data points in the train set: 22294, number of used features: 26
[LightGBM] [Info] Start training from score 7.413806
Entrenando modelo stacking...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001529 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2299
[LightGBM] [Info] Number of data points in the train set: 22294, number of used features: 26
[LightGBM] [Info] Start training from score 7.413806
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002337 seconds.
You can set `force_row_wise=true

In [75]:
# Persist every entry of `modelos` (the base models plus the stacking ensemble).
# The target name is taken from `target`, defined in the setup cell, instead of
# a repeated "LTV_180" literal, so the saved artifacts always carry the name of
# the target that was actually trained.
print("Guardando modelos...")
save_models(modelos, target_name=target, save_path="../models/")

print(" Entrenamiento y test set listo para evaluación.")

Guardando modelos...
Modelos guardados exitosamente en ../models/
 Entrenamiento y test set listo para evaluación.
