In [None]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
from statsmodels.tsa.arima.model import ARIMA
import lightgbm as lgb
import xgboost as xgb
from autogluon.tabular import TabularPredictor
import warnings
import os

# Suppress every Python warning for the rest of the run.
warnings.filterwarnings("ignore")

# 1. Load the dataset, parse 'periodo' as datetime and collapse it to one row
#    per (product_id, periodo) holding the summed 'tn'.
df = (
    pd.read_csv("dataset_base.csv")
    .assign(periodo=lambda crudo: pd.to_datetime(crudo['periodo']))
    .groupby(['product_id', 'periodo'])['tn']
    .sum()
    .reset_index()
)

# 2. Distinct product ids, in order of first appearance in df.
productos = df['product_id'].unique()

# 3. Output accumulators: one prediction record and one log line per product.
resultados, log = [], []

# 4. Scratch directory where AutoGluon stores its per-product artifacts.
os.makedirs("autogluon_temp", exist_ok=True)

# LightGBM hyperparameters. They do not depend on the product, so they are
# built once instead of on every loop iteration.
# NOTE(review): min_data_in_leaf=65 requires at least 65 rows per leaf --
# confirm products have enough monthly history for the trees to split at all.
LGBM_PARAMS = dict(
    n_estimators=722,
    learning_rate=0.26830103566346203,
    max_depth=8,
    num_leaves=302,
    min_data_in_leaf=65,
    min_child_weight=0.2723737879682162,
    subsample=0.6693558818396728,
    subsample_freq=3,
    colsample_bytree=0.9982366151830648,
    colsample_bynode=0.6808843764592971,
    reg_alpha=1.5889509640833777,
    reg_lambda=3.277536337616617,
    max_bin=502,
    min_split_gain=0.10250744462326401,
    cat_smooth=49.797959349843936,
    random_state=42,
    boosting_type='dart',
    verbosity=-1,
    linear_tree=True,
)

# Forecast target. The only feature is the calendar month, so the row to
# predict is a one-row frame with the same column name used for training
# (a bare [[2]] drops the feature name the models were fitted with).
test_fecha = pd.to_datetime('2020-02-01')
X_objetivo = pd.DataFrame({'mes': [test_fecha.month]})

# For every product: fit each candidate model on the history before Dec-2019,
# score it on Dec-2019 with MAE, and keep the target-month forecast of the
# best-scoring model. A model that raises is dropped from the comparison
# (MAE = inf) instead of aborting the whole run.
for prod in tqdm(productos, desc="🔁 Procesando productos"):
    datos = df[df['product_id'] == prod].sort_values('periodo').copy()

    # Feature: calendar month of each observation.
    datos['mes'] = datos['periodo'].dt.month

    # Train: everything before Dec-2019. Validation: Dec-2019 itself.
    train = datos[datos['periodo'] < '2019-12-01'].copy()
    val = datos[datos['periodo'] == '2019-12-01'].copy()

    if len(train) < 12 or val.empty:
        continue  # not enough history, or nothing to validate against

    X_train = train[['mes']]
    y_train = train['tn']
    X_val = val[['mes']]
    y_val = val['tn']

    maes = {}   # model name -> validation MAE (inf when the model failed)
    preds = {}  # model name -> target-month forecast (only for models that ran)

    # 1. Linear regression
    try:
        lr = LinearRegression()
        lr.fit(X_train, y_train)
        maes['regresion'] = mean_absolute_error(y_val, lr.predict(X_val))
        preds['regresion'] = lr.predict(X_objetivo)[0]
    except Exception:
        maes['regresion'] = np.inf

    # 2. ARIMA
    try:
        serie = train.set_index('periodo')['tn']
        modelo_arima = ARIMA(serie, order=(1, 1, 1)).fit()
        # A single 3-step forecast serves both purposes: step 1 is scored
        # against the validation month and step 3 is the reported forecast.
        # Converted to a plain array so indexing is positional no matter what
        # index the forecast comes back with.
        # NOTE(review): step 3 is Feb-2020 only if the training series ends at
        # Nov-2019 with no missing months -- confirm.
        pronostico = np.asarray(modelo_arima.forecast(steps=3))
        maes['arima'] = mean_absolute_error(y_val, pronostico[:1])
        preds['arima'] = pronostico[-1]
    except Exception:
        maes['arima'] = np.inf

    # 3. LightGBM
    try:
        lgb_model = lgb.LGBMRegressor(**LGBM_PARAMS)
        lgb_model.fit(X_train, y_train)
        maes['lgbm'] = mean_absolute_error(y_val, lgb_model.predict(X_val))
        preds['lgbm'] = lgb_model.predict(X_objetivo)[0]
    except Exception:
        maes['lgbm'] = np.inf

    # 4. XGBoost
    try:
        xgb_model = xgb.XGBRegressor()
        xgb_model.fit(X_train, y_train)
        maes['xgboost'] = mean_absolute_error(y_val, xgb_model.predict(X_val))
        preds['xgboost'] = xgb_model.predict(X_objetivo)[0]
    except Exception:
        maes['xgboost'] = np.inf

    # 5. AutoGluon
    try:
        ag_data = train[['mes', 'tn']].copy()
        ag_data.columns = ['mes', 'label']
        predictor = TabularPredictor(label='label', path=f"autogluon_temp/{prod}", verbosity=0)
        predictor.fit(ag_data, time_limit=15, presets="best_quality", verbosity=0)
        pred_val = predictor.predict(X_val)
        maes['autogluon'] = mean_absolute_error(y_val, pred_val)
        feb_pred = predictor.predict(X_objetivo)
        preds['autogluon'] = feb_pred.values[0]
    except Exception:
        maes['autogluon'] = np.inf

    # Every candidate failed: there is no forecast to report for this product.
    if not preds:
        linea_log = f"Producto {prod}: ningun modelo pudo ajustarse, producto omitido"
        print(linea_log)
        log.append(linea_log)
        continue

    # Best model = lowest validation MAE among the models that produced a
    # forecast (ties resolve to the earliest model in the order above).
    mejor_modelo = min(preds, key=maes.get)
    pred_final = preds[mejor_modelo]
    linea_log = f"Producto {prod}: mejor modelo = {mejor_modelo}, MAE dic-2019 = {maes[mejor_modelo]:.4f}"
    print(linea_log)
    log.append(linea_log)

    # Record the result inside the loop so every product gets its own row.
    resultados.append({'product_id': prod, 'tn_predicho': pred_final})

# 5. Export the collected per-product predictions as CSV, without the index.
df_out = pd.DataFrame(resultados)
df_out.to_csv("predicciones_febrero2020_porproducto.csv", index=False)

# 6. Persist the model-selection log, one entry per line.
with open("log_modelos.txt", "w") as f:
    f.writelines(f"{linea}\n" for linea in log)

print("Predicción guardada en predicciones_febrero2020_porproducto.csv")
print("Log guardado en log_modelos.txt")
