In [None]:
# 📦 1. Importar librerías
import pandas as pd
import matplotlib.pyplot as plt
from autogluon.timeseries import TimeSeriesPredictor, TimeSeriesDataFrame

# 📄 2. Cargar datasets
df_sellin = pd.read_csv("sell-in.txt", sep="\t")
df_productos = pd.read_csv("tb_productos.txt", sep="\t")

# 📄 Leer lista de productos a predecir
with open("product_id_apredecir201912.TXT", "r") as f:
    product_ids = [int(line.strip()) for line in f if line.strip().isdigit()]

# 🧹 3. Preprocesamiento
df_sellin['timestamp'] = pd.to_datetime(df_sellin['periodo'], format='%Y%m')

df_filtered = df_sellin[
    (df_sellin['timestamp'] <= '2019-12-01') &
    (df_sellin['product_id'].isin(product_ids))
]

df_grouped = df_filtered.groupby(['timestamp', 'customer_id', 'product_id'], as_index=False)['tn'].sum()
df_monthly_product = df_grouped.groupby(['timestamp', 'product_id'], as_index=False)['tn'].sum()
df_monthly_product['item_id'] = df_monthly_product['product_id']

# ⏰ 4. Crear TimeSeriesDataFrame
ts_data = TimeSeriesDataFrame.from_data_frame(
    df_monthly_product,
    id_column='item_id',
    timestamp_column='timestamp'
)
ts_data = ts_data.fill_missing_values()

# ⚙️ 5. Entrenar predictor
predictor = TimeSeriesPredictor(
    prediction_length=2,
    target='tn',
    freq='MS',
    eval_metric='MASE',
    verbosity=2
)

predictor.fit(
    ts_data,
    num_val_windows=3,
    time_limit=60 * 60,
    enable_ensemble=False,
    hyperparameters={
        "DeepAR": {"context_length": 12},
        "ETS": {},
        "AutoARIMA": {}
    }
)

# 📋 6. Leaderboard
print("\n🏆 Leaderboard:")
print(predictor.leaderboard())

# 🔮 7. Predicciones
forecast = predictor.predict(ts_data)
forecast_mean = forecast['mean'].reset_index()

# 🕒 Ver qué fechas están disponibles
fechas = forecast_mean['timestamp'].unique()
print("\n📅 Timestamps disponibles en el forecast:", fechas)

# Filtrar febrero si existe
resultado = forecast_mean[forecast_mean['timestamp'] == '2020-02-01']
if not resultado.empty:
    resultado = resultado[['item_id', 'mean']]
    resultado.columns = ['product_id', 'tn']

    # 💾 Guardar CSV
    resultado.to_csv("predicciones_febrero2020_autogluon_mejorado.csv", index=False)
    print("✅ Archivo guardado: predicciones_febrero2020_autogluon_mejorado.csv")

    # 📊 Gráfico de ejemplo
    ejemplo = resultado['product_id'].iloc[0]
    predictor.plot(item_id=str(ejemplo), ts_dataframe=ts_data)
    plt.title(f"Predicción para producto {ejemplo}")
    plt.tight_layout()
    plt.show()
else:
    print("⚠️ No se encontraron predicciones para 2020-02-01.")


No path specified. Models will be saved in: "AutogluonModels\ag-20250703_010341"
Beginning AutoGluon training... Time limit = 3600s
AutoGluon will save models to 'c:\Users\waldo\Dropbox\Maestría Ciencia de Datos\Labo 3\Proceso\AutogluonModels\ag-20250703_010341'
AutoGluon Version:  1.3.1
Python Version:     3.9.23
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.26100
CPU Count:          12
GPU Count:          0
Memory Avail:       44.32 GB / 63.68 GB (69.6%)
Disk Space Avail:   1315.37 GB / 1862.26 GB (70.6%)

Fitting with arguments:
{'enable_ensemble': False,
 'eval_metric': MASE,
 'freq': 'MS',
 'hyperparameters': {'AutoARIMA': {},
                     'DeepAR': {'context_length': 12},
                     'ETS': {}},
 'known_covariates_names': [],
 'num_val_windows': 2,
 'prediction_length': 2,
 'quantile_levels': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
 'random_seed': 123,
 'refit_every_n_windows': 1,
 'refit_full': False,
 'skip_model_selectio


🏆 Leaderboard:
       model  score_val  pred_time_val  fit_time_marginal  fit_order
0        ETS  -1.077332       3.668570           7.731666          1
1  AutoARIMA  -1.201972      28.769119          27.080944          2


Model not specified in predict, will default to the model with the best validation score: ETS



📅 Timestamps disponibles en el forecast: <DatetimeArray>
['2020-01-01 00:00:00', '2020-02-01 00:00:00']
Length: 2, dtype: datetime64[ns]
✅ Archivo guardado: predicciones_febrero2020_autogluon_mejorado.csv


TypeError: plot() got an unexpected keyword argument 'item_id'

In [None]:
# 📦 1. Importar librerías
import pandas as pd
import matplotlib.pyplot as plt
from autogluon.timeseries import TimeSeriesPredictor, TimeSeriesDataFrame

# 📄 2. Cargar dataset base
df_base = pd.read_csv("datos_finales.csv")

# 📄 Leer lista de productos a predecir
with open("product_id_apredecir201912.TXT", "r") as f:
    product_ids = [int(line.strip()) for line in f if line.strip().isdigit()]

# 🧹 3. Preprocesamiento
df_base['timestamp'] = pd.to_datetime(df_base['periodo'], format='%Y-%m')

df_filtered = df_base[
    (df_base['timestamp'] <= '2019-12-01') &
    (df_base['product_id'].isin(product_ids))
]

df_grouped = df_filtered.groupby(['timestamp', 'customer_id', 'product_id'], as_index=False)['tn'].sum()
df_monthly_product = df_grouped.groupby(['timestamp', 'product_id'], as_index=False)['tn'].sum()
df_monthly_product['item_id'] = df_monthly_product['product_id']

# ⏰ 4. Crear TimeSeriesDataFrame
ts_data = TimeSeriesDataFrame.from_data_frame(
    df_monthly_product,
    id_column='item_id',
    timestamp_column='timestamp'
)
ts_data = ts_data.fill_missing_values()

# ⚙️ 5. Entrenar predictor
predictor = TimeSeriesPredictor(
    prediction_length=2,
    target='tn',
    freq='MS',
    eval_metric='MASE',
    verbosity=2
)

predictor.fit(
    ts_data,
    num_val_windows=2,
    time_limit=60 * 60,
    enable_ensemble=False,
    hyperparameters={
        "DeepAR": {"context_length": 12},
        "ETS": {},
        "AutoARIMA": {}
    }
)

# 📋 6. Leaderboard
print("\n🏆 Leaderboard:")
print(predictor.leaderboard())

# 🔮 7. Predicciones
forecast = predictor.predict(ts_data)
forecast_mean = forecast['mean'].reset_index()

# 🕒 Ver qué fechas están disponibles
fechas = forecast_mean['timestamp'].unique()
print("\n📅 Timestamps disponibles en el forecast:", fechas)

# Filtrar febrero si existe
resultado = forecast_mean[forecast_mean['timestamp'] == '2020-02-01']
if not resultado.empty:
    resultado = resultado[['item_id', 'mean']]
    resultado.columns = ['product_id', 'tn']

    # 💾 Guardar CSV
    resultado.to_csv("predicciones_febrero2020_autogluon_datosfinales.csv", index=False)
    print("✅ Archivo guardado: predicciones_febrero2020_autogluon_datosfinales.csv")

    # 📊 Gráfico de ejemplo
    ejemplo = resultado['product_id'].iloc[0]
    predictor.plot(item_id=str(ejemplo), ts_dataframe=ts_data)
    plt.title(f"Predicción para producto {ejemplo}")
    plt.tight_layout()
    plt.show()
else:
    print("⚠️ No se encontraron predicciones para 2020-02-01.")


Beginning AutoGluon training... Time limit = 3600s
AutoGluon will save models to 'c:\Users\waldo\Dropbox\Maestría Ciencia de Datos\Labo 3\Proceso\AutogluonModels\ag-20250714_200529'
AutoGluon Version:  1.3.1
Python Version:     3.9.23
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.26100
CPU Count:          12
GPU Count:          0
Memory Avail:       33.54 GB / 63.68 GB (52.7%)
Disk Space Avail:   1293.77 GB / 1862.26 GB (69.5%)

Fitting with arguments:
{'enable_ensemble': False,
 'eval_metric': MASE,
 'freq': 'MS',
 'hyperparameters': {'AutoARIMA': {},
                     'DeepAR': {'context_length': 12},
                     'ETS': {}},
 'known_covariates_names': [],
 'num_val_windows': 2,
 'prediction_length': 2,
 'quantile_levels': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
 'random_seed': 123,
 'refit_every_n_windows': 1,
 'refit_full': False,
 'skip_model_selection': False,
 'target': 'tn',
 'time_limit': 3600,
 'verbosity': 2}

Provided train


🏆 Leaderboard:
       model  score_val  pred_time_val  fit_time_marginal  fit_order
0        ETS  -1.162587       4.850515          48.722618          1
1  AutoARIMA  -1.336413      46.855072          35.624823          2

📅 Timestamps disponibles en el forecast: <DatetimeArray>
['2020-01-01 00:00:00', '2020-02-01 00:00:00']
Length: 2, dtype: datetime64[ns]
✅ Archivo guardado: predicciones_febrero2020_autogluon_mejoradodataset.csv


TypeError: plot() got an unexpected keyword argument 'item_id'

In [None]:
#mejoras de perplexity. 0.294 en el public leaderboard
import pandas as pd
import matplotlib.pyplot as plt
from autogluon.timeseries import TimeSeriesPredictor, TimeSeriesDataFrame
import warnings
warnings.filterwarnings("ignore")

# 1. Cargar dataset base
df_base = pd.read_csv("datos_finales.txt", sep="\t")

# 2. Leer lista de productos a predecir
with open("product_id_apredecir201912.TXT", "r") as f:
    product_ids = [int(line.strip()) for line in f if line.strip().isdigit()]

# 3. Preprocesamiento
df_base['timestamp'] = pd.to_datetime(df_base['periodo'], format='%Y-%m-%d')

df_filtered = df_base[
    (df_base['timestamp'] <= '2019-12-01') &
    (df_base['product_id'].isin(product_ids))
]

df_grouped = df_filtered.groupby(['timestamp', 'customer_id', 'product_id'], as_index=False)['tn'].sum()
df_monthly_product = df_grouped.groupby(['timestamp', 'product_id'], as_index=False)['tn'].sum()
df_monthly_product['item_id'] = df_monthly_product['product_id']

# 4. Crear TimeSeriesDataFrame
ts_data = TimeSeriesDataFrame.from_data_frame(
    df_monthly_product,
    id_column='item_id',
    timestamp_column='timestamp'
)

# Mejor manejo de valores faltantes: interpolación temporal
ts_data = ts_data.interpolate(method='linear').fill_missing_values()

# 5. Entrenar predictor con preset para mejor exploración de modelos
predictor = TimeSeriesPredictor(
    prediction_length=2,
    target='tn',
    freq='MS',
    eval_metric='MASE',
    verbosity=2
)

predictor.fit(
    ts_data,
    num_val_windows=3,  # validación temporal robusta
    time_limit=60 * 60,  # 1 hora
    presets="medium_quality",  # permite explorar más modelos automáticamente
    enable_ensemble=True  # activar ensamblado para mejor precisión
)

# 6. Guardar leaderboard para análisis
leaderboard = predictor.leaderboard(silent=True)
leaderboard.to_csv("leaderboard_autogluon.csv", index=False)
print("\n🏆 Leaderboard guardado en 'leaderboard_autogluon.csv'")

# 7. Predicciones
forecast = predictor.predict(ts_data)

# Mostrar timestamps disponibles
fechas = forecast.index.get_level_values('timestamp').unique()
print("\n📅 Timestamps disponibles en el forecast:", fechas)

# Filtrar predicciones para febrero 2020
fecha_objetivo = pd.Timestamp('2020-02-01')
if fecha_objetivo in fechas:
    resultado = forecast.xs(fecha_objetivo, level='timestamp')[['mean']].reset_index()
    resultado.columns = ['product_id', 'tn']

    # Guardar predicciones
    resultado.to_csv("predicciones_febrero2020_autogluon_datosfinales.csv", index=False)
    print("✅ Archivo guardado: predicciones_febrero2020_autogluon_datosfinales.csv")

    # Graficar ejemplo para un producto
    ejemplo = resultado['product_id'].iloc[0]
    predictor.plot(item_id=str(ejemplo), ts_dataframe=ts_data)
    plt.title(f"Predicción para producto {ejemplo}")
    plt.tight_layout()
    plt.savefig(f"prediccion_producto_{ejemplo}.png")
    plt.show()
else:
    print("⚠️ No se encontraron predicciones para 2020-02-01.")


Beginning AutoGluon training... Time limit = 3600s
AutoGluon will save models to 'c:\Users\waldo\Dropbox\Maestría Ciencia de Datos\Labo 3\Proceso\AutogluonModels\ag-20250715_000923'
AutoGluon Version:  1.3.1
Python Version:     3.9.23
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.26100
CPU Count:          12
GPU Count:          0
Memory Avail:       39.56 GB / 63.68 GB (62.1%)
Disk Space Avail:   1293.17 GB / 1862.26 GB (69.4%)
Setting presets to: medium_quality

Fitting with arguments:
{'enable_ensemble': True,
 'eval_metric': MASE,
 'freq': 'MS',
 'hyperparameters': 'light',
 'known_covariates_names': [],
 'num_val_windows': 3,
 'prediction_length': 2,
 'quantile_levels': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
 'random_seed': 123,
 'refit_every_n_windows': 1,
 'refit_full': False,
 'skip_model_selection': False,
 'target': 'tn',
 'time_limit': 3600,
 'verbosity': 2}

train_data with frequency 'IRREG' has been resampled to frequency 'MS'.
Provi


🏆 Leaderboard guardado en 'leaderboard_autogluon.csv'


Model not specified in predict, will default to the model with the best validation score: WeightedEnsemble



📅 Timestamps disponibles en el forecast: DatetimeIndex(['2020-01-01', '2020-02-01'], dtype='datetime64[ns]', name='timestamp', freq=None)
✅ Archivo guardado: predicciones_febrero2020_autogluon_datosfinales.csv


TypeError: plot() got an unexpected keyword argument 'item_id'

In [2]:
print(df_base.columns)


Index(['periodo\tcustomer_id\tproduct_id\tplan_precios_cuidados\tcust_request_qty\tcust_request_tn\ttn\tcat1\tcat2\tcat3\tbrand\tsku_size\tstock_final'], dtype='object')


In [7]:
import pandas as pd
import numpy as np
import os
from autogluon.timeseries import TimeSeriesPredictor, TimeSeriesDataFrame

# 1. Cargar dataset final con features y stocks
df = pd.read_csv("datos_finales.txt", sep="\t")
df['periodo'] = pd.to_datetime(df['periodo'])
df = df.groupby(['product_id', 'periodo'])['tn'].sum().reset_index()

# 2. Cargar listado fijo de productos
with open("product_id_apredecir201912.TXT", "r") as f:
    productos = [int(line.strip()) for line in f if line.strip().isdigit()]

# 3. Filtrar datos
df = df[df['product_id'].isin(productos)].copy()

# 4. Crear TimeSeriesDataFrame
df_serie = df.copy()
df_serie = df_serie.rename(columns={'periodo': 'timestamp'})
df_serie['item_id'] = df_serie['product_id'].astype(str)
df_serie = df_serie[['item_id', 'timestamp', 'tn']]

ts_data = TimeSeriesDataFrame.from_data_frame(
    df_serie, id_column='item_id', timestamp_column='timestamp'
).fill_missing_values()

# 5. Crear carpeta de salida
os.makedirs("autogluon_temp_ts", exist_ok=True)

# 6. Entrenamiento AutoGluon
predictor = TimeSeriesPredictor(
    prediction_length=2,
    target='tn',
    freq='MS',
    path="autogluon_temp_ts",
    eval_metric='MASE',
    verbosity=2
)

predictor.fit(
    ts_data,
    num_val_windows=3,
    time_limit=60 * 60,
    enable_ensemble=True,
    presets="high_quality",
    hyperparameters={
        "DeepAR": {
            "context_length": 12,
            "epochs": 50,
            "learning_rate": 1e-3
        },
        "ETS": {},
        "AutoARIMA": {},
        "Naive": {},
        "SeasonalNaive": {},
        "RecursiveTabular": {},
        "DirectTabular": {},
        "Theta": {},
        "Chronos": {},
        "TemporalFusionTransformer": {
            "context_length": 12,
            "learning_rate": 3e-4,
            "dropout_rate": 0.1
        }
    }
)

# 7. Predicción para febrero 2020
forecast = predictor.predict(ts_data)

# 8. Guardar salida
predicciones = forecast.reset_index()
predicciones_febrero = predicciones[predicciones['timestamp'] == '2020-02-01']
predicciones_febrero = predicciones_febrero[['item_id', 'mean']].rename(columns={'item_id': 'product_id', 'mean': 'tn_predicho'})
predicciones_febrero['product_id'] = predicciones_febrero['product_id'].astype(int)
predicciones_febrero.sort_values('product_id').to_csv("predicciones_febrero2020_autogluon_hq.csv", index=False)

print("✅ Archivo guardado: predicciones_febrero2020_autogluon_hq.csv")


Beginning AutoGluon training... Time limit = 3600s
AutoGluon will save models to 'c:\Users\waldo\Dropbox\Maestría Ciencia de Datos\Labo 3\Proceso\autogluon_temp_ts'
AutoGluon Version:  1.3.1
Python Version:     3.9.23
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.26100
CPU Count:          12
GPU Count:          0
Memory Avail:       39.42 GB / 63.68 GB (61.9%)
Disk Space Avail:   1293.06 GB / 1862.26 GB (69.4%)
Setting presets to: high_quality

Fitting with arguments:
{'enable_ensemble': True,
 'eval_metric': MASE,
 'freq': 'MS',
 'hyperparameters': {'AutoARIMA': {},
                     'Chronos': {},
                     'DeepAR': {'context_length': 12,
                                'epochs': 50,
                                'learning_rate': 0.001},
                     'DirectTabular': {},
                     'ETS': {},
                     'Naive': {},
                     'RecursiveTabular': {},
                     'SeasonalNaive': {},
     

✅ Archivo guardado: predicciones_febrero2020_autogluon_hq.csv
