AutoGluon - Predicción de ventas (tn) por producto para febrero 2020

In [None]:
# 📦 1. Importar librerías
import pandas as pd

In [None]:
# 💬 Instalar AutoGluon si es necesario
%pip install autogluon.timeseries

from autogluon.timeseries import TimeSeriesPredictor, TimeSeriesDataFrame

Collecting autogluon.timeseries
  Downloading autogluon.timeseries-1.3.1-py3-none-any.whl.metadata (12 kB)
Collecting lightning<2.7,>=2.2 (from autogluon.timeseries)
  Downloading lightning-2.5.2-py3-none-any.whl.metadata (38 kB)
Collecting pytorch-lightning (from autogluon.timeseries)
  Downloading pytorch_lightning-2.5.2-py3-none-any.whl.metadata (21 kB)
Collecting transformers<4.50,>=4.38.0 (from transformers[sentencepiece]<4.50,>=4.38.0->autogluon.timeseries)
  Downloading transformers-4.49.0-py3-none-any.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.0/44.0 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
Collecting gluonts<0.17,>=0.15.0 (from autogluon.timeseries)
  Downloading gluonts-0.16.2-py3-none-any.whl.metadata (9.8 kB)
Collecting statsforecast<2.0.2,>=1.7.0 (from autogluon.timeseries)
  Downloading statsforecast-2.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (29 kB)
Collecting mlforecast<0.14,>0.13 (from aut

In [None]:
# 📄 2. Load datasets
# Mount Google Drive so the CSV files under RUTA can be read from the Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

import pandas as pd, numpy as np
RUTA = '/content/drive/MyDrive/Austral/Laboratorio III/Datos/'
df_ventas    = pd.read_csv(RUTA + 'sell-in.csv')
df_objetivo  = pd.read_csv(RUTA + 'product_id_apredecir201912.csv')
df_stocks    = pd.read_csv(RUTA + 'tb_stocks.csv')

# Keep only the rows that belong to the products we have to predict.
# .copy() turns each filtered result into an independent frame, so columns
# added later (e.g. 'timestamp' on df_ventas) are written to a real frame
# instead of to a slice of the raw data.
productos_objetivo = df_objetivo['product_id']
df_ventas = df_ventas[df_ventas['product_id'].isin(productos_objetivo)].copy()
df_stocks = df_stocks[df_stocks['product_id'].isin(productos_objetivo)].copy()


Mounted at /content/drive


In [None]:
# 🧹 3. Preprocessing
# Parse the 'periodo' column (year + month, per the '%Y%m' format) into datetimes
# and store the result as the 'timestamp' column.
periodo_como_fecha = pd.to_datetime(df_ventas['periodo'], format='%Y%m')
df_ventas['timestamp'] = periodo_como_fecha

In [None]:
# Sum tn per period, customer and product
claves_cliente_producto = ['timestamp', 'customer_id', 'product_id']
df_grouped = (
    df_ventas
    .groupby(claves_cliente_producto, as_index=False)
    .agg(tn=('tn', 'sum'))
)

In [None]:
# Sum tn per period and product (collapses the customer dimension)
claves_producto = ['timestamp', 'product_id']
df_monthly_product = (
    df_grouped
    .groupby(claves_producto, as_index=False)
    .agg(tn=('tn', 'sum'))
)

In [None]:
# Add the 'item_id' column (a copy of 'product_id') that is later passed to
# AutoGluon as the series identifier
df_monthly_product = df_monthly_product.assign(
    item_id=df_monthly_product['product_id']
)

In [None]:
# 🧠 Feature engineering: add calendar columns derived from the timestamp
fechas = df_monthly_product['timestamp'].dt
df_monthly_product = df_monthly_product.assign(
    month=fechas.month,
    quarter=fechas.quarter,
    year=fechas.year,
    is_december=(fechas.month == 12).astype(int),  # 1 for December rows, else 0
    item_id=df_monthly_product['product_id'],      # re-set in case it was lost
)

# Quick look at the last rows
df_monthly_product.tail()


Unnamed: 0,timestamp,product_id,tn,item_id,month,quarter,year,is_december
22344,2019-12-01,21263,0.0127,21263,12,4,2019,1
22345,2019-12-01,21265,0.05007,21265,12,4,2019,1
22346,2019-12-01,21266,0.05121,21266,12,4,2019,1
22347,2019-12-01,21267,0.01569,21267,12,4,2019,1
22348,2019-12-01,21276,0.00892,21276,12,4,2019,1


In [None]:
# ⏰ 4. Build the TimeSeriesDataFrame
# Tell AutoGluon which columns identify the series and the time axis.
columnas_indice = dict(id_column='item_id', timestamp_column='timestamp')
ts_data = TimeSeriesDataFrame.from_data_frame(df_monthly_product, **columnas_indice)

In [None]:
# Fill missing values using the library's default ("auto") strategy.
# NOTE(review): the training log below reports this data as frequency 'IRREG'
# being resampled to 'MS' with a non-zero NaN fraction, so this call presumably
# runs before the month gaps exist as rows — confirm whether a frequency
# conversion should happen first.
ts_data = ts_data.fill_missing_values(method="auto")

In [None]:
# ⚙️ 5. Define and train the predictor
# Forecast 'tn' two monthly steps ahead at month-start ('MS') frequency.
# NOTE(review): no known_covariates_names are passed (the training log shows an
# empty list), so the calendar columns are not declared as known-in-advance
# covariates — confirm that this is intended.
predictor = TimeSeriesPredictor(
    target='tn',
    freq='MS',  # month-start frequency
    prediction_length=2,
)

# One hour training budget, scored on two validation windows.
una_hora_en_segundos = 60 * 60
predictor.fit(ts_data, time_limit=una_hora_en_segundos, num_val_windows=2)

Beginning AutoGluon training... Time limit = 3600s
AutoGluon will save models to '/content/AutogluonModels/ag-20250713_235105'
AutoGluon Version:  1.3.1
Python Version:     3.11.13
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP PREEMPT_DYNAMIC Sun Mar 30 16:01:29 UTC 2025
CPU Count:          2
GPU Count:          0
Memory Avail:       10.71 GB / 12.67 GB (84.5%)
Disk Space Avail:   64.99 GB / 107.72 GB (60.3%)

Fitting with arguments:
{'enable_ensemble': True,
 'eval_metric': WQL,
 'freq': 'MS',
 'hyperparameters': 'default',
 'known_covariates_names': [],
 'num_val_windows': 2,
 'prediction_length': 2,
 'quantile_levels': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
 'random_seed': 123,
 'refit_every_n_windows': 1,
 'refit_full': False,
 'skip_model_selection': False,
 'target': 'tn',
 'time_limit': 3600,
 'verbosity': 2}

train_data with frequency 'IRREG' has been resampled to frequency 'MS'.
Provided train_data has 22375 rows (NaN fraction=0.1%), 7

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/821M [00:00<?, ?B/s]

	-0.1905       = Validation score (-WQL)
	47.21   s     = Training runtime
	14.56   s     = Validation (prediction) runtime
Training timeseries model ChronosFineTuned[bolt_small]. Training for up to 568.7s of the 3412.4s of remaining time.
	Skipping covariate_regressor since the dataset contains no covariates or static features.


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/191M [00:00<?, ?B/s]

	Fine-tuning on the CPU detected. We recommend using a GPU for faster fine-tuning of Chronos.
	Saving fine-tuned model to /content/AutogluonModels/ag-20250713_235105/models/ChronosFineTuned[bolt_small]/W0/fine-tuned-ckpt
	Skipping covariate_regressor since the dataset contains no covariates or static features.
	Fine-tuning on the CPU detected. We recommend using a GPU for faster fine-tuning of Chronos.
	Saving fine-tuned model to /content/AutogluonModels/ag-20250713_235105/models/ChronosFineTuned[bolt_small]/W1/fine-tuned-ckpt
	-0.1829       = Validation score (-WQL)
	505.59  s     = Training runtime
	3.87    s     = Validation (prediction) runtime
Training timeseries model TemporalFusionTransformer. Training for up to 580.6s of the 2902.9s of remaining time.
	-0.1914       = Validation score (-WQL)
	550.10  s     = Training runtime
	0.92    s     = Validation (prediction) runtime
Training timeseries model DeepAR. Training for up to 588.0s of the 2351.8s of remaining time.
	-0.1860    

<autogluon.timeseries.predictor.TimeSeriesPredictor at 0x7cf69b165a50>

In [None]:
# 🔮 6. Generate the forecast
# No model is named, so the predictor picks the one with the best validation score.
forecast = predictor.predict(data=ts_data)

data with frequency 'IRREG' has been resampled to frequency 'MS'.
Model not specified in predict, will default to the model with the best validation score: WeightedEnsemble


In [None]:
# Pull out the mean forecast as a flat frame and show which columns it has
serie_media = forecast['mean']
forecast_mean = serie_media.reset_index()
print(forecast_mean.columns)

Index(['item_id', 'timestamp', 'mean'], dtype='object')


In [None]:
# Build the submission frame: one row per product with its February 2020
# mean forecast, using the column names 'product_id' and 'tn'.
mes_objetivo = pd.Timestamp('2020-02-01')

# Flatten the (item_id, timestamp) index into regular columns.
predicciones = forecast['mean'].reset_index()

# Filter to the target month and rename in a single pass (the earlier
# select/rename that was immediately overwritten has been removed).
resultado = (
    predicciones
    .loc[predicciones['timestamp'] == mes_objetivo, ['item_id', 'mean']]
    .rename(columns={'item_id': 'product_id', 'mean': 'tn'})
)

# Fail loudly instead of silently exporting an empty file when the forecast
# horizon does not reach the target month.
assert not resultado.empty, "forecast has no rows for 2020-02-01"


In [None]:
# 💾 7. Save the predictions to CSV (without the index) and preview the first rows
archivo_salida = "predicciones_febrero2020_autogluon3.csv"
resultado.to_csv(archivo_salida, index=False)
resultado.head()

Unnamed: 0,product_id,tn
1,20001,1323.611713
3,20002,1091.614417
5,20003,716.859191
7,20004,548.437905
9,20005,544.514201


In [None]:
# Send the predictions CSV from the Colab runtime to the browser as a download
from google.colab import files

archivo_descarga = "predicciones_febrero2020_autogluon3.csv"
files.download(archivo_descarga)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>