AutoGluon - Predicción de ventas (tn) por producto para febrero 2020

In [1]:
# 📦 1. Importar librerías
import pandas as pd

In [2]:
import torch
print(torch.__version__)
print(torch.cuda.is_available())

2.5.1+cu121
True


In [3]:


from autogluon.timeseries import TimeSeriesPredictor, TimeSeriesDataFrame

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
import os

In [5]:
drive_base_path = 'datan/'
filename = 'sell-in.txt'
filepath = os.path.join(drive_base_path, filename)
df_sellin = pd.read_csv(filepath, sep='\t')

In [6]:
drive_base_path = 'datan/'
filename = 'productos_a_predecir.txt'
filepath = os.path.join(drive_base_path, filename)
df_productos  = pd.read_csv(filepath)

In [None]:
# 📄 2. Cargar datasets
#df_sellin = pd.read_csv("sell-in.txt", sep="\t")
#df_productos = pd.read_csv("tb_productos.txt", sep="\t")

In [7]:
# 📄 Leer lista de productos a predecir
with open("datan/productos_a_predecir.txt", "r") as f:
    product_ids = [int(line.strip()) for line in f if line.strip().isdigit()]

In [8]:
# 🧹 3. Preprocesamiento
# Convertir periodo a datetime
df_sellin['timestamp'] = pd.to_datetime(df_sellin['periodo'], format='%Y%m')

In [9]:
# Filtrar hasta dic 2019 y productos requeridos
df_filtered = df_sellin[
    (df_sellin['timestamp'] <= '2019-12-01') &
    (df_sellin['product_id'].isin(product_ids))
]

In [None]:
# Agregar tn por periodo, cliente y producto
df_grouped = df_filtered.groupby(['timestamp', 'customer_id', 'product_id'], as_index=False)['tn'].sum()
# Agregar tn total por periodo y producto
df_monthly_product = df_grouped.groupby(['timestamp', 'product_id'], as_index=False)['tn'].sum()
# Agregar columna 'item_id' para AutoGluon
df_monthly_product['item_id'] = df_monthly_product['product_id']

In [54]:
df_monthly_product

Unnamed: 0,timestamp,product_id,tn,item_id
0,2017-01-01,20001,934.77222,20001
1,2017-01-01,20002,550.15707,20002
2,2017-01-01,20003,1063.45835,20003
3,2017-01-01,20004,555.91614,20004
4,2017-01-01,20005,494.27011,20005
...,...,...,...,...
22344,2019-12-01,21263,0.01270,21263
22345,2019-12-01,21265,0.05007,21265
22346,2019-12-01,21266,0.05121,21266
22347,2019-12-01,21267,0.01569,21267


In [61]:
from prophet import Prophet

resultados_prophet = {}

for product_id in df_monthly_product['item_id'].unique():
    df_prod = df_monthly_product[df_monthly_product['item_id'] == product_id][['timestamp', 'tn']].copy()
    df_prod = df_prod.rename(columns={'timestamp': 'ds', 'tn': 'y'})
    
    if len(df_prod) < 2:
        continue

    model = Prophet(yearly_seasonality=True, weekly_seasonality=False, daily_seasonality=False)
    model.fit(df_prod)
    
    forecast = model.predict(df_prod)

    # Resetear índices para alinear
    df_prod = df_prod.reset_index(drop=True)
    forecast = forecast.reset_index(drop=True)

    df_prod['trend'] = forecast['trend']
    if 'yearly' in forecast.columns:
        df_prod['yearly'] = forecast['yearly']
    else:
        df_prod['yearly'] = 0

    resultados_prophet[product_id] = df_prod

19:33:00 - cmdstanpy - INFO - Chain [1] start processing
19:33:00 - cmdstanpy - INFO - Chain [1] done processing
19:33:00 - cmdstanpy - INFO - Chain [1] start processing
19:33:00 - cmdstanpy - INFO - Chain [1] done processing
19:33:00 - cmdstanpy - INFO - Chain [1] start processing
19:33:01 - cmdstanpy - INFO - Chain [1] done processing
19:33:01 - cmdstanpy - INFO - Chain [1] start processing
19:33:01 - cmdstanpy - INFO - Chain [1] done processing
19:33:01 - cmdstanpy - INFO - Chain [1] start processing
19:33:01 - cmdstanpy - INFO - Chain [1] done processing
19:33:01 - cmdstanpy - INFO - Chain [1] start processing
19:33:01 - cmdstanpy - INFO - Chain [1] done processing
19:33:01 - cmdstanpy - INFO - Chain [1] start processing
19:33:01 - cmdstanpy - INFO - Chain [1] done processing
19:33:01 - cmdstanpy - INFO - Chain [1] start processing
19:33:02 - cmdstanpy - INFO - Chain [1] done processing
19:33:02 - cmdstanpy - INFO - Chain [1] start processing
19:33:02 - cmdstanpy - INFO - Chain [1]

In [67]:
resultados_prophet

{20001:            ds           y        trend      yearly  item_id
 0  2017-01-01   934.77222  1236.574818 -233.783855    20001
 1  2017-02-01   798.01620  1245.975663 -377.884599    20001
 2  2017-03-01  1303.35771  1254.466750  264.037919    20001
 3  2017-04-01  1069.96130  1263.867594 -198.752402    20001
 4  2017-05-01  1502.20132  1272.965186  136.055196    20001
 5  2017-06-01  1520.06539  1282.366031  150.154559    20001
 6  2017-07-01  1030.67391  1291.463622 -273.258333    20001
 7  2017-08-01  1267.39462  1300.864467   89.297841    20001
 8  2017-09-01  1316.94604  1310.265312  -61.020128    20001
 9  2017-10-01  1439.75563  1319.362903  352.178300    20001
 10 2017-11-01  1580.47401  1328.763747  356.899026    20001
 11 2017-12-01  1049.38860  1337.861338 -192.903638    20001
 12 2018-01-01  1169.07532  1347.262182 -220.662662    20001
 13 2018-02-01  1043.76470  1356.663026 -322.552702    20001
 14 2018-03-01  1856.83534  1365.154109  178.297982    20001
 15 2018-04-01  1

In [68]:
df_all = []

for product_id, df_prod in resultados_prophet.items():
    df_prod['item_id'] = product_id
    df_all.append(df_prod)

df_prophet_features = pd.concat(df_all, ignore_index=True)

In [69]:
df_prophet_features

Unnamed: 0,ds,y,trend,yearly,item_id
0,2017-01-01,934.77222,1236.574818,-233.783855,20001
1,2017-02-01,798.01620,1245.975663,-377.884599,20001
2,2017-03-01,1303.35771,1254.466750,264.037919,20001
3,2017-04-01,1069.96130,1263.867594,-198.752402,20001
4,2017-05-01,1502.20132,1272.965186,136.055196,20001
...,...,...,...,...,...
22344,2019-12-01,1.02205,1.427455,-0.405417,21087
22345,2019-09-01,0.34250,0.312250,0.030241,21214
22346,2019-10-01,0.21735,-0.369909,0.587255,21214
22347,2019-11-01,0.84012,-1.074807,1.914933,21214


In [71]:
# ⏰ 4. Crear TimeSeriesDataFrame
ts_data = TimeSeriesDataFrame.from_data_frame(
    df_prophet_features,
    id_column='item_id',
    timestamp_column='ds'
)

In [72]:
# Completar valores faltantes
ts_data = ts_data.fill_missing_values()

In [75]:
# ⚙️ 5. Definir y entrenar predictor
predictor = TimeSeriesPredictor(
    prediction_length=2,
    target='y',
    freq='MS'  # Frecuencia mensual (Month Start), 
)

predictor.fit(ts_data, num_val_windows=2,time_limit=60*60)

Beginning AutoGluon training... Time limit = 3600s
AutoGluon will save models to 'e:\lab3\Analisis\AutogluonModels\ag-20250706_233838'
AutoGluon Version:  1.3.1
Python Version:     3.11.5
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.26100
CPU Count:          16
GPU Count:          1
Memory Avail:       4.50 GB / 15.63 GB (28.8%)
Disk Space Avail:   150.34 GB / 280.37 GB (53.6%)

Fitting with arguments:
{'enable_ensemble': True,
 'eval_metric': WQL,
 'freq': 'MS',
 'hyperparameters': 'default',
 'known_covariates_names': [],
 'num_val_windows': 2,
 'prediction_length': 2,
 'quantile_levels': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
 'random_seed': 123,
 'refit_every_n_windows': 1,
 'refit_full': False,
 'skip_model_selection': False,
 'target': 'y',
 'time_limit': 3600,
 'verbosity': 2}

train_data with frequency 'IRREG' has been resampled to frequency 'MS'.
Provided train_data has 22375 rows (NaN fraction=0.1%), 780 time series. Median time serie

<autogluon.timeseries.predictor.TimeSeriesPredictor at 0x21389b0d890>

In [76]:
# 🔮 6. Generar predicción
forecast = predictor.predict(ts_data)

data with frequency 'IRREG' has been resampled to frequency 'MS'.
Model not specified in predict, will default to the model with the best validation score: WeightedEnsemble


In [77]:
# Extraer predicción media y filtrar febrero 2020
forecast_mean = forecast['mean'].reset_index()
print(forecast_mean.columns)

Index(['item_id', 'timestamp', 'mean'], dtype='object')


In [78]:
# Tomar solo item_id y la predicción 'mean'
resultado = forecast['mean'].reset_index()[['item_id', 'mean']]
resultado.columns = ['product_id', 'tn']

# Filtrar solo febrero 2020
resultado = forecast['mean'].reset_index()
resultado = resultado[resultado['timestamp'] == '2020-02-01']

# Renombrar columnas
resultado = resultado[['item_id', 'mean']]
resultado.columns = ['product_id', 'tn']


In [79]:
# 💾 7. Guardar archivo
resultado.to_csv("Autogluon_virtualitos_11.csv", index=False)
resultado.head()

Unnamed: 0,product_id,tn
1,20001,1328.134351
3,20002,1102.101115
5,20003,691.276739
7,20004,503.747513
9,20005,476.941449


In [80]:
resultado

Unnamed: 0,product_id,tn
1,20001,1328.134351
3,20002,1102.101115
5,20003,691.276739
7,20004,503.747513
9,20005,476.941449
...,...,...
1551,20962,2.794797
1553,20975,2.592929
1555,20995,2.434219
1557,21087,0.954794
