In [1]:
# 📦 1. Importar librerías
import pandas as pd

In [2]:
# 💬 Instalar AutoGluon si es necesario
%pip install autogluon.timeseries

from autogluon.timeseries import TimeSeriesPredictor, TimeSeriesDataFrame

Collecting autogluon.timeseries
  Downloading autogluon.timeseries-1.3.1-py3-none-any.whl.metadata (12 kB)
Collecting lightning<2.7,>=2.2 (from autogluon.timeseries)
  Downloading lightning-2.5.2-py3-none-any.whl.metadata (38 kB)
Collecting transformers<4.50,>=4.38.0 (from transformers[sentencepiece]<4.50,>=4.38.0->autogluon.timeseries)
  Downloading transformers-4.49.0-py3-none-any.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.0/44.0 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
Collecting gluonts<0.17,>=0.15.0 (from autogluon.timeseries)
  Downloading gluonts-0.16.2-py3-none-any.whl.metadata (9.8 kB)
Collecting statsforecast<2.0.2,>=1.7.0 (from autogluon.timeseries)
  Downloading statsforecast-2.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (29 kB)
Collecting mlforecast<0.14,>0.13 (from autogluon.timeseries)
  Downloading mlforecast-0.13.6-py3-none-any.whl.metadata (12 kB)
Collecting utilsforecast<0.2.11

In [3]:
# 📄 2. Cargar datasets desde carpeta local
import pandas as pd

BASE = "/kaggle/input/labo3-sales-data"

# i) Carga sin parsear fechas
df_sellin = pd.read_csv(
    f"{BASE}/sell-in.txt",
    sep="\t",
)
df_productos = pd.read_csv(f"{BASE}/tb_productos.txt", sep="\t")


In [4]:
# 📄 Leer lista de productos a predecir
with open(f"{BASE}/780_a_predecir.txt", "r") as f:
    product_ids = [
        int(line.strip())
        for line in f
        if line.strip().isdigit()
    ]


In [5]:
# 🧹 3. Preprocesamiento
# Convertir periodo a datetime
df_sellin['timestamp'] = pd.to_datetime(df_sellin['periodo'], format='%Y%m')

In [6]:
# Filtrar hasta dic 2019 y productos requeridos
df_filtered = df_sellin[
    (df_sellin['timestamp'] <= '2019-12-01') &
    (df_sellin['product_id'].isin(product_ids))
]

In [7]:
# Agregar tn por periodo, cliente y producto
df_grouped = df_filtered.groupby(['timestamp', 'customer_id', 'product_id'], as_index=False)['tn'].sum()

In [8]:
# Agregar tn total por periodo y producto
df_monthly_product = df_grouped.groupby(['timestamp', 'product_id'], as_index=False)['tn'].sum()

In [9]:
# Agregar columna 'item_id' para AutoGluon
df_monthly_product['item_id'] = df_monthly_product['product_id']

In [10]:
# ⏰ 4. Crear TimeSeriesDataFrame

!pip install autogluon.timeseries

from autogluon.timeseries import TimeSeriesDataFrame

ts_data = TimeSeriesDataFrame.from_data_frame(
    df_monthly_product,
    id_column='item_id',
    timestamp_column='timestamp'
)



In [11]:
# Completar valores faltantes
ts_data = ts_data.fill_missing_values()

In [12]:
!pip install autogluon.timeseries



In [13]:
from autogluon.timeseries import TimeSeriesPredictor

# Entrenamiento del modelo y guardado automático en carpeta personalizada
predictor = TimeSeriesPredictor(
    path="modelo_autogluon_guardado",  # 👈 ruta personalizada donde se guarda el modelo
    prediction_length=2,
    target='tn',
    freq='MS'
)

predictor.fit(
    ts_data,
    presets='best_quality',
    num_val_windows=5,
    time_limit=3600,
)

Beginning AutoGluon training... Time limit = 3600s
AutoGluon will save models to '/kaggle/working/modelo_autogluon_guardado'
AutoGluon Version:  1.3.1
Python Version:     3.11.13
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP PREEMPT_DYNAMIC Sun Nov 10 10:07:59 UTC 2024
CPU Count:          4
GPU Count:          1
Memory Avail:       29.50 GB / 31.35 GB (94.1%)
Disk Space Avail:   19.50 GB / 19.52 GB (99.9%)
Setting presets to: best_quality

Fitting with arguments:
{'enable_ensemble': True,
 'eval_metric': WQL,
 'freq': 'MS',
 'hyperparameters': 'default',
 'known_covariates_names': [],
 'num_val_windows': 5,
 'prediction_length': 2,
 'quantile_levels': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
 'random_seed': 123,
 'refit_every_n_windows': 1,
 'refit_full': False,
 'skip_model_selection': False,
 'target': 'tn',
 'time_limit': 3600,
 'verbosity': 2}

train_data with frequency 'IRREG' has been resampled to frequency 'MS'.
Provided train_data has 223

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/821M [00:00<?, ?B/s]

	-0.1979       = Validation score (-WQL)
	36.70   s     = Training runtime
	1.98    s     = Validation (prediction) runtime
Training timeseries model ChronosFineTuned[bolt_small]. Training for up to 574.4s of the 3446.5s of remaining time.
	Skipping covariate_regressor since the dataset contains no covariates or static features.


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/191M [00:00<?, ?B/s]

	Saving fine-tuned model to /kaggle/working/modelo_autogluon_guardado/models/ChronosFineTuned[bolt_small]/W0/fine-tuned-ckpt
	Skipping covariate_regressor since the dataset contains no covariates or static features.
	Saving fine-tuned model to /kaggle/working/modelo_autogluon_guardado/models/ChronosFineTuned[bolt_small]/W1/fine-tuned-ckpt
	Skipping covariate_regressor since the dataset contains no covariates or static features.
	Saving fine-tuned model to /kaggle/working/modelo_autogluon_guardado/models/ChronosFineTuned[bolt_small]/W2/fine-tuned-ckpt
	Skipping covariate_regressor since the dataset contains no covariates or static features.
	Saving fine-tuned model to /kaggle/working/modelo_autogluon_guardado/models/ChronosFineTuned[bolt_small]/W3/fine-tuned-ckpt
	Skipping covariate_regressor since the dataset contains no covariates or static features.
	Saving fine-tuned model to /kaggle/working/modelo_autogluon_guardado/models/ChronosFineTuned[bolt_small]/W4/fine-tuned-ckpt
	-0.1930   

<autogluon.timeseries.predictor.TimeSeriesPredictor at 0x7b3475c2a1d0>

In [14]:
from autogluon.timeseries import TimeSeriesPredictor

predictor = TimeSeriesPredictor.load("modelo_autogluon_guardado")
predictor.leaderboard(ts_data, silent=True)

Loading predictor from path /kaggle/working/modelo_autogluon_guardado
data with frequency 'IRREG' has been resampled to frequency 'MS'.
Additional data provided, testing on additional data. Resulting leaderboard will be sorted according to test score (`score_test`).


Unnamed: 0,model,score_test,score_val,pred_time_test,pred_time_val,fit_time_marginal,fit_order
0,TemporalFusionTransformer,-0.180739,-0.190562,0.53588,0.342809,336.455718,9
1,ChronosFineTuned[bolt_small],-0.183669,-0.193032,0.741137,0.071185,328.103194,8
2,WeightedEnsemble,-0.184422,-0.176077,12.551904,11.427441,7.148927,13
3,DeepAR,-0.205054,-0.196111,0.602854,0.388145,326.628862,10
4,TiDE,-0.207584,-0.188622,0.969625,0.765512,710.706101,12
5,ChronosZeroShot[bolt_base],-0.211922,-0.19792,1.993482,1.976008,36.703136,7
6,AutoETS,-0.215147,-0.207196,8.725089,8.691619,28.484118,6
7,PatchTST,-0.21609,-0.190533,0.470833,0.347009,172.504754,11
8,DynamicOptimizedTheta,-0.221798,-0.202014,5.156441,1.198734,8.271274,5
9,DirectTabular,-0.233167,-0.230648,0.095233,0.08049,42.806079,3


In [15]:
# Cargar el predictor guardado
from autogluon.timeseries import TimeSeriesPredictor

predictor = TimeSeriesPredictor.load("modelo_autogluon_guardado")


Loading predictor from path /kaggle/working/modelo_autogluon_guardado


In [16]:
from autogluon.tabular import TabularPredictor
import pandas as pd


In [17]:
# Separar features y target
df_train = df_tabular[~df_tabular['tn'].isna()]
df_test  = df_tabular[df_tabular['tn'].isna()]


NameError: name 'df_tabular' is not defined

In [None]:
predictor_tab = TabularPredictor(
    label="tn", 
    problem_type="regression", 
    eval_metric="mean_absolute_error"
).fit(df_train, time_limit=600)  # 10 minutos de prueba


In [None]:
pred_tab = predictor_tab.predict(df_test)


In [None]:
submission = pd.DataFrame({
    "product_id": df_test["product_id"],
    "tn": pred_tab
})

submission.to_csv("submission_tabular.csv", index=False)
submission.head()
