In [3]:
pip install pandas

Collecting pandas
  Downloading pandas-2.3.0-cp310-cp310-win_amd64.whl (11.1 MB)
     ---------------------------------------- 0.0/11.1 MB ? eta -:--:--
     ---------------------------------------- 0.1/11.1 MB 2.6 MB/s eta 0:00:05
     - -------------------------------------- 0.4/11.1 MB 4.5 MB/s eta 0:00:03
     ---- ----------------------------------- 1.2/11.1 MB 9.5 MB/s eta 0:00:02
     -------------- ------------------------- 4.0/11.1 MB 23.3 MB/s eta 0:00:01
     ---------------------- ----------------- 6.4/11.1 MB 31.3 MB/s eta 0:00:01
     ------------------------- -------------- 7.0/11.1 MB 29.9 MB/s eta 0:00:01
     ----------------------------------- --- 10.1/11.1 MB 34.0 MB/s eta 0:00:01
     --------------------------------------- 11.1/11.1 MB 43.7 MB/s eta 0:00:00
Collecting tzdata>=2022.7
  Downloading tzdata-2025.2-py2.py3-none-any.whl (347 kB)
     ---------------------------------------- 0.0/347.8 kB ? eta -:--:--
     ---------------------------------------- 347.8/3


[notice] A new release of pip is available: 23.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [4]:
pip install pathlib

Collecting pathlib
  Downloading pathlib-1.0.1-py3-none-any.whl (14 kB)
Installing collected packages: pathlib
Successfully installed pathlib-1.0.1
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [4]:
pip install --upgrade pip

Collecting pip
  Using cached pip-25.1.1-py3-none-any.whl (1.8 MB)
Installing collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 23.0.1
    Uninstalling pip-23.0.1:
      Successfully uninstalled pip-23.0.1
Successfully installed pip-25.1.1
Note: you may need to restart the kernel to use updated packages.


In [5]:
pip install autogluon.timeseries

Collecting autogluon.timeseries
  Using cached autogluon.timeseries-1.3.1-py3-none-any.whl.metadata (12 kB)
Collecting torch<2.7,>=2.2 (from autogluon.timeseries)
  Using cached torch-2.6.0-cp310-cp310-win_amd64.whl.metadata (28 kB)
Collecting lightning<2.7,>=2.2 (from autogluon.timeseries)
  Using cached lightning-2.5.2-py3-none-any.whl.metadata (38 kB)
Collecting pytorch-lightning (from autogluon.timeseries)
  Using cached pytorch_lightning-2.5.2-py3-none-any.whl.metadata (21 kB)
Collecting transformers<4.50,>=4.38.0 (from transformers[sentencepiece]<4.50,>=4.38.0->autogluon.timeseries)
  Using cached transformers-4.49.0-py3-none-any.whl.metadata (44 kB)
Collecting accelerate<2.0,>=0.34.0 (from autogluon.timeseries)
  Using cached accelerate-1.8.1-py3-none-any.whl.metadata (19 kB)
Collecting gluonts<0.17,>=0.15.0 (from autogluon.timeseries)
  Using cached gluonts-0.16.2-py3-none-any.whl.metadata (9.8 kB)
Collecting statsforecast<2.0.2,>=1.7.0 (from autogluon.timeseries)
  Using cache

In [11]:
"""
run_autogluon.py
Genera submission_t780_autogluon.csv usando AutoGluon TimeSeries
"""

import pandas as pd
from pathlib import Path
from autogluon.timeseries import TimeSeriesPredictor, TimeSeriesDataFrame

# ─── Rutas  ─────────────────────────────────────────────
BASE_DIR       = Path(r"C:\Maestria\Labo 3")          # carpeta de trabajo
RUTA_VENTAS    = BASE_DIR / "sell-in.txt"             # ventas históricas
RUTA_LISTA_780 = BASE_DIR / "780_a_predecir.txt"      # lista de 780 SKU
OUTPUT_CSV     = BASE_DIR / "submission_t780_autogluon.csv"
# ───────────────────────────────────────────────────────

# 1) Leer ventas (detecta separador automático; prueba tabulación si falla)
def leer_flexible(ruta: Path) -> pd.DataFrame:
    try:
        return pd.read_csv(ruta, sep=None, engine="python")
    except Exception:
        return pd.read_csv(ruta, sep="\t")            # fallback tab

df = leer_flexible(RUTA_VENTAS)

# 2) Leer lista exacta de productos (780 filas)
with open(RUTA_LISTA_780, encoding="utf-8") as f:
    product_ids = [
        int(line.strip())
        for line in f
        if line.strip() and not line.lower().startswith("product")
    ]

# 3) Preparar timestamp y filtrar SKU
df["periodo"]   = df["periodo"].astype(str).str.zfill(6)
df["timestamp"] = pd.to_datetime(df["periodo"], format="%Y%m")
df              = df[df["product_id"].isin(product_ids)]

# 4) Agregar item_id y agrupar toneladas mensuales
df["item_id"] = df["product_id"]
df_monthly = (
    df.groupby(["timestamp", "item_id"], as_index=False)["tn"]
      .sum()
      .rename(columns={"tn": "target"})               # AutoGluon espera 'target'
      .sort_values(["item_id", "timestamp"])
)

# 5) Crear TimeSeriesDataFrame y normalizar frecuencia mensual (MS)
ts_data = TimeSeriesDataFrame.from_data_frame(
    df_monthly,
    id_column="item_id",
    timestamp_column="timestamp"
)
ts_data = ts_data.convert_frequency("MS").fill_missing_values()

# 6) Entrenar predictor (30 min máx) con freq explícita
predictor = TimeSeriesPredictor(
    prediction_length=1,
    target="target",
    freq="MS"
)
predictor.fit(ts_data, time_limit=1800)

# 7) Predecir 1 mes adelante
forecast   = predictor.predict(ts_data)
pred_mean  = forecast["mean"].reset_index()
last_date  = pred_mean["timestamp"].max()

# 8) Formatear resultado
resultado = (
    pred_mean[pred_mean["timestamp"] == last_date][["item_id", "mean"]]
      .rename(columns={"item_id": "product_id", "mean": "tn"})
      .sort_values("product_id")
)
resultado["tn"] = resultado["tn"].round(5)

# 9) Guardar CSV
resultado.to_csv(OUTPUT_CSV, index=False, float_format="%.5f")
print(f"✅  Archivo generado: {OUTPUT_CSV}")




Beginning AutoGluon training... Time limit = 1800s
AutoGluon will save models to 'c:\Maestria\Labo 3\AutogluonModels\ag-20250706_211832'
AutoGluon Version:  1.3.1
Python Version:     3.10.11
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.26100
CPU Count:          8
GPU Count:          0
Memory Avail:       2.25 GB / 11.65 GB (19.3%)
Disk Space Avail:   342.96 GB / 476.18 GB (72.0%)

Fitting with arguments:
{'enable_ensemble': True,
 'eval_metric': WQL,
 'freq': 'MS',
 'hyperparameters': 'default',
 'known_covariates_names': [],
 'num_val_windows': 1,
 'prediction_length': 1,
 'quantile_levels': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
 'random_seed': 123,
 'refit_every_n_windows': 1,
 'refit_full': False,
 'skip_model_selection': False,
 'target': 'target',
 'time_limit': 1800,
 'verbosity': 2}

Provided train_data has 22375 rows, 780 time series. Median time series length is 36 (min=4, max=36). 
	Removing 24 short time series from train_data. Only

✅  Archivo generado: C:\Maestria\Labo 3\submission_t780_autogluon.csv


autogluon 2

In [13]:
"""
AutoGluon + semilla del profe (blend priorizado)
Genera submission_t780_autogluon_seed.csv
"""

import pandas as pd, numpy as np
from pathlib import Path
from autogluon.timeseries import TimeSeriesPredictor, TimeSeriesDataFrame

# ── rutas ───────────────────────────────
BASE = Path(r"C:\Maestria\Labo 3")
VENTAS = BASE / "sell-in.txt"
LISTA  = BASE / "780_a_predecir.txt"
OUT    = BASE / "submission_t780_autogluon_seed.csv"
# ── SKU mágicos ─────────────────────────
MAGICOS = [
    20002,20003,20006,20010,20011,20018,20019,20021,20026,20028,
    20035,20039,20042,20044,20045,20046,20049,20051,20052,20053,
    20055,20008,20001,20017,20086,20180,20193,20320,20532,20612,
    20637,20807,20838
]
# ── coeficientes semilla ────────────────
SEED_INTER  = 0.441467
SEED_COEF   = np.array([
   -0.001339, 0.236558, 0.178208, -0.060031,
   -0.161875, -0.007775, 0.151936, 0.043933,
    0.142839, 0.103804, 0.119211, 0.073671
])
# ────────────────────────────────────────

# 1) leer ventas + lista 780
df = pd.read_csv(VENTAS, sep=None, engine="python")
sku780 = [int(l.strip()) for l in open(LISTA)
          if l.strip() and not l.lower().startswith("product")]
df = df[df.product_id.isin(sku780)]

# 2) preparar fecha y tabla mensual
df["periodo"] = df["periodo"].astype(str).str.zfill(6)
df["fecha"]   = pd.to_datetime(df.periodo, format="%Y%m")
mensual = (df.groupby(["product_id","fecha"])["tn"]
             .sum().unstack(level=0).fillna(0).sort_index())

# 3) armar TimeSeriesDataFrame (¡renombrar primero!)
ts_df = (df.rename(columns={"fecha":"timestamp"})
           .assign(item_id=df.product_id, target=df.tn)
           .loc[:, ["timestamp", "item_id", "target"]])

ts_data = (TimeSeriesDataFrame.from_data_frame(
                ts_df, id_column="item_id", timestamp_column="timestamp")
           .convert_frequency("MS")
           .fill_missing_values())

# 4) entrenar AutoGluon (30 min máx)
predictor = TimeSeriesPredictor(prediction_length=1, target="target", freq="MS")
predictor.fit(ts_data, time_limit=1800)

# 5) pred AutoGluon para t+1
ag_pred = (predictor.predict(ts_data)["mean"]
             .reset_index()
             .groupby("item_id", as_index=False)
             .last()                       # sólo la última fecha
             .rename(columns={"item_id":"product_id","mean":"tn_ag"}))

# 6) semilla del profe (lags del último mes)
last = mensual.index.max()
seed_rows = []
for pid in sku780:
    serie = mensual[pid]
    if len(serie) >= 12:
        lags = np.array([serie.iloc[-1 - l] for l in range(12)])
        seed_pred = SEED_INTER + SEED_COEF.dot(lags)
    else:
        seed_pred = np.nan
    seed_rows.append((pid, seed_pred))
seed_df = pd.DataFrame(seed_rows, columns=["product_id","tn_seed"])

# 7) promedio 12 m (fallback)
prom12 = (mensual.tail(12).mean()
            .reset_index()
            .rename(columns={0:"tn_prom"}))

# 8) combinar todo
df_all = (prom12.merge(ag_pred,  on="product_id", how="left")
                   .merge(seed_df, on="product_id", how="left"))

def blend(row):
    ag, sd, pr = row.tn_ag, row.tn_seed, row.tn_prom
    pid = row.product_id
    # casos
    if not np.isnan(sd) and not np.isnan(ag):
        w_seed = 0.8 if pid in MAGICOS else 0.5
        return w_seed*sd + (1-w_seed)*ag
    if not np.isnan(sd):
        return sd
    if not np.isnan(ag):
        return ag
    return pr

df_all["tn"] = df_all.apply(blend, axis=1).round(5)

# 9) guardar CSV
df_all[["product_id","tn"]].to_csv(OUT, index=False, float_format="%.5f")
print(f"✅  CSV AutoGluon+semilla → {OUT}")


Trying to fill missing values in an unsorted dataframe. It is highly recommended to call `ts_df.sort_index()` before calling `ts_df.fill_missing_values()`
Beginning AutoGluon training... Time limit = 1800s
AutoGluon will save models to 'c:\Maestria\Labo 3\AutogluonModels\ag-20250706_222611'
AutoGluon Version:  1.3.1
Python Version:     3.10.11
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.26100
CPU Count:          8
GPU Count:          0
Memory Avail:       2.35 GB / 11.65 GB (20.1%)
Disk Space Avail:   341.47 GB / 476.18 GB (71.7%)

Fitting with arguments:
{'enable_ensemble': True,
 'eval_metric': WQL,
 'freq': 'MS',
 'hyperparameters': 'default',
 'known_covariates_names': [],
 'num_val_windows': 1,
 'prediction_length': 1,
 'quantile_levels': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
 'random_seed': 123,
 'refit_every_n_windows': 1,
 'refit_full': False,
 'skip_model_selection': False,
 'target': 'target',
 'time_limit': 1800,
 'verbosity': 2}



✅  CSV AutoGluon+semilla → C:\Maestria\Labo 3\submission_t780_autogluon_seed.csv
