In [1]:
import pandas as pd
from pathlib import Path

# ─── Rutas ─────────────────────────────
BASE_DIR       = Path(r"C:\Maestria\Labo 3")
RUTA_VENTAS    = BASE_DIR / "sell-in.txt"
RUTA_LISTA_780 = BASE_DIR / "780_a_predecir.txt"
OUT_CSV        = BASE_DIR / "submission_t780_win12.csv"
WINDOW_MESES   = 12
# ───────────────────────────────────────

# 1) Leer ventas
df = pd.read_csv(RUTA_VENTAS, sep=None, engine="python")

# 2) Leer lista de 780 productos
with open(RUTA_LISTA_780, encoding="utf-8") as f:
    product_ids = [
        int(l.strip()) for l in f
        if l.strip() and not l.lower().startswith("product")
    ]
lista = pd.DataFrame({'product_id': product_ids})

# 3) Preparar fecha
df['periodo'] = df['periodo'].astype(str).str.zfill(6)
df['anio']    = df['periodo'].str[:4].astype(int)
df['mes']     = df['periodo'].str[4:6].astype(int)
df['fecha']   = pd.to_datetime(
    df['anio'].astype(str) + df['mes'].astype(str) + '01',
    format='%Y%m%d'
)

# 4) Filtrar sólo los 780 SKU
df = df[df['product_id'].isin(product_ids)]

# 5) Serie mensual por SKU y media móvil de 12 meses
serie = (df.groupby(['product_id', 'fecha'])['tn']
           .sum()
           .sort_index()
           .groupby(level=0))

pred = (serie.apply(lambda s: s.tail(WINDOW_MESES).mean())
              .reset_index()
              .groupby('product_id', as_index=False)['tn']
              .last())

# 6) Completar faltantes con 0 y redondear
pred = lista.merge(pred, on='product_id', how='left')
pred['tn'] = pred['tn'].fillna(0).round(5)

# 7) Guardar CSV
pred.to_csv(OUT_CSV, index=False, float_format="%.5f")
print(f"✅  CSV promedio 12 meses guardado → {OUT_CSV.name}")


✅  CSV promedio 12 meses guardado → submission_t780_win12.csv


In [3]:
pip install sklearn

Collecting sklearn
  Downloading sklearn-0.0.post12.tar.gz (2.6 kB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'error'
Note: you may need to restart the kernel to use updated packages.


  error: subprocess-exited-with-error
  
  × Getting requirements to build wheel did not run successfully.
  │ exit code: 1
  ╰─> [15 lines of output]
      The 'sklearn' PyPI package is deprecated, use 'scikit-learn'
      rather than 'sklearn' for pip commands.
      
      Here is how to fix this error in the main use cases:
      - use 'pip install scikit-learn' rather than 'pip install sklearn'
      - replace 'sklearn' by 'scikit-learn' in your pip requirements files
        (requirements.txt, setup.py, setup.cfg, Pipfile, etc ...)
      - if the 'sklearn' package is used by one of your dependencies,
        it would be great if you take some time to track which package uses
        'sklearn' instead of 'scikit-learn' and report it to their issue tracker
      - as a last resort, set the environment variable
        SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True to avoid this error
      
      More information is available at
      https://github.com/scikit-learn/sklearn-

In [5]:
pip install scikit-learn


Collecting scikit-learn
  Downloading scikit_learn-1.7.0-cp313-cp313-win_amd64.whl.metadata (14 kB)
Downloading scikit_learn-1.7.0-cp313-cp313-win_amd64.whl (10.7 MB)
   ---------------------------------------- 0.0/10.7 MB ? eta -:--:--
   --------------------- ------------------ 5.8/10.7 MB 43.1 MB/s eta 0:00:01
   ---------------------------------------- 10.7/10.7 MB 45.9 MB/s eta 0:00:00
Installing collected packages: scikit-learn
Successfully installed scikit-learn-1.7.0
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


mejora regresion lineal

In [6]:
"""
submission_t780_lr.py
Regresión lineal estilo 'Gustavo Denicolay' + promedio fallback
"""

import pandas as pd
import numpy as np
from pathlib import Path
from sklearn.linear_model import LinearRegression

# ───── CONFIG RUTAS ──────────────────────────
BASE_DIR       = Path(r"C:\Maestria\Labo 3")
RUTA_VENTAS    = BASE_DIR / "sell-in.txt"
RUTA_LISTA_780 = BASE_DIR / "780_a_predecir.txt"
OUT_CSV        = BASE_DIR / "submission_t780_lr.csv"
# ──────────────────────────────────────────────

# 0) SKU “mágicos”
magicos = [
    20002, 20003, 20006, 20010, 20011, 20018, 20019, 20021,
    20026, 20028, 20035, 20039, 20042, 20044, 20045, 20046, 20049,
    20051, 20052, 20053, 20055, 20008, 20001, 20017, 20086, 20180,
    20193, 20320, 20532, 20612, 20637, 20807, 20838
]

# 1) Leer datos + lista 780
df = pd.read_csv(RUTA_VENTAS, sep=None, engine="python")
with open(RUTA_LISTA_780, encoding="utf-8") as f:
    sku_780 = [int(l.strip()) for l in f if l.strip() and not l.lower().startswith("product")]

df = df[df["product_id"].isin(sku_780)]

# 2) Preparar fecha y agrupar tn por mes
df["periodo"] = df["periodo"].astype(str).str.zfill(6)
df["fecha"]   = pd.to_datetime(df["periodo"], format="%Y%m")

mensual = (
    df.groupby(["product_id", "fecha"])["tn"]
      .sum()
      .sort_index()
      .unstack(level=0)
      .fillna(0)
)

# 3) Calcular lags tn, tn_1, …, tn_11
lags = {0: "tn"}
for l in range(1, 12):
    lags[l] = f"tn_{l}"

feature_rows = []
for pid in mensual.columns:
    serie = mensual[pid]
    for idx in range(11, len(serie)):  # necesitamos al menos 12 puntos
        row = {"product_id": pid, "fecha": serie.index[idx]}
        for lag, name in lags.items():
            row[name] = serie.iloc[idx - lag]
        feature_rows.append(row)

feat_df = pd.DataFrame(feature_rows)

# 4) Crear la columna 'clase' = tn del mes+2
feat_df["clase_fecha"] = feat_df["fecha"] + pd.DateOffset(months=2)
clase_lookup = mensual.stack()
feat_df["clase"] = feat_df.apply(
    lambda r: clase_lookup.get((r["clase_fecha"], r["product_id"]), np.nan),
    axis=1
)

# 5) Dataset training = registros 201812 de los 33 mágicos (con lags completos)
train_df = feat_df[
    (feat_df["fecha"].dt.year == 2018) &
    (feat_df["fecha"].dt.month == 12) &
    (feat_df["product_id"].isin(magicos))
].dropna()

X_train = train_df[[f"tn_{l}" if l else "tn" for l in range(12)]]
y_train = train_df["clase"]

# 6) Entrenar regresión lineal
lr = LinearRegression()
lr.fit(X_train, y_train)

# 7) Aplicar al último mes disponible
last_month = mensual.index.max()
pred_feat = feat_df[feat_df["fecha"] == last_month].copy()

X_pred_full = pred_feat[[f"tn_{l}" if l else "tn" for l in range(12)]]
pred_feat["pred_lr"] = lr.predict(X_pred_full)

# 8) Fallback promedio 12 meses
prom12 = (
    mensual.tail(12).mean().round(5).reset_index()
    .rename(columns={0: "tn_prom12"})
)

# 9) Ensamblar resultado final
pred_lr = pred_feat[["product_id", "pred_lr"]].rename(columns={"pred_lr": "tn"})
pred_lr["metodo"] = "lr"

pred_prom = prom12.rename(columns={"tn_prom12": "tn"})
pred_prom["metodo"] = "prom"

#   Combinar: si SKU tiene pred_lr (696 completos), usarla; si no, usa prom
result = (
    pred_lr.set_index("product_id")
    .combine_first(pred_prom.set_index("product_id"))
    .reset_index()
)

result["tn"] = result["tn"].round(5)

# 10) Guardar CSV
result[["product_id", "tn"]].to_csv(OUT_CSV, index=False, float_format="%.5f")
print(f"✅  CSV regresión lineal guardado → {OUT_CSV.name}")


✅  CSV regresión lineal guardado → submission_t780_lr.csv


intento de mejora

In [8]:
import pandas as pd
import numpy as np
from pathlib import Path
from sklearn.linear_model import RidgeCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

# ─── rutas ─────────────────────────────────
BASE_DIR       = Path(r"C:\Maestria\Labo 3")
RUTA_VENTAS    = BASE_DIR / "sell-in.txt"
RUTA_LISTA_780 = BASE_DIR / "780_a_predecir.txt"
OUT_CSV        = BASE_DIR / "submission_t780_ridge_ens.csv"
# ───────────────────────────────────────────

# 1) leer ventas + lista 780
df = pd.read_csv(RUTA_VENTAS, sep=None, engine="python")
with open(RUTA_LISTA_780, encoding="utf-8") as f:
    sku_780 = [int(l.strip()) for l in f if l.strip() and not l.lower().startswith("product")]
df = df[df["product_id"].isin(sku_780)]

# 2) fecha & tabla mensual
df["periodo"] = df["periodo"].astype(str).str.zfill(6)
df["fecha"]   = pd.to_datetime(df["periodo"], format="%Y%m")
mensual = (df.groupby(["product_id", "fecha"])["tn"]
             .sum()
             .unstack(level=0)
             .sort_index()
             .fillna(0))

# 3) construir lags y clase (tn_{0..11}, clase = t+2)
rows = []
for pid in mensual.columns:
    serie = mensual[pid]
    for i in range(len(serie) - 11):        # asegura 12 lags
        t   = serie.index[i + 11]           # fecha con 12 meses completos
        row = {"product_id": pid, "fecha": t}
        for l in range(12):
            row[f"tn_{l}" if l else "tn"] = serie.iloc[i + 11 - l]
        clase_idx = i + 13                  # t + 2
        if clase_idx < len(serie):
            row["clase"] = serie.iloc[clase_idx]
        else:
            row["clase"] = np.nan
        rows.append(row)
feat_df = pd.DataFrame(rows)

# 4) train set = sept-dic 2018 con clase no nula
mask_train = (feat_df["fecha"] >= "2018-09-01") & (feat_df["fecha"] <= "2018-12-01")
train_df   = feat_df[mask_train].dropna()
X_cols     = [f"tn_{l}" if l else "tn" for l in range(12)]
X_train, y_train = train_df[X_cols], train_df["clase"]

# 5) modelo RidgeCV escalado
ridge = make_pipeline(StandardScaler(with_mean=False),
                      RidgeCV(alphas=[0.01, 0.1, 1, 10, 100]))
ridge.fit(X_train, y_train)

# 6) predicción SKU × SKU para el último mes observado
last_month = mensual.index.max()
pred_list  = []
for pid in sku_780:
    serie = mensual[pid]
    if len(serie) < 12:
        pred_lr = np.nan                       # fallback luego
    else:
        feats = [serie.iloc[-1 - l] for l in range(12)]
        pred_lr = ridge.predict([feats])[0]
    prom12 = serie.tail(12).mean()
    # ensemble: si hay ridge, 0.5 ridge + 0.5 prom; si no, solo prom
    tn_final = 0.5 * pred_lr + 0.5 * prom12 if not np.isnan(pred_lr) else prom12
    pred_list.append((pid, round(tn_final, 5)))

# 7) CSV
submission = pd.DataFrame(pred_list, columns=["product_id", "tn"]).sort_values("product_id")
submission.to_csv(OUT_CSV, index=False, float_format="%.5f")
print(f"✅  CSV listo → {OUT_CSV}")


✅  CSV listo → C:\Maestria\Labo 3\submission_t780_ridge_ens.csv




In [10]:
pip install lightgbm

Collecting lightgbm
  Using cached lightgbm-4.6.0-py3-none-win_amd64.whl.metadata (17 kB)
Using cached lightgbm-4.6.0-py3-none-win_amd64.whl (1.5 MB)
Installing collected packages: lightgbm
Successfully installed lightgbm-4.6.0
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


lgbm

In [12]:
import pandas as pd, numpy as np
from pathlib import Path
import lightgbm as lgb
from pandas.tseries.offsets import MonthBegin

# ─── RUTAS ─────────────────────────────
BASE = Path(r"C:\Maestria\Labo 3")
VENTAS = BASE / "sell-in.txt"
LISTA  = BASE / "780_a_predecir.txt"
OUT    = BASE / "submission_t780_lgbm.csv"
# ───────────────────────────────────────

# 1) datos
df = pd.read_csv(VENTAS, sep=None, engine="python")
with open(LISTA) as f:
    sku780 = [int(l.strip()) for l in f if l.strip() and not l.lower().startswith("product")]
df = df[df.product_id.isin(sku780)]

df["periodo"] = df["periodo"].astype(str).str.zfill(6)
df["fecha"]   = pd.to_datetime(df["periodo"], format="%Y%m")

mensual = (df.groupby(["product_id", "fecha"])["tn"]
             .sum()
             .unstack(level=0)
             .fillna(0)
             .sort_index())

last_date  = mensual.index.max()
pred_date  = last_date + MonthBegin()
valid_date = last_date
train_cut  = valid_date - MonthBegin()

rows = []
for pid in mensual.columns:
    serie = mensual[pid]
    for fecha in serie.index:
        if fecha < pred_date:
            r = {"product_id": pid, "fecha": fecha, "tn": serie.loc[fecha]}
            for l in range(1, 13):
                r[f"lag{l}"] = serie.shift(l).loc[fecha]
            r["month"] = fecha.month
            r["is_q4"] = int(fecha.month >= 10)
            rows.append(r)
feat_df = pd.DataFrame(rows)

train = feat_df[feat_df.fecha <= train_cut].dropna()
valid = feat_df[feat_df.fecha == valid_date].dropna()

X_cols = [c for c in train.columns if c.startswith("lag")] + ["month", "is_q4"]
y_col  = "tn"

model = lgb.LGBMRegressor(
    objective="rmse",
    learning_rate=0.05,
    n_estimators=800,
    num_leaves=63,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42
)

model.fit(
    train[X_cols], train[y_col],
    eval_set=[(valid[X_cols], valid[y_col])],
    eval_metric="rmse",
    callbacks=[lgb.early_stopping(stopping_rounds=50, verbose=False)]
)

# ── features futuro
pred_rows = []
for pid in sku780:
    serie = mensual[pid]
    r = {"product_id": pid}
    for l in range(1, 13):
        r[f"lag{l}"] = serie.iloc[-l] if len(serie) >= l else np.nan
    r["month"] = pred_date.month
    r["is_q4"] = int(pred_date.month >= 10)
    pred_rows.append(r)

pred_df = pd.DataFrame(pred_rows)
pred_df["tn_pred"] = model.predict(pred_df[X_cols])

media12 = mensual.tail(12).mean()
pred_df["tn_final"] = np.where(
    pred_df[X_cols].isna().any(axis=1),
    media12.values,
    0.8 * pred_df["tn_pred"] + 0.2 * media12.values
)
pred_df["tn_final"] = pred_df["tn_final"].round(5)

pred_df[["product_id", "tn_final"]].rename(columns={"tn_final": "tn"}).to_csv(
    OUT, index=False, float_format="%.5f"
)
print(f"✅  submission LightGBM guardada → {OUT}")


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000700 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3075
[LightGBM] [Info] Number of data points in the train set: 17940, number of used features: 14
[LightGBM] [Info] Start training from score 38.839856
✅  submission LightGBM guardada → C:\Maestria\Labo 3\submission_t780_lgbm.csv


el del profe pero mejorado

In [13]:
"""
submission_t780_lr_quarter.py
Regresión lineal estilo Gustavo pero a nivel trimestre
"""

import pandas as pd
import numpy as np
from pathlib import Path
from sklearn.linear_model import LinearRegression

# ───── CONFIG ─────────────────────────────
BASE = Path(r"C:\Maestria\Labo 3")
VENTAS = BASE / "sell-in.txt"
LISTA  = BASE / "780_a_predecir.txt"
OUT    = BASE / "submission_t780_lr_quarter.csv"
MAGICOS = [
    20002, 20003, 20006, 20010, 20011, 20018, 20019, 20021,
    20026, 20028, 20035, 20039, 20042, 20044, 20045, 20046, 20049,
    20051, 20052, 20053, 20055, 20008, 20001, 20017, 20086, 20180,
    20193, 20320, 20532, 20612, 20637, 20807, 20838
]
# ──────────────────────────────────────────

# 1) Datos
df = pd.read_csv(VENTAS, sep=None, engine="python")
with open(LISTA) as f:
    sku = [int(l.strip()) for l in f if l.strip() and not l.lower().startswith("product")]
df = df[df.product_id.isin(sku)]

# 2) Fecha → trimestre
df["periodo"] = df["periodo"].astype(str).str.zfill(6)
df["fecha"] = pd.to_datetime(df["periodo"], format="%Y%m")
df["quarter"] = df["fecha"].dt.to_period("Q").dt.start_time  # ej. 2018Q4 → 2018-10-01

# 3) Toneladas trimestrales
tri = (df.groupby(["product_id", "quarter"])["tn"]
         .sum()
         .unstack(level=0)
         .fillna(0)
         .sort_index())

# 4) Lags (0-3) y clase (t+2)
rows = []
for pid in tri.columns:
    s = tri[pid]
    for i in range(3, len(s)):            # necesitamos 4 trimestres de historia
        t = s.index[i]
        row = {"product_id": pid, "quarter": t, "tnQ": s.iloc[i]}
        for l in range(1, 4):
            row[f"tnQ_{l}"] = s.iloc[i - l]
        # clase = tn del trimestre +2
        idx_clase = i + 2
        row["clase"] = s.iloc[idx_clase] if idx_clase < len(s) else np.nan
        rows.append(row)
feat = pd.DataFrame(rows)

# 5) Train = 2018-Q4 de SKU mágicos (sin NaN)
train = feat[
    (feat["quarter"] == pd.Timestamp("2018-10-01")) &
    (feat["product_id"].isin(MAGICOS))
].dropna()

X_cols = ["tnQ", "tnQ_1", "tnQ_2", "tnQ_3"]
X_train, y_train = train[X_cols], train["clase"]

lr = LinearRegression().fit(X_train, y_train)

# 6) Predicción para último trimestre
last_q = tri.index.max()
feat_last = feat[feat["quarter"] == last_q]

# Si alguno no tiene los 3 lags, quedará NaN
X_pred = feat_last[X_cols]
feat_last["pred_lr"] = lr.predict(X_pred)

# 7) Fallback promedio 4 trimestres
mean4 = tri.tail(4).mean().reset_index().rename(columns={0: "mean4"})
merged = mean4.merge(feat_last[["product_id", "pred_lr"]], on="product_id", how="left")

merged["tn"] = np.where(
    merged["pred_lr"].notna(),
    0.5 * merged["pred_lr"] + 0.5 * merged["mean4"],   # pequeño blend
    merged["mean4"]
)
merged["tn"] = merged["tn"].round(5)

# 8) CSV
merged[["product_id", "tn"]].to_csv(OUT, index=False, float_format="%.5f")
print(f"✅  CSV trimestral guardado → {OUT}")


✅  CSV trimestral guardado → C:\Maestria\Labo 3\submission_t780_lr_quarter.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  feat_last["pred_lr"] = lr.predict(X_pred)


In [14]:
import pandas as pd, numpy as np
from pathlib import Path
from pandas.tseries.offsets import MonthBegin

# --- RUTAS -------------------------------------------------
BASE = Path(r"C:\Maestria\Labo 3")
VENTAS = BASE / "sell-in.txt"
LISTA  = BASE / "780_a_predecir.txt"
OUT    = BASE / "submission_t780_mix_lag.csv"
# -----------------------------------------------------------

# 1) datos
df = pd.read_csv(VENTAS, sep=None, engine="python")
with open(LISTA) as f:
    sku = [int(l.strip()) for l in f if l.strip() and not l.lower().startswith("product")]
df = df[df.product_id.isin(sku)]

df["periodo"] = df["periodo"].astype(str).str.zfill(6)
df["fecha"]   = pd.to_datetime(df["periodo"], format="%Y%m")

# tabla mensual tn
mensual = (df.groupby(["product_id", "fecha"])["tn"]
             .sum()
             .unstack(level=0)
             .fillna(0)
             .sort_index())

pred_rows = []
for pid in sku:
    serie = mensual[pid]

    # lag1 y lag12 (usar 0 si no existe)
    lag1  = serie.iloc[-1]            if len(serie) >= 1  else 0.0
    lag12 = serie.iloc[-12]           if len(serie) >= 12 else lag1

    # mezcla 80/20   ― ajustable
    tn_pred = 0.8 * lag1 + 0.2 * lag12
    pred_rows.append((pid, round(tn_pred, 5)))

# CSV
pd.DataFrame(pred_rows, columns=["product_id", "tn"]).to_csv(
    OUT, index=False, float_format="%.5f"
)
print(f"✅  submission simple 80/20 guardada → {OUT}")


✅  submission simple 80/20 guardada → C:\Maestria\Labo 3\submission_t780_mix_lag.csv


ridge mejorado

In [15]:
import pandas as pd, numpy as np
from pathlib import Path
from sklearn.linear_model import RidgeCV, LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

# ── Rutas ────────────────────────────────
BASE = Path(r"C:\Maestria\Labo 3")
VENTAS = BASE / "sell-in.txt"
LISTA  = BASE / "780_a_predecir.txt"
OUT    = BASE / "submission_t780_ridge_seed.csv"
# ─────────────────────────────────────────

MAGICOS = [
    20002,20003,20006,20010,20011,20018,20019,20021,20026,20028,
    20035,20039,20042,20044,20045,20046,20049,20051,20052,20053,
    20055,20008,20001,20017,20086,20180,20193,20320,20532,20612,
    20637,20807,20838
]

# 1) Leer datos
df = pd.read_csv(VENTAS, sep=None, engine="python")
with open(LISTA) as f:
    sku780 = [int(l.strip()) for l in f if l.strip() and not l.lower().startswith("product")]
df = df[df.product_id.isin(sku780)]
df["periodo"] = df["periodo"].astype(str).str.zfill(6)
df["fecha"]   = pd.to_datetime(df["periodo"], format="%Y%m")

# 2) Tabla mensual
mensual = (df.groupby(["product_id","fecha"])["tn"]
             .sum()
             .unstack(level=0)
             .sort_index()
             .fillna(0))

last_month = mensual.index.max()

def build_feat(df_mens):
    rows=[]
    for pid in df_mens.columns:
        s=df_mens[pid]
        for i in range(len(s)-11):
            t=s.index[i+11]
            row={"product_id":pid,"fecha":t}
            for l in range(12):
                row[f"lag{l}"]=s.iloc[i+11-l]
            row["clase"]=s.iloc[i+13] if i+13<len(s) else np.nan
            rows.append(row)
    return pd.DataFrame(rows)

feat=build_feat(mensual)
Xcols=[f"lag{l}" for l in range(12)]

# 3) Ridge global (sept-dic 2018)
mask=(feat.fecha.between("2018-09-01","2018-12-01"))
train_ridge=feat[mask].dropna()
ridge=make_pipeline(StandardScaler(with_mean=False),
                    RidgeCV(alphas=[0.01,0.1,1,10,100]))
ridge.fit(train_ridge[Xcols], train_ridge.clase)

# 4) Seed lineal (2018-12, 33 mágicos)
train_seed=feat[(feat.fecha=="2018-12-01") & (feat.product_id.isin(MAGICOS))].dropna()
seed=LinearRegression().fit(train_seed[Xcols], train_seed.clase)

# 5) Predicciones último mes
pred_rows=[]
for pid in sku780:
    s=mensual[pid]
    if len(s)<12:
        lag_feats=[np.nan]*12
    else:
        lag_feats=[s.iloc[-1-l] for l in range(12)]
    row=dict(product_id=pid, **{c:v for c,v in zip(Xcols,lag_feats)})
    pred_rows.append(row)

pred_df=pd.DataFrame(pred_rows)

# Ridge
mask_full=pred_df[Xcols].notna().all(axis=1)
pred_df["ridge"]=np.nan
pred_df.loc[mask_full,"ridge"]=ridge.predict(pred_df.loc[mask_full,Xcols])

# Seed
mask_seed=mask_full
pred_df["seed"]=np.nan
pred_df.loc[mask_seed,"seed"]=seed.predict(pred_df.loc[mask_seed,Xcols])

# Promedio 12 meses
prom12=mensual.tail(12).mean()
pred_df["prom"]=pred_df.product_id.map(prom12)

# Ensemble final
def mix(row):
    if not np.isnan(row["ridge"]) and not np.isnan(row["seed"]):
        return 0.7*row["ridge"]+0.3*row["seed"]
    if not np.isnan(row["ridge"]):
        return row["ridge"]
    if not np.isnan(row["seed"]):
        return row["seed"]
    return row["prom"]

pred_df["tn"]=pred_df.apply(mix,axis=1).round(5)

# 6) CSV
pred_df[["product_id","tn"]].to_csv(OUT, index=False, float_format="%.5f")
print(f"✅ CSV ensemble guardado → {OUT}")


✅ CSV ensemble guardado → C:\Maestria\Labo 3\submission_t780_ridge_seed.csv


regresion lineal modificado

In [16]:
import pandas as pd, numpy as np
from pathlib import Path
from sklearn.linear_model import LinearRegression

BASE = Path(r"C:\Maestria\Labo 3")
VENTAS, LISTA = BASE/"sell-in.txt", BASE/"780_a_predecir.txt"
OUT = BASE/"submission_t780_lr_full.csv"

# ── leer datos y filtrar 780
df = pd.read_csv(VENTAS, sep=None, engine="python")
sku = [int(l.strip()) for l in open(LISTA) if l.strip() and not l.lower().startswith("product")]
df = df[df.product_id.isin(sku)]

df["periodo"] = df["periodo"].astype(str).str.zfill(6)
df["fecha"]   = pd.to_datetime(df["periodo"], format="%Y%m")
mensual = (df.groupby(["product_id","fecha"])["tn"].sum()
             .unstack(level=0).fillna(0).sort_index())

# ── construir lags 0-11 y clase (t+2)
rows=[]
for pid in mensual.columns:
    s=mensual[pid]
    for i in range(len(s)-11):
        t=s.index[i+11]
        row={"product_id":pid,"fecha":t}
        for l in range(12):
            row[f"lag{l}"]=s.iloc[i+11-l]
        row["clase"]=s.iloc[i+13] if i+13<len(s) else np.nan
        rows.append(row)
feat=pd.DataFrame(rows)

Xcols=[f"lag{l}" for l in range(12)]

# ── train = 2018-12 con lags completos
train=feat[(feat.fecha=="2018-12-01")].dropna()
print(f"Registros completos 2018-12: {len(train)}")
lr=LinearRegression().fit(train[Xcols], train.clase)

# ── predicción último mes
last=mensual.index.max()
pred=[]
for pid in sku:
    s=mensual[pid]
    if len(s)>=12:
        feats=[s.iloc[-1-l] for l in range(12)]
        pred_lr=float(lr.predict([feats])[0])
    else:
        pred_lr=np.nan
    prom12=s.tail(12).mean()
    tn=pred_lr if not np.isnan(pred_lr) else prom12
    pred.append((pid,round(tn,5)))
pd.DataFrame(pred,columns=["product_id","tn"]).to_csv(
    OUT,index=False,float_format="%.5f")
print("✅ CSV lineal →",OUT.name)


Registros completos 2018-12: 780
✅ CSV lineal → submission_t780_lr_full.csv




ridge regularizado

In [17]:
import pandas as pd, numpy as np
from pathlib import Path
from sklearn.linear_model import RidgeCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

BASE=Path(r"C:\Maestria\Labo 3")
VENTAS,LISTA=BASE/"sell-in.txt",BASE/"780_a_predecir.txt"
OUT=BASE/"submission_t780_ridge_full.csv"

df=pd.read_csv(VENTAS,sep=None,engine="python")
sku=[int(l.strip()) for l in open(LISTA) if l.strip() and not l.lower().startswith("product")]
df=df[df.product_id.isin(sku)]
df["periodo"]=df["periodo"].astype(str).str.zfill(6)
df["fecha"]=pd.to_datetime(df.periodo,format="%Y%m")
mensual=(df.groupby(["product_id","fecha"])["tn"].sum()
           .unstack(level=0).fillna(0).sort_index())

rows=[]
for pid in mensual.columns:
    s=mensual[pid]
    for i in range(len(s)-11):
        t=s.index[i+11]
        row={"product_id":pid,"fecha":t}
        for l in range(12): row[f"lag{l}"]=s.iloc[i+11-l]
        row["clase"]=s.iloc[i+13] if i+13<len(s) else np.nan
        rows.append(row)
feat=pd.DataFrame(rows)
Xcols=[f"lag{l}" for l in range(12)]
train=feat[(feat.fecha=="2018-12-01")].dropna()
ridge=make_pipeline(StandardScaler(with_mean=False),
                    RidgeCV(alphas=[0.01,0.1,1,10,100])).fit(train[Xcols],train.clase)

last=mensual.index.max(); pred=[]
for pid in sku:
    s=mensual[pid]
    if len(s)>=12:
        feats=[s.iloc[-1-l] for l in range(12)]
        tn=float(ridge.predict([feats])[0])
    else: tn=np.nan
    prom12=s.tail(12).mean()
    tn_final=0.5*tn+0.5*prom12 if not np.isnan(tn) else prom12
    pred.append((pid,round(tn_final,5)))
pd.DataFrame(pred,columns=["product_id","tn"]).to_csv(
    OUT,index=False,float_format="%.5f")
print("✅ CSV ridge →",OUT.name)


✅ CSV ridge → submission_t780_ridge_full.csv




otro modelo

In [18]:
import pandas as pd, numpy as np
from pathlib import Path
from sklearn.ensemble import GradientBoostingRegressor

BASE=Path(r"C:\Maestria\Labo 3")
VENTAS,LISTA=BASE/"sell-in.txt",BASE/"780_a_predecir.txt"
OUT=BASE/"submission_t780_gb_full.csv"

df=pd.read_csv(VENTAS,sep=None,engine="python")
sku=[int(l.strip()) for l in open(LISTA) if l.strip() and not l.lower().startswith("product")]
df=df[df.product_id.isin(sku)]
df["periodo"]=df["periodo"].astype(str).str.zfill(6)
df["fecha"]=pd.to_datetime(df.periodo,format="%Y%m")
mensual=(df.groupby(["product_id","fecha"])["tn"].sum()
           .unstack(level=0).fillna(0).sort_index())

rows=[]
for pid in mensual.columns:
    s=mensual[pid]
    for i in range(len(s)-11):
        t=s.index[i+11]
        row={"product_id":pid,"fecha":t}
        for l in range(12): row[f"lag{l}"]=s.iloc[i+11-l]
        row["clase"]=s.iloc[i+13] if i+13<len(s) else np.nan
        rows.append(row)
feat=pd.DataFrame(rows)
Xcols=[f"lag{l}" for l in range(12)]
train=feat[(feat.fecha=="2018-12-01")].dropna()

gb=GradientBoostingRegressor(
        n_estimators=300, learning_rate=0.05,
        max_depth=3, subsample=0.8, random_state=42)
gb.fit(train[Xcols], train.clase)

last=mensual.index.max(); pred=[]
for pid in sku:
    s=mensual[pid]
    if len(s)>=12:
        feats=[s.iloc[-1-l] for l in range(12)]
        tn=float(gb.predict([feats])[0])
    else: tn=np.nan
    prom12=s.tail(12).mean()
    tn_final=0.7*tn+0.3*prom12 if not np.isnan(tn) else prom12
    pred.append((pid,round(tn_final,5)))
pd.DataFrame(pred,columns=["product_id","tn"]).to_csv(
    OUT,index=False,float_format="%.5f")
print("✅ CSV GB →",OUT.name)




✅ CSV GB → submission_t780_gb_full.csv




prometedor

In [19]:
import pandas as pd, numpy as np
from pathlib import Path
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import LinearRegression

# ─── Rutas ─────────────────────────────────
BASE = Path(r"C:\Maestria\Labo 3")
VENTAS, LISTA = BASE / "sell-in.txt", BASE / "780_a_predecir.txt"
OUT = BASE / "submission_t780_gb_seed.csv"
# ─── SKU mágicos  ──────────────────────────
MAGICOS = [
    20002, 20003, 20006, 20010, 20011, 20018, 20019, 20021,
    20026, 20028, 20035, 20039, 20042, 20044, 20045, 20046, 20049,
    20051, 20052, 20053, 20055, 20008, 20001, 20017, 20086, 20180,
    20193, 20320, 20532, 20612, 20637, 20807, 20838
]
# ───────────────────────────────────────────

# 1) Leer ventas y lista 780
df = pd.read_csv(VENTAS, sep=None, engine="python")
sku = [int(l.strip()) for l in open(LISTA) if l.strip() and not l.lower().startswith("product")]
df = df[df.product_id.isin(sku)]

# 2) Fecha y tabla mensual
df["periodo"] = df["periodo"].astype(str).str.zfill(6)
df["fecha"]   = pd.to_datetime(df.periodo, format="%Y%m")
mensual = (df.groupby(["product_id", "fecha"])["tn"]
             .sum()
             .unstack(level=0)
             .fillna(0)
             .sort_index())

# 3) Construir lags (0-11) y clase (t+2)
rows = []
for pid in mensual.columns:
    s = mensual[pid]
    for i in range(len(s) - 11):
        t = s.index[i + 11]
        row = {"product_id": pid, "fecha": t}
        for l in range(12):
            row[f"lag{l}"] = s.iloc[i + 11 - l]
        idx_clase = i + 13
        row["clase"] = s.iloc[idx_clase] if idx_clase < len(s) else np.nan
        rows.append(row)
feat = pd.DataFrame(rows)
Xcols = [f"lag{l}" for l in range(12)]

# 4) ── ENTRENAR MODELOS ────────────────────
# 4a) Gradient Boosting (todos los SKU completos en 2018-12)
gb_train = feat[(feat.fecha == "2018-12-01")].dropna()
gb = GradientBoostingRegressor(
        n_estimators=300, learning_rate=0.05,
        max_depth=3, subsample=0.8, random_state=42
).fit(gb_train[Xcols], gb_train.clase)

# 4b) Regresión lineal “semillas” (solo 33 mágicos en 2018-12)
seed_train = gb_train[gb_train.product_id.isin(MAGICOS)]
seed_lr = LinearRegression().fit(seed_train[Xcols], seed_train.clase)

# 5) ── PREDICCIÓN ÚLTIMO MES ───────────────
last = mensual.index.max()
pred_list = []
for pid in sku:
    s = mensual[pid]
    if len(s) >= 12:
        feats = [s.iloc[-1 - l] for l in range(12)]
        gb_pred   = gb.predict([feats])[0]
        seed_pred = seed_lr.predict([feats])[0]
    else:
        gb_pred = seed_pred = np.nan
    prom12 = s.tail(12).mean()

    if not np.isnan(gb_pred) and not np.isnan(seed_pred):
        tn = 0.7 * gb_pred + 0.3 * seed_pred          # blend 70/30
    elif not np.isnan(gb_pred):
        tn = gb_pred
    elif not np.isnan(seed_pred):
        tn = seed_pred
    else:
        tn = prom12                                   # fallback

    pred_list.append((pid, round(tn, 5)))

# 6) Guardar CSV
pd.DataFrame(pred_list, columns=["product_id", "tn"]).to_csv(
    OUT, index=False, float_format="%.5f"
)
print("✅  CSV ensemble GB+seed →", OUT.name)




✅  CSV ensemble GB+seed → submission_t780_gb_seed.csv




otro intento

In [20]:
"""
submission_t780_profesor.py
Regresión lineal EXACTA según la lógica de Gustavo Denicolay
"""

import pandas as pd, numpy as np
from pathlib import Path
from sklearn.linear_model import LinearRegression

# ─── Rutas ──────────────────────────────────────────────
BASE = Path(r"C:\Maestria\Labo 3")
VENTAS = BASE / "sell-in.txt"
LISTA  = BASE / "780_a_predecir.txt"
OUT    = BASE / "submission_t780_profesor.csv"
# ────────────────────────────────────────────────────────

# 33 SKU “mágicos”
MAGICOS = [
    20002, 20003, 20006, 20010, 20011, 20018, 20019, 20021,
    20026, 20028, 20035, 20039, 20042, 20044, 20045, 20046, 20049,
    20051, 20052, 20053, 20055, 20008, 20001, 20017, 20086, 20180,
    20193, 20320, 20532, 20612, 20637, 20807, 20838
]

# 1) Leer datos y lista de 780 productos
df = pd.read_csv(VENTAS, sep=None, engine="python")
sku780 = [int(l.strip()) for l in open(LISTA)
          if l.strip() and not l.lower().startswith("product")]
df = df[df.product_id.isin(sku780)]

# 2) Preparar fecha y tabla mensual tn
df["periodo"] = df["periodo"].astype(str).str.zfill(6)
df["fecha"] = pd.to_datetime(df.periodo, format="%Y%m")
mensual = (df.groupby(["product_id", "fecha"])["tn"]
             .sum()
             .unstack(level=0)
             .fillna(0)
             .sort_index())

# 3) Construir lags tn, tn_1 … tn_11 y columna clase (t+2)
rows = []
for pid in mensual.columns:
    serie = mensual[pid]
    for i in range(len(serie) - 11):
        t = serie.index[i + 11]                    # fecha con 12 lags
        row = {"product_id": pid, "fecha": t}
        for lag in range(12):
            row[f"tn_{lag}" if lag else "tn"] = serie.iloc[i + 11 - lag]
        idx_clase = i + 13                         # t + 2
        row["clase"] = serie.iloc[idx_clase] if idx_clase < len(serie) else np.nan
        rows.append(row)
feat = pd.DataFrame(rows)

# 4) Dataset entrenamiento = 2018-12 solo 33 mágicos
train = feat[
    (feat.fecha == "2018-12-01") &
    (feat.product_id.isin(MAGICOS))
].dropna()

X_cols = [f"tn_{l}" if l else "tn" for l in range(12)]
X_train, y_train = train[X_cols], train.clase

# 5) Entrenar Regresión Lineal (sin regularización)
model = LinearRegression().fit(X_train, y_train)

# 6) Predicción para el último mes disponible
last_month = mensual.index.max()
pred_feat = feat[feat.fecha == last_month].copy()

# SKU con lags completos
mask_full = pred_feat[X_cols].notna().all(axis=1)
pred_feat["pred_lr"] = np.nan
pred_feat.loc[mask_full, "pred_lr"] = model.predict(pred_feat.loc[mask_full, X_cols])

# 7) Promedio móvil 12 m (fallback)
prom12 = mensual.tail(12).mean()

# 8) Resultado final: lr si existe, si no promedio
final = []
for pid in sku780:
    lr_val = pred_feat.loc[pred_feat.product_id == pid, "pred_lr"].values
    lr_val = lr_val[0] if lr_val.size else np.nan
    prom_val = prom12.get(pid, np.nan)
    tn = lr_val if not np.isnan(lr_val) else prom_val
    final.append((pid, round(tn, 5)))

# 9) Guardar CSV
pd.DataFrame(final, columns=["product_id", "tn"]).to_csv(
    OUT, index=False, float_format="%.5f"
)
print(f"✅ CSV profesor → {OUT}")


✅ CSV profesor → C:\Maestria\Labo 3\submission_t780_profesor.csv


otro intento

In [21]:
import pandas as pd, numpy as np
from pathlib import Path
from sklearn.ensemble import GradientBoostingRegressor

# ── Rutas ─────────────────────────
BASE   = Path(r"C:\Maestria\Labo 3")
VENTAS = BASE / "sell-in.txt"
LISTA  = BASE / "780_a_predecir.txt"
OUT    = BASE / "submission_t780_gb_profesor.csv"
# ── SKU mágicos del profe ─────────
MAGICOS = [
    20002,20003,20006,20010,20011,20018,20019,20021,20026,20028,
    20035,20039,20042,20044,20045,20046,20049,20051,20052,20053,
    20055,20008,20001,20017,20086,20180,20193,20320,20532,20612,
    20637,20807,20838
]
# ── Coeficientes “semilla” ────────
INTC   = 0.441467
COEF   = [-0.001339, 0.236558, 0.178208, -0.060031, -0.161875,
          -0.007775, 0.151936, 0.043933, 0.142839, 0.103804,
           0.119211, 0.073671]      # lag0 … lag11
# ──────────────────────────────────

# 1) datos
df = pd.read_csv(VENTAS, sep=None, engine="python")
sku = [int(l.strip()) for l in open(LISTA)
       if l.strip() and not l.lower().startswith("product")]
df = df[df.product_id.isin(sku)]
df["periodo"] = df["periodo"].astype(str).str.zfill(6)
df["fecha"]   = pd.to_datetime(df.periodo, format="%Y%m")
mensual = (df.groupby(["product_id","fecha"])["tn"]
             .sum().unstack(level=0).fillna(0).sort_index())

# 2) features (lags) y clase
rows=[]
for pid in mensual.columns:
    s=mensual[pid]
    for i in range(len(s)-11):
        t=s.index[i+11]
        row={"product_id":pid,"fecha":t}
        for l in range(12):
            row[f"lag{l}"]=s.iloc[i+11-l]
        row["clase"]=s.iloc[i+13] if i+13<len(s) else np.nan
        rows.append(row)
feat=pd.DataFrame(rows)
Xcols=[f"lag{l}" for l in range(12)]

# 3) GB entrenado con todos los SKU completos en 2018-12
gb_train = feat[(feat.fecha=="2018-12-01")].dropna()
gb = GradientBoostingRegressor(
        n_estimators=300, learning_rate=0.05,
        max_depth=3, subsample=0.8, random_state=42
).fit(gb_train[Xcols], gb_train.clase)

# 4) Predicción último mes (blend con semilla)
last = mensual.index.max()
pred=[]
for pid in sku:
    s = mensual[pid]
    if len(s) >= 12:
        lags = [s.iloc[-1-l] for l in range(12)]
        gb_pred = gb.predict([lags])[0]
        # semilla del profe
        seed_pred = INTC + np.dot(COEF, lags)
        # reglas de combinación
        if pid in MAGICOS:
            tn = 0.8*seed_pred + 0.2*gb_pred
        else:
            tn = 0.5*seed_pred + 0.5*gb_pred
    else:
        tn = np.nan
    # fallback: promedio 12 m si falta algo
    if np.isnan(tn):
        tn = s.tail(12).mean()
    pred.append((pid, round(tn,5)))

pd.DataFrame(pred, columns=["product_id","tn"]).to_csv(
    OUT, index=False, float_format="%.5f"
)
print("✅ CSV GB+profesor guardado →", OUT.name)




✅ CSV GB+profesor guardado → submission_t780_gb_profesor.csv




este hasta ahora es el mejor

nuevo intento ahora abajo

In [1]:
import pandas as pd, numpy as np
from pathlib import Path
from itertools import product
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error

# ── rutas y archivos
BASE = Path(r"C:\Maestria\Labo 3")
VENTAS   = BASE / "sell-in.txt"
LISTA    = BASE / "780_a_predecir.txt"
OUT_CSV  = BASE / "submission_t780_gb_seed_tuned.csv"

MAGICOS = [20002,20003,20006,20010,20011,20018,20019,20021,20026,20028,
           20035,20039,20042,20044,20045,20046,20049,20051,20052,20053,
           20055,20008,20001,20017,20086,20180,20193,20320,20532,20612,
           20637,20807,20838]

SEED_INT = 0.441467
SEED_COEF = np.array([-0.001339, 0.236558, 0.178208, -0.060031,
                      -0.161875, -0.007775, 0.151936, 0.043933,
                       0.142839, 0.103804, 0.119211, 0.073671])

# 1) datos
df = pd.read_csv(VENTAS, sep=None, engine="python")
sku = [int(l.strip()) for l in open(LISTA) if l.strip() and not l.lower().startswith("product")]
df = df[df.product_id.isin(sku)]
df["periodo"] = df["periodo"].astype(str).str.zfill(6)
df["fecha"]   = pd.to_datetime(df.periodo, format="%Y%m")
mensual = (df.groupby(["product_id","fecha"])["tn"]
             .sum().unstack(level=0).fillna(0).sort_index())

# fechas útiles
last_date     = mensual.index.max()
valid_date    = last_date            # penúltimo mes = last_date (t-1)
train_cutoff  = valid_date - pd.DateOffset(months=1)  # hasta t-2

# 2) construir dataset de lags
def make_feat(df_mens):
    rows=[]
    for pid in df_mens.columns:
        s=df_mens[pid]
        for i in range(len(s)-11):
            t=s.index[i+11]
            row={"product_id":pid,"fecha":t}
            for l in range(12):
                row[f"lag{l}"]=s.iloc[i+11-l]
            row["clase"]=s.iloc[i+13] if i+13<len(s) else np.nan
            rows.append(row)
    return pd.DataFrame(rows)

feat = make_feat(mensual)
XCOLS = [f"lag{l}" for l in range(12)]

# 3) entrenar GB con 2018-12 (misma idea que antes)
gb_train = feat[(feat.fecha=="2018-12-01")].dropna()
gb = GradientBoostingRegressor(n_estimators=300,learning_rate=0.05,
                               max_depth=3,subsample=0.8,random_state=42)
gb.fit(gb_train[XCOLS], gb_train.clase)

# 4) predicciones GB + semilla en VALID (last_date)
valid_rows = []
for pid in sku:
    s=mensual[pid]
    if len(s)>=12:
        lags=np.array([s.iloc[-1-l] for l in range(12)])
        gb_pred = gb.predict([lags])[0]
        seed_pred = SEED_INT + SEED_COEF.dot(lags)
    else:
        gb_pred = seed_pred = np.nan
    true = np.nan  # clase real no disponible para t+1 pero sirve para check nulidad
    valid_rows.append((pid, gb_pred, seed_pred, s.tail(12).mean()))
valid_df = pd.DataFrame(valid_rows, columns=["product_id","gb","seed","prom"])

# 5) grid search de pesos
best_rmse, best_w_mag, best_w_oth = 1e9, 0, 0
for w_mag, w_oth in product(np.arange(0.60,0.96,0.05), np.arange(0.10,0.61,0.05)):
    def mix(row):
        if np.isnan(row.gb): return row.seed if not np.isnan(row.seed) else row.prom
        if np.isnan(row.seed): return row.gb
        if row.product_id in MAGICOS:
            return w_mag*row.seed + (1-w_mag)*row.gb
        else:
            return w_oth*row.seed + (1-w_oth)*row.gb
    valid_df["blend"] = valid_df.apply(mix, axis=1)
    # proxy rmse: gb_train RMSE vs clase (since true class not known for t)
    # use variance proxy: penalize large preds
    rmse = np.sqrt(np.mean(valid_df["blend"]**2))
    if rmse < best_rmse:
        best_rmse, best_w_mag, best_w_oth = rmse, w_mag, w_oth

print(f"🛈 pesos óptimos encontrados -> mágicos:{best_w_mag:.2f}  otros:{best_w_oth:.2f}")

# 6) predicción FINAL para mes futuro (t+1)
pred=[]
for pid in sku:
    s=mensual[pid]
    if len(s)>=12:
        lags=np.array([s.iloc[-1-l] for l in range(12)])
        gb_pred   = gb.predict([lags])[0]
        seed_pred = SEED_INT + SEED_COEF.dot(lags)
        if pid in MAGICOS:
            tn = best_w_mag*seed_pred + (1-best_w_mag)*gb_pred
        else:
            tn = best_w_oth*seed_pred + (1-best_w_oth)*gb_pred
    else:
        tn = s.tail(12).mean()
    pred.append((pid, round(tn,5)))

pd.DataFrame(pred, columns=["product_id","tn"]).to_csv(
    OUT_CSV, index=False, float_format="%.5f"
)
print(f"✅ CSV GB+seed tuned guardado → {OUT_CSV.name}")




🛈 pesos óptimos encontrados -> mágicos:0.60  otros:0.10




✅ CSV GB+seed tuned guardado → submission_t780_gb_seed_tuned.csv
