In [1]:
import pandas as pd
import numpy as np
from statsmodels.tsa.holtwinters import ExponentialSmoothing

# -----------------------------
# 0) Config
# -----------------------------
start_date = "2012-01-01"   # on coupe avant 2012
forecast_start = pd.Timestamp("2025-01-01")
forecast_end   = pd.Timestamp("2025-05-31")
seasonality = 7
Positive_Deliveries = True

# -----------------------------
# 1) Charger et nettoyer
# -----------------------------
receivals = pd.read_csv("./data/kernel/receivals.csv")
receivals = receivals.dropna(subset=["net_weight"])
receivals = receivals[receivals["net_weight"] > 0]

receivals["date_arrival"] = (
    pd.to_datetime(receivals["date_arrival"], utc=True)
      .dt.tz_localize(None)
      .dt.normalize()
)

# on enlève les données < 2012
receivals = receivals[receivals["date_arrival"] >= pd.Timestamp(start_date)].reset_index(drop=True)

# -----------------------------
# 2) Agrégation journalière
# -----------------------------
daily = (
    receivals.groupby(["rm_id","date_arrival"], as_index=False)
             .agg(net_weight=("net_weight","sum"))
             .rename(columns={"date_arrival":"date"})
             .sort_values(["rm_id","date"])
)

# -----------------------------
# 3) Filtrer les séries trop rares
# -----------------------------
stats = (
    daily.groupby("rm_id")
         .agg(last_delivery=("date","max"),
              total_deliveries=("date","count"))
)
cutoff = pd.Timestamp("2024-12-31") - pd.DateOffset(years=5)
rare_rm_id = stats.index[(stats["last_delivery"] <= cutoff) & (stats["total_deliveries"] <= 3)]
daily = daily[~daily["rm_id"].isin(rare_rm_id)].reset_index(drop=True)

# -----------------------------
# 4) Reindexer sur grille complète
# -----------------------------
full_start = daily["date"].min()
full_end   = pd.Timestamp("2024-12-31")
rm_ids = pd.Index(daily["rm_id"].unique(), name="rm_id")
calendar = pd.date_range(full_start, full_end, freq="D", name="date")
full_idx = pd.MultiIndex.from_product([rm_ids, calendar], names=["rm_id","date"])

daily = (
    daily.set_index(["rm_id","date"])["net_weight"]
         .reindex(full_idx, fill_value=0)
         .reset_index()
)

# -----------------------------
# 5) Prévisions Holt-Winters par série
# -----------------------------
predictions = []
for rm in daily["rm_id"].unique():
    series = (
        daily.loc[daily["rm_id"]==rm, ["date","net_weight"]]
             .set_index("date")["net_weight"]
             .asfreq("D")
    )
    # Option : démarrer à la première livraison >0
    pos_idx = series.index[series.gt(0)]
    if len(pos_idx)>0:
        first_dt = pos_idx[0]
        series = series.loc[first_dt:]

    # Fit Holt-Winters
    try:
        model = ExponentialSmoothing(
            series,
            trend=None,
            seasonal="add",
            seasonal_periods=seasonality,
            initialization_method="estimated"
        ).fit(optimized=True)
        fc = model.predict(start=forecast_start, end=forecast_end)
    except Exception:
        # fallback : tout plat
        fc = pd.Series(0.0, index=pd.date_range(forecast_start, forecast_end, freq="D"))

    pred = (
        fc.rename("net_weight")
          .reset_index()
          .rename(columns={"index":"date"})
    )
    if Positive_Deliveries:
        pred["net_weight"] = pred["net_weight"].clip(lower=0)
    pred["rm_id"] = rm
    predictions.append(pred)

df_forecast = pd.concat(predictions, ignore_index=True)

# -----------------------------
# 6) Construire df_final avec cumuls
# -----------------------------
df_cum = (
    df_forecast.sort_values(["rm_id","date"])
               .groupby("rm_id", as_index=False)
               .apply(lambda g: g.assign(cum=g["net_weight"].cumsum()))
               .reset_index(drop=True)[["rm_id","date","cum"]]
)
df_cum["cum"] = df_cum["cum"].clip(lower=0)

# -----------------------------
# 7) Charger mapping et générer soumission
# -----------------------------
ids = pd.read_csv("./data/prediction_mapping.csv")
ids["forecast_start_date"] = pd.to_datetime(ids["forecast_start_date"])
ids["forecast_end_date"] = pd.to_datetime(ids["forecast_end_date"])
ids["rm_id"] = pd.to_numeric(ids["rm_id"], errors="coerce").astype("Int64")
ids = ids.rename(columns={"forecast_end_date":"date"})

out = (
    ids.merge(df_cum, on=["rm_id","date"], how="left")
       .assign(cum=lambda d: d["cum"].fillna(0))
       .sort_values(["ID"])
       .reset_index(drop=True)
)

submission = (
    out[["ID","cum"]]
      .rename(columns={"cum":"predicted_weight"})
      .fillna({"predicted_weight":0})
      .astype({"ID":int,"predicted_weight":float})
)

submission.to_csv("./submissions/submission_timeseries.csv", index=False)
print("✅ Fichier de soumission généré : ./submissions/submission_timeseries.csv")
print(submission.head())

  .apply(lambda g: g.assign(cum=g["net_weight"].cumsum()))


✅ Fichier de soumission généré : ./submissions/submission_timeseries.csv
   ID  predicted_weight
0   1               0.0
1   2               0.0
2   3               0.0
3   4               0.0
4   5               0.0
