In [1]:
!pip -q install -U pip setuptools wheel
!pip -q install "pycaret==3.3.2"

import pycaret
print("PyCaret version:", pycaret.__version__)

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m36.0 MB/s[0m eta [36m0:00:00[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m44.5 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
thinc 8.3.6 requires numpy<3.0.0,>=2.0.0, but you have numpy 1.26.4 which is incompatible.
pandas-gbq 0.29.2 requires google-api-core<3.0.0,>=2.10.2, but you have google-api-core 1.34.1 which is incompatible.[0m[31m
[0m[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
bigframes 2.12.0 requires google-cloud-bigquery-storage<3.0.0,>=2.30.0, which is not installed.
datasets 4.1.1 requires pyarrow>=21.0.0, but you have pyarrow 19.0.1 whic

In [2]:
import numpy as np
import pandas as pd

# Two years of daily timestamps and a seeded generator so every run yields the same data.
rng = pd.date_range(start="2021-01-01", periods=2 * 365, freq="D")
rs = np.random.RandomState(42)
n_obs = len(rng)

# Binary exogenous drivers (0/1 integer arrays)
is_weekend = (rng.dayofweek >= 5).astype(int)        # Saturday and Sunday
promo = (rng.day % 10 == 0).astype(int)              # day-of-month divisible by 10
holiday = ((rng.month == 12) & rng.day.isin([24, 25, 31])).astype(int)

# Numeric exogenous series: yearly sine wave centred on 15 plus Gaussian noise (sd 1).
# This draw must stay ahead of the target's noise draw so the seeded sequence is unchanged.
annual_wave = 10 * np.sin(2 * np.pi * rng.dayofyear / 365)
temp = 15 + annual_wave + rs.normal(0, 1.0, n_obs)

# Target components, summed left to right: base level, weekly cycle,
# promo and holiday lifts, linear trend, Gaussian noise (sd 2).
base_level = 50
weekly_cycle = 3 * np.sin(2 * np.pi * rng.weekday / 7)
promo_lift = 8 * promo
holiday_lift = 12 * holiday
trend = 0.05 * np.arange(n_obs)
noise = rs.normal(0, 2.0, n_obs)
y = base_level + weekly_cycle + promo_lift + holiday_lift + trend + noise

# Assemble target and drivers into one frame indexed by date.
df = pd.DataFrame(
    dict(y=y, is_weekend=is_weekend, promo=promo, holiday=holiday, temp=temp),
    index=rng,
)

print(df.shape)
df.head()


(730, 5)


Unnamed: 0,y,is_weekend,promo,holiday,temp
2021-01-01,49.090039,0,0,0,15.668848
2021-01-02,45.168471,1,0,0,15.205952
2021-01-03,48.571011,1,0,0,16.163885
2021-01-04,46.744833,0,0,0,17.211054
2021-01-05,54.603806,0,0,0,15.625495


In [3]:
# Hold out the final `fh` days: they form the forecasting horizon.
fh = 14
holdout = df.iloc[-fh:]

# Everything before the horizon is training data (target + exogenous columns).
df_train = df.iloc[:-fh].copy()
# For the horizon only the exogenous columns are kept; the target is dropped.
df_future_exog = holdout.drop(columns="y").copy()

print("Train:", df_train.shape, "| Future exog:", df_future_exog.shape)
df_future_exog.head()


Train: (716, 5) | Future exog: (14, 4)


Unnamed: 0,is_weekend,promo,holiday,temp
2022-12-18,1,0,0,12.895013
2022-12-19,0,0,0,13.099257
2022-12-20,0,1,0,12.754121
2022-12-21,0,0,0,13.230123
2022-12-22,0,0,0,13.764714


In [5]:
# Remove RAPIDS bits so PyCaret won't import them.
# `-y` skips pip's confirmation prompt and `-q` reduces its output; the trailing
# `|| true` makes the shell command report success even if pip exits with an error.
!pip -q uninstall -y cuml cudf rmm cupy-cuda12x cupy-cuda11x cupy || true


[0m

In [7]:
# Star import kept on purpose: the cells below call finalize_model, predict_model,
# plot_model and save_model unqualified.
from pycaret.time_series import *

# Initialise the time-series experiment on the training window. Columns other
# than the target ("is_weekend", "promo", "holiday", "temp") stay in the data
# passed to setup().
s = setup(
    data=df_train,
    target="y",
    fh=fh,                # reuse the horizon from the train/future split so the two cannot drift apart
    fold=3,
    session_id=42,        # fixed seed for repeatable results
    use_gpu=False,        # <-- CPU mode (stable on Kaggle)
    seasonal_period=7,    # weekly seasonality for daily data
    verbose=False
)

# Cross-validate the candidate models and keep the top-ranked one.
best = compare_models()
best


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2,TT (Sec)
gbr_cds_dt,Gradient Boosting w/ Cond. Deseasonalize & Detrending,0.4953,0.4523,1.6952,2.0646,0.0199,0.02,0.6325,0.3433
ada_cds_dt,AdaBoost w/ Cond. Deseasonalize & Detrending,0.496,0.4518,1.6976,2.0625,0.02,0.02,0.6386,0.26
catboost_cds_dt,CatBoost Regressor w/ Cond. Deseasonalize & Detrending,0.4962,0.4541,1.6986,2.073,0.02,0.02,0.6386,2.35
huber_cds_dt,Huber w/ Cond. Deseasonalize & Detrending,0.4963,0.4525,1.6987,2.0652,0.02,0.02,0.6462,0.24
ridge_cds_dt,Ridge w/ Cond. Deseasonalize & Detrending,0.4991,0.4511,1.7084,2.0588,0.0201,0.0201,0.6494,0.2233
br_cds_dt,Bayesian Ridge w/ Cond. Deseasonalize & Detrending,0.5012,0.452,1.7156,2.0633,0.0202,0.0202,0.6466,0.2367
lr_cds_dt,Linear w/ Cond. Deseasonalize & Detrending,0.5019,0.4525,1.7179,2.0654,0.0202,0.0202,0.6455,1.4467
rf_cds_dt,Random Forest w/ Cond. Deseasonalize & Detrending,0.5027,0.4622,1.7207,2.1097,0.0202,0.0203,0.6228,0.6467
xgboost_cds_dt,Extreme Gradient Boosting w/ Cond. Deseasonalize & Detrending,0.5201,0.4903,1.7801,2.2384,0.0209,0.0209,0.5538,0.3533
lightgbm_cds_dt,Light Gradient Boosting w/ Cond. Deseasonalize & Detrending,0.5329,0.4892,1.8241,2.2329,0.0214,0.0215,0.5734,6.72


In [11]:
# Finalize the selected model, then forecast the held-out horizon using the
# future exogenous values.
# The exogenous frame must provide one row per forecast step; fail early with a
# clear message instead of an error from deep inside predict_model.
assert len(df_future_exog) == fh, "df_future_exog must contain exactly `fh` rows"

final_best = finalize_model(best)
# Use the shared `fh` rather than a repeated literal so the horizon is defined once.
future_pred = predict_model(final_best, fh=fh, X=df_future_exog)
display(future_pred.head(20))

Unnamed: 0,y_pred
2022-12-18,83.1108
2022-12-19,85.0269
2022-12-20,88.3083
2022-12-21,89.0032
2022-12-22,87.0828
2022-12-23,84.4691
2022-12-24,83.5964
2022-12-25,83.5062
2022-12-26,85.4852
2022-12-27,88.6909


In [12]:
# Draw the forecast and diagnostics plots for the finalized model, passing the
# future exogenous frame to each call through data_kwargs.
for plot_kind in ("forecast", "diagnostics"):
    plot_model(final_best, plot=plot_kind, data_kwargs={"X": df_future_exog})


In [13]:
import os

# Output locations, each written once and reused below.
# The model path has no extension; the summary printed below appends ".pkl".
forecast_csv = "/kaggle/working/media/figures/ts_with_exog_forecast.csv"
model_stem = "/kaggle/working/notebooks/ts_with_exog_final"

# Make sure both target directories exist (figures first, then notebooks).
for out_dir in (os.path.dirname(forecast_csv), os.path.dirname(model_stem)):
    os.makedirs(out_dir, exist_ok=True)

# Persist the finalized model and the forecast table (index kept as first column).
save_model(final_best, model_stem)
future_pred.to_csv(forecast_csv, index=True)

print("Saved:")
print(f"- {model_stem}.pkl")
print(f"- {forecast_csv}")

Transformation Pipeline and Model Successfully Saved
Saved:
- /kaggle/working/notebooks/ts_with_exog_final.pkl
- /kaggle/working/media/figures/ts_with_exog_forecast.csv
