In [6]:
# ===== PyCaret Time Series: Univariate Forecast (No Exogenous) =====
!pip -q install -U pycaret pandas scikit-learn

import pandas as pd
from pycaret.datasets import get_data
from pycaret.time_series import setup, compare_models, blend_models, finalize_model, predict_model
import os

# 1) Load the "airline" sample dataset. The return type is not assumed here
#    (Series or DataFrame); to_series_monthly() below normalizes either form.
#    In the recorded run it was a float64 Series with a monthly PeriodIndex.
obj = get_data("airline")

def _as_month_start_series(series):
    """Return a copy of *series* named "y" with a month-start DatetimeIndex."""
    idx = series.index
    if isinstance(idx, pd.PeriodIndex):
        # Each period becomes the timestamp at its start.
        idx = idx.to_timestamp(how="S")
    elif not isinstance(idx, pd.DatetimeIndex):
        idx = pd.to_datetime(idx)
    # Work on a private deep copy: the freq is assigned on the index object
    # itself, and that object may otherwise be shared with the caller's Series.
    idx = idx.copy(deep=True)
    # pandas validates the assignment and raises ValueError when the dates are
    # not a gap-free month-start sequence.
    idx.freq = "MS"
    out = series.copy()
    out.index = idx
    return out.rename("y")


def _columns_to_month_start_series(dates, target):
    """Index *target* by the parsed *dates* and conform it to month-start frequency."""
    out = target.copy()
    # Build the index directly instead of assigning parsed dates back into the
    # frame, so the result does not depend on how an in-place column
    # assignment treats the column's dtype.
    out.index = pd.DatetimeIndex(pd.to_datetime(dates))
    # NOTE: asfreq inserts NaN for missing months and keeps only rows that
    # fall exactly on the generated month-start grid.
    return out.asfreq("MS").rename("y")


def to_series_monthly(x):
    """Normalize *x* into a Series named "y" on a month-start ("MS") DatetimeIndex.

    Accepted inputs:
      * Series -- a PeriodIndex is converted to start-of-period timestamps,
        any other non-datetime index is parsed with ``pd.to_datetime``; the
        index must already be a complete month-start sequence.
      * DataFrame with "Month" and "Passengers" columns.
      * DataFrame with two or more columns -- the first is taken as the date,
        the second as the target.
      * DataFrame with a single column and a DatetimeIndex/PeriodIndex -- the
        column is the target and is handled like a Series.

    The input object is never modified.

    Raises:
        TypeError: if *x* is neither a Series nor a DataFrame.
        ValueError: if no date information can be located, or (Series path)
            the dates do not conform to month-start frequency.
    """
    if isinstance(x, pd.Series):
        return _as_month_start_series(x)
    if not isinstance(x, pd.DataFrame):
        raise TypeError(
            f"expected a pandas Series or DataFrame, got {type(x).__name__}"
        )

    if {"Month", "Passengers"}.issubset(x.columns):
        return _columns_to_month_start_series(x["Month"], x["Passengers"])

    n_cols = x.shape[1]
    if n_cols >= 2:
        # Fallback: first col is date, second is target (positional on purpose).
        return _columns_to_month_start_series(x.iloc[:, 0], x.iloc[:, 1])
    if n_cols == 1 and isinstance(x.index, (pd.DatetimeIndex, pd.PeriodIndex)):
        # The dates live in the index, so the only column is the target.
        return _as_month_start_series(x.iloc[:, 0])
    raise ValueError(
        "cannot locate a date column: expected 'Month'/'Passengers' columns, "
        "a date column followed by a target column, or a single target "
        "column with a date-like index"
    )

# Forecast horizon in months. Defined once so the value passed to setup() and
# the number of rows exported to CSV cannot drift apart.
FORECAST_HORIZON = 24

y = to_series_monthly(obj)
print("Series ready:", y.shape, "| index freq:", y.index.freq)

# 2) PyCaret setup
exp = setup(
    data=y,
    session_id=42,
    fh=FORECAST_HORIZON,  # forecast horizon (months)
    fold=3,
    use_gpu=False,
    verbose=True,
)

# 3) Train & forecast
# n_select=3 hands the three top-ranked models to blend_models as a list.
best = compare_models(n_select=3)
blend = blend_models(best)
final = finalize_model(blend)
forecast_df = predict_model(final)

# 4) Save the last FORECAST_HORIZON rows of the forecast
# Test for the directory that is actually written to, not just "/kaggle",
# so a partially set-up environment falls back to the working directory
# instead of failing inside to_csv.
kaggle_dir = "/kaggle/working"
out_name = "airline_univariate_forecast.csv"
out_path = os.path.join(kaggle_dir, out_name) if os.path.isdir(kaggle_dir) else out_name
forecast_df.tail(FORECAST_HORIZON).to_csv(out_path)
print("✅ Saved:", out_path)
print(forecast_df.tail(5))


Period
1949-01    112.0
1949-02    118.0
1949-03    132.0
1949-04    129.0
1949-05    121.0
Freq: M, Name: Number of airline passengers, dtype: float64

Series ready: (144,) | index freq: <MonthBegin>


Unnamed: 0,Description,Value
0,session_id,42
1,Target,y
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(144, 1)"
5,Transformed data shape,"(144, 1)"
6,Transformed train set shape,"(120, 1)"
7,Transformed test set shape,"(24, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2,TT (Sec)
exp_smooth,Exponential Smoothing,0.7349,0.8061,17.9986,22.1378,0.0574,0.059,0.7379,0.0833
arima,ARIMA,0.8027,0.8531,19.532,23.2748,0.0636,0.0657,0.6974,0.0967
stlf,STLF,0.8047,0.8543,19.6169,23.3408,0.0639,0.0657,0.6985,0.04
catboost_cds_dt,CatBoost Regressor w/ Cond. Deseasonalize & Detrending,0.8228,0.9688,20.4107,26.8814,0.0635,0.0658,0.6367,1.64
et_cds_dt,Extra Trees w/ Cond. Deseasonalize & Detrending,0.8231,0.9296,20.4887,25.8326,0.064,0.0657,0.6694,0.68
rf_cds_dt,Random Forest w/ Cond. Deseasonalize & Detrending,0.8598,1.0185,21.5618,28.6972,0.0657,0.0683,0.6163,0.7433
ridge_cds_dt,Ridge w/ Cond. Deseasonalize & Detrending,0.9042,1.0528,22.4872,29.3845,0.0709,0.0735,0.5785,0.3567
lr_cds_dt,Linear w/ Cond. Deseasonalize & Detrending,0.9042,1.0527,22.4869,29.3839,0.0709,0.0735,0.5786,0.6
en_cds_dt,Elastic Net w/ Cond. Deseasonalize & Detrending,0.9048,1.0542,22.5038,29.4275,0.0708,0.0734,0.5777,0.53
lasso_cds_dt,Lasso w/ Cond. Deseasonalize & Detrending,0.9062,1.0539,22.5395,29.4245,0.0709,0.0735,0.5781,0.3767


Unnamed: 0,cutoff,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2
0,1952-12,0.4264,0.5295,9.9963,13.6131,0.0457,0.0448,0.8107
1,1954-12,1.3937,1.4047,32.079,36.3602,0.0997,0.1059,0.4382
2,1956-12,0.4807,0.5411,14.0384,17.7124,0.0357,0.0359,0.91
Mean,NaT,0.7669,0.8251,18.7046,22.5619,0.0604,0.0622,0.7196
SD,NaT,0.4438,0.4099,9.6001,9.8993,0.0281,0.0311,0.2031


✅ Saved: /kaggle/working/airline_univariate_forecast.csv
           y_pred
1962-08  681.1948
1962-09  574.4690
1962-10  520.9907
1962-11  450.1401
1962-12  493.1960
