# 08 — Time Series (Univariate + Exogenous) — PyCaret 3
**Dataset:** Beijing PM2.5 with weather  
**Goal:** Forecast next 24 hours using exogenous features.

In [1]:
# Install latest stable PyCaret 3.x and common GPU-capable libs
# !pip -q install --upgrade pycaret xgboost catboost lightgbm plotly gradio

import sys, sklearn, pycaret, platform
print("Python:", sys.version)
print("Platform:", platform.platform())
print("scikit-learn:", sklearn.__version__)
print("pycaret:", pycaret.__version__)

# Check GPU
try:
    import subprocess, shutil
    if shutil.which("nvidia-smi"):
        print("\nGPU detected:")
        print(subprocess.check_output(["nvidia-smi"], text=True))
    else:
        print("\nNo NVIDIA GPU detected in this runtime. (Colab: Runtime -> Change runtime type -> GPU)")
except Exception as e:
    print("GPU check error:", e)

Python: 3.11.11 (main, Feb  5 2025, 19:11:07) [Clang 19.1.6 ]
Platform: Linux-6.6.87.2-microsoft-standard-WSL2-x86_64-with-glibc2.35
scikit-learn: 1.4.2
pycaret: 3.3.2

GPU detected:
Sun Nov  2 20:31:48 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 580.65.05              Driver Version: 580.88         CUDA Version: 13.0     |
+-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 3060 Ti     On  |   00000000:01:00.0  On |                  N/A |
|  0%   31C    P8             10W /  200W |    1002MiB /   8192MiB |      5%      Default |
|                                         |      

In [2]:
import pandas as pd
df = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/00381/PRSA_data_2010.1.1-2014.12.31.csv")
df['datetime'] = pd.to_datetime(df[['year','month','day','hour']])
df = df[['datetime','pm2.5','DEWP','TEMP','PRES','Iws']].dropna(subset=['pm2.5'])
# Set the index and specify the frequency as hourly ('H')
df = df.set_index('datetime').sort_index().asfreq('H')
y = df['pm2.5']
exog = df[['DEWP','TEMP','PRES','Iws']]

In [3]:
print("Target length:", len(y), "Range:", y.index.min(), "to", y.index.max())
display(y.head())
display(exog.head())

Target length: 43800 Range: 2010-01-02 00:00:00 to 2014-12-31 23:00:00


datetime
2010-01-02 00:00:00    129.0
2010-01-02 01:00:00    148.0
2010-01-02 02:00:00    159.0
2010-01-02 03:00:00    181.0
2010-01-02 04:00:00    138.0
Freq: H, Name: pm2.5, dtype: float64

Unnamed: 0_level_0,DEWP,TEMP,PRES,Iws
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010-01-02 00:00:00,-16.0,-4.0,1020.0,1.79
2010-01-02 01:00:00,-15.0,-4.0,1020.0,2.68
2010-01-02 02:00:00,-11.0,-5.0,1021.0,3.57
2010-01-02 03:00:00,-7.0,-5.0,1022.0,5.36
2010-01-02 04:00:00,-7.0,-5.0,1022.0,6.25


In [4]:
from pycaret.time_series import TSForecastingExperiment

# Combine target and exogenous variables into one dataframe
data = pd.concat([y, exog], axis=1)

exp = TSForecastingExperiment()
exp.setup(
    data=data,
    target='pm2.5',  # Specify which column is the target
    fh=24, 
    fold=3, 
    session_id=42, 
    seasonal_period=24, 
    n_jobs=1,
    verbose=False, 
    use_gpu=True,
    numeric_imputation_target='mean',
    numeric_imputation_exogenous='mean'
)

# Get list of available models
print("Available models:", exp.models().keys())

# Compare all available models
best = exp.compare_models(verbose=True)
final = exp.finalize_model(best)
preds = exp.predict_model(final)
fig = exp.plot_model(final, plot='forecast')

[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0
[LightGBM] [Info] This

Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2,TT (Sec)
arima,ARIMA,1.2242,1.048,82.3911,103.973,1.6369,0.7336,-0.9897,74.2867


Processing:   0%|          | 0/81 [00:00<?, ?it/s]

KeyboardInterrupt: 