In [None]:
# imports
import pandas as pd
import numpy as np

from sktime.datasets import load_airline
from sktime.utils.plotting import plot_series
from sktime.forecasting.naive import NaiveForecaster

In [None]:
# step 1: data specification
# load_airline() supplies the series that is used as the forecasting target y.
y = load_airline()

# Text preview of the first five observations, then a plot of the whole series.
print(y.head(n=5))
plot_series(y)

In [None]:
# step 2: specify the forecasting horizon

from sktime.forecasting.base import ForecastingHorizon

print(y.tail())  # last date in data is 1960-12

# Absolute forecasting horizon: the 12 monthly periods that follow the last
# date in the data (1961-01 through 1961-12).
# pd.period_range builds the monthly PeriodIndex directly. The earlier
# pd.PeriodIndex(pd.date_range(..., freq="M")) round-trip went through
# month-end timestamps and used the "M" alias, which pandas >= 2.2 deprecates
# for date_range (in favour of "ME"); "M" remains the valid period frequency.
fh = ForecastingHorizon(
    pd.period_range("1961-01", periods=12, freq="M"),
    is_relative=False,
)

In [None]:
# step 3: specifying the forecasting algorithm

# Seasonal naive forecaster:
#   sp=12           -> seasonal periodicity of 12 time steps, i.e. monthly data
#                      whose pattern repeats every year.
#   strategy="last" -> each forecast repeats the last observed value from the same
#                      point in the season (e.g. 1961-01 is predicted as the 1960-01 value).
forecaster = NaiveForecaster(sp=12, strategy="last")

In [None]:
# step 4: fit the forecaster

# Exogenous time series should always be passed as pandas.DataFrame objects.
# This placeholder frame shares y's index and has no columns; it only shows
# where exogenous data enters the workflow.
X_fit = pd.DataFrame(index=y.index)

# X holds exogenous data (e.g., advertising spend): it is not forecast itself,
# but is used to improve the forecast of the target variable y.
#   - fit(..., X=...)     supplies the X values observed alongside y.
#   - predict(..., X=...) supplies the future X values for the forecast period.
# Multivariate forecasting differs: there, all variables are treated as targets.
forecaster.fit(y=y, X=X_fit, fh=fh)

In [None]:
# step 5: querying predictions

# The time index of the X passed to predict should be a superset of the time
# points in fh; here it is built directly from the horizon's absolute index.
horizon_index = fh.to_absolute_index()
X_predict = pd.DataFrame(index=horizon_index)

y_pred = forecaster.predict(X=X_predict)

# Observed series and forecast drawn together.
plot_series(y, y_pred, labels=["y", "y_pred"])

In [None]:
from sktime.datasets import load_airline
from sktime.forecasting.theta import ThetaForecaster

# step 1: data specification
y = load_airline()
# step 2: specifying forecasting horizon
# relative horizon, steps 1..12, i.e. forecast the next 12 months
fh = ForecastingHorizon(np.arange(1, 13), is_relative=True)
# step 3: specifying the forecasting algorithm
forecaster = ThetaForecaster(sp=12)
# step 4: fitting the forecaster
forecaster.fit(y=y, fh=fh)
# step 5: call a probabilistic forecasting method

# Coverage is the nominal probability that the true value falls inside the
# predicted interval:
#   - higher coverage  -> wider intervals: more certainty, less informative
#   - smaller coverage -> narrower intervals: more informative, less certainty
y_pred_int = forecaster.predict_interval(coverage=0.9)
y_pred_int

In [None]:
from sktime.registry import all_estimators

# Forecasters that support probabilistic forecasting, i.e. those whose
# "capability:pred_int" tag is True, returned as a DataFrame.
pred_int_tags = {"capability:pred_int": True}
estimators = all_estimators("forecaster", filter_tags=pred_int_tags, as_dataframe=True)
estimators

In [None]:
# Point forecast; no fh is passed here, the horizon was given when fitting.
y_pred = forecaster.predict()

# Observed series and point forecast, with the previously computed
# prediction interval (y_pred_int) passed as pred_interval.
fig, ax = plot_series(
    y,
    y_pred,
    pred_interval=y_pred_int,
    labels=["y", "y_pred"],
)

ax.legend()

In [None]:
# Several coverages can be requested in one call by passing a list.
# Trade-off to keep in mind when reading the result:
#   - higher coverage  -> wider intervals: more certainty, less informative
#   - smaller coverage -> narrower intervals: more informative, less certainty

coverage = [0.50, 0.90, 0.95]
y_pred_ints = forecaster.predict_interval(coverage=coverage)
y_pred_ints

In [None]:
# One Series per column of y_pred_ints, plotted together with the last 50
# observations of y.
columns = [bound for _, bound in y_pred_ints.items()]
fig, ax = plot_series(y[-50:], *columns)

In [None]:
# Example using PinballLoss evaluation metric
# Pinball Loss evaluates how well predicted quantiles or intervals capture true values.
# It penalizes over-predictions and under-predictions differently, weighted by quantile level or interval coverage.
# Supports both predict_quantiles() (specific percentiles) and predict_interval() (ranges at desired coverage).

from sktime.performance_metrics.forecasting.probabilistic import PinballLoss

pinball_loss = PinballLoss()

# Load the dataset once and split it by position. The original called
# load_airline() twice just to slice the same series; .iloc also makes it
# explicit that the slices are positional rather than label-based.
y_airline = load_airline()
y_train = y_airline.iloc[:24]  # train on 24 months, 1949 and 1950
y_test = y_airline.iloc[24:36]  # ground truth for 12 months in 1951

# try to forecast 12 months ahead, from y_train
# (note: this rebinds fh and forecaster used by earlier cells)
fh = np.arange(1, 13)

forecaster = ThetaForecaster(sp=12)
forecaster.fit(y_train, fh=fh)

pred_intervals = forecaster.predict_interval(coverage=[0.5, 0.9, 0.95])

# Aggregated pinball loss for all intervals
print(pinball_loss(y_true=y_test, y_pred=pred_intervals))
# Detailed pinball loss for each time step in the forecasting horizon
print(pinball_loss.evaluate_by_index(y_true=y_test, y_pred=pred_intervals))