In [1]:
import jax; jax.config.update("jax_enable_x64", True)
import jax.numpy as np, jax.random as rnd
import polars as pl

In [2]:
import matplotlib.pyplot as plt

In [3]:
import GARCH

Каждая модель получает на вход временной ряд (здесь — 100 наблюдений) и прогнозирует одно число — условную дисперсию на 1 шаг вперёд.

In [4]:
# Demo: build a GARCH model and display its prediction for a seeded (100, 1)
# array of standard-normal draws, so the cell is reproducible.
GARCH.GARCH(1.0, key=rnd.PRNGKey(4)).predict(
    rnd.normal(rnd.PRNGKey(5), (100, 1))
)

Array(1.41559389, dtype=float64)

In [5]:
# Demo: same seeded (100, 1) standard-normal input, fed to the mixture model.
# NOTE(review): the list argument presumably holds one entry per mixture
# component (here two) — confirm against GARCH.MixGARCH.
GARCH.MixGARCH([1, 1.5], key=rnd.PRNGKey(4)).predict(
    rnd.normal(rnd.PRNGKey(5), (100, 1))
)

Array(0.02127214, dtype=float64)

Метрики, они же - функции ошибок. Меньше - лучше.

In [6]:
def MSE(var_pred, var_true):
    """Mean squared error between predicted and true variances, on the log scale.

    Both inputs are passed through ``log`` before differencing, so they must be
    strictly positive. Lower is better; identical inputs give 0.
    """
    log_error = np.log(var_pred) - np.log(var_true)
    return np.mean(log_error**2)

def QLIKE(var_pred, var_true):
    """Average QLIKE loss of a variance forecast.

    With ``r = var_true / var_pred`` the per-observation loss is
    ``r - log(r) - 1``; the function returns its mean. Lower is better and the
    loss is 0 when the forecast equals the true value.
    """
    ratio = var_true / var_pred
    loss = ratio - np.log(ratio) - 1
    return loss.mean()

# Прогнозирование дисперсии на 1 шаг вперёд

- `time_series_cv` - кросс-валидация для временных рядов.
- `forecast_many` запускает `time_series_cv` для всех тикеров.

In [7]:
def time_series_cv(
    model, series_in: np.ndarray, var_true: np.ndarray, oos: int,
    quiet: bool=False
):
    """Expanding-window, one-step-ahead evaluation of ``model`` on one series.

    For each of the last ``oos`` time steps ``t`` the model is refitted on all
    observations strictly before ``t`` and asked for a single prediction, which
    is paired with ``var_true[t]``.

    Args:
        model: Object exposing ``fit_predict(window) -> (model, prediction)``.
            The model returned at one step is the one used at the next step.
        series_in: Input series; reshaped into a single column ``(L, 1)``.
        var_true: 1-D array of targets, one per row of ``series_in``.
        oos: Number of trailing observations to forecast, ``0 <= oos <= L``.
        quiet: Accepted for interface compatibility; currently unused.

    Returns:
        ``(var_preds, var_true_oos)`` — two 1-D arrays of length ``oos``.

    Raises:
        ValueError: If ``oos`` is negative or larger than the series length.
    """
    series_in = np.asarray(series_in).reshape(-1, 1)
    var_true = np.asarray(var_true)
    assert series_in.ndim == 2
    assert var_true.ndim == 1
    assert series_in.shape[0] == var_true.shape[0]

    n_obs = series_in.shape[0]
    # Without this check a too-large `oos` makes the start index negative, and
    # the slices below silently pick the wrong windows and targets.
    if not 0 <= oos <= n_obs:
        raise ValueError(
            f"oos must be between 0 and the series length ({n_obs}), got {oos}"
        )

    first_oos = n_obs - oos
    var_preds = []
    for t in range(first_oos, n_obs):
        # The training window ends just before t, so the forecast never sees
        # the observation it is scored against.
        model, var_pred = model.fit_predict(series_in[:t, :])
        var_preds.append(float(var_pred))
    return np.asarray(var_preds), var_true[first_oos:]

In [8]:
def forecast_many(df_full: pl.DataFrame, n_components: int, oos: int, key: rnd.PRNGKey):
    """Run the GARCH vs. MixGARCH forecast comparison for every ticker.

    For each ticker the rows are put in ``Datetime`` order, ``Ret_Close`` is
    multiplied by 100 and ``RV_Close`` by 100**2, and both models are evaluated
    with ``time_series_cv`` on the last ``oos`` observations. One line with the
    two QLIKE scores is printed per ticker.

    Args:
        df_full: Frame with columns ``Ticker``, ``Datetime``, ``Ret_Close`` and
            ``RV_Close``.
        n_components: Length of the list passed to ``GARCH.MixGARCH``.
        oos: Number of trailing observations forecast per ticker.
        key: JAX PRNG key handed unchanged to both model constructors.

    Returns:
        Long-format ``pl.DataFrame`` with columns ``Ticker``, ``Model``
        (``"GARCH"`` or ``"Mix"``), ``MSE`` and ``QLIKE``; two rows per ticker.
    """
    # Factories rather than instances: each ticker gets a fresh model, built
    # right before its own cross-validation run.
    model_factories = (
        ("GARCH", lambda: GARCH.GARCH(0.1, key=key)),
        ("Mix", lambda: GARCH.MixGARCH([0.1] * n_components, key=key)),
    )

    tickers = sorted(df_full["Ticker"].unique())
    results = []
    for ticker in tickers:
        df = df_full.filter(pl.col("Ticker") == ticker).select(
            "Datetime", pl.col("Ret_Close")*100, pl.col("RV_Close") * 100**2
        ).sort("Datetime")
        returns, realized = df["Ret_Close"], df["RV_Close"]

        scores = {}
        for label, make_model in model_factories:
            pred, true = time_series_cv(
                make_model(), returns, realized, oos, quiet=True
            )
            scores[label] = (MSE(pred, true).item(), QLIKE(pred, true).item())

        # These two names feed the `=` f-string specifier below, so they are
        # part of the printed text and must keep their spelling.
        qlike_garch, qlike_mix = scores["GARCH"][1], scores["Mix"][1]
        print(ticker, f"{qlike_garch=:.5f} {qlike_mix=:.5f}")

        results.extend(
            [ticker, label, mse, qlike] for label, (mse, qlike) in scores.items()
        )
    # `results` is a list of rows; say so explicitly instead of relying on
    # polars' orientation inference.
    return pl.DataFrame(
        results,
        schema={'Ticker': str, 'Model': str, 'MSE': float, 'QLIKE': float},
        orient="row",
    )

## Пример данных

- `Ret_Close` - дневная доходность по ценам закрытия.
- `RV_Close` - реализованная волатильность за соответствующий день; рассчитана по 5-минутным доходностям.

In [9]:
# Example data: AFLT only. Rows are sorted by Datetime before the projection
# drops that column, because the expanding-window CV below depends on
# chronological row order (forecast_many sorts the same way).
# Returns are scaled by 100 and RV by 100**2 so both stay in matching units.
df = pl.read_parquet("../data/returns_rv_1d.parquet").filter(
    pl.col("Ticker") == "AFLT"
).sort("Datetime").select(pl.col("Ret_Close")*100, pl.col("RV_Close") * 100**2)
df.head()

Ret_Close,RV_Close
f64,f64
-1.376254,3.527826
3.604657,13.091509
-2.11764,14.35587
0.698661,2.927011
0.766567,1.770776


## Пример оценки модели и расчёта точности прогноза

In [10]:
# Fit a GARCH model to the AFLT returns, reshaped to a single column. `fit`
# yields three values; only the second, `sol`, is displayed below.
# NOTE(review): `sol` looks like a SciPy-style optimisation report — confirm
# against GARCH.fit.
model, sol, crit = GARCH.GARCH(1.0, key=rnd.PRNGKey(4)).fit(
    np.asarray(df["Ret_Close"]).reshape((-1, 1))
)
sol

 message: Optimization terminated successfully
 success: True
  status: 0
     fun: 1.9817065241921938
       x: [ 3.725e-01  2.109e-01  6.857e-01]
     nit: 10
     jac: [-5.848e-04 -1.637e-04 -1.439e-03]
    nfev: 45
    njev: 10

In [11]:
# Expanding-window CV of the plain GARCH model on AFLT: the last 100
# observations are forecast one step at a time. The result is the pair
# (predictions, realised values).
fc_GARCH = time_series_cv(
    model=GARCH.GARCH(0.1, key=rnd.PRNGKey(4)),
    series_in=df["Ret_Close"],
    var_true=df["RV_Close"],
    oos=100,
)

In [12]:
# Score the GARCH forecasts; fc_GARCH unpacks as (var_pred, var_true).
{
    metric_name: metric(*fc_GARCH).item()
    for metric_name, metric in (("MSE", MSE), ("QLIKE", QLIKE))
}

{'MSE': 0.3772259204704197, 'QLIKE': 0.24808227654808002}

In [13]:
# Same expanding-window CV on AFLT, now for the mixture model built from a
# three-element list; the last 100 observations are forecast one at a time.
fc_RNN = time_series_cv(
    model=GARCH.MixGARCH([0.1] * 3, key=rnd.PRNGKey(4)),
    series_in=df["Ret_Close"],
    var_true=df["RV_Close"],
    oos=100,
)

In [14]:
# Score the mixture-model forecasts; fc_RNN unpacks as (var_pred, var_true).
{
    metric_name: metric(*fc_RNN).item()
    for metric_name, metric in (("MSE", MSE), ("QLIKE", QLIKE))
}

{'MSE': 0.3803638469073552, 'QLIKE': 0.2608282942097771}

## Прогноз для всех тикеров

Формат вывода по колонкам:

1. Тикер.
2. `qlike_garch` - метрика QLIKE для GARCH(1,1).
3. `qlike_mix` - QLIKE для GARCH(1,1)-RNN с 3 компонентами.

In [16]:
# Run both models over every ticker in the file (3 mixture components, last
# 100 observations out of sample). forecast_many prints one line per ticker;
# the long-format score table is displayed afterwards.
df_res = forecast_many(
    pl.read_parquet("../data/returns_rv_1d.parquet"),
    n_components=3, oos=100, key=rnd.PRNGKey(4)
)
df_res

AFLT qlike_garch=0.24808 qlike_mix=0.26083
ALRS qlike_garch=0.34750 qlike_mix=0.30788
BANE qlike_garch=0.67326 qlike_mix=0.70765
CBOM qlike_garch=0.35427 qlike_mix=0.39677
FESH qlike_garch=0.72344 qlike_mix=0.63469
GAZP qlike_garch=0.43535 qlike_mix=0.30101
INGR qlike_garch=0.60650 qlike_mix=0.54391
KAZT qlike_garch=1.58235 qlike_mix=1.34884
KMAZ qlike_garch=1.97968 qlike_mix=1.56727
LKOH qlike_garch=0.30257 qlike_mix=0.31997
LVHK qlike_garch=1.22298 qlike_mix=1.97527
MGTS qlike_garch=4.73733 qlike_mix=3.53192
MTSS qlike_garch=0.55991 qlike_mix=0.58865
MVID qlike_garch=0.39255 qlike_mix=0.39837
ROSN qlike_garch=0.23985 qlike_mix=0.21684
SBER qlike_garch=0.20172 qlike_mix=0.20908
UTAR qlike_garch=4.42047 qlike_mix=3.96832
VTBR qlike_garch=0.25724 qlike_mix=0.24062
ZILL qlike_garch=1.50444 qlike_mix=1.93816


Ticker,Model,MSE,QLIKE
str,str,f64,f64
"""AFLT""","""GARCH""",0.377226,0.248082
"""AFLT""","""Mix""",0.380364,0.260828
"""ALRS""","""GARCH""",0.498489,0.347505
"""ALRS""","""Mix""",0.470948,0.307877
"""BANE""","""GARCH""",0.624547,0.67326
"""BANE""","""Mix""",0.660329,0.707645
"""CBOM""","""GARCH""",0.466118,0.354269
"""CBOM""","""Mix""",0.512202,0.396771
"""FESH""","""GARCH""",0.791431,0.723439
"""FESH""","""Mix""",0.734025,0.634689


In [21]:
# Reshape the long (ticker, model) score table into one row per ticker, with
# the GARCH and Mix metrics side by side. Then add 0/1 flags that are 1 when
# the mixture's loss is not larger than GARCH's.
results_full = (
    df_res.filter(pl.col("Model") == "GARCH")
    .select("Ticker", pl.col("MSE", "QLIKE").name.suffix(" GARCH"))
    .join(
        df_res.filter(pl.col("Model") == "Mix")
        .select("Ticker", pl.col("MSE", "QLIKE").name.suffix(" Mix")),
        on="Ticker",
    )
    .with_columns(
        (pl.col("QLIKE Mix") <= pl.col("QLIKE GARCH"))
        .cast(pl.Int64)
        .alias("QLIKE: mix better?"),
        (pl.col("MSE Mix") <= pl.col("MSE GARCH"))
        .cast(pl.Int64)
        .alias("MSE: mix better?"),
    )
    .sort("Ticker")
)
results_full

Ticker,MSE GARCH,QLIKE GARCH,MSE Mix,QLIKE Mix,QLIKE: mix better?,MSE: mix better?
str,f64,f64,f64,f64,i64,i64
"""AFLT""",0.377226,0.248082,0.380364,0.260828,0,0
"""ALRS""",0.498489,0.347505,0.470948,0.307877,1,1
"""BANE""",0.624547,0.67326,0.660329,0.707645,0,0
"""CBOM""",0.466118,0.354269,0.512202,0.396771,0,0
"""FESH""",0.791431,0.723439,0.734025,0.634689,1,1
"""GAZP""",0.567048,0.435354,0.52929,0.301013,1,1
"""INGR""",0.888885,0.6065,0.836101,0.543911,1,1
"""KAZT""",1.572509,1.582352,1.325976,1.348845,1,1
"""KMAZ""",1.530039,1.979676,1.20808,1.567271,1,1
"""LKOH""",0.438951,0.302565,0.443106,0.319973,0,0


In [25]:
# Print the metric columns as a Markdown table (3 decimals), relabelling the
# "Mix" columns as "MixGARCH" for the write-up.
print(
    results_full
    .select("Ticker", "MSE GARCH", "MSE Mix", "QLIKE GARCH", "QLIKE Mix")
    .rename({"MSE Mix": "MSE MixGARCH", "QLIKE Mix": "QLIKE MixGARCH"})
    .sort("Ticker")
    .to_pandas()
    .to_markdown(index=False, floatfmt="8.3f")
)

| Ticker   |   MSE GARCH |   MSE MixGARCH |   QLIKE GARCH |   QLIKE MixGARCH |
|:---------|------------:|---------------:|--------------:|-----------------:|
| AFLT     |       0.377 |          0.380 |         0.248 |            0.261 |
| ALRS     |       0.498 |          0.471 |         0.348 |            0.308 |
| BANE     |       0.625 |          0.660 |         0.673 |            0.708 |
| CBOM     |       0.466 |          0.512 |         0.354 |            0.397 |
| FESH     |       0.791 |          0.734 |         0.723 |            0.635 |
| GAZP     |       0.567 |          0.529 |         0.435 |            0.301 |
| INGR     |       0.889 |          0.836 |         0.606 |            0.544 |
| KAZT     |       1.573 |          1.326 |         1.582 |            1.349 |
| KMAZ     |       1.530 |          1.208 |         1.980 |            1.567 |
| LKOH     |       0.439 |          0.443 |         0.303 |            0.320 |
| LVHK     |       1.046 |          1.611 |         

In [22]:
# Persist the per-ticker comparison table as CSV, relative to the working directory.
results_full.write_csv("GARCH_RNN_forecast.csv")

In [23]:
# Share of tickers where the mixture's QLIKE is not worse than GARCH's (mean of the 0/1 flag).
results_full["QLIKE: mix better?"].mean()

0.5263157894736842

In [24]:
# Share of tickers where the mixture's MSE is not worse than GARCH's (mean of the 0/1 flag).
results_full["MSE: mix better?"].mean()

0.5263157894736842

Смесь (RNN) даёт лучшую точность примерно в 53% случаев (10 из 19 тикеров), т.е. в среднем точность RNN не хуже и не лучше, чем у GARCH.