Creating baseline model EGARCH for BTC 

In [39]:
import warnings
import numpy as np
import pandas as pd
from statsmodels.tsa.stattools import adfuller, kpss
from arch.univariate import EGARCH, ZeroMean, StudentsT, Normal
from statsmodels.tools.sm_exceptions import InterpolationWarning

Helper functions 

In [40]:
#need to import this for rmse and qlike
#import numpy as np


def rmse(y_true, y_pred):
    return float(np.sqrt(np.mean((y_true - y_pred) ** 2)))


def qlike(y2, var_fc, eps=1e-12):
    ratio = (y2 + eps) / (var_fc + eps)
    return float(np.mean(ratio - np.log(ratio) - 1.0))


def clean_series(x, name="series"):
    x = pd.to_numeric(x, errors="coerce").astype(float)
    x = x.replace([np.inf, -np.inf], np.nan).dropna()
    if not x.index.is_monotonic_increasing:
        x = x.sort_index()
    return x


def scale_series(x, factor=100.0):
    if factor is None or factor == 1.0:
        return x, 1.0
    return x * factor, factor

Loading dataset 

In [41]:
df = pd.read_csv("../Data/btc_final_df.csv")

df['timestamp'] = pd.to_datetime(df['timestamp'], utc=True)
df = df.set_index('timestamp')
df = df.sort_index()
df = df[~df.index.duplicated(keep='last')]
df = df.asfreq('h')  # make sure it's hourly


if 'hourly_return' in df.columns:
    df['hourly_return'] = pd.to_numeric(df['hourly_return'], errors='coerce').replace([np.inf, -np.inf], np.nan)


r = df['hourly_return'].dropna()

print("Index dtype:", df.index.dtype)
print("Columns:", df.columns.tolist())
print("Data shape:", df.shape)
print(f"\nUsing {len(r)} observations for EGARCH modelling.")

Index dtype: datetime64[ns, UTC]
Columns: ['btc_exchange_netflow_usd', 'active_sending_addresses', 'active_receiving_addresses', 'onchain_volume_usd', 'open', 'low', 'high', 'close', 'mint_reward_usd', 'total_fee_usd', 'transaction_count', 'exchange_to_wallet_usd', 'wallet_to_exchange_usd', 'RV_MA_1hr', 'RV_MA_3hr', 'RV_MA_12hr', 'hourly_return', 'vol_future', 'whale_net_usd', 'whale_net_usd_24h', 'whale_burst_flag', 'etow_usd_log', 'etow_coins_log', 'whale_txn_count_log', 'wtoe_usd_log', 'wtoe_coins_log', 'eth_to_btc_spill']
Data shape: (8208, 27)

Using 8208 observations for EGARCH modelling.


Defining stationary tests 

In [42]:
def adf_kpss(x: pd.Series):
    x = x.dropna()
    adf_res = adfuller(x, autolag="AIC")
    kpss_level = kpss(x, regression="c", nlags="auto")
    kpss_trend = kpss(x, regression="ct", nlags="auto")
    return {
        "ADF": {"stat": adf_res[0], "p": adf_res[1]},
        "KPSS_level": {"stat": kpss_level[0], "p": kpss_level[1]},
        "KPSS_trend": {"stat": kpss_trend[0], "p": kpss_trend[1]},
    }

print("\n=== Stationarity Tests ===")
print(adf_kpss(r))


=== Stationarity Tests ===
{'ADF': {'stat': np.float64(-20.897212593128913), 'p': 0.0}, 'KPSS_level': {'stat': np.float64(0.14799086823493415), 'p': np.float64(0.1)}, 'KPSS_trend': {'stat': np.float64(0.1074770706815293), 'p': np.float64(0.1)}}


look-up table. The actual p-value is greater than the p-value returned.

  kpss_level = kpss(x, regression="c", nlags="auto")
look-up table. The actual p-value is greater than the p-value returned.

  kpss_trend = kpss(x, regression="ct", nlags="auto")


Use table to show at least weak stationarity. 

In [43]:
def stationarity_table(x, alpha=0.05):
    x = pd.to_numeric(x, errors="coerce").astype(float).dropna()

    adf_stat, adf_p, adf_lags, adf_nobs, adf_crit, *_ = adfuller(x, autolag="AIC")

    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=InterpolationWarning)
        kpss_level_stat, kpss_level_p, kpss_level_lags, kpss_level_crit = kpss(x, regression="c", nlags="auto")
        kpss_trend_stat, kpss_trend_p, kpss_trend_lags, kpss_trend_crit = kpss(x, regression="ct", nlags="auto")

    rows = [
        {
            "Test": "ADF",
            "Statistic": adf_stat,
            "p-value": adf_p,
            "H0": "Unit root (non-stationary)",
            f"Decision @ {int(alpha*100)}%": "Stationary (reject H0)" if adf_p < alpha else "Non-stationary (fail to reject)",
            "Lags used": adf_lags,
            "Obs used": adf_nobs,
        },
        {
            "Test": "KPSS (level)",
            "Statistic": kpss_level_stat,
            "p-value": kpss_level_p,
            "H0": "Level-stationary",
            f"Decision @ {int(alpha*100)}%": "Non-stationary (reject H0)" if kpss_level_p < alpha else "Stationary (fail to reject)",
            "Lags used": kpss_level_lags,
            "Obs used": len(x),
        },
        {
            "Test": "KPSS (trend)",
            "Statistic": kpss_trend_stat,
            "p-value": kpss_trend_p,
            "H0": "Trend-stationary",
            f"Decision @ {int(alpha*100)}%": "Non-stationary (reject H0)" if kpss_trend_p < alpha else "Stationary (fail to reject)",
            "Lags used": kpss_trend_lags,
            "Obs used": len(x),
        },
    ]

    df_out = pd.DataFrame(rows)
    return df_out

    num_cols = ["Statistic", "p-value"]
    df_out[num_cols] = df_out[num_cols].applymap(lambda v: np.nan if pd.isna(v) else float(v))
    return df_out

tbl = stationarity_table(r, alpha=0.05)
tbl

Unnamed: 0,Test,Statistic,p-value,H0,Decision @ 5%,Lags used,Obs used
0,ADF,-20.897213,0.0,Unit root (non-stationary),Stationary (reject H0),22,8185
1,KPSS (level),0.147991,0.1,Level-stationary,Stationary (fail to reject),7,8208
2,KPSS (trend),0.107477,0.1,Trend-stationary,Stationary (fail to reject),7,8208


Train+Validation/Test Split (85/15)

In [44]:
use_validation = True 
n = len(r)
if use_validation:
    n_train = int(0.70 * n)
    n_val   = int(0.15 * n)
    train_proto = r.iloc[:n_train]
    val_proto   = r.iloc[n_train:n_train+n_val]
    test_proto  = r.iloc[n_train+n_val:]
else:
    n_train = int(0.85 * n)
    train_proto = r.iloc[:n_train]
    val_proto   = None
    test_proto  = r.iloc[n_train:]

PURGE_HOURS = 1
N_SPLITS = 5 
if len(test_proto) == 0:
    raise ValueError("Empty test set after temporal split.")
boundary = test_proto.index.min()
purged_start = boundary + pd.Timedelta(hours=PURGE_HOURS)

train_val = r.loc[: purged_start - pd.Timedelta(hours=1)]
test      = r.loc[purged_start:]

print(f"Train/Val for CV: {len(train_val)} obs  |  Test (after 1h purge): {len(test)} obs")

#picking fixed sizes to fit in train_val
N_SPLITS    = 5
TEST_SIZE   = 24 * 5      
n           = len(train_val)

# Pick TRAIN_SIZE to fit the feasibility condition:
TRAIN_SIZE = n - (PURGE_HOURS + N_SPLITS * TEST_SIZE)

print(f"Rolling CV plan → train={TRAIN_SIZE}, test={TEST_SIZE}, purge={PURGE_HOURS}, folds={N_SPLITS} "
      f"(train_val length={n})")

Train/Val for CV: 6977 obs  |  Test (after 1h purge): 1231 obs
Rolling CV plan → train=6376, test=120, purge=1, folds=5 (train_val length=6977)


Rolling Window = 5 Splits 

In [45]:
def rolling_split_fixed(x, *, train_size, test_size, purge=24, n_splits=None):
    n = len(x)
    step = test_size
    start_train = 0
    k = 0

    while True:
        end_train   = start_train + train_size               
        start_test  = end_train + purge
        end_test    = start_test + test_size                  

        if end_test > n:                                     
            break
        yield (np.arange(start_train, end_train),
               np.arange(start_test,  end_test))

        k += 1
        if (n_splits is not None) and (k >= n_splits):
            break
        
        start_train += step

Fit EGARCH

In [46]:
def egarch_path_refit(train_ret, test_ret, dist="t", scale=100.0):
    tr = clean_series(train_ret, "train_ret")
    te = clean_series(test_ret, "test_ret")

    tr_s, s = scale_series(tr, scale)
    te_s, _ = scale_series(te, scale)

    all_s = pd.concat([tr_s, te_s])
    n_tr, n_te = len(tr_s), len(te_s)

    var_fc_s = np.empty(n_te)

    for j in range(n_te):
        y_fit = all_s.iloc[: n_tr + j]
        model = ZeroMean(y_fit)
        model.volatility = EGARCH(p=1, o=1, q=1)
        model.distribution = StudentsT() if dist == "t" else Normal()
        res = model.fit(disp="off")

        fcast = res.forecast(horizon=1, reindex=False)
        var_fc_s[j] = float(fcast.variance.values[-1, 0])

    var_fc = var_fc_s / (s ** 2)
    r2 = te.values ** 2

    return {
        "var_fc": var_fc,
        "r2": r2,
        "rmse": rmse(r2, var_fc),
        "qlike": qlike(r2, var_fc),
        "n_train": len(tr),
        "n_test": len(te),
    }

Tune for RMSE 

In [47]:
candidates = [
    ("t", 100.0),
    ("normal", 100.0),
    ("t", 1000.0),
    ("normal", 1000.0),
]

tune_results = []
for dist, scale in candidates:
    o = egarch_path_refit(train_val, test, dist=dist, scale=scale)
    tune_results.append((dist, scale, o["rmse"]))

#rank by lowest RMSE 
tune_results = sorted(tune_results, key=lambda x: x[2])
best_dist, best_scale, best_rmse = tune_results[0]

print("Best EGARCH tuning by RMSE")
print(f"  Distribution: {best_dist}")
print(f"  Scale:        {best_scale}")
print(f"  RMSE:         {best_rmse:.6f}")


Best EGARCH tuning by RMSE
  Distribution: t
  Scale:        100.0
  RMSE:         0.000047


Cross Validation 

In [48]:
print("\n=== Rolling CV ===")
cv_scores = []

for i, (tri, tei) in enumerate(
        rolling_split_fixed(train_val,
                            train_size=TRAIN_SIZE,
                            test_size=TEST_SIZE,
                            purge=PURGE_HOURS,
                            n_splits=N_SPLITS), 1):
    tr_i = train_val.iloc[tri]
    te_i = train_val.iloc[tei]

    out = egarch_path_refit(
        tr_i, te_i,
        dist=best_dist if 'best_dist' in globals() else "t",
        scale=best_scale if 'best_scale' in globals() else 100.0
    )
    cv_scores.append((out["rmse"], out["qlike"]))
    print(f"Fold {i}: RMSE={out['rmse']:.6e} | QLIKE={out['qlike']:.6f} "
          f"| Train={out['n_train']} Test={out['n_test']}")

if cv_scores:
    mean_rmse  = float(np.mean([s[0] for s in cv_scores]))
    mean_qlike = float(np.mean([s[1] for s in cv_scores]))
    print(f"\nMean CV RMSE={mean_rmse:.6e} | Mean CV QLIKE={mean_qlike:.6f}")
else:
    print("No valid CV folds.")


=== Rolling CV ===
Fold 1: RMSE=2.183799e-05 | QLIKE=1.543314 | Train=6376 Test=120
Fold 2: RMSE=2.432175e-05 | QLIKE=1.745252 | Train=6376 Test=120
Fold 3: RMSE=2.720034e-05 | QLIKE=1.871547 | Train=6376 Test=120
Fold 4: RMSE=2.188111e-05 | QLIKE=1.676564 | Train=6376 Test=120
Fold 5: RMSE=7.523257e-05 | QLIKE=2.272770 | Train=6376 Test=120

Mean CV RMSE=3.409475e-05 | Mean CV QLIKE=1.821889


Final Evaluation 

In [49]:
final_train = train_val 
final_out = egarch_path_refit(final_train, test, dist="t", scale=100.0)
print("\n=== Final Test Results ===")
print(f"Test RMSE={final_out['rmse']:.6e} | QLIKE={final_out['qlike']:.6f}")


=== Final Test Results ===
Test RMSE=4.694536e-05 | QLIKE=1.868401


Check on why RMSE is so low - due to scaling since returns are of small magnitude 

In [50]:
print("Scale checks:")
print(f"mean|r|     = {np.mean(np.abs(r)):.3e}")
print(f"mean r^2    = {np.mean(r**2):.3e}")
print(f"std(r)^2    = {np.std(r)**2:.3e}")  


print("\nMetrics (scientific):")
print(f"RMSE       = {final_out['rmse']:.3e}")   
print(f"QLIKE      = {final_out['qlike']:.6f}")


vf = final_out['var_fc']
print("\nForecast variance summary:")
print(f"min={vf.min():.3e}  p50={np.median(vf):.3e}  mean={vf.mean():.3e}  max={vf.max():.3e}")


Scale checks:
mean|r|     = 3.079e-03
mean r^2    = 2.272e-05
std(r)^2    = 2.272e-05

Metrics (scientific):
RMSE       = 4.695e-05
QLIKE      = 1.868401

Forecast variance summary:
min=2.341e-06  p50=1.106e-05  mean=1.600e-05  max=2.463e-04


Save CSV for EGARCH predicted_vol_future 

In [51]:
pred_vol_future = np.sqrt(np.maximum(final_out["var_fc"], 0.0))
vol_future_df = pd.DataFrame(
    {
        "pred_vol_future": pred_vol_future,
    },
    index=test.index,
)


print("=== EGARCH Volatility Forecast Results ===")
display(vol_future_df.head(10))  


output_path = "../Results/btc_egarch_prediction.csv"
vol_future_df.to_csv(output_path, index_label="timestamp")

print(f"\nSaved EGARCH results to: {output_path}")


=== EGARCH Volatility Forecast Results ===


Unnamed: 0_level_0,pred_vol_future
timestamp,Unnamed: 1_level_1
2025-08-22 17:00:00+00:00,0.009963
2025-08-22 18:00:00+00:00,0.008877
2025-08-22 19:00:00+00:00,0.007697
2025-08-22 20:00:00+00:00,0.006712
2025-08-22 21:00:00+00:00,0.006079
2025-08-22 22:00:00+00:00,0.005446
2025-08-22 23:00:00+00:00,0.005306
2025-08-23 00:00:00+00:00,0.004814
2025-08-23 01:00:00+00:00,0.004268
2025-08-23 02:00:00+00:00,0.00442



Saved EGARCH results to: ../Results/btc_egarch_prediction.csv
