In [1]:
import warnings

import numpy as np
import pandas as pd
# plotly (px.line) and statsmodels (sm.stats.acorr_ljungbox) are used by the
# cells below, so they must be imported for the notebook to run on a fresh kernel.
import plotly.express as px
import statsmodels.api as sm
import yfinance as yf
# from scipy import stats

from AdaptiveWindowCV import *

warnings.filterwarnings("ignore")

# Data request

In [4]:
# BTC-USD price history from Yahoo Finance for the requested date range,
# reduced to the Open/High/Low/Close/Volume columns.
btc = (
    yf.Ticker('BTC-USD')
    .history(start='2021-04-21', end='2023-04-21')
    [['Open', 'High', 'Low', 'Close', 'Volume']]
)
btc

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-04-21 00:00:00+00:00,56471.128906,56757.972656,53695.468750,53906.089844,54926612466
2021-04-22 00:00:00+00:00,53857.105469,55410.230469,50583.812500,51762.273438,74798630778
2021-04-23 00:00:00+00:00,51739.808594,52120.792969,47714.664062,51093.652344,86668667320
2021-04-24 00:00:00+00:00,51143.226562,51167.562500,48805.285156,50050.867188,49014494781
2021-04-25 00:00:00+00:00,50052.832031,50506.019531,47159.484375,49004.253906,46117114240
...,...,...,...,...,...
2023-04-16 00:00:00+00:00,30315.976562,30555.537109,30157.832031,30315.355469,12854816417
2023-04-17 00:00:00+00:00,30317.146484,30319.197266,29275.371094,29445.044922,17872186762
2023-04-18 00:00:00+00:00,29449.091797,30470.302734,29154.849609,30397.552734,19480529496
2023-04-19 00:00:00+00:00,30394.187500,30411.054688,28669.898438,28822.679688,24571565421


In [5]:
# Line chart of the closing price over the downloaded period.
px.line(btc['Close'])

# EWMA Volatility 

In [4]:
def ewma_volatility(returns, lambda_):
    """Exponentially weighted moving average (EWMA) of lagged squared returns.

    The returns are shifted by one observation, missing values are dropped,
    and the result is squared. Calling that series ``x``, the output is

        forecast[0] = x[0]
        forecast[t] = lambda_ * forecast[t - 1] + (1 - lambda_) * x[t]

    Because of the one-step shift, the value stored at a given index label is
    built only from returns located before that label in ``returns``.

    Parameters
    ----------
    returns : pandas.Series
        Return series. Any index type is accepted (datetime, integer, ...).
    lambda_ : float
        Decay factor, i.e. the weight kept on the previous forecast.

    Returns
    -------
    pandas.Series
        Smoothed squared returns (a variance-like quantity; take the square
        root to obtain a volatility), carrying the index of the shifted,
        NaN-free return series. Empty when nothing remains after the shift.
    """
    squared_returns = returns.shift().dropna() ** 2

    # Empty or single-observation input leaves nothing to smooth.
    if squared_returns.empty:
        return pd.Series(dtype=float, index=squared_returns.index)

    # Use a plain array so element access is always positional. Indexing the
    # Series with [0] is a *label* lookup on integer indexes, and label 0 has
    # just been removed by shift().dropna().
    values = squared_returns.to_numpy(dtype=float)

    # Preallocate instead of growing the result with np.append on every step.
    forecasts = np.empty_like(values)
    forecasts[0] = values[0]
    for t in range(1, len(values)):
        forecasts[t] = lambda_ * forecasts[t - 1] + (1 - lambda_) * values[t]

    return pd.Series(forecasts, index=squared_returns.index)

In [5]:
# Decay factor passed to ewma_volatility.
lambda_ = 0.94

# Log returns of the close; the first difference is NaN and is dropped.
returns = np.log(btc['Close']).diff().dropna()

# ewma_volatility smooths squared returns, so the square root is taken here.
volatility = np.sqrt(
    ewma_volatility(returns, lambda_)
)
px.line(volatility)

# Ljung-Box Statistic

In [6]:
# Rolling 45-observation window over the squared returns; for each window the
# Ljung-Box test is run at lag 15 and the first cell of its result is kept.
# NOTE(review): that cell is presumably the test statistic (not the p-value) —
# confirm against the installed statsmodels version.
ljung_box_auto_correlation_statistic = (
    (returns ** 2)
    .rolling(45)
    .apply(lambda window: sm.stats.acorr_ljungbox(window, lags=[15]).values[0, 0])
    .dropna()
)
px.line(ljung_box_auto_correlation_statistic)

# Proxies

In [11]:
# Training proxy computed by AdaptiveWindowCV from the closing prices.
# NOTE(review): 0.94 equals the EWMA decay used earlier in the notebook; the
# meaning of the trailing 4 is defined in the AdaptiveWindowCV module — confirm there.
training_proxy = AdaptiveWindowCV.training_proxy_volatility_ewma(
    btc['Close'], 0.94, 4
)
px.line(training_proxy)

In [12]:
# Validation proxy computed by AdaptiveWindowCV from the closing prices.
# NOTE(review): 45 and 15 equal the rolling window and lag used in the Ljung-Box
# cell; the meaning of the trailing 4 is defined in the AdaptiveWindowCV module — confirm there.
validation_proxy = AdaptiveWindowCV.validation_proxy_autocorrelation_ljung_box(
    btc['Close'], 45, 15, 4
)
px.line(validation_proxy)

# AdaptiveWindowCV

In [19]:
# Ten-split cross-validator driven by the two proxies computed above.
# `times` is a Series whose values and index are both the timestamps of `btc`.
cross_validator = AdaptiveWindowCV(
    n_splits=10,
    times=btc.index.to_series(),
    training_proxy=training_proxy,
    validation_proxy=validation_proxy,
)

In [20]:
# Print the train and test index arrays of every fold, numbering folds from 1.
for i, fold in enumerate(cross_validator.split(btc)):
    train_index, test_index = fold
    print(f"Fold {i + 1}:")
    print(f"  Train: index={train_index}")
    print(f"  Test:  index={test_index}")

Fold 1:
  Train: index=[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69]
  Test:  index=[ 70  71  72  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87
  88  89  90  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105
 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123
 124 125 126 127 128 129 130 131 132 133 134 135 136 137]
Fold 2:
  Train: index=[  4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19  20  21
  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36  37  38  39
  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54  55  56  57
  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72  73  74  75
  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90  91  92  93
  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108 109 110 111
 112 113 1