In [475]:
import pandas as pd
import numpy as np 
from statsmodels.tsa.api import VAR
from tqdm import tqdm  # optional progress bar

In [476]:
btc_dune_df = pd.read_csv('../Data/dune_btc_hour.csv')
btc_whale_alerts_df = pd.read_csv('../Data/whale_alert_btc.csv')
eth_dune_df = pd.read_csv('../Data/dune_eth_hour.csv')
eth_whale_alerts_df = pd.read_csv('../Data/whale_alert_eth.csv')

In [477]:
# Dropping realized_volatility column
btc_dune_df = btc_dune_df.drop(columns = 'realized_volatility')

In [478]:
eth_dune_df.columns

Index(['datetime', 'RV_MA_1hr', 'RV_MA_3hr', 'RV_MA_12hr', 'vol_future',
       'active_sending_addresses', 'active_receiving_addresses',
       'exchange_withdrawing_count', 'transaction_count', 'fail_rate_percent',
       'open', 'low', 'high', 'close', 'hourly_return',
       'onchain_volume_usd_log', 'avg_gas_fee_usd_log',
       'avg_priority_fee_usd_log', 'staking_inflow_log',
       'exchange_depositing_count_log', 'exchange_netflow_usd_log'],
      dtype='object')

In [479]:
btc_dune_df = btc_dune_df.rename(columns={'hour_utc': 'timestamp'})
eth_dune_df = eth_dune_df.rename(columns={'datetime': 'timestamp'})

In [480]:
btc_dune_df.dropna(inplace=True)

### VAR-FEVD

We aim to analyze the dynamic relationship between Bitcoin and Ethereum volatility using a bivariate Vector Autoregression (VAR) model, followed by Forecast Error Variance Decomposition (FEVD).

FEVD quantifies the spillover effect — e.g., whether ETH volatility or drives BTC volatility shocks over time.

In [481]:
# For Bitcoin
var_btc = btc_dune_df[['timestamp', 'vol_future']].copy()

# For Ethereum (if column name is 'datetime')
var_eth = eth_dune_df[['timestamp', 'vol_future']].copy()


var_btc['timestamp'] = pd.to_datetime(var_btc['timestamp'], utc=True)
var_eth['timestamp'] = pd.to_datetime(var_eth['timestamp'], utc=True)

In [482]:
var_btc = var_btc.rename(columns={'vol_future': 'btc_vol_future'})
var_eth = var_eth.rename(columns={'vol_future': 'eth_vol_future'})

print(f"{len(var_btc)} rows in BTC volatility dataframe.")
print(f"{len(var_eth)} rows in ETH volatility dataframe.")

8207 rows in BTC volatility dataframe.
8208 rows in ETH volatility dataframe.


In [483]:
var_merged_df = pd.merge(var_btc, var_eth, on='timestamp', how='inner')
print(f"{len(var_merged_df)} rows in merged dataframe after inner join.")

8207 rows in merged dataframe after inner join.


In [484]:
data = var_merged_df.set_index('timestamp').sort_index() 

In [None]:
## HELP
# # --- 1️⃣ Prepare data ---
# data = var_merged_df.set_index('timestamp').sort_index()

# # Split point (first 70% = training)
# split_idx = int(len(data) * 0.7)
# train_data = data.iloc[:split_idx]

# print(f"Training data: {train_data.index.min()} → {train_data.index.max()}")

# # --- 2️⃣ Fit VAR model on training data only ---
# model = VAR(train_data)
# results = model.fit(maxlags=24, ic='aic')
# print(results.summary())

# # --- 3️⃣ Compute 1-hour FEVD ---
# fevd = results.fevd(1)

# cols = list(results.names)
# btc_idx = cols.index('btc_vol_future')
# eth_idx = cols.index('eth_vol_future')

# fevd_btc = pd.DataFrame(fevd.decomp[btc_idx], columns=cols, index=[1])
# fevd_eth = pd.DataFrame(fevd.decomp[eth_idx], columns=cols, index=[1])

# # Extract FEVD shares
# btc_self = fevd_btc.loc[1, 'btc_vol_future']
# eth_to_btc = fevd_btc.loc[1, 'eth_vol_future']
# btc_to_eth = fevd_eth.loc[1, 'btc_vol_future']
# eth_self = fevd_eth.loc[1, 'eth_vol_future']

# # --- 4️⃣ Create constant “spillover structure” ---
# fevd_const = {
#     'btc_self_1h': btc_self,
#     'eth_self_1h': eth_self,
#     'btc_to_eth_1h': btc_to_eth,
#     'eth_to_btc_1h': eth_to_btc,
# }

# fevd_const_df = pd.DataFrame([fevd_const])
# print("\nConstant FEVD structure:")
# print(fevd_const_df)

# # --- 5️⃣ Assign constants to all rows in dataset ---
# for col in fevd_const_df.columns:
#     data[col] = fevd_const_df[col].iloc[0]

# print("\nData with constant FEVD features:")
# print(data.head())


In [486]:
data

Unnamed: 0_level_0,btc_vol_future,eth_vol_future
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-11-05 00:00:00+00:00,0.008782,0.009336
2024-11-05 01:00:00+00:00,0.005309,0.003825
2024-11-05 02:00:00+00:00,0.006122,0.003505
2024-11-05 03:00:00+00:00,0.006630,0.003399
2024-11-05 04:00:00+00:00,0.007759,0.003244
...,...,...
2025-10-12 18:00:00+00:00,0.003767,0.006251
2025-10-12 19:00:00+00:00,0.006841,0.007959
2025-10-12 20:00:00+00:00,0.008244,0.013743
2025-10-12 21:00:00+00:00,0.006577,0.007669


In [487]:
## merging datasets

In [488]:
eth_whale_alerts_df.columns

Index(['datetime1h', 'whale_net_usd', 'whale_net_usd_24h', 'whale_burst_flag',
       'etow_usd_log', 'etow_coins_log', 'whale_txn_count_log', 'wtoe_usd_log',
       'wtoe_coins_log'],
      dtype='object')

In [489]:
eth_dune_df.columns

Index(['timestamp', 'RV_MA_1hr', 'RV_MA_3hr', 'RV_MA_12hr', 'vol_future',
       'active_sending_addresses', 'active_receiving_addresses',
       'exchange_withdrawing_count', 'transaction_count', 'fail_rate_percent',
       'open', 'low', 'high', 'close', 'hourly_return',
       'onchain_volume_usd_log', 'avg_gas_fee_usd_log',
       'avg_priority_fee_usd_log', 'staking_inflow_log',
       'exchange_depositing_count_log', 'exchange_netflow_usd_log'],
      dtype='object')

In [490]:

eth_whale_alerts_df['datetime1h'] = pd.to_datetime(eth_whale_alerts_df['datetime1h'], utc=True)
eth_dune_df['timestamp'] = pd.to_datetime(eth_dune_df['timestamp'], utc=True)

eth_whale_alerts_df = eth_whale_alerts_df.rename(columns={'datetime1h': 'timestamp'})

eth_whale_alerts_df = eth_whale_alerts_df.sort_values('timestamp')
eth_dune_df = eth_dune_df.sort_values('timestamp')

eth_merged_df = pd.merge_asof(
    eth_dune_df,
    eth_whale_alerts_df,
    on='timestamp',
    direction='nearest',       # optional: use nearest timestamp match
    tolerance=pd.Timedelta('1h')  # optional: only merge if within 1 hour
)


In [491]:
eth_merged_df.isna().sum()

timestamp                           0
RV_MA_1hr                           0
RV_MA_3hr                           0
RV_MA_12hr                          0
vol_future                          0
active_sending_addresses            0
active_receiving_addresses          0
exchange_withdrawing_count          0
transaction_count                   0
fail_rate_percent                   0
open                                0
low                                 0
high                                0
close                               0
hourly_return                       0
onchain_volume_usd_log              0
avg_gas_fee_usd_log                 0
avg_priority_fee_usd_log            0
staking_inflow_log                  0
exchange_depositing_count_log       0
exchange_netflow_usd_log            0
whale_net_usd                    6383
whale_net_usd_24h                6383
whale_burst_flag                 6383
etow_usd_log                     6383
etow_coins_log                   6383
whale_txn_co

In [492]:
eth_merged_df.fillna(0, inplace=True)

In [493]:
btc_whale_alerts_df['datetime1h'] = pd.to_datetime(btc_whale_alerts_df['datetime1h'], utc=True)
btc_dune_df['timestamp'] = pd.to_datetime(btc_dune_df['timestamp'], utc=True)

btc_whale_alerts_df = btc_whale_alerts_df.rename(columns={'datetime1h': 'timestamp'})

btc_whale_alerts_df = btc_whale_alerts_df.sort_values('timestamp')
btc_dune_df = btc_dune_df.sort_values('timestamp')

btc_merged_df = pd.merge_asof(
    btc_dune_df,
    btc_whale_alerts_df,
    on='timestamp',
    direction='nearest',          # get nearest match in time
    tolerance=pd.Timedelta('1h')  # only merge if within 1 hour
)

btc_merged_df.isna().sum()

timestamp                        0
btc_exchange_netflow_usd         0
active_sending_addresses         0
active_receiving_addresses       0
onchain_volume_usd               0
open                             0
low                              0
high                             0
close                            0
mint_reward_usd                  0
total_fee_usd                    0
transaction_count                0
exchange_to_wallet_usd           0
wallet_to_exchange_usd           0
RV_MA_1hr                        0
RV_MA_3hr                        0
RV_MA_12hr                       0
hourly_return                    0
vol_future                       0
whale_net_usd                 5070
whale_net_usd_24h             5070
whale_burst_flag              5070
etow_usd_log                  5070
etow_coins_log                5070
whale_txn_count_log           5070
wtoe_usd_log                  5070
wtoe_coins_log                5070
dtype: int64

In [494]:
btc_merged_df.fillna(0, inplace=True)

In [495]:
#btc_merged_df.to_csv('../Data/final_btc_df_var_fevd.csv', index=False)
#eth_merged_df.to_csv('../Data/final_btc_df_var_fevd.csv', index=False)
