In [1481]:
import pandas as pd
import numpy as np 
from statsmodels.tsa.api import VAR
import warnings


In [1482]:
# Read the hourly Dune extracts and the Whale Alert extracts for BTC and ETH.
# The generator is consumed left-to-right, so files are read in the listed order.
(
    btc_dune_df,
    btc_whale_alerts_df,
    eth_dune_df,
    eth_whale_alerts_df,
) = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../Data/dune_btc_hour.csv',
        '../Data/whale_alert_btc.csv',
        '../Data/dune_eth_hour.csv',
        '../Data/whale_alert_eth.csv',
    )
)

In [1483]:
# Remove the realized_volatility column from the BTC Dune frame.
btc_dune_df = btc_dune_df.drop('realized_volatility', axis='columns')

In [1484]:
# List the ETH Dune column names (the time key in this frame is 'datetime').
eth_dune_df.columns

Index(['datetime', 'RV_MA_1hr', 'RV_MA_3hr', 'RV_MA_12hr', 'vol_future',
       'active_sending_addresses', 'active_receiving_addresses',
       'exchange_withdrawing_count', 'transaction_count', 'fail_rate_percent',
       'open', 'low', 'high', 'close', 'hourly_return',
       'onchain_volume_usd_log', 'avg_gas_fee_usd_log',
       'avg_priority_fee_usd_log', 'staking_inflow_log',
       'exchange_depositing_count_log', 'exchange_netflow_usd_log'],
      dtype='object')

In [1485]:
# Give both Dune frames the same time-key column name: 'timestamp'.
btc_dune_df = btc_dune_df.rename({'hour_utc': 'timestamp'}, axis='columns')
eth_dune_df = eth_dune_df.rename({'datetime': 'timestamp'}, axis='columns')

In [1486]:
# Discard every BTC row that has at least one missing value.
btc_dune_df = btc_dune_df.dropna()

### VAR-FEVD

We aim to analyze the dynamic relationship between Bitcoin and Ethereum volatility using a bivariate Vector Autoregression (VAR) model, followed by Forecast Error Variance Decomposition (FEVD).

FEVD quantifies the spillover effect — e.g., whether shocks to ETH volatility drive BTC volatility (or vice versa) over time.

In [1487]:
# Load the per-asset vol_future series used as the two VAR variables.
btc_vol_df = pd.read_csv('../Data/btc_vol_future.csv')
eth_vol_df = pd.read_csv('../Data/eth_vol_future.csv')

# len(df) is the number of rows (observations), not the number of columns.
print(f'bitcoin rows: {len(btc_vol_df)}, ethereum rows: {len(eth_vol_df)}')

bitcoin columns: 8232, ethereum columns: 8232


In [1488]:
# Preview the first rows of the BTC volatility frame.
btc_vol_df.head()

Unnamed: 0,datetime,vol_future
0,2024-11-04 00:00:00.000 UTC,0.009547
1,2024-11-04 01:00:00.000 UTC,0.012712
2,2024-11-04 02:00:00.000 UTC,0.009945
3,2024-11-04 03:00:00.000 UTC,0.009561
4,2024-11-04 04:00:00.000 UTC,0.008098


In [1489]:
# Prefix each vol_future column with its asset so the two series stay distinguishable.
btc_vol_df = btc_vol_df.rename({'vol_future': 'btc_vol_future'}, axis='columns')
eth_vol_df = eth_vol_df.rename({'vol_future': 'eth_vol_future'}, axis='columns')

In [1490]:
# Inner-join the ETH and BTC volatility series on their shared 'datetime' key.
vol_merged_df = pd.merge(eth_vol_df, btc_vol_df, on='datetime', how='inner')
# len(df) counts rows, so report it as rows.
print(f'After merging vol data: {len(vol_merged_df)} rows')

vol_merged_df = vol_merged_df.rename(columns={'datetime': 'timestamp'})

vol_merged_df.head()


After merging vol data: 8232 columns


Unnamed: 0,timestamp,eth_vol_future,btc_vol_future
0,2024-11-04 00:00:00.000 UTC,0.008161,0.009547
1,2024-11-04 01:00:00.000 UTC,0.00525,0.012712
2,2024-11-04 02:00:00.000 UTC,0.003669,0.009945
3,2024-11-04 03:00:00.000 UTC,0.002562,0.009561
4,2024-11-04 04:00:00.000 UTC,0.002531,0.008098


In [1491]:
# Count missing values per column in the merged volatility frame.
vol_merged_df.isna().sum()

timestamp         0
eth_vol_future    0
btc_vol_future    0
dtype: int64

In [1492]:
# Winsorize every non-timestamp column at its own 5th / 95th percentile.
# NOTE(review): the percentiles are taken over the whole sample, not a trailing window.
value_cols = [col for col in vol_merged_df.columns if col != 'timestamp']
for col in value_cols:
    lower_limit, upper_limit = vol_merged_df[col].quantile([0.05, 0.95])
    vol_merged_df[col] = vol_merged_df[col].clip(lower=lower_limit, upper=upper_limit)


### Code for VAR-FEVD 
We use a rolling window to calculate the spillover effects: the VAR model fitted for each row uses only the previous 24 hours of data, and the lag order is selected by BIC.


We will use BIC: https://stats.stackexchange.com/questions/313586/var-lag-selection-tests-which-one-do-i-choose
This choice is supported by the Stock and Watson paper: https://www.princeton.edu/~mwatson/papers/Stock_Watson_HOM_Vol2.pdf
We also want a more conservative approach to avoid overfitting, so BIC is the more appropriate choice.

In [1493]:
import numpy as np
import pandas as pd
from statsmodels.tsa.api import VAR
from tqdm import tqdm
import warnings
from collections import Counter

def _fevd_table_at_horizon(decomp, horizon):
    """Return the 2x2 (equation, shock) FEVD matrix at `horizon`, or None if the layout is not recognised."""
    decomp = np.asarray(decomp)

    # Already a (equation, shock) matrix.
    if decomp.shape == (2, 2):
        return decomp
    if decomp.ndim != 3:
        return None

    n_equations, n_steps, n_shocks = decomp.shape
    # statsmodels stores FEVD.decomp as (equation, step, shock): the horizon
    # lives on the MIDDLE axis, so slice there.
    if n_equations == 2 and n_shocks == 2 and n_steps >= horizon:
        return decomp[:, horizon - 1, :]
    # Defensive fallback kept from the previous implementation for a
    # (step=1, equation, shock) array.
    if decomp.shape == (1, 2, 2) and horizon == 1:
        return decomp[0, :, :]
    return None


def compute_var_fevd_features(df, window=24, horizon=1, maxlags=12, 
                               min_samples=None, fixed_lag=None, verbose=False):
    """
    Compute volatility spillover features between BTC and ETH using VAR-FEVD.

    For every row i >= window, a bivariate VAR is fitted on rows [i - window, i)
    (row i itself is excluded) and the off-diagonal FEVD shares at `horizon`
    are stored at row i.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain 'btc_vol_future' and 'eth_vol_future' columns, indexed by timestamp.
    window : int, default=24
        Rolling window size (number of observations).
    horizon : int, default=1
        Forecast horizon for FEVD (steps ahead for decomposition). Must be >= 1.
    maxlags : int, default=12
        Maximum lags for VAR model selection (if fixed_lag not specified).
        Capped per window at len(window_data) // 3.
    min_samples : int, optional
        Minimum samples required to fit model. If None, uses max(10, window//2).
    fixed_lag : int, optional
        If specified, uses fixed lag order instead of BIC selection (faster).
    verbose : bool, default=False
        If True, shows a progress bar and prints detailed error diagnostics.

    Returns
    -------
    pd.DataFrame
        DataFrame with columns: ['btc_to_eth_spill', 'eth_to_btc_spill'].
        Same (sorted) index as input df; rows without a usable model are NaN.
    dict
        Diagnostic information: 'success_count', 'total_attempts',
        'success_rate' and 'errors' (a dict of counters). The
        'lag_selection_error' counter records a non-fatal fallback to lag 1,
        so the counters may sum to more than 'total_attempts'.

    Raises
    ------
    ValueError
        If `horizon` < 1 or fewer than `window` complete rows are available.

    Notes
    -----
    The FEVD is built from orthogonalised (Cholesky) impulse responses with the
    variables ordered ['btc_vol_future', 'eth_vol_future']. At horizon=1 the
    forecast error of the first variable is attributed entirely to its own
    shock, so 'eth_to_btc_spill' is exactly 0 for every window. Use
    horizon >= 2 to obtain an informative ETH -> BTC value.
    """
    if horizon < 1:
        raise ValueError(f"horizon must be >= 1, got {horizon}")

    # Prepare data
    df_work = df[['btc_vol_future', 'eth_vol_future']].copy()
    df_work = df_work.sort_index()

    # Check for sufficient data
    non_nan_count = len(df_work.dropna())
    if non_nan_count < window:
        raise ValueError(f"Insufficient data: need at least {window} non-NaN rows, found {non_nan_count}")

    if horizon == 1:
        warnings.warn(
            "horizon=1 with a Cholesky-ordered FEVD makes 'eth_to_btc_spill' "
            "identically 0 (BTC is ordered first); use horizon >= 2 for an "
            "informative ETH -> BTC spillover.",
            UserWarning,
            stacklevel=2,
        )

    # Set minimum samples
    if min_samples is None:
        min_samples = max(10, window // 2)

    # Initialize output arrays
    n = len(df_work)
    btc_to_eth_list = np.full(n, np.nan)
    eth_to_btc_list = np.full(n, np.nan)

    # Diagnostics
    error_counter = Counter()
    success_count = 0

    # Rolling window computation
    for i in tqdm(range(window, n), desc="Computing VAR-FEVD spillovers", 
                  disable=not verbose):

        # Trailing window: rows strictly before i
        sub_df = df_work.iloc[i - window:i].dropna()

        # Check sufficient data
        if len(sub_df) < min_samples:
            error_counter['insufficient_samples'] += 1
            continue

        # A constant series cannot be modelled
        if (sub_df.std() == 0).any():
            error_counter['zero_variance'] += 1
            continue

        try:
            model = VAR(sub_df)

            # Determine lag order
            if fixed_lag is not None:
                p = fixed_lag
            else:
                maxlags_eff = min(maxlags, len(sub_df) // 3)
                if maxlags_eff < 1:
                    error_counter['window_too_small'] += 1
                    continue

                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    try:
                        sel = model.select_order(maxlags=maxlags_eff)
                        p = sel.selected_orders.get('bic')
                        # BIC may pick 0 lags; force at least one
                        p = max(1, min(p, maxlags_eff))
                    except Exception as e:
                        # Non-fatal: fall back to a single lag and carry on
                        error_counter['lag_selection_error'] += 1
                        if verbose:
                            print(f"Lag selection error at {i}: {str(e)[:50]}")
                        p = 1

            # Check if we have enough data for chosen lag
            if len(sub_df) < p * 2 + 5:
                error_counter['insufficient_for_lag'] += 1
                continue

            # Fit model with chosen lag
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                results = model.fit(p)

            # Check stability
            if not results.is_stable():
                error_counter['unstable_model'] += 1
                continue

            # Compute FEVD and pull out the (equation, shock) matrix at `horizon`
            fevd = results.fevd(horizon)
            decomp_shape = np.shape(fevd.decomp)
            table = _fevd_table_at_horizon(fevd.decomp, horizon)

            if table is None:
                if len(decomp_shape) == 3:
                    shape_key = f'unexpected_shape_{decomp_shape}'
                    error_counter[shape_key] += 1
                    if verbose and error_counter[shape_key] <= 3:
                        print(f"Cannot handle decomp shape at {i}: {decomp_shape}")
                else:
                    error_counter[f'invalid_ndim_{len(decomp_shape)}'] += 1
                continue

            # table[r, c] = share of equation r's forecast-error variance
            # attributed to the shock of variable c (0 = BTC, 1 = ETH)
            btc_to_eth = table[1, 0]
            eth_to_btc = table[0, 1]

            # Sanity check: shares must lie in [0, 1] (NaN fails this too)
            if not (0 <= btc_to_eth <= 1 and 0 <= eth_to_btc <= 1):
                error_counter['invalid_fevd_values'] += 1
                continue

            # Assign to current timestamp
            btc_to_eth_list[i] = btc_to_eth
            eth_to_btc_list[i] = eth_to_btc
            success_count += 1

        except np.linalg.LinAlgError:
            error_counter['linalg_error'] += 1
        except ValueError as e:
            error_counter['value_error'] += 1
            if verbose and error_counter['value_error'] <= 3:
                print(f"ValueError at {i}: {str(e)[:100]}")
        except Exception as e:
            error_type = type(e).__name__
            error_counter[f'other_{error_type}'] += 1
            if verbose and error_counter[f'other_{error_type}'] <= 3:
                print(f"{error_type} at {i}: {str(e)[:100]}")

    # Create output dataframe
    result = pd.DataFrame({
        'btc_to_eth_spill': btc_to_eth_list,
        'eth_to_btc_spill': eth_to_btc_list
    }, index=df_work.index)

    # Diagnostics
    diagnostics = {
        'success_count': success_count,
        'total_attempts': n - window,
        'success_rate': success_count / (n - window) if n > window else 0,
        'errors': dict(error_counter)
    }

    return result, diagnostics


def diagnose_data(df, window=24):
    """
    Diagnose data quality issues that might prevent VAR-FEVD from working.

    Parameters
    ----------
    df : pd.DataFrame
        Input dataframe with 'btc_vol_future' and 'eth_vol_future' columns.
    window : int, default=24
        Length of the trailing window used for the constant-series check.
        Pass the same value given to the rolling VAR so the two agree.

    Returns
    -------
    dict
        Diagnostic report: row/null/zero counts, mean/std/min/max per series,
        their correlation, and the number and percentage of trailing windows
        in which at least one series has zero standard deviation.
    """
    df_work = df[['btc_vol_future', 'eth_vol_future']].copy()
    btc = df_work['btc_vol_future']
    eth = df_work['eth_vol_future']

    report = {
        'total_rows': len(df_work),
        'btc_null_count': btc.isna().sum(),
        'eth_null_count': eth.isna().sum(),
        'btc_zero_count': (btc == 0).sum(),
        'eth_zero_count': (eth == 0).sum(),
        'btc_mean': btc.mean(),
        'eth_mean': eth.mean(),
        'btc_std': btc.std(),
        'eth_std': eth.std(),
        'btc_min': btc.min(),
        'eth_min': eth.min(),
        'btc_max': btc.max(),
        'eth_max': eth.max(),
        'correlation': btc.corr(eth),
    }

    # Count trailing windows [end - window, end) where a series is constant
    constant_windows = 0
    for end in range(window, len(df_work)):
        sub = df_work.iloc[end - window:end].dropna()
        if len(sub) > 0 and (sub.std() == 0).any():
            constant_windows += 1

    report['constant_windows'] = constant_windows
    # max(1, ...) avoids a division by zero when there are no full windows
    report['constant_window_pct'] = constant_windows / max(1, len(df_work) - window) * 100

    return report


def add_spillover_diagnostics(df, spillover_df):
    """
    Summarise the two spillover columns.

    Returns a dict with the row count of `df`, how many rows of
    'btc_to_eth_spill' are non-null (absolute and as a percentage of
    len(df)), the mean and standard deviation of each spillover column,
    and the correlation between the two columns.
    """
    btc_eth = spillover_df['btc_to_eth_spill']
    eth_btc = spillover_df['eth_to_btc_spill']

    n_total = len(df)
    n_valid = btc_eth.notna().sum()
    pair_corr = spillover_df[['btc_to_eth_spill', 'eth_to_btc_spill']].corr().iloc[0, 1]

    return {
        'total_observations': n_total,
        'valid_spillover_obs': n_valid,
        'coverage_pct': n_valid / n_total * 100,
        'btc_to_eth_mean': btc_eth.mean(),
        'eth_to_btc_mean': eth_btc.mean(),
        'btc_to_eth_std': btc_eth.std(),
        'eth_to_btc_std': eth_btc.std(),
        'correlation': pair_corr,
    }


def test_fevd_structure(df, window=24, horizon=1, fixed_lag=1):
    """
    Quick test to understand FEVD structure with your data.
    
    Parameters
    ----------
    df : pd.DataFrame
        Input dataframe
    window : int
        Window size to test
    horizon : int
        Horizon to test
    fixed_lag : int
        Lag order to use
    
    Returns
    -------
    dict
        Information about FEVD structure
    """
    df_work = df[['btc_vol_future', 'eth_vol_future']].copy()
    df_work = df_work.sort_index()
    
    # Find first valid window
    for i in range(window, len(df_work)):
        sub_df = df_work.iloc[i - window:i].dropna()
        
        if len(sub_df) >= window and (sub_df.std() != 0).all():
            try:
                model = VAR(sub_df)
                results = model.fit(fixed_lag)
                
                if results.is_stable():
                    fevd = results.fevd(horizon)
                    
                    info = {
                        'fevd_decomp_shape': fevd.decomp.shape,
                        'fevd_decomp_ndim': fevd.decomp.ndim,
                        'fevd_type': type(fevd).__name__,
                        'window_used': i,
                        'sample_decomp': fevd.decomp,
                        'variable_names': list(sub_df.columns),
                    }
                    
                    print("FEVD Structure Test Results:")
                    print("=" * 60)
                    for key, value in info.items():
                        if key != 'sample_decomp':
                            print(f"{key}: {value}")
                    
                    print("\nFull decomp array:")
                    print(fevd.decomp)
                    
                    print("\nInterpretation:")
                    if fevd.decomp.ndim == 3:
                        print(f"  Shape: (steps={fevd.decomp.shape[0]}, "
                              f"variables={fevd.decomp.shape[1]}, "
                              f"shocks={fevd.decomp.shape[2]})")
                        print(f"  For horizon {horizon}, use: fevd.decomp[{horizon-1}, :, :]")
                    else:
                        print(f"  Shape: (variables={fevd.decomp.shape[0]}, "
                              f"shocks={fevd.decomp.shape[1]})")
                        print(f"  Already a 2D matrix, use directly")
                    
                    return info
                    
            except Exception as e:
                print(f"Error at window {i}: {e}")
                continue
    
    print("Could not find valid window for testing")
    return None


# Driver: diagnose the merged volatility data, compute the rolling VAR-FEVD
# spillovers, report on them, and join them back onto the data.
if __name__ == "__main__":

    def _print_section(title, leading_blank=True):
        """Print a 60-character ruled header, optionally preceded by a blank line."""
        rule = "=" * 60
        print("\n" + rule if leading_blank else rule)
        print(title)
        print(rule)

    # Prepare data: sort by timestamp and use it as the index
    df = vol_merged_df.sort_values('timestamp').set_index('timestamp')

    # Step 1: data-quality report (floats shown with 6 decimals)
    _print_section("DATA QUALITY DIAGNOSTICS", leading_blank=False)
    data_report = diagnose_data(df)
    for key, value in data_report.items():
        rendered = f"{value:.6f}" if isinstance(value, float) else f"{value}"
        print(f"  {key}: {rendered}")

    # Step 2: rolling spillover features, with progress and error output enabled
    _print_section("COMPUTING SPILLOVER FEATURES")
    spillover_features, var_diagnostics = compute_var_fevd_features(
        df,
        window=24,
        horizon=1,
        maxlags=6,       # Reduced from 12 - try smaller first
        fixed_lag=None,  # Use BIC selection
        verbose=True,
    )

    # Step 3: success rate and error breakdown, most frequent error first
    _print_section("VAR MODEL DIAGNOSTICS")
    total_attempts = var_diagnostics['total_attempts']
    print(f"Success count: {var_diagnostics['success_count']}")
    print(f"Total attempts: {total_attempts}")
    print(f"Success rate: {var_diagnostics['success_rate']:.2%}")

    if var_diagnostics['errors']:
        print("\nError breakdown:")
        ranked_errors = sorted(var_diagnostics['errors'].items(),
                               key=lambda pair: pair[1], reverse=True)
        for error, count in ranked_errors:
            pct = count / total_attempts * 100
            print(f"  {error}: {count} ({pct:.1f}%)")

    # Step 4: summary statistics of the spillover columns (non-NaN floats get 4 decimals)
    _print_section("SPILLOVER FEATURE DIAGNOSTICS")
    feature_stats = add_spillover_diagnostics(df, spillover_features)
    for key, value in feature_stats.items():
        is_finite_float = isinstance(value, float) and not np.isnan(value)
        rendered = f"{value:.4f}" if is_finite_float else f"{value}"
        print(f"  {key}: {rendered}")

    # Attach the spillover columns to the volatility data (aligned on the index)
    ml_ready_df = df.join(spillover_features)

    print(f"\nFinal dataset shape: {ml_ready_df.shape}")
    print(f"Spillover columns added: {spillover_features.columns.tolist()}")

DATA QUALITY DIAGNOSTICS
  total_rows: 8232
  btc_null_count: 0
  eth_null_count: 0
  btc_zero_count: 0
  eth_zero_count: 0
  btc_mean: 0.006379
  eth_mean: 0.005875
  btc_std: 0.003893
  eth_std: 0.002958
  btc_min: 0.001607
  eth_min: 0.002317
  btc_max: 0.014931
  eth_max: 0.013275
  correlation: 0.431853
  constant_windows: 22
  constant_window_pct: 0.268031

COMPUTING SPILLOVER FEATURES


Computing VAR-FEVD spillovers:  96%|█████████▌| 7844/8208 [00:10<00:00, 798.62it/s]

Lag selection error at 7694: x contains one or more constant columns. Column(s)
Lag selection error at 7695: x contains one or more constant columns. Column(s)
Lag selection error at 7696: x contains one or more constant columns. Column(s)
Lag selection error at 7697: x contains one or more constant columns. Column(s)
Lag selection error at 7698: x contains one or more constant columns. Column(s)
Lag selection error at 7721: x contains one or more constant columns. Column(s)
ValueError at 7721: x contains one or more constant columns. Column(s) 0 are constant. Adding a constant with trend='c' 
Lag selection error at 7722: x contains one or more constant columns. Column(s)
Lag selection error at 7723: x contains one or more constant columns. Column(s)
Lag selection error at 7724: x contains one or more constant columns. Column(s)
Lag selection error at 7725: x contains one or more constant columns. Column(s)
Lag selection error at 7726: x contains one or more constant columns. Column(s)

Computing VAR-FEVD spillovers: 100%|██████████| 8208/8208 [00:10<00:00, 765.08it/s]


VAR MODEL DIAGNOSTICS
Success count: 7093
Total attempts: 8208
Success rate: 86.42%

Error breakdown:
  unstable_model: 1092 (13.3%)
  zero_variance: 22 (0.3%)
  lag_selection_error: 11 (0.1%)
  value_error: 1 (0.0%)

SPILLOVER FEATURE DIAGNOSTICS
  total_observations: 8232
  valid_spillover_obs: 7093
  coverage_pct: 86.1638
  btc_to_eth_mean: 0.3024
  eth_to_btc_mean: 0.0000
  btc_to_eth_std: 0.2444
  eth_to_btc_std: 0.0000
  correlation: nan

Final dataset shape: (8232, 4)
Spillover columns added: ['btc_to_eth_spill', 'eth_to_btc_spill']





In [None]:
# Move the timestamp index back into a column. Guarded so that re-running this
# cell does not reset the index a second time and add a stray 'index' column.
if 'timestamp' not in ml_ready_df.columns:
    ml_ready_df = ml_ready_df.reset_index()

# Keep rows from 2024-11-05 onward. The timestamps are still ISO-formatted
# strings here, so this lexicographic comparison orders them chronologically.
final_spillover_df = ml_ready_df[ml_ready_df['timestamp'] >= '2024-11-05']

# Series.max() skips NaN; the builtin max() is order-dependent when NaN is present.
final_spillover_df['eth_to_btc_spill'].max()


0.0

In [1501]:
# Display the filtered spillover frame (pandas truncates the middle rows).
final_spillover_df

Unnamed: 0,timestamp,eth_vol_future,btc_vol_future,btc_to_eth_spill,eth_to_btc_spill
24,2024-11-05 00:00:00.000 UTC,0.009336,0.008782,0.032570,0.0
25,2024-11-05 01:00:00.000 UTC,0.003813,0.005309,0.061816,0.0
26,2024-11-05 02:00:00.000 UTC,0.003502,0.006122,0.080956,0.0
27,2024-11-05 03:00:00.000 UTC,0.003399,0.006630,0.129080,0.0
28,2024-11-05 04:00:00.000 UTC,0.003244,0.007759,0.159381,0.0
...,...,...,...,...,...
8227,2025-10-12 19:00:00.000 UTC,0.007959,0.006841,0.498550,0.0
8228,2025-10-12 20:00:00.000 UTC,0.013275,0.008244,,
8229,2025-10-12 21:00:00.000 UTC,0.007669,0.006577,0.336595,0.0
8230,2025-10-12 22:00:00.000 UTC,0.009990,0.006362,0.304397,0.0


### Final merging

In [1495]:

# Parse the whale-alert hour key as UTC, rename it to 'timestamp', and sort
# (merge_asof requires both inputs sorted on the key).
eth_whale_alerts_df = (
    eth_whale_alerts_df
    .assign(datetime1h=lambda frame: pd.to_datetime(frame['datetime1h'], utc=True))
    .rename(columns={'datetime1h': 'timestamp'})
    .sort_values('timestamp')
)
eth_dune_df = (
    eth_dune_df
    .assign(timestamp=lambda frame: pd.to_datetime(frame['timestamp'], utc=True))
    .sort_values('timestamp')
)

# Attach the closest whale-alert row to every Dune row, within one hour.
# NOTE(review): direction='nearest' may pick a whale row up to 1h AFTER the
# Dune timestamp as well as before it - confirm that is intended.
eth_merged_df = pd.merge_asof(
    left=eth_dune_df,
    right=eth_whale_alerts_df,
    on='timestamp',
    direction='nearest',
    tolerance=pd.Timedelta('1h'),
)


In [1496]:
# Count missing values per column after the ETH merge (unmatched rows leave the whale columns NaN).
eth_merged_df.isna().sum()

timestamp                           0
RV_MA_1hr                           0
RV_MA_3hr                           0
RV_MA_12hr                          0
vol_future                          0
active_sending_addresses            0
active_receiving_addresses          0
exchange_withdrawing_count          0
transaction_count                   0
fail_rate_percent                   0
open                                0
low                                 0
high                                0
close                               0
hourly_return                       0
onchain_volume_usd_log              0
avg_gas_fee_usd_log                 0
avg_priority_fee_usd_log            0
staking_inflow_log                  0
exchange_depositing_count_log       0
exchange_netflow_usd_log            0
whale_net_usd                    6383
whale_net_usd_24h                6383
whale_burst_flag                 6383
etow_usd_log                     6383
etow_coins_log                   6383
whale_txn_co

In [1497]:
# Replace every remaining NaN in the merged ETH frame with 0.
eth_merged_df = eth_merged_df.fillna(0)

In [1498]:
# Parse the whale-alert hour key as UTC, rename it to 'timestamp', and sort
# (merge_asof requires both inputs sorted on the key).
btc_whale_alerts_df = (
    btc_whale_alerts_df
    .assign(datetime1h=lambda frame: pd.to_datetime(frame['datetime1h'], utc=True))
    .rename(columns={'datetime1h': 'timestamp'})
    .sort_values('timestamp')
)
btc_dune_df = (
    btc_dune_df
    .assign(timestamp=lambda frame: pd.to_datetime(frame['timestamp'], utc=True))
    .sort_values('timestamp')
)

# Attach the closest whale-alert row to every Dune row, within one hour.
# NOTE(review): direction='nearest' may pick a whale row up to 1h AFTER the
# Dune timestamp as well as before it - confirm that is intended.
btc_merged_df = pd.merge_asof(
    left=btc_dune_df,
    right=btc_whale_alerts_df,
    on='timestamp',
    direction='nearest',
    tolerance=pd.Timedelta('1h'),
)

# Missing values per column; unmatched rows leave the whale columns NaN.
btc_merged_df.isna().sum()

timestamp                        0
btc_exchange_netflow_usd         0
active_sending_addresses         0
active_receiving_addresses       0
onchain_volume_usd               0
open                             0
low                              0
high                             0
close                            0
mint_reward_usd                  0
total_fee_usd                    0
transaction_count                0
exchange_to_wallet_usd           0
wallet_to_exchange_usd           0
RV_MA_1hr                        0
RV_MA_3hr                        0
RV_MA_12hr                       0
hourly_return                    0
vol_future                       0
whale_net_usd                 5071
whale_net_usd_24h             5071
whale_burst_flag              5071
etow_usd_log                  5071
etow_coins_log                5071
whale_txn_count_log           5071
wtoe_usd_log                  5071
wtoe_coins_log                5071
dtype: int64

In [1499]:
# Replace every remaining NaN in the merged BTC frame with 0.
btc_merged_df = btc_merged_df.fillna(0)

In [1500]:
#btc_merged_df.to_csv('../Data/final_btc_df_var_fevd.csv', index=False)
#eth_merged_df.to_csv('../Data/final_eth_df_var_fevd.csv', index=False)
