### ETH Whale Activity ML Pipeline

- Setup & Configuration

In [1]:
import os
import time
import pickle
import requests
import warnings
import json
from datetime import datetime, timedelta

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_auc_score, classification_report, confusion_matrix, make_scorer
)

try:
    from imblearn.over_sampling import SMOTE
    from imblearn.pipeline import Pipeline as ImbPipeline
    HAS_IMBLEARN = True
except ImportError:
    HAS_IMBLEARN = False
    print("‚ö†Ô∏è imbalanced-learn not installed")

try:
    from xgboost import XGBClassifier
    HAS_XGBOOST = True
except ImportError:
    HAS_XGBOOST = False
    print("‚ö†Ô∏è XGBoost not installed")

from dotenv import load_dotenv

warnings.filterwarnings('ignore')

‚ö†Ô∏è imbalanced-learn not installed


- Loading and Configuring Environmental Varriables

In [2]:
load_dotenv()
DUNE_API_KEY = os.getenv("DUNE_WHALES_API")
COINGECKO_API_KEY = os.getenv("COINGECKO_API_KEY")
os.makedirs("data/price_cache", exist_ok=True)

QUERIES = {
    "whales": ("6395391", "dune_whales_cache.json", "whale_ml_ready.csv"),
    "market_intent": ("6385600", "dune_intent_cache.json", "market_intent_ml_ready.csv")
}

#### Data Loading Pipeline - Dune + CoinGecko
- Data Collection - Fetch Whale Data from Dune

In [3]:

def fetch_dune(qid, cache):
    headers = {"x-dune-api-key": DUNE_API_KEY}
    today = pd.Timestamp.now(tz='UTC').normalize()
    
    # Load cache
    if os.path.exists(cache):
        c = json.load(open(cache))
        df_cached = pd.DataFrame(c["data"])
        df_cached["block_date"] = pd.to_datetime(df_cached["block_date"], utc=True)
        last_date = pd.to_datetime(c["last_block_date"], utc=True)
        
        # If cache has yesterday's data, it's current enough (today's data doesn't exist yet)
        if last_date >= today - timedelta(1):
            print(f"‚úÖ {cache} current ({last_date.date()})")
            return df_cached
        
        print(f"üîÑ {cache}: fetching latest")
    else:
        df_cached = pd.DataFrame()
        print(f"üÜï {cache}: full fetch")
    
    # Execute query
    resp = requests.post(f"https://api.dune.com/api/v1/query/{qid}/execute", headers=headers).json()
    if "execution_id" not in resp:
        raise RuntimeError(f"Dune API error: {resp}")
    eid = resp["execution_id"]
    
    # Poll
    while True:
        s = requests.get(f"https://api.dune.com/api/v1/execution/{eid}/status", headers=headers).json()["state"]
        if s == "QUERY_STATE_COMPLETED": break
        if s == "QUERY_STATE_FAILED": raise RuntimeError("Query failed")
        time.sleep(10)
    
    # Get results & merge with cache
    df_new = pd.DataFrame(requests.get(f"https://api.dune.com/api/v1/execution/{eid}/results", headers=headers).json()["result"]["rows"])
    if df_new.empty: return df_cached
    
    df_new["block_date"] = pd.to_datetime(df_new["block_date"], utc=True)
    df = pd.concat([df_cached, df_new[df_new["block_date"] < today]]).drop_duplicates("block_date", keep="last").sort_values("block_date").reset_index(drop=True)
    
    # Save cache
    json.dump({"last_block_date": df["block_date"].max().strftime("%Y-%m-%d"), 
               "data": json.loads(df.to_json(orient="records", date_format="iso"))}, open(cache, "w"))
    print(f"‚úÖ {cache}: {len(df)} rows (added {len(df_new)} new)")
    return df


- Load Dune Data

In [4]:

print("="*60, "\nDUNE DATA\n", "="*60)
datasets = {}
for name, (qid, cache, output) in QUERIES.items():
    datasets[name] = fetch_dune(qid, cache)
    datasets[name].to_csv(output, index=False)
    time.sleep(0.5)

df_whales, df_market_intent = datasets["whales"], datasets["market_intent"]
print(f"\n‚úÖ Whales: {len(df_whales)} | Intent: {len(df_market_intent)}")

DUNE DATA
‚úÖ dune_whales_cache.json current (2025-12-24)


‚úÖ dune_intent_cache.json current (2025-12-24)

‚úÖ Whales: 1097 | Intent: 1097


- Coingecko Price Function
     - Utilities:Normalize any date-like input to UTC Timestamp.
    Handles tz-naive, tz-aware, date, datetime safely.


In [5]:
def to_utc(ts):
    ts = pd.Timestamp(ts)
    if ts.tzinfo is None:
        return ts.tz_localize("UTC")
    return ts.tz_convert("UTC")

- CoinGecko chunked fetch

In [6]:
def fetch_cg_chunked(cg_id, start, end, key=None, days=30):
    """
    Fetch DAILY UTC prices from CoinGecko.
    No skipped days. No today.
    """
    url_base = "https://pro-api.coingecko.com/api/v3" if key else "https://api.coingecko.com/api/v3"
    headers = {"x-cg-pro-api-key": key} if key else {}

    start_dt = to_utc(start)
    end_dt   = to_utc(end) + pd.Timedelta(days=1)  # ‚úÖ INCLUSIVE FIX

    all_prices = []
    curr = start_dt

    while curr < end_dt:
        next_dt = min(curr + pd.Timedelta(days=days), end_dt)

        params = {
            "vs_currency": "usd",
            "from": int(curr.timestamp()),
            "to": int(next_dt.timestamp())
        }

        for attempt in range(3):
            try:
                r = requests.get(
                    f"{url_base}/coins/{cg_id}/market_chart/range",
                    params=params,
                    headers=headers,
                    timeout=30
                )
                r.raise_for_status()

                prices = r.json().get("prices", [])
                all_prices.extend(prices)

                print(f"üì• {cg_id}: {curr.date()} ‚Üí {next_dt.date()} ({len(prices)} pts)")
                time.sleep(0.3)
                break

            except Exception as e:
                if attempt == 2:
                    raise
                print(f"‚ö†Ô∏è Retry {attempt + 1}/3 ({e})")
                time.sleep(5)

        # NO +1 DAY SKIP
        curr = next_dt

    if not all_prices:
        return pd.DataFrame(columns=["date", "price"])

    # Build DAILY UTC prices
    df = pd.DataFrame(all_prices, columns=["timestamp", "price"])
    df["date"] = pd.to_datetime(df["timestamp"], unit="ms", utc=True).dt.floor("D")

    df = (
        df.groupby("date", as_index=False)["price"]
          .mean()
          .sort_values("date")
    )

    # Enforce full daily range (no silent gaps)
    
    full_range = pd.date_range(
        start=df["date"].min(),
        end=df["date"].max(),
        freq="D",
        tz="UTC"
    )

    df = (
        df.set_index("date")
          .reindex(full_range)
          .rename_axis("date")
          .reset_index()
    )

    return df

- Cached price loader (EXCLUDES TODAY)

In [7]:
def get_price(sym, cg_id, start, end, key=None):
    """
    Load DAILY prices through YESTERDAY ONLY.
    Today is always excluded.
    """
    cache = f"data/price_cache/{sym}.csv"

    today_utc = pd.Timestamp.utcnow().floor("D")
    yesterday = today_utc - pd.Timedelta(days=1)

    start = to_utc(start)
    end   = min(to_utc(end), yesterday)

    if start > end:
        return pd.DataFrame(columns=["date", f"{sym}_price"])

    # Cache exists
    
    if os.path.exists(cache):
        df = pd.read_csv(cache, parse_dates=["date"])
        df["date"] = df["date"].apply(to_utc)

        last_cached = df["date"].max()

        if last_cached >= end:
            print(f"‚úÖ {sym.upper()} cache current ({last_cached.date()})")
            return df

        fetch_start = last_cached + pd.Timedelta(days=1)
        print(f"üîÑ {sym.upper()}: fetching {fetch_start.date()} ‚Üí {end.date()}")

        new = fetch_cg_chunked(cg_id, fetch_start, end, key)

        if not new.empty:
            new = new.rename(columns={"price": f"{sym}_price"})
            df = (
                pd.concat([df, new])
                  .drop_duplicates("date", keep="last")
                  .sort_values("date")
                  .reset_index(drop=True)
            )

       # No cache
   
    else:
        print(f"üì¶ {sym.upper()}: full fetch {start.date()} ‚Üí {end.date()}")
        df = fetch_cg_chunked(cg_id, start, end, key)

        if df.empty:
            return df

        df = df.rename(columns={"price": f"{sym}_price"})

    df.to_csv(cache, index=False)
    print(f"‚úÖ {sym.upper()} saved (through {df['date'].max().date()})")
    return df



- Example usage

In [8]:
if __name__ == "__main__":

    min_date = min(
        df_whales["block_date"].min(),
        df_market_intent["block_date"].min()
    ) - pd.Timedelta(days=100)

    max_date = max(
        df_whales["block_date"].max(),
        df_market_intent["block_date"].max()
    )

    print(f"\nüìÖ Range: {min_date.date()} ‚Üí {max_date.date()} (today excluded)\n")

    df_btc = get_price("btc", "bitcoin", min_date, max_date, COINGECKO_API_KEY)
    df_eth = get_price("eth", "ethereum", min_date, max_date, COINGECKO_API_KEY)

    print("\n‚úÖ FINAL CHECK")
    print(f"BTC last date: {df_btc['date'].max().date()}")
    print(f"ETH last date: {df_eth['date'].max().date()}")


üìÖ Range: 2022-09-15 ‚Üí 2025-12-24 (today excluded)

‚úÖ BTC cache current (2025-12-24)
‚úÖ ETH cache current (2025-12-24)

‚úÖ FINAL CHECK
BTC last date: 2025-12-24
ETH last date: 2025-12-24


In [9]:
pd.read_csv('/workspaces/Whale-Movement-Based-Price-Direction-Generator-V2/WhalesIntent/Intent/market_intent_ml_ready.csv')

Unnamed: 0,block_date,block_fullness_delta_1d,eth_burned_delta_1d,eth_burned_zscore_90d,exchange_flow_share,median_gas_delta_1d,median_gas_delta_7d,net_exchange_flow_ratio,smart_contract_ratio_delta_1d,tx_per_active_delta_1d,tx_per_active_zscore_90d,whale_exchange_asymmetry,whale_exchange_flow_ratio,whale_tx_zscore_90d,whale_volume_ratio,whale_volume_ratio_delta_1d,whale_volume_ratio_delta_3d
0,2022-12-24 00:00:00+00:00,,,0.0000,0.214255,,,-0.046363,,,0.0000,-0.220737,-0.041071,0.0000,0.787476,,
1,2022-12-25 00:00:00+00:00,,,0.0000,0.465847,,,-0.067329,,,0.0000,-0.145227,-0.064300,0.0000,0.828582,,
2,2022-12-26 00:00:00+00:00,,,0.0000,0.267112,,,0.059721,,,0.0000,0.274527,0.065927,0.0000,0.802250,,
3,2022-12-27 00:00:00+00:00,-0.000582,358.3551,0.7071,0.232362,3.0361,,-0.070792,-0.001002,0.0820,0.7071,-0.312723,-0.063788,0.0555,0.801580,-0.000671,
4,2022-12-28 00:00:00+00:00,-0.000340,120.1106,0.8016,0.241721,0.6857,,-0.081152,0.014817,0.0344,0.8405,-0.351500,-0.077298,0.9225,0.836426,0.034846,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1092,2025-12-20 00:00:00+00:00,0.001126,-22.1130,-0.3346,0.203059,-0.1076,-0.0303,-0.051241,0.013453,-0.1327,-1.2810,-0.238883,-0.046222,-1.8828,0.851792,-0.103314,-0.107734
1093,2025-12-21 00:00:00+00:00,0.000199,0.4850,-0.3298,0.354921,0.0038,-0.0284,0.055381,0.057881,0.3857,1.9035,0.171106,0.059485,-2.4013,0.889958,0.038166,-0.052551
1094,2025-12-22 00:00:00+00:00,-0.001665,1.3817,-0.3228,0.301284,0.0098,-0.1075,-0.066520,-0.066476,-0.4239,-1.5519,-0.216851,-0.064440,0.7183,0.936694,0.046736,-0.018412
1095,2025-12-23 00:00:00+00:00,-0.000778,1.5696,-0.3154,0.224627,0.0101,-0.0248,-0.029405,0.041258,-0.1504,-2.6310,-0.127150,-0.028212,0.4125,0.956047,0.019353,0.104255


In [10]:
pd.read_csv('/workspaces/Whale-Movement-Based-Price-Direction-Generator-V2/WhalesIntent/Intent/whale_ml_ready.csv')

Unnamed: 0,block_date,deposit_tx_count,deposit_withdrawal_ratio,exchange_volume_ratio,mega_whale_ratio,mega_whale_tx_count,mega_whale_volume_eth,net_flow_ma7,non_exchange_ratio,non_exchange_tx_count,non_exchange_volume_eth,std_whale_tx_size_eth,whale_exchange_deposits_eth,whale_exchange_withdrawals_eth,whale_net_exchange_flow_eth,whale_tx_count,whale_volume_eth,withdrawal_tx_count
0,2022-12-24 00:00:00+00:00,11,1.5554,0.2899,0.9475,36,1.083547e+05,-7203.9190,0.7101,24,81206.9408,3279.562931,20175.0838,12971.1648,-7203.9190,42,1.143532e+05,7
1,2022-12-25 00:00:00+00:00,10,4.4874,0.6327,1.0000,36,1.475401e+05,-59329.7899,0.3673,18,54184.9701,8106.493990,76342.4799,17012.6900,-59329.7899,36,1.475401e+05,8
2,2022-12-26 00:00:00+00:00,8,0.1890,0.3896,0.9751,50,1.961529e+05,53454.7638,0.6104,38,122784.2329,7771.982946,12456.6605,65911.4244,53454.7638,55,2.011523e+05,9
3,2022-12-27 00:00:00+00:00,19,2.0556,0.2697,1.0000,74,2.223786e+05,16368.8473,0.7303,45,162409.1610,3078.704806,40343.2672,19626.1980,-20717.0692,74,2.223786e+05,10
4,2022-12-28 00:00:00+00:00,14,6.4245,0.2372,1.0000,65,3.348447e+05,-8432.1986,0.7628,49,255413.2942,6903.329720,68732.8362,10698.5457,-58034.2905,65,3.348447e+05,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1092,2025-12-20 00:00:00+00:00,9,11.0293,0.2642,1.0000,40,1.632716e+05,-91552.7172,0.7358,29,120134.5612,3108.900831,39550.9925,3586.0000,-35964.9925,40,1.632716e+05,2
1093,2025-12-21 00:00:00+00:00,8,0.8793,0.5248,1.0000,32,1.928101e+05,-100668.3469,0.4752,19,91619.3550,4433.349204,47347.1628,53843.5845,6496.4217,32,1.928101e+05,5
1094,2025-12-22 00:00:00+00:00,14,2.7336,0.3557,1.0000,70,6.105957e+05,-95628.5165,0.6443,51,393401.9015,5992.049099,159021.1937,58172.5992,-100848.5945,70,6.105957e+05,5
1095,2025-12-23 00:00:00+00:00,11,1.6121,0.2043,1.0000,69,1.014827e+06,-72486.9484,0.7957,53,807479.5955,16471.118552,127968.2705,79379.5928,-48588.6777,69,1.014827e+06,5


In [11]:
pd.read_csv('/workspaces/Whale-Movement-Based-Price-Direction-Generator-V2/WhalesIntent/Intent/data/price_cache/btc.csv')

Unnamed: 0,date,btc_price
0,2022-09-15 00:00:00+00:00,20009.052983
1,2022-09-16 00:00:00+00:00,19713.657885
2,2022-09-17 00:00:00+00:00,19944.803793
3,2022-09-18 00:00:00+00:00,19896.269820
4,2022-09-19 00:00:00+00:00,19028.641104
...,...,...
1192,2025-12-20 00:00:00+00:00,88174.588995
1193,2025-12-21 00:00:00+00:00,88260.199788
1194,2025-12-22 00:00:00+00:00,89102.172598
1195,2025-12-23 00:00:00+00:00,87769.792000


In [12]:
pd.read_csv('/workspaces/Whale-Movement-Based-Price-Direction-Generator-V2/WhalesIntent/Intent/data/price_cache/eth.csv')

Unnamed: 0,date,eth_price
0,2022-09-15 00:00:00+00:00,1566.672324
1,2022-09-16 00:00:00+00:00,1456.046704
2,2022-09-17 00:00:00+00:00,1443.421822
3,2022-09-18 00:00:00+00:00,1424.248562
4,2022-09-19 00:00:00+00:00,1330.210600
...,...,...
1192,2025-12-20 00:00:00+00:00,2979.478368
1193,2025-12-21 00:00:00+00:00,2980.317996
1194,2025-12-22 00:00:00+00:00,3025.604874
1195,2025-12-23 00:00:00+00:00,2966.388075


- Merging the Dataset into single Dataset ready for feature Engineering


In [13]:
# Load datasets
print("üìÇ Loading datasets...")
df_whales = pd.read_csv('whale_ml_ready.csv')
df_market_intent = pd.read_csv('market_intent_ml_ready.csv')
df_btc = pd.read_csv('data/price_cache/btc.csv')
df_eth = pd.read_csv('data/price_cache/eth.csv')

# Convert dates to datetime (UTC timezone-aware for consistency)
print("üìÖ Converting dates...")
df_whales['block_date'] = pd.to_datetime(df_whales['block_date'], utc=True)
df_market_intent['block_date'] = pd.to_datetime(df_market_intent['block_date'], utc=True)
df_btc['date'] = pd.to_datetime(df_btc['date'], utc=True)
df_eth['date'] = pd.to_datetime(df_eth['date'], utc=True)

print(f"‚úÖ Whales: {len(df_whales)} rows")
print(f"‚úÖ Market Intent: {len(df_market_intent)} rows")
print(f"‚úÖ BTC Prices: {len(df_btc)} rows")
print(f"‚úÖ ETH Prices: {len(df_eth)} rows")

# Merge prices first
print("\nüîó Merging price data...")
df_prices = pd.merge(
    df_btc,
    df_eth,
    on='date',
    how='outer',
    suffixes=('_btc', '_eth')
).sort_values('date').reset_index(drop=True)

print(f"‚úÖ Combined prices: {len(df_prices)} rows")

# Merge whale data with prices
print("\nüîó Merging whale data with prices...")
df_merged = pd.merge(
    df_whales,
    df_prices,
    left_on='block_date',
    right_on='date',
    how='left'
).drop(columns=['date'])  # Remove duplicate date column

print(f"‚úÖ After whale + prices: {len(df_merged)} rows")

# Merge market intent data
print("\nüîó Merging market intent data...")
df_final = pd.merge(
    df_merged,
    df_market_intent,
    on='block_date',
    how='left',
    suffixes=('', '_intent')
)

print(f"‚úÖ Final merged dataset: {len(df_final)} rows")

# Check for missing values
print("\nüîç Checking data quality...")
missing_btc = df_final['btc_price'].isna().sum()
missing_eth = df_final['eth_price'].isna().sum()
missing_intent = df_final[[col for col in df_final.columns if 'intent' in col.lower()]].isna().sum().sum()

print(f"   Missing BTC prices: {missing_btc}")
print(f"   Missing ETH prices: {missing_eth}")
print(f"   Missing intent data: {missing_intent}")

# Display date range
print(f"\nüìÖ Date range: {df_final['block_date'].min().date()} ‚Üí {df_final['block_date'].max().date()}")
print(f"üìä Total columns: {len(df_final.columns)}")

# Save merged dataset
OUTPUT_FILE = "merged_ml_dataset.csv"
df_final.to_csv(OUTPUT_FILE, index=False)
print(f"\nüíæ Saved: {OUTPUT_FILE}")

# Display sample
print("\nüìã Sample of merged data:")
print(df_final.head())
print("\nüìã Columns:")
print(df_final.columns.tolist())

üìÇ Loading datasets...
üìÖ Converting dates...
‚úÖ Whales: 1097 rows
‚úÖ Market Intent: 1097 rows
‚úÖ BTC Prices: 1197 rows
‚úÖ ETH Prices: 1197 rows

üîó Merging price data...
‚úÖ Combined prices: 1197 rows

üîó Merging whale data with prices...
‚úÖ After whale + prices: 1097 rows

üîó Merging market intent data...
‚úÖ Final merged dataset: 1097 rows

üîç Checking data quality...
   Missing BTC prices: 0
   Missing ETH prices: 0
   Missing intent data: 0.0

üìÖ Date range: 2022-12-24 ‚Üí 2025-12-24
üìä Total columns: 36

üíæ Saved: merged_ml_dataset.csv

üìã Sample of merged data:
                 block_date  deposit_tx_count  deposit_withdrawal_ratio  \
0 2022-12-24 00:00:00+00:00                11                    1.5554   
1 2022-12-25 00:00:00+00:00                10                    4.4874   
2 2022-12-26 00:00:00+00:00                 8                    0.1890   
3 2022-12-27 00:00:00+00:00                19                    2.0556   
4 2022-12-28 00:00:00+00:

In [14]:
df_final

Unnamed: 0,block_date,deposit_tx_count,deposit_withdrawal_ratio,exchange_volume_ratio,mega_whale_ratio,mega_whale_tx_count,mega_whale_volume_eth,net_flow_ma7,non_exchange_ratio,non_exchange_tx_count,...,net_exchange_flow_ratio,smart_contract_ratio_delta_1d,tx_per_active_delta_1d,tx_per_active_zscore_90d,whale_exchange_asymmetry,whale_exchange_flow_ratio,whale_tx_zscore_90d,whale_volume_ratio,whale_volume_ratio_delta_1d,whale_volume_ratio_delta_3d
0,2022-12-24 00:00:00+00:00,11,1.5554,0.2899,0.9475,36,1.083547e+05,-7203.9190,0.7101,24,...,-0.046363,,,0.0000,-0.220737,-0.041071,0.0000,0.787476,,
1,2022-12-25 00:00:00+00:00,10,4.4874,0.6327,1.0000,36,1.475401e+05,-59329.7899,0.3673,18,...,-0.067329,,,0.0000,-0.145227,-0.064300,0.0000,0.828582,,
2,2022-12-26 00:00:00+00:00,8,0.1890,0.3896,0.9751,50,1.961529e+05,53454.7638,0.6104,38,...,0.059721,,,0.0000,0.274527,0.065927,0.0000,0.802250,,
3,2022-12-27 00:00:00+00:00,19,2.0556,0.2697,1.0000,74,2.223786e+05,16368.8473,0.7303,45,...,-0.070792,-0.001002,0.0820,0.7071,-0.312723,-0.063788,0.0555,0.801580,-0.000671,
4,2022-12-28 00:00:00+00:00,14,6.4245,0.2372,1.0000,65,3.348447e+05,-8432.1986,0.7628,49,...,-0.081152,0.014817,0.0344,0.8405,-0.351500,-0.077298,0.9225,0.836426,0.034846,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1092,2025-12-20 00:00:00+00:00,9,11.0293,0.2642,1.0000,40,1.632716e+05,-91552.7172,0.7358,29,...,-0.051241,0.013453,-0.1327,-1.2810,-0.238883,-0.046222,-1.8828,0.851792,-0.103314,-0.107734
1093,2025-12-21 00:00:00+00:00,8,0.8793,0.5248,1.0000,32,1.928101e+05,-100668.3469,0.4752,19,...,0.055381,0.057881,0.3857,1.9035,0.171106,0.059485,-2.4013,0.889958,0.038166,-0.052551
1094,2025-12-22 00:00:00+00:00,14,2.7336,0.3557,1.0000,70,6.105957e+05,-95628.5165,0.6443,51,...,-0.066520,-0.066476,-0.4239,-1.5519,-0.216851,-0.064440,0.7183,0.936694,0.046736,-0.018412
1095,2025-12-23 00:00:00+00:00,11,1.6121,0.2043,1.0000,69,1.014827e+06,-72486.9484,0.7957,53,...,-0.029405,0.041258,-0.1504,-2.6310,-0.127150,-0.028212,0.4125,0.956047,0.019353,0.104255


In [15]:
df_final.columns

Index(['block_date', 'deposit_tx_count', 'deposit_withdrawal_ratio',
       'exchange_volume_ratio', 'mega_whale_ratio', 'mega_whale_tx_count',
       'mega_whale_volume_eth', 'net_flow_ma7', 'non_exchange_ratio',
       'non_exchange_tx_count', 'non_exchange_volume_eth',
       'std_whale_tx_size_eth', 'whale_exchange_deposits_eth',
       'whale_exchange_withdrawals_eth', 'whale_net_exchange_flow_eth',
       'whale_tx_count', 'whale_volume_eth', 'withdrawal_tx_count',
       'btc_price', 'eth_price', 'block_fullness_delta_1d',
       'eth_burned_delta_1d', 'eth_burned_zscore_90d', 'exchange_flow_share',
       'median_gas_delta_1d', 'median_gas_delta_7d', 'net_exchange_flow_ratio',
       'smart_contract_ratio_delta_1d', 'tx_per_active_delta_1d',
       'tx_per_active_zscore_90d', 'whale_exchange_asymmetry',
       'whale_exchange_flow_ratio', 'whale_tx_zscore_90d',
       'whale_volume_ratio', 'whale_volume_ratio_delta_1d',
       'whale_volume_ratio_delta_3d'],
      dtype='object

- Feature Engineering from version 1

In [17]:
def add_price_features(df, price_col, prefix):
    """Add price-based ML features"""
    df = df.sort_values('block_date').reset_index(drop=True)
    
    # Returns
    df[f'{prefix}_daily_return'] = df[price_col].pct_change()
    df[f'{prefix}_log_return'] = np.log(df[price_col] / df[price_col].shift(1))
    
    # Volatility
    df[f'{prefix}_vol7'] = df[f'{prefix}_daily_return'].rolling(7, min_periods=1).std()
    df[f'{prefix}_vol30'] = df[f'{prefix}_daily_return'].rolling(30, min_periods=1).std()
    
    # RSI
    returns = df[f'{prefix}_daily_return']
    gains = returns.where(returns > 0, 0).rolling(14, min_periods=1).mean()
    losses = -returns.where(returns < 0, 0).rolling(14, min_periods=1).mean()
    rs = gains / (losses + 1e-10)
    df[f'{prefix}_rsi'] = 100 - (100 / (1 + rs))
    
    # Lags
    for lag in [1, 3, 7]:
        df[f'{prefix}_ret_lag{lag}'] = df[f'{prefix}_daily_return'].shift(lag)
    
    return df

def add_correlation_features(df):
    """Add ETH-BTC correlation features"""
    df['eth_btc_ratio'] = df['eth_price'] / df['btc_price']
    df['eth_btc_ratio_ma7'] = df['eth_btc_ratio'].rolling(7, min_periods=1).mean()
    df['eth_btc_corr_30d'] = df['eth_daily_return'].rolling(30, min_periods=20).corr(df['btc_daily_return'])
    df['eth_outperformance'] = df['eth_daily_return'] - df['btc_daily_return']
    return df

def create_target(df):
    """Create target: next day price direction"""
    df['next_day_return'] = df['eth_price'].pct_change().shift(-1)
    df['next_day_price_direction'] = (df['next_day_return'] > 0).astype(int)
    return df

# Feature engineering pipeline
print("\n‚öôÔ∏è Engineering features...")
df_merged = add_price_features(df_merged, 'eth_price', 'eth')
df_merged = add_price_features(df_merged, 'btc_price', 'btc')
df_merged = add_correlation_features(df_merged)
df_merged = create_target(df_merged)
print(f"‚úÖ Features created: {len(df_merged.columns)} total columns")


‚öôÔ∏è Engineering features...
‚úÖ Features created: 42 total columns


In [32]:
df_merged.columns

Index(['block_date', 'deposit_tx_count', 'deposit_withdrawal_ratio',
       'exchange_volume_ratio', 'mega_whale_ratio', 'mega_whale_tx_count',
       'mega_whale_volume_eth', 'net_flow_ma7', 'non_exchange_ratio',
       'non_exchange_tx_count', 'non_exchange_volume_eth',
       'std_whale_tx_size_eth', 'whale_exchange_deposits_eth',
       'whale_exchange_withdrawals_eth', 'whale_net_exchange_flow_eth',
       'whale_tx_count', 'whale_volume_eth', 'withdrawal_tx_count',
       'btc_price', 'eth_price', 'eth_daily_return', 'eth_log_return',
       'eth_vol7', 'eth_vol30', 'eth_rsi', 'eth_ret_lag1', 'eth_ret_lag3',
       'eth_ret_lag7', 'btc_daily_return', 'btc_log_return', 'btc_vol7',
       'btc_vol30', 'btc_rsi', 'btc_ret_lag1', 'btc_ret_lag3', 'btc_ret_lag7',
       'eth_btc_ratio', 'eth_btc_ratio_ma7', 'eth_btc_corr_30d',
       'eth_outperformance', 'next_day_return', 'next_day_price_direction'],
      dtype='object')

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_auc_score, classification_report, confusion_matrix
)
import warnings
warnings.filterwarnings('ignore')

# ============================================================================
# PHASE 1: ABLATION TESTING FRAMEWORK
# ============================================================================

class AblationTester:
    """Test feature group contributions via ablation study"""
    
    def __init__(self, df, target_col='next_day_price_direction'):
        self.df = df.copy()
        self.target_col = target_col
        self.results = {}
        
    def define_feature_groups(self):
        """Define Model A-C feature sets"""
        
        # Model A: Price-only baseline
        self.groups = {
            'A_price_only': [
                'eth_daily_return', 'eth_log_return', 'eth_vol7', 'eth_vol30',
                'eth_rsi', 'eth_ret_lag1', 'eth_ret_lag3', 'eth_ret_lag7',
                'btc_daily_return', 'btc_log_return', 'btc_vol7', 'btc_vol30',
                'btc_rsi', 'btc_ret_lag1', 'btc_ret_lag3', 'btc_ret_lag7',
                'eth_btc_ratio', 'eth_btc_ratio_ma7', 'eth_btc_corr_30d',
                'eth_outperformance'
            ]
        }
        
        # Verify features exist
        missing = [f for f in self.groups['A_price_only'] if f not in self.df.columns]
        if missing:
            print(f"‚ö†Ô∏è Missing features: {missing}")
            self.groups['A_price_only'] = [f for f in self.groups['A_price_only'] 
                                            if f in self.df.columns]
        
        return self.groups
    
    def prepare_data(self, features):
        """Clean data for modeling"""
        df_clean = self.df[features + [self.target_col, 'block_date']].copy()
        
        # Remove rows with missing target or features
        df_clean = df_clean.dropna(subset=[self.target_col])
        df_clean = df_clean.dropna(subset=features)
        
        # Sort by date
        df_clean = df_clean.sort_values('block_date').reset_index(drop=True)
        
        return df_clean
    
    def time_series_cv(self, X, y, n_splits=5):
        """Walk-forward time series cross-validation"""
        tscv = TimeSeriesSplit(n_splits=n_splits)
        
        scores = {
            'accuracy': [], 'precision': [], 'recall': [], 
            'f1': [], 'roc_auc': []
        }
        
        for train_idx, test_idx in tscv.split(X):
            X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
            y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]
            
            # Scale features
            scaler = StandardScaler()
            X_train_scaled = scaler.fit_transform(X_train)
            X_test_scaled = scaler.transform(X_test)
            
            # Train model
            model = LogisticRegression(max_iter=1000, random_state=42)
            model.fit(X_train_scaled, y_train)
            
            # Predictions
            y_pred = model.predict(X_test_scaled)
            y_proba = model.predict_proba(X_test_scaled)[:, 1]
            
            # Metrics
            scores['accuracy'].append(accuracy_score(y_test, y_pred))
            scores['precision'].append(precision_score(y_test, y_pred, zero_division=0))
            scores['recall'].append(recall_score(y_test, y_pred, zero_division=0))
            scores['f1'].append(f1_score(y_test, y_pred, zero_division=0))
            scores['roc_auc'].append(roc_auc_score(y_test, y_proba))
        
        return {k: np.mean(v) for k, v in scores.items()}
    
    def run_ablation(self, model_name='A_price_only', n_splits=5):
        """Execute ablation test for a feature group"""
        
        features = self.groups[model_name]
        print(f"\n{'='*70}")
        print(f"MODEL {model_name.split('_')[0]}: {model_name.replace('_', ' ').title()}")
        print(f"{'='*70}")
        print(f"Features: {len(features)}")
        print(f"Feature list: {', '.join(features[:5])}...")
        
        # Prepare data
        df_clean = self.prepare_data(features)
        print(f"Clean samples: {len(df_clean)}")
        print(f"Date range: {df_clean['block_date'].min().date()} ‚Üí "
              f"{df_clean['block_date'].max().date()}")
        
        X = df_clean[features]
        y = df_clean[self.target_col]
        
        # Class distribution
        class_dist = y.value_counts(normalize=True)
        print(f"\nTarget distribution:")
        print(f"  Class 0 (down): {class_dist[0]:.1%}")
        print(f"  Class 1 (up):   {class_dist[1]:.1%}")
        
        # Run CV
        print(f"\nRunning {n_splits}-fold walk-forward CV...")
        scores = self.time_series_cv(X, y, n_splits)
        
        # Store results
        self.results[model_name] = {
            'features': features,
            'n_features': len(features),
            'n_samples': len(df_clean),
            'scores': scores
        }
        
        # Display results
        print(f"\nüìä RESULTS:")
        print(f"  Accuracy:  {scores['accuracy']:.4f}")
        print(f"  Precision: {scores['precision']:.4f}")
        print(f"  Recall:    {scores['recall']:.4f}")
        print(f"  F1 Score:  {scores['f1']:.4f}")
        print(f"  ROC AUC:   {scores['roc_auc']:.4f}")
        
        return scores
    
    def compare_models(self):
        """Compare all tested models"""
        if not self.results:
            print("No models tested yet!")
            return
        
        print(f"\n{'='*70}")
        print("ABLATION COMPARISON")
        print(f"{'='*70}")
        
        comparison = pd.DataFrame({
            name: res['scores'] for name, res in self.results.items()
        }).T
        
        comparison['n_features'] = [res['n_features'] for res in self.results.values()]
        comparison = comparison[['n_features', 'accuracy', 'precision', 
                                 'recall', 'f1', 'roc_auc']]
        
        print(comparison.to_string())
        
        # Find best model
        best_acc = comparison['accuracy'].idxmax()
        best_f1 = comparison['f1'].idxmax()
        
        print(f"\nüèÜ Best Accuracy: {best_acc} ({comparison.loc[best_acc, 'accuracy']:.4f})")
        print(f"üèÜ Best F1 Score: {best_f1} ({comparison.loc[best_f1, 'f1']:.4f})")
        
        return comparison


# ============================================================================
# EXECUTION
# ============================================================================

def add_price_features(df, price_col, prefix):
    """Add price-based ML features"""
    df = df.sort_values('block_date').reset_index(drop=True)
    
    df[f'{prefix}_daily_return'] = df[price_col].pct_change()
    df[f'{prefix}_log_return'] = np.log(df[price_col] / df[price_col].shift(1))
    df[f'{prefix}_vol7'] = df[f'{prefix}_daily_return'].rolling(7, min_periods=1).std()
    df[f'{prefix}_vol30'] = df[f'{prefix}_daily_return'].rolling(30, min_periods=1).std()
    
    # RSI
    returns = df[f'{prefix}_daily_return']
    gains = returns.where(returns > 0, 0).rolling(14, min_periods=1).mean()
    losses = -returns.where(returns < 0, 0).rolling(14, min_periods=1).mean()
    rs = gains / (losses + 1e-10)
    df[f'{prefix}_rsi'] = 100 - (100 / (1 + rs))
    
    # Lags
    for lag in [1, 3, 7]:
        df[f'{prefix}_ret_lag{lag}'] = df[f'{prefix}_daily_return'].shift(lag)
    
    return df

def add_correlation_features(df):
    """Add ETH-BTC correlation features"""
    df['eth_btc_ratio'] = df['eth_price'] / df['btc_price']
    df['eth_btc_ratio_ma7'] = df['eth_btc_ratio'].rolling(7, min_periods=1).mean()
    df['eth_btc_corr_30d'] = df['eth_daily_return'].rolling(30, min_periods=20).corr(df['btc_daily_return'])
    df['eth_outperformance'] = df['eth_daily_return'] - df['btc_daily_return']
    return df

def create_target(df):
    """Create target: next day price direction"""
    df['next_day_return'] = df['eth_price'].pct_change().shift(-1)
    df['next_day_price_direction'] = (df['next_day_return'] > 0).astype(int)
    return df


if __name__ == "__main__":
    # Load merged dataset
    print("üìÇ Loading merged dataset...")
    df_merged = pd.read_csv('merged_ml_dataset.csv')
    df_merged['block_date'] = pd.to_datetime(df_merged['block_date'], utc=True)
    
    print(f"‚úÖ Loaded {len(df_merged)} rows, {len(df_merged.columns)} columns")
    
    # Feature engineering
    print("\n‚öôÔ∏è Engineering features...")
    df_merged = add_price_features(df_merged, 'eth_price', 'eth')
    df_merged = add_price_features(df_merged, 'btc_price', 'btc')
    df_merged = add_correlation_features(df_merged)
    df_merged = create_target(df_merged)
    
    print(f"‚úÖ Features created: {len(df_merged.columns)} columns")
    print(f"Date range: {df_merged['block_date'].min().date()} ‚Üí "
          f"{df_merged['block_date'].max().date()}")
    
    # Initialize tester
    tester = AblationTester(df_merged)
    
    # Define feature groups
    tester.define_feature_groups()
    
    # Run Model A: Price-only baseline
    tester.run_ablation('A_price_only', n_splits=5)
    
    print("\n‚úÖ Model A (Price-only baseline) complete!")
    print("\nNext steps:")
    print("  1. Add whale features for Model B")
    print("  2. Add market intent for Model C")
    print("  3. Compare all models to see what adds value")

üìÇ Loading merged dataset...
‚úÖ Loaded 1097 rows, 36 columns

‚öôÔ∏è Engineering features...
‚úÖ Features created: 58 columns
Date range: 2022-12-24 ‚Üí 2025-12-24

MODEL A: A Price Only
Features: 20
Feature list: eth_daily_return, eth_log_return, eth_vol7, eth_vol30, eth_rsi...
Clean samples: 1077
Date range: 2023-01-13 ‚Üí 2025-12-24

Target distribution:
  Class 0 (down): 48.2%
  Class 1 (up):   51.8%

Running 5-fold walk-forward CV...

üìä RESULTS:
  Accuracy:  0.5564
  Precision: 0.5525
  Recall:    0.8343
  F1 Score:  0.6620
  ROC AUC:   0.5721

‚úÖ Model A (Price-only baseline) complete!

Next steps:
  1. Add whale features for Model B
  2. Add market intent for Model C
  3. Compare all models to see what adds value


In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_auc_score, classification_report, confusion_matrix
)
import warnings
warnings.filterwarnings('ignore')

# ============================================================================
# PHASE 1: ABLATION TESTING FRAMEWORK
# ============================================================================

class AblationTester:
    """Test feature group contributions via ablation study"""
    
    def __init__(self, df, target_col='next_day_price_direction'):
        self.df = df.copy()
        self.target_col = target_col
        self.results = {}
        
    def define_feature_groups(self):
        """Define Model A-C feature sets"""
        
        # Model A: Price-only baseline
        self.groups = {
            'A_price_only': [
                'eth_daily_return', 'eth_log_return', 'eth_vol7', 'eth_vol30',
                'eth_rsi', 'eth_ret_lag1', 'eth_ret_lag3', 'eth_ret_lag7',
                'btc_daily_return', 'btc_log_return', 'btc_vol7', 'btc_vol30',
                'btc_rsi', 'btc_ret_lag1', 'btc_ret_lag3', 'btc_ret_lag7',
                'eth_btc_ratio', 'eth_btc_ratio_ma7', 'eth_btc_corr_30d',
                'eth_outperformance'
            ],
            
            # Model B: On-chain only
            'B_onchain_only': [
                'deposit_tx_count', 'withdrawal_tx_count', 'deposit_withdrawal_ratio',
                'exchange_volume_ratio', 'exchange_flow_share', 'net_exchange_flow_ratio',
                'whale_exchange_deposits_eth', 'whale_exchange_withdrawals_eth',
                'whale_net_exchange_flow_eth', 'whale_exchange_flow_ratio',
                'whale_exchange_asymmetry', 'whale_tx_count', 'whale_volume_eth',
                'whale_volume_ratio', 'whale_volume_ratio_delta_1d',
                'whale_volume_ratio_delta_3d', 'whale_tx_zscore_90d',
                'mega_whale_ratio', 'net_flow_ma7', 'tx_per_active_delta_1d',
                'tx_per_active_zscore_90d', 'block_fullness_delta_1d',
                'eth_burned_delta_1d', 'eth_burned_zscore_90d', 'median_gas_delta_1d',
                'median_gas_delta_7d', 'smart_contract_ratio_delta_1d'
            ]
        }
        
        # Verify features exist for all groups
        for group_name, features in self.groups.items():
            missing = [f for f in features if f not in self.df.columns]
            if missing:
                print(f"‚ö†Ô∏è {group_name} missing: {missing}")
                self.groups[group_name] = [f for f in features if f in self.df.columns]
        
        return self.groups
    
    def prepare_data(self, features):
        """Clean data for modeling"""
        df_clean = self.df[features + [self.target_col, 'block_date']].copy()
        
        # Remove rows with missing target or features
        df_clean = df_clean.dropna(subset=[self.target_col])
        df_clean = df_clean.dropna(subset=features)
        
        # Sort by date
        df_clean = df_clean.sort_values('block_date').reset_index(drop=True)
        
        return df_clean
    
    def time_series_cv(self, X, y, n_splits=5):
        """Walk-forward time series cross-validation"""
        tscv = TimeSeriesSplit(n_splits=n_splits)
        
        scores = {
            'accuracy': [], 'precision': [], 'recall': [], 
            'f1': [], 'roc_auc': []
        }
        
        for train_idx, test_idx in tscv.split(X):
            X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
            y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]
            
            # Scale features
            scaler = StandardScaler()
            X_train_scaled = scaler.fit_transform(X_train)
            X_test_scaled = scaler.transform(X_test)
            
            # Train model
            model = LogisticRegression(max_iter=1000, random_state=42)
            model.fit(X_train_scaled, y_train)
            
            # Predictions
            y_pred = model.predict(X_test_scaled)
            y_proba = model.predict_proba(X_test_scaled)[:, 1]
            
            # Metrics
            scores['accuracy'].append(accuracy_score(y_test, y_pred))
            scores['precision'].append(precision_score(y_test, y_pred, zero_division=0))
            scores['recall'].append(recall_score(y_test, y_pred, zero_division=0))
            scores['f1'].append(f1_score(y_test, y_pred, zero_division=0))
            scores['roc_auc'].append(roc_auc_score(y_test, y_proba))
        
        return {k: np.mean(v) for k, v in scores.items()}
    
    def run_ablation(self, model_name='A_price_only', n_splits=5):
        """Execute ablation test for a feature group"""
        
        features = self.groups[model_name]
        print(f"\n{'='*70}")
        print(f"MODEL {model_name.split('_')[0]}: {model_name.replace('_', ' ').title()}")
        print(f"{'='*70}")
        print(f"Features: {len(features)}")
        print(f"Feature list: {', '.join(features[:5])}...")
        
        # Prepare data
        df_clean = self.prepare_data(features)
        print(f"Clean samples: {len(df_clean)}")
        print(f"Date range: {df_clean['block_date'].min().date()} ‚Üí "
              f"{df_clean['block_date'].max().date()}")
        
        X = df_clean[features]
        y = df_clean[self.target_col]
        
        # Class distribution
        class_dist = y.value_counts(normalize=True)
        print(f"\nTarget distribution:")
        print(f"  Class 0 (down): {class_dist[0]:.1%}")
        print(f"  Class 1 (up):   {class_dist[1]:.1%}")
        
        # Run CV
        print(f"\nRunning {n_splits}-fold walk-forward CV...")
        scores = self.time_series_cv(X, y, n_splits)
        
        # Store results
        self.results[model_name] = {
            'features': features,
            'n_features': len(features),
            'n_samples': len(df_clean),
            'scores': scores
        }
        
        # Display results
        print(f"\nüìä RESULTS:")
        print(f"  Accuracy:  {scores['accuracy']:.4f}")
        print(f"  Precision: {scores['precision']:.4f}")
        print(f"  Recall:    {scores['recall']:.4f}")
        print(f"  F1 Score:  {scores['f1']:.4f}")
        print(f"  ROC AUC:   {scores['roc_auc']:.4f}")
        
        return scores
    
    def compare_models(self):
        """Compare all tested models"""
        if not self.results:
            print("No models tested yet!")
            return
        
        print(f"\n{'='*70}")
        print("ABLATION COMPARISON")
        print(f"{'='*70}")
        
        comparison = pd.DataFrame({
            name: res['scores'] for name, res in self.results.items()
        }).T
        
        comparison['n_features'] = [res['n_features'] for res in self.results.values()]
        comparison = comparison[['n_features', 'accuracy', 'precision', 
                                 'recall', 'f1', 'roc_auc']]
        
        print(comparison.to_string())
        
        # Find best model
        best_acc = comparison['accuracy'].idxmax()
        best_f1 = comparison['f1'].idxmax()
        
        print(f"\nüèÜ Best Accuracy: {best_acc} ({comparison.loc[best_acc, 'accuracy']:.4f})")
        print(f"üèÜ Best F1 Score: {best_f1} ({comparison.loc[best_f1, 'f1']:.4f})")
        
        return comparison


# ============================================================================
# EXECUTION
# ============================================================================

def add_price_features(df, price_col, prefix):
    """Add price-based ML features"""
    df = df.sort_values('block_date').reset_index(drop=True)
    
    df[f'{prefix}_daily_return'] = df[price_col].pct_change()
    df[f'{prefix}_log_return'] = np.log(df[price_col] / df[price_col].shift(1))
    df[f'{prefix}_vol7'] = df[f'{prefix}_daily_return'].rolling(7, min_periods=1).std()
    df[f'{prefix}_vol30'] = df[f'{prefix}_daily_return'].rolling(30, min_periods=1).std()
    
    # RSI
    returns = df[f'{prefix}_daily_return']
    gains = returns.where(returns > 0, 0).rolling(14, min_periods=1).mean()
    losses = -returns.where(returns < 0, 0).rolling(14, min_periods=1).mean()
    rs = gains / (losses + 1e-10)
    df[f'{prefix}_rsi'] = 100 - (100 / (1 + rs))
    
    # Lags
    for lag in [1, 3, 7]:
        df[f'{prefix}_ret_lag{lag}'] = df[f'{prefix}_daily_return'].shift(lag)
    
    return df

def add_correlation_features(df):
    """Add ETH-BTC correlation features"""
    df['eth_btc_ratio'] = df['eth_price'] / df['btc_price']
    df['eth_btc_ratio_ma7'] = df['eth_btc_ratio'].rolling(7, min_periods=1).mean()
    df['eth_btc_corr_30d'] = df['eth_daily_return'].rolling(30, min_periods=20).corr(df['btc_daily_return'])
    df['eth_outperformance'] = df['eth_daily_return'] - df['btc_daily_return']
    return df

def create_target(df):
    """Create target: next day price direction"""
    df['next_day_return'] = df['eth_price'].pct_change().shift(-1)
    df['next_day_price_direction'] = (df['next_day_return'] > 0).astype(int)
    return df


if __name__ == "__main__":
    # Load merged dataset
    print("üìÇ Loading merged dataset...")
    df_merged = pd.read_csv('merged_ml_dataset.csv')
    df_merged['block_date'] = pd.to_datetime(df_merged['block_date'], utc=True)
    
    print(f"‚úÖ Loaded {len(df_merged)} rows, {len(df_merged.columns)} columns")
    
    # Feature engineering
    print("\n‚öôÔ∏è Engineering features...")
    df_merged = add_price_features(df_merged, 'eth_price', 'eth')
    df_merged = add_price_features(df_merged, 'btc_price', 'btc')
    df_merged = add_correlation_features(df_merged)
    df_merged = create_target(df_merged)
    
    print(f"‚úÖ Features created: {len(df_merged.columns)} columns")
    print(f"Date range: {df_merged['block_date'].min().date()} ‚Üí "
          f"{df_merged['block_date'].max().date()}")
    
    # Initialize tester
    tester = AblationTester(df_merged)
    
    # Define feature groups
    tester.define_feature_groups()
    
    # Run Model A: Price-only baseline
    print("\n" + "="*70)
    print("PHASE 1: BASELINE TESTING")
    print("="*70)
    tester.run_ablation('A_price_only', n_splits=5)
    
    # Run Model B: On-chain only
    print("\n" + "="*70)
    print("PHASE 2: ON-CHAIN SIGNAL TESTING")
    print("="*70)
    tester.run_ablation('B_onchain_only', n_splits=5)
    
    # Compare results
    comparison = tester.compare_models()
    
    print("\n‚úÖ Ablation Phase 1-2 complete!")
    print("\nüîç Key Insights:")
    print(f"  ‚Ä¢ Model A (price): {tester.results['A_price_only']['scores']['accuracy']:.2%} accuracy")
    print(f"  ‚Ä¢ Model B (on-chain): {tester.results['B_onchain_only']['scores']['accuracy']:.2%} accuracy")
    
    acc_diff = tester.results['B_onchain_only']['scores']['accuracy'] - tester.results['A_price_only']['scores']['accuracy']
    if acc_diff > 0:
        print(f"  ‚Ä¢ On-chain signals add {acc_diff:.2%} raw accuracy")
    else:
        print(f"  ‚Ä¢ On-chain signals underperform by {abs(acc_diff):.2%}")
    
    print("\nNext steps:")
    print("  ‚Ä¢ If Model B > 50.5%: on-chain has signal")
    print("  ‚Ä¢ Build Model C (combined) to test complementarity")

üìÇ Loading merged dataset...
‚úÖ Loaded 1097 rows, 36 columns

‚öôÔ∏è Engineering features...
‚úÖ Features created: 58 columns
Date range: 2022-12-24 ‚Üí 2025-12-24

PHASE 1: BASELINE TESTING

MODEL A: A Price Only
Features: 20
Feature list: eth_daily_return, eth_log_return, eth_vol7, eth_vol30, eth_rsi...
Clean samples: 1077
Date range: 2023-01-13 ‚Üí 2025-12-24

Target distribution:
  Class 0 (down): 48.2%
  Class 1 (up):   51.8%

Running 5-fold walk-forward CV...

üìä RESULTS:
  Accuracy:  0.5564
  Precision: 0.5525
  Recall:    0.8343
  F1 Score:  0.6620
  ROC AUC:   0.5721

PHASE 2: ON-CHAIN SIGNAL TESTING

MODEL B: B Onchain Only
Features: 27
Feature list: deposit_tx_count, withdrawal_tx_count, deposit_withdrawal_ratio, exchange_volume_ratio, exchange_flow_share...
Clean samples: 1086
Date range: 2023-01-02 ‚Üí 2025-12-24

Target distribution:
  Class 0 (down): 47.9%
  Class 1 (up):   52.1%

Running 5-fold walk-forward CV...

üìä RESULTS:
  Accuracy:  0.5193
  Precision: 0.55

In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_auc_score, classification_report, confusion_matrix
)
import warnings
warnings.filterwarnings('ignore')

# ============================================================================
# PHASE 1: ABLATION TESTING FRAMEWORK
# ============================================================================

class AblationTester:
    """Test feature group contributions via ablation study"""
    
    def __init__(self, df, target_col='next_day_price_direction'):
        self.df = df.copy()
        self.target_col = target_col
        self.results = {}
        
    def define_feature_groups(self):
        """Define Model A-C feature sets"""
        
        # Model A: Price-only baseline
        self.groups = {
            'A_price_only': [
                'eth_daily_return', 'eth_log_return', 'eth_vol7', 'eth_vol30',
                'eth_rsi', 'eth_ret_lag1', 'eth_ret_lag3', 'eth_ret_lag7',
                'btc_daily_return', 'btc_log_return', 'btc_vol7', 'btc_vol30',
                'btc_rsi', 'btc_ret_lag1', 'btc_ret_lag3', 'btc_ret_lag7',
                'eth_btc_ratio', 'eth_btc_ratio_ma7', 'eth_btc_corr_30d',
                'eth_outperformance'
            ],
            
            # Model B: On-chain only
            'B_onchain_only': [
                'deposit_tx_count', 'withdrawal_tx_count', 'deposit_withdrawal_ratio',
                'exchange_volume_ratio', 'exchange_flow_share', 'net_exchange_flow_ratio',
                'whale_exchange_deposits_eth', 'whale_exchange_withdrawals_eth',
                'whale_net_exchange_flow_eth', 'whale_exchange_flow_ratio',
                'whale_exchange_asymmetry', 'whale_tx_count', 'whale_volume_eth',
                'whale_volume_ratio', 'whale_volume_ratio_delta_1d',
                'whale_volume_ratio_delta_3d', 'whale_tx_zscore_90d',
                'mega_whale_ratio', 'net_flow_ma7', 'tx_per_active_delta_1d',
                'tx_per_active_zscore_90d', 'block_fullness_delta_1d',
                'eth_burned_delta_1d', 'eth_burned_zscore_90d', 'median_gas_delta_1d',
                'median_gas_delta_7d', 'smart_contract_ratio_delta_1d'
            ]
        }
        
        # Model C: Hybrid (A + B, no raw prices)
        self.groups['C_hybrid'] = (
            self.groups['A_price_only'] + 
            self.groups['B_onchain_only']
        )
        
        # Verify features exist for all groups
        for group_name, features in self.groups.items():
            missing = [f for f in features if f not in self.df.columns]
            if missing:
                print(f"‚ö†Ô∏è {group_name} missing: {missing}")
                self.groups[group_name] = [f for f in features if f in self.df.columns]
        
        return self.groups
    
    def prepare_data(self, features):
        """Clean data for modeling"""
        df_clean = self.df[features + [self.target_col, 'block_date']].copy()
        
        # Remove rows with missing target or features
        df_clean = df_clean.dropna(subset=[self.target_col])
        df_clean = df_clean.dropna(subset=features)
        
        # Sort by date
        df_clean = df_clean.sort_values('block_date').reset_index(drop=True)
        
        return df_clean
    
    def time_series_cv(self, X, y, n_splits=5):
        """Walk-forward time series cross-validation"""
        tscv = TimeSeriesSplit(n_splits=n_splits)
        
        scores = {
            'accuracy': [], 'precision': [], 'recall': [], 
            'f1': [], 'roc_auc': []
        }
        
        for train_idx, test_idx in tscv.split(X):
            X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
            y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]
            
            # Scale features
            scaler = StandardScaler()
            X_train_scaled = scaler.fit_transform(X_train)
            X_test_scaled = scaler.transform(X_test)
            
            # Train model
            model = LogisticRegression(max_iter=1000, random_state=42)
            model.fit(X_train_scaled, y_train)
            
            # Predictions
            y_pred = model.predict(X_test_scaled)
            y_proba = model.predict_proba(X_test_scaled)[:, 1]
            
            # Metrics
            scores['accuracy'].append(accuracy_score(y_test, y_pred))
            scores['precision'].append(precision_score(y_test, y_pred, zero_division=0))
            scores['recall'].append(recall_score(y_test, y_pred, zero_division=0))
            scores['f1'].append(f1_score(y_test, y_pred, zero_division=0))
            scores['roc_auc'].append(roc_auc_score(y_test, y_proba))
        
        return {k: np.mean(v) for k, v in scores.items()}
    
    def run_ablation(self, model_name='A_price_only', n_splits=5):
        """Execute ablation test for a feature group"""
        
        features = self.groups[model_name]
        print(f"\n{'='*70}")
        print(f"MODEL {model_name.split('_')[0]}: {model_name.replace('_', ' ').title()}")
        print(f"{'='*70}")
        print(f"Features: {len(features)}")
        print(f"Feature list: {', '.join(features[:5])}...")
        
        # Prepare data
        df_clean = self.prepare_data(features)
        print(f"Clean samples: {len(df_clean)}")
        print(f"Date range: {df_clean['block_date'].min().date()} ‚Üí "
              f"{df_clean['block_date'].max().date()}")
        
        X = df_clean[features]
        y = df_clean[self.target_col]
        
        # Class distribution
        class_dist = y.value_counts(normalize=True)
        print(f"\nTarget distribution:")
        print(f"  Class 0 (down): {class_dist[0]:.1%}")
        print(f"  Class 1 (up):   {class_dist[1]:.1%}")
        
        # Run CV
        print(f"\nRunning {n_splits}-fold walk-forward CV...")
        scores = self.time_series_cv(X, y, n_splits)
        
        # Store results
        self.results[model_name] = {
            'features': features,
            'n_features': len(features),
            'n_samples': len(df_clean),
            'scores': scores
        }
        
        # Display results
        print(f"\nüìä RESULTS:")
        print(f"  Accuracy:  {scores['accuracy']:.4f}")
        print(f"  Precision: {scores['precision']:.4f}")
        print(f"  Recall:    {scores['recall']:.4f}")
        print(f"  F1 Score:  {scores['f1']:.4f}")
        print(f"  ROC AUC:   {scores['roc_auc']:.4f}")
        
        return scores
    
    def compare_models(self):
        """Compare all tested models"""
        if not self.results:
            print("No models tested yet!")
            return
        
        print(f"\n{'='*70}")
        print("ABLATION COMPARISON")
        print(f"{'='*70}")
        
        comparison = pd.DataFrame({
            name: res['scores'] for name, res in self.results.items()
        }).T
        
        comparison['n_features'] = [res['n_features'] for res in self.results.values()]
        comparison = comparison[['n_features', 'accuracy', 'precision', 
                                 'recall', 'f1', 'roc_auc']]
        
        print(comparison.to_string())
        
        # Find best model
        best_acc = comparison['accuracy'].idxmax()
        best_f1 = comparison['f1'].idxmax()
        
        print(f"\nüèÜ Best Accuracy: {best_acc} ({comparison.loc[best_acc, 'accuracy']:.4f})")
        print(f"üèÜ Best F1 Score: {best_f1} ({comparison.loc[best_f1, 'f1']:.4f})")
        
        return comparison


# ============================================================================
# EXECUTION
# ============================================================================

def add_price_features(df, price_col, prefix):
    """Add price-based ML features"""
    df = df.sort_values('block_date').reset_index(drop=True)
    
    df[f'{prefix}_daily_return'] = df[price_col].pct_change()
    df[f'{prefix}_log_return'] = np.log(df[price_col] / df[price_col].shift(1))
    df[f'{prefix}_vol7'] = df[f'{prefix}_daily_return'].rolling(7, min_periods=1).std()
    df[f'{prefix}_vol30'] = df[f'{prefix}_daily_return'].rolling(30, min_periods=1).std()
    
    # RSI
    returns = df[f'{prefix}_daily_return']
    gains = returns.where(returns > 0, 0).rolling(14, min_periods=1).mean()
    losses = -returns.where(returns < 0, 0).rolling(14, min_periods=1).mean()
    rs = gains / (losses + 1e-10)
    df[f'{prefix}_rsi'] = 100 - (100 / (1 + rs))
    
    # Lags
    for lag in [1, 3, 7]:
        df[f'{prefix}_ret_lag{lag}'] = df[f'{prefix}_daily_return'].shift(lag)
    
    return df

def add_correlation_features(df):
    """Add ETH-BTC correlation features"""
    df['eth_btc_ratio'] = df['eth_price'] / df['btc_price']
    df['eth_btc_ratio_ma7'] = df['eth_btc_ratio'].rolling(7, min_periods=1).mean()
    df['eth_btc_corr_30d'] = df['eth_daily_return'].rolling(30, min_periods=20).corr(df['btc_daily_return'])
    df['eth_outperformance'] = df['eth_daily_return'] - df['btc_daily_return']
    return df

def create_target(df):
    """Create target: next day price direction"""
    df['next_day_return'] = df['eth_price'].pct_change().shift(-1)
    df['next_day_price_direction'] = (df['next_day_return'] > 0).astype(int)
    return df


if __name__ == "__main__":
    # Load merged dataset
    print("üìÇ Loading merged dataset...")
    df_merged = pd.read_csv('merged_ml_dataset.csv')
    df_merged['block_date'] = pd.to_datetime(df_merged['block_date'], utc=True)
    
    print(f"‚úÖ Loaded {len(df_merged)} rows, {len(df_merged.columns)} columns")
    
    # Feature engineering
    print("\n‚öôÔ∏è Engineering features...")
    df_merged = add_price_features(df_merged, 'eth_price', 'eth')
    df_merged = add_price_features(df_merged, 'btc_price', 'btc')
    df_merged = add_correlation_features(df_merged)
    df_merged = create_target(df_merged)
    
    print(f"‚úÖ Features created: {len(df_merged.columns)} columns")
    print(f"Date range: {df_merged['block_date'].min().date()} ‚Üí "
          f"{df_merged['block_date'].max().date()}")
    
    # Initialize tester
    tester = AblationTester(df_merged)
    
    # Define feature groups
    tester.define_feature_groups()
    
    # Run Model A: Price-only baseline
    print("\n" + "="*70)
    print("PHASE 1: BASELINE TESTING")
    print("="*70)
    tester.run_ablation('A_price_only', n_splits=5)
    
    # Run Model B: On-chain only
    print("\n" + "="*70)
    print("PHASE 2: ON-CHAIN SIGNAL TESTING")
    print("="*70)
    tester.run_ablation('B_onchain_only', n_splits=5)
    
    # Run Model C: Hybrid
    print("\n" + "="*70)
    print("PHASE 3: HYBRID MODEL (PRICE + ON-CHAIN)")
    print("="*70)
    tester.run_ablation('C_hybrid', n_splits=5)
    
    # Compare results
    comparison = tester.compare_models()
    
    # Final analysis
    print("\n" + "="*70)
    print("FINAL ABLATION ANALYSIS")
    print("="*70)
    
    acc_a = tester.results['A_price_only']['scores']['accuracy']
    acc_b = tester.results['B_onchain_only']['scores']['accuracy']
    acc_c = tester.results['C_hybrid']['scores']['accuracy']
    
    print(f"\nüìä Accuracy Comparison:")
    print(f"  Model A (price):    {acc_a:.2%}")
    print(f"  Model B (on-chain): {acc_b:.2%} ({acc_b - 0.5:.2%} above random)")
    print(f"  Model C (hybrid):   {acc_c:.2%}")
    
    print(f"\nüéØ Incremental Value:")
    c_vs_a = acc_c - acc_a
    print(f"  On-chain adds: {c_vs_a:+.2%} accuracy")
    
    if c_vs_a > 0.01:
        print(f"  ‚úÖ PASS: On-chain signals add {c_vs_a:.2%} value")
    elif c_vs_a > 0:
        print(f"  ‚ö†Ô∏è MARGINAL: Only {c_vs_a:.2%} improvement")
    else:
        print(f"  ‚ùå FAIL: On-chain adds no value (or hurts)")
    
    # ROC AUC comparison
    auc_c = tester.results['C_hybrid']['scores']['roc_auc']
    auc_a = tester.results['A_price_only']['scores']['roc_auc']
    print(f"\nüìà ROC AUC:")
    print(f"  Model C: {auc_c:.4f} vs Model A: {auc_a:.4f} ({auc_c - auc_a:+.4f})")
    
    print("\nüí° Next Steps:")
    if c_vs_a > 0.01:
        print("  ‚Ä¢ On-chain signals validated")
        print("  ‚Ä¢ Proceed to feature importance analysis")
        print("  ‚Ä¢ Test with RandomForest/XGBoost for non-linear effects")
    else:
        print("  ‚Ä¢ On-chain signals may be regime-specific")
        print("  ‚Ä¢ Try interaction features (price_vol * whale_flow)")
        print("  ‚Ä¢ Consider threshold-based rules instead of ML")

üìÇ Loading merged dataset...
‚úÖ Loaded 1097 rows, 36 columns

‚öôÔ∏è Engineering features...
‚úÖ Features created: 58 columns
Date range: 2022-12-24 ‚Üí 2025-12-24

PHASE 1: BASELINE TESTING

MODEL A: A Price Only
Features: 20
Feature list: eth_daily_return, eth_log_return, eth_vol7, eth_vol30, eth_rsi...
Clean samples: 1077
Date range: 2023-01-13 ‚Üí 2025-12-24

Target distribution:
  Class 0 (down): 48.2%
  Class 1 (up):   51.8%

Running 5-fold walk-forward CV...

üìä RESULTS:
  Accuracy:  0.5564
  Precision: 0.5525
  Recall:    0.8343
  F1 Score:  0.6620
  ROC AUC:   0.5721

PHASE 2: ON-CHAIN SIGNAL TESTING

MODEL B: B Onchain Only
Features: 27
Feature list: deposit_tx_count, withdrawal_tx_count, deposit_withdrawal_ratio, exchange_volume_ratio, exchange_flow_share...
Clean samples: 1086
Date range: 2023-01-02 ‚Üí 2025-12-24

Target distribution:
  Class 0 (down): 47.9%
  Class 1 (up):   52.1%

Running 5-fold walk-forward CV...

üìä RESULTS:
  Accuracy:  0.5193
  Precision: 0.55

In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_auc_score, classification_report, confusion_matrix
)
from sklearn.inspection import permutation_importance
import warnings
warnings.filterwarnings('ignore')

# ============================================================================
# PHASE 1: ABLATION TESTING FRAMEWORK
# ============================================================================

class AblationTester:
    """Test feature group contributions via ablation study"""
    
    def __init__(self, df, target_col='next_day_price_direction'):
        self.df = df.copy()
        self.target_col = target_col
        self.results = {}
        
    def define_feature_groups(self):
        """Define Model A-C feature sets"""
        
        # Model A: Price-only baseline
        self.groups = {
            'A_price_only': [
                'eth_daily_return', 'eth_log_return', 'eth_vol7', 'eth_vol30',
                'eth_rsi', 'eth_ret_lag1', 'eth_ret_lag3', 'eth_ret_lag7',
                'btc_daily_return', 'btc_log_return', 'btc_vol7', 'btc_vol30',
                'btc_rsi', 'btc_ret_lag1', 'btc_ret_lag3', 'btc_ret_lag7',
                'eth_btc_ratio', 'eth_btc_ratio_ma7', 'eth_btc_corr_30d',
                'eth_outperformance'
            ],
            
            # Model B: On-chain only
            'B_onchain_only': [
                'deposit_tx_count', 'withdrawal_tx_count', 'deposit_withdrawal_ratio',
                'exchange_volume_ratio', 'exchange_flow_share', 'net_exchange_flow_ratio',
                'whale_exchange_deposits_eth', 'whale_exchange_withdrawals_eth',
                'whale_net_exchange_flow_eth', 'whale_exchange_flow_ratio',
                'whale_exchange_asymmetry', 'whale_tx_count', 'whale_volume_eth',
                'whale_volume_ratio', 'whale_volume_ratio_delta_1d',
                'whale_volume_ratio_delta_3d', 'whale_tx_zscore_90d',
                'mega_whale_ratio', 'net_flow_ma7', 'tx_per_active_delta_1d',
                'tx_per_active_zscore_90d', 'block_fullness_delta_1d',
                'eth_burned_delta_1d', 'eth_burned_zscore_90d', 'median_gas_delta_1d',
                'median_gas_delta_7d', 'smart_contract_ratio_delta_1d'
            ]
        }
        
        # Model C: Hybrid (A + B, no raw prices)
        self.groups['C_hybrid'] = (
            self.groups['A_price_only'] + 
            self.groups['B_onchain_only']
        )
        
        # Verify features exist for all groups
        for group_name, features in self.groups.items():
            missing = [f for f in features if f not in self.df.columns]
            if missing:
                print(f"‚ö†Ô∏è {group_name} missing: {missing}")
                self.groups[group_name] = [f for f in features if f in self.df.columns]
        
        return self.groups
    
    def prepare_data(self, features):
        """Clean data for modeling"""
        df_clean = self.df[features + [self.target_col, 'block_date']].copy()
        
        # Remove rows with missing target or features
        df_clean = df_clean.dropna(subset=[self.target_col])
        df_clean = df_clean.dropna(subset=features)
        
        # Sort by date
        df_clean = df_clean.sort_values('block_date').reset_index(drop=True)
        
        return df_clean
    
    def time_series_cv(self, X, y, n_splits=5):
        """Walk-forward time series cross-validation"""
        tscv = TimeSeriesSplit(n_splits=n_splits)
        
        scores = {
            'accuracy': [], 'precision': [], 'recall': [], 
            'f1': [], 'roc_auc': []
        }
        
        for train_idx, test_idx in tscv.split(X):
            X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
            y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]
            
            # Scale features
            scaler = StandardScaler()
            X_train_scaled = scaler.fit_transform(X_train)
            X_test_scaled = scaler.transform(X_test)
            
            # Train model
            model = LogisticRegression(max_iter=1000, random_state=42)
            model.fit(X_train_scaled, y_train)
            
            # Predictions
            y_pred = model.predict(X_test_scaled)
            y_proba = model.predict_proba(X_test_scaled)[:, 1]
            
            # Metrics
            scores['accuracy'].append(accuracy_score(y_test, y_pred))
            scores['precision'].append(precision_score(y_test, y_pred, zero_division=0))
            scores['recall'].append(recall_score(y_test, y_pred, zero_division=0))
            scores['f1'].append(f1_score(y_test, y_pred, zero_division=0))
            scores['roc_auc'].append(roc_auc_score(y_test, y_proba))
        
        return {k: np.mean(v) for k, v in scores.items()}
    
    def run_ablation(self, model_name='A_price_only', n_splits=5):
        """Execute ablation test for a feature group"""
        
        features = self.groups[model_name]
        print(f"\n{'='*70}")
        print(f"MODEL {model_name.split('_')[0]}: {model_name.replace('_', ' ').title()}")
        print(f"{'='*70}")
        print(f"Features: {len(features)}")
        print(f"Feature list: {', '.join(features[:5])}...")
        
        # Prepare data
        df_clean = self.prepare_data(features)
        print(f"Clean samples: {len(df_clean)}")
        print(f"Date range: {df_clean['block_date'].min().date()} ‚Üí "
              f"{df_clean['block_date'].max().date()}")
        
        X = df_clean[features]
        y = df_clean[self.target_col]
        
        # Class distribution
        class_dist = y.value_counts(normalize=True)
        print(f"\nTarget distribution:")
        print(f"  Class 0 (down): {class_dist[0]:.1%}")
        print(f"  Class 1 (up):   {class_dist[1]:.1%}")
        
        # Run CV
        print(f"\nRunning {n_splits}-fold walk-forward CV...")
        scores = self.time_series_cv(X, y, n_splits)
        
        # Store results
        self.results[model_name] = {
            'features': features,
            'n_features': len(features),
            'n_samples': len(df_clean),
            'scores': scores
        }
        
        # Display results
        print(f"\nüìä RESULTS:")
        print(f"  Accuracy:  {scores['accuracy']:.4f}")
        print(f"  Precision: {scores['precision']:.4f}")
        print(f"  Recall:    {scores['recall']:.4f}")
        print(f"  F1 Score:  {scores['f1']:.4f}")
        print(f"  ROC AUC:   {scores['roc_auc']:.4f}")
        
        return scores
    
    def feature_importance_analysis(self, model_name='C_hybrid'):
        """Analyze which features drive predictions"""
        
        if model_name not in self.results:
            print(f"‚ùå Model {model_name} not tested yet")
            return
        
        features = self.groups[model_name]
        print(f"\n{'='*70}")
        print(f"FEATURE IMPORTANCE: {model_name.upper()}")
        print(f"{'='*70}")
        
        # Prepare data
        df_clean = self.prepare_data(features)
        X = df_clean[features]
        y = df_clean[self.target_col]
        
        # Train on full dataset (for importance, not prediction)
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(X)
        
        # Train RandomForest for better feature importance
        print("\nüå≤ Training RandomForest for feature importance...")
        rf = RandomForestClassifier(n_estimators=100, max_depth=8, 
                                     random_state=42, n_jobs=-1)
        rf.fit(X_scaled, y)
        
        # Feature importance from RF
        feat_imp = pd.DataFrame({
            'feature': features,
            'importance': rf.feature_importances_
        }).sort_values('importance', ascending=False)
        
        print(f"\nüìä TOP 15 FEATURES (RandomForest):")
        print(feat_imp.head(15).to_string(index=False))
        
        # Categorize features
        price_feats = [f for f in feat_imp['feature'].values[:15] 
                       if any(x in f for x in ['eth_', 'btc_', 'ratio', 'return', 
                                                'vol', 'rsi', 'lag', 'outperform'])]
        whale_feats = [f for f in feat_imp['feature'].values[:15] 
                       if any(x in f for x in ['whale', 'exchange', 'deposit', 
                                                'withdrawal', 'flow', 'tx_', 'gas', 
                                                'burned', 'mega'])]
        
        print(f"\nüîç Feature Breakdown (Top 15):")
        print(f"  Price/Technical: {len(price_feats)}")
        print(f"  Whale/On-chain:  {len(whale_feats)}")
        
        if len(whale_feats) >= 3:
            print(f"\n‚úÖ GOOD: Whale features present in top 15:")
            for f in whale_feats:
                imp = feat_imp[feat_imp['feature'] == f]['importance'].values[0]
                print(f"     {f}: {imp:.4f}")
        else:
            print(f"\n‚ö†Ô∏è WARNING: Only {len(whale_feats)} whale features in top 15")
            print("  ‚Üí Model relies almost entirely on price momentum")
        
        # Permutation importance (more reliable)
        print(f"\nüîÄ Computing permutation importance (slower but accurate)...")
        perm_imp = permutation_importance(rf, X_scaled, y, n_repeats=10, 
                                          random_state=42, n_jobs=-1)
        
        perm_df = pd.DataFrame({
            'feature': features,
            'importance': perm_imp.importances_mean
        }).sort_values('importance', ascending=False)
        
        print(f"\nüìä TOP 15 FEATURES (Permutation):")
        print(perm_df.head(15).to_string(index=False))
        
        return feat_imp, perm_df
    
    def compare_models(self):
        """Compare all tested models"""
        if not self.results:
            print("No models tested yet!")
            return
        
        print(f"\n{'='*70}")
        print("ABLATION COMPARISON")
        print(f"{'='*70}")
        
        comparison = pd.DataFrame({
            name: res['scores'] for name, res in self.results.items()
        }).T
        
        comparison['n_features'] = [res['n_features'] for res in self.results.values()]
        comparison = comparison[['n_features', 'accuracy', 'precision', 
                                 'recall', 'f1', 'roc_auc']]
        
        print(comparison.to_string())
        
        # Find best model
        best_acc = comparison['accuracy'].idxmax()
        best_f1 = comparison['f1'].idxmax()
        
        print(f"\nüèÜ Best Accuracy: {best_acc} ({comparison.loc[best_acc, 'accuracy']:.4f})")
        print(f"üèÜ Best F1 Score: {best_f1} ({comparison.loc[best_f1, 'f1']:.4f})")
        
        return comparison
        """Compare all tested models"""
        if not self.results:
            print("No models tested yet!")
            return
        
        print(f"\n{'='*70}")
        print("ABLATION COMPARISON")
        print(f"{'='*70}")
        
        comparison = pd.DataFrame({
            name: res['scores'] for name, res in self.results.items()
        }).T
        
        comparison['n_features'] = [res['n_features'] for res in self.results.values()]
        comparison = comparison[['n_features', 'accuracy', 'precision', 
                                 'recall', 'f1', 'roc_auc']]
        
        print(comparison.to_string())
        
        # Find best model
        best_acc = comparison['accuracy'].idxmax()
        best_f1 = comparison['f1'].idxmax()
        
        print(f"\nüèÜ Best Accuracy: {best_acc} ({comparison.loc[best_acc, 'accuracy']:.4f})")
        print(f"üèÜ Best F1 Score: {best_f1} ({comparison.loc[best_f1, 'f1']:.4f})")
        
        return comparison


# ============================================================================
# EXECUTION
# ============================================================================

def add_price_features(df, price_col, prefix):
    """Add price-based ML features"""
    df = df.sort_values('block_date').reset_index(drop=True)
    
    df[f'{prefix}_daily_return'] = df[price_col].pct_change()
    df[f'{prefix}_log_return'] = np.log(df[price_col] / df[price_col].shift(1))
    df[f'{prefix}_vol7'] = df[f'{prefix}_daily_return'].rolling(7, min_periods=1).std()
    df[f'{prefix}_vol30'] = df[f'{prefix}_daily_return'].rolling(30, min_periods=1).std()
    
    # RSI
    returns = df[f'{prefix}_daily_return']
    gains = returns.where(returns > 0, 0).rolling(14, min_periods=1).mean()
    losses = -returns.where(returns < 0, 0).rolling(14, min_periods=1).mean()
    rs = gains / (losses + 1e-10)
    df[f'{prefix}_rsi'] = 100 - (100 / (1 + rs))
    
    # Lags
    for lag in [1, 3, 7]:
        df[f'{prefix}_ret_lag{lag}'] = df[f'{prefix}_daily_return'].shift(lag)
    
    return df

def add_correlation_features(df):
    """Add ETH-BTC correlation features"""
    df['eth_btc_ratio'] = df['eth_price'] / df['btc_price']
    df['eth_btc_ratio_ma7'] = df['eth_btc_ratio'].rolling(7, min_periods=1).mean()
    df['eth_btc_corr_30d'] = df['eth_daily_return'].rolling(30, min_periods=20).corr(df['btc_daily_return'])
    df['eth_outperformance'] = df['eth_daily_return'] - df['btc_daily_return']
    return df

def create_target(df):
    """Create target: next day price direction"""
    df['next_day_return'] = df['eth_price'].pct_change().shift(-1)
    df['next_day_price_direction'] = (df['next_day_return'] > 0).astype(int)
    return df


if __name__ == "__main__":
    # Load merged dataset
    print("üìÇ Loading merged dataset...")
    df_merged = pd.read_csv('merged_ml_dataset.csv')
    df_merged['block_date'] = pd.to_datetime(df_merged['block_date'], utc=True)
    
    print(f"‚úÖ Loaded {len(df_merged)} rows, {len(df_merged.columns)} columns")
    
    # Feature engineering
    print("\n‚öôÔ∏è Engineering features...")
    df_merged = add_price_features(df_merged, 'eth_price', 'eth')
    df_merged = add_price_features(df_merged, 'btc_price', 'btc')
    df_merged = add_correlation_features(df_merged)
    df_merged = create_target(df_merged)
    
    print(f"‚úÖ Features created: {len(df_merged.columns)} columns")
    print(f"Date range: {df_merged['block_date'].min().date()} ‚Üí "
          f"{df_merged['block_date'].max().date()}")
    
    # Initialize tester
    tester = AblationTester(df_merged)
    
    # Define feature groups
    tester.define_feature_groups()
    
    # Run Model A: Price-only baseline
    print("\n" + "="*70)
    print("PHASE 1: BASELINE TESTING")
    print("="*70)
    tester.run_ablation('A_price_only', n_splits=5)
    
    # Run Model B: On-chain only
    print("\n" + "="*70)
    print("PHASE 2: ON-CHAIN SIGNAL TESTING")
    print("="*70)
    tester.run_ablation('B_onchain_only', n_splits=5)
    
    # Run Model C: Hybrid
    print("\n" + "="*70)
    print("PHASE 3: HYBRID MODEL (PRICE + ON-CHAIN)")
    print("="*70)
    tester.run_ablation('C_hybrid', n_splits=5)
    
    # Compare results
    comparison = tester.compare_models()
    
    # Final analysis
    print("\n" + "="*70)
    print("FINAL ABLATION ANALYSIS")
    print("="*70)
    
    acc_a = tester.results['A_price_only']['scores']['accuracy']
    acc_b = tester.results['B_onchain_only']['scores']['accuracy']
    acc_c = tester.results['C_hybrid']['scores']['accuracy']
    
    print(f"\nüìä Accuracy Comparison:")
    print(f"  Model A (price):    {acc_a:.2%}")
    print(f"  Model B (on-chain): {acc_b:.2%} ({acc_b - 0.5:.2%} above random)")
    print(f"  Model C (hybrid):   {acc_c:.2%}")
    
    print(f"\nüéØ Incremental Value:")
    c_vs_a = acc_c - acc_a
    print(f"  On-chain adds: {c_vs_a:+.2%} accuracy")
    
    if c_vs_a > 0.01:
        print(f"  ‚úÖ PASS: On-chain signals add {c_vs_a:.2%} value")
    elif c_vs_a > 0:
        print(f"  ‚ö†Ô∏è MARGINAL: Only {c_vs_a:.2%} improvement")
    else:
        print(f"  ‚ùå FAIL: On-chain adds no value (or hurts)")
    
    # ROC AUC comparison
    auc_c = tester.results['C_hybrid']['scores']['roc_auc']
    auc_a = tester.results['A_price_only']['scores']['roc_auc']
    print(f"\nüìà ROC AUC:")
    print(f"  Model C: {auc_c:.4f} vs Model A: {auc_a:.4f} ({auc_c - auc_a:+.4f})")
    
    print("\nüí° Next Steps:")
    if c_vs_a > 0.01:
        print("  ‚Ä¢ On-chain signals validated")
        print("  ‚Ä¢ Proceed to feature importance analysis")
        print("  ‚Ä¢ Test with RandomForest/XGBoost for non-linear effects")
    else:
        print("  ‚Ä¢ On-chain signals may be regime-specific")
        print("  ‚Ä¢ Try interaction features (price_vol * whale_flow)")
        print("  ‚Ä¢ Consider threshold-based rules instead of ML")
    
    # PHASE 2: Feature Importance Analysis
    print("\n" + "="*70)
    print("PHASE 2: FEATURE DOMINANCE CHECK")
    print("="*70)
    print("Analyzing what Model C actually learned...")
    
    feat_imp_rf, feat_imp_perm = tester.feature_importance_analysis('C_hybrid')
    
    # Final verdict
    print("\n" + "="*70)
    print("FINAL VERDICT")
    print("="*70)
    
    whale_in_top10_rf = sum(1 for f in feat_imp_rf['feature'].values[:10] 
                            if any(x in f for x in ['whale', 'exchange', 'deposit', 
                                                     'withdrawal', 'flow', 'tx_', 
                                                     'burned', 'mega']))
    
    whale_in_top10_perm = sum(1 for f in feat_imp_perm['feature'].values[:10] 
                              if any(x in f for x in ['whale', 'exchange', 'deposit', 
                                                       'withdrawal', 'flow', 'tx_', 
                                                       'burned', 'mega']))
    
    print(f"\nWhale features in top 10:")
    print(f"  RandomForest:  {whale_in_top10_rf}/10")
    print(f"  Permutation:   {whale_in_top10_perm}/10")
    
    if whale_in_top10_perm == 0:
        print("\n‚ùå VERDICT: Model ignores whale data entirely")
        print("   ‚Üí Whale signals redundant with price momentum")
    elif whale_in_top10_perm <= 2:
        print("\n‚ö†Ô∏è VERDICT: Whale data plays minor role")
        print("   ‚Üí Try regime-specific modeling or interactions")
    else:
        print("\n‚úÖ VERDICT: Whale data actively used")
        print("   ‚Üí Performance issue may be feature engineering, not signal")

üìÇ Loading merged dataset...
‚úÖ Loaded 1097 rows, 36 columns

‚öôÔ∏è Engineering features...
‚úÖ Features created: 58 columns
Date range: 2022-12-24 ‚Üí 2025-12-24

PHASE 1: BASELINE TESTING

MODEL A: A Price Only
Features: 20
Feature list: eth_daily_return, eth_log_return, eth_vol7, eth_vol30, eth_rsi...
Clean samples: 1077
Date range: 2023-01-13 ‚Üí 2025-12-24

Target distribution:
  Class 0 (down): 48.2%
  Class 1 (up):   51.8%

Running 5-fold walk-forward CV...

üìä RESULTS:
  Accuracy:  0.5564
  Precision: 0.5525
  Recall:    0.8343
  F1 Score:  0.6620
  ROC AUC:   0.5721

PHASE 2: ON-CHAIN SIGNAL TESTING

MODEL B: B Onchain Only
Features: 27
Feature list: deposit_tx_count, withdrawal_tx_count, deposit_withdrawal_ratio, exchange_volume_ratio, exchange_flow_share...
Clean samples: 1086
Date range: 2023-01-02 ‚Üí 2025-12-24

Target distribution:
  Class 0 (down): 47.9%
  Class 1 (up):   52.1%

Running 5-fold walk-forward CV...

üìä RESULTS:
  Accuracy:  0.5193
  Precision: 0.55


üìä RESULTS:
  Accuracy:  0.5397
  Precision: 0.5736
  Recall:    0.6838
  F1 Score:  0.5839
  ROC AUC:   0.5707

ABLATION COMPARISON
                n_features  accuracy  precision    recall        f1   roc_auc
A_price_only            20  0.556425   0.552493  0.834288  0.661993  0.572111
B_onchain_only          27  0.519337   0.558822  0.469713  0.487912  0.538859
C_hybrid                47  0.539665   0.573636  0.683770  0.583876  0.570721

üèÜ Best Accuracy: A_price_only (0.5564)
üèÜ Best F1 Score: A_price_only (0.6620)

FINAL ABLATION ANALYSIS

üìä Accuracy Comparison:
  Model A (price):    55.64%
  Model B (on-chain): 51.93% (1.93% above random)
  Model C (hybrid):   53.97%

üéØ Incremental Value:
  On-chain adds: -1.68% accuracy
  ‚ùå FAIL: On-chain adds no value (or hurts)

üìà ROC AUC:
  Model C: 0.5707 vs Model A: 0.5721 (-0.0014)

üí° Next Steps:
  ‚Ä¢ On-chain signals may be regime-specific
  ‚Ä¢ Try interaction features (price_vol * whale_flow)
  ‚Ä¢ Consider thresh

In [7]:
"""
Complete Enhanced ML Pipeline - Phase 3 & 4 Fixes
Run this script directly on your merged_ml_dataset.csv

This implements:
- Phase 3: Relative price features + gated momentum + interactions
- Phase 4: Confidence-weighted evaluation

No external imports needed - everything is self-contained.
"""

import pandas as pd
import numpy as np
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
import warnings
warnings.filterwarnings('ignore')


# ============================================================================
# STEP 1: BASIC FEATURE ENGINEERING (from your original code)
# ============================================================================

def add_price_features(df, price_col, prefix):
    """Add price-based ML features"""
    df = df.sort_values('block_date').reset_index(drop=True)
    
    df[f'{prefix}_daily_return'] = df[price_col].pct_change()
    df[f'{prefix}_log_return'] = np.log(df[price_col] / df[price_col].shift(1))
    df[f'{prefix}_vol7'] = df[f'{prefix}_daily_return'].rolling(7, min_periods=1).std()
    df[f'{prefix}_vol30'] = df[f'{prefix}_daily_return'].rolling(30, min_periods=1).std()
    
    # RSI
    returns = df[f'{prefix}_daily_return']
    gains = returns.where(returns > 0, 0).rolling(14, min_periods=1).mean()
    losses = -returns.where(returns < 0, 0).rolling(14, min_periods=1).mean()
    rs = gains / (losses + 1e-10)
    df[f'{prefix}_rsi'] = 100 - (100 / (1 + rs))
    
    # Lags
    for lag in [1, 3, 7]:
        df[f'{prefix}_ret_lag{lag}'] = df[f'{prefix}_daily_return'].shift(lag)
    
    return df


def add_correlation_features(df):
    """Add ETH-BTC correlation features"""
    df['eth_btc_ratio'] = df['eth_price'] / df['btc_price']
    df['eth_btc_ratio_ma7'] = df['eth_btc_ratio'].rolling(7, min_periods=1).mean()
    df['eth_btc_corr_30d'] = df['eth_daily_return'].rolling(30, min_periods=20).corr(df['btc_daily_return'])
    df['eth_outperformance'] = df['eth_daily_return'] - df['btc_daily_return']
    return df


# ============================================================================
# STEP 2: PHASE 3 ENHANCEMENTS - RELATIVE FEATURES & GATING
# ============================================================================

def add_phase3_features(df):
    """Add Phase 3 enhancements: relative prices, gated momentum, interactions"""
    
    print("\nüîÑ Phase 3: Creating enhanced features...")
    
    # 1. RELATIVE PRICE FEATURES (replace absolute prices)
    print("  ‚Ä¢ Relative price features...")
    for col in ['eth_price', 'btc_price']:
        prefix = col.split('_')[0]
        
        # Z-scores
        mean_90d = df[col].rolling(90, min_periods=30).mean()
        std_90d = df[col].rolling(90, min_periods=30).std()
        df[f'{prefix}_price_zscore_90d'] = (df[col] - mean_90d) / (std_90d + 1e-10)
        
        # Distance from MAs
        ma_20 = df[col].rolling(20, min_periods=10).mean()
        df[f'{prefix}_pct_from_ma20'] = (df[col] - ma_20) / (ma_20 + 1e-10)
        
        ma_50 = df[col].rolling(50, min_periods=20).mean()
        df[f'{prefix}_pct_from_ma50'] = (df[col] - ma_50) / (ma_50 + 1e-10)
    
    # 2. GATED MOMENTUM (the key innovation!)
    print("  ‚Ä¢ Gated momentum features...")
    df['eth_momentum_valid'] = df['eth_ret_lag1'] * np.sign(df['whale_net_exchange_flow_eth'])
    df['btc_momentum_valid'] = df['btc_ret_lag1'] * np.sign(df['net_exchange_flow_ratio'])
    
    vol_threshold = df['eth_vol7'].quantile(0.5)
    df['eth_momentum_lowvol'] = df['eth_ret_lag1'] * (df['eth_vol7'] < vol_threshold).astype(float)
    
    df['whale_confirms_price'] = (
        np.sign(df['eth_ret_lag1']) == np.sign(df['whale_net_exchange_flow_eth'])
    ).astype(float)
    
    volume_z = (df['whale_volume_eth'] - df['whale_volume_eth'].rolling(30).mean()) / \
               (df['whale_volume_eth'].rolling(30).std() + 1e-10)
    df['momentum_volume_confirmed'] = df['eth_ret_lag1'] * (volume_z > 0.5).astype(float)
    
    # 3. REGIME FEATURES
    print("  ‚Ä¢ Regime features...")
    vol_75th = df['eth_vol30'].quantile(0.75)
    df['high_vol_regime'] = (df['eth_vol30'] > vol_75th).astype(float)
    
    df['trend_strength'] = abs(df['eth_price'].rolling(20).mean() - df['eth_price'].rolling(50).mean())
    trend_25th = df['trend_strength'].quantile(0.25)
    df['choppy_regime'] = (df['trend_strength'] < trend_25th).astype(float)
    
    corr_median = df['eth_btc_corr_30d'].median()
    df['low_corr_regime'] = (df['eth_btc_corr_30d'] < corr_median).astype(float)
    
    # 4. INTERACTIONS
    print("  ‚Ä¢ Interaction features...")
    df['vol_x_whale_flow'] = df['eth_vol7'] * df['whale_net_exchange_flow_eth']
    df['momentum_x_exchange_pressure'] = df['eth_ret_lag1'] * df['net_exchange_flow_ratio']
    df['whale_activity_x_vol'] = df['whale_tx_count'] * df['eth_vol30']
    df['gas_x_momentum'] = df['median_gas_delta_1d'] * df['eth_ret_lag1']
    
    print("  ‚úÖ Phase 3 complete!")
    return df


# ============================================================================
# STEP 3: PHASE 4 - CONFIDENCE-WEIGHTED TARGET
# ============================================================================

def add_phase4_target(df):
    """Add Phase 4: confidence-weighted target"""
    print("\nüîÑ Phase 4: Creating confidence-weighted target...")
    
    df['next_day_return'] = df['eth_price'].pct_change().shift(-1)
    df['next_day_price_direction'] = (df['next_day_return'] > 0).astype(int)
    df['signal_confidence'] = abs(df['next_day_return'])
    
    confidence_median = df['signal_confidence'].median()
    df['high_confidence_sample'] = (df['signal_confidence'] > confidence_median)
    
    print(f"  ‚úÖ Median move: {confidence_median:.4f}")
    print(f"  ‚úÖ High-conf samples: {df['high_confidence_sample'].sum()}")
    
    return df


# ============================================================================
# STEP 4: FEATURE GROUPS
# ============================================================================

def define_feature_groups():
    """Define enhanced feature groups"""
    
    return {
        'D_relative_price': [
            'eth_price_zscore_90d', 'eth_pct_from_ma20', 'eth_pct_from_ma50',
            'btc_price_zscore_90d', 'btc_pct_from_ma20', 'btc_pct_from_ma50',
            'eth_daily_return', 'eth_log_return', 'eth_vol7', 'eth_vol30',
            'btc_daily_return', 'btc_log_return', 'btc_vol7', 'btc_vol30',
            'eth_rsi', 'btc_rsi',
            'eth_ret_lag1', 'eth_ret_lag3', 'eth_ret_lag7',
            'btc_ret_lag1', 'btc_ret_lag3', 'btc_ret_lag7',
            'eth_btc_ratio', 'eth_btc_ratio_ma7', 'eth_btc_corr_30d', 'eth_outperformance'
        ],
        
        'E_gated_hybrid': [
            'eth_price_zscore_90d', 'eth_pct_from_ma20',
            'btc_price_zscore_90d', 'btc_pct_from_ma20',
            'eth_momentum_valid', 'btc_momentum_valid', 'eth_momentum_lowvol',
            'whale_confirms_price', 'momentum_volume_confirmed',
            'high_vol_regime', 'choppy_regime', 'low_corr_regime',
            'vol_x_whale_flow', 'momentum_x_exchange_pressure',
            'whale_activity_x_vol', 'gas_x_momentum',
            'whale_net_exchange_flow_eth', 'whale_tx_zscore_90d',
            'whale_volume_ratio_delta_3d', 'exchange_flow_share',
            'tx_per_active_delta_1d', 'eth_burned_zscore_90d', 'median_gas_delta_7d'
        ]
    }


# ============================================================================
# STEP 5: ENHANCED ABLATION TESTER
# ============================================================================

def run_confidence_weighted_cv(df, features, n_splits=5):
    """Run CV with confidence-weighted evaluation"""
    
    # Prepare data
    required = features + ['next_day_price_direction', 'signal_confidence', 'block_date']
    df_clean = df[required].dropna().sort_values('block_date').reset_index(drop=True)
    
    X = df_clean[features]
    y = df_clean['next_day_price_direction']
    conf = df_clean['signal_confidence']
    
    tscv = TimeSeriesSplit(n_splits=n_splits)
    
    scores_all = {'acc': [], 'prec': [], 'rec': [], 'f1': [], 'auc': []}
    scores_hc = {'acc': [], 'prec': [], 'rec': [], 'f1': [], 'auc': []}
    
    for train_idx, test_idx in tscv.split(X):
        X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
        y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]
        conf_test = conf.iloc[test_idx]
        
        # Scale and train
        scaler = StandardScaler()
        X_train_sc = scaler.fit_transform(X_train)
        X_test_sc = scaler.transform(X_test)
        
        model = GradientBoostingClassifier(n_estimators=100, max_depth=3, learning_rate=0.1, random_state=42)
        model.fit(X_train_sc, y_train)
        
        y_pred = model.predict(X_test_sc)
        y_proba = model.predict_proba(X_test_sc)[:, 1]
        
        # All samples
        scores_all['acc'].append(accuracy_score(y_test, y_pred))
        scores_all['prec'].append(precision_score(y_test, y_pred, zero_division=0))
        scores_all['rec'].append(recall_score(y_test, y_pred, zero_division=0))
        scores_all['f1'].append(f1_score(y_test, y_pred, zero_division=0))
        scores_all['auc'].append(roc_auc_score(y_test, y_proba))
        
        # High confidence only
        hc_mask = conf_test > conf_test.median()
        if hc_mask.sum() > 10:
            y_test_hc = y_test[hc_mask]
            y_pred_hc = y_pred[hc_mask]
            y_proba_hc = y_proba[hc_mask]
            
            scores_hc['acc'].append(accuracy_score(y_test_hc, y_pred_hc))
            scores_hc['prec'].append(precision_score(y_test_hc, y_pred_hc, zero_division=0))
            scores_hc['rec'].append(recall_score(y_test_hc, y_pred_hc, zero_division=0))
            scores_hc['f1'].append(f1_score(y_test_hc, y_pred_hc, zero_division=0))
            scores_hc['auc'].append(roc_auc_score(y_test_hc, y_proba_hc))
    
    return {
        'all': {k: np.mean(v) for k, v in scores_all.items()},
        'hc': {k: np.mean(v) for k, v in scores_hc.items()},
        'n_samples': len(df_clean)
    }


# ============================================================================
# MAIN EXECUTION
# ============================================================================

if __name__ == "__main__":
    
    print("="*70)
    print("ENHANCED ML PIPELINE - PHASE 3 & 4")
    print("="*70)
    
    # 1. Load data
    print("\nüìÇ Loading data...")
    df = pd.read_csv('merged_ml_dataset.csv')
    df['block_date'] = pd.to_datetime(df['block_date'], utc=True)
    print(f"‚úÖ Loaded {len(df)} rows, {len(df.columns)} columns")
    print(f"   Date range: {df['block_date'].min().date()} ‚Üí {df['block_date'].max().date()}")
    
    # 2. Add basic features
    print("\n‚öôÔ∏è Creating basic features...")
    df = add_price_features(df, 'eth_price', 'eth')
    df = add_price_features(df, 'btc_price', 'btc')
    df = add_correlation_features(df)
    
    # 3. Add Phase 3 enhancements
    df = add_phase3_features(df)
    
    # 4. Add Phase 4 target
    df = add_phase4_target(df)
    
    print(f"\n‚úÖ Feature engineering complete: {len(df.columns)} columns")
    
    # 5. Define feature groups
    groups = define_feature_groups()
    
    # Verify features exist
    print("\nüîç Verifying features...")
    for name, feats in groups.items():
        missing = [f for f in feats if f not in df.columns]
        if missing:
            print(f"  ‚ö†Ô∏è {name} missing {len(missing)} features: {missing[:3]}...")
            groups[name] = [f for f in feats if f in df.columns]
        print(f"  ‚úÖ {name}: {len(groups[name])} features available")
    
    # 6. Run ablation tests
    results = {}
    
    print("\n" + "="*70)
    print("MODEL D: RELATIVE PRICE BASELINE")
    print("="*70)
    results['D'] = run_confidence_weighted_cv(df, groups['D_relative_price'])
    print(f"\nAll Samples:     Acc={results['D']['all']['acc']:.4f}, AUC={results['D']['all']['auc']:.4f}")
    print(f"High-Confidence: Acc={results['D']['hc']['acc']:.4f}, AUC={results['D']['hc']['auc']:.4f}")
    print(f"Lift:            {(results['D']['hc']['acc'] - results['D']['all']['acc'])*100:+.2f}%")
    
    print("\n" + "="*70)
    print("MODEL E: GATED HYBRID (THE FIX)")
    print("="*70)
    results['E'] = run_confidence_weighted_cv(df, groups['E_gated_hybrid'])
    print(f"\nAll Samples:     Acc={results['E']['all']['acc']:.4f}, AUC={results['E']['all']['auc']:.4f}")
    print(f"High-Confidence: Acc={results['E']['hc']['acc']:.4f}, AUC={results['E']['hc']['auc']:.4f}")
    print(f"Lift:            {(results['E']['hc']['acc'] - results['E']['all']['acc'])*100:+.2f}%")
    
    # 7. Final comparison
    print("\n" + "="*70)
    print("FINAL VERDICT")
    print("="*70)
    
    d_hc = results['D']['hc']['acc']
    e_hc = results['E']['hc']['acc']
    value = e_hc - d_hc
    
    print(f"\nHigh-Confidence Accuracy:")
    print(f"  Model D (Relative Price): {d_hc:.2%}")
    print(f"  Model E (Gated Hybrid):   {e_hc:.2%}")
    print(f"\nüéØ ON-CHAIN VALUE: {value:+.2%}")
    
    if value > 0.02:
        print("\n‚úÖ SUCCESS! On-chain signals add significant value")
        print("   ‚Üí Proceed with Model E for production")
        print("   ‚Üí Use high-confidence filtering for trading")
    elif value > 0:
        print("\n‚ö†Ô∏è MARGINAL: Small improvement detected")
        print("   ‚Üí May be regime-specific")
        print("   ‚Üí Try splitting by market conditions")
    else:
        print("\n‚ùå FAILURE: Gating didn't solve the problem")
        print("   ‚Üí On-chain may lag price")
        print("   ‚Üí Try leading indicators or regime-split models")
    
    print("\n" + "="*70)
    print("COMPARISON TABLE")
    print("="*70)
    
    comparison = pd.DataFrame({
        'Model D': [results['D']['all']['acc'], results['D']['hc']['acc'], 
                    results['D']['hc']['acc'] - results['D']['all']['acc']],
        'Model E': [results['E']['all']['acc'], results['E']['hc']['acc'],
                    results['E']['hc']['acc'] - results['E']['all']['acc']]
    }, index=['All Samples', 'High-Confidence', 'Lift'])
    
    print(comparison.to_string())
    
    print("\nüíæ Save enhanced dataset? Uncomment below:")
    print("# df.to_csv('enhanced_ml_dataset.csv', index=False)")

ENHANCED ML PIPELINE - PHASE 3 & 4

üìÇ Loading data...
‚úÖ Loaded 1097 rows, 36 columns
   Date range: 2022-12-24 ‚Üí 2025-12-24

‚öôÔ∏è Creating basic features...

üîÑ Phase 3: Creating enhanced features...
  ‚Ä¢ Relative price features...
  ‚Ä¢ Gated momentum features...
  ‚Ä¢ Regime features...
  ‚Ä¢ Interaction features...
  ‚úÖ Phase 3 complete!

üîÑ Phase 4: Creating confidence-weighted target...
  ‚úÖ Median move: 0.0120
  ‚úÖ High-conf samples: 548

‚úÖ Feature engineering complete: 79 columns

üîç Verifying features...
  ‚úÖ D_relative_price: 26 features available
  ‚úÖ E_gated_hybrid: 23 features available

MODEL D: RELATIVE PRICE BASELINE

All Samples:     Acc=0.5390, AUC=0.5573
High-Confidence: Acc=0.5455, AUC=0.5721
Lift:            +0.65%

MODEL E: GATED HYBRID (THE FIX)

All Samples:     Acc=0.5311, AUC=0.5307
High-Confidence: Acc=0.5545, AUC=0.5416
Lift:            +2.35%

FINAL VERDICT

High-Confidence Accuracy:
  Model D (Relative Price): 54.55%
  Model E (Gated 