# Gweizy Model Training Notebook

Train all gas prediction models for Gweizy.

## Instructions:
1. Upload your `gas_data.db` file (from `backend/gas_data.db`)
2. Run all cells
3. Download the trained models zip file
4. Extract to `backend/models/saved_models/` and push to GitHub

In [None]:
# Install dependencies (quiet mode): pandas/numpy/scikit-learn/joblib for data
# handling and modelling, LightGBM and XGBoost as optional boosters,
# matplotlib/seaborn for plots, optuna for tuning.
# NOTE(review): versions are not pinned, so results may differ between runs on
# different dates; consider pinning and using `%pip` so the install targets the
# running kernel.
!pip install -q scikit-learn pandas numpy joblib lightgbm xgboost matplotlib seaborn optuna

In [None]:
# Ask the user to upload the SQLite database through the Colab file picker
from google.colab import files
import os

print("Upload your gas_data.db file from backend/gas_data.db")
uploaded = files.upload()

# `uploaded` maps each uploaded filename to its raw bytes
if 'gas_data.db' not in uploaded:
    print("❌ Please upload gas_data.db")
else:
    size_mb = len(uploaded['gas_data.db']) / 1024 / 1024
    print(f"\n✅ Uploaded gas_data.db ({size_mb:.1f} MB)")

In [None]:
import sqlite3
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Load data from database.
# The connection is released in a `finally` block so that a failing query
# (missing table, corrupt upload, ...) does not leave the SQLite handle open
# in the kernel.
conn = sqlite3.connect('gas_data.db')
try:
    df = pd.read_sql("""
    SELECT timestamp, current_gas as gas, base_fee, priority_fee, 
           block_number, gas_used, gas_limit, utilization
    FROM gas_prices ORDER BY timestamp ASC
""", conn)
finally:
    conn.close()

# Index by time so the frame can be resampled
df['timestamp'] = pd.to_datetime(df['timestamp'])
df = df.set_index('timestamp').sort_index()

print(f"Total records: {len(df):,}")
print(f"Date range: {df.index.min()} to {df.index.max()}")

# === IMPROVED: Resample to 30-second intervals (was 1-min, losing too much data) ===
# Each 30-second bucket is averaged; buckets without any gas reading are dropped,
# which is what creates the gaps detected below.
print("\nResampling to 30-second intervals (preserves more data)...")
df = df.resample('30s').mean().dropna(subset=['gas'])
print(f"After resample: {len(df):,} records")

# Find segments (gap > 30 min = new segment)
# The cumulative sum of the "gap" flag yields a running segment id.
df['time_diff'] = df.index.to_series().diff()
df['segment'] = (df['time_diff'] > pd.Timedelta(minutes=30)).cumsum()

segment_sizes = df.groupby('segment').size()
print(f"\nSegments found: {len(segment_sizes)}")
print(f"Segment sizes: {segment_sizes.sort_values(ascending=False).head(10).tolist()}")

# === IMPROVED: Lower threshold from 120 to 30 minutes (keeps more segments) ===
MIN_SEGMENT_SIZE = 60  # 30 minutes at 30-sec intervals = 60 records
good_segments = segment_sizes[segment_sizes >= MIN_SEGMENT_SIZE].index.tolist()
df = df[df['segment'].isin(good_segments)]
print(f"\nKeeping {len(good_segments)} segments with >= 30 minutes of data")
print(f"Total usable records: {len(df):,}")

# === DATA SUFFICIENCY CHECK ===
# Advisory only: the notebook continues even when below the recommended size.
MIN_REQUIRED_SAMPLES = 10000
if len(df) < MIN_REQUIRED_SAMPLES:
    print(f"\n⚠️  WARNING: Only {len(df):,} samples. Recommend at least {MIN_REQUIRED_SAMPLES:,}")
    print("   Models may underperform. Consider collecting more data.")
else:
    print(f"\n✓ Data sufficiency check passed: {len(df):,} samples")

RECORDS_PER_HOUR = 120  # 30-sec intervals = 120 records per hour

In [None]:
# Fetch ETH Price Data - IMPROVED with Binance (1-minute data)
import requests

print("="*60, "FETCHING EXTERNAL DATA", "="*60, sep="\n")

def fetch_eth_price_binance(start_date, end_date):
    """Download 1-minute ETHUSDT candles from the Binance klines endpoint.

    Pages through the range [start_date, end_date] in requests of up to 1000
    candles. Returns a DataFrame indexed by candle timestamp with columns
    eth_price (close), eth_volume, eth_high and eth_low, or None when nothing
    was fetched or any exception occurred. A non-200 response stops paging and
    whatever was collected so far is returned.
    """
    klines_url = "https://api.binance.com/api/v3/klines"
    minute_ms = 60000
    try:
        cursor_ms = int(start_date.timestamp() * 1000)
        stop_ms = int(end_date.timestamp() * 1000)

        candles = []

        print(f"Fetching ETH prices from Binance (1-min candles)...")

        while cursor_ms < stop_ms:
            query = {
                'symbol': 'ETHUSDT',
                'interval': '1m',
                'startTime': cursor_ms,
                'endTime': min(cursor_ms + 1000 * minute_ms, stop_ms),  # Max 1000 candles
                'limit': 1000
            }
            response = requests.get(klines_url, params=query, timeout=30)

            # Stop paging on any HTTP error
            if response.status_code != 200:
                print(f"  Binance API error: {response.status_code}")
                break

            batch = response.json()
            if not batch:
                break

            # Kline layout used here: [0]=time, [2]=high, [3]=low, [4]=close, [5]=volume
            for kline in batch:
                candles.append({
                    'timestamp': pd.to_datetime(kline[0], unit='ms'),
                    'eth_price': float(kline[4]),  # Close price
                    'eth_volume': float(kline[5]),  # Volume
                    'eth_high': float(kline[2]),
                    'eth_low': float(kline[3])
                })

            # Resume one minute after the last candle received
            cursor_ms = batch[-1][0] + minute_ms

            if len(candles) % 5000 == 0:
                print(f"  Fetched {len(candles):,} candles...")

        if not candles:
            return None

        eth_df = pd.DataFrame(candles).set_index('timestamp')
        print(f"  Total: {len(eth_df):,} 1-minute ETH candles")
        return eth_df

    except Exception as e:
        print(f"  Failed to fetch from Binance: {e}")
        return None

def fetch_eth_price_coingecko(start_date, end_date):
    """Fallback price source: CoinGecko market_chart/range (hourly data).

    Returns a DataFrame indexed by timestamp with eth_price, an all-NaN
    eth_volume, and eth_high/eth_low both set to eth_price. Returns None on a
    non-200 response or on any exception.
    """
    endpoint = "https://api.coingecko.com/api/v3/coins/ethereum/market_chart/range"
    try:
        query = {
            'vs_currency': 'usd',
            'from': int(start_date.timestamp()),
            'to': int(end_date.timestamp()),
        }

        print(f"Fallback: Fetching from CoinGecko (hourly)...")
        response = requests.get(endpoint, params=query, timeout=30)

        if response.status_code != 200:
            return None

        points = response.json().get('prices', [])

        eth_df = pd.DataFrame(points, columns=['timestamp', 'eth_price'])
        eth_df['timestamp'] = pd.to_datetime(eth_df['timestamp'], unit='ms')
        eth_df = eth_df.set_index('timestamp')
        # This source provides no volume or candle range
        eth_df = eth_df.assign(
            eth_volume=np.nan,
            eth_high=eth_df['eth_price'],
            eth_low=eth_df['eth_price'],
        )

        print(f"  Fetched {len(eth_df)} hourly ETH prices")
        return eth_df
    except Exception as e:
        print(f"  CoinGecko failed: {e}")
        return None

# Try Binance first, fallback to CoinGecko.
# `eth_resolution` records which source actually supplied the data so the
# status message below does not claim 1-minute resolution for hourly data.
ETH_COLUMNS = ['eth_price', 'eth_volume', 'eth_high', 'eth_low']

eth_resolution = '1-min'
eth_data = fetch_eth_price_binance(df.index.min(), df.index.max())
if eth_data is None or len(eth_data) < 100:
    eth_resolution = 'hourly'
    eth_data = fetch_eth_price_coingecko(df.index.min(), df.index.max())

# Make the cell re-runnable: a second run would otherwise fail in `join`
# because the ETH columns from the first run are still present in `df`.
df = df.drop(columns=[c for c in ETH_COLUMNS if c in df.columns])

has_eth_data = False
if eth_data is not None and len(eth_data) > 0:
    # Resample to 30-second intervals
    eth_data = eth_data.resample('30s').ffill()
    
    # Merge with gas data
    df = df.join(eth_data, how='left')
    # NOTE(review): bfill fills leading gaps with later prices (look-ahead);
    # confirm this is acceptable for training data.
    df['eth_price'] = df['eth_price'].ffill().bfill()
    
    # Fill other ETH columns
    for col in ['eth_volume', 'eth_high', 'eth_low']:
        if col in df.columns:
            df[col] = df[col].ffill().bfill()
    
    eth_coverage = df['eth_price'].notna().mean()
    print(f"  ETH price coverage: {eth_coverage:.1%}")
    
    if eth_coverage > 0.5:
        has_eth_data = True
        print(f"  ✓ ETH price data integrated ({eth_resolution} resolution)")
else:
    print("  ⚠️ No ETH price data available")
    # Keep the columns present (all NaN) so later cells can reference them
    for col in ETH_COLUMNS:
        df[col] = np.nan

HAS_ETH_PRICE = has_eth_data

In [None]:
# Feature Engineering - IMPROVED v2
# Additions: Fourier seasonality, GARCH volatility, better ETH features, horizon-specific
# Fixes: Removed duplicate lag features

print("Engineering IMPROVED feature set...")

def engineer_features_for_segment(seg_df, has_eth=False, horizon='all'):
    """Build the model feature columns for one contiguous data segment.

    Works on a copy of `seg_df` and returns it with the feature columns added;
    the input frame is not modified.

    Parameters
    ----------
    seg_df : DataFrame with a DatetimeIndex (``.hour``, ``.minute`` and
        ``.dayofweek`` are read from it) and at least a 'gas' column. The
        'utilization' and 'eth_*' columns are used only when present.
    has_eth : when True (and 'eth_price' has at least one value) the ETH
        momentum / z-score / range / volume / correlation features are added.
    horizon : '4h', '24h' or 'all' enable the longer lag and rolling-window
        features; any other value produces only the short-term set.

    Notes
    -----
    All shifts and rolling windows are expressed in rows, assuming 120 rows per
    hour (30-second spacing). Rolling calls without ``min_periods`` yield NaN
    until the window is full; the ``np.where`` guards then fall back to their
    default value because comparisons against NaN are False.
    The order in which columns are created here determines the column order of
    the returned frame.
    """
    df = seg_df.copy()
    rph = 120  # records per hour (30-sec intervals)
    
    # === Log transform gas ===
    df['gas_log'] = np.log1p(df['gas'])
    
    # === FOURIER SEASONALITY FEATURES (NEW) ===
    # Capture daily and weekly patterns
    hour_of_day = df.index.hour + df.index.minute / 60  # fractional hour
    day_of_week = df.index.dayofweek
    
    # Daily seasonality (24-hour cycle)
    for k in [1, 2, 3]:  # Multiple harmonics
        df[f'hour_sin_{k}'] = np.sin(2 * np.pi * k * hour_of_day / 24)
        df[f'hour_cos_{k}'] = np.cos(2 * np.pi * k * hour_of_day / 24)
    
    # Weekly seasonality (7-day cycle)
    df['dow_sin'] = np.sin(2 * np.pi * day_of_week / 7)
    df['dow_cos'] = np.cos(2 * np.pi * day_of_week / 7)
    
    # Time features
    df['hour'] = df.index.hour
    # NOTE(review): the 14-22 window is in whatever timezone the index carries — confirm (UTC?)
    df['is_peak_hours'] = ((df['hour'] >= 14) & (df['hour'] <= 22)).astype(int)
    df['is_weekend'] = (day_of_week >= 5).astype(int)
    
    # === IMPROVED ETH FEATURES (with volume and range) ===
    if has_eth and 'eth_price' in df.columns and df['eth_price'].notna().any():
        df['eth_log'] = np.log1p(df['eth_price'])
        
        # Price momentum
        for mins in [15, 30, 60]:
            periods = mins * 2  # 30-sec intervals
            shift_val = df['eth_price'].shift(periods)
            # Rows without a valid earlier price get 0 instead of NaN
            df[f'eth_pct_{mins}min'] = np.where(shift_val > 0, 
                (df['eth_price'] - shift_val) / shift_val, 0)
        
        # Volatility
        # z-score of price against its trailing 4-hour mean/std; 0 when std is tiny or undefined
        eth_mean = df['eth_price'].rolling(4*rph).mean()
        eth_std = df['eth_price'].rolling(4*rph).std()
        df['eth_zscore'] = np.where(eth_std > 0.01, (df['eth_price'] - eth_mean) / eth_std, 0)
        
        # ETH range (high-low) if available
        if 'eth_high' in df.columns and 'eth_low' in df.columns:
            # 1e-8 guards against division by zero
            df['eth_range'] = (df['eth_high'] - df['eth_low']) / (df['eth_price'] + 1e-8)
            df['eth_range_1h'] = df['eth_range'].rolling(rph).mean()
        
        # Volume features if available
        if 'eth_volume' in df.columns and df['eth_volume'].notna().any():
            df['eth_vol_zscore'] = (df['eth_volume'] - df['eth_volume'].rolling(rph).mean()) / \
                                   (df['eth_volume'].rolling(rph).std() + 1e-8)
            df['eth_vol_zscore'] = df['eth_vol_zscore'].clip(-5, 5)
        
        # Gas-ETH correlation
        # Trailing 1-hour correlation; undefined values become 0
        df['gas_eth_corr'] = df['gas'].rolling(rph).corr(df['eth_price']).fillna(0)
    
    # === NETWORK UTILIZATION ===
    if 'utilization' in df.columns:
        # min_periods lets the statistic start once half a window is available
        df['util_mean_1h'] = df['utilization'].rolling(rph, min_periods=rph//2).mean()
        df['util_std_1h'] = df['utilization'].rolling(rph, min_periods=rph//2).std()
    
    # === GAS LAG FEATURES (fixed - no duplicates) ===
    # Short-term lags (for 1h prediction)
    for lag_mins in [5, 15, 30]:
        lag_periods = lag_mins * 2
        df[f'gas_lag_{lag_mins}min'] = df['gas'].shift(lag_periods)
    
    # Hourly lags (for all horizons)
    for lag_hours in [1, 2, 4]:
        df[f'gas_lag_{lag_hours}h'] = df['gas'].shift(lag_hours * rph)
    
    # Long-term lags (for 24h prediction only)
    # These are all NaN when the segment is shorter than the lag
    if horizon in ['24h', 'all']:
        for lag_hours in [6, 12, 24]:
            df[f'gas_lag_{lag_hours}h'] = df['gas'].shift(lag_hours * rph)
    
    # === ROLLING STATS ===
    # Short windows (1h, 2h)
    for window_hours in [1, 2]:
        window = window_hours * rph
        prefix = f'{window_hours}h'
        df[f'gas_mean_{prefix}'] = df['gas'].rolling(window, min_periods=window//2).mean()
        df[f'gas_std_{prefix}'] = df['gas'].rolling(window, min_periods=window//2).std()
        df[f'gas_median_{prefix}'] = df['gas'].rolling(window, min_periods=window//2).median()
        mean_val = df[f'gas_mean_{prefix}']
        # Coefficient of variation; 0 when the mean is near zero or undefined
        df[f'gas_cv_{prefix}'] = np.where(mean_val > 0.01, df[f'gas_std_{prefix}'] / mean_val, 0)
    
    # Long windows (for 4h/24h)
    if horizon in ['4h', '24h', 'all']:
        for window_hours in [4, 8]:
            window = window_hours * rph
            prefix = f'{window_hours}h'
            df[f'gas_mean_{prefix}'] = df['gas'].rolling(window, min_periods=window//2).mean()
            df[f'gas_std_{prefix}'] = df['gas'].rolling(window, min_periods=window//2).std()
    
    # Very long windows (24h only)
    if horizon in ['24h', 'all']:
        for window_hours in [12, 24]:
            window = window_hours * rph
            # Column is only created for segments longer than the window, so
            # different segments can return different column sets
            if len(df) > window:
                prefix = f'{window_hours}h'
                df[f'gas_mean_{prefix}'] = df['gas'].rolling(window, min_periods=window//2).mean()
    
    # === GARCH-STYLE VOLATILITY CLUSTERING (NEW) ===
    # Squared returns (proxy for volatility)
    gas_returns = df['gas'].pct_change().fillna(0)
    df['gas_return_sq'] = gas_returns ** 2
    
    # Rolling volatility of volatility
    df['vol_1h'] = df['gas_return_sq'].rolling(rph).mean()
    df['vol_4h'] = df['gas_return_sq'].rolling(4*rph).mean()
    
    # Volatility ratio (short/long) - high means volatility increasing
    df['vol_ratio'] = np.where(df['vol_4h'] > 1e-8, df['vol_1h'] / df['vol_4h'], 1.0)
    df['vol_ratio'] = df['vol_ratio'].clip(0.1, 10)  # Clip extremes
    
    # Volatility trend
    df['vol_trend'] = df['vol_1h'] - df['vol_1h'].shift(rph)
    
    # === MOMENTUM ===
    df['momentum_1h'] = df['gas'] - df['gas'].shift(rph)
    df['momentum_2h'] = df['gas'] - df['gas'].shift(2*rph)
    
    # Momentum percentage (safe division)
    for hours in [1, 2]:
        shift_val = df['gas'].shift(hours * rph)
        df[f'momentum_pct_{hours}h'] = np.where(shift_val > 0.01, 
            (df['gas'] - shift_val) / shift_val, 0)
    
    # === TREND ===
    # Ratio of short to long rolling mean; 1.0 when the denominator is unusable
    mean_1h = df['gas_mean_1h']
    mean_2h = df['gas_mean_2h']
    df['trend_1h_2h'] = np.where(mean_2h > 0.01, mean_1h / mean_2h, 1.0)
    
    # gas_mean_4h exists only when `horizon` enabled the long windows
    if 'gas_mean_4h' in df.columns:
        mean_4h = df['gas_mean_4h']
        df['trend_1h_4h'] = np.where(mean_4h > 0.01, mean_1h / mean_4h, 1.0)
    
    # === Z-SCORE ===
    df['gas_zscore_1h'] = np.where(df['gas_std_1h'] > 0.001, 
        (df['gas'] - df['gas_mean_1h']) / df['gas_std_1h'], 0)
    
    # === SPIKE/REGIME INDICATORS ===
    # Comparisons against NaN thresholds evaluate to False, i.e. 0
    df['is_spike'] = (df['gas'] > df['gas_mean_1h'] + 2 * df['gas_std_1h']).astype(int)
    df['is_high_gas'] = (df['gas'] > df['gas'].rolling(4*rph).quantile(0.9)).astype(int)
    
    # Volatility regime
    rolling_std = df['gas'].rolling(4*rph).std()
    # NOTE(review): overall_std is taken over the whole segment, so it includes
    # rows later than the one being labelled
    overall_std = df['gas'].std()
    vol_normalized = np.where(overall_std > 0.001, rolling_std / overall_std, 1.0)
    # Rows whose rolling std is still NaN get no label (NaN category); with
    # pd.cut's default right-closed bins a value of exactly 0 is also unlabelled
    df['volatility_regime'] = pd.cut(
        vol_normalized,
        bins=[0, 0.5, 1.5, float('inf')],
        labels=['low', 'normal', 'high']
    )
    
    return df

# Engineer features independently for every segment so that lags and rolling
# windows never reach across a gap in the data
print("\nProcessing segments...")
segments = df['segment'].unique()
processed_segments = []

# sort=False keeps segments in order of first appearance, like unique()
for seg_id, seg_df in df.groupby('segment', sort=False):
    processed = engineer_features_for_segment(
        seg_df.copy(), has_eth=has_eth_data, horizon='all'
    )
    processed_segments.append(processed)

df_features = pd.concat(processed_segments, axis=0)
print(f"After feature engineering: {len(df_features):,} records")

# Create targets
print("\nCreating prediction targets...")

def create_targets_for_segment(seg_df):
    """Attach prediction targets (future gas prices) to a copy of one segment.

    Adds target_{1h,4h,24h} (gas shifted back by 1/4/24 hours of rows at 120
    rows per hour), target_diff_{1h,4h} (future minus current gas) and
    direction_class_{1h,4h} ('down' / 'stable' / 'up' using a 2% relative
    change threshold). Rows without a future value keep NaN.
    """
    rph = 120
    threshold = 0.02  # 2% change threshold
    out = seg_df.copy()
    gas = out['gas']

    # Future prices
    for hours in (1, 4, 24):
        out[f'target_{hours}h'] = gas.shift(-hours * rph)

    # Price changes
    for label in ('1h', '4h'):
        out[f'target_diff_{label}'] = out[f'target_{label}'] - gas

    # Direction classification
    edges = [-float('inf'), -threshold, threshold, float('inf')]
    for label in ('1h', '4h'):
        # Relative move; rows with near-zero current gas count as 0 (stable)
        rel_move = np.where(gas > 0.001, (out[f'target_{label}'] - gas) / gas, 0)
        out[f'direction_class_{label}'] = pd.cut(
            rel_move, bins=edges, labels=['down', 'stable', 'up']
        )

    return out

# Targets are created one segment at a time so the negative shifts never pull
# a "future" value from a different segment.
processed_with_targets = []
for seg_id in df_features['segment'].unique():
    seg_df = df_features[df_features['segment'] == seg_id].copy()
    processed = create_targets_for_segment(seg_df)
    processed_with_targets.append(processed)

df_features = pd.concat(processed_with_targets, axis=0)

# === CLEAN INF/NAN VALUES ===
# For every numeric column (this includes the target_* columns): turn +/-inf
# into NaN, then clip to the column's 0.1% / 99.9% quantiles. Columns that are
# entirely NaN are left untouched.
print("\nCleaning inf/nan values...")
for col in df_features.select_dtypes(include=[np.number]).columns:
    df_features[col] = df_features[col].replace([np.inf, -np.inf], np.nan)
    if df_features[col].notna().sum() > 0:
        q_low = df_features[col].quantile(0.001)
        q_high = df_features[col].quantile(0.999)
        df_features[col] = df_features[col].clip(q_low, q_high)

# NOTE(review): this fill runs over the whole concatenated frame, so (a) values
# are carried across segment boundaries, (b) bfill copies later values into
# earlier rows, and (c) the NaN targets at the end of each segment are replaced
# by neighbouring target values instead of being dropped. That looks like
# label/feature leakage — confirm intent. Columns that are entirely NaN stay NaN.
df_features = df_features.ffill().bfill()

inf_count = np.isinf(df_features.select_dtypes(include=[np.number])).sum().sum()
nan_count = df_features.select_dtypes(include=[np.number]).isna().sum().sum()
print(f"  After cleaning: {inf_count} inf, {nan_count} nan values")
print(f"Records with targets: {len(df_features):,}")

# === DEFINE HORIZON-SPECIFIC FEATURE SETS ===
# Everything that is not a raw input, bookkeeping column or target is a feature.
all_feature_cols = [c for c in df_features.columns if c not in [
    'gas', 'gas_log', 'base_fee', 'priority_fee', 'block_number',
    'gas_used', 'gas_limit', 'utilization', 'eth_price', 'eth_volume', 
    'eth_high', 'eth_low', 'segment', 'time_diff', 'gas_return_sq',
    'target_1h', 'target_4h', 'target_24h', 'target_diff_1h', 'target_diff_4h',
    'direction_class_1h', 'direction_class_4h', 'volatility_regime'
]]

# The horizon-specific sets below are selected by substring match on the
# column name (e.g. any name containing '12h' is excluded from the 1h set).

# 1h features: focus on short-term, micro features
features_1h = [c for c in all_feature_cols if not any(x in c for x in ['12h', '24h', '6h', '8h'])]

# 4h features: include medium-term
features_4h = [c for c in all_feature_cols if not any(x in c for x in ['12h', '24h'])]

# 24h features: include all, especially long-term
features_24h = all_feature_cols.copy()

print(f"\n✓ Feature sets defined:")
print(f"  1h model: {len(features_1h)} features")
print(f"  4h model: {len(features_4h)} features") 
print(f"  24h model: {len(features_24h)} features")

In [None]:
# Prepare training data - with horizon-specific features and out-of-time holdout
from sklearn.preprocessing import RobustScaler

def _usable_numeric_features(candidates):
    """Return the numeric columns among `candidates` that contain at least one value.

    Columns that are entirely NaN (e.g. a 24h lag when no segment is that long)
    carry no information and would otherwise make every row look incomplete.
    """
    numeric_cols = df_features[candidates].select_dtypes(include=[np.number]).columns
    return [c for c in numeric_cols if df_features[c].notna().any()]

# Only keep numeric, non-empty columns
numeric_features_1h = _usable_numeric_features(features_1h)
numeric_features_4h = _usable_numeric_features(features_4h)
numeric_features_24h = _usable_numeric_features(features_24h)

print(f"Numeric features: 1h={len(numeric_features_1h)}, 4h={len(numeric_features_4h)}, 24h={len(numeric_features_24h)}")

# Drop rows with NaN — but only in the columns the models actually use.
# An unqualified dropna() removes every row as soon as any column is entirely
# NaN, which happens for the ETH columns when no price data was fetched, for
# eth_volume with the CoinGecko fallback, and for target_24h when no segment
# spans 24 hours. target_24h is deliberately not required here; the 24h
# training step filters on it itself.
required_cols = list(dict.fromkeys(
    numeric_features_1h + numeric_features_4h + numeric_features_24h
    + ['gas', 'target_1h', 'target_4h', 'target_diff_1h', 'target_diff_4h']
))
df_clean = df_features.dropna(subset=required_cols)
print(f"Clean samples: {len(df_clean):,}")
if df_clean.empty:
    raise ValueError(
        "No usable training rows: every row is missing a required feature or a "
        "1h/4h target. Check that the data contains segments longer than 4 hours."
    )

# === OUT-OF-TIME HOLDOUT (last 2 days) ===
holdout_hours = 48
rph = 120
holdout_size = holdout_hours * rph

# .copy() so the clean-up below writes to an independent frame instead of a
# slice of df_clean
if len(df_clean) > holdout_size + 5000:
    df_train_val = df_clean.iloc[:-holdout_size].copy()
    df_holdout = df_clean.iloc[-holdout_size:]
    print(f"\n✓ Out-of-time holdout: {len(df_holdout):,} samples (last {holdout_hours}h)")
    print(f"  Training/validation: {len(df_train_val):,} samples")
    HAS_HOLDOUT = True
else:
    df_train_val = df_clean.copy()
    df_holdout = None
    print(f"\n⚠️ Not enough data for holdout, using all for training")
    HAS_HOLDOUT = False

# Final safety check, limited to the columns that feed the models
float_cols = df_train_val[required_cols].select_dtypes(include=[np.floating]).columns
for col in float_cols:
    df_train_val[col] = df_train_val[col].replace([np.inf, -np.inf], np.nan)
    if df_train_val[col].isna().any():
        df_train_val[col] = df_train_val[col].fillna(df_train_val[col].median())

has_inf = any(np.isinf(df_train_val[col]).any() for col in float_cols)
has_nan = any(np.isnan(df_train_val[col]).any() for col in float_cols)
assert not has_inf, "Data still contains inf!"
assert not has_nan, "Data still contains nan!"
print("✓ Data validated: no inf/nan values")

# Prepare feature matrices for each horizon
X_1h = df_train_val[numeric_features_1h]
X_4h = df_train_val[numeric_features_4h]
X_24h = df_train_val[numeric_features_24h]

y_1h = df_train_val['target_1h']
y_4h = df_train_val['target_4h']
y_24h = df_train_val['target_24h']  # may contain NaN; filtered by the 24h step

y_diff_1h = df_train_val['target_diff_1h']
y_diff_4h = df_train_val['target_diff_4h']

y_dir_1h = df_train_val['direction_class_1h']
y_dir_4h = df_train_val['direction_class_4h']

volatility_regime = df_train_val['volatility_regime']
current_gas = df_train_val['gas']

# === BASELINE MODELS ===
print(f"\n{'='*60}")
print("BASELINE COMPARISONS")
print(f"{'='*60}")

# Naive baseline: predict that the price stays at its current value
naive_mae_1h = np.mean(np.abs(y_1h.values - current_gas.values))
naive_mae_4h = np.mean(np.abs(y_4h.values - current_gas.values))

# Mean baseline: predict each horizon's own average target
mean_mae_1h = np.mean(np.abs(y_1h.values - y_1h.mean()))
mean_mae_4h = np.mean(np.abs(y_4h.values - y_4h.mean()))

print(f"\nBaseline MAEs:")
print(f"  Naive (current price):     MAE_1h={naive_mae_1h:.6f}, MAE_4h={naive_mae_4h:.6f}")
print(f"  Mean (historical average): MAE_1h={mean_mae_1h:.6f}, MAE_4h={mean_mae_4h:.6f}")

best_baseline_1h = min(naive_mae_1h, mean_mae_1h)
best_baseline_4h = min(naive_mae_4h, mean_mae_4h)

print(f"\n  Best baseline 1h: {best_baseline_1h:.6f}")
print(f"  Best baseline 4h: {best_baseline_4h:.6f}")

BASELINES = {
    '1h': {'naive_mae': naive_mae_1h, 'mean_mae': mean_mae_1h, 'best': best_baseline_1h},
    '4h': {'naive_mae': naive_mae_4h, 'mean_mae': mean_mae_4h, 'best': best_baseline_4h}
}

FEATURE_IMPORTANCE = {}

print(f"\n{'='*60}")
print("TRAINING DATA SUMMARY")
print(f"{'='*60}")
print(f"Training samples: {len(df_train_val):,}")
print(f"Holdout samples: {len(df_holdout) if df_holdout is not None else 0:,}")
print(f"Features 1h: {len(numeric_features_1h)}")
print(f"Features 4h: {len(numeric_features_4h)}")
print(f"Features 24h: {len(numeric_features_24h)}")

In [None]:
# Model Training - IMPROVED v2
# Additions: LightGBM, XGBoost, Stacking Ensemble, Purged Walk-Forward, Regime-Specific
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, StackingRegressor
from sklearn.linear_model import Ridge, HuberRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import TimeSeriesSplit
import joblib
import warnings
warnings.filterwarnings('ignore')

# Optional boosters: each flag records whether the import succeeded so later
# cells can decide which models to include
try:
    import lightgbm as lgb
except ImportError:
    HAS_LIGHTGBM = False
    print("⚠️ LightGBM not available, using sklearn alternatives")
else:
    HAS_LIGHTGBM = True
    print("✓ LightGBM available")

try:
    import xgboost as xgb
except ImportError:
    HAS_XGBOOST = False
    print("⚠️ XGBoost not available")
else:
    HAS_XGBOOST = True
    print("✓ XGBoost available")

# Minimum relative MAE reduction over the baseline for a model to "pass"
MINIMUM_IMPROVEMENT = 0.05

def check_baseline_gate(model_mae, baseline_mae, model_name):
    """Report whether a model's MAE beats the baseline by MINIMUM_IMPROVEMENT.

    Returns (passed, improvement) where improvement is the relative MAE
    reduction versus `baseline_mae`. `model_name` is accepted but not used.
    """
    improvement = (baseline_mae - model_mae) / baseline_mae
    passed = improvement >= MINIMUM_IMPROVEMENT
    if not passed:
        print(f"  ✗ FAILED baseline gate: {improvement*100:.1f}% (need {MINIMUM_IMPROVEMENT*100:.0f}%+)")
    else:
        print(f"  ✓ PASSED baseline gate: {improvement*100:.1f}% improvement")
    return passed, improvement

def evaluate_model(y_true, y_pred, baseline_mae):
    """Compute MAE, RMSE, R², relative improvement over the baseline, and the
    share of consecutive steps where prediction and truth move in the same
    direction (0 when fewer than two samples are given)."""
    mae = mean_absolute_error(y_true, y_pred)
    improvement = (baseline_mae - mae) / baseline_mae

    dir_acc = 0
    if len(y_true) > 1:
        # Sign of step-to-step change in truth vs. prediction
        true_steps = np.sign(np.diff(y_true))
        pred_steps = np.sign(np.diff(y_pred))
        dir_acc = np.mean(true_steps == pred_steps)

    metrics = {
        'mae': mae,
        'rmse': np.sqrt(mean_squared_error(y_true, y_pred)),
        'r2': r2_score(y_true, y_pred),
    }
    metrics['improvement'] = improvement
    metrics['vs_baseline'] = f"{improvement*100:+.1f}%"
    metrics['directional_accuracy'] = dir_acc
    return metrics

# === PURGED WALK-FORWARD VALIDATION ===
def purged_walk_forward_validate(model_class, model_params, X, y, baseline_mae, 
                                  n_splits=5, purge_gap=120):
    """
    Walk-forward validation with purge gap to prevent leakage.

    The data is cut into ``n_splits + 1`` equal folds. Fold k trains on
    everything up to the end of fold k and tests on the following fold,
    skipping ``purge_gap`` rows in between. A fresh RobustScaler is fit on each
    training window.

    purge_gap: number of samples to skip between train and test (default 1 hour at 30-sec intervals)

    The final test window is truncated at the end of the data rather than
    discarded: with a non-zero purge gap the untruncated window always runs
    past the last row, which would silently reduce the number of evaluated
    folds to ``n_splits - 1``. Windows shorter than half a fold are skipped
    because their MAE would be too noisy.

    Returns None when no fold could be evaluated, otherwise a dict with
    'avg_mae', 'std_mae', 'fold_maes' and 'improvement' (relative MAE reduction
    versus ``baseline_mae``; NaN when the baseline is 0).
    """
    n = len(X)
    fold_size = n // (n_splits + 1)
    if fold_size == 0:
        # Not even one row per fold
        return None

    min_test_size = max(1, fold_size // 2)
    fold_results = []
    
    for fold in range(n_splits):
        train_end = fold_size * (fold + 1)
        test_start = train_end + purge_gap  # Add gap to prevent leakage
        test_end = min(test_start + fold_size, n)  # truncate instead of dropping
        
        if test_end - test_start < min_test_size:
            break
            
        X_train = X.iloc[:train_end]
        X_test = X.iloc[test_start:test_end]
        y_train = y.iloc[:train_end]
        y_test = y.iloc[test_start:test_end]
        
        # Scaler statistics come from the training window only
        scaler = RobustScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)
        
        model = model_class(**model_params)
        model.fit(X_train_scaled, y_train)
        
        y_pred = model.predict(X_test_scaled)
        mae = mean_absolute_error(y_test, y_pred)
        fold_results.append(mae)
    
    if not fold_results:
        return None

    avg_mae = np.mean(fold_results)
    improvement = (baseline_mae - avg_mae) / baseline_mae if baseline_mae else float('nan')
        
    return {
        'avg_mae': avg_mae,
        'std_mae': np.std(fold_results),
        'fold_maes': fold_results,
        'improvement': improvement
    }

# === MODEL DEFINITIONS ===
def get_models_to_try():
    """Return (name, estimator class, constructor kwargs) candidates.

    The four scikit-learn models are always present; LightGBM and XGBoost are
    appended when their imports succeeded.
    """
    candidates = [
        ('Ridge', Ridge, dict(alpha=1.0, random_state=42)),
        ('Huber', HuberRegressor, dict(epsilon=1.35, alpha=0.1, max_iter=1000)),
        ('RF', RandomForestRegressor,
         dict(n_estimators=100, max_depth=8, random_state=42, n_jobs=-1)),
        ('GBM', GradientBoostingRegressor,
         dict(n_estimators=100, max_depth=5, learning_rate=0.1, random_state=42)),
    ]

    # Settings common to both optional boosters
    shared_boost = dict(
        n_estimators=200, max_depth=6, learning_rate=0.05,
        subsample=0.8, colsample_bytree=0.8,
        reg_alpha=0.1, reg_lambda=0.1,
        random_state=42, n_jobs=-1,
    )

    if HAS_LIGHTGBM:
        lgb_params = dict(shared_boost, num_leaves=31, min_child_samples=20, verbose=-1)
        candidates.append(('LightGBM', lgb.LGBMRegressor, lgb_params))

    if HAS_XGBOOST:
        xgb_params = dict(shared_boost, verbosity=0)
        candidates.append(('XGBoost', xgb.XGBRegressor, xgb_params))

    return candidates

# === STACKING ENSEMBLE ===
def train_stacking_ensemble(X_train, y_train, X_test, y_test, scaler):
    """Fit a stacking regressor and score it on the test split.

    Base learners are Huber and a random forest (plus LightGBM when
    available); a Ridge model combines them using 3-fold internal CV.
    Inputs are used as given (the caller scales them); `scaler` is accepted
    but not used. Returns (fitted model, test MAE).
    """
    print("  Training stacking ensemble...")

    learners = [
        ('huber', HuberRegressor(epsilon=1.35, alpha=0.1, max_iter=1000)),
        ('rf', RandomForestRegressor(n_estimators=50, max_depth=6, random_state=42, n_jobs=-1)),
    ]
    if HAS_LIGHTGBM:
        lgbm = lgb.LGBMRegressor(
            n_estimators=100, max_depth=5, learning_rate=0.1,
            random_state=42, n_jobs=-1, verbose=-1
        )
        learners.append(('lgbm', lgbm))

    meta_learner = Ridge(alpha=1.0, random_state=42)
    stacking = StackingRegressor(
        estimators=learners,
        final_estimator=meta_learner,
        cv=3,
        n_jobs=-1
    )
    stacking.fit(X_train, y_train)

    mae = mean_absolute_error(y_test, stacking.predict(X_test))
    return stacking, mae

# === MAIN TRAINING FUNCTION ===
def train_model_advanced(X, y, baseline_mae, horizon_name, feature_names):
    """Select, validate and fit the final model for one prediction horizon.

    Steps:
      1. Score every candidate from ``get_models_to_try`` with purged
         walk-forward validation.
      2. Score a stacking ensemble on a single purged 80/20 chronological split.
      3. Pick the lowest MAE, check it against the baseline gate, and refit the
         winner on all data with a scaler fit on all data.

    Parameters
    ----------
    X, y : feature frame and target series in chronological order.
    baseline_mae : MAE of the reference baseline for this horizon.
    horizon_name : label such as '1h', '4h', '24h'. The leading integer is
        read as the horizon in hours and sets the purge gap (120 rows per
        hour); labels that cannot be parsed fall back to a 1-hour gap.
    feature_names : names matching X's columns, used for importance output.

    Returns
    -------
    (model, scaler, metrics, importance), or (None, None, None, None) when
    every candidate failed. The returned scaler is the one the returned model
    was fit with.

    NOTE(review): the stacking score comes from one split while the other
    candidates use a walk-forward average, so the comparison is approximate.
    """
    from sklearn.base import clone  # local import: only needed for the stacking refit

    print(f"\n{'='*60}")
    print(f"Training {horizon_name} model (ADVANCED)")
    print(f"{'='*60}")
    print(f"Samples: {len(X):,}, Features: {X.shape[1]}")
    print(f"Baseline MAE: {baseline_mae:.6f}")

    # The target of a training row lies `horizon` rows in the future, so the
    # gap between train and test must be at least the horizon; a fixed 1-hour
    # gap lets 4h/24h training labels overlap the test window.
    try:
        horizon_hours = max(1, int(str(horizon_name).rstrip('h')))
    except ValueError:
        horizon_hours = 1
    purge_gap = 120 * horizon_hours
    
    models_to_try = get_models_to_try()
    results = []
    
    # Purged walk-forward validation for each model
    for name, model_class, params in models_to_try:
        print(f"\n[{name}] Purged walk-forward validation...")
        try:
            wf_result = purged_walk_forward_validate(
                model_class, params, X, y, baseline_mae, n_splits=5, purge_gap=purge_gap
            )
            if wf_result:
                print(f"  Avg MAE: {wf_result['avg_mae']:.6f} ± {wf_result['std_mae']:.6f}")
                print(f"  vs Baseline: {wf_result['improvement']*100:+.1f}%")
                results.append({
                    'name': name, 'model_class': model_class, 'params': params,
                    'avg_mae': wf_result['avg_mae'], 'std_mae': wf_result['std_mae'],
                    'improvement': wf_result['improvement']
                })
        except Exception as e:
            # One failing candidate must not abort the whole comparison
            print(f"  Failed: {e}")
    
    if not results:
        print("\n⚠️ All models failed!")
        return None, None, None, None
    
    # Evaluate stacking ensemble on a purged chronological split
    split_idx = int(len(X) * 0.8)
    eval_start = split_idx + purge_gap
    if eval_start < len(X):
        try:
            eval_scaler = RobustScaler()
            X_train_scaled = eval_scaler.fit_transform(X.iloc[:split_idx])
            X_test_scaled = eval_scaler.transform(X.iloc[eval_start:])
            y_train, y_test = y.iloc[:split_idx], y.iloc[eval_start:]

            stacking_model, stacking_mae = train_stacking_ensemble(
                X_train_scaled, y_train, X_test_scaled, y_test, eval_scaler
            )
            stacking_improvement = (baseline_mae - stacking_mae) / baseline_mae
            print(f"  Stacking MAE: {stacking_mae:.6f} ({stacking_improvement*100:+.1f}%)")
            results.append({
                'name': 'Stacking', 'model_class': None, 'params': None,
                'avg_mae': stacking_mae, 'std_mae': 0,
                'improvement': stacking_improvement,
                'pretrained_model': stacking_model
            })
        except Exception as e:
            print(f"  Stacking failed: {e}")
    else:
        print("  Stacking skipped: not enough rows after the purge gap")
    
    # Select best model
    best = min(results, key=lambda x: x['avg_mae'])
    print(f"\n>>> Best model: {best['name']} (MAE: {best['avg_mae']:.6f})")
    
    # Check baseline gate
    passed, improvement = check_baseline_gate(best['avg_mae'], baseline_mae, best['name'])
    
    # Train final model on all data
    print(f"\nTraining final {best['name']} model on all data...")
    scaler = RobustScaler()
    X_scaled = scaler.fit_transform(X)
    
    if best.get('pretrained_model') is not None:
        # Refit an unfitted copy of the stacking model: the evaluated instance
        # was trained on data scaled by a different scaler (fit on the first
        # 80% only), so pairing it with `scaler` would feed it mis-scaled input.
        final_model = clone(best['pretrained_model'])
    else:
        final_model = best['model_class'](**best['params'])
    final_model.fit(X_scaled, y)
    
    # Get feature importance if available
    importance = {}
    if hasattr(final_model, 'feature_importances_'):
        importance = dict(zip(feature_names, final_model.feature_importances_))
    elif hasattr(final_model, 'coef_'):
        importance = dict(zip(feature_names, np.abs(final_model.coef_)))
    
    return final_model, scaler, {
        'name': best['name'],
        'mae': best['avg_mae'],
        'improvement': best['improvement'],
        'passed_baseline': passed
    }, importance

In [None]:
# Train all models with horizon-specific features
print("="*70)
print("TRAINING ALL PREDICTION MODELS")
print("="*70)

# horizon -> {'model', 'scaler', 'metrics', 'features', ...}
trained_models = {}
all_feature_importance = {}

# Models are compared with `is not None`: scikit-learn ensembles define
# __len__, so relying on their truthiness is fragile.

# === 1H MODEL ===
model_1h, scaler_1h, metrics_1h, importance_1h = train_model_advanced(
    X_1h, y_1h, BASELINES['1h']['best'], '1h', numeric_features_1h
)
if model_1h is not None:
    trained_models['1h'] = {'model': model_1h, 'scaler': scaler_1h, 'metrics': metrics_1h, 'features': numeric_features_1h}
    if importance_1h:
        all_feature_importance['1h'] = importance_1h

# === 4H MODEL ===
model_4h, scaler_4h, metrics_4h, importance_4h = train_model_advanced(
    X_4h, y_4h, BASELINES['4h']['best'], '4h', numeric_features_4h
)
if model_4h is not None:
    trained_models['4h'] = {'model': model_4h, 'scaler': scaler_4h, 'metrics': metrics_4h, 'features': numeric_features_4h}
    if importance_4h:
        all_feature_importance['4h'] = importance_4h

# === 24H MODEL - REQUIRES 30+ DAYS OF DATA ===
print(f"\n{'='*60}")
print("24h Model - Data Sufficiency Check")
print(f"{'='*60}")

rph = 120
min_24h_samples = 30 * 24 * rph  # 30 days at 30-sec intervals
valid_24h_samples = y_24h.notna().sum()

# Calculate actual days of data
total_hours = len(df_clean) / rph
total_days = total_hours / 24

print(f"Total data: {total_days:.1f} days ({len(df_clean):,} samples)")
print(f"Valid 24h samples: {valid_24h_samples:,}")
print(f"Required for reliable 24h model: {min_24h_samples:,} samples (30 days)")

if total_days >= 30 and valid_24h_samples >= min_24h_samples // 2:
    print("\n✓ Sufficient data for 24h model - training...")
    
    # Use only samples with valid 24h targets
    mask_24h = y_24h.notna()
    X_24h_valid = X_24h[mask_24h]
    y_24h_valid = y_24h[mask_24h]

    # The 24h model must be judged against a 24h baseline; the 4h baseline
    # measures error over a different horizon.
    naive_mae_24h = np.mean(np.abs(y_24h_valid.values - current_gas[mask_24h].values))
    mean_mae_24h = np.mean(np.abs(y_24h_valid.values - y_24h_valid.mean()))
    BASELINES['24h'] = {
        'naive_mae': naive_mae_24h, 'mean_mae': mean_mae_24h,
        'best': min(naive_mae_24h, mean_mae_24h)
    }
    print(f"  Best baseline 24h: {BASELINES['24h']['best']:.6f}")
    
    model_24h, scaler_24h, metrics_24h, importance_24h = train_model_advanced(
        X_24h_valid, y_24h_valid, BASELINES['24h']['best'], '24h', numeric_features_24h
    )
    if model_24h is not None:
        trained_models['24h'] = {
            'model': model_24h, 'scaler': scaler_24h, 
            'metrics': metrics_24h, 'features': numeric_features_24h,
            'is_fallback': False
        }
        if importance_24h:
            all_feature_importance['24h'] = importance_24h
else:
    print(f"\n⚠️ Insufficient data for reliable 24h model")
    print(f"   Need 30+ days, have {total_days:.1f} days")
    print("   Using 4h model as fallback (honest about limitations)")
    
    # The fallback entry reuses the 4h model, scaler, features and metrics
    if model_4h is not None:
        trained_models['24h'] = {
            'model': model_4h, 'scaler': scaler_4h,
            'metrics': {
                'name': metrics_4h['name'] + ' (4h fallback)',
                'mae': metrics_4h['mae'],
                'improvement': metrics_4h['improvement'],
                'passed_baseline': metrics_4h['passed_baseline']
            },
            'features': numeric_features_4h,
            'is_fallback': True
        }

# === OUT-OF-TIME HOLDOUT EVALUATION ===
if HAS_HOLDOUT and df_holdout is not None:
    print(f"\n{'='*60}")
    print("OUT-OF-TIME HOLDOUT EVALUATION")
    print(f"{'='*60}")
    
    for horizon in ['1h', '4h']:
        if horizon in trained_models:
            data = trained_models[horizon]
            features = data['features']
            
            X_holdout = df_holdout[features]
            y_holdout = df_holdout[f'target_{horizon}']
            
            # Remove NaN
            mask = y_holdout.notna()
            X_holdout = X_holdout[mask]
            y_holdout = y_holdout[mask]
            
            if len(X_holdout) > 100:
                X_holdout_scaled = data['scaler'].transform(X_holdout)
                y_pred = data['model'].predict(X_holdout_scaled)
                
                # The improvement is measured against the baseline computed on
                # the training/validation data
                holdout_mae = mean_absolute_error(y_holdout, y_pred)
                holdout_improvement = (BASELINES[horizon]['best'] - holdout_mae) / BASELINES[horizon]['best']
                
                print(f"\n{horizon}: Holdout MAE = {holdout_mae:.6f} ({holdout_improvement*100:+.1f}% vs baseline)")
                
                # Update metrics with holdout performance
                trained_models[horizon]['holdout_mae'] = holdout_mae
                trained_models[horizon]['holdout_improvement'] = holdout_improvement

# === SUMMARY ===
print(f"\n{'='*70}")
print("TRAINING SUMMARY")
print(f"{'='*70}")

for horizon, data in trained_models.items():
    m = data['metrics']
    status = "✓ PASSED" if m['passed_baseline'] else "⚠ BELOW BASELINE"
    fallback = " (fallback)" if data.get('is_fallback') else ""
    holdout = f" | Holdout: {data.get('holdout_mae', 'N/A'):.4f}" if 'holdout_mae' in data else ""
    print(f"{horizon}: {m['name']}{fallback} | MAE: {m['mae']:.6f} | {m['improvement']*100:+.1f}%{holdout} | {status}")

# Store combined feature importance (4h preferred, then 1h)
FEATURE_IMPORTANCE = all_feature_importance.get('4h', all_feature_importance.get('1h', {}))

In [None]:
# PREDICTION INTERVALS - IMPROVED with Conformal Prediction + Calibration
from sklearn.ensemble import GradientBoostingRegressor

# Section banner for the prediction-interval cell.
interval_rule = "=" * 60
print(f"\n{interval_rule}")
print("TRAINING PREDICTION INTERVALS (Conformal + Quantile)")
print(interval_rule)

# Per-horizon containers filled by the training loop further down this cell.
quantile_models, conformal_residuals = {}, {}

def train_conformal_intervals(X, y, model, scaler, horizon, alpha=0.2):
    """
    Estimate a symmetric split-conformal interval half-width.

    The last 20% of the rows (always at least one row) are used as the
    calibration set. The already-fitted ``model`` predicts them after
    ``scaler.transform``, and the ``1 - alpha`` empirical quantile of the
    absolute residuals is returned as the half-width.

    NOTE(review): the nominal coverage only holds if the calibration rows were
    not used to fit ``model``/``scaler`` -- confirm against the training cell.

    Args:
        X: pandas DataFrame of features (sliced positionally with ``.iloc``).
        y: pandas Series of targets, row-aligned with ``X``.
        model: fitted estimator exposing ``predict``.
        scaler: fitted transformer exposing ``transform``.
        horizon: horizon label; kept for call-site compatibility, not used.
        alpha: miscoverage level; ``alpha=0.2`` targets an 80% interval.

    Returns:
        dict with keys ``'quantile'`` (half-width), ``'residuals'`` (absolute
        calibration residuals) and ``'coverage_target'`` (``1 - alpha``).

    Raises:
        ValueError: if ``X`` has no rows.
    """
    n_rows = len(X)
    if n_rows == 0:
        raise ValueError("Cannot calibrate conformal intervals on empty data")

    # int(n * 0.2) is 0 for fewer than 5 rows, and ``iloc[-0:]`` would then
    # select *every* row instead of the tail; clamp to at least one row.
    cal_size = max(1, int(n_rows * 0.2))
    X_cal = X.iloc[-cal_size:]
    y_cal = y.iloc[-cal_size:]

    # Predictions of the fitted model on the calibration rows.
    y_pred_cal = model.predict(scaler.transform(X_cal))

    # Absolute errors are the conformity scores.
    residuals = np.abs(y_cal.values - y_pred_cal)

    # Half-width that covers a (1 - alpha) share of the calibration residuals.
    q = np.quantile(residuals, 1 - alpha)

    return {
        'quantile': q,
        'residuals': residuals,
        'coverage_target': 1 - alpha
    }

# Train quantile-regression and conformal prediction intervals for each short
# horizon, then check how well the intervals are calibrated.
for horizon in ['1h', '4h']:
    if horizon not in trained_models:
        continue

    print(f"\n{horizon} prediction intervals...")

    data = trained_models[horizon]
    features = data['features']

    X_h = df_train_val[features]
    y_h = df_train_val[f'target_{horizon}']

    # Drop rows that have no target for this horizon.
    mask = y_h.notna()
    X_h = X_h[mask]
    y_h = y_h[mask]

    if len(X_h) < 1000:
        print(f"  ⚠️ Insufficient data for {horizon} intervals, skipping")
        continue

    # Positional 80/20 split (rows are presumably in time order -- verify upstream).
    split_idx = int(len(X_h) * 0.8)
    X_train, X_test = X_h.iloc[:split_idx], X_h.iloc[split_idx:]
    y_train, y_test = y_h.iloc[:split_idx], y_h.iloc[split_idx:]

    # The quantile models get their own scaler, fitted on the first 80% only.
    scaler = RobustScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # === Quantile Regression ===
    q_models = {}
    for q in [0.1, 0.5, 0.9]:
        model = GradientBoostingRegressor(
            loss='quantile', alpha=q,
            n_estimators=100, max_depth=5,
            learning_rate=0.1, random_state=42
        )
        model.fit(X_train_scaled, y_train)
        q_models[q] = model

    quantile_models[horizon] = (q_models, scaler)
    print(f"  ✓ Quantile models trained (10th, 50th, 90th percentiles)")

    # === Conformal Prediction ===
    # Deployed half-width: calibrated on the last 20% of the rows.
    conformal = train_conformal_intervals(X_h, y_h, data['model'], data['scaler'], horizon, alpha=0.2)
    conformal_residuals[horizon] = conformal
    print(f"  ✓ Conformal interval: ±{conformal['quantile']:.4f} gwei (80% coverage)")

    # === Calibration Check ===
    print(f"  Calibration check...")

    # Point predictions of the main model on the last 20% of the rows.
    y_pred_test = data['model'].predict(data['scaler'].transform(X_test))

    # Quantile interval coverage (these models never saw X_test).
    q_low = q_models[0.1].predict(X_test_scaled)
    q_high = q_models[0.9].predict(X_test_scaled)
    q_coverage = np.mean((y_test.values >= q_low) & (y_test.values <= q_high))

    # Conformal interval coverage.
    # The deployed width above was calibrated on (essentially) these same
    # rows, so scoring it here would return ~80% by construction. Instead,
    # run the same procedure on the first half of the tail and measure
    # coverage on the second half, which that width has never seen.
    test_residuals = np.abs(y_test.values - y_pred_test)
    half = len(test_residuals) // 2
    check_width = np.quantile(test_residuals[:half], conformal['coverage_target'])
    conf_coverage = np.mean(test_residuals[half:] <= check_width)

    print(f"    Quantile 80% interval: actual coverage = {q_coverage:.1%}")
    print(f"    Conformal 80% interval: actual coverage = {conf_coverage:.1%}")

    # Store calibration results (the width is the deployed, full-tail one).
    trained_models[horizon]['calibration'] = {
        'quantile_coverage': q_coverage,
        'conformal_coverage': conf_coverage,
        'conformal_width': conformal['quantile']
    }

    # Warning if miscalibrated
    if abs(q_coverage - 0.8) > 0.1:
        print(f"    ⚠️ Quantile intervals may be miscalibrated")
    if abs(conf_coverage - 0.8) > 0.05:
        print(f"    ⚠️ Conformal intervals may need recalibration")

# Copy 4h to 24h if available
if '4h' in quantile_models:
    quantile_models['24h'] = quantile_models['4h']
    print("\n24h: Using 4h quantile models")

if '4h' in conformal_residuals:
    conformal_residuals['24h'] = conformal_residuals['4h']

print(f"\n✓ Prediction intervals ready for: {list(quantile_models.keys())}")

In [None]:
# Direction Prediction (Classification: Down/Stable/Up)
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, f1_score

direction_rule = "=" * 60
print("\n" + direction_rule)
print("TRAINING DIRECTION MODELS")
print(direction_rule)

# horizon -> {'model', 'scaler', 'accuracy', 'f1_score'}
direction_models = {}

# Feature matrix and direction labels per horizon, prepared in earlier cells.
direction_inputs = {
    '1h': (X_1h, y_dir_1h),
    '4h': (X_4h, y_dir_4h),
}

for horizon, (X_h, y_dir) in direction_inputs.items():
    print(f"\n{horizon} direction model...")

    # Keep only rows that have a direction label.
    labelled = y_dir.notna()
    X_d = X_h[labelled]
    y_d = y_dir[labelled]

    if len(X_d) < 1000:
        print(f"  ⚠️ Insufficient data for {horizon} direction, skipping")
        continue

    # Positional 80/20 train/test split.
    cut = int(len(X_d) * 0.8)
    X_train, y_train = X_d.iloc[:cut], y_d.iloc[:cut]
    X_test, y_test = X_d.iloc[cut:], y_d.iloc[cut:]

    # Scaler is fitted on the training rows only.
    scaler = RobustScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    clf = GradientBoostingClassifier(
        n_estimators=50,
        max_depth=4,
        learning_rate=0.1,
        random_state=42,
    )
    clf.fit(X_train_scaled, y_train)

    # Score on the held-back 20%.
    y_pred = clf.predict(X_test_scaled)
    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='weighted')

    direction_models[horizon] = {
        'model': clf,
        'scaler': scaler,
        'accuracy': float(acc),
        'f1_score': float(f1)
    }

    print(f"  Accuracy: {acc:.1%}, F1: {f1:.3f}")

print(f"\n✓ Direction models trained for: {list(direction_models.keys())}")

In [None]:
# REGIME DETECTION
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

regime_rule = "=" * 60
print("\n" + regime_rule)
print("TRAINING REGIME DETECTION MODEL")
print(regime_rule)

# Values used whenever no regime model can be trained; the training branch
# below overwrites all three.
regime_clf = None
regime_scaler = None
regime_accuracy = 0

if 'volatility_regime' not in df_train_val.columns:
    print("⚠️ No volatility regime data available, skipping regime detection")
else:
    # Create regime labels from volatility: low -> 0, normal -> 1, high -> 2.
    # NOTE(review): the message printed below names these classes
    # Normal/Elevated/Spike -- confirm that naming is intended.
    regime_labels = df_train_val['volatility_regime'].map({'low': 0, 'normal': 1, 'high': 2})
    labelled = regime_labels.notna()

    X_r = X_4h[labelled]  # Use 4h features for regime detection
    y_r = regime_labels[labelled]

    if len(X_r) < 500:
        print("⚠️ Insufficient data for regime detection")
    else:
        # Positional 80/20 train/test split.
        cut = int(len(X_r) * 0.8)
        X_train, y_train = X_r.iloc[:cut], y_r.iloc[:cut]
        X_test, y_test = X_r.iloc[cut:], y_r.iloc[cut:]

        regime_scaler = RobustScaler()
        X_train_scaled = regime_scaler.fit_transform(X_train)
        X_test_scaled = regime_scaler.transform(X_test)

        regime_clf = RandomForestClassifier(
            n_estimators=50,
            max_depth=6,
            random_state=42,
            n_jobs=-1,
        )
        regime_clf.fit(X_train_scaled, y_train)

        # Accuracy on the held-back 20%.
        regime_accuracy = accuracy_score(y_test, regime_clf.predict(X_test_scaled))

        print(f"Regime classes: Normal (0), Elevated (1), Spike (2)")
        print(f"Accuracy: {regime_accuracy:.1%}")

        if regime_accuracy > 0.95:
            print("⚠️ Warning: Very high accuracy may indicate overfitting")

In [None]:
# Train Spike Detectors
from sklearn.ensemble import GradientBoostingClassifier

print("\n" + "="*60)
print("TRAINING SPIKE DETECTORS")
print("="*60)

# horizon -> (classifier, scaler)
spike_models = {}

for horizon, X_h, y_target in [('1h', X_1h, y_1h), ('4h', X_4h, y_4h)]:
    print(f"\n{horizon} spike detector...")

    # Keep only rows with a known future price.
    mask = y_target.notna()
    X_s = X_h[mask]
    y_s = y_target[mask]
    current = current_gas[mask]

    # A spike is an upward move larger than 2 standard deviations of the
    # price change (future target minus current gas).
    price_change = y_s - current
    threshold = price_change.std() * 2
    spike_labels = (price_change > threshold).astype(int)

    spike_rate = spike_labels.mean()
    print(f"  Spike rate: {spike_rate:.1%}")

    if spike_rate < 0.01 or spike_rate > 0.5:
        print(f"  ⚠️ Unusual spike rate, skipping")
        continue

    if len(X_s) < 1000:
        print(f"  ⚠️ Insufficient data, skipping")
        continue

    # Positional 80/20 train/test split.
    split_idx = int(len(X_s) * 0.8)
    X_train, X_test = X_s.iloc[:split_idx], X_s.iloc[split_idx:]
    y_train, y_test = spike_labels.iloc[:split_idx], spike_labels.iloc[split_idx:]

    # All spikes can fall into the last 20%; a classifier cannot be fitted on
    # a single class, so skip instead of crashing the whole cell.
    if y_train.nunique() < 2:
        print(f"  ⚠️ Training split contains a single class, skipping")
        continue

    scaler = RobustScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Train classifier. No class weighting is applied here, so the rare spike
    # class is not up-weighted.
    clf = GradientBoostingClassifier(
        n_estimators=50, max_depth=4,
        learning_rate=0.1, random_state=42
    )
    clf.fit(X_train_scaled, y_train)

    # Evaluate. Accuracy is dominated by the non-spike majority, so also
    # report how many of the real test-set spikes were caught.
    y_pred = clf.predict(X_test_scaled)
    acc = accuracy_score(y_test, y_pred)

    is_spike = (y_test.values == 1)
    n_spikes = int(is_spike.sum())
    n_caught = int((is_spike & (y_pred == 1)).sum())

    spike_models[horizon] = (clf, scaler)
    print(f"  Accuracy: {acc:.1%}")
    if n_spikes:
        print(f"  Spike recall: {n_caught}/{n_spikes} ({n_caught / n_spikes:.1%})")
    else:
        print(f"  Spike recall: n/a (no spikes in test split)")

# Copy 4h to 24h if available
if '4h' in spike_models:
    spike_models['24h'] = spike_models['4h']
    print("\n24h: Using 4h spike detector (fallback)")

print(f"\n✓ Spike detectors trained for: {list(spike_models.keys())}")

In [None]:
# DQN AGENT TRAINING (OPTIONAL)
# This trains a reinforcement learning agent for transaction timing
# Skip if you just need prediction models

# Master switch for the optional reinforcement-learning step.
TRAIN_DQN = False  # Set to True to train DQN agent

if not TRAIN_DQN:
    # Record the outcome first so later cells can rely on the flag existing.
    DQN_TRAINED = False
    skip_rule = "=" * 60
    print(skip_rule, "DQN TRAINING SKIPPED (set TRAIN_DQN = True to enable)", skip_rule, sep="\n")

In [None]:
# DQN Training Implementation (runs only if TRAIN_DQN = True)

if TRAIN_DQN:
    print("\n" + "="*60)
    print("TRAINING DQN AGENT")
    print("="*60)

    try:
        # Imported here so the notebook still runs when PyTorch is missing
        # (the ImportError handler below turns that into a skip).
        import torch
        import torch.nn as nn
        import torch.optim as optim
        from collections import deque
        import random

        class DQNNetwork(nn.Module):
            """Small MLP mapping a state vector to one Q-value per action."""

            def __init__(self, state_dim, action_dim):
                super().__init__()
                self.net = nn.Sequential(
                    nn.Linear(state_dim, 64),
                    nn.ReLU(),
                    nn.Linear(64, 32),
                    nn.ReLU(),
                    nn.Linear(32, action_dim)
                )

            def forward(self, x):
                return self.net(x)

        class DQNAgent:
            """Epsilon-greedy DQN agent with a replay buffer and a target network."""

            def __init__(self, state_dim, action_dim):
                self.state_dim = state_dim
                self.action_dim = action_dim
                self.epsilon = 1.0            # current exploration rate
                self.epsilon_min = 0.05
                self.epsilon_decay = 0.995    # applied once per replay step
                self.gamma = 0.99             # discount factor
                self.lr = 0.001
                self.memory = deque(maxlen=10000)
                self.batch_size = 32
                self.training_steps = 0

                self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
                self.model = DQNNetwork(state_dim, action_dim).to(self.device)
                self.target_model = DQNNetwork(state_dim, action_dim).to(self.device)
                self.optimizer = optim.Adam(self.model.parameters(), lr=self.lr)
                self.update_target()

            def update_target(self):
                """Copy the online network's weights into the target network."""
                self.target_model.load_state_dict(self.model.state_dict())

            def act(self, state):
                """Return a random action with probability epsilon, else the greedy one."""
                if random.random() < self.epsilon:
                    return random.randint(0, self.action_dim - 1)
                state_t = torch.as_tensor(state, dtype=torch.float32, device=self.device).unsqueeze(0)
                with torch.no_grad():
                    q_values = self.model(state_t)
                return q_values.argmax().item()

            def remember(self, state, action, reward, next_state, done):
                """Append one transition to the replay buffer."""
                self.memory.append((state, action, reward, next_state, done))

            def replay(self):
                """Run one gradient step on a random minibatch, if enough transitions exist."""
                if len(self.memory) < self.batch_size:
                    return

                batch = random.sample(self.memory, self.batch_size)
                states, actions, rewards, next_states, dones = zip(*batch)

                # Stack into single ndarrays first: building a tensor straight
                # from a tuple of ndarrays takes PyTorch's slow per-element path.
                states = torch.as_tensor(np.asarray(states, dtype=np.float32), device=self.device)
                actions = torch.as_tensor(actions, dtype=torch.long, device=self.device)
                rewards = torch.as_tensor(np.asarray(rewards, dtype=np.float32), device=self.device)
                next_states = torch.as_tensor(np.asarray(next_states, dtype=np.float32), device=self.device)
                dones = torch.as_tensor(np.asarray(dones, dtype=np.float32), device=self.device)

                # squeeze(1) keeps the batch dimension even for a batch of one,
                # so prediction and target always have the same shape.
                current_q = self.model(states).gather(1, actions.unsqueeze(1)).squeeze(1)
                next_q = self.target_model(next_states).max(1)[0].detach()
                target_q = rewards + (1 - dones) * self.gamma * next_q

                loss = nn.MSELoss()(current_q, target_q)
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()

                # Refresh the target network every 100 gradient steps.
                self.training_steps += 1
                if self.training_steps % 100 == 0:
                    self.update_target()

                self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

            def save(self, path):
                """Write the online network's state dict to ``path``."""
                torch.save(self.model.state_dict(), path)

        # Create simple environment
        state_dim = min(30, len(X.columns))  # Limit state size
        action_dim = 2  # 0 = wait, 1 = execute

        DQN_AGENT = DQNAgent(state_dim, action_dim)

        # Rows usable as transitions: step i needs row i + 1 in both X and
        # current_gas.
        n_rows = min(len(X), len(current_gas))
        episode_len = max(0, min(100, n_rows - 1))

        # Train for a few episodes
        n_episodes = 500
        print(f"Training DQN for {n_episodes} episodes...")

        for episode in range(n_episodes):
            # Start each episode at a random offset so the agent sees the whole
            # history rather than replaying the same first 100 rows each time.
            start = random.randint(0, n_rows - 1 - episode_len) if episode_len > 0 else 0

            for step in range(episode_len):
                i = start + step
                state = X.iloc[i, :state_dim].to_numpy(dtype=np.float32)
                action = DQN_AGENT.act(state)

                # Simple reward: negative gas price change if executing
                next_gas = current_gas.iloc[i + 1]
                reward = -(next_gas - current_gas.iloc[i]) if action == 1 else -0.001  # Small wait penalty

                next_state = X.iloc[i + 1, :state_dim].to_numpy(dtype=np.float32)
                done = (step == episode_len - 1)

                DQN_AGENT.remember(state, action, reward, next_state, done)
                DQN_AGENT.replay()

            if (episode + 1) % 100 == 0:
                print(f"  Episode {episode + 1}/{n_episodes}, Epsilon: {DQN_AGENT.epsilon:.3f}")

        DQN_TRAINED = True
        DQN_METRICS = {
            'episodes': n_episodes,
            'training_steps': DQN_AGENT.training_steps,
            'final_epsilon': float(DQN_AGENT.epsilon)
        }
        print(f"\n✓ DQN training complete ({DQN_AGENT.training_steps} steps)")

    except ImportError:
        print("⚠️ PyTorch not available, skipping DQN training")
        DQN_TRAINED = False
    except Exception as e:
        # Best-effort step: report the failure and continue without an agent.
        print(f"⚠️ DQN training failed: {e}")
        DQN_TRAINED = False
else:
    DQN_TRAINED = False

In [None]:
# Save all models - IMPROVED with conformal intervals and calibration data
import os
from datetime import datetime
import json as json_lib

os.makedirs('saved_models', exist_ok=True)

save_rule = "=" * 60
print("\n" + save_rule)
print("SAVING MODELS")
print(save_rule)

# === Save prediction models ===
# One model_<horizon>.pkl bundle plus a standalone scaler_<horizon>.pkl each.
for horizon in ('1h', '4h', '24h'):
    if horizon not in trained_models:
        print(f"⚠️ No {horizon} model to save")
        continue

    entry = trained_models[horizon]
    fitted_model = entry['model']
    fitted_scaler = entry['scaler']
    stats = entry['metrics']
    feature_list = entry.get('features', [])

    # Metrics are cast to plain Python types before being stored.
    bundle = {
        'model': fitted_model,
        'model_name': stats['name'],
        'metrics': {
            'mae': float(stats['mae']),
            'improvement': float(stats['improvement']),
            'passed_baseline': bool(stats['passed_baseline']),
            'is_fallback': entry.get('is_fallback', False)
        },
        'trained_at': datetime.now().isoformat(),
        'feature_names': list(feature_list),
        'feature_scaler': fitted_scaler,
        'scaler_type': 'RobustScaler'
    }

    # Optional sections, added only when earlier cells produced them.
    if 'holdout_mae' in entry:
        bundle['holdout_metrics'] = {
            'mae': float(entry['holdout_mae']),
            'improvement': float(entry['holdout_improvement'])
        }

    if 'calibration' in entry:
        calibration = entry['calibration']
        bundle['calibration'] = {
            key: float(calibration[key])
            for key in ('quantile_coverage', 'conformal_coverage', 'conformal_width')
        }

    if horizon in conformal_residuals:
        bundle['conformal_interval'] = float(conformal_residuals[horizon]['quantile'])

    joblib.dump(bundle, f'saved_models/model_{horizon}.pkl')
    marker = "✓" if stats['passed_baseline'] else "⚠"
    print(f"{marker} Saved model_{horizon}.pkl ({stats['name']}, MAE={stats['mae']:.6f})")

    # The scaler is also written on its own.
    joblib.dump(fitted_scaler, f'saved_models/scaler_{horizon}.pkl')

# === Save feature names (use 4h as default) ===
# Falls back to the 1h feature list, then to an empty list.
default_features = trained_models.get('4h', trained_models.get('1h', {})).get('features', [])
joblib.dump(list(default_features), 'saved_models/feature_names.pkl')
print(f"\nSaved feature_names.pkl ({len(default_features)} features)")

# === Save spike detectors ===
# The `in dir()` guards let this cell run even if an optional cell was skipped.
if 'spike_models' in dir() and spike_models:
    for horizon, (clf, scaler) in spike_models.items():
        spike_data = {'model': clf, 'scaler': scaler, 'trained_at': datetime.now().isoformat()}
        joblib.dump(spike_data, f'saved_models/spike_detector_{horizon}.pkl')
        print(f"Saved spike_detector_{horizon}.pkl")

# === Save regime detector ===
if 'regime_clf' in dir() and regime_clf is not None:
    regime_data = {
        'model': regime_clf, 'scaler': regime_scaler,
        # NOTE(review): the training cell maps low/normal/high volatility to
        # 0/1/2 -- confirm these display names match that encoding.
        'regimes': {0: 'Normal', 1: 'Elevated', 2: 'Spike'},
        'accuracy': regime_accuracy,
        'trained_at': datetime.now().isoformat()
    }
    joblib.dump(regime_data, 'saved_models/regime_detector.pkl')
    print(f"Saved regime_detector.pkl (Accuracy: {regime_accuracy:.1%})")

# === Save quantile models ===
if 'quantile_models' in dir() and quantile_models:
    for horizon, (q_models, q_scaler) in quantile_models.items():
        quantile_data = {
            'models': q_models, 'scaler': q_scaler,
            'quantiles': [0.1, 0.5, 0.9],
            'trained_at': datetime.now().isoformat()
        }

        # Add conformal data if available
        if horizon in conformal_residuals:
            conformal_entry = conformal_residuals[horizon]
            quantile_data['conformal'] = {
                'interval_width': float(conformal_entry['quantile']),
                # Use the target the interval was actually calibrated for, so
                # this stays correct if alpha changes; 0.8 is the old fixed value.
                'coverage_target': float(conformal_entry.get('coverage_target', 0.8))
            }

        joblib.dump(quantile_data, f'saved_models/quantile_{horizon}.pkl')
        print(f"Saved quantile_{horizon}.pkl")

# === Save training metadata ===
def convert_to_python_types(obj):
    """
    Recursively replace numpy values with plain Python ones for JSON output.

    - dicts are rebuilt with converted values; numpy scalar keys are unwrapped
      too, because ``json.dump`` rejects them as keys
    - lists and tuples become lists of converted items
    - ``np.bool_`` -> ``bool``, ``np.integer`` -> ``int``, ``np.floating`` -> ``float``
    - ``np.ndarray`` -> (nested) list
    - anything else is returned unchanged

    Args:
        obj: arbitrarily nested structure of dicts/lists/tuples/scalars/arrays.

    Returns:
        A new structure containing only built-in Python types wherever a numpy
        type was found.
    """
    if isinstance(obj, dict):
        return {
            (key.item() if isinstance(key, np.generic) else key): convert_to_python_types(value)
            for key, value in obj.items()
        }
    if isinstance(obj, (list, tuple)):
        return [convert_to_python_types(item) for item in obj]
    # np.bool_ is checked on its own so that flags serialize as true/false
    # rather than 1/0.
    if isinstance(obj, np.bool_):
        return bool(obj)
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.ndarray):
        # tolist() already yields Python scalars for numeric dtypes; recursing
        # also covers object arrays that hold numpy values.
        return convert_to_python_types(obj.tolist())
    return obj

# Human-readable list of what this training pipeline includes.
pipeline_notes = [
    'Binance ETH price (1-min resolution)',
    'Fourier seasonality features',
    'GARCH-style volatility clustering',
    'Horizon-specific feature sets',
    'LightGBM/XGBoost models',
    'Stacking ensemble',
    'Purged walk-forward validation',
    'Out-of-time holdout (last 48h)',
    'Conformal prediction intervals',
    'Interval calibration checks',
    '24h model requires 30+ days data'
]

# Number of numeric features used per horizon.
feature_counts = {
    '1h': len(numeric_features_1h),
    '4h': len(numeric_features_4h),
    '24h': len(numeric_features_24h)
}

holdout_rows = len(df_holdout) if df_holdout is not None else 0

metadata = {
    'training_timestamp': datetime.now().isoformat(),
    'total_samples': len(df_clean),
    'training_samples': len(df_train_val),
    'holdout_samples': holdout_rows,
    'date_range': f"{df_clean.index.min()} to {df_clean.index.max()}",
    'resampling': '30-second intervals',
    'data_source': {
        'eth_price': 'Binance 1-min' if HAS_ETH_PRICE else 'None',
        'has_holdout': HAS_HOLDOUT
    },
    'features': feature_counts,
    'baselines': BASELINES,
    'models': {},
    'improvements': pipeline_notes
}

# Per-horizon model summary; holdout and calibration sections are optional.
for horizon, entry in trained_models.items():
    stats = entry['metrics']
    model_summary = {
        'name': stats['name'],
        'mae': float(stats['mae']),
        'improvement_pct': float(stats['improvement'] * 100),
        'passed_baseline': bool(stats['passed_baseline']),
        'is_fallback': entry.get('is_fallback', False),
        'features_count': len(entry.get('features', []))
    }

    if 'holdout_mae' in entry:
        model_summary['holdout_mae'] = float(entry['holdout_mae'])
        model_summary['holdout_improvement_pct'] = float(entry['holdout_improvement'] * 100)

    if 'calibration' in entry:
        model_summary['calibration'] = entry['calibration']

    metadata['models'][horizon] = model_summary

# Direction-model scores are included only when that cell has run.
if 'direction_models' in dir() and direction_models:
    direction_summary = {}
    for horizon, scores in direction_models.items():
        direction_summary[horizon] = {
            'accuracy': scores['accuracy'],
            'f1_score': scores['f1_score']
        }
    metadata['direction_models'] = direction_summary

# Strip numpy types so the structure is JSON-serializable.
metadata = convert_to_python_types(metadata)

with open('saved_models/training_metadata.json', 'w') as f:
    json_lib.dump(metadata, f, indent=2)
print(f"\nSaved training_metadata.json")

# === Save feature importance ===
# Written in descending order of importance.
if FEATURE_IMPORTANCE:
    ranked = sorted(FEATURE_IMPORTANCE.items(), key=lambda item: item[1], reverse=True)
    sorted_importance = dict(ranked)
    with open('saved_models/feature_importance.json', 'w') as f:
        json_lib.dump(convert_to_python_types(sorted_importance), f, indent=2)
    print(f"Saved feature_importance.json")

# === DQN Agent ===
# Only present when the optional DQN cell ran and succeeded.
if 'DQN_TRAINED' in dir() and DQN_TRAINED:
    agent_dir = 'saved_models/rl_agents'
    os.makedirs(agent_dir, exist_ok=True)
    DQN_AGENT.save('saved_models/rl_agents/dqn_agent.pt')

    if 'DQN_METRICS' in dir():
        agent_metrics = DQN_METRICS
    else:
        agent_metrics = {}

    dqn_meta = {
        'state_dim': DQN_AGENT.state_dim,
        'action_dim': DQN_AGENT.action_dim,
        'training_steps': DQN_AGENT.training_steps,
        'epsilon': float(DQN_AGENT.epsilon),
        'metrics': agent_metrics,
        'trained_at': datetime.now().isoformat()
    }
    with open('saved_models/rl_agents/dqn_metadata.json', 'w') as f:
        json_lib.dump(dqn_meta, f, indent=2)
    print("Saved DQN agent")

done_rule = "=" * 60
print("\n" + done_rule)
print("ALL MODELS SAVED")
print(done_rule)

In [None]:
# Print final report
heavy_rule = "=" * 70
light_rule = "-" * 70

print("\n" + heavy_rule)
print("TRAINING COMPLETE - FINAL REPORT")
print(heavy_rule)

# Rows are 30-second samples: 120 per hour, 24 hours per day.
samples_per_day = 120 * 24
total_days = len(df_clean) / samples_per_day

holdout_rows = len(df_holdout) if df_holdout is not None else 0
eth_source = 'Binance 1-min ✓' if HAS_ETH_PRICE else 'Not available'

print(f"\nDATA SUMMARY")
print(f"   Total samples: {len(df_clean):,} ({total_days:.1f} days)")
print(f"   Training: {len(df_train_val):,} | Holdout: {holdout_rows:,}")
print(f"   Date range: {df_clean.index.min()} to {df_clean.index.max()}")
print(f"   ETH price: {eth_source}")
print(f"   Features: 1h={len(numeric_features_1h)}, 4h={len(numeric_features_4h)}, 24h={len(numeric_features_24h)}")

print(f"\n" + light_rule)
print(f"{'MODEL PERFORMANCE':^70}")
print(light_rule)
print(f"{'Horizon':<8} {'Model':<15} {'CV MAE':>10} {'Holdout':>10} {'vs Base':>10} {'Status':>12}")
print(light_rule)

# One table row per trained horizon.
for horizon in ('1h', '4h', '24h'):
    if horizon not in trained_models:
        continue

    entry = trained_models[horizon]
    stats = entry['metrics']

    # Model names are cut to fit the column; fallbacks get a '(fb)' suffix.
    name = stats['name'][:14]
    if entry.get('is_fallback'):
        name = name[:10] + '(fb)'

    cv_cell = f"{stats['mae']:.4f}"
    if 'holdout_mae' in entry:
        holdout_cell = f"{entry['holdout_mae']:.4f}"
    else:
        holdout_cell = "N/A"
    improvement_cell = f"{stats['improvement']*100:+.1f}%"
    status_cell = "✓ PASS" if stats['passed_baseline'] else "✗ FAIL"

    print(f"{horizon:<8} {name:<15} {cv_cell:>10} {holdout_cell:>10} {improvement_cell:>10} {status_cell:>12}")

print(light_rule)

section_rule = "-" * 70

# Calibration report: only horizons that stored calibration results.
calibrated_horizons = [h for h in ('1h', '4h') if 'calibration' in trained_models.get(h, {})]
if calibrated_horizons:
    print(f"\n" + section_rule)
    print(f"{'PREDICTION INTERVAL CALIBRATION':^70}")
    print(section_rule)
    print(f"{'Horizon':<10} {'Quantile 80%':>15} {'Conformal 80%':>15} {'Width (gwei)':>15}")
    print(section_rule)

    for horizon in calibrated_horizons:
        cal = trained_models[horizon]['calibration']
        quantile_cell = f"{cal['quantile_coverage']:.1%}"
        conformal_cell = f"{cal['conformal_coverage']:.1%}"
        width_cell = f"±{cal['conformal_width']:.4f}"
        print(f"{horizon:<10} {quantile_cell:>15} {conformal_cell:>15} {width_cell:>15}")

    print(section_rule)

# Direction models (skipped when that cell did not run or trained nothing).
if 'direction_models' in dir() and direction_models:
    print(f"\n" + section_rule)
    print(f"{'DIRECTION PREDICTION':^70}")
    print(section_rule)
    for horizon, scores in direction_models.items():
        print(f"  {horizon}: Accuracy={scores['accuracy']:.1%}, F1={scores['f1_score']:.3f}")

# 24h model status: say whether it is a real model or the 4h fallback.
print(f"\n" + section_rule)
print(f"{'24H MODEL STATUS':^70}")
print(section_rule)
if '24h' in trained_models:
    uses_fallback = trained_models['24h'].get('is_fallback')
    if not uses_fallback:
        print(f"  ✓ True 24h model trained with {total_days:.1f} days of data")
    else:
        print(f"  ⚠️ Using 4h model as fallback (need 30+ days of data)")
        print(f"     Current data: {total_days:.1f} days")
        print(f"     Recommendation: Collect {30 - total_days:.0f} more days before training true 24h model")

# Final recommendation
print(f"\n" + "="*70)
print("RECOMMENDATION")
print("="*70)

# Horizons whose model did not beat its baseline (a missing flag counts as failed).
failed = [h for h, entry in trained_models.items()
          if not entry.get('metrics', {}).get('passed_baseline', False)]

# all() over an empty collection is True, so "no models at all" must be
# excluded explicitly, otherwise an empty run would be declared deployable.
all_passed = bool(trained_models) and not failed

if not trained_models:
    print("⚠️ No models were trained - nothing to deploy")
    print("\nRecommendations:")
    print("  - Collect more data")
    print("  - Review feature engineering")
elif all_passed:
    print("✓ All models beat baseline - READY FOR DEPLOYMENT")
    print("\nNext steps:")
    print("  1. Download saved_models/ folder")
    print("  2. Copy to backend/models/saved_models/")
    print("  3. Restart backend")
else:
    print(f"⚠️ Some models did not pass baseline: {failed}")
    print("\nRecommendations:")
    print("  - Collect more data")
    print("  - Review feature engineering")
    print("  - Only deploy passing models")

In [None]:
# Visualizations
import matplotlib.pyplot as plt

viz_rule = "=" * 60
print("\n" + viz_rule)
print("GENERATING VISUALIZATIONS")
print(viz_rule)

# 2x2 grid: distribution, model vs baseline, recent series, feature importance.
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
(ax_dist, ax_perf), (ax_series, ax_imp) = axes

# 1. Training data distribution
mean_gas = current_gas.mean()
ax_dist.hist(current_gas.values, bins=50, alpha=0.7, color='blue', edgecolor='black')
ax_dist.set_xlabel('Gas Price (gwei)')
ax_dist.set_ylabel('Frequency')
ax_dist.set_title('Gas Price Distribution')
ax_dist.axvline(mean_gas, color='red', linestyle='--', label=f'Mean: {mean_gas:.2f}')
ax_dist.legend()

# 2. Model performance comparison
horizons = list(trained_models.keys())
maes = []
baselines = []
for h in horizons:
    maes.append(trained_models[h]['metrics']['mae'])
    # The 24h horizon is compared against the 4h baseline; unknown horizons
    # also fall back to the 4h baseline.
    baselines.append(BASELINES.get(h.replace('24h', '4h'), BASELINES['4h'])['best'])

x = np.arange(len(horizons))
width = 0.35
bars1 = ax_perf.bar(x - width/2, maes, width, label='Model MAE', color='steelblue')
bars2 = ax_perf.bar(x + width/2, baselines, width, label='Baseline MAE', color='coral')
ax_perf.set_xlabel('Horizon')
ax_perf.set_ylabel('MAE (gwei)')
ax_perf.set_title('Model vs Baseline Performance')
ax_perf.set_xticks(x)
ax_perf.set_xticklabels(horizons)
ax_perf.legend()

# Label each bar pair with the relative improvement over baseline.
for i, (model_mae, base_mae) in enumerate(zip(maes, baselines)):
    imp = (base_mae - model_mae) / base_mae * 100
    if imp > 0:
        label_color = 'green'
    else:
        label_color = 'red'
    ax_perf.annotate(f'{imp:+.1f}%', xy=(i, max(model_mae, base_mae) + 0.02), ha='center', fontsize=9, color=label_color)

# 3. Gas price time series (sample): the most recent rows, 2000 at most.
sample_size = min(2000, len(current_gas))
sample_gas = current_gas.iloc[-sample_size:]
ax_series.plot(sample_gas.index, sample_gas.values, linewidth=0.5, alpha=0.8)
ax_series.set_xlabel('Time')
ax_series.set_ylabel('Gas Price (gwei)')
ax_series.set_title(f'Recent Gas Prices (last {sample_size} samples)')
ax_series.tick_params(axis='x', rotation=45)

# 4. Feature importance (top 10)
if not FEATURE_IMPORTANCE:
    ax_imp.text(0.5, 0.5, 'No feature importance data', ha='center', va='center')
    ax_imp.set_title('Feature Importance')
else:
    top_features = sorted(FEATURE_IMPORTANCE.items(), key=lambda item: item[1], reverse=True)[:10]
    features_plot = [feat_name[:20] for feat_name, _ in top_features]  # Truncate names
    importances = [score for _, score in top_features]

    y_pos = np.arange(len(features_plot))
    ax_imp.barh(y_pos, importances, color='teal')
    ax_imp.set_yticks(y_pos)
    ax_imp.set_yticklabels(features_plot)
    ax_imp.invert_yaxis()  # largest importance at the top
    ax_imp.set_xlabel('Importance')
    ax_imp.set_title('Top 10 Feature Importance')

plt.tight_layout()
plt.savefig('saved_models/training_results.png', dpi=150, bbox_inches='tight')
plt.show()

print("\n✓ Saved training_results.png")

In [None]:
# Create zip file for download
import shutil

# Bundle everything under saved_models/ into gweizy_models.zip.
archive_base, archive_format, source_dir = 'gweizy_models', 'zip', 'saved_models'
shutil.make_archive(archive_base, archive_format, source_dir)
print("\n✅ Created gweizy_models.zip")
print("\nDownload this file and extract to: backend/models/saved_models/")

# Auto-download through the Colab files helper imported in the upload cell.
files.download('gweizy_models.zip')