# Gweizy Model Training Notebook

Train all gas prediction models for Gweizy.

## Instructions:
1. Upload your `gas_data.db` file (from `backend/gas_data.db`)
2. Run all cells
3. Download the trained models zip file
4. Extract to `backend/models/saved_models/` and push to GitHub

In [None]:
# Install dependencies
# Installs the listed third-party packages into the runtime; -q reduces pip's output.
!pip install -q scikit-learn pandas numpy joblib lightgbm xgboost matplotlib seaborn optuna

In [None]:
# Upload your gas_data.db file
from google.colab import files
import os

# Ask the user for the SQLite database through the Colab upload widget, then
# report whether a file with the expected name was actually received.
print("Upload your gas_data.db file from backend/gas_data.db")
uploaded = files.upload()

if 'gas_data.db' not in uploaded:
    print("❌ Please upload gas_data.db")
else:
    # `uploaded` maps file names to their raw contents; len() is the size in bytes.
    print(f"\n✅ Uploaded gas_data.db ({len(uploaded['gas_data.db']) / 1024 / 1024:.1f} MB)")

In [None]:
import sqlite3
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Load data from database.
# The connection is closed in a `finally` so it is released even when the
# query fails (for example when the uploaded file has no gas_prices table).
conn = sqlite3.connect('gas_data.db')
try:
    df = pd.read_sql("""
        SELECT timestamp, current_gas as gas, base_fee, priority_fee,
               block_number, gas_used, gas_limit, utilization
        FROM gas_prices ORDER BY timestamp ASC
    """, conn)
finally:
    conn.close()

# Index by time so the resampling and gap detection below can work on the index.
df['timestamp'] = pd.to_datetime(df['timestamp'])
df = df.set_index('timestamp').sort_index()

print(f"Total records: {len(df):,}")
print(f"Date range: {df.index.min()} to {df.index.max()}")

# Resample to 1-minute (reduces noise, easier to work with).
# Minutes without any gas observation are dropped, which is what creates the
# gaps used for segment detection below.
print("\nResampling to 1-minute intervals...")
df = df.resample('1min').mean().dropna(subset=['gas'])
print(f"After resample: {len(df):,} records")

# Find segments (gap > 30 min = new segment).
# The cumulative sum of "gap" flags gives every contiguous run its own id.
df['time_diff'] = df.index.to_series().diff()
df['segment'] = (df['time_diff'] > pd.Timedelta(minutes=30)).cumsum()

segment_sizes = df.groupby('segment').size()
print(f"\nSegments found: {len(segment_sizes)}")
print(f"Segment sizes: {segment_sizes.sort_values(ascending=False).head(10).tolist()}")

# Keep segments with at least 120 minutes (2 hours) of data
MIN_SEGMENT_SIZE = 120
good_segments = segment_sizes[segment_sizes >= MIN_SEGMENT_SIZE].index.tolist()
df = df[df['segment'].isin(good_segments)]
print(f"\nKeeping {len(good_segments)} segments with >= {MIN_SEGMENT_SIZE} minutes")
print(f"Total usable records: {len(df):,}")

# Rows per hour after the 1-minute resample.
RECORDS_PER_HOUR = 60

In [None]:
# Fetch ETH Price Data (External Feature)
import requests
from datetime import datetime, timedelta

# Section banner so the ETH-price step stands out in the cell output.
print("="*60)
print("FETCHING EXTERNAL DATA: ETH PRICE")
print("="*60)

def fetch_eth_price_history(start_date, end_date):
    """Download the ETH/USD price series for a time range from CoinGecko.

    Args:
        start_date: Start of the range; must expose ``.timestamp()``.
        end_date: End of the range; must expose ``.timestamp()``.

    Returns:
        A DataFrame indexed by ``timestamp`` with a single ``eth_price``
        column when the API answers with HTTP 200, otherwise ``None``.
        Any exception (bad arguments, network error, malformed payload) is
        reported on stdout and also results in ``None``.
    """
    endpoint = "https://api.coingecko.com/api/v3/coins/ethereum/market_chart/range"
    try:
        # The API takes the range bounds as integer epoch seconds.
        query = {
            'vs_currency': 'usd',
            'from': int(start_date.timestamp()),
            'to': int(end_date.timestamp()),
        }

        print(f"Fetching ETH prices from {start_date} to {end_date}...")
        response = requests.get(endpoint, params=query, timeout=30)

        if response.status_code != 200:
            print(f"  API returned status {response.status_code}")
            return None

        # Each price point is a [timestamp_ms, price] pair.
        price_points = response.json().get('prices', [])
        eth_df = pd.DataFrame(price_points, columns=['timestamp', 'eth_price'])
        eth_df['timestamp'] = pd.to_datetime(eth_df['timestamp'], unit='ms')
        eth_df = eth_df.set_index('timestamp')

        print(f"  Fetched {len(eth_df)} ETH price points")
        return eth_df

    except Exception as e:
        print(f"  Failed to fetch ETH prices: {e}")
        return None

# Get date range from our gas data
start_date = df.index.min()
end_date = df.index.max()

# Fetch ETH prices
eth_prices = fetch_eth_price_history(start_date, end_date)

if eth_prices is not None and len(eth_prices) > 0:
    # Resample to 1-minute to match gas data
    eth_prices = eth_prices.resample('1min').ffill()

    # Merge with gas data.
    # Drop any eth_price column left by a previous run of this cell first:
    # DataFrame.join raises on overlapping column names, which would make the
    # cell fail when it is executed a second time.
    df = df.drop(columns=['eth_price'], errors='ignore').join(eth_prices, how='left')
    # Forward-fill gaps, then back-fill rows that precede the first price point.
    df['eth_price'] = df['eth_price'].ffill().bfill()

    print(f"  Merged ETH prices with gas data")
    print(f"  ETH price range: ${df['eth_price'].min():.2f} - ${df['eth_price'].max():.2f}")

    HAS_ETH_PRICE = True
else:
    print("  Could not fetch ETH prices - will proceed without")
    # Placeholder keeps the column present; it is NaN in every row, so
    # downstream code must not require it to be non-null.
    df['eth_price'] = np.nan
    HAS_ETH_PRICE = False

# Add network utilization features from existing data
print("\nAdding network utilization features...")
if 'utilization' in df.columns:
    # Fill missing utilization from gas_used / gas_limit; the epsilon avoids
    # division by zero when gas_limit is 0.
    df['utilization'] = df['utilization'].fillna(df['gas_used'] / (df['gas_limit'] + 1e-8))
    print(f"  Utilization range: {df['utilization'].min():.2%} - {df['utilization'].max():.2%}")
else:
    df['utilization'] = df['gas_used'] / (df['gas_limit'] + 1e-8)
    print(f"  Created utilization from gas_used/gas_limit")

In [None]:
# Feature Engineering - IMPROVED with micro-features, ETH price, and utilization
# Key: Use SHORT windows (max 4h) + MICRO windows (5min, 15min, 30min) for 1h

print("Engineering features with MICRO + SHORT windows + EXTERNAL features...")

def engineer_features_for_segment(seg_df, has_eth=False):
    """Engineer features and targets for a single continuous segment.

    All lags, rolling windows and targets are expressed in rows and assume
    one row per minute (``rph = 60``), so the segment must not contain gaps.

    Args:
        seg_df: DataFrame with a DatetimeIndex and a ``gas`` column. The
            optional ``utilization`` and ``eth_price`` columns enable the
            corresponding feature groups.
        has_eth: When True (and ``eth_price`` holds at least one non-null
            value) the ETH price features are added.

    Returns:
        A copy of ``seg_df`` with the feature columns plus the targets
        ``target_*``, ``target_pct_*``, ``target_diff_*`` and
        ``direction_class_*``. Rows near the segment edges contain NaN where
        a lag, window or future value is unavailable.
    """
    df = seg_df.copy()
    rph = 60  # records per hour (1-min intervals)

    # === Log transform gas (helps with skewed distribution) ===
    df['gas_log'] = np.log1p(df['gas'])

    # === Time features (ENHANCED) ===
    df['hour'] = df.index.hour
    df['minute'] = df.index.minute
    df['day_of_week'] = df.index.dayofweek
    # Cyclical encodings so hour 23 and hour 0 (and Sunday/Monday) are neighbours.
    df['hour_sin'] = np.sin(2 * np.pi * df['hour'] / 24)
    df['hour_cos'] = np.cos(2 * np.pi * df['hour'] / 24)
    df['day_sin'] = np.sin(2 * np.pi * df['day_of_week'] / 7)
    df['day_cos'] = np.cos(2 * np.pi * df['day_of_week'] / 7)
    df['is_weekend'] = (df['day_of_week'] >= 5).astype(int)
    df['is_business_hours'] = ((df['hour'] >= 9) & (df['hour'] <= 17)).astype(int)
    # Peak hours for Ethereum (typically 14:00-22:00 UTC)
    df['is_peak_hours'] = ((df['hour'] >= 14) & (df['hour'] <= 22)).astype(int)

    # === DAY-OF-WEEK ONE-HOT ENCODING (NEW) ===
    for day in range(7):
        df[f'dow_{day}'] = (df['day_of_week'] == day).astype(int)

    # === ETH PRICE FEATURES (if available) ===
    if has_eth and 'eth_price' in df.columns and df['eth_price'].notna().any():
        df['eth_log'] = np.log1p(df['eth_price'])

        # ETH momentum (price changes) - ENHANCED with more lags
        for mins in [15, 30, 60]:
            df[f'eth_change_{mins}min'] = df['eth_price'].pct_change(mins)

        # ETH price changes over hours (NEW)
        for hours in [1, 2, 4]:
            window = hours * rph
            df[f'eth_change_{hours}h'] = df['eth_price'].pct_change(window)
            df[f'eth_momentum_{hours}h'] = df['eth_price'] - df['eth_price'].shift(window)

        # ETH volatility
        for hours in [1, 2, 4]:
            window = hours * rph
            df[f'eth_std_{hours}h'] = df['eth_price'].rolling(window, min_periods=window//2).std()
            df[f'eth_volatility_{hours}h'] = df[f'eth_std_{hours}h'] / (df['eth_price'].rolling(window).mean() + 1e-8)

        # Gas-ETH correlation (rolling)
        df['gas_eth_corr_1h'] = df['gas'].rolling(60).corr(df['eth_price'])
        df['gas_eth_corr_4h'] = df['gas'].rolling(240).corr(df['eth_price'])

        # ETH price position
        df['eth_zscore_1h'] = (df['eth_price'] - df['eth_price'].rolling(60).mean()) / (df['eth_price'].rolling(60).std() + 1e-8)
        df['eth_zscore_4h'] = (df['eth_price'] - df['eth_price'].rolling(240).mean()) / (df['eth_price'].rolling(240).std() + 1e-8)

        # ETH trend indicators (NEW)
        df['eth_trend_1h_4h'] = df['eth_price'].rolling(60).mean() / (df['eth_price'].rolling(240).mean() + 1e-8)

    # === NETWORK UTILIZATION FEATURES ===
    if 'utilization' in df.columns:
        # Utilization rolling stats
        for mins in [15, 30]:
            df[f'util_mean_{mins}min'] = df['utilization'].rolling(mins, min_periods=mins//2).mean()
        for hours in [1, 2]:
            window = hours * rph
            df[f'util_mean_{hours}h'] = df['utilization'].rolling(window, min_periods=window//2).mean()
            df[f'util_std_{hours}h'] = df['utilization'].rolling(window, min_periods=window//2).std()

        # High utilization indicator (>90%)
        df['high_utilization'] = (df['utilization'] > 0.9).astype(int)
        df['high_util_streak'] = df['high_utilization'].rolling(15).sum()  # How many of last 15 min were high

    # === MICRO Lag features (for 1h prediction) ===
    for lag_mins in [5, 10, 15, 30]:
        df[f'gas_lag_{lag_mins}min'] = df['gas'].shift(lag_mins)
        df[f'gas_change_{lag_mins}min'] = df['gas'] - df['gas'].shift(lag_mins)
        df[f'gas_pct_change_{lag_mins}min'] = df['gas'].pct_change(lag_mins)

    # === MICRO Rolling stats (5min, 15min, 30min windows) ===
    for window_mins in [5, 15, 30]:
        df[f'gas_mean_{window_mins}min'] = df['gas'].rolling(window_mins, min_periods=window_mins//2).mean()
        df[f'gas_std_{window_mins}min'] = df['gas'].rolling(window_mins, min_periods=window_mins//2).std()
        df[f'gas_min_{window_mins}min'] = df['gas'].rolling(window_mins, min_periods=window_mins//2).min()
        df[f'gas_max_{window_mins}min'] = df['gas'].rolling(window_mins, min_periods=window_mins//2).max()
        # Volatility
        df[f'gas_range_{window_mins}min'] = df[f'gas_max_{window_mins}min'] - df[f'gas_min_{window_mins}min']
        df[f'gas_cv_{window_mins}min'] = df[f'gas_std_{window_mins}min'] / (df[f'gas_mean_{window_mins}min'] + 1e-8)

    # === Standard Lag features (hours) ===
    for lag_hours in [1, 2, 4]:
        df[f'gas_lag_{lag_hours}h'] = df['gas'].shift(lag_hours * rph)
        df[f'gas_log_lag_{lag_hours}h'] = df['gas_log'].shift(lag_hours * rph)

    # === Rolling stats (SHORT windows: 1h, 2h, 4h) ===
    for window_hours in [1, 2, 4]:
        window = window_hours * rph
        df[f'gas_mean_{window_hours}h'] = df['gas'].rolling(window, min_periods=window//2).mean()
        df[f'gas_std_{window_hours}h'] = df['gas'].rolling(window, min_periods=window//2).std()
        df[f'gas_min_{window_hours}h'] = df['gas'].rolling(window, min_periods=window//2).min()
        df[f'gas_max_{window_hours}h'] = df['gas'].rolling(window, min_periods=window//2).max()
        df[f'gas_median_{window_hours}h'] = df['gas'].rolling(window, min_periods=window//2).median()

        # EMA (Exponential Moving Average)
        df[f'gas_ema_{window_hours}h'] = df['gas'].ewm(span=window, min_periods=window//2).mean()

        # Volatility features
        df[f'gas_cv_{window_hours}h'] = df[f'gas_std_{window_hours}h'] / (df[f'gas_mean_{window_hours}h'] + 1e-8)
        df[f'gas_range_{window_hours}h'] = df[f'gas_max_{window_hours}h'] - df[f'gas_min_{window_hours}h']
        df[f'gas_range_pct_{window_hours}h'] = df[f'gas_range_{window_hours}h'] / (df[f'gas_mean_{window_hours}h'] + 1e-8)

    # === MICRO Momentum (for 1h) ===
    for mins in [5, 15, 30]:
        df[f'momentum_{mins}min'] = df['gas'] - df['gas'].shift(mins)
        df[f'momentum_pct_{mins}min'] = df['gas'].pct_change(mins)
        # Acceleration (rate of change of momentum)
        df[f'acceleration_{mins}min'] = df[f'momentum_{mins}min'] - df[f'momentum_{mins}min'].shift(mins)

    # === Standard Momentum ===
    for hours in [1, 2]:
        periods = hours * rph
        df[f'momentum_{hours}h'] = df['gas'] - df['gas'].shift(periods)
        df[f'momentum_pct_{hours}h'] = df['gas'].pct_change(periods)
        df[f'acceleration_{hours}h'] = df[f'momentum_{hours}h'] - df[f'momentum_{hours}h'].shift(periods)
        df[f'direction_{hours}h'] = np.sign(df[f'momentum_{hours}h'])

    # === Z-score ===
    for hours in [1, 2, 4]:
        df[f'gas_zscore_{hours}h'] = (df['gas'] - df[f'gas_mean_{hours}h']) / (df[f'gas_std_{hours}h'] + 1e-8)

    # === Trend indicators ===
    df['trend_15min_1h'] = df['gas_mean_15min'] / (df['gas_mean_1h'] + 1e-8)
    df['trend_30min_1h'] = df['gas_mean_30min'] / (df['gas_mean_1h'] + 1e-8)
    df['trend_1h_2h'] = df['gas_mean_1h'] / (df['gas_mean_2h'] + 1e-8)
    df['trend_1h_4h'] = df['gas_mean_1h'] / (df['gas_mean_4h'] + 1e-8)
    df['ema_trend_short'] = df['gas_ema_1h'] / (df['gas_ema_2h'] + 1e-8)
    df['ema_trend_long'] = df['gas_ema_1h'] / (df['gas_ema_4h'] + 1e-8)

    # === Price position (where is current price in recent range) ===
    for window in ['30min', '1h', '2h', '4h']:
        col_max = f'gas_max_{window}'
        col_min = f'gas_min_{window}'
        if col_max in df.columns and col_min in df.columns:
            range_size = df[col_max] - df[col_min]
            df[f'price_position_{window}'] = (df['gas'] - df[col_min]) / (range_size + 1e-8)

    # === REGIME DETECTION FEATURES ===
    # Volatility regime.
    # include_lowest=True closes the first bin on the left so a coefficient of
    # variation of exactly 0 (gas flat for the whole window) is labelled 0
    # instead of NaN; a NaN here would get the row dropped by a later dropna().
    df['volatility_regime'] = pd.cut(
        df['gas_cv_1h'],
        bins=[0, 0.05, 0.15, float('inf')],
        labels=[0, 1, 2],  # 0=Low, 1=Medium, 2=High
        include_lowest=True
    ).astype(float)

    # Activity regime based on gas level and volatility.
    # NOTE: the median is taken over the whole segment, so these two flags
    # depend on rows that come after the current one.
    gas_median = df['gas'].median()
    df['is_high_gas'] = (df['gas'] > gas_median * 1.5).astype(int)
    df['is_spike'] = (df['gas'] > gas_median * 2).astype(int)

    # Combined regime: 0=Normal, 1=Elevated, 2=Spike
    df['activity_regime'] = 0
    df.loc[df['gas_cv_1h'] > 0.1, 'activity_regime'] = 1  # Elevated volatility
    df.loc[df['is_spike'] == 1, 'activity_regime'] = 2    # Spike

    # === Targets (absolute) ===
    df['target_1h'] = df['gas'].shift(-1 * rph)
    df['target_4h'] = df['gas'].shift(-4 * rph)
    df['target_24h'] = df['gas'].shift(-4 * rph)  # Actually 4h (honest labeling)

    # === Targets (percentage change - for differencing approach) ===
    df['target_pct_1h'] = (df['target_1h'] - df['gas']) / (df['gas'] + 1e-8)
    df['target_pct_4h'] = (df['target_4h'] - df['gas']) / (df['gas'] + 1e-8)

    # === Targets (difference - for target differencing) ===
    df['target_diff_1h'] = df['target_1h'] - df['gas']
    df['target_diff_4h'] = df['target_4h'] - df['gas']

    # === Direction targets (for classification) ===
    threshold = 0.02  # 2% change threshold

    def classify_direction(pct_change, threshold):
        """Map a relative change to 0 (down), 1 (stable) or 2 (up).

        A NaN input fails both comparisons and therefore falls through to 1.
        """
        if pct_change < -threshold:
            return 0  # Down
        elif pct_change > threshold:
            return 2  # Up
        else:
            return 1  # Stable

    df['direction_class_1h'] = df['target_pct_1h'].apply(lambda x: classify_direction(x, threshold))
    df['direction_class_4h'] = df['target_pct_4h'].apply(lambda x: classify_direction(x, threshold))

    return df

# Process each segment independently so lags and rolling windows never reach
# across a gap between two segments.
print("Processing segments independently...")
all_features = []

for seg_id in df['segment'].unique():
    # The bookkeeping columns are only needed to split the data, not as features.
    seg_df = df.loc[df['segment'] == seg_id].drop(columns=['segment', 'time_diff'])
    if len(seg_df) < MIN_SEGMENT_SIZE:
        continue
    featured = engineer_features_for_segment(seg_df, has_eth=HAS_ETH_PRICE)
    all_features.append(featured)
    print(f"  Segment {seg_id}: {len(seg_df)} → {len(featured.dropna())} usable rows")

# Combine all segments; infinities are turned into NaN so a later dropna()
# removes them together with the other incomplete rows.
df_features = pd.concat(all_features).replace([np.inf, -np.inf], np.nan)

print(f"\nTotal featured samples: {len(df_features):,}")
print(f"After dropping NaN: {len(df_features.dropna()):,}")
print(f"ETH features included: {HAS_ETH_PRICE}")

In [None]:
# Prepare training data with feature selection
from sklearn.preprocessing import RobustScaler
from sklearn.ensemble import RandomForestRegressor

# Columns to exclude from features
exclude_cols = ['gas', 'gas_log', 'base_fee', 'priority_fee', 'block_number', 
                'gas_used', 'gas_limit', 'utilization', 'eth_price',
                'target_1h', 'target_4h', 'target_24h',
                'target_pct_1h', 'target_pct_4h',
                'target_diff_1h', 'target_diff_4h',
                'direction_class_1h', 'direction_class_4h',
                'volatility_regime']

# Columns that are NaN in every row (e.g. the eth_price placeholder created
# when the ETH fetch fails) carry no information, and a plain dropna() would
# discard every sample because of them. They are ignored for row filtering
# and never used as features.
all_nan_cols = [c for c in df_features.columns if df_features[c].isna().all()]

feature_cols = [c for c in df_features.columns
                if c not in exclude_cols and c not in all_nan_cols]
print(f"Initial feature columns: {len(feature_cols)}")

# Drop rows with NaN (in any column that has at least one real value)
df_clean = df_features.dropna(
    subset=[c for c in df_features.columns if c not in all_nan_cols]
)
print(f"Clean samples: {len(df_clean):,}")

# === Feature Selection Step 1: Remove highly correlated features (>0.90) ===
print("\n" + "="*60)
print("FEATURE SELECTION")
print("="*60)
print("\nStep 1: Removing highly correlated features (>0.90)...")
X_temp = df_clean[feature_cols]
corr_matrix = X_temp.corr().abs()

# Find pairs with correlation > 0.90 (stricter for small dataset).
# Only the strict upper triangle is inspected, so of two correlated columns
# the one that appears later in feature_cols is the one dropped.
upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
to_drop_corr = [column for column in upper.columns if any(upper[column] > 0.90)]
print(f"  Dropping {len(to_drop_corr)} highly correlated features")

feature_cols = [c for c in feature_cols if c not in to_drop_corr]
print(f"  Features after correlation filter: {len(feature_cols)}")

# === Feature Selection Step 2: Drop low-importance features ===
print("\nStep 2: Identifying low-importance features using RandomForest...")

# Quick RF to get feature importance
X_importance = df_clean[feature_cols]
y_importance = df_clean['target_4h']  # Use 4h target for importance

# Use a subset for speed. A seeded generator makes the subsample, and hence
# the selected feature set, reproducible across runs.
sample_size = min(5000, len(X_importance))
sample_rng = np.random.default_rng(42)
sample_idx = sample_rng.choice(len(X_importance), sample_size, replace=False)
X_sample = X_importance.iloc[sample_idx]
y_sample = y_importance.iloc[sample_idx]

# Scale and fit RF
scaler_temp = RobustScaler()
X_sample_scaled = scaler_temp.fit_transform(X_sample)

rf_importance = RandomForestRegressor(n_estimators=50, max_depth=10, random_state=42, n_jobs=-1)
rf_importance.fit(X_sample_scaled, y_sample)

# Get importance scores
importance_dict = dict(zip(feature_cols, rf_importance.feature_importances_))
sorted_importance = sorted(importance_dict.items(), key=lambda x: x[1], reverse=True)

# Show top 20 features
print("\n  Top 20 most important features:")
for feat, imp in sorted_importance[:20]:
    print(f"    {feat}: {imp:.4f}")

# Drop bottom 25% of features by importance
n_to_keep = int(len(feature_cols) * 0.75)
important_features = [f[0] for f in sorted_importance[:n_to_keep]]
dropped_features = [f[0] for f in sorted_importance[n_to_keep:]]
print(f"\n  Dropping {len(dropped_features)} low-importance features (bottom 25%)")
print(f"  Features after importance filter: {len(important_features)}")

# Update feature_cols
feature_cols = important_features

# === Create horizon-specific feature sets ===
# 1h model benefits from micro-features
micro_features = [c for c in feature_cols if 'min' in c or 'micro' in c.lower()]
hour_features = [c for c in feature_cols if 'h' in c and 'min' not in c]
time_features = [c for c in feature_cols if any(t in c for t in ['hour', 'day', 'sin', 'cos', 'weekend', 'business', 'peak', 'dow'])]
trend_features = [c for c in feature_cols if 'trend' in c or 'position' in c or 'zscore' in c]
eth_features = [c for c in feature_cols if 'eth' in c.lower()]

# 1h: prioritize micro-features + short-term.
# The set is used only for membership; iterating feature_cols keeps the
# column order deterministic (set iteration order over strings varies
# between interpreter runs).
selected_1h = set(micro_features + time_features + trend_features + eth_features +
                  [c for c in feature_cols if '1h' in c or '2h' in c])
features_1h = [c for c in feature_cols if c in selected_1h]

# 4h: use all features but weight longer-term
features_4h = feature_cols  # Use all for 4h

print(f"\n1h model features: {len(features_1h)}")
print(f"4h model features: {len(features_4h)}")

# Prepare data
X = df_clean[feature_cols]
X_1h = df_clean[[c for c in features_1h if c in df_clean.columns]]
X_4h = df_clean[[c for c in features_4h if c in df_clean.columns]]

y_1h = df_clean['target_1h']
y_4h = df_clean['target_4h']
y_24h = df_clean['target_24h']

# Percentage targets (for differencing approach)
y_pct_1h = df_clean['target_pct_1h']
y_pct_4h = df_clean['target_pct_4h']

# Difference targets (for target differencing)
y_diff_1h = df_clean['target_diff_1h']
y_diff_4h = df_clean['target_diff_4h']

# Direction targets for classification
y_dir_1h = df_clean['direction_class_1h']
y_dir_4h = df_clean['direction_class_4h']

# Volatility regime for confidence
volatility_regime = df_clean['volatility_regime']

# Store current gas for baseline and reconstruction
current_gas = df_clean['gas']

# === Baseline Models ===
print(f"\n{'='*60}")
print("BASELINE COMPARISONS")
print(f"{'='*60}")

# Naive baseline: predict last known value (the same prediction serves both horizons)
naive_pred_1h = current_gas.values
naive_mae_1h = np.mean(np.abs(y_1h.values - naive_pred_1h))
naive_mae_4h = np.mean(np.abs(y_4h.values - naive_pred_1h))

# Mean baseline: predict historical mean.
# NOTE: the 4h baseline also uses the mean of the 1h target as its constant.
mean_pred = np.full_like(y_1h.values, y_1h.mean())
mean_mae_1h = np.mean(np.abs(y_1h.values - mean_pred))
mean_mae_4h = np.mean(np.abs(y_4h.values - mean_pred))

# Drift baseline: extrapolate recent trend
drift_pred_1h = current_gas.values + df_clean['momentum_1h'].values
drift_mae_1h = np.mean(np.abs(y_1h.values - drift_pred_1h))

print(f"\nBaseline MAEs:")
print(f"  Naive (current price):     MAE_1h={naive_mae_1h:.6f}, MAE_4h={naive_mae_4h:.6f}")
print(f"  Mean (historical average): MAE_1h={mean_mae_1h:.6f}, MAE_4h={mean_mae_4h:.6f}")
print(f"  Drift (extrapolate trend): MAE_1h={drift_mae_1h:.6f}")

# Use best baseline for comparison
best_baseline_1h = min(naive_mae_1h, mean_mae_1h, drift_mae_1h)
best_baseline_4h = min(naive_mae_4h, mean_mae_4h)

print(f"\n  Best baseline 1h: {best_baseline_1h:.6f}")
print(f"  Best baseline 4h: {best_baseline_4h:.6f}")

# Store baselines for comparison
BASELINES = {
    '1h': {'naive_mae': naive_mae_1h, 'mean_mae': mean_mae_1h, 'drift_mae': drift_mae_1h, 'best': best_baseline_1h},
    '4h': {'naive_mae': naive_mae_4h, 'mean_mae': mean_mae_4h, 'best': best_baseline_4h}
}

# Store feature importance for saving
FEATURE_IMPORTANCE = importance_dict

print(f"\n{'='*60}")
print("TRAINING DATA SUMMARY")
print(f"{'='*60}")
print(f"Samples: {len(X):,}")
print(f"Features (all): {len(feature_cols)}")
print(f"Features (1h specific): {len(features_1h)}")
print(f"Target 1h range: {y_1h.min():.4f} - {y_1h.max():.4f} gwei")

In [None]:
# Model Training with Stacking Ensemble, Target Differencing, and Direction Constraints
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, StackingRegressor
from sklearn.linear_model import Ridge, ElasticNet, HuberRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import TimeSeriesSplit, RandomizedSearchCV
import joblib
import warnings
warnings.filterwarnings('ignore')

def time_series_cv(model, X, y, n_splits=5):
    """Evaluate ``model`` with expanding-window time-series cross-validation.

    For every fold a fresh RobustScaler is fitted on the training part only,
    the model is refitted in place, and MAE and R² are computed on the
    validation part.

    Args:
        model: Estimator exposing ``fit`` and ``predict``; it is refitted on
            every fold, so it is left fitted on the last fold's training data.
        X: Feature DataFrame, ordered in time.
        y: Target Series aligned with ``X``.
        n_splits: Number of TimeSeriesSplit folds.

    Returns:
        Dict with ``mae_mean``, ``mae_std``, ``r2_mean`` and ``r2_std``
        aggregated over the folds.
    """
    splitter = TimeSeriesSplit(n_splits=n_splits)
    fold_mae = []
    fold_r2 = []

    for train_idx, val_idx in splitter.split(X):
        # Fit the scaler on the training fold only to avoid leaking
        # validation statistics into the model.
        fold_scaler = RobustScaler()
        train_scaled = fold_scaler.fit_transform(X.iloc[train_idx])
        val_scaled = fold_scaler.transform(X.iloc[val_idx])

        model.fit(train_scaled, y.iloc[train_idx])
        fold_pred = model.predict(val_scaled)

        y_val = y.iloc[val_idx]
        fold_mae.append(mean_absolute_error(y_val, fold_pred))
        fold_r2.append(r2_score(y_val, fold_pred))

    return {
        'mae_mean': np.mean(fold_mae),
        'mae_std': np.std(fold_mae),
        'r2_mean': np.mean(fold_r2),
        'r2_std': np.std(fold_r2)
    }

def train_1h_model(X, y, y_diff, current_gas, baseline_mae):
    """
    Train 1h model with SIMPLER models + target differencing.
    Also tries stacking ensemble for potentially better results.

    The data is split chronologically (first 80% train, last 20% test), scaled
    with a RobustScaler fitted on the training part, and every candidate is
    scored on the test part through ``evaluate_model``.

    Args:
        X: Feature DataFrame, ordered in time.
        y: Absolute 1h target aligned with ``X``.
        y_diff: Difference target (future gas minus current gas).
        current_gas: Current gas values, used to turn predicted differences
            back into absolute prices.
        baseline_mae: MAE of the best baseline, passed to ``evaluate_model``.

    Returns:
        Tuple ``(best, results, feature_names, ensemble_pred, y_test,
        confidence_scores)`` where ``best`` and each entry of ``results`` are
        ``(name, model, metrics, scaler)`` tuples. For the 'Ensemble' entry
        ``model`` is the list of all individual models. The 'Differencing'
        model predicts a change, not an absolute price.
    """
    print(f"\n{'='*60}")
    print("Training 1h models (SIMPLER + TARGET DIFFERENCING)")
    print("=" * 60)
    print(f"Baseline MAE (best): {baseline_mae:.6f}")

    # Time-series split
    split_idx = int(len(X) * 0.8)
    X_train, X_test = X.iloc[:split_idx], X.iloc[split_idx:]
    y_train, y_test = y.iloc[:split_idx], y.iloc[split_idx:]
    y_diff_train = y_diff.iloc[:split_idx]
    gas_test = current_gas.iloc[split_idx:]

    print(f"Train: {len(X_train):,}, Test: {len(X_test):,}")

    # Scale
    scaler = RobustScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    results = []
    all_preds = []

    def _record(name, model, pred):
        """Score absolute-price predictions, store the candidate and print its MAE."""
        metrics = evaluate_model(y_test, pred, baseline_mae)
        results.append((name, model, metrics, scaler))
        all_preds.append(pred)
        print(f"      MAE: {metrics['mae']:.6f}, vs Baseline: {metrics['vs_baseline']}")

    # === 1. Ridge (strong regularization for noisy data) ===
    print("\n[1/6] Ridge Regression (high regularization)...")
    ridge = Ridge(alpha=10.0, random_state=42)
    ridge.fit(X_train_scaled, y_train)
    _record('Ridge', ridge, ridge.predict(X_test_scaled))

    # === 2. ElasticNet (L1+L2 regularization) ===
    print("[2/6] ElasticNet...")
    elastic = ElasticNet(alpha=1.0, l1_ratio=0.5, random_state=42, max_iter=5000)
    elastic.fit(X_train_scaled, y_train)
    _record('ElasticNet', elastic, elastic.predict(X_test_scaled))

    # === 3. Huber Regressor (robust to outliers) ===
    print("[3/6] Huber Regressor (robust to outliers)...")
    huber = HuberRegressor(epsilon=1.35, alpha=1.0, max_iter=1000)
    huber.fit(X_train_scaled, y_train)
    _record('Huber', huber, huber.predict(X_test_scaled))

    # === 4. Target Differencing (predict change, then reconstruct) ===
    print("[4/6] Target Differencing (predict change)...")
    diff_model = Ridge(alpha=5.0, random_state=42)
    diff_model.fit(X_train_scaled, y_diff_train)
    diff_pred = diff_model.predict(X_test_scaled)
    # Reconstruct absolute price from predicted difference
    diff_absolute_pred = gas_test.values + diff_pred
    _record('Differencing', diff_model, diff_absolute_pred)

    # === 5. LightGBM with aggressive regularization ===
    try:
        import lightgbm as lgb
        print("[5/6] LightGBM (high regularization)...")

        # Hold out the most recent 10% of the training data for early stopping.
        val_split = int(len(X_train_scaled) * 0.9)
        X_tr, X_val = X_train_scaled[:val_split], X_train_scaled[val_split:]
        y_tr, y_val = y_train.iloc[:val_split], y_train.iloc[val_split:]

        lgbm = lgb.LGBMRegressor(
            n_estimators=200, max_depth=4, learning_rate=0.05,
            num_leaves=15, min_child_samples=30, subsample=0.7,
            colsample_bytree=0.7, reg_alpha=1.0, reg_lambda=1.0,
            random_state=42, n_jobs=-1, verbose=-1
        )
        lgbm.fit(X_tr, y_tr, eval_set=[(X_val, y_val)],
                 callbacks=[lgb.early_stopping(30, verbose=False)])

        _record('LightGBM', lgbm, lgbm.predict(X_test_scaled))
    except Exception as e:
        # Best effort: a missing or failing LightGBM must not abort the run.
        print(f"[5/6] LightGBM failed: {e}")

    # === 6. Stacking Ensemble (NEW) ===
    print("[6/6] Stacking Ensemble (Ridge + Huber + RF -> Ridge meta)...")
    try:
        base_estimators = [
            ('ridge', Ridge(alpha=10.0, random_state=42)),
            ('huber', HuberRegressor(epsilon=1.35, alpha=1.0, max_iter=1000)),
            ('rf', RandomForestRegressor(n_estimators=50, max_depth=5, random_state=42, n_jobs=-1))
        ]
        stacking = StackingRegressor(
            estimators=base_estimators,
            final_estimator=Ridge(alpha=0.1, random_state=42),
            cv=3,
            n_jobs=-1
        )
        stacking.fit(X_train_scaled, y_train)
        _record('Stacking', stacking, stacking.predict(X_test_scaled))
    except Exception as e:
        print(f"[6/6] Stacking failed: {e}")

    # === Weighted Ensemble ===
    # The weights are currently uniform, i.e. a plain average of all candidates.
    print("\n[Ensemble] Weighted average (favor simpler models)...")
    weights = [1/len(all_preds)] * len(all_preds)
    ensemble_pred = np.average(all_preds, axis=0, weights=weights)
    ensemble_metrics = evaluate_model(y_test, ensemble_pred, baseline_mae)
    print(f"      MAE: {ensemble_metrics['mae']:.6f}, vs Baseline: {ensemble_metrics['vs_baseline']}")

    # === Select best ===
    all_results = results + [('Ensemble', [r[1] for r in results], ensemble_metrics, scaler)]
    best = max(all_results, key=lambda x: x[2]['improvement'])

    print(f"\n>>> Best 1h model: {best[0]} (MAE: {best[2]['mae']:.6f}, {best[2]['vs_baseline']})")

    # Calculate confidence based on volatility.
    # The ensemble has no single estimator, so the first model (Ridge) stands in for it.
    confidence_scores = calculate_confidence(X_test, y_test, best[1] if best[0] != 'Ensemble' else results[0][1], scaler)

    return best, results, list(X.columns), ensemble_pred, y_test, confidence_scores

def train_4h_model(X, y, y_diff, current_gas, baseline_mae, dir_clf=None, dir_scaler=None):
    """
    Train the 4h regression suite and pick the candidate that beats the baseline most.

    Fitted in order: a RandomizedSearchCV-tuned Random Forest, Gradient Boosting,
    LightGBM and XGBoost (early stopping on the last 10% of the training split),
    a stacking ensemble, and a target-differencing model whose predicted change
    is added back to the current gas price. A plain average of every model that
    trained successfully is evaluated as an extra 'Ensemble' candidate.

    Args:
        X: Feature DataFrame; rows are split 80/20 by position (no shuffling).
        y: Target Series aligned with X.
        y_diff: Target Series for the differencing model; its predictions are
            added to current_gas to obtain absolute values.
        current_gas: Series of current gas prices aligned with X.
        baseline_mae: Baseline MAE forwarded to evaluate_model.
        dir_clf: Accepted for interface compatibility; not used in this function.
        dir_scaler: Accepted for interface compatibility; not used in this function.

    Returns:
        (best, results, feature_names, ensemble_pred, y_test, confidence_scores)
        where best and each entry of results are (name, model, metrics, scaler)
        tuples; for the 'Ensemble' candidate the model slot is the list of all
        fitted models.
    """
    print(f"\n{'='*60}")
    print("Training 4h models (FULL SUITE + STACKING)")
    # The original printed the literal text "='*60}" here (missing f-string braces).
    print("=" * 60)
    print(f"Baseline MAE (best): {baseline_mae:.6f}")
    
    # Time-series split
    split_idx = int(len(X) * 0.8)
    X_train, X_test = X.iloc[:split_idx], X.iloc[split_idx:]
    y_train, y_test = y.iloc[:split_idx], y.iloc[split_idx:]
    y_diff_train, y_diff_test = y_diff.iloc[:split_idx], y_diff.iloc[split_idx:]
    gas_test = current_gas.iloc[split_idx:]
    
    print(f"Train: {len(X_train):,}, Test: {len(X_test):,}")
    
    # Scale
    scaler = RobustScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    results = []
    all_preds = []
    
    def _record(name, model, pred):
        """Evaluate one candidate on the test split, store it, and print its MAE."""
        metrics = evaluate_model(y_test, pred, baseline_mae)
        results.append((name, model, metrics, scaler))
        all_preds.append(pred)
        print(f"      MAE: {metrics['mae']:.6f}, vs Baseline: {metrics['vs_baseline']}")
    
    # === 1. Random Forest with tuning ===
    print("\n[1/6] Random Forest with RandomizedSearchCV...")
    rf_params = {
        'n_estimators': [100, 150, 200],
        'max_depth': [8, 12, 15],
        'min_samples_split': [5, 10, 15],
        'min_samples_leaf': [3, 5, 8]
    }
    
    rf_base = RandomForestRegressor(random_state=42, n_jobs=-1)
    tscv = TimeSeriesSplit(n_splits=3)
    rf_search = RandomizedSearchCV(
        rf_base, rf_params, n_iter=10, cv=tscv, 
        scoring='neg_mean_absolute_error', random_state=42, n_jobs=-1
    )
    rf_search.fit(X_train_scaled, y_train)
    rf = rf_search.best_estimator_
    
    rf_pred = rf.predict(X_test_scaled)
    print(f"      Best params: {rf_search.best_params_}")
    _record('RandomForest', rf, rf_pred)
    
    # === 2. Gradient Boosting ===
    print("[2/6] Gradient Boosting...")
    gb = GradientBoostingRegressor(
        n_estimators=150, max_depth=6, learning_rate=0.05,
        min_samples_split=10, subsample=0.8, random_state=42
    )
    gb.fit(X_train_scaled, y_train)
    _record('GradientBoosting', gb, gb.predict(X_test_scaled))
    
    # Early-stopping validation split (last 10% of the training data).
    # Built outside the LightGBM block so XGBoost does not silently depend on
    # LightGBM having been importable.
    val_split = int(len(X_train_scaled) * 0.9)
    X_tr, X_val = X_train_scaled[:val_split], X_train_scaled[val_split:]
    y_tr, y_val = y_train.iloc[:val_split], y_train.iloc[val_split:]
    
    # === 3. LightGBM ===
    try:
        import lightgbm as lgb
        print("[3/6] LightGBM with early stopping...")
        
        lgbm = lgb.LGBMRegressor(
            n_estimators=500, max_depth=10, learning_rate=0.03,
            num_leaves=31, min_child_samples=20, subsample=0.8,
            colsample_bytree=0.8, reg_alpha=0.1, reg_lambda=0.1,
            random_state=42, n_jobs=-1, verbose=-1
        )
        lgbm.fit(X_tr, y_tr, eval_set=[(X_val, y_val)], 
                 callbacks=[lgb.early_stopping(50, verbose=False)])
        
        _record('LightGBM', lgbm, lgbm.predict(X_test_scaled))
    except Exception as e:
        print(f"[3/6] LightGBM failed: {e}")
    
    # === 4. XGBoost ===
    try:
        import xgboost as xgb
        print("[4/6] XGBoost with early stopping...")
        
        xgbm = xgb.XGBRegressor(
            n_estimators=500, max_depth=8, learning_rate=0.03,
            min_child_weight=5, subsample=0.8, colsample_bytree=0.8,
            reg_alpha=0.1, reg_lambda=1.0, random_state=42, 
            n_jobs=-1, verbosity=0, early_stopping_rounds=50
        )
        xgbm.fit(X_tr, y_tr, eval_set=[(X_val, y_val)], verbose=False)
        
        _record('XGBoost', xgbm, xgbm.predict(X_test_scaled))
    except Exception as e:
        print(f"[4/6] XGBoost failed: {e}")
    
    # === 5. Stacking Ensemble (NEW) ===
    print("[5/6] Stacking Ensemble (RF + GB + Ridge -> Ridge meta)...")
    try:
        base_estimators = [
            ('rf', RandomForestRegressor(n_estimators=100, max_depth=10, random_state=42, n_jobs=-1)),
            ('gb', GradientBoostingRegressor(n_estimators=100, max_depth=5, random_state=42)),
            ('ridge', Ridge(alpha=1.0, random_state=42))
        ]
        stacking = StackingRegressor(
            estimators=base_estimators,
            final_estimator=Ridge(alpha=0.1, random_state=42),
            cv=3,
            n_jobs=-1
        )
        stacking.fit(X_train_scaled, y_train)
        _record('Stacking', stacking, stacking.predict(X_test_scaled))
    except Exception as e:
        print(f"[5/6] Stacking failed: {e}")
    
    # === 6. Target Differencing ===
    print("[6/6] Target Differencing (predict change)...")
    try:
        diff_model = GradientBoostingRegressor(n_estimators=100, max_depth=5, random_state=42)
        diff_model.fit(X_train_scaled, y_diff_train)
        diff_pred = diff_model.predict(X_test_scaled)
        # Convert the predicted change back to an absolute value before scoring.
        diff_absolute_pred = gas_test.values + diff_pred
        _record('Differencing', diff_model, diff_absolute_pred)
    except Exception as e:
        print(f"[6/6] Differencing failed: {e}")
    
    # === Ensemble ===
    print("\n[Ensemble] Average all models...")
    ensemble_pred = np.mean(all_preds, axis=0)
    ensemble_metrics = evaluate_model(y_test, ensemble_pred, baseline_mae)
    print(f"      MAE: {ensemble_metrics['mae']:.6f}, vs Baseline: {ensemble_metrics['vs_baseline']}")
    
    # Select best
    all_results = results + [('Ensemble', [r[1] for r in results], ensemble_metrics, scaler)]
    best = max(all_results, key=lambda x: x[2]['improvement'])
    
    print(f"\n>>> Best 4h model: {best[0]} (MAE: {best[2]['mae']:.6f}, {best[2]['vs_baseline']})")
    
    # Calculate confidence; the 'Ensemble' candidate is a list of models, so
    # the first trained model stands in for it.
    confidence_scores = calculate_confidence(X_test, y_test, best[1] if best[0] != 'Ensemble' else results[0][1], scaler)
    
    return best, results, list(X.columns), ensemble_pred, y_test, confidence_scores

def evaluate_model(y_true, y_pred, baseline_mae):
    """
    Calculate regression metrics and compare the MAE against a baseline.

    Args:
        y_true: Actual values (pandas Series, numpy array or sequence).
        y_pred: Predicted values, same length as y_true.
        baseline_mae: MAE of the reference baseline.

    Returns:
        Dict with 'mae', 'rmse', 'r2', 'directional_accuracy' (share of
        consecutive steps where predicted and actual move in the same
        direction), 'improvement' (percent MAE reduction vs the baseline) and
        'vs_baseline' (the same figure formatted for printing).
    """
    # Accept Series, arrays and plain sequences alike (the original required `.values`).
    actual = np.asarray(y_true)
    predicted = np.asarray(y_pred)
    
    mae = mean_absolute_error(actual, predicted)
    rmse = np.sqrt(mean_squared_error(actual, predicted))
    r2 = r2_score(actual, predicted)
    
    # Directional accuracy
    if len(actual) > 1:
        actual_dir = np.sign(np.diff(actual))
        pred_dir = np.sign(np.diff(predicted))
        dir_acc = np.mean(actual_dir == pred_dir)
    else:
        dir_acc = 0.0
    
    # Compare to baseline. A zero baseline gives no meaningful percentage, so
    # report 0 instead of dividing by zero.
    if baseline_mae:
        improvement = (baseline_mae - mae) / baseline_mae * 100
    else:
        improvement = 0.0
    vs_baseline = f"{improvement:+.1f}%" if improvement != 0 else "0%"
    
    return {
        'mae': mae, 'rmse': rmse, 'r2': r2, 
        'directional_accuracy': dir_acc,
        'vs_baseline': vs_baseline, 'improvement': improvement
    }

def calculate_confidence(X_test, y_test, model, scaler):
    """
    Estimate a per-sample confidence in [0, 1] for a fitted model.

    If the model exposes `estimators_`, confidence is derived from the spread
    of its members' predictions: the standard deviation across members is
    divided by its 95th percentile, clipped to [0, 1] and inverted, so a low
    spread maps to high confidence. Models without usable members get a
    uniform 0.6.

    Args:
        X_test: Test features. A numpy array is used as-is; anything else is
            passed through scaler.transform first.
        y_test: Unused; kept for interface compatibility.
        model: Fitted estimator.
        scaler: Fitted scaler; only consulted when X_test is not a numpy array.

    Returns:
        numpy array with one confidence value per row of X_test.
    """
    X_scaled = X_test if isinstance(X_test, np.ndarray) else scaler.transform(X_test)
    
    # Collect member estimators. `estimators_` can be a flat list or a 2-D
    # object array whose rows are themselves arrays; iterating the latter
    # directly yields ndarrays with no `.predict`, so flatten those first.
    members = []
    raw_members = getattr(model, 'estimators_', None)
    if raw_members is not None:
        for member in raw_members:
            if isinstance(member, np.ndarray):
                members.extend(member.ravel().tolist())
            else:
                members.append(member)
    members = [m for m in members if hasattr(m, 'predict')]
    
    if not members:
        # No per-member predictions available: use uniform medium confidence
        return np.full(len(X_test), 0.6)
    
    # NOTE(review): for boosted models each stage presumably predicts a
    # correction, not the full target, so this spread is only a rough proxy.
    member_preds = np.array([m.predict(X_scaled) for m in members])
    pred_std = np.std(member_preds, axis=0)
    # Lower std = higher confidence
    max_std = np.percentile(pred_std, 95)
    return 1 - np.clip(pred_std / (max_std + 1e-8), 0, 1)

def apply_direction_constraint(predictions, current_gas, dir_clf, dir_scaler, X_test):
    """
    Reconcile regression predictions with a direction classifier.

    Where the classifier's top class probability exceeds 0.6 and the regression
    output contradicts the predicted direction, the prediction is replaced by
    the current gas price nudged 1% in the classifier's direction. All other
    predictions are returned untouched. The input array is not modified.
    """
    features_scaled = dir_scaler.transform(X_test)
    labels = dir_clf.predict(features_scaled)
    probabilities = dir_clf.predict_proba(features_scaled)
    
    adjusted = predictions.copy()
    
    for idx in range(len(predictions)):
        raw_value = predictions[idx]
        gas_now = current_gas.iloc[idx]
        label = labels[idx]  # 0=Down, 1=Stable, 2=Up
        top_prob = np.max(probabilities[idx])
        
        # Leave the prediction alone unless the classifier is confident (>60%)
        if not (top_prob > 0.6):
            continue
        
        if label == 0 and raw_value > gas_now:
            # Classifier says down but regression says up: slightly below current
            adjusted[idx] = gas_now * 0.99
        elif label == 2 and raw_value < gas_now:
            # Classifier says up but regression says down: slightly above current
            adjusted[idx] = gas_now * 1.01
    
    return adjusted

# Cell-completion marker: printed once the definitions above in this cell have run.
print("Training functions defined with Stacking + Differencing + Direction Constraints.")

In [None]:
# Train all models with separate strategies for 1h vs 4h
print("=" * 60, "TRAINING ALL MODELS", "=" * 60, sep="\n")

# 1h horizon: trained on the 1h-specific micro-feature matrix
print("\n>>> Using micro-features for 1h model")
(
    best_1h,
    all_1h,
    features_1h_used,
    pred_1h,
    actual_1h,
    conf_1h,
) = train_1h_model(X_1h, y_1h, y_diff_1h, current_gas, BASELINES['1h']['best'])

# 4h horizon: trained on the full feature matrix
print("\n>>> Using full features for 4h model")
(
    best_4h,
    all_4h,
    features_4h_used,
    pred_4h,
    actual_4h,
    conf_4h,
) = train_4h_model(X_4h, y_4h, y_diff_4h, current_gas, BASELINES['4h']['best'])

# "24h" horizon: no separate model is trained; every 24h name aliases the 4h result
print("\n>>> 24h model = 4h model (data limitation)")
print("    Note: '24h' predictions are actually 4h ahead due to insufficient continuous data")
best_24h, all_24h, pred_24h, actual_24h, conf_24h = (
    best_4h,
    all_4h,
    pred_4h,
    actual_4h,
    conf_4h,
)

# Feature list kept for saving: the full column list, not a per-horizon subset
features = feature_cols

In [None]:
# PREDICTION INTERVALS using Quantile Regression
# Output "25-35 gwei (80% confidence)" instead of just "30 gwei"

from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import QuantileRegressor

# Section banner for the quantile-model cell
print(
    "\n" + "=" * 60,
    "TRAINING QUANTILE MODELS (Prediction Intervals)",
    "=" * 60,
    "Predicting 10th, 50th, and 90th percentiles for confidence intervals",
    sep="\n",
)

def train_quantile_models(X, y, horizon_name, quantiles=(0.1, 0.5, 0.9)):
    """
    Train one quantile-loss Gradient Boosting model per requested quantile.

    The data is split 80/20 by position, features are scaled with a
    RobustScaler fitted on the training part, and each model is scored on the
    held-out part. If both the 0.1 and 0.9 quantiles were requested, interval
    coverage and average width are printed; the median MAE is printed as well
    when 0.5 was requested.

    Args:
        X: Feature DataFrame.
        y: Target Series aligned with X.
        horizon_name: Label used in printed output only.
        quantiles: Iterable of quantile levels to fit.

    Returns:
        (quantile_models, scaler, quantile_preds, y_test) where the two dicts
        are keyed by quantile level and quantile_preds holds test-set predictions.
    """
    print(f"\n{horizon_name} Quantile Models:")
    
    # Time-series split
    split_idx = int(len(X) * 0.8)
    X_train, X_test = X.iloc[:split_idx], X.iloc[split_idx:]
    y_train, y_test = y.iloc[:split_idx], y.iloc[split_idx:]
    
    # Scale
    scaler = RobustScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    quantile_models = {}
    quantile_preds = {}
    
    for q in quantiles:
        print(f"  Training quantile {q:.0%}...")
        
        # Use GradientBoostingRegressor with quantile loss
        model = GradientBoostingRegressor(
            loss='quantile',
            alpha=q,
            n_estimators=100,
            max_depth=5,
            learning_rate=0.1,
            min_samples_split=10,
            random_state=42
        )
        
        model.fit(X_train_scaled, y_train)
        pred = model.predict(X_test_scaled)
        
        quantile_models[q] = model
        quantile_preds[q] = pred
        
        # Evaluate (interval coverage is computed once after the loop, when
        # both bounds are available)
        mae = np.mean(np.abs(y_test.values - pred))
        print(f"      MAE: {mae:.6f}")
    
    # Calculate interval metrics
    if 0.1 in quantile_preds and 0.9 in quantile_preds:
        lower = quantile_preds[0.1]
        upper = quantile_preds[0.9]
        
        # Coverage: how often actual is within interval
        coverage = np.mean((y_test.values >= lower) & (y_test.values <= upper))
        
        # Interval width (narrower is better, but need good coverage)
        avg_width = np.mean(upper - lower)
        
        print(f"\n  Interval Statistics:")
        print(f"    80% Interval Coverage: {coverage:.1%} (target: 80%)")
        print(f"    Average Interval Width: {avg_width:.4f} gwei")
        # The median is optional: callers may request only the interval bounds.
        if 0.5 in quantile_preds:
            median = quantile_preds[0.5]
            print(f"    Median Prediction MAE: {np.mean(np.abs(y_test.values - median)):.6f}")
    
    return quantile_models, scaler, quantile_preds, y_test

# 1h horizon quantile models
print("\n>>> Training 1h Quantile Models")
(
    quantile_1h,
    quantile_scaler_1h,
    qpreds_1h,
    qactual_1h,
) = train_quantile_models(X=X_1h, y=y_1h, horizon_name='1h')

# 4h horizon quantile models
print("\n>>> Training 4h Quantile Models")
(
    quantile_4h,
    quantile_scaler_4h,
    qpreds_4h,
    qactual_4h,
) = train_quantile_models(X=X_4h, y=y_4h, horizon_name='4h')

# The 24h names alias the 4h quantile models and scaler
quantile_24h, quantile_scaler_24h = quantile_4h, quantile_scaler_4h

print(
    "\n" + "=" * 60,
    "Quantile models trained - prediction intervals ready!",
    "=" * 60,
    sep="\n",
)

In [None]:
# Direction Prediction (Classification: Down/Stable/Up) - IMPROVED
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, f1_score

# Section banner for the direction-classification cell
print(
    "\n" + "=" * 60,
    "DIRECTION PREDICTION (Classification)",
    "=" * 60,
    "Classes: 0=Down (>2% drop), 1=Stable (<2% change), 2=Up (>2% rise)",
    sep="\n",
)

def train_direction_model(X, y_dir, horizon_name, use_class_weights=True):
    """
    Train and compare direction classifiers (0=Down, 1=Stable, 2=Up).

    Rows with a missing label are dropped, the remainder is split 80/20 by
    position, and three classifiers (Random Forest, Gradient Boosting,
    Logistic Regression) are fitted on RobustScaler-scaled features. The one
    with the highest weighted F1 on the test split is returned. No probability
    calibration is performed.

    Args:
        X: Feature DataFrame.
        y_dir: Series of direction labels aligned with X; NaN rows are dropped.
        horizon_name: Label used in printed output only.
        use_class_weights: If True, inverse-frequency class weights are passed
            to the Random Forest and Logistic Regression models.

    Returns:
        (best_classifier, scaler, best_accuracy, best_weighted_f1)
    """
    class_labels = [0, 1, 2]
    class_names = ['Down', 'Stable', 'Up']
    
    print(f"\n{horizon_name} Direction Classifier:")
    
    # Remove NaN
    valid_idx = ~y_dir.isna()
    X_valid = X[valid_idx]
    y_valid = y_dir[valid_idx].astype(int)
    
    # Class distribution
    class_counts = y_valid.value_counts().sort_index()
    total = len(y_valid)
    print(f"  Class distribution:")
    print(f"    Down (0):   {class_counts.get(0,0):5d} ({class_counts.get(0,0)/total*100:.1f}%)")
    print(f"    Stable (1): {class_counts.get(1,0):5d} ({class_counts.get(1,0)/total*100:.1f}%)")
    print(f"    Up (2):     {class_counts.get(2,0):5d} ({class_counts.get(2,0)/total*100:.1f}%)")
    
    # Calculate class weights (inverse frequency)
    if use_class_weights:
        class_weights = {i: total / (3 * count) for i, count in class_counts.items()}
        print(f"  Using class weights: {class_weights}")
    else:
        class_weights = None
    
    # Split
    split_idx = int(len(X_valid) * 0.8)
    X_train, X_test = X_valid.iloc[:split_idx], X_valid.iloc[split_idx:]
    y_train, y_test = y_valid.iloc[:split_idx], y_valid.iloc[split_idx:]
    
    # Scale
    scaler = RobustScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    results = []
    
    def _fit_and_score(name, clf):
        """Fit one classifier, score it on the test split, and record the result."""
        clf.fit(X_train_scaled, y_train)
        pred = clf.predict(X_test_scaled)
        acc = accuracy_score(y_test, pred)
        f1 = f1_score(y_test, pred, average='weighted')
        results.append((name, clf, acc, f1))
        print(f"        Accuracy: {acc:.1%}, F1: {f1:.3f}")
    
    # 1. Random Forest with class weights
    print(f"\n  [1/3] Random Forest...")
    _fit_and_score('RandomForest', RandomForestClassifier(
        n_estimators=150, max_depth=10, min_samples_split=10,
        class_weight=class_weights, random_state=42, n_jobs=-1
    ))
    
    # 2. Gradient Boosting
    # NOTE(review): this model is fitted without the class weights used by the
    # other two candidates - confirm that is intended.
    print(f"  [2/3] Gradient Boosting...")
    _fit_and_score('GradientBoosting', GradientBoostingClassifier(
        n_estimators=100, max_depth=5, learning_rate=0.1,
        random_state=42
    ))
    
    # 3. Logistic Regression
    print(f"  [3/3] Logistic Regression...")
    _fit_and_score('LogisticRegression', LogisticRegression(
        class_weight=class_weights, max_iter=1000, random_state=42, n_jobs=-1
    ))
    
    # Baseline: always predict most common class
    most_common = y_train.mode()[0]
    baseline_acc = (y_test == most_common).mean()
    print(f"\n  Baseline (always predict {class_names[most_common]}): {baseline_acc:.1%}")
    
    # Select best by F1 score (better for imbalanced classes)
    best = max(results, key=lambda x: x[3])
    print(f"\n  >>> Best: {best[0]} (Accuracy: {best[2]:.1%}, F1: {best[3]:.3f})")
    print(f"      Improvement over baseline: {(best[2] - baseline_acc)*100:+.1f}%")
    
    # Print classification report for best model. Passing `labels` keeps the
    # three target names valid even when a class is absent from the test
    # split; without it the report raises on a label/name count mismatch.
    best_pred = best[1].predict(X_test_scaled)
    print(f"\n  Classification Report ({best[0]}):")
    print(classification_report(
        y_test, best_pred,
        labels=class_labels, target_names=class_names, zero_division=0
    ))
    
    return best[1], scaler, best[2], best[3]

# Fit one direction classifier per horizon, both with inverse-frequency class weights
dir_clf_1h, dir_scaler_1h, dir_acc_1h, dir_f1_1h = train_direction_model(
    X=X, y_dir=y_dir_1h, horizon_name='1h', use_class_weights=True
)
dir_clf_4h, dir_scaler_4h, dir_acc_4h, dir_f1_4h = train_direction_model(
    X=X, y_dir=y_dir_4h, horizon_name='4h', use_class_weights=True
)

# Summary banner
print(
    "\n" + "=" * 60,
    "Direction classifiers trained successfully",
    f"  1h: Accuracy={dir_acc_1h:.1%}, F1={dir_f1_1h:.3f}",
    f"  4h: Accuracy={dir_acc_4h:.1%}, F1={dir_f1_4h:.3f}",
    "=" * 60,
    sep="\n",
)

In [None]:
# REGIME DETECTION
# Detect: Normal, High Activity (NFT mints, DeFi rushes), Spike periods
# Train model to auto-detect which regime we're in

# Section banner for the regime-detection cell
print(
    "\n" + "=" * 60,
    "TRAINING REGIME DETECTION MODEL",
    "=" * 60,
    "Regimes: 0=Normal, 1=Elevated (high activity), 2=Spike",
    sep="\n",
)

def train_regime_detector(X, df_clean):
    """
    Label every row with a market regime and fit a classifier to predict it.

    Regime labels come from the gas level relative to its overall mean/std and
    from a volatility signal (the 'gas_cv_1h' column if present, otherwise a
    60-row rolling std/mean ratio of gas):
    - 0: Normal
    - 1: Elevated (gas > mean + 0.5*std, or volatility > 0.1)
    - 2: Spike (gas > mean + 2*std, or volatility > 0.25)

    Returns:
        (classifier, scaler, test_accuracy) using an 80/20 positional split.
    """
    regime_names = ['Normal', 'Elevated', 'Spike']
    
    # Gas level statistics over the whole frame
    gas_series = df_clean['gas']
    gas_level = gas_series.values
    level_mean = np.mean(gas_level)
    level_std = np.std(gas_level)
    
    # Volatility signal: precomputed column if available, rolling ratio otherwise
    if 'gas_cv_1h' in df_clean.columns:
        vol = df_clean['gas_cv_1h'].values
    else:
        rolling_gas = gas_series.rolling(60)
        vol = (rolling_gas.std() / (rolling_gas.mean() + 1e-8)).values
    
    # Spike takes precedence over Elevated; everything else is Normal
    is_elevated = (gas_level > level_mean + 0.5 * level_std) | (vol > 0.1)
    is_spike = (gas_level > level_mean + 2 * level_std) | (vol > 0.25)
    regime = np.select([is_spike, is_elevated], [2.0, 1.0], default=0.0)
    
    # Report how many rows fall in each regime
    labels_seen, label_counts = np.unique(regime, return_counts=True)
    n_rows = len(regime)
    print(f"\nRegime distribution:")
    for label, count in zip(labels_seen, label_counts):
        regime_name = regime_names[int(label)]
        print(f"  {regime_name}: {count} ({count/n_rows*100:.1f}%)")
    
    # Positional 80/20 split
    cut = int(len(X) * 0.8)
    X_train, X_test = X.iloc[:cut], X.iloc[cut:]
    y_train, y_test = regime[:cut], regime[cut:]
    
    # Scale using training statistics only
    scaler = RobustScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    # Random Forest regime classifier
    print("\nTraining regime classifier...")
    from sklearn.ensemble import RandomForestClassifier
    
    forest_kwargs = dict(
        n_estimators=100,
        max_depth=8,
        min_samples_split=10,
        class_weight='balanced',  # Handle imbalanced classes
        random_state=42,
        n_jobs=-1,
    )
    clf = RandomForestClassifier(**forest_kwargs)
    clf.fit(X_train_scaled, y_train)
    
    # Overall test accuracy
    y_pred = clf.predict(X_test_scaled)
    accuracy = np.mean(y_pred == y_test)
    
    print(f"\nRegime Detection Results:")
    print(f"  Accuracy: {accuracy:.1%}")
    
    # Accuracy within each regime that actually occurs in the test split
    for regime_id, regime_name in enumerate(regime_names):
        in_regime = y_test == regime_id
        n_in_regime = in_regime.sum()
        if n_in_regime > 0:
            class_acc = np.mean(y_pred[in_regime] == y_test[in_regime])
            print(f"  {regime_name}: {class_acc:.1%} ({n_in_regime} samples)")
    
    # Five most important features for regime detection
    importance_by_feature = dict(zip(X.columns, clf.feature_importances_))
    ranked = sorted(importance_by_feature.items(), key=lambda item: item[1], reverse=True)
    print(f"\n  Top regime indicators:")
    for feat, imp in ranked[:5]:
        print(f"    {feat}: {imp:.3f}")
    
    return clf, scaler, accuracy

# Fit the regime classifier on the shared feature matrix
regime_clf, regime_scaler, regime_accuracy = train_regime_detector(X=X, df_clean=df_clean)

print(
    "\n" + "=" * 60,
    f"Regime detector trained - Accuracy: {regime_accuracy:.1%}",
    "=" * 60,
    sep="\n",
)

In [None]:
# Train Spike Detectors
from sklearn.ensemble import GradientBoostingClassifier

# Section banner for the spike-detector cell
print("\n" + "=" * 60, "TRAINING SPIKE DETECTORS", "=" * 60, sep="\n")

def train_spike_detector(X, y_target, current_gas, horizon_name):
    """
    Train a classifier for the size of the upcoming relative price change.

    Labels are derived from |y_target - current_gas| / current_gas:
    0 = Normal (< 50%), 1 = Elevated (50-100%), 2 = Spike (>= 100%).
    Rows whose relative change is NaN are dropped before labelling; otherwise
    they would fail both threshold comparisons and be labelled as spikes.

    Args:
        X: Feature DataFrame, positionally aligned with y_target and current_gas.
        y_target: Series of target gas values at the horizon.
        current_gas: Series of current gas values.
        horizon_name: Label used in printed output only.

    Returns:
        (classifier, scaler) fitted on the first 80% of the usable rows.
    """
    print(f"\nTraining {horizon_name} spike detector...")
    
    # Classify based on relative change from current
    price_change_pct = (y_target - current_gas) / (current_gas + 1e-8)
    
    # Keep only rows with a defined change
    valid = price_change_pct.notna().to_numpy()
    n_dropped = int((~valid).sum())
    if n_dropped:
        print(f"  Dropping {n_dropped} rows with missing target or current gas")
        X = X.loc[valid]
        price_change_pct = price_change_pct[valid]
    
    # Normal: < 50% change, Elevated: 50-100%, Spike: > 100%
    abs_change = price_change_pct.abs()
    y_class = pd.Series(
        np.select(
            [(abs_change < 0.5).to_numpy(), (abs_change < 1.0).to_numpy()],
            [0, 1],
            default=2,
        ),
        index=abs_change.index,
    )
    
    # Class distribution
    class_counts = y_class.value_counts().sort_index()
    print(f"  Classes: Normal={class_counts.get(0,0)}, Elevated={class_counts.get(1,0)}, Spike={class_counts.get(2,0)}")
    
    # Split (time-series)
    split_idx = int(len(X) * 0.8)
    X_train, X_test = X.iloc[:split_idx], X.iloc[split_idx:]
    y_train, y_test = y_class.iloc[:split_idx], y_class.iloc[split_idx:]
    
    # Scale
    scaler = RobustScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    # Train
    clf = GradientBoostingClassifier(n_estimators=100, max_depth=5, random_state=42)
    clf.fit(X_train_scaled, y_train)
    
    accuracy = clf.score(X_test_scaled, y_test)
    print(f"  Accuracy: {accuracy:.1%}")
    
    return clf, scaler

# One spike detector per horizon, all on the shared feature matrix
spike_1h, spike_scaler_1h = train_spike_detector(
    X=X, y_target=y_1h, current_gas=current_gas, horizon_name='1h'
)
spike_4h, spike_scaler_4h = train_spike_detector(
    X=X, y_target=y_4h, current_gas=current_gas, horizon_name='4h'
)
spike_24h, spike_scaler_24h = train_spike_detector(
    X=X, y_target=y_24h, current_gas=current_gas, horizon_name='24h'
)

In [None]:
# DQN AGENT TRAINING
# Train reinforcement learning agent for optimal transaction timing
# The agent learns when to WAIT vs EXECUTE based on gas price patterns

# Section banner for the DQN training cell
print(
    "\n" + "=" * 60,
    "TRAINING DQN AGENT (Transaction Timing Optimization)",
    "=" * 60,
    sep="\n",
)

import torch
import torch.nn as nn
import torch.optim as optim
import random
from collections import deque

# Prefer CUDA when PyTorch can see a GPU, otherwise run on the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

# === DQN Network ===
class DuelingDQN(nn.Module):
    """
    Dueling Q-network: a shared trunk feeding separate value and advantage heads.

    All but the last entry of hidden_dims size the shared trunk (Linear + ReLU
    + Dropout(0.1) per entry); the last entry sizes the hidden layer of each head.
    """
    
    def __init__(self, state_dim, action_dim, hidden_dims=[128, 64]):
        super().__init__()
        
        # Shared trunk: consecutive width pairs become Linear/ReLU/Dropout triples
        widths = [state_dim, *hidden_dims[:-1]]
        trunk = []
        for fan_in, fan_out in zip(widths, widths[1:]):
            trunk += [nn.Linear(fan_in, fan_out), nn.ReLU(), nn.Dropout(0.1)]
        self.features = nn.Sequential(*trunk)
        
        trunk_out = widths[-1]
        head_width = hidden_dims[-1]
        
        def make_head(n_outputs):
            # Two-layer head on top of the trunk output
            return nn.Sequential(
                nn.Linear(trunk_out, head_width),
                nn.ReLU(),
                nn.Linear(head_width, n_outputs),
            )
        
        # Value head yields one scalar per state; advantage head one per action
        self.value_stream = make_head(1)
        self.advantage_stream = make_head(action_dim)
    
    def forward(self, x):
        """Return Q-values for a batch of states (one row per state)."""
        shared = self.features(x)
        state_value = self.value_stream(shared)
        adv = self.advantage_stream(shared)
        # Q(s,a) = V(s) + A(s,a) - mean_a A(s,a)
        return state_value + adv - adv.mean(dim=1, keepdim=True)

# === Prioritized Replay Buffer ===
class PrioritizedReplayBuffer:
    """
    Fixed-capacity experience replay with priority-proportional sampling.

    Transitions are stored in a ring buffer; each has a priority of
    (|td_error| + 1e-5) ** alpha, or 1.0 when no TD error is supplied.
    """
    
    def __init__(self, capacity=50000, alpha=0.6):
        self.capacity = capacity
        self.alpha = alpha
        self.buffer = []
        self.priorities = []
        self.position = 0  # next slot to overwrite once the buffer is full
    
    def push(self, state, action, reward, next_state, done, td_error=None):
        """Store a transition, overwriting the oldest one when at capacity."""
        # `is not None` so that a genuine TD error of 0.0 gets the minimal
        # priority and is not mistaken for "no TD error supplied".
        priority = (abs(td_error) + 1e-5) ** self.alpha if td_error is not None else 1.0
        
        if len(self.buffer) < self.capacity:
            self.buffer.append((state, action, reward, next_state, done))
            self.priorities.append(priority)
        else:
            self.buffer[self.position] = (state, action, reward, next_state, done)
            self.priorities[self.position] = priority
        
        self.position = (self.position + 1) % self.capacity
    
    def sample(self, batch_size, beta=0.4):
        """
        Draw batch_size transitions (with replacement) proportionally to priority.

        Returns:
            (samples, indices, weights) where weights are importance-sampling
            corrections normalised so the largest is 1, as a float tensor on
            the module-level `device`.
        """
        priorities = np.array(self.priorities)
        probs = priorities / priorities.sum()
        
        indices = np.random.choice(len(self.buffer), batch_size, p=probs)
        samples = [self.buffer[i] for i in indices]
        
        # Importance sampling weights
        weights = (len(self.buffer) * probs[indices]) ** (-beta)
        weights /= weights.max()
        
        return samples, indices, torch.FloatTensor(weights).to(device)
    
    def update_priorities(self, indices, td_errors):
        """Recompute the priority of each sampled transition from its new TD error."""
        for idx, td_error in zip(indices, td_errors):
            self.priorities[idx] = (abs(td_error) + 1e-5) ** self.alpha
    
    def __len__(self):
        return len(self.buffer)

# === DQN Agent ===
class DQNAgent:
    """
    DQN agent combining Double DQN targets, a dueling network and prioritized replay.

    States are normalised with statistics set by fit_state_normalizer before
    they reach the network or the replay buffer. Tensors are placed on the
    module-level `device`.
    """
    
    def __init__(self, state_dim, action_dim, hidden_dims=[128, 64],
                 lr=0.0003, gamma=0.98, epsilon_start=1.0, epsilon_end=0.05,
                 epsilon_decay_episodes=5000, buffer_size=50000, batch_size=64,
                 target_update_freq=200, tau=0.001):
        
        self.state_dim = state_dim
        self.action_dim = action_dim
        # Kept so save()/load() can rebuild a network of the same shape.
        self.hidden_dims = list(hidden_dims)
        self.gamma = gamma
        self.batch_size = batch_size
        self.target_update_freq = target_update_freq
        self.tau = tau
        
        # Epsilon for exploration (linear decay, one step per decay_epsilon call)
        self.epsilon = epsilon_start
        self.epsilon_end = epsilon_end
        self.epsilon_decay = (epsilon_start - epsilon_end) / epsilon_decay_episodes
        
        # Networks
        self.policy_net = DuelingDQN(state_dim, action_dim, hidden_dims).to(device)
        self.target_net = DuelingDQN(state_dim, action_dim, hidden_dims).to(device)
        self.target_net.load_state_dict(self.policy_net.state_dict())
        
        self.optimizer = optim.Adam(self.policy_net.parameters(), lr=lr)
        self.replay_buffer = PrioritizedReplayBuffer(buffer_size)
        
        self.training_steps = 0
        self.state_mean = None
        self.state_std = None
    
    def fit_state_normalizer(self, states):
        """Fit state normalizer from sample states."""
        self.state_mean = np.mean(states, axis=0)
        self.state_std = np.std(states, axis=0) + 1e-8
    
    def normalize_state(self, state):
        """Normalize state using fitted statistics (identity until fitted)."""
        if self.state_mean is not None:
            return (state - self.state_mean) / self.state_std
        return state
    
    def _policy_q_values(self, state, deterministic):
        """
        Return the policy network's Q-values for one raw state as a 1-D tensor.

        With deterministic=True the network is switched to eval mode for the
        call so dropout does not randomise the result, then restored to its
        previous mode.
        """
        state = self.normalize_state(state)
        state_tensor = torch.FloatTensor(state).unsqueeze(0).to(device)
        
        was_training = self.policy_net.training
        if deterministic:
            self.policy_net.eval()
        try:
            with torch.no_grad():
                return self.policy_net(state_tensor).squeeze(0)
        finally:
            if deterministic:
                self.policy_net.train(was_training)
    
    def select_action(self, state, training=True):
        """
        Select action using epsilon-greedy policy.

        During training a random action is taken with probability epsilon.
        With training=False the greedy action is computed with dropout
        disabled, so the same state always yields the same action.
        """
        if training and random.random() < self.epsilon:
            return random.randint(0, self.action_dim - 1)
        
        q_values = self._policy_q_values(state, deterministic=not training)
        return q_values.argmax().item()
    
    def store_transition(self, state, action, reward, next_state, done, td_error=None):
        """Store a transition (with normalised states) in the replay buffer."""
        state = self.normalize_state(state)
        next_state = self.normalize_state(next_state)
        self.replay_buffer.push(state, action, reward, next_state, done, td_error)
    
    def train_step(self):
        """
        Perform one optimisation step on a prioritized batch.

        Returns:
            The batch loss as a float, or None while the buffer holds fewer
            than batch_size transitions.
        """
        if len(self.replay_buffer) < self.batch_size:
            return None
        
        # Sample from replay buffer; beta is annealed towards 1 with training steps
        beta = min(1.0, 0.4 + self.training_steps * 0.001)
        samples, indices, weights = self.replay_buffer.sample(self.batch_size, beta)
        
        # Unpack samples
        states, actions, rewards, next_states, dones = zip(*samples)
        
        states = torch.FloatTensor(np.array(states)).to(device)
        actions = torch.LongTensor(actions).to(device)
        rewards = torch.FloatTensor(rewards).to(device)
        next_states = torch.FloatTensor(np.array(next_states)).to(device)
        dones = torch.FloatTensor(dones).to(device)
        
        # Current Q values; squeeze only the action axis so a batch of one
        # keeps its batch dimension
        current_q = self.policy_net(states).gather(1, actions.unsqueeze(1)).squeeze(1)
        
        # Double DQN: use policy net to select action, target net to evaluate
        with torch.no_grad():
            next_actions = self.policy_net(next_states).argmax(1)
            next_q = self.target_net(next_states).gather(1, next_actions.unsqueeze(1)).squeeze(1)
            target_q = rewards + (1 - dones) * self.gamma * next_q
        
        # TD errors for PER
        td_errors = (current_q - target_q).detach().cpu().numpy()
        self.replay_buffer.update_priorities(indices, td_errors)
        
        # Weighted loss
        loss = (weights * (current_q - target_q) ** 2).mean()
        
        # Optimize
        self.optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(self.policy_net.parameters(), 1.0)
        self.optimizer.step()
        
        # Soft update target network
        # NOTE(review): the tau-weighted blend runs only every
        # target_update_freq steps, so with the defaults (tau=0.001, freq=200)
        # the target network tracks the policy network very slowly. Confirm
        # whether a per-step soft update or a periodic hard copy was intended.
        self.training_steps += 1
        if self.training_steps % self.target_update_freq == 0:
            for target_param, policy_param in zip(self.target_net.parameters(), 
                                                   self.policy_net.parameters()):
                target_param.data.copy_(self.tau * policy_param.data + 
                                        (1 - self.tau) * target_param.data)
        
        return loss.item()
    
    def decay_epsilon(self):
        """Decay exploration rate by one linear step, never below epsilon_end."""
        self.epsilon = max(self.epsilon_end, self.epsilon - self.epsilon_decay)
    
    def get_recommendation(self, state, threshold=0.5):
        """
        Get action recommendation with confidence.

        Confidence is the softmax probability of the greedy action over the
        Q-values, computed with dropout disabled. `threshold` is accepted but
        not used.

        Returns:
            Dict with 'action' ('execute' for action 1, otherwise 'wait'),
            'confidence' and 'q_values' (numpy array).
        """
        q_values = self._policy_q_values(state, deterministic=True)
        probs = torch.softmax(q_values, dim=0)
        action = q_values.argmax().item()
        confidence = probs[action].item()
        
        return {
            'action': 'execute' if action == 1 else 'wait',
            'confidence': confidence,
            'q_values': q_values.cpu().numpy()
        }
    
    def save(self, path):
        """Save networks, optimizer, exploration state and normaliser to file."""
        torch.save({
            'policy_net': self.policy_net.state_dict(),
            'target_net': self.target_net.state_dict(),
            'optimizer': self.optimizer.state_dict(),
            'epsilon': self.epsilon,
            'training_steps': self.training_steps,
            'state_mean': self.state_mean,
            'state_std': self.state_std,
            'state_dim': self.state_dim,
            'action_dim': self.action_dim,
            # Extra key; older readers that ignore it keep working.
            'hidden_dims': self.hidden_dims
        }, path)
    
    @classmethod
    def load(cls, path, device='cpu'):
        """
        Load agent from a checkpoint written by save().

        Checkpoints without 'hidden_dims' (written before that key existed)
        are rebuilt with the default layer sizes.

        SECURITY: the checkpoint holds numpy arrays, so it is unpickled with
        weights_only=False. Only load files you produced yourself.
        """
        try:
            checkpoint = torch.load(path, map_location=device, weights_only=False)
        except TypeError:
            # Older PyTorch without the weights_only argument
            checkpoint = torch.load(path, map_location=device)
        
        hidden_dims = checkpoint.get('hidden_dims', [128, 64])
        agent = cls(checkpoint['state_dim'], checkpoint['action_dim'], hidden_dims=hidden_dims)
        agent.policy_net.load_state_dict(checkpoint['policy_net'])
        agent.target_net.load_state_dict(checkpoint['target_net'])
        agent.optimizer.load_state_dict(checkpoint['optimizer'])
        agent.epsilon = checkpoint['epsilon']
        agent.training_steps = checkpoint['training_steps']
        agent.state_mean = checkpoint['state_mean']
        agent.state_std = checkpoint['state_std']
        return agent

# Cell-completion marker: printed once the class definitions above in this cell have run.
print("DQN Agent classes defined.")

In [None]:
# Gas Optimization Environment and DQN Training

# Section banner for the environment-setup cell
print(
    "\n" + "=" * 60,
    "SETTING UP GAS OPTIMIZATION ENVIRONMENT",
    "=" * 60,
    sep="\n",
)

class GasOptimizationEnv:
    """
    RL environment for learning optimal transaction timing.

    Each episode is a window of gas prices. At every step the agent either
    waits for the next price or executes at the current one; executing ends
    the episode. A wait requested on the last available step is turned into
    a (penalized) forced execution.

    Actions: 0=Wait, 1=Execute
    """

    def __init__(self, gas_data, episode_length=48, max_wait_steps=None):
        """
        Args:
            gas_data: DataFrame with a 'gas' column; sliced positionally
                into episodes.
            episode_length: Number of rows in each sampled episode.
            max_wait_steps: Step budget before execution is forced; a falsy
                value falls back to ``episode_length``.
        """
        self.gas_data = gas_data
        self.episode_length = episode_length
        self.max_wait_steps = max_wait_steps or episode_length

        # State layout (30 values): 24 normalized prices, then volatility,
        # momentum, price position, time remaining, urgency, time waited.
        self.state_dim = 30
        self.action_dim = 2

        self._current_step = 0
        self._episode_data = None
        self._urgency = 0.5
        self._time_waiting = 0
        self._initial_price = 0

    def _get_episodes(self, num_episodes=100):
        """
        Sample up to ``num_episodes`` random windows of ``episode_length`` rows.

        Windows shorter than ``episode_length`` (data too short) are dropped,
        so the returned list may be shorter than requested or empty.
        """
        episodes = []
        data_len = len(self.gas_data)

        # randint's upper bound is exclusive, so +1 keeps the final valid
        # start index (data_len - episode_length) in the sampling range.
        start_bound = max(1, data_len - self.episode_length + 1)

        for _ in range(num_episodes):
            start_idx = np.random.randint(0, start_bound)
            episode = self.gas_data.iloc[start_idx:start_idx + self.episode_length]
            if len(episode) >= self.episode_length:
                episodes.append(episode)

        return episodes

    def reset(self, episode_data=None):
        """
        Start a new episode and return its first state.

        Args:
            episode_data: Episode window to use; when None, one window is
                sampled from ``gas_data``.

        Raises:
            ValueError: If no window could be sampled from ``gas_data``.
        """
        if episode_data is not None:
            self._episode_data = episode_data
        else:
            episodes = self._get_episodes(1)
            if episodes:
                self._episode_data = episodes[0]
            else:
                raise ValueError("No data available for episode")

        self._current_step = 0
        self._time_waiting = 0
        # Urgency is redrawn per episode; it scales the per-step wait penalty.
        self._urgency = np.random.uniform(0.1, 0.9)
        self._initial_price = self._episode_data['gas'].iloc[0]

        return self._get_state()

    def _get_state(self):
        """Build the float32 state vector for the current step (zeros past the end)."""
        if self._current_step >= len(self._episode_data):
            return np.zeros(self.state_dim)

        prices = self._episode_data['gas'].values
        current_price = prices[self._current_step]

        # Last 24 prices up to and including the current one; early in the
        # episode the window is front-padded by repeating its oldest price.
        history_start = max(0, self._current_step - 24)
        price_history = prices[history_start:self._current_step + 1]
        if len(price_history) < 24:
            price_history = np.pad(price_history, (24 - len(price_history), 0), mode='edge')
        price_history = price_history[-24:]

        # Normalize prices relative to the window mean (epsilon avoids /0)
        mean_price = np.mean(price_history) + 1e-8
        norm_prices = (price_history - mean_price) / mean_price

        # Price statistics over the same window
        volatility = np.std(price_history) / mean_price
        momentum = (price_history[-1] - price_history[0]) / mean_price if len(price_history) > 1 else 0
        price_position = (current_price - np.min(price_history)) / (np.max(price_history) - np.min(price_history) + 1e-8)

        # Fraction of the wait budget still available
        time_remaining = 1.0 - (self._current_step / self.max_wait_steps)

        state = np.concatenate([
            norm_prices,                    # 24 features
            [volatility],                   # 1 feature
            [momentum],                     # 1 feature
            [price_position],               # 1 feature
            [time_remaining],               # 1 feature
            [self._urgency],                # 1 feature
            [self._time_waiting / self.max_wait_steps]  # 1 feature
        ])

        return state.astype(np.float32)

    def step(self, action):
        """
        Apply ``action`` at the current step.

        Executing (action 1) ends the episode with reward
        ``tanh(10 * savings)``, where savings is the fractional price drop
        relative to the episode's first price. Any other action waits: it
        costs ``0.01 * urgency`` and advances one step. A wait (action 0)
        requested on the final decision step -- the earlier of the wait
        budget and the end of the episode data -- is converted into an
        execution with an additional 0.3 penalty.

        Returns:
            (next_state, reward, done, info); ``next_state`` is all zeros
            once ``done`` is True.
        """
        current_price = self._episode_data['gas'].iloc[self._current_step]

        # Index of the last step at which a decision can still be made.
        # Bounding by the data length keeps the deadline reachable even when
        # max_wait_steps equals (or exceeds) the episode length.
        final_step = min(self.max_wait_steps, len(self._episode_data)) - 1

        # Check if forced execution at deadline
        forced = action == 0 and self._current_step >= final_step
        if forced:
            action = 1  # Force execute

        if action == 1:  # Execute
            # Reward based on savings vs initial price
            savings = (self._initial_price - current_price) / self._initial_price
            reward = np.tanh(savings * 10)  # Scale and bound reward

            if forced:
                reward -= 0.3  # Penalty for forced execution

            done = True
            info = {
                'execution_price': current_price,
                'initial_price': self._initial_price,
                'savings': savings,
                'forced': forced,
                'wait_time': self._time_waiting
            }
        else:  # Wait
            reward = -0.01 * self._urgency  # Small wait penalty
            self._time_waiting += 1
            self._current_step += 1
            # A regular wait never moves past final_step (it would have been
            # forced); this only trips for unexpected action values.
            done = self._current_step > final_step
            info = {'action': 'wait'}

        next_state = self._get_state() if not done else np.zeros(self.state_dim)
        return next_state, reward, done, info

# === DQN Training ===
print("\nPreparing training data...")

# Seed the RNGs this cell may draw from (episode sampling, exploration,
# network initialisation) so reruns are as repeatable as possible.
import random  # local import: only needed for seeding

RL_SEED = 42
random.seed(RL_SEED)
np.random.seed(RL_SEED)
torch.manual_seed(RL_SEED)

# Use the clean gas data for RL training
rl_data = df_clean[['gas']].copy()
print(f"RL training data: {len(rl_data)} samples")

# Training parameters
NUM_EPISODES = 3000  # Reduced for Colab (increase to 10000 for better results)
EPISODE_LENGTH = 48
MAX_WAIT_STEPS = 48

# Create environment
env = GasOptimizationEnv(rl_data, episode_length=EPISODE_LENGTH, max_wait_steps=MAX_WAIT_STEPS)

# Create agent
dqn_agent = DQNAgent(
    state_dim=env.state_dim,
    action_dim=env.action_dim,
    hidden_dims=[128, 64],
    lr=0.0003,
    gamma=0.98,
    epsilon_start=1.0,
    epsilon_end=0.05,
    epsilon_decay_episodes=NUM_EPISODES,  # decay spans the whole run
    buffer_size=50000,
    batch_size=64
)

# Generate diverse training episodes (capped at 500 windows; the training
# loop cycles through them)
print("Generating training episodes...")
train_episodes = env._get_episodes(min(NUM_EPISODES * 2, 500))
print(f"Generated {len(train_episodes)} diverse episodes")

# Fail early with a clear message: without episodes the normalizer would be
# fitted on nothing and the training loop would divide by zero.
if not train_episodes:
    raise ValueError(
        f"No training episodes could be built: need at least {EPISODE_LENGTH} "
        f"rows of gas data, got {len(rl_data)}"
    )

# Fit state normalizer on states visited by short random rollouts
print("Fitting state normalizer...")
sample_states = []
for ep in train_episodes[:50]:
    state = env.reset(episode_data=ep)
    sample_states.append(state)
    for _ in range(min(10, len(ep) - 1)):
        action = np.random.randint(0, 2)
        next_state, _, done, _ = env.step(action)
        if done:
            # Terminal next_state is a zero placeholder; keep it out of the sample
            break
        sample_states.append(next_state)
dqn_agent.fit_state_normalizer(np.array(sample_states))

# Training loop
print(f"\nStarting DQN training ({NUM_EPISODES} episodes)...")
print("-" * 50)

# Per-episode histories, kept for progress logging and the summary that follows
episode_rewards, episode_savings, losses = [], [], []
best_avg_savings = float('-inf')

for episode in range(NUM_EPISODES):
    # Cycle through the pre-sampled episode windows
    ep_data = train_episodes[episode % len(train_episodes)]
    state = env.reset(episode_data=ep_data)

    total_reward = 0
    episode_losses = []
    done = False

    # Roll the episode out, learning after every transition
    while not done:
        action = dqn_agent.select_action(state, training=True)
        next_state, reward, done, info = env.step(action)

        dqn_agent.store_transition(state, action, reward, next_state, done)

        loss = dqn_agent.train_step()
        if loss is not None:
            episode_losses.append(loss)

        total_reward += reward
        state = next_state

    dqn_agent.decay_epsilon()
    episode_rewards.append(total_reward)

    # Savings are only reported by the step that executed the transaction
    if 'savings' in info:
        episode_savings.append(info['savings'])

    if episode_losses:
        losses.append(np.mean(episode_losses))

    # Track the best 100-episode rolling mean of savings (metric only; no
    # model checkpoint is kept here)
    if len(episode_savings) >= 100:
        avg_savings = np.mean(episode_savings[-100:])
        best_avg_savings = max(best_avg_savings, avg_savings)

    # Progress logging every 500 episodes
    if (episode + 1) % 500 != 0:
        continue

    avg_reward = np.mean(episode_rewards[-100:]) if episode_rewards else 0
    avg_save = np.mean(episode_savings[-100:]) * 100 if episode_savings else 0
    avg_loss = np.mean(losses[-100:]) if losses else 0

    print(
        f"Episode {episode + 1}/{NUM_EPISODES}\n"
        f"  Avg Reward (last 100): {avg_reward:.3f}\n"
        f"  Avg Savings (last 100): {avg_save:.2f}%\n"
        f"  Avg Loss: {avg_loss:.4f}\n"
        f"  Epsilon: {dqn_agent.epsilon:.3f}\n"
        f"  Buffer Size: {len(dqn_agent.replay_buffer)}"
    )

# Training complete

# Mean savings over the most recent (up to 100) episodes that executed
final_avg_savings = float(np.mean(episode_savings[-100:])) if episode_savings else 0.0

# best_avg_savings is only updated once 100 episodes have recorded savings.
# If that never happened it is still -inf, which would print as "-inf%" and
# be written to the JSON metadata as the non-standard token -Infinity.
# Fall back to the final average so the metric is always a finite float.
if not np.isfinite(best_avg_savings):
    best_avg_savings = final_avg_savings

print("\n" + "="*60)
print("DQN TRAINING COMPLETE")
print("="*60)
print(f"Total Episodes: {NUM_EPISODES}")
print(f"Training Steps: {dqn_agent.training_steps}")
print(f"Final Epsilon: {dqn_agent.epsilon:.4f}")
if episode_savings:
    print(f"Final Avg Savings (last 100): {final_avg_savings*100:.2f}%")
    print(f"Best Avg Savings: {best_avg_savings*100:.2f}%")
print(f"Final Buffer Size: {len(dqn_agent.replay_buffer)}")

# Store for saving (read by the model-saving and final-report cells)
DQN_TRAINED = True
DQN_AGENT = dqn_agent
DQN_METRICS = {
    'episodes': NUM_EPISODES,
    'training_steps': dqn_agent.training_steps,
    'final_epsilon': float(dqn_agent.epsilon),
    'avg_savings': final_avg_savings,
    'best_avg_savings': float(best_avg_savings)
}

In [None]:
# Save all models including NEW: Quantile models + Regime detector
import os
from datetime import datetime

os.makedirs('saved_models', exist_ok=True)

print("\n" + "=" * 60, "SAVING MODELS", "=" * 60, sep="\n")

# Use feature importance from cell 6
feature_importance = FEATURE_IMPORTANCE

# Save prediction models: one bundle plus a standalone scaler per horizon.
# The 24h entry is stored with the 4h feature list.
prediction_bundles = [
    ('1h', best_1h, features_1h_used),
    ('4h', best_4h, features_4h_used),
    ('24h', best_24h, features_4h_used),
]

for horizon, best, features_used in prediction_bundles:
    name, model, metrics, scaler = best
    is_short_horizon = horizon == '1h'

    model_data = dict(
        model=model,
        model_name=name,
        metrics=metrics,
        trained_at=datetime.now().isoformat(),
        feature_names=features_used,
        feature_scaler=scaler,
        scaler_type='RobustScaler',
        is_ensemble=name in ('Ensemble', 'Stacking'),
        training_strategy='simpler_regularized_stacking' if is_short_horizon else 'full_tuned_stacking',
        actual_horizon='1 hour' if is_short_horizon else '4 hours',
        # Models exposing estimators_ are tagged for tree-variance confidence
        confidence_method='tree_variance' if hasattr(model, 'estimators_') else 'fixed',
    )

    # Feature importance is attached to the 4h bundle only
    if horizon == '4h' and feature_importance:
        model_data['feature_importance'] = feature_importance

    joblib.dump(model_data, f'saved_models/model_{horizon}.pkl')
    print(f"Saved model_{horizon}.pkl ({name}, MAE={metrics['mae']:.6f}, {metrics['vs_baseline']})")

    joblib.dump(scaler, f'saved_models/scaler_{horizon}.pkl')

# === NEW: Save Quantile Models (Prediction Intervals) ===
print("\nSaving quantile models for prediction intervals...")

# horizon -> (quantile models, scaler)
quantile_artifacts = {
    '1h': (quantile_1h, quantile_scaler_1h),
    '4h': (quantile_4h, quantile_scaler_4h),
    '24h': (quantile_24h, quantile_scaler_24h),
}

for horizon, (q_models, q_scaler) in quantile_artifacts.items():
    quantile_data = dict(
        models=q_models,  # Dict with 0.1, 0.5, 0.9 quantiles
        scaler=q_scaler,
        quantiles=[0.1, 0.5, 0.9],
        trained_at=datetime.now().isoformat(),
    )
    joblib.dump(quantile_data, f'saved_models/quantile_{horizon}.pkl')
    print(f"Saved quantile_{horizon}.pkl (10th, 50th, 90th percentiles)")

# === NEW: Save Regime Detector ===
print("\nSaving regime detector...")

# Class index -> human-readable regime name, stored alongside the classifier
regime_labels = {0: 'Normal', 1: 'Elevated', 2: 'Spike'}
regime_data = dict(
    model=regime_clf,
    scaler=regime_scaler,
    regimes=regime_labels,
    accuracy=regime_accuracy,
    trained_at=datetime.now().isoformat(),
)
joblib.dump(regime_data, 'saved_models/regime_detector.pkl')
print(f"Saved regime_detector.pkl (Accuracy: {regime_accuracy:.1%})")

# Save spike detectors (classifier + scaler per horizon)
spike_artifacts = [
    ('1h', spike_1h, spike_scaler_1h),
    ('4h', spike_4h, spike_scaler_4h),
    ('24h', spike_24h, spike_scaler_24h),
]
for horizon, clf, scaler in spike_artifacts:
    spike_data = dict(
        model=clf,
        scaler=scaler,
        trained_at=datetime.now().isoformat(),
    )
    joblib.dump(spike_data, f'saved_models/spike_detector_{horizon}.pkl')
    print(f"Saved spike_detector_{horizon}.pkl")

# Save feature names (the full feature list)
joblib.dump(features, 'saved_models/feature_names.pkl')
print(f"Saved feature_names.pkl ({len(features)} features)")

# Save training metadata with full info
import json


def _summarize_best(best, actual_horizon, training_strategy):
    """Flatten one best-model result into JSON-friendly fields.

    ``best[0]`` is the model name and ``best[2]`` its metrics dict; numeric
    metrics are cast to plain floats.
    """
    scores = best[2]
    return {
        'name': best[0],
        'r2': float(scores['r2']),
        'mae': float(scores['mae']),
        'vs_baseline': scores['vs_baseline'],
        'improvement_pct': float(scores['improvement']),
        'actual_horizon': actual_horizon,
        'training_strategy': training_strategy,
        'directional_accuracy': float(scores['directional_accuracy'])
    }


metadata = {
    'training_timestamp': datetime.now().isoformat(),
    'total_samples': len(df_clean),
    'date_range': f"{df_clean.index.min()} to {df_clean.index.max()}",
    'num_segments_used': len(good_segments),
    'has_eth_price': HAS_ETH_PRICE,
    'features': {
        'total': len(features),
        '1h_specific': len(features_1h_used),
        '4h_specific': len(features_4h_used)
    },
    'baselines': BASELINES,
    'models': {
        '1h': _summarize_best(
            best_1h,
            '1 hour',
            'simpler models with stacking + target differencing'
        ),
        '4h': _summarize_best(
            best_4h,
            '4 hours',
            'full model suite with stacking + XGBoost/LightGBM'
        ),
        '24h': _summarize_best(
            best_24h,
            '4 hours (labeled as 24h due to data limitations)',
            'same as 4h model'
        )
    },
    'direction_models': {
        '1h': {'accuracy': float(dir_acc_1h), 'f1_score': float(dir_f1_1h)},
        '4h': {'accuracy': float(dir_acc_4h), 'f1_score': float(dir_f1_4h)}
    },
    'regime_detector': {
        'accuracy': float(regime_accuracy),
        'regimes': ['Normal', 'Elevated', 'Spike']
    },
    'prediction_intervals': {
        'quantiles': [0.1, 0.5, 0.9],
        'description': '80% confidence interval (10th to 90th percentile)'
    },
    'improvements_applied': [
        'ETH price features (momentum, volatility, correlation)',
        'Extended ETH lags (1h, 2h, 4h price changes)',
        'Network utilization features',
        'Day-of-week one-hot encoding',
        'Micro-features (5min, 15min, 30min) for 1h prediction',
        'Target differencing (predict change, reconstruct price)',
        'Stacking ensemble (RF + GB + Ridge -> Ridge meta)',
        'XGBoost and LightGBM with early stopping',
        'Feature selection by importance (dropped bottom 25%)',
        'Quantile regression for prediction intervals',
        'Regime detection (Normal/Elevated/Spike)',
        'Direction-constrained predictions (optional)',
        'Hyperparameter tuning (RandomizedSearchCV)',
        'Class-weighted direction classification',
        'DQN agent for transaction timing optimization'
    ]
}

with open('saved_models/training_metadata.json', 'w') as f:
    json.dump(metadata, f, indent=2)
print("Saved training_metadata.json")

# Save feature importance as JSON, most important feature first
if feature_importance:
    with open('saved_models/feature_importance.json', 'w') as f:
        ranked_pairs = sorted(
            feature_importance.items(),
            key=lambda pair: pair[1],
            reverse=True,
        )
        sorted_importance = dict(ranked_pairs)
        json.dump(sorted_importance, f, indent=2)
    print("Saved feature_importance.json")


# === Save DQN Agent ===
print("\nSaving DQN agent...")

# The DQN cell sets DQN_TRAINED; the dir() check tolerates that cell not having run.
dqn_ready = 'DQN_TRAINED' in dir() and DQN_TRAINED
if not dqn_ready:
    print("DQN agent not trained, skipping...")
else:
    os.makedirs('saved_models/rl_agents', exist_ok=True)
    DQN_AGENT.save('saved_models/rl_agents/dqn_agent.pt')
    print(f"Saved dqn_agent.pt (Training steps: {DQN_AGENT.training_steps})")

    # Save DQN metadata next to the checkpoint
    dqn_meta = dict(
        state_dim=DQN_AGENT.state_dim,
        action_dim=DQN_AGENT.action_dim,
        training_steps=DQN_AGENT.training_steps,
        epsilon=float(DQN_AGENT.epsilon),
        metrics=DQN_METRICS,
        trained_at=datetime.now().isoformat(),
    )
    with open('saved_models/rl_agents/dqn_metadata.json', 'w') as f:
        json.dump(dqn_meta, f, indent=2)
    print("Saved dqn_metadata.json")

print("\n" + "=" * 60, "ALL MODELS SAVED", "=" * 60, sep="\n")

In [None]:
# Print final report
heavy_rule = "=" * 70
light_rule = "-" * 70

print("\n" + heavy_rule)
print("TRAINING COMPLETE - FINAL REPORT")
print(heavy_rule)

# Dataset overview
eth_flag = 'Yes' if HAS_ETH_PRICE else 'No'
data_summary = [
    "\nDATA SUMMARY",
    f"   Total samples: {len(df_clean):,}",
    f"   Segments: {len(good_segments)}",
    f"   Features: {len(features)} (1h: {len(features_1h_used)}, 4h: {len(features_4h_used)})",
    f"   Date range: {df_clean.index.min()} to {df_clean.index.max()}",
    f"   ETH price data: {eth_flag}",
]
print("\n".join(data_summary))

# Best point-prediction model per horizon
print("\n" + light_rule)
print("{:^70}".format('PRICE PREDICTION MODELS'))
print(light_rule)
print("{:<8} {:<18} {:>10} {:>13} {:>8}".format('Horizon', 'Model', 'MAE', 'vs Baseline', 'Dir Acc'))
print(light_rule)
row_template = "{:<8} {:<18} {:>10.6f} {:>13} {:>7.1%}"
for horizon, best in [('1h', best_1h), ('4h', best_4h), ('24h*', best_24h)]:
    scores = best[2]
    print(row_template.format(
        horizon,
        best[0],
        scores['mae'],
        scores['vs_baseline'],
        scores['directional_accuracy'],
    ))
print(light_rule)

section_rule = "-" * 70

# Prediction intervals: static description of the quantile models
print("\n" + section_rule)
print("{:^70}".format('PREDICTION INTERVALS (Quantile Regression)'))
print(section_rule)
for line in (
    "   Output format: '25-35 gwei (80% confidence)'",
    "   Quantiles trained: 10th, 50th (median), 90th percentile",
    "   Coverage target: 80% of actual values within interval",
):
    print(line)
print(section_rule)

# Regime detector accuracy and the meaning of each class index
print("\n" + section_rule)
print("{:^70}".format('REGIME DETECTION'))
print(section_rule)
print(f"   Accuracy: {regime_accuracy:.1%}")
for line in (
    "   Regimes:",
    "     0 = Normal (typical gas, low volatility)",
    "     1 = Elevated (high activity, NFT mints)",
    "     2 = Spike (extreme prices, DeFi rush)",
):
    print(line)
print(section_rule)

# Direction classifiers: accuracy and F1 per horizon
print("\n" + section_rule)
print("{:^70}".format('DIRECTION CLASSIFICATION'))
print(section_rule)
print("{:<8} {:>10} {:>10}".format('Horizon', 'Accuracy', 'F1 Score'))
print(section_rule)
for label, accuracy, f1 in (('1h', dir_acc_1h, dir_f1_1h), ('4h', dir_acc_4h, dir_f1_4h)):
    print("{:<8} {:>9.1%} {:>10.3f}".format(label, accuracy, f1))
print(section_rule)

# Static changelog of the modelling features, printed one line at a time
new_feature_lines = [
    "\nNEW FEATURES ADDED",
    "   1. ETH Price Features",
    "      - ETH momentum (15min, 30min, 60min)",
    "      - ETH volatility (1h, 2h, 4h)",
    "      - Extended ETH lags (1h, 2h, 4h price changes)",
    "      - Gas-ETH correlation",
    "   2. Network Utilization",
    "      - Rolling utilization stats",
    "      - High utilization streaks",
    "   3. Day-of-Week One-Hot Encoding",
    "      - Explicit day indicators (dow_0 to dow_6)",
    "   4. Target Differencing",
    "      - Predict change instead of absolute value",
    "      - Reconstruct final price from difference",
    "   5. Stacking Ensemble",
    "      - RF + GB + Ridge base models",
    "      - Ridge meta-learner for optimal combination",
    "   6. Feature Selection by Importance",
    "      - Dropped bottom 25% of low-importance features",
    "   7. Prediction Intervals",
    "      - 80% confidence bounds (10th-90th percentile)",
    "   8. Regime Detection",
    "      - Auto-detect Normal/Elevated/Spike periods",
]
for feature_line in new_feature_lines:
    print(feature_line)


# DQN section: metrics when the agent was trained, otherwise a placeholder line
dqn_rule = "-" * 70
print("\n" + dqn_rule)
print("{:^70}".format('DQN AGENT (Transaction Timing)'))
print(dqn_rule)
if not ('DQN_TRAINED' in dir() and DQN_TRAINED):
    print("   Not trained")
else:
    dqn_stats = DQN_METRICS
    print(f"   Episodes: {dqn_stats['episodes']}")
    print(f"   Training Steps: {dqn_stats['training_steps']}")
    print(f"   Avg Savings: {dqn_stats['avg_savings']*100:.2f}%")
    print(f"   Best Avg Savings: {dqn_stats['best_avg_savings']*100:.2f}%")
print(dqn_rule)

# Inventory of the artifacts written under saved_models/.
# The metadata/importance JSON files belong to the "Other" group (they were
# previously printed under the DQN heading), and the DQN files are listed
# with their rl_agents/ path and only when the agent was actually trained.
print("\nFILES SAVED")
print("   Point predictions:")
print("     - model_1h/4h/24h.pkl, scaler_1h/4h/24h.pkl")
print("   Prediction intervals:")
print("     - quantile_1h/4h/24h.pkl (10th, 50th, 90th percentiles)")
print("   Regime detection:")
print("     - regime_detector.pkl")
print("   Other:")
print("     - spike_detector_*.pkl, feature_names.pkl")
print("     - training_metadata.json, feature_importance.json")
if 'DQN_TRAINED' in dir() and DQN_TRAINED:
    print("   DQN Agent:")
    print("     - rl_agents/dqn_agent.pt, rl_agents/dqn_metadata.json")

print("\n" + "="*70)
print("Upload to Colab, run all cells, download gweizy_models.zip")
print("="*70)

In [None]:
# Visualizations
import matplotlib.pyplot as plt
import seaborn as sns

fig, axes = plt.subplots(2, 3, figsize=(15, 10))
fig.suptitle('Gweizy Model Training Results', fontsize=14, fontweight='bold')
(ax1, ax2, ax3), (ax4, ax5, ax6) = axes

# 1-2. Actual vs Predicted scatter for the 1h and 4h models; the dashed
# diagonal marks a perfect prediction.
scatter_panels = [
    (ax1, actual_1h, pred_1h, best_1h, '1h'),
    (ax2, actual_4h, pred_4h, best_4h, '4h'),
]
for panel, actual, predicted, winner, tag in scatter_panels:
    lo, hi = actual.min(), actual.max()
    panel.scatter(actual.values, predicted, alpha=0.5, s=10)
    panel.plot([lo, hi], [lo, hi], 'r--', label='Perfect')
    panel.set_xlabel('Actual Gas Price')
    panel.set_ylabel('Predicted')
    panel.set_title(f'{tag} Prediction (R²={winner[2]["r2"]:.3f})')
    panel.legend()

# 3. Model Comparison (MAE): green bars beat the 1h baseline, red bars do not
models_1h = [r[0] for r in all_1h]
maes_1h = [r[2]['mae'] for r in all_1h]
baseline_1h = BASELINES['1h']['best']
colors = ['green' if m < baseline_1h else 'red' for m in maes_1h]
bars = ax3.barh(models_1h, maes_1h, color=colors, alpha=0.7)
ax3.axvline(baseline_1h, color='blue', linestyle='--', label=f'Baseline: {baseline_1h:.4f}')
ax3.set_xlabel('MAE')
ax3.set_title('1h Model Comparison')
ax3.legend()

# 4. Residuals Distribution (1h)
residuals_1h = actual_1h.values - pred_1h
ax4.hist(residuals_1h, bins=50, alpha=0.7, edgecolor='black')
ax4.axvline(0, color='red', linestyle='--')
ax4.set_xlabel('Residual (Actual - Predicted)')
ax4.set_ylabel('Frequency')
ax4.set_title(f'1h Residuals (mean={np.mean(residuals_1h):.4f})')

# 5. Time Series Sample: first (up to) 200 test points, actual vs predicted
sample_size = min(200, len(actual_1h))
sample_x = range(sample_size)
ax5.plot(sample_x, actual_1h.values[:sample_size], label='Actual', alpha=0.8)
ax5.plot(sample_x, pred_1h[:sample_size], label='Predicted', alpha=0.8)
ax5.set_xlabel('Time (samples)')
ax5.set_ylabel('Gas Price')
ax5.set_title('1h: Actual vs Predicted (Time Series)')
ax5.legend()

# 6. Feature Importance (top 10), or a placeholder when none is available
if not feature_importance:
    ax6.text(0.5, 0.5, 'Feature importance\nnot available', ha='center', va='center')
    ax6.set_title('Feature Importance')
else:
    top_features = sorted(feature_importance.items(), key=lambda pair: pair[1], reverse=True)[:10]
    sorted_imp = dict(top_features)
    ax6.barh(list(sorted_imp.keys()), list(sorted_imp.values()), color='steelblue')
    ax6.set_xlabel('Importance')
    ax6.set_title('Top 10 Features (4h model)')

plt.tight_layout()
# Written into saved_models/ so the figure is included in the zip archive
plt.savefig('saved_models/training_results.png', dpi=150, bbox_inches='tight')
plt.show()
print("Saved training_results.png")

In [None]:
# Create zip file for download
import shutil

# Bundle the contents of saved_models/ into gweizy_models.zip
archive_base = 'gweizy_models'
shutil.make_archive(archive_base, 'zip', 'saved_models')
print(
    "\n✅ Created gweizy_models.zip\n"
    "\nDownload this file and extract to: backend/models/saved_models/"
)

# Auto-download (uses the Colab `files` helper imported in the upload cell)
files.download('gweizy_models.zip')