In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
import warnings
import joblib
warnings.filterwarnings('ignore')

# ML
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.model_selection import cross_val_score
from sklearn.feature_selection import SelectFromModel
from sklearn.calibration import CalibratedClassifierCV
import xgboost as xgb
import lightgbm as lgb
from catboost import CatBoostClassifier

# Paths
BASE_DIR = Path.cwd()
DATA_DIR = BASE_DIR / 'data'
MODEL_DIR = BASE_DIR / 'models' / 'signal_generator_v8'
MODEL_DIR.mkdir(parents=True, exist_ok=True)

# GPU Check
import torch
GPU_AVAILABLE = torch.cuda.is_available()

print("="*60)
print("üöÄ FOREX SIGNAL GENERATOR V8")
print("   Based on V6 + Optimizations")
print("="*60)
print(f"‚úì GPU Available: {GPU_AVAILABLE}")
print(f"‚úì Model Directory: {MODEL_DIR}")

üöÄ FOREX SIGNAL GENERATOR V8
   Based on V6 + Optimizations
‚úì GPU Available: True
‚úì Model Directory: c:\Users\Acer\Desktop\Forex-Signal-App\models\signal_generator_v8


## 1. Data Loading

In [2]:
# Load Data
train_df = pd.read_csv(DATA_DIR / 'EUR_USD_1min.csv')
test_df = pd.read_csv(DATA_DIR / 'EUR_USD_test.csv')

for df in [train_df, test_df]:
    if 'timestamp' in df.columns:
        df.rename(columns={'timestamp': 'time'}, inplace=True)
    df['time'] = pd.to_datetime(df['time'])

print(f"Train: {len(train_df):,} rows")
print(f"Test: {len(test_df):,} rows")

Train: 1,859,492 rows
Test: 296,778 rows


## 2. V8 Feature Engineering (V6 Base + Selected Improvements)

In [3]:
def add_features_v8(df):
    """
    V8 Features: V6 Core (33) + Carefully Selected New Features
    Goal: Improve without overfitting
    """
    df = df.copy()
    
    # ==================== V6 CORE FEATURES ====================
    # Time Features
    df['hour'] = df['time'].dt.hour
    df['day_of_week'] = df['time'].dt.dayofweek
    df['is_london'] = ((df['hour'] >= 8) & (df['hour'] < 16)).astype(int)
    df['is_ny'] = ((df['hour'] >= 13) & (df['hour'] < 21)).astype(int)
    
    # Moving Averages
    for p in [5, 10, 20, 50, 200]:
        df[f'sma_{p}'] = df['close'].rolling(p).mean()
        df[f'ema_{p}'] = df['close'].ewm(span=p, adjust=False).mean()
    
    # RSI
    delta = df['close'].diff()
    gain = delta.where(delta > 0, 0).rolling(14).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(14).mean()
    rs = gain / (loss + 1e-10)
    df['rsi'] = 100 - (100 / (1 + rs))
    
    # MACD
    ema12 = df['close'].ewm(span=12).mean()
    ema26 = df['close'].ewm(span=26).mean()
    df['macd'] = ema12 - ema26
    df['macd_signal'] = df['macd'].ewm(span=9).mean()
    df['macd_hist'] = df['macd'] - df['macd_signal']
    
    # Bollinger Bands
    df['bb_mid'] = df['close'].rolling(20).mean()
    df['bb_std'] = df['close'].rolling(20).std()
    df['bb_upper'] = df['bb_mid'] + 2 * df['bb_std']
    df['bb_lower'] = df['bb_mid'] - 2 * df['bb_std']
    df['bb_width'] = (df['bb_upper'] - df['bb_lower']) / df['bb_mid']
    
    # ADX (V6 core)
    df['tr0'] = abs(df['high'] - df['low'])
    df['tr1'] = abs(df['high'] - df['close'].shift())
    df['tr2'] = abs(df['low'] - df['close'].shift())
    df['tr'] = df[['tr0', 'tr1', 'tr2']].max(axis=1)
    
    df['up_move'] = df['high'] - df['high'].shift()
    df['down_move'] = df['low'].shift() - df['low']
    
    df['plus_dm'] = np.where((df['up_move'] > df['down_move']) & (df['up_move'] > 0), df['up_move'], 0)
    df['minus_dm'] = np.where((df['down_move'] > df['up_move']) & (df['down_move'] > 0), df['down_move'], 0)
    
    period = 14
    df['atr'] = df['tr'].rolling(period).mean()
    df['plus_di'] = 100 * (df['plus_dm'].rolling(period).mean() / (df['atr'] + 1e-10))
    df['minus_di'] = 100 * (df['minus_dm'].rolling(period).mean() / (df['atr'] + 1e-10))
    df['dx'] = 100 * abs(df['plus_di'] - df['minus_di']) / (df['plus_di'] + df['minus_di'] + 1e-10)
    df['adx'] = df['dx'].rolling(period).mean()
    
    # CCI
    tp = (df['high'] + df['low'] + df['close']) / 3
    sma_tp = tp.rolling(20).mean()
    mad_tp = tp.rolling(20).apply(lambda x: np.abs(x - x.mean()).mean())
    df['cci'] = (tp - sma_tp) / (0.015 * mad_tp + 1e-10)
    
    # Williams %R
    hh = df['high'].rolling(14).max()
    ll = df['low'].rolling(14).min()
    df['williams_r'] = -100 * (hh - df['close']) / (hh - ll + 1e-10)
    
    # Volatility
    df['returns'] = df['close'].pct_change()
    df['volatility'] = df['returns'].rolling(20).std() * 100
    
    # Interaction (V6)
    df['rsi_x_adx'] = df['rsi'] * df['adx'] / 100
    df['momentum_score'] = (
        (df['rsi'] > 50).astype(int) + 
        (df['macd'] > df['macd_signal']).astype(int) + 
        (df['plus_di'] > df['minus_di']).astype(int)
    )
    
    # ==================== V8 NEW FEATURES (Carefully Selected) ====================
    
    # 1. Price Position (normalized)
    df['price_position'] = (df['close'] - df['sma_50']) / (df['atr'] + 1e-10)
    
    # 2. Trend Strength Score (simple)
    df['trend_score'] = (
        (df['close'] > df['sma_20']).astype(int) +
        (df['sma_20'] > df['sma_50']).astype(int) +
        (df['sma_50'] > df['sma_200']).astype(int) +
        (df['adx'] > 25).astype(int)
    )
    
    # 3. RSI Zones (categorical encoded)
    df['rsi_zone'] = pd.cut(df['rsi'], bins=[0, 30, 45, 55, 70, 100], labels=[0, 1, 2, 3, 4]).astype(float)
    
    # 4. MACD Momentum (rate of change)
    df['macd_momentum'] = df['macd_hist'] - df['macd_hist'].shift(3)
    
    # 5. Volume/ATR Ratio (if volume available)
    if 'volume' in df.columns and df['volume'].sum() > 0:
        df['volume_ma'] = df['volume'].rolling(20).mean()
        df['volume_ratio'] = df['volume'] / (df['volume_ma'] + 1e-10)
    else:
        df['volume_ratio'] = 1.0
    
    # 6. Session Overlap (London + NY)
    df['is_overlap'] = ((df['hour'] >= 13) & (df['hour'] < 16)).astype(int)
    
    # 7. Recent Price Action
    df['close_vs_high'] = (df['high'].rolling(20).max() - df['close']) / (df['atr'] + 1e-10)
    df['close_vs_low'] = (df['close'] - df['low'].rolling(20).min()) / (df['atr'] + 1e-10)
    
    # Cleanup temp columns
    drop_cols = ['tr0', 'tr1', 'tr2', 'tr', 'up_move', 'down_move', 'plus_dm', 'minus_dm']
    df.drop(columns=[c for c in drop_cols if c in df.columns], inplace=True)
    
    return df

print("Adding V8 features...")
train_df = add_features_v8(train_df)
test_df = add_features_v8(test_df)
print(f"‚úì Features added. Total columns: {len(train_df.columns)}")

Adding V8 features...
‚úì Features added. Total columns: 49
‚úì Features added. Total columns: 49


## 3. Labeling (BUY vs SELL - Like V6)

In [4]:
def create_labels(df, forward_periods=60, min_pips=15, ratio=1.5):
    """
    BUY (1): Up move >= min_pips AND Up > Down * ratio
    SELL (0): Down move >= min_pips AND Down > Up * ratio
    HOLD (-1): Neither
    """
    df = df.copy()
    min_move = min_pips * 0.0001
    
    df['future_max'] = df['high'].rolling(forward_periods).max().shift(-forward_periods)
    df['future_min'] = df['low'].rolling(forward_periods).min().shift(-forward_periods)
    
    df['up_move'] = df['future_max'] - df['close']
    df['down_move'] = df['close'] - df['future_min']
    
    conditions = [
        (df['up_move'] >= min_move) & (df['up_move'] > df['down_move'] * ratio),
        (df['down_move'] >= min_move) & (df['down_move'] > df['up_move'] * ratio)
    ]
    choices = [1, 0]
    df['signal'] = np.select(conditions, choices, default=-1)
    
    df.drop(['future_max', 'future_min', 'up_move', 'down_move'], axis=1, inplace=True)
    return df

train_df = create_labels(train_df)
test_df = create_labels(test_df)

# Filter BUY/SELL only (remove HOLD)
train_binary = train_df[train_df['signal'] != -1].copy()
test_binary = test_df[test_df['signal'] != -1].copy()

print(f"Train: {len(train_binary):,} (BUY/SELL only)")
print(f"Test: {len(test_binary):,}")
print(f"BUY ratio (train): {train_binary['signal'].mean()*100:.1f}%")
print(f"BUY ratio (test): {test_binary['signal'].mean()*100:.1f}%")

Train: 393,249 (BUY/SELL only)
Test: 80,302
BUY ratio (train): 49.3%
BUY ratio (test): 52.2%


## 4. Feature Selection

In [5]:
# Prepare Data
exclude_cols = ['time', 'signal', 'open', 'high', 'low', 'close', 'volume', 'tick_volume']
feature_cols = [c for c in train_binary.columns if c not in exclude_cols]

train_clean = train_binary.dropna(subset=feature_cols).copy()
test_clean = test_binary.dropna(subset=feature_cols).copy()

X_train = train_clean[feature_cols].values
y_train = train_clean['signal'].values
X_test = test_clean[feature_cols].values
y_test = test_clean['signal'].values

print(f"Features before selection: {len(feature_cols)}")

# Feature Importance based selection using LightGBM
selector_model = lgb.LGBMClassifier(
    n_estimators=100, max_depth=6, random_state=42, verbose=-1, device='gpu'
)
selector_model.fit(X_train, y_train)

# Get feature importances
importances = selector_model.feature_importances_
importance_df = pd.DataFrame({
    'feature': feature_cols,
    'importance': importances
}).sort_values('importance', ascending=False)

# Select top features (importance > median)
threshold = np.median(importances)
selected_features = importance_df[importance_df['importance'] >= threshold]['feature'].tolist()

print(f"Features after selection: {len(selected_features)}")
print(f"\nTop 10 Features:")
print(importance_df.head(10).to_string(index=False))

Features before selection: 43
Features after selection: 22

Top 10 Features:
    feature  importance
       hour         404
    sma_200         326
  volume_ma         293
day_of_week         267
    ema_200         215
   bb_lower         141
        atr         138
 volatility         130
     sma_50         119
   bb_upper         100
Features after selection: 22

Top 10 Features:
    feature  importance
       hour         404
    sma_200         326
  volume_ma         293
day_of_week         267
    ema_200         215
   bb_lower         141
        atr         138
 volatility         130
     sma_50         119
   bb_upper         100


## 5. Prepare Final Training Data

In [6]:
# Use selected features
X_train_selected = train_clean[selected_features].values
X_test_selected = test_clean[selected_features].values

# Scale
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_selected)
X_test_scaled = scaler.transform(X_test_selected)

print(f"Training data: {X_train_scaled.shape}")
print(f"Test data: {X_test_scaled.shape}")

Training data: (393152, 22)
Test data: (80288, 22)


## 6. Train Optimized Models (GPU)

In [7]:
print("="*60)
print("üöÄ TRAINING V8 MODELS (Optimized Hyperparameters)")
print("="*60)

models = {}

# 1. XGBoost - Tuned for higher precision
models['xgb1'] = xgb.XGBClassifier(
    n_estimators=600, max_depth=6, learning_rate=0.03,
    subsample=0.8, colsample_bytree=0.8,
    reg_alpha=0.1, reg_lambda=1.0,
    min_child_weight=3,
    random_state=42, tree_method='hist', device='cuda', verbosity=0
)

# 2. XGBoost - Different config
models['xgb2'] = xgb.XGBClassifier(
    n_estimators=400, max_depth=8, learning_rate=0.05,
    subsample=0.7, colsample_bytree=0.7,
    reg_alpha=0.05, reg_lambda=0.5,
    gamma=0.1,
    random_state=43, tree_method='hist', device='cuda', verbosity=0
)

# 3. LightGBM - Tuned
models['lgb1'] = lgb.LGBMClassifier(
    n_estimators=600, max_depth=6, learning_rate=0.03,
    subsample=0.8, colsample_bytree=0.8,
    num_leaves=31, min_child_samples=30,
    reg_alpha=0.1, reg_lambda=1.0,
    random_state=42, verbose=-1, device='gpu'
)

# 4. LightGBM - Different config
models['lgb2'] = lgb.LGBMClassifier(
    n_estimators=500, max_depth=8, learning_rate=0.04,
    subsample=0.75, colsample_bytree=0.75,
    num_leaves=63, min_child_samples=20,
    random_state=44, verbose=-1, device='gpu'
)

# 5. CatBoost - Tuned
models['cat'] = CatBoostClassifier(
    iterations=600, depth=6, learning_rate=0.03,
    l2_leaf_reg=3.0, random_strength=0.5,
    bagging_temperature=0.5,
    random_seed=42, task_type='GPU', devices='0', verbose=False
)

# Train
predictions = {}
probabilities = {}

for name, model in models.items():
    print(f"  Training {name.upper()}...", end=" ")
    model.fit(X_train_scaled, y_train)
    predictions[name] = model.predict(X_test_scaled)
    probabilities[name] = model.predict_proba(X_test_scaled)
    acc = accuracy_score(y_test, predictions[name])
    print(f"‚úì Accuracy: {acc*100:.2f}%")

üöÄ TRAINING V8 MODELS (Optimized Hyperparameters)
  Training XGB1... ‚úì Accuracy: 50.31%
  Training XGB2... ‚úì Accuracy: 50.31%
  Training XGB2... ‚úì Accuracy: 50.19%
  Training LGB1... ‚úì Accuracy: 50.19%
  Training LGB1... ‚úì Accuracy: 49.90%
  Training LGB2... ‚úì Accuracy: 49.90%
  Training LGB2... ‚úì Accuracy: 49.57%
  Training CAT... ‚úì Accuracy: 49.57%
  Training CAT... ‚úì Accuracy: 51.27%
‚úì Accuracy: 51.27%


## 7. Optimized Ensemble

In [8]:
# Find optimal weights based on individual performance
accuracies = {name: accuracy_score(y_test, predictions[name]) for name in models.keys()}
total_acc = sum(accuracies.values())

# Weight based on accuracy
weights = {name: acc / total_acc for name, acc in accuracies.items()}
print("Optimized Weights:")
for name, w in weights.items():
    print(f"  {name}: {w:.3f}")

# Weighted Ensemble
final_proba = np.zeros_like(probabilities['xgb1'])
for name, w in weights.items():
    final_proba += w * probabilities[name]

buy_prob = final_proba[:, 1] * 100

# Agreement bonus (all 5 agree)
all_agree_buy = np.all([predictions[name] == 1 for name in models.keys()], axis=0)
most_agree_buy = np.sum([predictions[name] == 1 for name in models.keys()], axis=0) >= 4

confidence = buy_prob.copy()
confidence[all_agree_buy] = np.minimum(confidence[all_agree_buy] + 5, 100)
confidence[most_agree_buy & ~all_agree_buy] = np.minimum(confidence[most_agree_buy & ~all_agree_buy] + 2, 100)

print(f"\nAll 5 agree on BUY: {all_agree_buy.sum():,}")
print(f"4+ agree on BUY: {most_agree_buy.sum():,}")

Optimized Weights:
  xgb1: 0.200
  xgb2: 0.200
  lgb1: 0.199
  lgb2: 0.197
  cat: 0.204

All 5 agree on BUY: 19,277
4+ agree on BUY: 28,511


## 8. V8 Results

In [9]:
print("="*70)
print("üìä V8 ENSEMBLE RESULTS")
print("="*70)

print(f"\n{'Confidence':>12} | {'Signals':>10} | {'Correct':>10} | {'Accuracy':>10}")
print("-"*60)

v8_results = {}
for conf in [50, 60, 70, 75, 80, 85, 90, 95]:
    mask = confidence >= conf
    if mask.sum() > 0:
        signals = mask.sum()
        correct = y_test[mask].sum()
        acc = correct / signals * 100
        v8_results[conf] = {'signals': signals, 'correct': correct, 'accuracy': acc}
        print(f"{conf:>10}%+ | {signals:>10} | {correct:>10} | {acc:>9.1f}%")

üìä V8 ENSEMBLE RESULTS

  Confidence |    Signals |    Correct |   Accuracy
------------------------------------------------------------
        50%+ |      36815 |      19288 |      52.4%
        60%+ |      16273 |       8711 |      53.5%
        70%+ |       1948 |       1147 |      58.9%
        75%+ |        627 |        424 |      67.6%
        80%+ |        229 |        149 |      65.1%
        85%+ |         48 |         45 |      93.8%
        90%+ |          8 |          8 |     100.0%


## 9. V8 vs V6 Comparison

In [10]:
# Load V6 models
v6_dir = BASE_DIR / 'models' / 'signal_generator_v6'

v6_models = {}
for name in ['xgb1', 'xgb2', 'lgb1', 'lgb2', 'cat']:
    v6_models[name] = joblib.load(v6_dir / f'{name}_v6_bin.joblib')

v6_scaler = joblib.load(v6_dir / 'scaler_v6_bin.joblib')
v6_feature_cols = joblib.load(v6_dir / 'feature_cols_v6.joblib')

# Prepare V6 test data
missing = [c for c in v6_feature_cols if c not in test_clean.columns]
for c in missing:
    test_clean[c] = 0

X_test_v6 = test_clean[v6_feature_cols].values
X_test_v6_scaled = v6_scaler.transform(X_test_v6)

# V6 predictions
v6_proba = {}
v6_preds = {}
for name, model in v6_models.items():
    v6_preds[name] = model.predict(X_test_v6_scaled)
    v6_proba[name] = model.predict_proba(X_test_v6_scaled)

# V6 ensemble
v6_weights = {'xgb1': 0.20, 'xgb2': 0.20, 'lgb1': 0.20, 'lgb2': 0.20, 'cat': 0.20}
v6_final_proba = np.zeros_like(v6_proba['xgb1'])
for name, w in v6_weights.items():
    v6_final_proba += w * v6_proba[name]

v6_buy_prob = v6_final_proba[:, 1] * 100
v6_all_agree = np.all([v6_preds[name] == 1 for name in v6_models.keys()], axis=0)
v6_confidence = v6_buy_prob.copy()
v6_confidence[v6_all_agree] = np.minimum(v6_confidence[v6_all_agree] + 5, 100)

print("="*80)
print("üìä V8 vs V6 COMPARISON")
print("="*80)
print(f"\n{'Threshold':>10} | {'V6 Sig':>8} | {'V6 Acc':>8} | {'V8 Sig':>8} | {'V8 Acc':>8} | {'Winner':>10}")
print("-"*70)

for conf in [70, 75, 80, 85, 90]:
    # V6
    v6_mask = v6_confidence >= conf
    v6_sig = v6_mask.sum()
    v6_acc = y_test[v6_mask].mean() * 100 if v6_sig > 0 else 0
    
    # V8
    v8_mask = confidence >= conf
    v8_sig = v8_mask.sum()
    v8_acc = y_test[v8_mask].mean() * 100 if v8_sig > 0 else 0
    
    if v8_acc > v6_acc + 1:
        winner = "V8 ‚úì"
    elif v6_acc > v8_acc + 1:
        winner = "V6 ‚úì"
    else:
        winner = "‚âà TIE"
    
    print(f"{conf:>8}%+ | {v6_sig:>8} | {v6_acc:>7.1f}% | {v8_sig:>8} | {v8_acc:>7.1f}% | {winner:>10}")

üìä V8 vs V6 COMPARISON

 Threshold |   V6 Sig |   V6 Acc |   V8 Sig |   V8 Acc |     Winner
----------------------------------------------------------------------
      70%+ |     4480 |    55.3% |     1948 |    58.9% |       V8 ‚úì
      75%+ |     1406 |    56.8% |      627 |    67.6% |       V8 ‚úì
      80%+ |      408 |    58.1% |      229 |    65.1% |       V8 ‚úì
      85%+ |      104 |    63.5% |       48 |    93.8% |       V8 ‚úì
      90%+ |       21 |    76.2% |        8 |   100.0% |       V8 ‚úì


## 10. Final Summary

In [11]:
print("="*80)
print("üìã FINAL SUMMARY")
print("="*80)

# Find best configs
v8_best = max(v8_results.items(), key=lambda x: x[1]['accuracy'] if x[1]['signals'] >= 30 else 0)

print(f"\nV8 Best: {v8_best[0]}% threshold ‚Üí {v8_best[1]['accuracy']:.1f}% accuracy ({v8_best[1]['signals']} signals)")

# Practical recommendation
print("\nüí° RECOMMENDED CONFIGURATIONS:")
for conf in [75, 80, 85]:
    if conf in v8_results and v8_results[conf]['signals'] >= 20:
        r = v8_results[conf]
        print(f"   {conf}%+: {r['accuracy']:.1f}% acc, {r['signals']} signals")

üìã FINAL SUMMARY

V8 Best: 85% threshold ‚Üí 93.8% accuracy (48 signals)

üí° RECOMMENDED CONFIGURATIONS:
   75%+: 67.6% acc, 627 signals
   80%+: 65.1% acc, 229 signals
   85%+: 93.8% acc, 48 signals


## 11. Save V8 Models

In [12]:
print("Saving V8 Models...")

for name, model in models.items():
    joblib.dump(model, MODEL_DIR / f'{name}_v8.joblib')

joblib.dump(scaler, MODEL_DIR / 'scaler_v8.joblib')
joblib.dump(selected_features, MODEL_DIR / 'feature_cols_v8.joblib')
joblib.dump(weights, MODEL_DIR / 'weights_v8.joblib')

config = {
    'version': 'v8',
    'mode': 'BUY_vs_SELL',
    'best_threshold': v8_best[0],
    'features': len(selected_features),
    'models': list(models.keys())
}
joblib.dump(config, MODEL_DIR / 'config_v8.joblib')

print(f"‚úÖ V8 Models Saved to {MODEL_DIR}")

Saving V8 Models...
‚úÖ V8 Models Saved to c:\Users\Acer\Desktop\Forex-Signal-App\models\signal_generator_v8


In [13]:
# ================================================================
# OVERFIT CHECK: Train vs Test Accuracy
# ================================================================
print("="*70)
print("üîç OVERFIT ANALYSIS: Train vs Test")
print("="*70)

# Train predictions
train_proba = {}
train_preds = {}
for name, model in models.items():
    train_proba[name] = model.predict_proba(X_train_selected)
    train_preds[name] = model.predict(X_train_selected)

# Train ensemble
train_final_proba = np.zeros_like(train_proba['xgb1'])
for name, w in weights.items():
    train_final_proba += w * train_proba[name]

train_buy_prob = train_final_proba[:, 1] * 100

# Agreement bonus
train_all_agree = np.all([train_preds[name] == 1 for name in models.keys()], axis=0)
train_confidence = train_buy_prob.copy()
train_confidence[train_all_agree] = np.minimum(train_confidence[train_all_agree] + 5, 100)

print(f"\n{'Threshold':>10} | {'Train Sig':>10} | {'Train Acc':>10} | {'Test Sig':>10} | {'Test Acc':>10} | {'Diff':>8}")
print("-"*70)

for conf in [70, 75, 80, 85, 90]:
    # Train
    train_mask = train_confidence >= conf
    train_sig = train_mask.sum()
    train_acc = y_train[train_mask].mean() * 100 if train_sig > 0 else 0
    
    # Test
    test_mask = confidence >= conf
    test_sig = test_mask.sum()
    test_acc = y_test[test_mask].mean() * 100 if test_sig > 0 else 0
    
    diff = train_acc - test_acc
    overfit = "‚ö†Ô∏è OVERFIT" if diff > 10 else ("‚úÖ OK" if diff < 5 else "‚ö° SLIGHT")
    
    print(f"{conf:>8}%+ | {train_sig:>10} | {train_acc:>9.1f}% | {test_sig:>10} | {test_acc:>9.1f}% | {diff:>+7.1f}% {overfit}")

print("\n" + "="*70)
print("üìã OVERFIT INTERPRETATION:")
print("   - Diff < 5%: ‚úÖ Generalization —Å–∞–π–Ω")
print("   - Diff 5-10%: ‚ö° –ë–∞–≥–∞ –∑—ç—Ä—ç–≥ overfit")
print("   - Diff > 10%: ‚ö†Ô∏è Overfit - Test –¥—ç—ç—Ä—Ö “Ø—Ä –¥“Ø–Ω–¥ –∞–Ω—Ö–∞–∞—Ä–∞—Ö")
print("="*70)

üîç OVERFIT ANALYSIS: Train vs Test

 Threshold |  Train Sig |  Train Acc |   Test Sig |   Test Acc |     Diff
----------------------------------------------------------------------
      70%+ |       1799 |      43.8% |       1948 |      58.9% |   -15.1% ‚úÖ OK
      75%+ |        272 |      57.7% |        627 |      67.6% |    -9.9% ‚úÖ OK
      80%+ |         19 |     100.0% |        229 |      65.1% |   +34.9% ‚ö†Ô∏è OVERFIT
      85%+ |          0 |       0.0% |         48 |      93.8% |   -93.8% ‚úÖ OK
      90%+ |          0 |       0.0% |          8 |     100.0% |  -100.0% ‚úÖ OK

üìã OVERFIT INTERPRETATION:
   - Diff < 5%: ‚úÖ Generalization —Å–∞–π–Ω
   - Diff 5-10%: ‚ö° –ë–∞–≥–∞ –∑—ç—Ä—ç–≥ overfit
   - Diff > 10%: ‚ö†Ô∏è Overfit - Test –¥—ç—ç—Ä—Ö “Ø—Ä –¥“Ø–Ω–¥ –∞–Ω—Ö–∞–∞—Ä–∞—Ö


In [14]:
# ================================================================
# STATISTICAL SIGNIFICANCE CHECK
# ================================================================
from scipy import stats

print("="*70)
print("üìä STATISTICAL SIGNIFICANCE (95% Confidence Interval)")
print("="*70)

def confidence_interval(n_success, n_total, confidence=0.95):
    """Wilson score interval for proportion"""
    if n_total == 0:
        return 0, 0, 0
    p = n_success / n_total
    z = stats.norm.ppf((1 + confidence) / 2)
    
    denominator = 1 + z**2 / n_total
    center = (p + z**2 / (2 * n_total)) / denominator
    spread = z * np.sqrt((p * (1 - p) + z**2 / (4 * n_total)) / n_total) / denominator
    
    lower = max(0, center - spread)
    upper = min(1, center + spread)
    return p, lower, upper

print(f"\n{'Threshold':>10} | {'Signals':>8} | {'Accuracy':>10} | {'95% CI':>20} | {'Reliable?':>12}")
print("-"*75)

for conf in [70, 75, 80, 85, 90]:
    mask = confidence >= conf
    n_total = mask.sum()
    n_success = y_test[mask].sum() if n_total > 0 else 0
    
    acc, lower, upper = confidence_interval(n_success, n_total)
    ci_str = f"[{lower*100:.1f}% - {upper*100:.1f}%]"
    
    # Reliability check
    if n_total >= 100 and lower > 0.55:
        reliable = "‚úÖ RELIABLE"
    elif n_total >= 30 and lower > 0.50:
        reliable = "‚ö° MODERATE"
    elif n_total < 30:
        reliable = "‚ö†Ô∏è TOO FEW"
    else:
        reliable = "‚ùå NOT RELIABLE"
    
    print(f"{conf:>8}%+ | {n_total:>8} | {acc*100:>9.1f}% | {ci_str:>20} | {reliable:>12}")

print("\n" + "="*70)
print("üìã RECOMMENDATION:")
print("   - ‚úÖ RELIABLE: Use this threshold confidently")
print("   - ‚ö° MODERATE: Use with caution, monitor performance")
print("   - ‚ö†Ô∏è TOO FEW: Need more data to confirm")
print("   - ‚ùå NOT RELIABLE: Don't use, may be random chance")
print("="*70)

üìä STATISTICAL SIGNIFICANCE (95% Confidence Interval)

 Threshold |  Signals |   Accuracy |               95% CI |    Reliable?
---------------------------------------------------------------------------
      70%+ |     1948 |      58.9% |      [56.7% - 61.0%] |   ‚úÖ RELIABLE
      75%+ |      627 |      67.6% |      [63.9% - 71.2%] |   ‚úÖ RELIABLE
      80%+ |      229 |      65.1% |      [58.7% - 70.9%] |   ‚úÖ RELIABLE
      85%+ |       48 |      93.8% |      [83.2% - 97.9%] |   ‚ö° MODERATE
      90%+ |        8 |     100.0% |     [67.6% - 100.0%] |   ‚ö†Ô∏è TOO FEW

üìã RECOMMENDATION:
   - ‚úÖ RELIABLE: Use this threshold confidently
   - ‚ö° MODERATE: Use with caution, monitor performance
   - ‚ö†Ô∏è TOO FEW: Need more data to confirm
   - ‚ùå NOT RELIABLE: Don't use, may be random chance
