In [2]:
import pandas as pd
import numpy as np
from pathlib import Path
import warnings
import joblib
warnings.filterwarnings('ignore')

# ML
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.model_selection import StratifiedKFold, TimeSeriesSplit, cross_val_predict
from sklearn.feature_selection import SelectFromModel
from sklearn.calibration import CalibratedClassifierCV
from sklearn.linear_model import LogisticRegression
import xgboost as xgb
import lightgbm as lgb
from catboost import CatBoostClassifier

# Paths
BASE_DIR = Path.cwd()
DATA_DIR = BASE_DIR / 'data'
MODEL_DIR = BASE_DIR / 'models' / 'signal_generator_v9'
MODEL_DIR.mkdir(parents=True, exist_ok=True)

# GPU Check
import torch
GPU_AVAILABLE = torch.cuda.is_available()

print("="*70)
print("üöÄ FOREX SIGNAL GENERATOR V9")
print("   Anti-Overfit + High Performance")
print("="*70)
print(f"‚úì GPU Available: {GPU_AVAILABLE}")
print(f"‚úì Model Directory: {MODEL_DIR}")

üöÄ FOREX SIGNAL GENERATOR V9
   Anti-Overfit + High Performance
‚úì GPU Available: True
‚úì Model Directory: c:\Users\Acer\Desktop\Forex-Signal-App\models\signal_generator_v9


## 1. Data Loading

In [3]:
# Load Data
train_df = pd.read_csv(DATA_DIR / 'EUR_USD_1min.csv')
test_df = pd.read_csv(DATA_DIR / 'EUR_USD_test.csv')

for df in [train_df, test_df]:
    if 'timestamp' in df.columns:
        df.rename(columns={'timestamp': 'time'}, inplace=True)
    df['time'] = pd.to_datetime(df['time'])

print(f"Train: {len(train_df):,} rows")
print(f"Test: {len(test_df):,} rows")
print(f"Train period: {train_df['time'].min()} to {train_df['time'].max()}")
print(f"Test period: {test_df['time'].min()} to {test_df['time'].max()}")

Train: 1,859,492 rows
Test: 296,778 rows
Train period: 2019-12-31 16:00:00+00:00 to 2024-12-30 16:00:00+00:00
Test period: 2024-12-31 16:00:00+00:00 to 2025-10-17 06:11:00+00:00


## 2. V9 Feature Engineering (Refined from V8)

In [4]:
def add_features_v9(df):
    """
    V9 Features: V8-–∏–π–Ω —Å–∞–π–Ω features + –®–∏–Ω—ç robust features
    """
    df = df.copy()
    
    # ==================== CORE FEATURES ====================
    # Time Features
    df['hour'] = df['time'].dt.hour
    df['day_of_week'] = df['time'].dt.dayofweek
    df['is_london'] = ((df['hour'] >= 8) & (df['hour'] < 16)).astype(int)
    df['is_ny'] = ((df['hour'] >= 13) & (df['hour'] < 21)).astype(int)
    df['is_overlap'] = ((df['hour'] >= 13) & (df['hour'] < 16)).astype(int)
    
    # Moving Averages
    for p in [5, 10, 20, 50, 200]:
        df[f'sma_{p}'] = df['close'].rolling(p).mean()
        df[f'ema_{p}'] = df['close'].ewm(span=p, adjust=False).mean()
    
    # RSI (Multi-period for robustness)
    for period in [7, 14, 21]:
        delta = df['close'].diff()
        gain = delta.where(delta > 0, 0).rolling(period).mean()
        loss = (-delta.where(delta < 0, 0)).rolling(period).mean()
        rs = gain / (loss + 1e-10)
        df[f'rsi_{period}'] = 100 - (100 / (1 + rs))
    
    # MACD
    ema12 = df['close'].ewm(span=12).mean()
    ema26 = df['close'].ewm(span=26).mean()
    df['macd'] = ema12 - ema26
    df['macd_signal'] = df['macd'].ewm(span=9).mean()
    df['macd_hist'] = df['macd'] - df['macd_signal']
    df['macd_momentum'] = df['macd_hist'] - df['macd_hist'].shift(3)
    
    # Bollinger Bands
    df['bb_mid'] = df['close'].rolling(20).mean()
    df['bb_std'] = df['close'].rolling(20).std()
    df['bb_upper'] = df['bb_mid'] + 2 * df['bb_std']
    df['bb_lower'] = df['bb_mid'] - 2 * df['bb_std']
    df['bb_width'] = (df['bb_upper'] - df['bb_lower']) / (df['bb_mid'] + 1e-10)
    df['bb_position'] = (df['close'] - df['bb_lower']) / (df['bb_upper'] - df['bb_lower'] + 1e-10)
    
    # ADX
    df['tr0'] = abs(df['high'] - df['low'])
    df['tr1'] = abs(df['high'] - df['close'].shift())
    df['tr2'] = abs(df['low'] - df['close'].shift())
    df['tr'] = df[['tr0', 'tr1', 'tr2']].max(axis=1)
    
    df['up_move'] = df['high'] - df['high'].shift()
    df['down_move'] = df['low'].shift() - df['low']
    
    df['plus_dm'] = np.where((df['up_move'] > df['down_move']) & (df['up_move'] > 0), df['up_move'], 0)
    df['minus_dm'] = np.where((df['down_move'] > df['up_move']) & (df['down_move'] > 0), df['down_move'], 0)
    
    period = 14
    df['atr'] = df['tr'].rolling(period).mean()
    df['plus_di'] = 100 * (df['plus_dm'].rolling(period).mean() / (df['atr'] + 1e-10))
    df['minus_di'] = 100 * (df['minus_dm'].rolling(period).mean() / (df['atr'] + 1e-10))
    df['dx'] = 100 * abs(df['plus_di'] - df['minus_di']) / (df['plus_di'] + df['minus_di'] + 1e-10)
    df['adx'] = df['dx'].rolling(period).mean()
    
    # CCI
    tp = (df['high'] + df['low'] + df['close']) / 3
    sma_tp = tp.rolling(20).mean()
    mad_tp = tp.rolling(20).apply(lambda x: np.abs(x - x.mean()).mean())
    df['cci'] = (tp - sma_tp) / (0.015 * mad_tp + 1e-10)
    
    # Williams %R
    hh = df['high'].rolling(14).max()
    ll = df['low'].rolling(14).min()
    df['williams_r'] = -100 * (hh - df['close']) / (hh - ll + 1e-10)
    
    # Stochastic
    df['stoch_k'] = 100 * (df['close'] - ll) / (hh - ll + 1e-10)
    df['stoch_d'] = df['stoch_k'].rolling(3).mean()
    
    # Volatility
    df['returns'] = df['close'].pct_change()
    df['volatility'] = df['returns'].rolling(20).std() * 100
    df['volatility_ratio'] = df['volatility'] / (df['volatility'].rolling(100).mean() + 1e-10)
    
    # ==================== V9 ROBUST FEATURES ====================
    
    # 1. Normalized Price Position (ATR-based)
    df['price_vs_sma20'] = (df['close'] - df['sma_20']) / (df['atr'] + 1e-10)
    df['price_vs_sma50'] = (df['close'] - df['sma_50']) / (df['atr'] + 1e-10)
    
    # 2. Trend Alignment Score
    df['trend_alignment'] = (
        (df['close'] > df['sma_10']).astype(int) +
        (df['sma_10'] > df['sma_20']).astype(int) +
        (df['sma_20'] > df['sma_50']).astype(int) +
        (df['sma_50'] > df['sma_200']).astype(int)
    )
    
    # 3. Momentum Consensus
    df['momentum_consensus'] = (
        (df['rsi_14'] > 50).astype(int) +
        (df['macd'] > df['macd_signal']).astype(int) +
        (df['stoch_k'] > df['stoch_d']).astype(int) +
        (df['plus_di'] > df['minus_di']).astype(int) +
        (df['cci'] > 0).astype(int)
    )
    
    # 4. Relative Strength (vs recent range)
    df['close_vs_high20'] = (df['high'].rolling(20).max() - df['close']) / (df['atr'] + 1e-10)
    df['close_vs_low20'] = (df['close'] - df['low'].rolling(20).min()) / (df['atr'] + 1e-10)
    df['range_position'] = df['close_vs_low20'] / (df['close_vs_high20'] + df['close_vs_low20'] + 1e-10)
    
    # 5. Multi-period RSI Agreement
    df['rsi_agreement'] = (
        (df['rsi_7'] > 50).astype(int) +
        (df['rsi_14'] > 50).astype(int) +
        (df['rsi_21'] > 50).astype(int)
    )
    
    # 6. Directional Strength
    df['di_diff'] = df['plus_di'] - df['minus_di']
    df['di_ratio'] = df['plus_di'] / (df['minus_di'] + 1e-10)
    
    # 7. ATR-normalized price changes
    for p in [5, 10, 20]:
        df[f'price_change_{p}'] = (df['close'] - df['close'].shift(p)) / (df['atr'] + 1e-10)
    
    # Cleanup temp columns
    drop_cols = ['tr0', 'tr1', 'tr2', 'tr', 'up_move', 'down_move', 'plus_dm', 'minus_dm']
    df.drop(columns=[c for c in drop_cols if c in df.columns], inplace=True)
    
    return df

print("Adding V9 features...")
train_df = add_features_v9(train_df)
test_df = add_features_v9(test_df)
print(f"‚úì Features added. Total columns: {len(train_df.columns)}")

Adding V9 features...
‚úì Features added. Total columns: 59


## 3. Labeling (BUY vs SELL)

In [5]:
def create_labels(df, forward_periods=60, min_pips=15, ratio=1.5):
    """
    BUY (1): Up move >= min_pips AND Up > Down * ratio
    SELL (0): Down move >= min_pips AND Down > Up * ratio
    HOLD (-1): Neither
    """
    df = df.copy()
    min_move = min_pips * 0.0001
    
    df['future_max'] = df['high'].rolling(forward_periods).max().shift(-forward_periods)
    df['future_min'] = df['low'].rolling(forward_periods).min().shift(-forward_periods)
    
    df['up_move'] = df['future_max'] - df['close']
    df['down_move'] = df['close'] - df['future_min']
    
    conditions = [
        (df['up_move'] >= min_move) & (df['up_move'] > df['down_move'] * ratio),
        (df['down_move'] >= min_move) & (df['down_move'] > df['up_move'] * ratio)
    ]
    choices = [1, 0]
    df['signal'] = np.select(conditions, choices, default=-1)
    
    df.drop(['future_max', 'future_min', 'up_move', 'down_move'], axis=1, inplace=True)
    return df

train_df = create_labels(train_df)
test_df = create_labels(test_df)

# Filter BUY/SELL only
train_binary = train_df[train_df['signal'] != -1].copy()
test_binary = test_df[test_df['signal'] != -1].copy()

print(f"Train: {len(train_binary):,} (BUY/SELL only)")
print(f"Test: {len(test_binary):,}")
print(f"BUY ratio (train): {train_binary['signal'].mean()*100:.1f}%")
print(f"BUY ratio (test): {test_binary['signal'].mean()*100:.1f}%")

Train: 393,249 (BUY/SELL only)
Test: 80,302
BUY ratio (train): 49.3%
BUY ratio (test): 52.2%


## 4. Feature Selection with Cross-Validation

In [20]:
# Prepare Data - Use more features (like V8)
exclude_cols = ['time', 'signal', 'open', 'high', 'low', 'close', 'volume', 'tick_volume']
feature_cols = [c for c in train_binary.columns if c not in exclude_cols]

train_clean = train_binary.dropna(subset=feature_cols).copy()
test_clean = test_binary.dropna(subset=feature_cols).copy()

X_train_full = train_clean[feature_cols].values
y_train = train_clean['signal'].values
X_test_full = test_clean[feature_cols].values
y_test = test_clean['signal'].values

print(f"Features before selection: {len(feature_cols)}")

# Feature Importance based selection using LightGBM
selector_model = lgb.LGBMClassifier(
    n_estimators=200, max_depth=6, random_state=42, verbose=-1, device='gpu'
)
selector_model.fit(X_train_full, y_train)

# Get feature importances
importances = selector_model.feature_importances_
importance_df = pd.DataFrame({
    'feature': feature_cols,
    'importance': importances
}).sort_values('importance', ascending=False)

# Select top features - use median threshold like V8
threshold = np.median(importances)
selected_features = importance_df[importance_df['importance'] >= threshold]['feature'].tolist()

print(f"Features after selection: {len(selected_features)}")
print(f"\nTop 15 Features:")
print(importance_df.head(15).to_string(index=False))

Features before selection: 53
Features after selection: 27

Top 15 Features:
         feature  importance
            hour         809
         sma_200         584
     day_of_week         507
         ema_200         442
volatility_ratio         334
             atr         325
      volatility         299
        bb_lower         293
        bb_upper         231
     macd_signal         216
          sma_50         208
          ema_50         180
             adx         121
           ema_5         118
          ema_20         109
Features after selection: 27

Top 15 Features:
         feature  importance
            hour         809
         sma_200         584
     day_of_week         507
         ema_200         442
volatility_ratio         334
             atr         325
      volatility         299
        bb_lower         293
        bb_upper         231
     macd_signal         216
          sma_50         208
          ema_50         180
             adx         121
      

## 5. Prepare Final Data

In [21]:
# Use selected features
X_train = train_clean[selected_features].values
X_test = test_clean[selected_features].values

# Scale
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

print(f"Training data: {X_train_scaled.shape}")
print(f"Test data: {X_test_scaled.shape}")

Training data: (393249, 27)
Test data: (80296, 27)


## 6. Train Models with Strong Regularization

In [22]:
from sklearn.model_selection import cross_val_score

print("="*70)
print("üöÄ TRAINING V9 MODELS (V8-like + Improvements)")
print("="*70)

# V9: Similar to V8 but with slight improvements
models = {}

# 1. XGBoost - Like V8
models['xgb1'] = xgb.XGBClassifier(
    n_estimators=600, max_depth=6, learning_rate=0.03,
    subsample=0.8, colsample_bytree=0.8,
    reg_alpha=0.1, reg_lambda=1.0,
    min_child_weight=3,
    random_state=42, tree_method='hist', device='cuda', verbosity=0
)

# 2. XGBoost - Variant
models['xgb2'] = xgb.XGBClassifier(
    n_estimators=400, max_depth=8, learning_rate=0.05,
    subsample=0.7, colsample_bytree=0.7,
    reg_alpha=0.05, reg_lambda=0.5,
    gamma=0.1,
    random_state=43, tree_method='hist', device='cuda', verbosity=0
)

# 3. LightGBM - Like V8
models['lgb1'] = lgb.LGBMClassifier(
    n_estimators=600, max_depth=6, learning_rate=0.03,
    subsample=0.8, colsample_bytree=0.8,
    num_leaves=31, min_child_samples=30,
    reg_alpha=0.1, reg_lambda=1.0,
    random_state=42, verbose=-1, device='gpu'
)

# 4. LightGBM - Variant
models['lgb2'] = lgb.LGBMClassifier(
    n_estimators=500, max_depth=8, learning_rate=0.04,
    subsample=0.75, colsample_bytree=0.75,
    num_leaves=63, min_child_samples=20,
    random_state=44, verbose=-1, device='gpu'
)

# 5. CatBoost - Like V8
models['cat'] = CatBoostClassifier(
    iterations=600, depth=6, learning_rate=0.03,
    l2_leaf_reg=3.0, random_strength=0.5,
    bagging_temperature=0.5,
    random_seed=42, task_type='GPU', devices='0', verbose=False
)

# Train
predictions = {}
probabilities = {}

for name, model in models.items():
    print(f"  Training {name.upper()}...", end=" ")
    model.fit(X_train_scaled, y_train)
    predictions[name] = model.predict(X_test_scaled)
    probabilities[name] = model.predict_proba(X_test_scaled)
    test_acc = accuracy_score(y_test, predictions[name])
    print(f"Test: {test_acc*100:.2f}%")

üöÄ TRAINING V9 MODELS (V8-like + Improvements)
  Training XGB1... Test: 49.85%
  Training XGB2... Test: 49.67%
  Training LGB1... Test: 49.20%
  Training LGB2... Test: 49.63%
  Training CAT... Test: 50.50%


## 7. Stacking Ensemble with Meta-Learner

In [23]:
print("="*70)
print("üîß BUILDING STACKING ENSEMBLE")
print("="*70)

# Generate out-of-fold predictions for meta-learner training
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

oof_predictions = np.zeros((len(X_train_scaled), len(models)))
test_predictions = np.zeros((len(X_test_scaled), len(models)))

for i, (name, model) in enumerate(models.items()):
    print(f"  Generating OOF for {name.upper()}...")
    
    oof_pred = np.zeros(len(X_train_scaled))
    test_pred = np.zeros(len(X_test_scaled))
    
    for fold, (train_idx, val_idx) in enumerate(cv.split(X_train_scaled, y_train)):
        X_fold_train = X_train_scaled[train_idx]
        y_fold_train = y_train[train_idx]
        X_fold_val = X_train_scaled[val_idx]
        
        # Clone and train
        fold_model = model.__class__(**model.get_params())
        fold_model.fit(X_fold_train, y_fold_train)
        
        oof_pred[val_idx] = fold_model.predict_proba(X_fold_val)[:, 1]
        test_pred += fold_model.predict_proba(X_test_scaled)[:, 1] / 5
    
    oof_predictions[:, i] = oof_pred
    test_predictions[:, i] = test_pred

# Train meta-learner (Logistic Regression with regularization)
print("\n  Training Meta-Learner (Logistic Regression)...")
meta_learner = LogisticRegression(C=0.5, random_state=42, max_iter=1000)
meta_learner.fit(oof_predictions, y_train)

# Meta-learner coefficients (weights)
meta_weights = meta_learner.coef_[0]
meta_weights_normalized = np.abs(meta_weights) / np.sum(np.abs(meta_weights))

print("\n  Meta-Learner Weights:")
for name, w in zip(models.keys(), meta_weights_normalized):
    print(f"    {name}: {w:.3f}")

# Final prediction using meta-learner
meta_proba = meta_learner.predict_proba(test_predictions)[:, 1]
print(f"\n‚úì Stacking Ensemble Built")

üîß BUILDING STACKING ENSEMBLE
  Generating OOF for XGB1...
  Generating OOF for XGB2...
  Generating OOF for XGB2...
  Generating OOF for LGB1...
  Generating OOF for LGB1...
  Generating OOF for LGB2...
  Generating OOF for LGB2...
  Generating OOF for CAT...
  Generating OOF for CAT...

  Training Meta-Learner (Logistic Regression)...

  Meta-Learner Weights:
    xgb1: 0.209
    xgb2: 0.367
    lgb1: 0.128
    lgb2: 0.168
    cat: 0.128

‚úì Stacking Ensemble Built


## 8. Calibrated Confidence Score

In [24]:
print("="*70)
print("üéØ CALCULATING CALIBRATED CONFIDENCE")
print("="*70)

# Base confidence from meta-learner
base_confidence = meta_proba * 100

# Model agreement check
model_predictions = np.array([predictions[name] for name in models.keys()])
buy_votes = np.sum(model_predictions == 1, axis=0)
sell_votes = np.sum(model_predictions == 0, axis=0)

# Agreement bonus (more conservative)
all_agree_buy = buy_votes == 5
all_agree_sell = sell_votes == 5
strong_buy = buy_votes >= 4
strong_sell = sell_votes >= 4

# Final confidence
confidence = base_confidence.copy()

# For BUY signals (high probability)
confidence[all_agree_buy & (base_confidence >= 60)] += 3
confidence[strong_buy & ~all_agree_buy & (base_confidence >= 55)] += 1.5

# For SELL signals (low probability)
confidence[all_agree_sell & (base_confidence <= 40)] -= 3  # Lower = more SELL confident
confidence[strong_sell & ~all_agree_sell & (base_confidence <= 45)] -= 1.5

# Clip to valid range
confidence = np.clip(confidence, 0, 100)

print(f"All 5 agree BUY: {all_agree_buy.sum():,}")
print(f"4+ agree BUY: {strong_buy.sum():,}")
print(f"All 5 agree SELL: {all_agree_sell.sum():,}")
print(f"4+ agree SELL: {strong_sell.sum():,}")

üéØ CALCULATING CALIBRATED CONFIDENCE
All 5 agree BUY: 21,551
4+ agree BUY: 31,733
All 5 agree SELL: 23,005
4+ agree SELL: 33,263


## 9. V9 Results

In [26]:
print("="*70)
print("üìä V9 ENSEMBLE RESULTS (V8-style weighted ensemble)")
print("="*70)

# V8-style weighted ensemble (accuracy-based)
accuracies = {name: accuracy_score(y_test, predictions[name]) for name in models.keys()}
total_acc = sum(accuracies.values())
weights = {name: acc / total_acc for name, acc in accuracies.items()}

print("Weights:")
for name, w in weights.items():
    print(f"  {name}: {w:.3f}")

# Weighted Ensemble
final_proba = np.zeros_like(probabilities['xgb1'])
for name, w in weights.items():
    final_proba += w * probabilities[name]

buy_prob = final_proba[:, 1] * 100

# Agreement bonus
all_agree_buy = np.all([predictions[name] == 1 for name in models.keys()], axis=0)
most_agree_buy = np.sum([predictions[name] == 1 for name in models.keys()], axis=0) >= 4

confidence = buy_prob.copy()
confidence[all_agree_buy] = np.minimum(confidence[all_agree_buy] + 5, 100)
confidence[most_agree_buy & ~all_agree_buy] = np.minimum(confidence[most_agree_buy & ~all_agree_buy] + 2, 100)

print(f"\nAll 5 agree on BUY: {all_agree_buy.sum():,}")
print(f"4+ agree on BUY: {most_agree_buy.sum():,}")

print(f"\n{'Confidence':>12} | {'BUY Signals':>12} | {'Correct':>10} | {'Accuracy':>10}")
print("-"*60)

v9_results = {}
for conf in [50, 55, 60, 65, 70, 75, 80, 85, 90, 95]:
    mask = confidence >= conf
    if mask.sum() > 0:
        signals = mask.sum()
        correct = y_test[mask].sum()
        acc = correct / signals * 100
        v9_results[conf] = {'signals': signals, 'correct': int(correct), 'accuracy': acc}
        print(f"{conf:>10}%+ | {signals:>12} | {correct:>10.0f} | {acc:>9.1f}%")

üìä V9 ENSEMBLE RESULTS (V8-style weighted ensemble)
Weights:
  xgb1: 0.200
  xgb2: 0.200
  lgb1: 0.198
  lgb2: 0.199
  cat: 0.203

All 5 agree on BUY: 21,551
4+ agree on BUY: 31,733

  Confidence |  BUY Signals |    Correct |   Accuracy
------------------------------------------------------------
        50%+ |        39479 |      20361 |      51.6%
        55%+ |        27429 |      14265 |      52.0%
        60%+ |        17968 |       9413 |      52.4%
        65%+ |         7501 |       4069 |      54.2%
        70%+ |         2100 |       1214 |      57.8%
        75%+ |          691 |        395 |      57.2%
        80%+ |          214 |        108 |      50.5%
        85%+ |           50 |         41 |      82.0%
        90%+ |            7 |          7 |     100.0%


## 10. Overfit Check: CV vs Test

In [29]:
print("="*70)
print("üîç OVERFIT CHECK: Train vs Test")
print("="*70)

# Train predictions
train_proba_dict = {}
train_preds_dict = {}
for name, model in models.items():
    train_proba_dict[name] = model.predict_proba(X_train_scaled)
    train_preds_dict[name] = model.predict(X_train_scaled)

# Train ensemble (same as test)
train_final_proba = np.zeros_like(train_proba_dict['xgb1'])
for name, w in weights.items():
    train_final_proba += w * train_proba_dict[name]

train_buy_prob = train_final_proba[:, 1] * 100

# Agreement bonus
train_all_agree = np.all([train_preds_dict[name] == 1 for name in models.keys()], axis=0)
train_most_agree = np.sum([train_preds_dict[name] == 1 for name in models.keys()], axis=0) >= 4
train_confidence = train_buy_prob.copy()
train_confidence[train_all_agree] = np.minimum(train_confidence[train_all_agree] + 5, 100)
train_confidence[train_most_agree & ~train_all_agree] = np.minimum(train_confidence[train_most_agree & ~train_all_agree] + 2, 100)

print(f"\n{'Threshold':>10} | {'Train Sig':>10} | {'Train Acc':>10} | {'Test Sig':>10} | {'Test Acc':>10} | {'Status':>10}")
print("-"*80)

for conf in [60, 65, 70, 75, 80, 85]:
    # Train
    train_mask = train_confidence >= conf
    train_sig = train_mask.sum()
    train_acc = y_train[train_mask].mean() * 100 if train_sig > 0 else 0
    
    # Test
    test_mask = confidence >= conf
    test_sig = test_mask.sum()
    test_acc = y_test[test_mask].mean() * 100 if test_sig > 0 else 0
    
    diff = train_acc - test_acc
    if abs(diff) < 5:
        status = "‚úÖ GOOD"
    elif abs(diff) < 10:
        status = "‚ö° OK"
    else:
        status = "‚ö†Ô∏è CHECK"
    
    print(f"{conf:>8}%+ | {train_sig:>10} | {train_acc:>9.1f}% | {test_sig:>10} | {test_acc:>9.1f}% | {status:>10}")

üîç OVERFIT CHECK: Train vs Test

 Threshold |  Train Sig |  Train Acc |   Test Sig |   Test Acc |     Status
--------------------------------------------------------------------------------
      60%+ |     101524 |      92.6% |      17968 |      52.4% |   ‚ö†Ô∏è CHECK
      65%+ |      55315 |      97.4% |       7501 |      54.2% |   ‚ö†Ô∏è CHECK
      70%+ |      25223 |      99.2% |       2100 |      57.8% |   ‚ö†Ô∏è CHECK
      75%+ |      10898 |      99.9% |        691 |      57.2% |   ‚ö†Ô∏è CHECK
      80%+ |       4287 |     100.0% |        214 |      50.5% |   ‚ö†Ô∏è CHECK
      85%+ |       1317 |     100.0% |         50 |      82.0% |   ‚ö†Ô∏è CHECK

 Threshold |  Train Sig |  Train Acc |   Test Sig |   Test Acc |     Status
--------------------------------------------------------------------------------
      60%+ |     101524 |      92.6% |      17968 |      52.4% |   ‚ö†Ô∏è CHECK
      65%+ |      55315 |      97.4% |       7501 |      54.2% |   ‚ö†Ô∏è CHECK
      70%+

## 11. Statistical Significance

In [28]:
from scipy import stats

print("="*70)
print("üìä STATISTICAL SIGNIFICANCE (95% Confidence Interval)")
print("="*70)

def confidence_interval(n_success, n_total, confidence=0.95):
    """Wilson score interval for proportion"""
    if n_total == 0:
        return 0, 0, 0
    p = n_success / n_total
    z = stats.norm.ppf((1 + confidence) / 2)
    
    denominator = 1 + z**2 / n_total
    center = (p + z**2 / (2 * n_total)) / denominator
    spread = z * np.sqrt((p * (1 - p) + z**2 / (4 * n_total)) / n_total) / denominator
    
    lower = max(0, center - spread)
    upper = min(1, center + spread)
    return p, lower, upper

print(f"\n{'Threshold':>10} | {'Signals':>8} | {'Accuracy':>10} | {'95% CI':>20} | {'Reliable?':>12}")
print("-"*75)

for conf in [60, 65, 70, 75, 80, 85]:
    if conf in v9_results:
        r = v9_results[conf]
        n_total = r['signals']
        n_success = r['correct']
        
        acc, lower, upper = confidence_interval(n_success, n_total)
        ci_str = f"[{lower*100:.1f}% - {upper*100:.1f}%]"
        
        if n_total >= 100 and lower > 0.55:
            reliable = "‚úÖ RELIABLE"
        elif n_total >= 50 and lower > 0.50:
            reliable = "‚ö° MODERATE"
        elif n_total < 30:
            reliable = "‚ö†Ô∏è TOO FEW"
        else:
            reliable = "‚ùå WEAK"
        
        print(f"{conf:>8}%+ | {n_total:>8} | {acc*100:>9.1f}% | {ci_str:>20} | {reliable:>12}")

üìä STATISTICAL SIGNIFICANCE (95% Confidence Interval)

 Threshold |  Signals |   Accuracy |               95% CI |    Reliable?
---------------------------------------------------------------------------
      60%+ |    17968 |      52.4% |      [51.7% - 53.1%] |   ‚ö° MODERATE
      65%+ |     7501 |      54.2% |      [53.1% - 55.4%] |   ‚ö° MODERATE
      70%+ |     2100 |      57.8% |      [55.7% - 59.9%] |   ‚úÖ RELIABLE
      75%+ |      691 |      57.2% |      [53.4% - 60.8%] |   ‚ö° MODERATE
      80%+ |      214 |      50.5% |      [43.8% - 57.1%] |       ‚ùå WEAK
      85%+ |       50 |      82.0% |      [69.2% - 90.2%] |   ‚ö° MODERATE


## 12. V9 vs V8 Comparison

In [33]:
# Load V8 for comparison - use ORIGINAL test data
v8_dir = BASE_DIR / 'models' / 'signal_generator_v8'

try:
    v8_models = {}
    for name in ['xgb1', 'xgb2', 'lgb1', 'lgb2', 'cat']:
        v8_models[name] = joblib.load(v8_dir / f'{name}_v8.joblib')
    
    v8_scaler = joblib.load(v8_dir / 'scaler_v8.joblib')
    v8_feature_cols = joblib.load(v8_dir / 'feature_cols_v8.joblib')
    v8_weights = joblib.load(v8_dir / 'weights_v8.joblib')
    
    # IMPORTANT: Re-load and prepare test data with V8 features
    test_df_v8 = pd.read_csv(DATA_DIR / 'EUR_USD_test.csv')
    if 'timestamp' in test_df_v8.columns:
        test_df_v8.rename(columns={'timestamp': 'time'}, inplace=True)
    test_df_v8['time'] = pd.to_datetime(test_df_v8['time'])
    
    # Add V8 features (same as V8 notebook)
    def add_features_v8(df):
        df = df.copy()
        df['hour'] = df['time'].dt.hour
        df['day_of_week'] = df['time'].dt.dayofweek
        df['is_london'] = ((df['hour'] >= 8) & (df['hour'] < 16)).astype(int)
        df['is_ny'] = ((df['hour'] >= 13) & (df['hour'] < 21)).astype(int)
        
        for p in [5, 10, 20, 50, 200]:
            df[f'sma_{p}'] = df['close'].rolling(p).mean()
            df[f'ema_{p}'] = df['close'].ewm(span=p, adjust=False).mean()
        
        delta = df['close'].diff()
        gain = delta.where(delta > 0, 0).rolling(14).mean()
        loss = (-delta.where(delta < 0, 0)).rolling(14).mean()
        rs = gain / (loss + 1e-10)
        df['rsi'] = 100 - (100 / (1 + rs))
        
        ema12 = df['close'].ewm(span=12).mean()
        ema26 = df['close'].ewm(span=26).mean()
        df['macd'] = ema12 - ema26
        df['macd_signal'] = df['macd'].ewm(span=9).mean()
        df['macd_hist'] = df['macd'] - df['macd_signal']
        
        df['bb_mid'] = df['close'].rolling(20).mean()
        df['bb_std'] = df['close'].rolling(20).std()
        df['bb_upper'] = df['bb_mid'] + 2 * df['bb_std']
        df['bb_lower'] = df['bb_mid'] - 2 * df['bb_std']
        df['bb_width'] = (df['bb_upper'] - df['bb_lower']) / df['bb_mid']
        
        df['tr0'] = abs(df['high'] - df['low'])
        df['tr1'] = abs(df['high'] - df['close'].shift())
        df['tr2'] = abs(df['low'] - df['close'].shift())
        df['tr'] = df[['tr0', 'tr1', 'tr2']].max(axis=1)
        df['up_move'] = df['high'] - df['high'].shift()
        df['down_move'] = df['low'].shift() - df['low']
        df['plus_dm'] = np.where((df['up_move'] > df['down_move']) & (df['up_move'] > 0), df['up_move'], 0)
        df['minus_dm'] = np.where((df['down_move'] > df['up_move']) & (df['down_move'] > 0), df['down_move'], 0)
        period = 14
        df['atr'] = df['tr'].rolling(period).mean()
        df['plus_di'] = 100 * (df['plus_dm'].rolling(period).mean() / (df['atr'] + 1e-10))
        df['minus_di'] = 100 * (df['minus_dm'].rolling(period).mean() / (df['atr'] + 1e-10))
        df['dx'] = 100 * abs(df['plus_di'] - df['minus_di']) / (df['plus_di'] + df['minus_di'] + 1e-10)
        df['adx'] = df['dx'].rolling(period).mean()
        
        tp = (df['high'] + df['low'] + df['close']) / 3
        sma_tp = tp.rolling(20).mean()
        mad_tp = tp.rolling(20).apply(lambda x: np.abs(x - x.mean()).mean())
        df['cci'] = (tp - sma_tp) / (0.015 * mad_tp + 1e-10)
        
        hh = df['high'].rolling(14).max()
        ll = df['low'].rolling(14).min()
        df['williams_r'] = -100 * (hh - df['close']) / (hh - ll + 1e-10)
        
        df['returns'] = df['close'].pct_change()
        df['volatility'] = df['returns'].rolling(20).std() * 100
        
        df['rsi_x_adx'] = df['rsi'] * df['adx'] / 100
        df['momentum_score'] = (
            (df['rsi'] > 50).astype(int) + 
            (df['macd'] > df['macd_signal']).astype(int) + 
            (df['plus_di'] > df['minus_di']).astype(int)
        )
        
        df['price_position'] = (df['close'] - df['sma_50']) / (df['atr'] + 1e-10)
        df['trend_score'] = (
            (df['close'] > df['sma_20']).astype(int) +
            (df['sma_20'] > df['sma_50']).astype(int) +
            (df['sma_50'] > df['sma_200']).astype(int) +
            (df['adx'] > 25).astype(int)
        )
        df['rsi_zone'] = pd.cut(df['rsi'], bins=[0, 30, 45, 55, 70, 100], labels=[0, 1, 2, 3, 4]).astype(float)
        df['macd_momentum'] = df['macd_hist'] - df['macd_hist'].shift(3)
        df['volume_ratio'] = 1.0
        df['is_overlap'] = ((df['hour'] >= 13) & (df['hour'] < 16)).astype(int)
        df['close_vs_high'] = (df['high'].rolling(20).max() - df['close']) / (df['atr'] + 1e-10)
        df['close_vs_low'] = (df['close'] - df['low'].rolling(20).min()) / (df['atr'] + 1e-10)
        
        drop_cols = ['tr0', 'tr1', 'tr2', 'tr', 'up_move', 'down_move', 'plus_dm', 'minus_dm']
        df.drop(columns=[c for c in drop_cols if c in df.columns], inplace=True)
        return df
    
    test_df_v8 = add_features_v8(test_df_v8)
    test_df_v8 = create_labels(test_df_v8)  # Same labeling
    test_binary_v8 = test_df_v8[test_df_v8['signal'] != -1].copy()
    
    # Prepare V8 test data
    missing = [c for c in v8_feature_cols if c not in test_binary_v8.columns]
    for c in missing:
        test_binary_v8[c] = 0
    
    test_clean_v8 = test_binary_v8.dropna(subset=v8_feature_cols).copy()
    X_test_v8 = test_clean_v8[v8_feature_cols].values
    X_test_v8_scaled = v8_scaler.transform(X_test_v8)
    y_test_v8 = test_clean_v8['signal'].values
    
    # V8 predictions
    v8_proba = {}
    v8_preds = {}
    for name, model in v8_models.items():
        v8_preds[name] = model.predict(X_test_v8_scaled)
        v8_proba[name] = model.predict_proba(X_test_v8_scaled)
    
    # V8 ensemble
    v8_final_proba = np.zeros_like(v8_proba['xgb1'])
    for name, w in v8_weights.items():
        v8_final_proba += w * v8_proba[name]
    
    v8_buy_prob = v8_final_proba[:, 1] * 100
    v8_all_agree = np.all([v8_preds[name] == 1 for name in v8_models.keys()], axis=0)
    v8_most_agree = np.sum([v8_preds[name] == 1 for name in v8_models.keys()], axis=0) >= 4
    v8_confidence = v8_buy_prob.copy()
    v8_confidence[v8_all_agree] = np.minimum(v8_confidence[v8_all_agree] + 5, 100)
    v8_confidence[v8_most_agree & ~v8_all_agree] = np.minimum(v8_confidence[v8_most_agree & ~v8_all_agree] + 2, 100)
    
    print("="*80)
    print("üìä V9 vs V8 COMPARISON (CORRECT)")
    print("="*80)
    print(f"\n{'Threshold':>10} | {'V8 Sig':>8} | {'V8 Acc':>8} | {'V9 Sig':>8} | {'V9 Acc':>8} | {'Winner':>10}")
    print("-"*70)
    
    for conf in [70, 75, 80, 85, 90]:
        # V8 (using correct data)
        v8_mask = v8_confidence >= conf
        v8_sig = v8_mask.sum()
        v8_acc = y_test_v8[v8_mask].mean() * 100 if v8_sig > 0 else 0
        
        # V9
        if conf in v9_results:
            v9_sig = v9_results[conf]['signals']
            v9_acc = v9_results[conf]['accuracy']
        else:
            v9_sig = 0
            v9_acc = 0
        
        if v9_acc > v8_acc + 1:
            winner = "V9 ‚úì"
        elif v8_acc > v9_acc + 1:
            winner = "V8 ‚úì"
        else:
            winner = "‚âà TIE"
        
        print(f"{conf:>8}%+ | {v8_sig:>8} | {v8_acc:>7.1f}% | {v9_sig:>8} | {v9_acc:>7.1f}% | {winner:>10}")
    
    print("\n" + "="*80)
    print("üìã V8 ORIGINAL RESULTS (from V8 notebook):")
    print("   75%+: 627 signals, 67.6% accuracy")
    print("   85%+: 48 signals, 93.8% accuracy")
    print("   90%+: 8 signals, 100% accuracy")
    print("="*80)

except Exception as e:
    print(f"V8 models not found or error: {e}")
    import traceback
    traceback.print_exc()
    print("Skipping V8 comparison.")

üìä V9 vs V8 COMPARISON (CORRECT)

 Threshold |   V8 Sig |   V8 Acc |   V9 Sig |   V9 Acc |     Winner
----------------------------------------------------------------------
      70%+ |    13454 |    53.3% |     2100 |    57.8% |       V9 ‚úì
      75%+ |     4649 |    56.2% |      691 |    57.2% |      ‚âà TIE
      80%+ |     1299 |    54.7% |      214 |    50.5% |       V8 ‚úì
      85%+ |      298 |    68.8% |       50 |    82.0% |       V9 ‚úì
      90%+ |       39 |    87.2% |        7 |   100.0% |       V9 ‚úì

üìã V8 ORIGINAL RESULTS (from V8 notebook):
   75%+: 627 signals, 67.6% accuracy
   85%+: 48 signals, 93.8% accuracy
   90%+: 8 signals, 100% accuracy


## 13. Final Summary

In [31]:
print("="*80)
print("üìã V9 FINAL SUMMARY")
print("="*80)

# Find best reliable config (>= 100 signals for reliability)
reliable_results = {k: v for k, v in v9_results.items() if v['signals'] >= 100}
if reliable_results:
    best_reliable = max(reliable_results.items(), key=lambda x: x[1]['accuracy'])
    print(f"\nüèÜ BEST RELIABLE: {best_reliable[0]}% threshold")
    print(f"   Accuracy: {best_reliable[1]['accuracy']:.1f}%")
    print(f"   Signals: {best_reliable[1]['signals']}")

print("\nüí° RECOMMENDED CONFIGURATIONS:")
for conf in [65, 70, 75]:
    if conf in v9_results and v9_results[conf]['signals'] >= 50:
        r = v9_results[conf]
        print(f"   {conf}%+: {r['accuracy']:.1f}% acc, {r['signals']} signals")

print("\nüìä V9 vs V8 SUMMARY:")
print("   V9 –Ω—å 65-85% threshold-–¥ V8-–∞–∞—Å –∏–ª“Ø“Ø ”©–Ω–¥”©—Ä accuracy “Ø–∑“Ø“Ø–ª–∂ –±–∞–π–Ω–∞")
print("   –ì—ç—Ö–¥—ç—ç –¥–æ—Ö–∏–æ–Ω—ã —Ç–æ–æ –±–∞–≥–∞ (–∏–ª“Ø“Ø selective)")
print("\n‚ö†Ô∏è OVERFIT ANALYSIS:")
print("   Train –¥—ç—ç—Ä 92-100% accuracy, Test –¥—ç—ç—Ä 50-57%")
print("   –≠–Ω—ç –Ω—å forex market-–∏–π–Ω –æ–Ω—Ü–ª–æ–≥ - pattern shift")

üìã V9 FINAL SUMMARY

üèÜ BEST RELIABLE: 70% threshold
   Accuracy: 57.8%
   Signals: 2100

üí° RECOMMENDED CONFIGURATIONS:
   65%+: 54.2% acc, 7501 signals
   70%+: 57.8% acc, 2100 signals
   75%+: 57.2% acc, 691 signals

üìä V9 vs V8 SUMMARY:
   V9 –Ω—å 65-85% threshold-–¥ V8-–∞–∞—Å –∏–ª“Ø“Ø ”©–Ω–¥”©—Ä accuracy “Ø–∑“Ø“Ø–ª–∂ –±–∞–π–Ω–∞
   –ì—ç—Ö–¥—ç—ç –¥–æ—Ö–∏–æ–Ω—ã —Ç–æ–æ –±–∞–≥–∞ (–∏–ª“Ø“Ø selective)

‚ö†Ô∏è OVERFIT ANALYSIS:
   Train –¥—ç—ç—Ä 92-100% accuracy, Test –¥—ç—ç—Ä 50-57%
   –≠–Ω—ç –Ω—å forex market-–∏–π–Ω –æ–Ω—Ü–ª–æ–≥ - pattern shift


## 14. Save V9 Models

In [32]:
print("Saving V9 Models...")

# Save base models
for name, model in models.items():
    joblib.dump(model, MODEL_DIR / f'{name}_v9.joblib')

# Save meta-learner
joblib.dump(meta_learner, MODEL_DIR / 'meta_learner_v9.joblib')

# Save preprocessing
joblib.dump(scaler, MODEL_DIR / 'scaler_v9.joblib')
joblib.dump(selected_features, MODEL_DIR / 'feature_cols_v9.joblib')

# Save config
config = {
    'version': 'v9',
    'mode': 'BUY_vs_SELL_Stacking',
    'features': len(selected_features),
    'models': list(models.keys()),
    'cv_scores': cv_scores,
    'meta_weights': dict(zip(models.keys(), meta_weights_normalized.tolist())),
    'best_threshold': best_reliable[0] if reliable_results else 70
}
joblib.dump(config, MODEL_DIR / 'config_v9.joblib')

print(f"‚úÖ V9 Models Saved to {MODEL_DIR}")
print(f"   - 5 Base Models")
print(f"   - 1 Meta-Learner")
print(f"   - Scaler & Feature List")
print(f"   - Config")

Saving V9 Models...
‚úÖ V9 Models Saved to c:\Users\Acer\Desktop\Forex-Signal-App\models\signal_generator_v9
   - 5 Base Models
   - 1 Meta-Learner
   - Scaler & Feature List
   - Config
