In [12]:
import pandas as pd
import numpy as np
from pathlib import Path
import warnings
import joblib

warnings.filterwarnings('ignore')

# ML Models
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
import xgboost as xgb
import lightgbm as lgb
from catboost import CatBoostClassifier

# Paths
BASE_DIR = Path.cwd()
DATA_DIR = BASE_DIR / 'data'
MODEL_DIR = BASE_DIR / 'models' / 'signal_generator_v7'
MODEL_DIR.mkdir(parents=True, exist_ok=True)

print("="*70)
print("üöÄ FOREX SIGNAL GENERATOR V7 - Production Ready")
print("="*70)
print(f"‚úÖ Model Directory: {MODEL_DIR}")

üöÄ FOREX SIGNAL GENERATOR V7 - Production Ready
‚úÖ Model Directory: c:\Users\Acer\Desktop\Forex-Signal-App\models\signal_generator_v7


## 1. Data Loading

In [13]:
# Load Data
train_df = pd.read_csv(DATA_DIR / 'EUR_USD_1min.csv')
test_df = pd.read_csv(DATA_DIR / 'EUR_USD_test.csv')

# Standardize columns
for df in [train_df, test_df]:
    if 'timestamp' in df.columns:
        df.rename(columns={'timestamp': 'time'}, inplace=True)
    df['time'] = pd.to_datetime(df['time'])

print(f"Train: {len(train_df):,} rows")
print(f"Test: {len(test_df):,} rows")
print(f"Train period: {train_df['time'].min()} to {train_df['time'].max()}")
print(f"Test period: {test_df['time'].min()} to {test_df['time'].max()}")

Train: 1,859,492 rows
Test: 296,778 rows
Train period: 2019-12-31 16:00:00+00:00 to 2024-12-30 16:00:00+00:00
Test period: 2024-12-31 16:00:00+00:00 to 2025-10-17 06:11:00+00:00


## 2. V7 Enhanced Feature Engineering

In [14]:
def add_features_v7(df):
    """
    V7 Features: V6 Core + Multi-Timeframe + Advanced Filters
    """
    df = df.copy()
    
    # ==================== TIME FEATURES ====================
    df['hour'] = df['time'].dt.hour
    df['day_of_week'] = df['time'].dt.dayofweek
    df['minute'] = df['time'].dt.minute
    
    # Trading Sessions (UTC) - More granular
    df['is_asian'] = ((df['hour'] >= 0) & (df['hour'] < 8)).astype(int)
    df['is_london'] = ((df['hour'] >= 7) & (df['hour'] < 16)).astype(int)
    df['is_ny'] = ((df['hour'] >= 12) & (df['hour'] < 21)).astype(int)
    df['is_overlap'] = ((df['hour'] >= 12) & (df['hour'] < 16)).astype(int)  # Best time!
    
    # Bad times to trade
    df['is_low_liquidity'] = ((df['hour'] >= 21) | (df['hour'] < 2)).astype(int)
    df['is_news_time'] = ((df['hour'].isin([8, 12, 13, 14])) & (df['minute'] < 35)).astype(int)
    
    # Day quality (Mon=0, Fri=4 are less reliable)
    df['is_midweek'] = df['day_of_week'].isin([1, 2, 3]).astype(int)
    
    # ==================== PRICE FEATURES ====================
    # Multi-period MAs
    for p in [5, 10, 20, 50, 100, 200]:
        df[f'sma_{p}'] = df['close'].rolling(p).mean()
        df[f'ema_{p}'] = df['close'].ewm(span=p, adjust=False).mean()
    
    # MA Distances (normalized)
    df['dist_sma20'] = (df['close'] - df['sma_20']) / df['sma_20'] * 100
    df['dist_sma50'] = (df['close'] - df['sma_50']) / df['sma_50'] * 100
    df['dist_sma200'] = (df['close'] - df['sma_200']) / df['sma_200'] * 100
    
    # MA Crossovers
    df['sma_5_20_cross'] = (df['sma_5'] > df['sma_20']).astype(int)
    df['sma_20_50_cross'] = (df['sma_20'] > df['sma_50']).astype(int)
    df['ema_10_50_cross'] = (df['ema_10'] > df['ema_50']).astype(int)
    df['golden_cross'] = (df['sma_50'] > df['sma_200']).astype(int)
    
    # ==================== MOMENTUM INDICATORS ====================
    # RSI (multiple periods)
    for period in [7, 14, 21]:
        delta = df['close'].diff()
        gain = delta.where(delta > 0, 0).rolling(period).mean()
        loss = (-delta.where(delta < 0, 0)).rolling(period).mean()
        rs = gain / (loss + 1e-10)
        df[f'rsi_{period}'] = 100 - (100 / (1 + rs))
    
    # RSI Zones
    df['rsi_oversold'] = (df['rsi_14'] < 30).astype(int)
    df['rsi_overbought'] = (df['rsi_14'] > 70).astype(int)
    df['rsi_neutral'] = ((df['rsi_14'] >= 40) & (df['rsi_14'] <= 60)).astype(int)
    df['rsi_bullish_zone'] = ((df['rsi_14'] > 50) & (df['rsi_14'] < 70)).astype(int)
    
    # RSI Divergence (simplified)
    df['rsi_slope'] = df['rsi_14'] - df['rsi_14'].shift(5)
    df['price_slope'] = (df['close'] - df['close'].shift(5)) / df['close'].shift(5) * 100
    df['rsi_divergence'] = np.sign(df['rsi_slope']) != np.sign(df['price_slope'])
    
    # MACD
    ema12 = df['close'].ewm(span=12).mean()
    ema26 = df['close'].ewm(span=26).mean()
    df['macd'] = ema12 - ema26
    df['macd_signal'] = df['macd'].ewm(span=9).mean()
    df['macd_hist'] = df['macd'] - df['macd_signal']
    df['macd_cross_up'] = ((df['macd'] > df['macd_signal']) & 
                           (df['macd'].shift(1) <= df['macd_signal'].shift(1))).astype(int)
    df['macd_bullish'] = ((df['macd'] > df['macd_signal']) & (df['macd_hist'] > 0)).astype(int)
    
    # Stochastic
    for period in [14, 21]:
        low_min = df['low'].rolling(period).min()
        high_max = df['high'].rolling(period).max()
        df[f'stoch_k_{period}'] = 100 * (df['close'] - low_min) / (high_max - low_min + 1e-10)
        df[f'stoch_d_{period}'] = df[f'stoch_k_{period}'].rolling(3).mean()
    
    df['stoch_oversold'] = (df['stoch_k_14'] < 20).astype(int)
    df['stoch_overbought'] = (df['stoch_k_14'] > 80).astype(int)
    
    # ==================== TREND STRENGTH (ADX) ====================
    # True Range
    df['tr'] = np.maximum(
        df['high'] - df['low'],
        np.maximum(
            abs(df['high'] - df['close'].shift()),
            abs(df['low'] - df['close'].shift())
        )
    )
    
    # Directional Movement
    df['up_move'] = df['high'] - df['high'].shift()
    df['down_move'] = df['low'].shift() - df['low']
    df['plus_dm'] = np.where((df['up_move'] > df['down_move']) & (df['up_move'] > 0), df['up_move'], 0)
    df['minus_dm'] = np.where((df['down_move'] > df['up_move']) & (df['down_move'] > 0), df['down_move'], 0)
    
    # ADX calculation
    period = 14
    df['atr'] = df['tr'].rolling(period).mean()
    df['plus_di'] = 100 * (df['plus_dm'].rolling(period).mean() / (df['atr'] + 1e-10))
    df['minus_di'] = 100 * (df['minus_dm'].rolling(period).mean() / (df['atr'] + 1e-10))
    df['dx'] = 100 * abs(df['plus_di'] - df['minus_di']) / (df['plus_di'] + df['minus_di'] + 1e-10)
    df['adx'] = df['dx'].rolling(period).mean()
    
    # Trend Filters (KEY for V7!)
    df['strong_trend'] = (df['adx'] > 25).astype(int)
    df['very_strong_trend'] = (df['adx'] > 40).astype(int)
    df['weak_trend'] = (df['adx'] < 20).astype(int)
    df['trend_direction'] = np.where(df['plus_di'] > df['minus_di'], 1, -1)
    df['trend_aligned'] = (df['strong_trend'] == 1) & (df['trend_direction'] == 1)
    
    # ==================== VOLATILITY ====================
    df['atr_pips'] = df['atr'] * 10000
    df['atr_ma'] = df['atr'].rolling(50).mean()
    df['volatility_ratio'] = df['atr'] / (df['atr_ma'] + 1e-10)
    
    # Volatility Filters
    df['normal_volatility'] = ((df['volatility_ratio'] > 0.7) & (df['volatility_ratio'] < 1.5)).astype(int)
    df['high_volatility'] = (df['volatility_ratio'] > 1.5).astype(int)
    df['low_volatility'] = (df['volatility_ratio'] < 0.5).astype(int)
    
    # ==================== BOLLINGER BANDS ====================
    df['bb_mid'] = df['close'].rolling(20).mean()
    df['bb_std'] = df['close'].rolling(20).std()
    df['bb_upper'] = df['bb_mid'] + 2 * df['bb_std']
    df['bb_lower'] = df['bb_mid'] - 2 * df['bb_std']
    df['bb_width'] = (df['bb_upper'] - df['bb_lower']) / (df['bb_mid'] + 1e-10)
    df['bb_position'] = (df['close'] - df['bb_lower']) / (df['bb_upper'] - df['bb_lower'] + 1e-10)
    df['bb_squeeze'] = (df['bb_width'] < df['bb_width'].rolling(50).mean() * 0.8).astype(int)
    
    # ==================== CCI ====================
    tp = (df['high'] + df['low'] + df['close']) / 3
    sma_tp = tp.rolling(20).mean()
    mad_tp = tp.rolling(20).apply(lambda x: np.abs(x - x.mean()).mean())
    df['cci'] = (tp - sma_tp) / (0.015 * mad_tp + 1e-10)
    df['cci_bullish'] = (df['cci'] > 100).astype(int)
    df['cci_oversold'] = (df['cci'] < -100).astype(int)
    
    # ==================== WILLIAMS %R ====================
    hh = df['high'].rolling(14).max()
    ll = df['low'].rolling(14).min()
    df['williams_r'] = -100 * (hh - df['close']) / (hh - ll + 1e-10)
    
    # ==================== MULTI-TIMEFRAME (Simulated) ====================
    # 5-min equivalent (5 bars)
    df['close_5m'] = df['close'].rolling(5).mean()
    df['trend_5m'] = (df['close_5m'] > df['close_5m'].shift(5)).astype(int)
    
    # 15-min equivalent (15 bars)
    df['close_15m'] = df['close'].rolling(15).mean()
    df['trend_15m'] = (df['close_15m'] > df['close_15m'].shift(15)).astype(int)
    
    # 1-hour equivalent (60 bars)
    df['close_1h'] = df['close'].rolling(60).mean()
    df['trend_1h'] = (df['close_1h'] > df['close_1h'].shift(60)).astype(int)
    
    # MTF Alignment (all timeframes agree)
    df['mtf_bullish'] = ((df['trend_5m'] == 1) & (df['trend_15m'] == 1) & (df['trend_1h'] == 1)).astype(int)
    df['mtf_bearish'] = ((df['trend_5m'] == 0) & (df['trend_15m'] == 0) & (df['trend_1h'] == 0)).astype(int)
    
    # ==================== CANDLE PATTERNS ====================
    df['candle_body'] = df['close'] - df['open']
    df['candle_range'] = df['high'] - df['low']
    df['body_ratio'] = abs(df['candle_body']) / (df['candle_range'] + 1e-10)
    df['upper_shadow'] = df['high'] - df[['open', 'close']].max(axis=1)
    df['lower_shadow'] = df[['open', 'close']].min(axis=1) - df['low']
    
    df['is_bullish_candle'] = (df['close'] > df['open']).astype(int)
    df['is_doji'] = (df['body_ratio'] < 0.1).astype(int)
    df['is_hammer'] = ((df['lower_shadow'] > abs(df['candle_body']) * 2) & 
                       (df['upper_shadow'] < abs(df['candle_body']) * 0.5)).astype(int)
    df['is_shooting_star'] = ((df['upper_shadow'] > abs(df['candle_body']) * 2) & 
                              (df['lower_shadow'] < abs(df['candle_body']) * 0.5)).astype(int)
    
    # Engulfing patterns
    df['bullish_engulfing'] = ((df['is_bullish_candle'] == 1) & 
                               (df['is_bullish_candle'].shift(1) == 0) &
                               (df['close'] > df['open'].shift(1)) &
                               (df['open'] < df['close'].shift(1))).astype(int)
    
    # ==================== TREND ALIGNMENT ====================
    df['trend_short'] = np.where(df['ema_10'] > df['ema_20'], 1, -1)
    df['trend_medium'] = np.where(df['ema_20'] > df['ema_50'], 1, -1)
    df['trend_long'] = np.where(df['ema_50'] > df['ema_200'], 1, -1)
    df['all_trends_up'] = ((df['trend_short'] == 1) & (df['trend_medium'] == 1) & (df['trend_long'] == 1)).astype(int)
    df['all_trends_down'] = ((df['trend_short'] == -1) & (df['trend_medium'] == -1) & (df['trend_long'] == -1)).astype(int)
    
    # ==================== COMPOSITE SCORES ====================
    # Buy Score (combination of bullish signals)
    df['buy_score'] = (
        df['macd_bullish'] +
        df['rsi_bullish_zone'] +
        df['sma_5_20_cross'] +
        df['golden_cross'] +
        df['all_trends_up'] +
        df['mtf_bullish'] +
        (df['stoch_k_14'] > df['stoch_d_14']).astype(int) +
        df['strong_trend'] * df['trend_aligned'].astype(int)
    )
    
    # Quality Score (how good is the trading condition)
    df['quality_score'] = (
        df['is_overlap'] * 2 +  # Best time
        df['is_midweek'] +      # Good day
        df['normal_volatility'] +  # Good volatility
        df['strong_trend'] +    # Clear trend
        (1 - df['is_low_liquidity']) +  # Not low liquidity
        (1 - df['is_news_time'])  # Not news time
    )
    
    # Clean up temp columns
    df.drop(['tr', 'up_move', 'down_move', 'plus_dm', 'minus_dm'], axis=1, inplace=True, errors='ignore')
    
    return df

print("Adding V7 features...")
train_df = add_features_v7(train_df)
test_df = add_features_v7(test_df)
print(f"‚úì Features added. Total columns: {len(train_df.columns)}")

Adding V7 features...
‚úì Features added. Total columns: 108
‚úì Features added. Total columns: 108


## 3. V7 Enhanced Labeling

In [15]:
def create_labels_v7(df, forward_periods=60, min_pips=15, tp_sl_ratio=1.5):
    """
    V7 Labeling: More realistic with quality filters
    
    BUY (1): Up move >= min_pips AND Up > Down * ratio
    SELL (0): Down move >= min_pips AND Down > Up * ratio
    
    Additionally, we add a quality flag for filtering
    """
    df = df.copy()
    min_move = min_pips * 0.0001
    
    # Future Max/Min
    df['future_max'] = df['high'].rolling(forward_periods).max().shift(-forward_periods)
    df['future_min'] = df['low'].rolling(forward_periods).min().shift(-forward_periods)
    
    df['up_move'] = df['future_max'] - df['close']
    df['down_move'] = df['close'] - df['future_min']
    
    # Primary signal
    conditions = [
        (df['up_move'] >= min_move) & (df['up_move'] > df['down_move'] * tp_sl_ratio),
        (df['down_move'] >= min_move) & (df['down_move'] > df['up_move'] * tp_sl_ratio)
    ]
    choices = [1, 0]  # BUY=1, SELL=0
    df['signal'] = np.select(conditions, choices, default=-1)  # -1 = HOLD
    
    # Profit magnitude (for quality assessment)
    df['profit_pips'] = np.where(
        df['signal'] == 1, 
        df['up_move'] * 10000,
        np.where(df['signal'] == 0, df['down_move'] * 10000, 0)
    )
    
    # High quality signals (larger profits)
    df['high_quality_signal'] = (df['profit_pips'] > 20).astype(int)
    
    # Drop helper columns
    df.drop(['future_max', 'future_min', 'up_move', 'down_move', 'profit_pips'], axis=1, inplace=True)
    
    return df

train_df = create_labels_v7(train_df)
test_df = create_labels_v7(test_df)

print("Label Distribution (Train):")
print(f"  BUY:  {(train_df['signal'] == 1).sum():,} ({(train_df['signal'] == 1).mean()*100:.1f}%)")
print(f"  SELL: {(train_df['signal'] == 0).sum():,} ({(train_df['signal'] == 0).mean()*100:.1f}%)")
print(f"  HOLD: {(train_df['signal'] == -1).sum():,} ({(train_df['signal'] == -1).mean()*100:.1f}%)")

print("\nLabel Distribution (Test):")
print(f"  BUY:  {(test_df['signal'] == 1).sum():,} ({(test_df['signal'] == 1).mean()*100:.1f}%)")
print(f"  SELL: {(test_df['signal'] == 0).sum():,} ({(test_df['signal'] == 0).mean()*100:.1f}%)")
print(f"  HOLD: {(test_df['signal'] == -1).sum():,} ({(test_df['signal'] == -1).mean()*100:.1f}%)")

Label Distribution (Train):
  BUY:  193,796 (10.4%)
  SELL: 199,453 (10.7%)
  HOLD: 1,466,243 (78.9%)

Label Distribution (Test):
  BUY:  41,901 (14.1%)
  SELL: 38,401 (12.9%)
  HOLD: 216,476 (72.9%)


## 4. Prepare Training Data (BUY vs SELL only)

In [16]:
# Filter BUY/SELL only (remove HOLD)
train_binary = train_df[train_df['signal'] != -1].copy()
test_binary = test_df[test_df['signal'] != -1].copy()

# Feature columns
exclude_cols = ['time', 'signal', 'high_quality_signal', 'open', 'high', 'low', 'close', 'volume', 'tick_volume']
feature_cols = [c for c in train_binary.columns if c not in exclude_cols]

# Clean data
train_clean = train_binary.dropna(subset=feature_cols).copy()
test_clean = test_binary.dropna(subset=feature_cols).copy()

X_train = train_clean[feature_cols].values
y_train = train_clean['signal'].values
X_test = test_clean[feature_cols].values
y_test = test_clean['signal'].values

# Scale
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

print(f"Training samples: {len(X_train):,}")
print(f"Test samples: {len(X_test):,}")
print(f"Features: {len(feature_cols)}")
print(f"BUY ratio (train): {y_train.mean()*100:.1f}%")
print(f"BUY ratio (test): {y_test.mean()*100:.1f}%")

Training samples: 393,249
Test samples: 80,296
Features: 102
BUY ratio (train): 49.3%
BUY ratio (test): 52.2%


## 5. Train V7 Ensemble (5 GPU Models)

In [17]:
import torch
GPU_AVAILABLE = torch.cuda.is_available()
print(f"GPU Available: {GPU_AVAILABLE}")

models = {}

# 1. XGBoost - Conservative
models['xgb1'] = xgb.XGBClassifier(
    n_estimators=600, max_depth=8, learning_rate=0.03,
    subsample=0.8, colsample_bytree=0.8, 
    reg_alpha=0.1, reg_lambda=1.0,
    random_state=42, tree_method='hist', device='cuda', verbosity=0
)

# 2. XGBoost - Aggressive
models['xgb2'] = xgb.XGBClassifier(
    n_estimators=400, max_depth=12, learning_rate=0.05,
    subsample=0.7, colsample_bytree=0.7,
    random_state=43, tree_method='hist', device='cuda', verbosity=0
)

# 3. LightGBM - Standard
models['lgb1'] = lgb.LGBMClassifier(
    n_estimators=600, max_depth=8, learning_rate=0.03,
    subsample=0.8, colsample_bytree=0.8,
    random_state=42, verbose=-1, device='gpu'
)

# 4. LightGBM - More leaves
models['lgb2'] = lgb.LGBMClassifier(
    n_estimators=500, max_depth=6, learning_rate=0.04,
    num_leaves=127, min_child_samples=20,
    subsample=0.75, colsample_bytree=0.75,
    random_state=44, verbose=-1, device='gpu'
)

# 5. CatBoost
models['cat'] = CatBoostClassifier(
    iterations=600, depth=8, learning_rate=0.03,
    l2_leaf_reg=3.0,
    random_seed=42, task_type='GPU', devices='0', verbose=False
)

# Train all models
predictions = {}
probabilities = {}

print("\nüöÄ Training V7 Ensemble (GPU)...")
for name, model in models.items():
    print(f"  Training {name.upper()}...", end=" ")
    model.fit(X_train_scaled, y_train)
    predictions[name] = model.predict(X_test_scaled)
    probabilities[name] = model.predict_proba(X_test_scaled)
    acc = accuracy_score(y_test, predictions[name])
    print(f"‚úì Accuracy: {acc*100:.2f}%")

GPU Available: True

üöÄ Training V7 Ensemble (GPU)...
  Training XGB1... ‚úì Accuracy: 50.88%
  Training XGB2... ‚úì Accuracy: 50.88%
  Training XGB2... ‚úì Accuracy: 49.17%
  Training LGB1... ‚úì Accuracy: 49.17%
  Training LGB1... ‚úì Accuracy: 51.60%
  Training LGB2... ‚úì Accuracy: 51.60%
  Training LGB2... ‚úì Accuracy: 50.51%
  Training CAT... ‚úì Accuracy: 50.51%
  Training CAT... ‚úì Accuracy: 50.63%
‚úì Accuracy: 50.63%


## 6. V7 Ensemble with Quality Filters

In [18]:
# Weighted Ensemble
weights = {'xgb1': 0.22, 'xgb2': 0.18, 'lgb1': 0.22, 'lgb2': 0.18, 'cat': 0.20}

final_proba = np.zeros_like(probabilities['xgb1'])
for name, w in weights.items():
    final_proba += w * probabilities[name]

buy_prob = final_proba[:, 1] * 100

# Model Agreement Bonus
agreement_count = sum([predictions[name] == 1 for name in models.keys()])
all_agree_buy = agreement_count == 5
most_agree_buy = agreement_count >= 4

confidence = buy_prob.copy()
confidence[all_agree_buy] = np.minimum(confidence[all_agree_buy] + 7, 100)
confidence[most_agree_buy & ~all_agree_buy] = np.minimum(confidence[most_agree_buy & ~all_agree_buy] + 3, 100)

print("="*70)
print("üìä V7 ENSEMBLE RESULTS")
print("="*70)
print(f"All 5 models agree on BUY: {all_agree_buy.sum():,}")
print(f"4+ models agree on BUY: {most_agree_buy.sum():,}")

# Results by Confidence
print("\nüìä BUY Signal Accuracy by Confidence:")
print("-"*60)
print(f"{'Confidence':>12} | {'Signals':>10} | {'Correct':>10} | {'Accuracy':>10}")
print("-"*60)

v7_results = {}
for conf in [50, 60, 70, 75, 80, 85, 90, 95]:
    mask = confidence >= conf
    if mask.sum() > 0:
        signals = mask.sum()
        correct = y_test[mask].sum()
        acc = correct / signals * 100
        v7_results[conf] = {'signals': signals, 'correct': correct, 'accuracy': acc}
        print(f"{conf:>10}%+ | {signals:>10} | {correct:>10} | {acc:>9.1f}%")

üìä V7 ENSEMBLE RESULTS
All 5 models agree on BUY: 20,225
4+ models agree on BUY: 31,346

üìä BUY Signal Accuracy by Confidence:
------------------------------------------------------------
  Confidence |    Signals |    Correct |   Accuracy
------------------------------------------------------------
        50%+ |      40923 |      21327 |      52.1%
        60%+ |      22046 |      11709 |      53.1%
        70%+ |       5955 |       3188 |      53.5%
        75%+ |       2104 |       1204 |      57.2%
        80%+ |        770 |        402 |      52.2%
        85%+ |        259 |        105 |      40.5%
        90%+ |         88 |         43 |      48.9%
        95%+ |         16 |         14 |      87.5%


## 7. Quality Filter Analysis

In [19]:
# Get quality scores from test data
quality_scores = test_clean['quality_score'].values
strong_trend = test_clean['strong_trend'].values
is_overlap = test_clean['is_overlap'].values
normal_vol = test_clean['normal_volatility'].values

print("="*70)
print("üìä ACCURACY WITH QUALITY FILTERS")
print("="*70)

# Test different filter combinations
filters = {
    'No Filter': np.ones(len(y_test), dtype=bool),
    'Strong Trend': strong_trend == 1,
    'London/NY Overlap': is_overlap == 1,
    'Normal Volatility': normal_vol == 1,
    'Quality >= 4': quality_scores >= 4,
    'Quality >= 5': quality_scores >= 5,
    'Trend + Overlap': (strong_trend == 1) & (is_overlap == 1),
    'All Filters': (strong_trend == 1) & (is_overlap == 1) & (normal_vol == 1),
}

print(f"{'Filter':>20} | {'Conf':>5} | {'Signals':>8} | {'Accuracy':>10}")
print("-"*55)

best_combo = {'filter': '', 'conf': 0, 'acc': 0, 'signals': 0}

for filter_name, filter_mask in filters.items():
    for conf in [75, 80, 85]:
        mask = (confidence >= conf) & filter_mask
        if mask.sum() >= 10:  # At least 10 signals
            signals = mask.sum()
            acc = y_test[mask].mean() * 100
            print(f"{filter_name:>20} | {conf:>4}% | {signals:>8} | {acc:>9.1f}%")
            
            if acc > best_combo['acc'] and signals >= 20:
                best_combo = {'filter': filter_name, 'conf': conf, 'acc': acc, 'signals': signals}

print("\n" + "="*70)
print(f"üèÜ BEST CONFIGURATION:")
print(f"   Filter: {best_combo['filter']}")
print(f"   Confidence: {best_combo['conf']}%+")
print(f"   Signals: {best_combo['signals']}")
print(f"   Accuracy: {best_combo['acc']:.1f}%")

üìä ACCURACY WITH QUALITY FILTERS
              Filter |  Conf |  Signals |   Accuracy
-------------------------------------------------------
           No Filter |   75% |     2104 |      57.2%
           No Filter |   80% |      770 |      52.2%
           No Filter |   85% |      259 |      40.5%
        Strong Trend |   75% |     1535 |      60.1%
        Strong Trend |   80% |      585 |      56.6%
        Strong Trend |   85% |      200 |      48.5%
   London/NY Overlap |   75% |      345 |      57.7%
   London/NY Overlap |   80% |       48 |      39.6%
   Normal Volatility |   75% |     1695 |      56.1%
   Normal Volatility |   80% |      559 |      45.6%
   Normal Volatility |   85% |      155 |      21.3%
        Quality >= 4 |   75% |     1123 |      53.6%
        Quality >= 4 |   80% |      327 |      41.9%
        Quality >= 4 |   85% |       75 |      24.0%
        Quality >= 5 |   75% |      649 |      56.4%
        Quality >= 5 |   80% |      168 |      44.6%
        

## 8. V7 vs V6 Comparison

In [20]:
# Load V6 for comparison
v6_dir = BASE_DIR / 'models' / 'signal_generator_v6'

try:
    v6_models = {}
    for name in ['xgb1', 'xgb2', 'lgb1', 'lgb2', 'cat']:
        v6_models[name] = joblib.load(v6_dir / f'{name}_v6_bin.joblib')
    v6_scaler = joblib.load(v6_dir / 'scaler_v6_bin.joblib')
    v6_feature_cols = joblib.load(v6_dir / 'feature_cols_v6.joblib')
    
    # Check missing features
    missing = [c for c in v6_feature_cols if c not in test_clean.columns]
    for c in missing:
        test_clean[c] = 0
    
    X_test_v6 = test_clean[v6_feature_cols].values
    X_test_v6_scaled = v6_scaler.transform(X_test_v6)
    
    # V6 predictions
    v6_proba = {}
    for name, model in v6_models.items():
        v6_proba[name] = model.predict_proba(X_test_v6_scaled)
    
    v6_weights = {'xgb1': 0.20, 'xgb2': 0.20, 'lgb1': 0.20, 'lgb2': 0.20, 'cat': 0.20}
    v6_final_proba = np.zeros_like(v6_proba['xgb1'])
    for name, w in v6_weights.items():
        v6_final_proba += w * v6_proba[name]
    
    v6_confidence = v6_final_proba[:, 1] * 100
    
    print("="*80)
    print("üìä V7 vs V6 COMPARISON")
    print("="*80)
    print(f"{'Threshold':>10} | {'V6 Sig':>8} | {'V6 Acc':>8} | {'V7 Sig':>8} | {'V7 Acc':>8} | {'Winner':>10}")
    print("-"*70)
    
    for conf in [70, 75, 80, 85, 90]:
        v6_mask = v6_confidence >= conf
        v6_sig = v6_mask.sum()
        v6_acc = y_test[v6_mask].mean() * 100 if v6_sig > 0 else 0
        
        v7_mask = confidence >= conf
        v7_sig = v7_mask.sum()
        v7_acc = y_test[v7_mask].mean() * 100 if v7_sig > 0 else 0
        
        winner = "V7 ‚úì" if v7_acc > v6_acc + 1 else ("V6 ‚úì" if v6_acc > v7_acc + 1 else "‚âà TIE")
        print(f"{conf:>8}%+ | {v6_sig:>8} | {v6_acc:>7.1f}% | {v7_sig:>8} | {v7_acc:>7.1f}% | {winner:>10}")
        
except Exception as e:
    print(f"V6 comparison error: {e}")

üìä V7 vs V6 COMPARISON
 Threshold |   V6 Sig |   V6 Acc |   V7 Sig |   V7 Acc |     Winner
----------------------------------------------------------------------
      70%+ |     6177 |    54.5% |     5955 |    53.5% |      ‚âà TIE
      75%+ |     1990 |    57.1% |     2104 |    57.2% |      ‚âà TIE
      80%+ |      475 |    65.7% |      770 |    52.2% |       V6 ‚úì
      85%+ |       41 |    78.0% |      259 |    40.5% |       V6 ‚úì
      90%+ |        0 |     0.0% |       88 |    48.9% |       V7 ‚úì


## 9. Save V7 Models

In [21]:
print("Saving V7 Models...")

for name, model in models.items():
    joblib.dump(model, MODEL_DIR / f'{name}_v7.joblib')

joblib.dump(scaler, MODEL_DIR / 'scaler_v7.joblib')
joblib.dump(feature_cols, MODEL_DIR / 'feature_cols_v7.joblib')
joblib.dump(weights, MODEL_DIR / 'weights_v7.joblib')

# Save config with best settings
config = {
    'version': 'v7',
    'mode': 'BINARY_BUY_SELL',
    'best_threshold': best_combo['conf'],
    'best_filter': best_combo['filter'],
    'models': list(models.keys()),
    'weights': weights
}
joblib.dump(config, MODEL_DIR / 'config_v7.joblib')

print("\n‚úÖ V7 Models Saved Successfully!")
print(f"   Location: {MODEL_DIR}")
print(f"   Best config: {best_combo['conf']}% threshold with {best_combo['filter']}")

Saving V7 Models...

‚úÖ V7 Models Saved Successfully!
   Location: c:\Users\Acer\Desktop\Forex-Signal-App\models\signal_generator_v7
   Best config: 75% threshold with Strong Trend

‚úÖ V7 Models Saved Successfully!
   Location: c:\Users\Acer\Desktop\Forex-Signal-App\models\signal_generator_v7
   Best config: 75% threshold with Strong Trend


## 10. Summary

In [22]:
print("="*70)
print("üìã V7 SUMMARY")
print("="*70)
print(f"\nüîß Features: {len(feature_cols)}")
print(f"üéØ Models: {list(models.keys())}")
print(f"üìä Test samples: {len(y_test):,}")

print("\nüìà Best Results:")
for conf in [75, 80, 85, 90]:
    if conf in v7_results:
        r = v7_results[conf]
        print(f"   {conf}%+ confidence: {r['accuracy']:.1f}% accuracy ({r['signals']} signals)")

print(f"\nüèÜ Recommended: {best_combo['conf']}% threshold + {best_combo['filter']}")
print(f"   Expected Accuracy: {best_combo['acc']:.1f}%")
print(f"   Signals per test period: {best_combo['signals']}")

üìã V7 SUMMARY

üîß Features: 102
üéØ Models: ['xgb1', 'xgb2', 'lgb1', 'lgb2', 'cat']
üìä Test samples: 80,296

üìà Best Results:
   75%+ confidence: 57.2% accuracy (2104 signals)
   80%+ confidence: 52.2% accuracy (770 signals)
   85%+ confidence: 40.5% accuracy (259 signals)
   90%+ confidence: 48.9% accuracy (88 signals)

üèÜ Recommended: 75% threshold + Strong Trend
   Expected Accuracy: 60.1%
   Signals per test period: 1535
