# Synergy 2: MLMI → NW-RQK → FVG Trading Strategy

**Ultra-Fast Backtesting with VectorBT and Numba JIT Compilation**

This notebook implements the second synergy pattern where:
1. MLMI provides the primary trend signal
2. NW-RQK confirms the trend direction
3. FVG validates the final entry zone

Key differences from Synergy 1:
- NW-RQK confirmation is evaluated before the FVG check
- The different ordering may capture different market dynamics
- It is expected to generate a similar number of trades, but with different entry timing

In [None]:
# Cell 1: Environment Setup and Imports

# Standard library imports
import os
import sys
import gc
import json
import time
import logging
import warnings
from datetime import datetime, timedelta
from pathlib import Path
from typing import Tuple, Dict as TypeDict, Optional, List, Union, Any
from dataclasses import dataclass, field
from collections import defaultdict
import pickle

# Scientific computing imports
import numpy as np
import pandas as pd
from scipy import stats
from scipy.spatial import cKDTree

# Visualization imports
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Trading and backtesting imports
import vectorbt as vbt

# Performance optimization imports
from numba import njit, prange, typed, types
from numba.typed import Dict
import numba

# Progress tracking
from tqdm import tqdm

# Silence every warning category for the rest of the session.
# NOTE(review): this also hides deprecation warnings raised by library calls
# in later cells — confirm that is intended.
warnings.filterwarnings('ignore')

# Send INFO-and-above records to stdout and to 'synergy_strategy.log'
# (the log file path is relative to the current working directory).
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout),
        logging.FileHandler('synergy_strategy.log')
    ]
)
# Module-level logger used by the functions and scripts in the cells below.
logger = logging.getLogger(__name__)

# Numba runtime settings: select the 'threadsafe' threading layer and set the
# thread count to Numba's own default.
# NOTE(review): presumably these must be assigned before the first parallel
# function is compiled to take effect — confirm against the Numba docs.
numba.config.THREADING_LAYER = 'threadsafe'
numba.config.NUMBA_NUM_THREADS = numba.config.NUMBA_DEFAULT_NUM_THREADS

# pandas display settings: show all columns, no fixed display width, and at
# most 100 rows when a frame is printed.
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_rows', 100)

# Record interpreter and library versions in the log for reproducibility.
logger.info("Environment Setup")
logger.info(f"Python version: {sys.version}")
logger.info(f"NumPy version: {np.__version__}")
logger.info(f"Pandas version: {pd.__version__}")
logger.info(f"VectorBT version: {vbt.__version__}")
logger.info(f"Numba version: {numba.__version__}")
logger.info(f"Numba threads: {numba.config.NUMBA_NUM_THREADS}")

# Short banner echoed to the notebook output.
print("Synergy 2: MLMI → NW-RQK → FVG Strategy")
print(f"Numba threads: {numba.config.NUMBA_NUM_THREADS}")
print(f"VectorBT version: {vbt.__version__}")
print("Environment ready for ultra-fast backtesting!")

# Configuration dataclass
@dataclass
class StrategyConfig:
    """Central collection of tunable settings for this notebook.

    Every field has a default, so ``StrategyConfig()`` yields a usable
    configuration; individual fields can be overridden by keyword when the
    class is instantiated.
    """
    # Data paths: the first locations tried when the 5-minute and 30-minute
    # CSV files are loaded in Cell 2.
    data_path_5m: str = "/home/QuantNova/AlgoSpace-Strategy-1/@NQ - 5 min - ETH.csv"
    data_path_30m: str = "/home/QuantNova/AlgoSpace-Strategy-1/NQ - 30 min - ETH.csv"
    
    # MLMI parameters
    # mlmi_k_neighbors and mlmi_confidence_threshold are passed to
    # calculate_mlmi_adaptive in Cell 4.
    # NOTE(review): mlmi_forward_bars is not read anywhere in the visible
    # cells (the forward horizon there is a literal 5) — confirm usage.
    mlmi_k_neighbors: int = 200
    mlmi_confidence_threshold: float = 0.3
    mlmi_forward_bars: int = 5
    
    # NW-RQK parameters
    # NOTE(review): presumably h/r are the kernel bandwidth and shape passed
    # to the Nadaraya-Watson regression in Cell 5 — confirm against that cell.
    nwrqk_h: float = 8.0
    nwrqk_r: float = 8.0
    nwrqk_lag: int = 2
    nwrqk_strength_threshold: float = 0.2
    
    # FVG parameters: passed to detect_fvg_optimized in Cell 3 as the ATR
    # multiplier for the minimum gap size and the maximum zone length in bars.
    fvg_atr_multiplier: float = 1.5
    fvg_active_bars: int = 20
    
    # Signal parameters
    # NOTE(review): consumer not visible in this part of the notebook.
    synergy_window: int = 30
    
    # Backtesting parameters
    # NOTE(review): consumers not visible in this part of the notebook; units
    # (currency, fraction vs. percent) should be confirmed where they are used.
    initial_capital: float = 100000
    position_size_base: float = 100
    stop_loss_atr: float = 2.0
    max_holding_bars: int = 100
    fees: float = 0.0001
    slippage: float = 0.0001
    
    # Performance parameters
    # max_memory_gb: Cell 3 triggers cleanup_memory() once usage exceeds 80%
    # of this value. chunk_size is not read in the visible cells.
    chunk_size: int = 10000
    max_memory_gb: float = 8.0
    
    # Output parameters
    # save_results / results_path control the data checkpoint written in
    # Cell 2. checkpoint_interval is not read in the visible cells.
    save_results: bool = True
    results_path: str = "./results"
    checkpoint_interval: int = 1000

# Create default configuration (shared module-level instance used by later cells)
config = StrategyConfig()

# Memory management utilities
def check_memory_usage():
    """Return this process's resident memory in GB.

    Falls back to ``0.0`` (after logging a warning) when ``psutil`` is not
    installed, so callers can treat monitoring as optional.
    """
    try:
        import psutil
    except ImportError:
        logger.warning("psutil not installed, memory monitoring disabled")
        return 0.0

    rss_bytes = psutil.Process(os.getpid()).memory_info().rss
    return rss_bytes / 1024 / 1024 / 1024

def cleanup_memory():
    """Run a garbage-collection pass, then log the memory usage afterwards."""
    gc.collect()
    usage_gb = check_memory_usage()
    logger.info(f"Memory after cleanup: {usage_gb:.2f} GB")

In [None]:
# Cell 2: Data Loading with Robust Error Handling

class DataLoadingError(Exception):
    """Raised when a data file cannot be found, read, or parsed."""

class DataValidationError(Exception):
    """Raised when loaded data lacks required columns or contains no rows."""

def validate_dataframe(df: pd.DataFrame, required_columns: List[str]) -> None:
    """Check that ``df`` has the required columns and report quality issues.

    Args:
        df: Frame to validate.
        required_columns: Column names that must all be present in ``df``.

    Raises:
        DataValidationError: If a required column is missing or ``df`` has
            no rows.

    Having fewer than 100 rows, or NaN values in the OHLC columns, only
    produces a logged warning.
    """
    missing_columns = set(required_columns) - set(df.columns)
    if missing_columns:
        raise DataValidationError(f"Missing required columns: {missing_columns}")

    row_count = len(df)
    if row_count == 0:
        raise DataValidationError("Dataframe is empty")
    if row_count < 100:
        logger.warning(f"Limited data: only {row_count} rows available")

    # Inspect only the OHLC columns that actually exist. ``required_columns``
    # is caller-supplied and need not contain the OHLC set; indexing an absent
    # column would raise KeyError instead of yielding a validation result.
    critical_columns = [
        col for col in ('Open', 'High', 'Low', 'Close') if col in df.columns
    ]
    if critical_columns:
        nan_counts = df[critical_columns].isna().sum()
        if nan_counts.any():
            logger.warning(f"NaN values found: {nan_counts.to_dict()}")

def load_data_optimized(file_path: str, timeframe: str = '5m',
                       config: Optional[StrategyConfig] = None) -> pd.DataFrame:
    """Load an OHLCV CSV file, clean it, and add derived feature columns.

    The file must contain a ``Timestamp`` column (parsed day-first) plus
    ``Open``, ``High``, ``Low``, ``Close`` and ``Volume``. Rows with an
    unparseable timestamp or a NaN OHLC value are dropped, inconsistent
    candles are repaired, and the index is sorted and de-duplicated. The
    columns ``Returns``, ``LogReturns``, ``HL_Range``, ``OC_Range`` and
    ``DataQuality`` are added, and the five OHLCV columns are returned as
    float32.

    Args:
        file_path: Path of the CSV file to read.
        timeframe: Label used only in log messages.
        config: Accepted for call-site compatibility; not used here.

    Returns:
        The cleaned frame indexed by timestamp.

    Raises:
        DataLoadingError: If the file is missing, cannot be parsed, fails
            validation, or contains no usable rows. The original exception,
            if any, is attached as ``__cause__``.
    """
    start_time = time.time()
    logger.info(f"Loading {timeframe} data from {file_path}")

    try:
        if not os.path.exists(file_path):
            raise DataLoadingError(f"Data file not found: {file_path}")

        file_size_mb = os.path.getsize(file_path) / (1024 * 1024)
        if file_size_mb > 1000:
            logger.warning(f"Large file detected: {file_size_mb:.1f} MB")

        # Parse timestamps after reading instead of through read_csv's
        # `date_parser` / `infer_datetime_format`, which are deprecated.
        df = pd.read_csv(file_path, low_memory=False)
        if 'Timestamp' not in df.columns:
            raise DataLoadingError(f"Missing 'Timestamp' column in {file_path}")
        df['Timestamp'] = pd.to_datetime(df['Timestamp'], dayfirst=True, errors='coerce')
        df = df.set_index('Timestamp')

        numeric_cols = ['Open', 'High', 'Low', 'Close', 'Volume']
        validate_dataframe(df, numeric_cols)

        # Work in float64 while cleaning; non-numeric cells become NaN.
        for col in numeric_cols:
            df[col] = pd.to_numeric(df[col], errors='coerce').astype(np.float64)

        # Unparseable timestamps were coerced to NaT above; drop those rows.
        df = df.loc[df.index.notna()]

        initial_len = len(df)
        df = df.dropna(subset=['Open', 'High', 'Low', 'Close'])
        if len(df) < initial_len:
            logger.warning(f"Dropped {initial_len - len(df)} rows with NaN values")

        # Repair candles whose High/Low do not bracket Open and Close.
        invalid_candles = (
            (df['High'] < df['Low']) |
            (df['High'] < df['Open']) |
            (df['High'] < df['Close']) |
            (df['Low'] > df['Open']) |
            (df['Low'] > df['Close'])
        )
        if invalid_candles.any():
            logger.warning(f"Found {invalid_candles.sum()} invalid candles, fixing...")
            # Assign plain arrays: duplicate timestamps are still present at
            # this point, and label-aligned assignment is unsafe with them.
            fixed_high = df.loc[invalid_candles, ['Open', 'Close', 'High']].max(axis=1).to_numpy()
            fixed_low = df.loc[invalid_candles, ['Open', 'Close', 'Low']].min(axis=1).to_numpy()
            df.loc[invalid_candles, 'High'] = fixed_high
            df.loc[invalid_candles, 'Low'] = fixed_low

        # Stable sort so "keeping first" below refers to file order.
        df = df.sort_index(kind='stable')

        duplicates = df.index.duplicated()
        if duplicates.any():
            logger.warning(f"Found {duplicates.sum()} duplicate timestamps, keeping first")
            df = df.loc[~duplicates].copy()

        if df.empty:
            raise DataLoadingError(f"No valid rows remain after cleaning: {file_path}")

        # Derived features (computed on float64 prices, before the downcast).
        prev_close = df['Close'].shift(1)
        df['Returns'] = df['Close'].pct_change().fillna(0)
        df['LogReturns'] = np.where(
            prev_close > 0,
            np.log(df['Close'] / prev_close),
            0
        )
        df['HL_Range'] = df['High'] - df['Low']
        df['OC_Range'] = (df['Open'] - df['Close']).abs()

        # Quality score: 1.0, scaled by 0.8 for zero volume and by 0.9 for a
        # zero high-low range (both penalties can apply to the same bar).
        df['DataQuality'] = 1.0
        df.loc[df['Volume'] == 0, 'DataQuality'] *= 0.8
        df.loc[df['HL_Range'] == 0, 'DataQuality'] *= 0.9

        load_time = time.time() - start_time
        logger.info(f"Successfully loaded {len(df):,} rows in {load_time:.2f} seconds")
        logger.info(f"Date range: {df.index[0]} to {df.index[-1]}")
        logger.info(f"Average data quality: {df['DataQuality'].mean():.3f}")

        # Memory optimization: only the raw OHLCV columns are downcast.
        df = df.astype({col: 'float32' for col in numeric_cols})

        return df

    except DataLoadingError:
        # Already carries a specific message (e.g. file not found); wrapping
        # it again would bury that message behind "Failed to load data".
        raise
    except pd.errors.ParserError as e:
        raise DataLoadingError(f"Failed to parse CSV file: {str(e)}") from e
    except Exception as e:
        logger.error(f"Unexpected error loading data: {str(e)}")
        raise DataLoadingError(f"Failed to load data: {str(e)}") from e

# Warm up the Numba JIT by compiling and running one trivial function.
# NOTE(review): only dummy_compile itself is invoked here, so this does not
# appear to compile the indicator functions defined in later cells — confirm.
print("Pre-compiling Numba functions for maximum speed...")

@njit(cache=True)
def dummy_compile():
    """Return the sum of a fixed three-element array (6.0); JIT warm-up only."""
    return np.array([1.0, 2.0, 3.0]).sum()

_ = dummy_compile()  # First call triggers compilation; the result is discarded

# Load both timeframes, trim them to their common date range, and optionally
# write a small checkpoint describing what was loaded.
print("\nLoading data files...")

try:
    # Candidate locations, tried in order; the configured path has priority.
    data_paths_5m = [
        config.data_path_5m,
        "./data/@NQ - 5 min - ETH.csv",
        "../data/@NQ - 5 min - ETH.csv",
        "data/@NQ - 5 min - ETH.csv"
    ]

    data_paths_30m = [
        config.data_path_30m,
        "./data/NQ - 30 min - ETH.csv",
        "../data/NQ - 30 min - ETH.csv",
        "data/NQ - 30 min - ETH.csv"
    ]

    # Load the first 5m file that exists
    df_5m = None
    for path in data_paths_5m:
        if os.path.exists(path):
            df_5m = load_data_optimized(path, '5m', config)
            break

    if df_5m is None:
        raise DataLoadingError(f"Could not find 5m data file. Tried: {data_paths_5m}")

    # Load the first 30m file that exists
    df_30m = None
    for path in data_paths_30m:
        if os.path.exists(path):
            df_30m = load_data_optimized(path, '30m', config)
            break

    if df_30m is None:
        raise DataLoadingError(f"Could not find 30m data file. Tried: {data_paths_30m}")

    # Restrict both frames to the period covered by both files
    common_start = max(df_5m.index[0], df_30m.index[0])
    common_end = min(df_5m.index[-1], df_30m.index[-1])
    if common_start > common_end:
        # Without this check the empty slices below would fail later with an
        # uninformative IndexError on df.index[0].
        raise DataLoadingError(
            f"5m and 30m data do not overlap in time "
            f"(latest start {common_start}, earliest end {common_end})"
        )

    # .copy(): later cells add columns to these frames (e.g. df_30m['mlmi']),
    # which should write to independent frames rather than to slices.
    df_5m = df_5m.loc[common_start:common_end].copy()
    df_30m = df_30m.loc[common_start:common_end].copy()

    print(f"\n5-minute data: {df_5m.index[0]} to {df_5m.index[-1]} ({len(df_5m):,} bars)")
    print(f"30-minute data: {df_30m.index[0]} to {df_30m.index[-1]} ({len(df_30m):,} bars)")
    print(f"Memory usage: {check_memory_usage():.2f} GB")

    # Save checkpoint (shapes and date range only, not the data itself)
    if config.save_results:
        os.makedirs(config.results_path, exist_ok=True)
        checkpoint_path = os.path.join(config.results_path, 'data_checkpoint.pkl')
        with open(checkpoint_path, 'wb') as f:
            pickle.dump({
                'df_5m_shape': df_5m.shape,
                'df_30m_shape': df_30m.shape,
                'date_range': (df_5m.index[0], df_5m.index[-1])
            }, f)
        logger.info(f"Saved data checkpoint to {checkpoint_path}")

except DataLoadingError as e:
    # Expected failure mode: explain how to fix it, then stop the notebook.
    logger.error(f"Data loading failed: {e}")
    print(f"\nERROR: {e}")
    print("\nPlease ensure data files are in the correct location.")
    print("You can update the file paths in the StrategyConfig class in Cell 1.")
    raise
except Exception as e:
    logger.error(f"Unexpected error: {e}")
    raise

In [None]:
# Cell 3: Optimized Indicator Suite with Robust Error Handling

# fastmath is restricted to flags that do NOT assume the absence of NaN/Inf:
# this function uses NaN as its "no value" marker and tests with np.isnan,
# which plain fastmath=True (including 'nnan'/'ninf') is allowed to break.
@njit(fastmath={'nsz', 'arcp', 'contract', 'afn', 'reassoc'}, cache=True, parallel=True)
def calculate_all_indicators(close: np.ndarray, high: np.ndarray, low: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Compute WMA(5), WMA(20), RSI(5), RSI(20) and a 14-bar ATR.

    Args:
        close: Closing prices.
        high: High prices, same length as ``close``.
        low: Low prices, same length as ``close``.

    Returns:
        ``(ma5, ma20, rsi5, rsi20, atr)``, each the same length as ``close``.

        * ``ma5`` / ``ma20``: linearly weighted moving averages (newest bar
          has the largest weight); NaN until enough bars exist or when the
          window contains NaN.
        * ``rsi5`` / ``rsi20``: RSI seeded with a simple average of the first
          ``period`` changes and then smoothed recursively; positions before
          the first computed value keep the neutral default 50.0.
        * ``atr``: simple mean of the valid true ranges over the last 14
          bars, available from index 14; NaN where no valid true range
          exists in the window.
    """
    n = len(close)

    # Pre-allocate outputs with their "no value" defaults
    ma5 = np.full(n, np.nan, dtype=np.float64)
    ma20 = np.full(n, np.nan, dtype=np.float64)
    rsi5 = np.full(n, 50.0, dtype=np.float64)
    rsi20 = np.full(n, 50.0, dtype=np.float64)
    atr = np.full(n, np.nan, dtype=np.float64)

    if n == 0:
        return ma5, ma20, rsi5, rsi20, atr

    # Linear weights 1..period, oldest bar first
    weights5 = np.arange(1, 6, dtype=np.float64)
    weights20 = np.arange(1, 21, dtype=np.float64)
    sum_w5 = weights5.sum()
    sum_w20 = weights20.sum()

    # Each bar's WMA depends only on its own window, so bars run in parallel
    for i in prange(n):
        # 5-period WMA
        if i >= 4:
            window_data = close[i-4:i+1]
            if not np.any(np.isnan(window_data)):
                ma5[i] = np.dot(window_data, weights5) / sum_w5

        # 20-period WMA
        if i >= 19:
            window_data = close[i-19:i+1]
            if not np.any(np.isnan(window_data)):
                ma20[i] = np.dot(window_data, weights20) / sum_w20

    # RSI: gains[i] / losses[i] describe the move from close[i] to close[i+1]
    if n > 1:
        deltas = np.diff(close)
        gains = np.maximum(deltas, 0)
        losses = -np.minimum(deltas, 0)

        # RSI 5
        if len(gains) >= 5:
            avg_gain5 = np.mean(gains[:5])
            avg_loss5 = np.mean(losses[:5])

            if avg_loss5 > 0:
                rs5 = avg_gain5 / avg_loss5
                rsi5[5] = 100 - (100 / (1 + rs5))
            else:
                # No losses: 100 if there were gains, neutral if flat
                rsi5[5] = 100 if avg_gain5 > 0 else 50

            # Recursive smoothing; gains[i] updates the value at bar i + 1
            for i in range(5, len(gains)):
                avg_gain5 = (avg_gain5 * 4 + gains[i]) / 5
                avg_loss5 = (avg_loss5 * 4 + losses[i]) / 5

                if avg_loss5 > 0:
                    rs5 = avg_gain5 / avg_loss5
                    rsi5[i + 1] = 100 - (100 / (1 + rs5))
                else:
                    rsi5[i + 1] = 100 if avg_gain5 > 0 else 50

        # RSI 20
        if len(gains) >= 20:
            avg_gain20 = np.mean(gains[:20])
            avg_loss20 = np.mean(losses[:20])

            if avg_loss20 > 0:
                rs20 = avg_gain20 / avg_loss20
                rsi20[20] = 100 - (100 / (1 + rs20))
            else:
                rsi20[20] = 100 if avg_gain20 > 0 else 50

            for i in range(20, len(gains)):
                avg_gain20 = (avg_gain20 * 19 + gains[i]) / 20
                avg_loss20 = (avg_loss20 * 19 + losses[i]) / 20

                if avg_loss20 > 0:
                    rs20 = avg_gain20 / avg_loss20
                    rsi20[i + 1] = 100 - (100 / (1 + rs20))
                else:
                    rsi20[i + 1] = 100 if avg_gain20 > 0 else 50

    # ATR
    if n > 1:
        # NaN marks a true range that could not be computed. Using 0 as the
        # marker would also discard genuine zero-range bars from the average.
        tr = np.full(n, np.nan, dtype=np.float64)
        if not np.isnan(high[0]) and not np.isnan(low[0]):
            tr[0] = high[0] - low[0]

        for i in range(1, n):
            if not np.isnan(high[i]) and not np.isnan(low[i]) and not np.isnan(close[i-1]):
                hl = high[i] - low[i]
                hc = abs(high[i] - close[i-1])
                lc = abs(low[i] - close[i-1])
                tr[i] = max(hl, hc, lc)

        # Simple average of the valid true ranges in the trailing 14 bars
        for i in range(14, n):
            window = tr[i-13:i+1]
            valid_values = window[~np.isnan(window)]
            if len(valid_values) > 0:
                atr[i] = np.mean(valid_values)

    return ma5, ma20, rsi5, rsi20, atr

# fastmath is restricted to flags that do NOT assume the absence of NaN/Inf:
# the NaN guards below are part of the logic and plain fastmath=True
# (including 'nnan'/'ninf') is allowed to optimise them away.
@njit(parallel=True, fastmath={'nsz', 'arcp', 'contract', 'afn', 'reassoc'}, cache=True)
def detect_fvg_optimized(high: np.ndarray, low: np.ndarray, atr: np.ndarray,
                        multiplier: float = 1.5, min_gap_pct: float = 0.001,
                        active_bars: int = 20) -> Tuple[np.ndarray, np.ndarray]:
    """Flag the bars on which a bullish or bearish fair-value gap is active.

    A bullish gap forms at bar ``i`` when ``low[i] > high[i-3]`` and the gap
    exceeds ``max(atr[i] * multiplier, low[i] * min_gap_pct)``. The zone is
    then marked from bar ``i`` for at most ``active_bars`` bars, stopping at
    the first bar whose low drops below ``high[i-3]``. Bearish gaps mirror
    this with ``high[i] < low[i-3]``.

    Args:
        high: High prices.
        low: Low prices, same length as ``high``.
        atr: ATR values aligned with the prices; bars with NaN or
            non-positive ATR are skipped.
        multiplier: ATR multiple a gap must exceed.
        min_gap_pct: Alternative minimum gap as a fraction of ``low[i]``.
        active_bars: Maximum number of bars a zone stays marked.

    Returns:
        ``(bull_active, bear_active)`` boolean arrays, one flag per bar.
    """
    n = len(high)
    bull_active = np.zeros(n, dtype=np.bool_)
    bear_active = np.zeros(n, dtype=np.bool_)

    if n < 4:  # bar i is compared with bar i-3
        return bull_active, bear_active

    # Zones started at different bars may overlap, so parallel iterations can
    # write the same element — but only ever the value True.
    for i in prange(3, n):
        # Skip if ATR is invalid
        if np.isnan(atr[i]) or atr[i] <= 0:
            continue

        # Minimum gap size: the larger of the ATR-based and price-based limits
        gap_threshold = max(atr[i] * multiplier, low[i] * min_gap_pct)

        # Bullish FVG: current low sits above the high three bars back
        if (not np.isnan(low[i]) and not np.isnan(high[i-3]) and
            low[i] > high[i-3]):
            gap_size = low[i] - high[i-3]
            if gap_size > gap_threshold:
                # Mark the zone until price trades back below the gap edge
                for j in range(i, min(i + active_bars, n)):
                    if not np.isnan(low[j]) and low[j] >= high[i-3]:
                        bull_active[j] = True
                    else:
                        break

        # Bearish FVG: current high sits below the low three bars back
        if (not np.isnan(high[i]) and not np.isnan(low[i-3]) and
            high[i] < low[i-3]):
            gap_size = low[i-3] - high[i]
            if gap_size > gap_threshold:
                # Mark the zone until price trades back above the gap edge
                for j in range(i, min(i + active_bars, n)):
                    if not np.isnan(high[j]) and high[j] <= low[i-3]:
                        bear_active[j] = True
                    else:
                        break

    return bull_active, bear_active

# Safe smoothing function
def safe_smooth(data: np.ndarray, window: int = 20) -> np.ndarray:
    """Smooth ``data`` with a moving average of length ``window``.

    NaN values are forward- then backward-filled before averaging, and the
    positions that were NaN in the input are NaN again in the output.

    Args:
        data: One-dimensional array to smooth.
        window: Number of samples in the averaging kernel.

    Returns:
        A new smoothed array of the same length, or ``data`` itself
        (unchanged, not copied) when it is shorter than ``window``.

    Note:
        ``np.convolve(..., mode='same')`` centres the kernel, so each output
        value mixes samples from both before and after its index, and values
        near either end are averaged against implicit zero padding.
    """
    if len(data) < window:
        return data

    # .ffill()/.bfill() replace the deprecated fillna(method=...) spelling
    filled_data = pd.Series(data).ffill().bfill().values

    # Uniform kernel, i.e. an equal-weight moving average
    kernel = np.ones(window) / window
    smoothed = np.convolve(filled_data, kernel, mode='same')

    # Restore NaN where original data had NaN
    smoothed[np.isnan(data)] = np.nan

    return smoothed

# Run the indicator suite on both timeframes and time the whole step.
print("\nCalculating all indicators with parallel processing...")
logger.info("Starting indicator calculations")
start_time = time.time()

try:
    # 30-minute inputs, converted to float64 before entering the Numba routine
    close_30m = df_30m['Close'].values.astype(np.float64)
    high_30m = df_30m['High'].values.astype(np.float64)
    low_30m = df_30m['Low'].values.astype(np.float64)
    
    ma5, ma20, rsi5, rsi20, atr_30m = calculate_all_indicators(
        close_30m, high_30m, low_30m
    )
    
    # Smooth both RSI series with a 20-sample window
    rsi5_smooth = safe_smooth(rsi5, 20)
    rsi20_smooth = safe_smooth(rsi20, 20)
    
    # 5-minute inputs
    close_5m = df_5m['Close'].values.astype(np.float64)
    high_5m = df_5m['High'].values.astype(np.float64)
    low_5m = df_5m['Low'].values.astype(np.float64)
    
    # Only the ATR is kept from the 5-minute indicator run
    _, _, _, _, atr_5m = calculate_all_indicators(
        close_5m, high_5m, low_5m
    )
    
    # Detect FVG zones on the 5-minute bars; 0.001 is the min_gap_pct argument
    fvg_bull, fvg_bear = detect_fvg_optimized(
        high_5m, low_5m, atr_5m, 
        config.fvg_atr_multiplier,
        0.001,
        config.fvg_active_bars
    )
    
    calc_time = time.time() - start_time
    
    # Log coverage and range statistics for each indicator
    logger.info(f"Indicators calculated in {calc_time:.3f} seconds")
    logger.info(f"MA5 valid values: {(~np.isnan(ma5)).sum()}/{len(ma5)}")
    logger.info(f"MA20 valid values: {(~np.isnan(ma20)).sum()}/{len(ma20)}")
    logger.info(f"RSI5 range: [{np.nanmin(rsi5):.1f}, {np.nanmax(rsi5):.1f}]")
    logger.info(f"RSI20 range: [{np.nanmin(rsi20):.1f}, {np.nanmax(rsi20):.1f}]")
    logger.info(f"ATR 30m valid: {(~np.isnan(atr_30m)).sum()}/{len(atr_30m)}")
    logger.info(f"ATR 5m valid: {(~np.isnan(atr_5m)).sum()}/{len(atr_5m)}")
    logger.info(f"FVG zones - Bull: {fvg_bull.sum():,}, Bear: {fvg_bear.sum():,}")
    
    print(f"All indicators calculated in {calc_time:.3f} seconds")
    print(f"FVG zones detected - Bull: {fvg_bull.sum():,}, Bear: {fvg_bear.sum():,}")
    
    # Force a garbage collection once usage passes 80% of the configured limit
    if check_memory_usage() > config.max_memory_gb * 0.8:
        cleanup_memory()
    
except Exception as e:
    # Log and re-raise: later cells need the arrays produced above
    logger.error(f"Error calculating indicators: {str(e)}")
    raise

In [None]:
# Cell 4: Advanced MLMI with Adaptive KNN

@njit(fastmath=True, cache=True)
def adaptive_knn_predict(features: np.ndarray, labels: np.ndarray, query: np.ndarray,
                        k_base: int, volatility: float, size: int) -> Tuple[float, float]:
    """Predict a label for ``query`` by distance-weighted k-nearest-neighbour vote.

    Args:
        features: Stored patterns; only the first ``size`` rows and the first
            two columns are read.
        labels: Label of each stored pattern.
        query: Pattern to classify (two values).
        k_base: Neighbour count at zero volatility; ``k`` shrinks as
            ``volatility`` grows, never below 3 nor above ``size``.
        volatility: Recent return volatility.
        size: Number of valid rows in ``features`` / ``labels``.

    Returns:
        ``(prediction, confidence)``. ``prediction`` is the weighted mean of
        the neighbours' labels and ``confidence`` is its magnitude capped at
        1.0. With no stored patterns the result is ``(0.0, 0.5)``.
    """
    if size == 0:
        return 0.0, 0.5

    # Use fewer neighbours when volatility is high
    k = max(3, min(k_base, int(k_base * (1 - volatility * 2))))
    k = min(k, size)

    # Euclidean distance from the query to every stored pattern
    distances = np.zeros(size)
    for i in range(size):
        dist = 0.0
        for j in range(2):
            diff = features[i, j] - query[j]
            dist += diff * diff
        distances[i] = np.sqrt(dist)

    # Indices of the k smallest distances (unordered among themselves)
    indices = np.argpartition(distances, k-1)[:k]

    vote = 0.0
    weight_sum = 0.0

    for i in range(k):
        idx = indices[i]
        if distances[idx] > 1e-10:
            weight = 1.0 / distances[idx]
        else:
            # Exact matches are the closest neighbours; give them a large
            # finite weight instead of dropping them from the vote.
            weight = 100.0
        vote += labels[idx] * weight
        weight_sum += weight

    if weight_sum > 0:
        prediction = vote / weight_sum
        # `prediction` is already normalised by the weight sum. Dividing it
        # by k as well would cap confidence at 1/k, far below the thresholds
        # it is compared with.
        confidence = min(abs(prediction), 1.0)
    else:
        prediction = 0.0
        confidence = 0.0

    return prediction, confidence

# fastmath is restricted to flags that do NOT assume the absence of NaN/Inf,
# because the np.isnan guards below are part of the logic.
@njit(fastmath={'nsz', 'arcp', 'contract', 'afn', 'reassoc'}, cache=True)
def calculate_mlmi_adaptive(ma_fast: np.ndarray, ma_slow: np.ndarray,
                           rsi_fast_smooth: np.ndarray, rsi_slow_smooth: np.ndarray,
                           close: np.ndarray, returns: np.ndarray,
                           k_neighbors: int = 200) -> Tuple[np.ndarray, np.ndarray]:
    """Compute the MLMI series and a confidence score for every bar.

    Each moving-average crossover is stored as a training pattern
    ``(rsi_slow_smooth, rsi_fast_smooth)`` labelled with the clipped return
    over the following 5 bars. A pattern enters the training set only once
    those 5 bars have elapsed, so a prediction at bar ``i`` never uses prices
    later than ``close[i]``.

    Args:
        ma_fast: Fast moving average.
        ma_slow: Slow moving average.
        rsi_fast_smooth: Smoothed fast RSI (second feature).
        rsi_slow_smooth: Smoothed slow RSI (first feature).
        close: Closing prices; defines the output length.
        returns: Per-bar returns used for the 20-bar rolling volatility.
        k_neighbors: Base neighbour count passed to ``adaptive_knn_predict``.

    Returns:
        ``(mlmi_values, mlmi_confidence)``; ``mlmi_values`` is the KNN
        prediction multiplied by 100. Bars without a prediction hold 0.0.
    """
    n = len(close)
    mlmi_values = np.zeros(n)
    mlmi_confidence = np.zeros(n)

    forward_bars = 5  # label horizon, in bars

    # Fixed-size pattern store
    max_size = min(10000, n)
    features = np.zeros((max_size, 2))
    labels = np.zeros(max_size)
    data_size = 0

    # Rolling volatility over the previous 20 returns
    volatility = np.zeros(n)
    for i in range(20, n):
        volatility[i] = np.std(returns[i-20:i])

    for i in range(1, n):
        # --- Learn: the crossover (if any) at bar j has a known outcome now
        j = i - forward_bars
        if j >= 1:
            ma_ok = not (np.isnan(ma_fast[j]) or np.isnan(ma_slow[j]) or
                         np.isnan(ma_fast[j-1]) or np.isnan(ma_slow[j-1]))
            rsi_ok = not (np.isnan(rsi_fast_smooth[j]) or np.isnan(rsi_slow_smooth[j]))
            if ma_ok and rsi_ok and close[j] > 0 and not np.isnan(close[i]):
                bull_cross = ma_fast[j] > ma_slow[j] and ma_fast[j-1] <= ma_slow[j-1]
                bear_cross = ma_fast[j] < ma_slow[j] and ma_fast[j-1] >= ma_slow[j-1]

                if bull_cross or bear_cross:
                    if data_size >= max_size:
                        # Store is full: keep the most recent 75%.
                        # .copy() because source and target ranges overlap.
                        keep_size = int(max_size * 0.75)
                        features[:keep_size] = features[-keep_size:].copy()
                        labels[:keep_size] = labels[-keep_size:].copy()
                        data_size = keep_size

                    features[data_size, 0] = rsi_slow_smooth[j]
                    features[data_size, 1] = rsi_fast_smooth[j]

                    # Realised return from bar j to bar i, scaled to [-1, 1]
                    fwd_ret = (close[i] - close[j]) / close[j]
                    labels[data_size] = np.sign(fwd_ret) * min(abs(fwd_ret) * 100, 1.0)
                    data_size += 1

        # --- Predict for bar i from the patterns learned so far
        if data_size > 10 and not np.isnan(rsi_fast_smooth[i]) and not np.isnan(rsi_slow_smooth[i]):
            query = np.array([rsi_slow_smooth[i], rsi_fast_smooth[i]])
            pred, conf = adaptive_knn_predict(features[:data_size, :], labels[:data_size], query,
                                              k_neighbors, volatility[i], data_size)
            mlmi_values[i] = pred * 100  # Scale for visibility
            mlmi_confidence[i] = conf

    return mlmi_values, mlmi_confidence

# Calculate MLMI with confidence, using the neighbour count and confidence
# threshold from `config` rather than literals so a changed configuration
# actually takes effect.
print("\nCalculating adaptive MLMI with confidence scores...")
start_time = time.time()

returns_30m = df_30m['Returns'].values
mlmi_values, mlmi_confidence = calculate_mlmi_adaptive(
    ma5, ma20, rsi5_smooth, rsi20_smooth, close_30m, returns_30m,
    config.mlmi_k_neighbors
)

# Store in dataframe; a bull/bear flag needs both a signed MLMI value and a
# confidence above the configured threshold
df_30m['mlmi'] = mlmi_values
df_30m['mlmi_confidence'] = mlmi_confidence
df_30m['mlmi_bull'] = (mlmi_values > 0) & (mlmi_confidence > config.mlmi_confidence_threshold)
df_30m['mlmi_bear'] = (mlmi_values < 0) & (mlmi_confidence > config.mlmi_confidence_threshold)

mlmi_time = time.time() - start_time
print(f"Adaptive MLMI calculated in {mlmi_time:.3f} seconds")
print(f"MLMI range: [{mlmi_values.min():.1f}, {mlmi_values.max():.1f}]")
print(f"Average confidence: {mlmi_confidence.mean():.3f}")

In [None]:
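# Cell 4 (revised): Advanced MLMI with Adaptive KNN and Error Handling
# This cell redefines adaptive_knn_predict and calculate_mlmi_adaptive from the previous cell.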

@njit(fastmath=True, cache=True)
def adaptive_knn_predict(features: np.ndarray, labels: np.ndarray, query: np.ndarray,
                        k_base: int, volatility: float, size: int) -> Tuple[float, float]:
    """Predict a label for ``query`` by distance-weighted k-nearest-neighbour vote.

    Args:
        features: Stored patterns, one per row; only the first ``size`` rows
            are read.
        labels: Label of each stored pattern.
        query: Pattern to classify.
        k_base: Neighbour count at zero volatility; ``k`` shrinks as
            ``volatility`` grows, never below 3 nor above ``size``.
        volatility: Recent return volatility.
        size: Number of valid rows in ``features`` / ``labels``.

    Returns:
        ``(prediction, confidence)``. ``prediction`` is the weighted mean of
        the neighbours' labels and ``confidence`` is its magnitude capped at
        1.0. With no stored patterns the result is ``(0.0, 0.5)``.
    """
    if size == 0:
        return 0.0, 0.5

    # Fewer neighbours when volatility is high; the factor is clamped to [0, 1]
    vol_factor = max(0.0, min(1.0, 1.0 - volatility * 2.0))
    k = max(3, min(k_base, int(k_base * vol_factor)))
    k = min(k, size)  # from here on 1 <= k <= size

    # Euclidean distance over the dimensions present in both arrays
    n_dims = min(features.shape[1], len(query))
    distances = np.zeros(size)
    for i in range(size):
        dist = 0.0
        for j in range(n_dims):
            diff = features[i, j] - query[j]
            dist += diff * diff
        distances[i] = np.sqrt(dist)

    # Indices of the k smallest distances (unordered among themselves)
    indices = np.argpartition(distances, k-1)[:k]

    vote = 0.0
    weight_sum = 0.0

    for i in range(k):
        idx = indices[i]
        if distances[idx] > 1e-10:  # Avoid division by very small numbers
            weight = 1.0 / distances[idx]
        else:
            # Exact match: large finite weight
            weight = 100.0
        vote += labels[idx] * weight
        weight_sum += weight

    if weight_sum > 1e-10:
        prediction = vote / weight_sum
        # `prediction` is already normalised by the weight sum. Dividing it
        # by k as well would cap confidence at 1/k, far below the thresholds
        # it is compared with.
        confidence = min(abs(prediction), 1.0)
    else:
        prediction = 0.0
        confidence = 0.0

    return prediction, confidence

# fastmath is restricted to flags that do NOT assume the absence of NaN/Inf,
# because the np.isnan guards below are part of the logic.
@njit(fastmath={'nsz', 'arcp', 'contract', 'afn', 'reassoc'}, cache=True)
def calculate_mlmi_adaptive(ma_fast: np.ndarray, ma_slow: np.ndarray,
                           rsi_fast_smooth: np.ndarray, rsi_slow_smooth: np.ndarray,
                           close: np.ndarray, returns: np.ndarray,
                           k_neighbors: int = 200,
                           min_confidence: float = 0.1,
                           forward_bars: int = 5) -> Tuple[np.ndarray, np.ndarray]:
    """Compute the MLMI series and a confidence score for every bar.

    Each moving-average crossover is stored as a training pattern
    ``(rsi_slow_smooth, rsi_fast_smooth)`` labelled with the clipped return
    over the following ``forward_bars`` bars. A pattern enters the training
    set only once that horizon has elapsed, so a prediction at bar ``i``
    never uses prices later than ``close[i]``.

    Args:
        ma_fast: Fast moving average.
        ma_slow: Slow moving average.
        rsi_fast_smooth: Smoothed fast RSI (second feature).
        rsi_slow_smooth: Smoothed slow RSI (first feature).
        close: Closing prices; defines the output length.
        returns: Per-bar returns used for the 20-bar rolling volatility.
        k_neighbors: Base neighbour count passed to ``adaptive_knn_predict``.
        min_confidence: Predictions with lower confidence are reported as 0.
        forward_bars: Label horizon in bars; values below 1 are treated as 1.

    Returns:
        ``(mlmi_values, mlmi_confidence)``; ``mlmi_values`` is the KNN
        prediction multiplied by 100. Bars without an accepted prediction
        hold 0.0 in both arrays.
    """
    n = len(close)
    mlmi_values = np.zeros(n)
    mlmi_confidence = np.zeros(n)

    # Input validation
    if n < 10:
        return mlmi_values, mlmi_confidence

    horizon = max(1, forward_bars)

    # Fixed-size pattern store
    max_size = min(10000, n)
    features = np.zeros((max_size, 2))
    labels = np.zeros(max_size)
    data_size = 0

    # Rolling volatility over the previous 20 returns, ignoring NaN
    volatility = np.zeros(n)
    for i in range(20, n):
        window_returns = returns[i-20:i]
        valid_returns = window_returns[~np.isnan(window_returns)]
        if len(valid_returns) > 1:
            volatility[i] = np.std(valid_returns)
        else:
            volatility[i] = 0.01  # Default volatility

    for i in range(1, n):
        # --- Learn: the crossover (if any) at bar j has a known outcome now
        j = i - horizon
        if j >= 1:
            ma_ok = not (np.isnan(ma_fast[j]) or np.isnan(ma_slow[j]) or
                         np.isnan(ma_fast[j-1]) or np.isnan(ma_slow[j-1]))
            rsi_ok = not (np.isnan(rsi_fast_smooth[j]) or np.isnan(rsi_slow_smooth[j]))
            # Patterns whose return cannot be computed are not stored at all
            if ma_ok and rsi_ok and close[j] > 0 and not np.isnan(close[i]):
                bull_cross = ma_fast[j] > ma_slow[j] and ma_fast[j-1] <= ma_slow[j-1]
                bear_cross = ma_fast[j] < ma_slow[j] and ma_fast[j-1] >= ma_slow[j-1]

                if bull_cross or bear_cross:
                    if data_size >= max_size:
                        # Store is full: keep the most recent 75%.
                        # .copy() because source and target ranges overlap.
                        keep_size = int(max_size * 0.75)
                        features[:keep_size] = features[-keep_size:].copy()
                        labels[:keep_size] = labels[-keep_size:].copy()
                        data_size = keep_size

                    features[data_size, 0] = rsi_slow_smooth[j]
                    features[data_size, 1] = rsi_fast_smooth[j]

                    # Realised return from bar j to bar i, clipped to +/-10%
                    # and scaled to [-1, 1]
                    fwd_ret = (close[i] - close[j]) / close[j]
                    fwd_ret = max(-0.1, min(0.1, fwd_ret))
                    labels[data_size] = np.sign(fwd_ret) * min(abs(fwd_ret) * 100, 1.0)
                    data_size += 1

        # --- Predict for bar i; skipped while the moving averages are undefined
        if (np.isnan(ma_fast[i]) or np.isnan(ma_slow[i]) or
            np.isnan(ma_fast[i-1]) or np.isnan(ma_slow[i-1])):
            continue

        if (data_size > 10 and not np.isnan(rsi_fast_smooth[i]) and
            not np.isnan(rsi_slow_smooth[i])):
            query = np.array([rsi_slow_smooth[i], rsi_fast_smooth[i]])
            pred, conf = adaptive_knn_predict(
                features[:data_size, :],
                labels[:data_size],
                query,
                k_neighbors,
                volatility[i],
                data_size
            )

            # Outputs are zero-initialised, so rejected predictions stay 0.0
            if conf >= min_confidence:
                mlmi_values[i] = pred * 100  # Scale for visibility
                mlmi_confidence[i] = conf

    return mlmi_values, mlmi_confidence

# Compute MLMI on the 30-minute bars, store the results on df_30m, and report
# summary statistics.
print("\nCalculating adaptive MLMI with confidence scores...")
logger.info("Starting MLMI calculation")
start_time = time.time()

try:
    # Recreate the Returns column if an earlier step did not provide it
    if 'Returns' not in df_30m.columns:
        df_30m['Returns'] = df_30m['Close'].pct_change().fillna(0)
    
    returns_30m = df_30m['Returns'].values
    
    # The configured confidence threshold is passed as min_confidence, so
    # predictions below it come back as zero from the function itself
    mlmi_values, mlmi_confidence = calculate_mlmi_adaptive(
        ma5, ma20, rsi5_smooth, rsi20_smooth, close_30m, returns_30m,
        config.mlmi_k_neighbors, config.mlmi_confidence_threshold
    )
    
    # A bull/bear flag needs a signed MLMI value and a confidence strictly
    # above the configured threshold
    df_30m['mlmi'] = mlmi_values
    df_30m['mlmi_confidence'] = mlmi_confidence
    df_30m['mlmi_bull'] = (mlmi_values > 0) & (mlmi_confidence > config.mlmi_confidence_threshold)
    df_30m['mlmi_bear'] = (mlmi_values < 0) & (mlmi_confidence > config.mlmi_confidence_threshold)
    
    mlmi_time = time.time() - start_time
    
    # Log statistics over the non-zero MLMI values only; nothing is logged
    # when every value is zero
    valid_mlmi = mlmi_values[mlmi_values != 0]
    if len(valid_mlmi) > 0:
        logger.info(f"MLMI calculated in {mlmi_time:.3f} seconds")
        logger.info(f"MLMI range: [{valid_mlmi.min():.1f}, {valid_mlmi.max():.1f}]")
        logger.info(f"Average confidence: {mlmi_confidence[mlmi_confidence > 0].mean():.3f}")
        logger.info(f"Bull signals: {df_30m['mlmi_bull'].sum()}, Bear signals: {df_30m['mlmi_bear'].sum()}")
    
    # Printed figures cover all bars, including those left at zero
    print(f"Adaptive MLMI calculated in {mlmi_time:.3f} seconds")
    print(f"MLMI range: [{mlmi_values.min():.1f}, {mlmi_values.max():.1f}]")
    print(f"Average confidence: {mlmi_confidence.mean():.3f}")
    
except Exception as e:
    logger.error(f"Error calculating MLMI: {str(e)}")
    # Leave neutral MLMI columns on df_30m before re-raising, so the columns
    # exist even though this cell failed
    df_30m['mlmi'] = 0
    df_30m['mlmi_confidence'] = 0
    df_30m['mlmi_bull'] = False
    df_30m['mlmi_bear'] = False
    raise

In [None]:
# Cell 5: Enhanced NW-RQK with Multiple Kernels and Error Handling

@njit(fastmath=True, cache=True)
def gaussian_kernel(x: float, h: float) -> float:
    """Return the Gaussian weight ``exp(-x**2 / (2 * h**2))`` for distance ``x``.

    A non-positive bandwidth ``h`` yields 0.0, and so does any exponent
    below -50, where the weight is negligible.
    """
    if h <= 0:
        return 0.0
    exponent = -(x * x) / (2.0 * h * h)
    # Short-circuit tiny weights instead of evaluating exp() on them
    return 0.0 if exponent < -50 else np.exp(exponent)

@njit(fastmath=True, cache=True)
def epanechnikov_kernel(x: float, h: float) -> float:
    """Return the Epanechnikov weight ``0.75 * (1 - (x / h)**2)`` on ``|x / h| <= 1``.

    The weight is 0.0 outside that support and for a non-positive bandwidth.
    """
    if h <= 0:
        return 0.0
    scaled = x / h
    inside_support = abs(scaled) <= 1
    return 0.75 * (1 - scaled * scaled) if inside_support else 0.0

# Fast-math is limited to flags that do not assume "no NaN / no Inf": the body
# depends on np.isnan(), which the full fastmath=True set allows the compiler
# to optimise away.
@njit(parallel=True, fastmath={'reassoc', 'nsz', 'arcp', 'contract', 'afn'}, cache=True)
def nadaraya_watson_ensemble(prices: np.ndarray, h: float, r: float,
                           min_periods: int = 25) -> Tuple[np.ndarray, np.ndarray]:
    """Causal Nadaraya-Watson smoothing with two kernels, averaged into an ensemble.

    For every bar ``i >= min_periods`` the estimate is a weighted mean of the
    current price and up to 499 preceding prices, weighted by the lag ``j``:

    * Rational Quadratic: ``(1 + j**2 / (2 * r * h**2)) ** -r``
    * Gaussian: ``gaussian_kernel(j, h)`` (weights <= 1e-10 are ignored)

    Args:
        prices: 1-D price series. NaN entries are skipped inside a window, and
            no estimate is produced for a bar whose own price is NaN.
        h: Kernel bandwidth in bars; must be > 0.
        r: Rational Quadratic shape parameter; must be > 0.
        min_periods: First index for which an estimate is produced.

    Returns:
        ``(ensemble, result_rq)``: the per-bar mean of whichever kernel
        estimates are available, and the Rational Quadratic estimate alone.
        Entries without an estimate are NaN. With fewer than ``min_periods``
        prices or a non-positive ``h``/``r`` both arrays are entirely NaN.
    """
    n = len(prices)
    result_rq = np.full(n, np.nan)  # Rational Quadratic
    result_gauss = np.full(n, np.nan)  # Gaussian

    # Input validation
    if n < min_periods or h <= 0 or r <= 0:
        return result_rq, result_gauss

    # Loop-invariant part of the Rational Quadratic weight
    rq_scale = h * h * 2.0 * r
    rq_usable = rq_scale > 0

    for i in prange(min_periods, n):
        # Skip if price is invalid
        if np.isnan(prices[i]):
            continue

        weighted_sum_rq = 0.0
        weight_sum_rq = 0.0
        weighted_sum_gauss = 0.0
        weight_sum_gauss = 0.0

        # window_size never exceeds i + 1, so i - j stays a valid index
        window_size = min(i + 1, 500)

        for j in range(window_size):
            price = prices[i - j]
            if np.isnan(price):
                continue

            # Rational Quadratic
            if rq_usable:
                weight_rq = (1.0 + (j * j) / rq_scale) ** (-r)
                weighted_sum_rq += price * weight_rq
                weight_sum_rq += weight_rq

            # Gaussian
            weight_gauss = gaussian_kernel(float(j), h)
            if weight_gauss > 1e-10:
                weighted_sum_gauss += price * weight_gauss
                weight_sum_gauss += weight_gauss

        # Only normalise when the accumulated weight is meaningfully non-zero
        if weight_sum_rq > 1e-10:
            result_rq[i] = weighted_sum_rq / weight_sum_rq
        if weight_sum_gauss > 1e-10:
            result_gauss[i] = weighted_sum_gauss / weight_sum_gauss

    # Ensemble: average of whichever kernel estimates exist for the bar
    ensemble = np.full(n, np.nan)
    for i in range(n):
        count = 0
        sum_val = 0.0

        if not np.isnan(result_rq[i]):
            sum_val += result_rq[i]
            count += 1
        if not np.isnan(result_gauss[i]):
            sum_val += result_gauss[i]
            count += 1

        if count > 0:
            ensemble[i] = sum_val / count

    return ensemble, result_rq

# Fast-math is limited to flags that do not assume "no NaN / no Inf": the
# regression lines are NaN during warm-up and the np.isnan() guards below must
# not be optimised away, which fastmath=True permits.
@njit(fastmath={'reassoc', 'nsz', 'arcp', 'contract', 'afn'}, cache=True)
def detect_nwrqk_signals_enhanced(yhat1: np.ndarray, yhat2: np.ndarray,
                                 prices: np.ndarray,
                                 min_slope_change: float = 1e-6) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Flag bullish/bearish NW-RQK signals and score their strength.

    Two independent rules are evaluated per bar ``i >= 2``:

    1. Slope reversal of ``yhat1``: the previous slope and the current slope
       have opposite signs and, together with the acceleration, each exceed
       ``min_slope_change`` in magnitude. Strength is
       ``min(|acceleration| * 1000, 1)``.
    2. Crossover (only for ``i > 5``): ``yhat2`` crosses ``yhat1`` in the
       direction of the 5-bar price momentum. Strength is raised to at least
       ``min(|momentum| * 50, 1)``; momentum is clipped to [-0.5, 0.5].

    The rules do not exclude each other, so a bar can carry both a bull and a
    bear flag when they disagree.

    Args:
        yhat1: Primary regression line (may contain NaN).
        yhat2: Secondary regression line (may contain NaN).
        prices: Price series used for the momentum filter.
        min_slope_change: Minimum absolute slope/acceleration for rule 1.

    Returns:
        ``(bull_signals, bear_signals, signal_strength)``, each of
        ``len(yhat1)``; all-False / all-zero when fewer than 3 bars are given.
    """
    n = len(yhat1)
    bull_signals = np.zeros(n, dtype=np.bool_)
    bear_signals = np.zeros(n, dtype=np.bool_)
    signal_strength = np.zeros(n)

    if n < 3:
        return bull_signals, bear_signals, signal_strength

    for i in range(2, n):
        if not np.isnan(yhat1[i]) and not np.isnan(yhat1[i-1]) and not np.isnan(yhat1[i-2]):
            slope_prev = yhat1[i-1] - yhat1[i-2]
            slope_curr = yhat1[i] - yhat1[i-1]
            acceleration = slope_curr - slope_prev

            # Bullish: negative to positive slope with positive acceleration
            if slope_prev < -min_slope_change and slope_curr > min_slope_change and acceleration > min_slope_change:
                bull_signals[i] = True
                signal_strength[i] = min(abs(acceleration) * 1000, 1.0)

            # Bearish: positive to negative slope with negative acceleration
            elif slope_prev > min_slope_change and slope_curr < -min_slope_change and acceleration < -min_slope_change:
                bear_signals[i] = True
                signal_strength[i] = min(abs(acceleration) * 1000, 1.0)

        # Crossovers with momentum
        if i > 5 and not np.isnan(yhat1[i]) and not np.isnan(yhat2[i]):
            if not np.isnan(yhat1[i-1]) and not np.isnan(yhat2[i-1]) and not np.isnan(prices[i]):
                # i > 5 guarantees i - 5 is a valid, non-negative index.
                # A non-positive (or NaN) base price yields neutral momentum.
                base_price = prices[i - 5]
                if base_price > 0:
                    price_momentum = (prices[i] - base_price) / base_price
                    price_momentum = max(-0.5, min(0.5, price_momentum))  # Clip extreme values
                else:
                    price_momentum = 0.0

                # NOTE(review): the threshold is 0.1% of the current gap
                # itself, so in practice it only rejects an exact tie between
                # the two lines - confirm that is the intended filter.
                cross_threshold = abs(yhat1[i] - yhat2[i]) * 0.001

                if yhat2[i] > yhat1[i] + cross_threshold and yhat2[i-1] <= yhat1[i-1] and price_momentum > 0:
                    bull_signals[i] = True
                    signal_strength[i] = max(signal_strength[i], min(abs(price_momentum) * 50, 1.0))
                elif yhat2[i] < yhat1[i] - cross_threshold and yhat2[i-1] >= yhat1[i-1] and price_momentum < 0:
                    bear_signals[i] = True
                    signal_strength[i] = max(signal_strength[i], min(abs(price_momentum) * 50, 1.0))

    return bull_signals, bear_signals, signal_strength

# Calculate enhanced NW-RQK
# Builds two regression lines on the 30-minute closes (bandwidth h and the
# faster h - lag), derives bull/bear signals from them and stores the results
# on df_30m. close_30m, config, check_memory_usage and cleanup_memory are not
# defined in this cell and must already exist.
print("\nCalculating enhanced NW-RQK with ensemble kernels...")
logger.info("Starting NW-RQK calculation")
start_time = time.time()

try:
    # Validate parameters (clamp to sane minimums)
    h = max(1.0, config.nwrqk_h)
    r = max(1.0, config.nwrqk_r)
    lag = max(1, config.nwrqk_lag)

    # nadaraya_watson_ensemble returns all-NaN for a non-positive bandwidth,
    # which silently removes every crossover signal. Surface that instead of
    # letting it pass unnoticed.
    if h - lag <= 0:
        logger.warning(
            f"NW-RQK fast bandwidth h - lag = {h - lag} is not positive; "
            "crossover signals will be disabled"
        )

    # Calculate regression lines
    yhat1, yhat1_rq = nadaraya_watson_ensemble(close_30m, h, r)
    yhat2, yhat2_rq = nadaraya_watson_ensemble(close_30m, h - lag, r)

    # Detect signals with strength
    nwrqk_bull, nwrqk_bear, nwrqk_strength = detect_nwrqk_signals_enhanced(
        yhat1, yhat2, close_30m
    )

    # Store in dataframe
    df_30m['nwrqk_bull'] = nwrqk_bull
    df_30m['nwrqk_bear'] = nwrqk_bear
    df_30m['nwrqk_strength'] = nwrqk_strength
    df_30m['yhat1'] = yhat1
    df_30m['yhat2'] = yhat2

    # Kept in a global; the final summary cell adds it to the total run time
    nwrqk_time = time.time() - start_time

    # Log statistics
    logger.info(f"NW-RQK calculated in {nwrqk_time:.3f} seconds")
    logger.info(f"Bull signals: {nwrqk_bull.sum():,}, Bear signals: {nwrqk_bear.sum():,}")

    valid_strength = nwrqk_strength[nwrqk_strength > 0]
    if len(valid_strength) > 0:
        logger.info(f"Average signal strength: {valid_strength.mean():.3f}")
        logger.info(f"Max signal strength: {valid_strength.max():.3f}")

    print(f"Enhanced NW-RQK calculated in {nwrqk_time:.3f} seconds")
    print(f"Bull signals: {nwrqk_bull.sum():,}, Bear signals: {nwrqk_bear.sum():,}")
    # The mean of an empty selection is NaN; report the absence explicitly
    if len(valid_strength) > 0:
        print(f"Average signal strength: {valid_strength.mean():.3f}")
    else:
        print("Average signal strength: n/a (no signals)")

    # Memory cleanup once usage passes 80% of the configured budget
    if check_memory_usage() > config.max_memory_gb * 0.8:
        cleanup_memory()

except Exception as e:
    logger.error(f"Error calculating NW-RQK: {str(e)}")
    # Set neutral default columns so df_30m keeps the expected schema, then
    # re-raise so the failure is not hidden.
    df_30m['nwrqk_bull'] = False
    df_30m['nwrqk_bear'] = False
    df_30m['nwrqk_strength'] = 0.0
    df_30m['yhat1'] = np.nan
    df_30m['yhat2'] = np.nan
    raise

In [None]:
# Cell 5: Enhanced NW-RQK with Multiple Kernels

@njit(fastmath=True, cache=True)
def gaussian_kernel(x: float, h: float) -> float:
    """Return the Gaussian weight ``exp(-x**2 / (2 * h**2))`` for distance ``x``.

    A non-positive bandwidth ``h`` yields 0.0 rather than dividing by zero.
    """
    if h <= 0:
        return 0.0
    return np.exp(-(x * x) / (2.0 * h * h))

@njit(fastmath=True, cache=True)
def epanechnikov_kernel(x: float, h: float) -> float:
    """Return the Epanechnikov weight ``0.75 * (1 - (x / h)**2)`` on ``|x / h| <= 1``.

    The weight is 0.0 outside that support. A non-positive bandwidth ``h``
    yields 0.0 rather than dividing by zero.
    """
    if h <= 0:
        return 0.0
    u = x / h
    if abs(u) <= 1:
        return 0.75 * (1 - u * u)
    return 0.0

# Fast-math is limited to flags that do not assume "no NaN / no Inf": the body
# depends on np.isnan(), which the full fastmath=True set allows the compiler
# to optimise away.
@njit(parallel=True, fastmath={'reassoc', 'nsz', 'arcp', 'contract', 'afn'}, cache=True)
def nadaraya_watson_ensemble(prices: np.ndarray, h: float, r: float,
                           min_periods: int = 25) -> Tuple[np.ndarray, np.ndarray]:
    """Ensemble NW regression with Rational Quadratic and Gaussian kernels.

    For every bar ``i >= min_periods`` each kernel produces a weighted mean of
    the current price and up to 499 preceding prices, weighted by the lag
    ``j``. NaN prices are left out of the window so that a single gap does not
    turn the following estimates into NaN.

    Args:
        prices: 1-D price series.
        h: Kernel bandwidth in bars; must be > 0.
        r: Rational Quadratic shape parameter; must be > 0.
        min_periods: First index for which an estimate is produced.

    Returns:
        ``(ensemble, result_rq)``: the mean of the available kernel estimates
        and the Rational Quadratic estimate alone. Entries without an estimate
        are NaN; a non-positive ``h`` or ``r`` gives all-NaN arrays instead of
        a division by zero.
    """
    n = len(prices)
    result_rq = np.full(n, np.nan)  # Rational Quadratic
    result_gauss = np.full(n, np.nan)  # Gaussian

    # Both kernels divide by h (and the RQ kernel by r)
    if h <= 0 or r <= 0:
        return result_rq, result_gauss

    # Loop-invariant part of the Rational Quadratic weight
    rq_scale = h * h * 2.0 * r

    for i in prange(min_periods, n):
        weighted_sum_rq = 0.0
        weight_sum_rq = 0.0
        weighted_sum_gauss = 0.0
        weight_sum_gauss = 0.0

        # window_size never exceeds i + 1, so i - j stays a valid index
        window_size = min(i + 1, 500)

        for j in range(window_size):
            price = prices[i - j]
            if np.isnan(price):
                continue

            # Rational Quadratic
            weight_rq = (1.0 + (j * j) / rq_scale) ** (-r)
            weighted_sum_rq += price * weight_rq
            weight_sum_rq += weight_rq

            # Gaussian
            weight_gauss = gaussian_kernel(float(j), h)
            weighted_sum_gauss += price * weight_gauss
            weight_sum_gauss += weight_gauss

        if weight_sum_rq > 0:
            result_rq[i] = weighted_sum_rq / weight_sum_rq
        if weight_sum_gauss > 0:
            result_gauss[i] = weighted_sum_gauss / weight_sum_gauss

    # Ensemble: average of both kernels, falling back to whichever exists
    ensemble = np.full(n, np.nan)
    for i in range(n):
        has_rq = not np.isnan(result_rq[i])
        has_gauss = not np.isnan(result_gauss[i])
        if has_rq and has_gauss:
            ensemble[i] = (result_rq[i] + result_gauss[i]) / 2
        elif has_rq:
            ensemble[i] = result_rq[i]
        elif has_gauss:
            ensemble[i] = result_gauss[i]

    return ensemble, result_rq

# Fast-math is limited to flags that do not assume "no NaN / no Inf": the
# regression lines are NaN during warm-up and the np.isnan() guards below must
# not be optimised away, which fastmath=True permits.
@njit(fastmath={'reassoc', 'nsz', 'arcp', 'contract', 'afn'}, cache=True)
def detect_nwrqk_signals_enhanced(yhat1: np.ndarray, yhat2: np.ndarray,
                                 prices: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Flag bullish/bearish NW-RQK signals and score their strength.

    Two independent rules are evaluated per bar ``i >= 2``:

    1. Slope reversal of ``yhat1`` (slope changes sign with acceleration in
       the new direction); strength ``min(|acceleration| * 1000, 1)``.
    2. ``yhat2`` crossing ``yhat1`` in the direction of the price momentum
       over the last 5 bars (or since bar 0 for earlier bars); strength is
       raised to at least ``min(|momentum| * 50, 1)``.

    Args:
        yhat1: Primary regression line (may contain NaN).
        yhat2: Secondary regression line (may contain NaN).
        prices: Price series used for the momentum filter.

    Returns:
        ``(bull_signals, bear_signals, signal_strength)``, each of
        ``len(yhat1)``.
    """
    n = len(yhat1)
    bull_signals = np.zeros(n, dtype=np.bool_)
    bear_signals = np.zeros(n, dtype=np.bool_)
    signal_strength = np.zeros(n)

    for i in range(2, n):
        if not np.isnan(yhat1[i]) and not np.isnan(yhat1[i-1]) and not np.isnan(yhat1[i-2]):
            # Trend changes
            slope_prev = yhat1[i-1] - yhat1[i-2]
            slope_curr = yhat1[i] - yhat1[i-1]

            # Acceleration
            acceleration = slope_curr - slope_prev

            # Bullish: negative to positive slope with positive acceleration
            if slope_prev < 0 and slope_curr > 0 and acceleration > 0:
                bull_signals[i] = True
                signal_strength[i] = min(abs(acceleration) * 1000, 1.0)

            # Bearish: positive to negative slope with negative acceleration
            elif slope_prev > 0 and slope_curr < 0 and acceleration < 0:
                bear_signals[i] = True
                signal_strength[i] = min(abs(acceleration) * 1000, 1.0)

        # Crossovers with momentum (i >= 2 here, so i - 1 is always valid)
        if not np.isnan(yhat1[i]) and not np.isnan(yhat2[i]):
            if not np.isnan(yhat1[i-1]) and not np.isnan(yhat2[i-1]):
                # Price momentum filter. A non-positive (or NaN) base price
                # gives neutral momentum instead of a division by zero.
                base_price = prices[max(0, i-5)]
                if base_price > 0:
                    price_momentum = (prices[i] - base_price) / base_price
                else:
                    price_momentum = 0.0

                if yhat2[i] > yhat1[i] and yhat2[i-1] <= yhat1[i-1] and price_momentum > 0:
                    bull_signals[i] = True
                    signal_strength[i] = max(signal_strength[i], min(abs(price_momentum) * 50, 1.0))
                elif yhat2[i] < yhat1[i] and yhat2[i-1] >= yhat1[i-1] and price_momentum < 0:
                    bear_signals[i] = True
                    signal_strength[i] = max(signal_strength[i], min(abs(price_momentum) * 50, 1.0))

    return bull_signals, bear_signals, signal_strength

# Calculate enhanced NW-RQK
# NOTE(review): this cell recomputes the same df_30m columns and globals
# (h, r, lag, yhat1, yhat2, nwrqk_*, nwrqk_time) as the preceding
# "Cell 5: Enhanced NW-RQK with Multiple Kernels and Error Handling", but with
# hard-coded parameters instead of config.nwrqk_h / nwrqk_r / nwrqk_lag and
# without its try/except. Run after that cell, it overwrites its results -
# confirm which of the two cells is meant to stay.
print("\nCalculating enhanced NW-RQK with ensemble kernels...")
start_time = time.time()

# Parameters (bandwidth, Rational Quadratic shape, bandwidth offset of the
# second regression line)
h = 8.0
r = 8.0
lag = 2

# Calculate regression lines: yhat1 uses bandwidth h, yhat2 uses h - lag
yhat1, yhat1_rq = nadaraya_watson_ensemble(close_30m, h, r)
yhat2, yhat2_rq = nadaraya_watson_ensemble(close_30m, h - lag, r)

# Detect signals with strength
nwrqk_bull, nwrqk_bear, nwrqk_strength = detect_nwrqk_signals_enhanced(yhat1, yhat2, close_30m)

# Store in dataframe
df_30m['nwrqk_bull'] = nwrqk_bull
df_30m['nwrqk_bear'] = nwrqk_bear
df_30m['nwrqk_strength'] = nwrqk_strength
df_30m['yhat1'] = yhat1
df_30m['yhat2'] = yhat2

# Kept in a global; the final summary cell adds it to the total run time
nwrqk_time = time.time() - start_time
print(f"Enhanced NW-RQK calculated in {nwrqk_time:.3f} seconds")
print(f"Bull signals: {nwrqk_bull.sum():,}, Bear signals: {nwrqk_bear.sum():,}")
# NOTE: the mean below is NaN when no bar has a positive strength
print(f"Average signal strength: {nwrqk_strength[nwrqk_strength > 0].mean():.3f}")

In [None]:
# Cell 6: Smart Timeframe Alignment

def create_alignment_map(timestamps_5m: np.ndarray, timestamps_30m: np.ndarray) -> np.ndarray:
    """Map every 5-minute timestamp to the index of its 30-minute bar.

    For each element of ``timestamps_5m`` the result holds the index of the
    last entry of ``timestamps_30m`` that is less than or equal to it, or 0
    when the 5-minute timestamp precedes every 30-minute timestamp.

    The lookup is a vectorised binary search. The earlier implementation
    advanced one shared cursor inside a ``prange`` loop, which is only correct
    when iterations run strictly in order; it also could not accept the pandas
    Index objects the caller produces.

    Args:
        timestamps_5m: Numeric timestamps of the fine bars (array-like).
        timestamps_30m: Numeric timestamps of the coarse bars, sorted in
            ascending order (array-like).

    Returns:
        ``int64`` array of ``len(timestamps_5m)`` with positions into
        ``timestamps_30m``.

    NOTE(review): a 5-minute bar is mapped to the 30-minute bar whose timestamp
    it falls on or after. If 30-minute timestamps mark the bar open, indicators
    derived from that bar's close are not yet known at that moment - confirm
    the labelling convention to rule out look-ahead.
    """
    fine = np.asarray(timestamps_5m)
    coarse = np.asarray(timestamps_30m)

    # side='right' counts the coarse timestamps <= each fine timestamp;
    # subtracting one converts that count into the index of the last of them.
    last_started = np.searchsorted(coarse, fine, side='right') - 1

    # Fine bars that precede the first coarse bar fall back to index 0
    return np.maximum(last_started, 0).astype(np.int64)

print("\nPerforming smart timeframe alignment...")
start_time = time.time()

# Create datetime arrays for mapping
# Convert to plain NumPy arrays of epoch seconds: arithmetic on a pandas Index
# yields another Index, which a Numba-compiled function cannot accept.
timestamps_5m = df_5m.index.astype(np.int64).to_numpy() // 10**9
timestamps_30m = df_30m.index.astype(np.int64).to_numpy() // 10**9

# Create mapping: position of the 30m bar each 5m bar belongs to
mapping = create_alignment_map(timestamps_5m, timestamps_30m)

# Align all indicators efficiently (30m values are repeated onto 5m bars by
# positional indexing with the mapping)
df_5m_aligned = df_5m.copy()

# MLMI alignment with confidence
df_5m_aligned['mlmi'] = df_30m['mlmi'].values[mapping]
df_5m_aligned['mlmi_confidence'] = df_30m['mlmi_confidence'].values[mapping]
df_5m_aligned['mlmi_bull'] = df_30m['mlmi_bull'].values[mapping]
df_5m_aligned['mlmi_bear'] = df_30m['mlmi_bear'].values[mapping]

# NW-RQK alignment with strength
df_5m_aligned['nwrqk_bull'] = df_30m['nwrqk_bull'].values[mapping]
df_5m_aligned['nwrqk_bear'] = df_30m['nwrqk_bear'].values[mapping]
df_5m_aligned['nwrqk_strength'] = df_30m['nwrqk_strength'].values[mapping]

# FVG data (already on the 5m timeframe; fvg_bull / fvg_bear come from an
# earlier cell)
df_5m_aligned['fvg_bull'] = fvg_bull
df_5m_aligned['fvg_bear'] = fvg_bear

# Add market regime detection: 20-bar return volatility and the absolute
# 50-bar mean return expressed in units of that volatility
df_5m_aligned['volatility'] = df_5m_aligned['Returns'].rolling(20).std()
df_5m_aligned['trend_strength'] = abs(df_5m_aligned['Returns'].rolling(50).mean()) / df_5m_aligned['volatility']

# Kept in a global; the final summary cell adds it to the total run time
align_time = time.time() - start_time
print(f"Smart alignment completed in {align_time:.3f} seconds")
print(f"Aligned {len(df_5m_aligned):,} 5-minute bars")

In [None]:
# Cell 7: MLMI → NW-RQK → FVG Synergy Detection

# The scan below is a sequential state machine (each bar depends on the state
# left by the previous one), so it is compiled without parallel=True. Fast-math
# is limited to flags that do not assume "no NaN", because the volatility
# guard relies on np.isnan().
@njit(fastmath={'reassoc', 'nsz', 'arcp', 'contract', 'afn'}, cache=True)
def detect_mlmi_nwrqk_fvg_synergy(mlmi_bull: np.ndarray, mlmi_bear: np.ndarray,
                                 mlmi_conf: np.ndarray, nwrqk_bull: np.ndarray,
                                 nwrqk_bear: np.ndarray, nwrqk_strength: np.ndarray,
                                 fvg_bull: np.ndarray, fvg_bear: np.ndarray,
                                 volatility: np.ndarray, window: int = 30,
                                 min_mlmi_conf: float = 0.3,
                                 min_nwrqk_strength: float = 0.2) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Detect MLMI -> NW-RQK -> FVG entry signals with a quality score.

    Each direction runs through three stages, tracked bar by bar:

    1. Armed: ``mlmi_*`` turns on (was off on the previous bar) with
       ``mlmi_conf`` above ``min_mlmi_conf``.
    2. Confirmed: while armed, ``nwrqk_*`` fires with ``nwrqk_strength`` above
       ``min_nwrqk_strength``.
    3. Entry: while confirmed, ``fvg_*`` is set. The entry is recorded and the
       direction's state is cleared.

    A direction's state is also cleared when the opposite MLMI flag is set or
    when the shared timer exceeds ``window`` bars. The timer restarts whenever
    either MLMI flag is set, a setup is armed, or an entry is taken.

    Args:
        mlmi_bull, mlmi_bear: Boolean MLMI regime flags per bar.
        mlmi_conf: MLMI confidence per bar.
        nwrqk_bull, nwrqk_bear: Boolean NW-RQK signal flags per bar.
        nwrqk_strength: NW-RQK signal strength per bar.
        fvg_bull, fvg_bear: Boolean fair-value-gap flags per bar.
        volatility: Per-bar volatility; NaN is treated as "no adjustment".
        window: Timer limit in bars before pending state is cleared.
        min_mlmi_conf: Confidence an MLMI signal must exceed to arm a setup.
        min_nwrqk_strength: Strength an NW-RQK signal must exceed to confirm.

    Returns:
        ``(long_signals, short_signals, signal_quality)``, each of
        ``len(mlmi_bull)``. Quality is the mean of the entry bar's MLMI
        confidence and NW-RQK strength, scaled by
        ``1 / (1 + 10 * volatility)``. Bar 0 never produces a signal.
    """
    n = len(mlmi_bull)
    long_signals = np.zeros(n, dtype=np.bool_)
    short_signals = np.zeros(n, dtype=np.bool_)
    signal_quality = np.zeros(n)

    # State tracking: only the latest value is ever read, so scalars suffice
    bull_armed = False
    bear_armed = False
    bull_confirmed = False
    bear_confirmed = False
    timer = 0

    for i in range(1, n):
        timer += 1

        # Volatility adjustment
        vol = volatility[i]
        vol_factor = 1.0 / (1.0 + vol * 10) if not np.isnan(vol) else 1.0

        # Reset on opposite signal or timeout. The two checks are deliberately
        # evaluated in sequence: a timer restarted by the first one is what
        # the second one sees.
        if mlmi_bear[i] or timer > window:
            bull_armed = False
            bull_confirmed = False
            if mlmi_bear[i]:
                timer = 0

        if mlmi_bull[i] or timer > window:
            bear_armed = False
            bear_confirmed = False
            if mlmi_bull[i]:
                timer = 0

        # Step 1: MLMI signal (rising edge) with confidence filter
        if mlmi_bull[i] and not mlmi_bull[i-1] and mlmi_conf[i] > min_mlmi_conf:
            bull_armed = True
            bull_confirmed = False
            timer = 0

        if mlmi_bear[i] and not mlmi_bear[i-1] and mlmi_conf[i] > min_mlmi_conf:
            bear_armed = True
            bear_confirmed = False
            timer = 0

        # Step 2: NW-RQK confirmation with strength filter
        if bull_armed and not bull_confirmed and nwrqk_bull[i] and nwrqk_strength[i] > min_nwrqk_strength:
            bull_confirmed = True

        if bear_armed and not bear_confirmed and nwrqk_bear[i] and nwrqk_strength[i] > min_nwrqk_strength:
            bear_confirmed = True

        # Step 3: FVG final confirmation
        if bull_confirmed and fvg_bull[i]:
            long_signals[i] = True
            signal_quality[i] = (mlmi_conf[i] + nwrqk_strength[i]) / 2 * vol_factor
            # Reset states
            bull_armed = False
            bull_confirmed = False
            timer = 0

        if bear_confirmed and fvg_bear[i]:
            short_signals[i] = True
            signal_quality[i] = (mlmi_conf[i] + nwrqk_strength[i]) / 2 * vol_factor
            # Reset states
            bear_armed = False
            bear_confirmed = False
            timer = 0

    return long_signals, short_signals, signal_quality

# Run the synergy detector on the aligned 5-minute frame and store the
# resulting entry flags and quality score back onto it.
print("\nDetecting MLMI → NW-RQK → FVG synergy signals...")
start_time = time.time()

# Extract arrays (plain NumPy views of the aligned columns)
mlmi_bull_arr = df_5m_aligned['mlmi_bull'].values
mlmi_bear_arr = df_5m_aligned['mlmi_bear'].values
mlmi_conf_arr = df_5m_aligned['mlmi_confidence'].values
nwrqk_bull_arr = df_5m_aligned['nwrqk_bull'].values
nwrqk_bear_arr = df_5m_aligned['nwrqk_bear'].values
nwrqk_strength_arr = df_5m_aligned['nwrqk_strength'].values
fvg_bull_arr = df_5m_aligned['fvg_bull'].values
fvg_bear_arr = df_5m_aligned['fvg_bear'].values
# Missing volatility (e.g. the rolling warm-up) is replaced by 0.01
volatility_arr = df_5m_aligned['volatility'].fillna(0.01).values

# Detect synergy (the detector's `window` argument is left at its default)
long_entries, short_entries, signal_quality = detect_mlmi_nwrqk_fvg_synergy(
    mlmi_bull_arr, mlmi_bear_arr, mlmi_conf_arr,
    nwrqk_bull_arr, nwrqk_bear_arr, nwrqk_strength_arr,
    fvg_bull_arr, fvg_bear_arr, volatility_arr
)

# Add to dataframe
df_5m_aligned['long_entry'] = long_entries
df_5m_aligned['short_entry'] = short_entries
df_5m_aligned['signal_quality'] = signal_quality

# Kept in a global; the final summary cell adds it to the total run time and
# divides the signal count by it
signal_time = time.time() - start_time
print(f"Synergy detection completed in {signal_time:.3f} seconds")
print(f"Long entries: {long_entries.sum():,}")
print(f"Short entries: {short_entries.sum():,}")
# NOTE: the mean below is NaN when no entry has a positive quality score
print(f"Average signal quality: {signal_quality[signal_quality > 0].mean():.3f}")

In [None]:
# Cell 10: Statistical Validation and Robustness Testing

# Fast-math is limited to flags that do not assume "no NaN / no Inf": the NaN
# filter below depends on np.isnan(), which fastmath=True allows the compiler
# to optimise away.
@njit(parallel=True, fastmath={'reassoc', 'nsz', 'arcp', 'contract', 'afn'}, cache=True)
def bootstrap_confidence_intervals(returns: np.ndarray, n_bootstrap: int = 10000,
                                  confidence: float = 0.95) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Bootstrap the distribution of key performance metrics.

    Each of the ``n_bootstrap`` iterations resamples the non-NaN returns with
    replacement (same length as the cleaned series) and records:

    * total compounded return, ``prod(1 + r) - 1``
    * Sharpe ratio, ``mean / std * sqrt(252 * 78)`` (0 when std <= 1e-10)
    * maximum drawdown of the compounded curve (a value <= 0)
    * win rate, the share of strictly positive returns

    Iteration ``i`` seeds the generator with ``i``, so results are
    reproducible regardless of how iterations are spread across threads.

    Args:
        returns: 1-D array of per-bar returns; NaN entries are dropped.
        n_bootstrap: Number of bootstrap resamples.
        confidence: Accepted for interface compatibility; not used here - the
            caller derives the interval from the returned distributions.

    Returns:
        ``(boot_returns, boot_sharpes, boot_max_dd, boot_win_rates)``, each of
        length ``n_bootstrap``; all zeros when no valid return exists.
    """
    # Arrays to store bootstrap results
    boot_returns = np.zeros(n_bootstrap)
    boot_sharpes = np.zeros(n_bootstrap)
    boot_max_dd = np.zeros(n_bootstrap)
    boot_win_rates = np.zeros(n_bootstrap)

    # Remove NaN values
    clean_returns = returns[~np.isnan(returns)]
    n_clean = len(clean_returns)

    if n_clean == 0:
        return boot_returns, boot_sharpes, boot_max_dd, boot_win_rates

    annualization = np.sqrt(252 * 78)

    # Bootstrap iterations
    for i in prange(n_bootstrap):
        np.random.seed(i)  # For reproducibility

        # Resample with replacement. Indices are drawn one at a time with the
        # two-argument randint, and drawdown is tracked with an explicit
        # running peak, so the loop needs neither the `size` argument nor
        # ufunc.accumulate (both have limited nopython support).
        sample = np.empty(n_clean)
        equity = 1.0
        peak = 0.0
        worst_dd = 0.0
        wins = 0
        for k in range(n_clean):
            ret = clean_returns[np.random.randint(0, n_clean)]
            sample[k] = ret

            equity *= 1.0 + ret
            if equity > peak:
                peak = equity
            # Drawdown is only defined relative to a positive peak
            if peak > 0:
                dd = (equity - peak) / peak
                if dd < worst_dd:
                    worst_dd = dd

            if ret > 0:
                wins += 1

        boot_returns[i] = equity - 1.0

        mean_ret = np.mean(sample)
        std_ret = np.std(sample)
        if std_ret > 1e-10:
            boot_sharpes[i] = mean_ret / std_ret * annualization
        else:
            boot_sharpes[i] = 0.0

        boot_max_dd[i] = worst_dd
        boot_win_rates[i] = wins / n_clean

    return boot_returns, boot_sharpes, boot_max_dd, boot_win_rates

print("\n" + "=" * 80)
print("STATISTICAL VALIDATION & ROBUSTNESS TESTING")
print("=" * 80)

# `stats` is imported from scipy at the top of the notebook, but this cell and
# the summary cell also read it as a mapping of backtest metrics
# (stats['Sharpe Ratio']). The name therefore cannot be both at once; use a
# private alias for the scipy distribution functions.
from scipy import stats as scipy_stats

# Bootstrap analysis
print("\nRunning bootstrap analysis (10,000 iterations)...")
logger.info("Starting bootstrap analysis")
boot_start = time.time()
# Defined up front so they exist even if the validation below fails part-way
# (the summary cell adds boot_time to the total run time).
boot_time = 0.0
t_stat = None

try:
    returns_array = returns.values
    boot_returns, boot_sharpes, boot_max_dd, boot_win_rates = bootstrap_confidence_intervals(returns_array)

    boot_time = time.time() - boot_start
    logger.info(f"Bootstrap completed in {boot_time:.3f} seconds")
    print(f"Bootstrap completed in {boot_time:.3f} seconds")

    # Calculate confidence intervals
    def calculate_ci(data, confidence=0.95):
        """Return the central ``confidence`` percentile interval of ``data``.

        NaN entries are ignored; ``(0.0, 0.0)`` is returned when nothing
        remains.
        """
        valid_data = data[~np.isnan(data)]
        if len(valid_data) == 0:
            return 0.0, 0.0
        lower = np.percentile(valid_data, (1 - confidence) / 2 * 100)
        upper = np.percentile(valid_data, (1 + confidence) / 2 * 100)
        return lower, upper

    # Display results
    print("\n95% Confidence Intervals:")
    print("-" * 50)

    ret_lower, ret_upper = calculate_ci(boot_returns)
    print(f"Total Return: [{ret_lower*100:.2f}%, {ret_upper*100:.2f}%]")

    sharpe_lower, sharpe_upper = calculate_ci(boot_sharpes)
    print(f"Sharpe Ratio: [{sharpe_lower:.2f}, {sharpe_upper:.2f}]")

    dd_lower, dd_upper = calculate_ci(boot_max_dd)
    print(f"Max Drawdown: [{dd_lower*100:.2f}%, {dd_upper*100:.2f}%]")

    wr_lower, wr_upper = calculate_ci(boot_win_rates)
    print(f"Win Rate: [{wr_lower*100:.2f}%, {wr_upper*100:.2f}%]")

    # Statistical significance tests
    print("\n" + "-" * 50)
    print("STATISTICAL SIGNIFICANCE")
    print("-" * 50)

    # Test if returns are significantly different from zero
    clean_returns = returns_array[~np.isnan(returns_array)]
    if len(clean_returns) > 1:
        mean_return = np.mean(clean_returns)
        std_return = np.std(clean_returns)
        n_returns = len(clean_returns)

        if std_return > 0:
            t_stat = mean_return / (std_return / np.sqrt(n_returns))
            # Approximate two-sided p-value using the normal distribution
            p_value_approx = 2 * (1 - scipy_stats.norm.cdf(abs(t_stat)))

            print(f"T-statistic: {t_stat:.3f}")
            print(f"Approx p-value: {p_value_approx:.4f}")
            print(f"Returns significantly positive: {'Yes' if t_stat > 1.96 else 'No'}")
        else:
            print("Cannot calculate t-statistic: zero standard deviation")

    # Risk-adjusted performance percentiles.
    # `stats` always exists as a global (scipy import), so checking for the
    # name proves nothing; try to read it as the metrics mapping and fall back
    # to 0 when it is not one.
    try:
        actual_sharpe = float(stats['Sharpe Ratio'])
    except (TypeError, KeyError, IndexError, ValueError):
        actual_sharpe = 0.0
    sharpe_percentile = np.sum(boot_sharpes <= actual_sharpe) / len(boot_sharpes) * 100

    print(f"\nStrategy Sharpe ratio percentile: {sharpe_percentile:.1f}%")
    print(f"Performance assessment: ", end="")
    if sharpe_percentile > 90:
        print("EXCELLENT - Top 10% performance")
    elif sharpe_percentile > 75:
        print("VERY GOOD - Top 25% performance")
    elif sharpe_percentile > 50:
        print("GOOD - Above median performance")
    else:
        print("NEEDS IMPROVEMENT - Below median performance")

    # Stability analysis
    print("\n" + "-" * 50)
    print("STABILITY ANALYSIS")
    print("-" * 50)

    # Rolling performance over 1 year of 5-minute bars (252 sessions x 78 bars,
    # the same bar count used for Sharpe annualisation) or half the data,
    # whichever is shorter.
    bars_per_year = 252 * 78
    window = min(bars_per_year, len(returns) // 2)
    if window > 100:
        rolling_returns = returns.rolling(window).apply(lambda x: (1 + x).prod() - 1)
        rolling_sharpe = returns.rolling(window).apply(
            lambda x: x.mean() / x.std() * np.sqrt(252 * 78) if x.std() > 0 else 0
        )

        print(f"Rolling 1-year return volatility: {rolling_returns.std()*100:.2f}%")
        print(f"Rolling Sharpe stability: {rolling_sharpe.std():.2f}")
        print(f"Minimum rolling Sharpe: {rolling_sharpe.min():.2f}")
        print(f"Maximum rolling Sharpe: {rolling_sharpe.max():.2f}")
    else:
        print("Insufficient data for rolling analysis")

    # Save validation results
    if config.save_results:
        validation_results = {
            'confidence_intervals': {
                'return': (ret_lower, ret_upper),
                'sharpe': (sharpe_lower, sharpe_upper),
                'max_dd': (dd_lower, dd_upper),
                'win_rate': (wr_lower, wr_upper)
            },
            'significance': {
                # Plain Python types, so json does not fall back to
                # default=str and write the values as strings.
                't_stat': float(t_stat) if t_stat is not None else None,
                'significant': bool(t_stat is not None and t_stat > 1.96)
            },
            'percentiles': {
                'sharpe_percentile': float(sharpe_percentile)
            }
        }

        validation_path = os.path.join(config.results_path, 'validation_results.json')
        with open(validation_path, 'w') as f:
            json.dump(validation_results, f, indent=2, default=str)
        logger.info(f"Saved validation results to {validation_path}")

except Exception as e:
    # Best-effort section: report (with traceback in the log) and continue
    logger.exception(f"Error in statistical validation: {str(e)}")
    print(f"\nError in statistical validation: {str(e)}")
    print("Continuing with limited validation...")

In [None]:
# Cell 11: Final Summary and Production Deployment

print("\n" + "=" * 80)
print("FINAL SUMMARY - MLMI → NW-RQK → FVG SYNERGY")
print("=" * 80)

# Execution summary
# Sum whichever stage timers exist. A stage that failed or was skipped leaves
# its timer undefined, and a plain sum would raise NameError before the
# summary (and its error handling) even starts.
total_time = sum(
    float(globals().get(timer_name, 0.0))
    for timer_name in ('calc_time', 'mlmi_time', 'nwrqk_time', 'align_time',
                       'signal_time', 'backtest_time', 'boot_time')
)

try:
    # Totals reused below; int() keeps them JSON-serialisable as numbers
    total_signals = int(long_entries.sum() + short_entries.sum())
    nwrqk_signals = int(df_30m['nwrqk_bull'].sum() + df_30m['nwrqk_bear'].sum())
    fvg_zones = int(fvg_bull.sum() + fvg_bear.sum())
    mlmi_signals = int(df_30m['mlmi_bull'].sum() + df_30m['mlmi_bear'].sum())

    # Throughput is undefined for a zero elapsed time
    bars_per_second = len(df_5m_aligned) / total_time if total_time > 0 else None

    # Performance summary
    print("\nPERFORMANCE SUMMARY:")
    print("-" * 50)
    print(f"Total Return: {stats['Total Return [%]']:.2f}%")
    print(f"Sharpe Ratio: {stats['Sharpe Ratio']:.2f}")
    print(f"Total Trades: {stats['Total Trades']:,.0f}")
    print(f"Win Rate: {stats['Win Rate [%]']:.2f}%")
    print(f"Average Trade: {stats['Expectancy [%]']:.3f}%")

    print("\nEXECUTION PERFORMANCE:")
    print("-" * 50)
    print(f"Total execution time: {total_time:.2f} seconds")
    if bars_per_second is not None:
        print(f"Bars processed per second: {bars_per_second:,.0f}")
    else:
        print("Bars processed per second: n/a")
    if signal_time > 0:
        print(f"Signals detected per second: {total_signals / signal_time:,.0f}")
    else:
        print("Signals detected per second: n/a")

    # Signal analysis
    print("\nSIGNAL CHARACTERISTICS:")
    print("-" * 50)
    print(f"Base indicators (30m):")
    print(f"  - MLMI signals: {mlmi_signals:,}")
    print(f"  - NW-RQK signals: {nwrqk_signals:,}")
    print(f"FVG zones (5m): {fvg_zones:,}")
    print(f"\nSynergy signals: {total_signals:,}")

    if mlmi_signals > 0:
        signal_reduction = ((1 - total_signals / mlmi_signals) * 100)
        print(f"Signal reduction: {signal_reduction:.1f}%")

    # Strengths and weaknesses
    print("\nKEY STRENGTHS:")
    print("-" * 50)
    strengths = []
    if stats['Sharpe Ratio'] > 1.0:
        strengths.append(f"Strong risk-adjusted returns (Sharpe: {stats['Sharpe Ratio']:.2f})")
    if stats['Win Rate [%]'] > 45:
        strengths.append(f"Solid win rate ({stats['Win Rate [%]']:.1f}%)")
    if stats['Total Trades'] > 1000:
        strengths.append(f"Good trade frequency ({stats['Total Trades']:,.0f} trades)")
    if abs(stats['Max Drawdown [%]']) < 20:
        strengths.append(f"Controlled drawdown ({stats['Max Drawdown [%]']:.1f}%)")
    if total_time < 10:
        strengths.append(f"Ultra-fast execution ({total_time:.1f} seconds)")

    for i, strength in enumerate(strengths, 1):
        print(f"{i}. {strength}")

    print("\nAREAS FOR IMPROVEMENT:")
    print("-" * 50)
    improvements = []
    if stats['Sharpe Ratio'] < 0.5:
        improvements.append("Improve risk-adjusted returns")
    if stats['Win Rate [%]'] < 40:
        improvements.append("Increase win rate through better entry timing")
    if stats['Total Trades'] < 500:
        improvements.append("Consider relaxing signal criteria for more opportunities")
    if abs(stats['Max Drawdown [%]']) > 30:
        improvements.append("Implement better risk management to reduce drawdowns")

    for i, improvement in enumerate(improvements, 1):
        print(f"{i}. {improvement}")

    # Save final results
    if config.save_results:
        final_results = {
            'performance': {
                'total_return': stats['Total Return [%]'],
                'sharpe_ratio': stats['Sharpe Ratio'],
                'win_rate': stats['Win Rate [%]'],
                'total_trades': stats['Total Trades'],
                'max_drawdown': stats['Max Drawdown [%]']
            },
            'execution': {
                'total_time': total_time,
                'bars_per_second': bars_per_second
            },
            'signals': {
                'mlmi_count': mlmi_signals,
                'nwrqk_count': nwrqk_signals,
                'fvg_count': fvg_zones,
                'final_count': total_signals
            },
            'config': config.__dict__
        }

        results_path = os.path.join(config.results_path, 'final_results.json')
        with open(results_path, 'w') as f:
            # default=str stringifies anything json cannot encode natively
            # (e.g. non-primitive config values)
            json.dump(final_results, f, indent=2, default=str)

        # Save trade records
        if len(trades) > 0:
            trades_path = os.path.join(config.results_path, 'trades.csv')
            trades.to_csv(trades_path)
            logger.info(f"Saved {len(trades)} trades to {trades_path}")

        # Save signals (only the bars that carry an entry)
        signals_df = df_5m_aligned[['long_entry', 'short_entry', 'signal_quality']].copy()
        signals_df = signals_df[signals_df['long_entry'] | signals_df['short_entry']]
        if len(signals_df) > 0:
            signals_path = os.path.join(config.results_path, 'signals.csv')
            signals_df.to_csv(signals_path)
            logger.info(f"Saved {len(signals_df)} signals to {signals_path}")

        print(f"\nResults saved to {config.results_path}")

except Exception as e:
    # Best-effort section: report (with traceback in the log) and continue
    logger.exception(f"Error in final summary: {str(e)}")
    print(f"\nError generating summary: {str(e)}")

# Recommendations
print("\nRECOMMENDATIONS:")
print("-" * 50)
print("1. Parameter optimization:")
print("   - Test different MLMI k-neighbors (100-300)")
print("   - Optimize NW-RQK kernel parameters (h: 5-15, r: 5-15)")
print("   - Adjust FVG ATR multiplier (1.0-2.0)")
print("\n2. Risk management enhancements:")
print("   - Implement dynamic position sizing based on volatility")
print("   - Add trailing stops for trend-following trades")
print("   - Consider correlation-based portfolio allocation")
print("\n3. Further testing:")
print("   - Walk-forward analysis across different market regimes")
print("   - Out-of-sample testing on different assets")
print("   - Stress testing during high volatility periods")
print("\n4. Production deployment:")
print("   - Set up real-time data feeds")
print("   - Implement order execution with slippage control")
print("   - Add monitoring and alerting systems")
print("   - Create automated rebalancing schedules")

print("\n" + "=" * 80)
print("ANALYSIS COMPLETE - NOTEBOOK PRODUCTION READY")
print("=" * 80)

# Clean up memory one final time
cleanup_memory()
logger.info("Final memory cleanup completed")
print(f"\nFinal memory usage: {check_memory_usage():.2f} GB")

In [None]:
# Cell 10: Statistical Validation and Robustness Testing

# fastmath is restricted to flags that do NOT assume "no NaN / no Inf":
# plain fastmath=True enables LLVM's `nnan` flag, which allows the compiler to
# optimise away the np.isnan() filter this function relies on.
@njit(parallel=True, fastmath={'nsz', 'arcp', 'contract', 'afn', 'reassoc'}, cache=True)
def bootstrap_confidence_intervals(returns: np.ndarray, n_bootstrap: int = 10000,
                                  confidence: float = 0.95) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Bootstrap the sampling distributions of four performance metrics.

    NaN entries are dropped from ``returns``; each of the ``n_bootstrap``
    iterations then draws ``len(clean_returns)`` observations with replacement
    and records the metrics of that resampled path.

    Args:
        returns: 1-D array of per-bar simple returns (may contain NaN).
        n_bootstrap: Number of bootstrap resamples.
        confidence: Accepted for interface compatibility; it is not used here.
            Interval bounds are taken from the returned arrays by the caller.

    Returns:
        Four arrays of length ``n_bootstrap``:
        ``(boot_returns, boot_sharpes, boot_max_dd, boot_win_rates)`` holding
        the compounded return, the Sharpe ratio scaled by ``sqrt(252 * 78)``,
        the maximum drawdown (a value <= 0) and the fraction of strictly
        positive returns of every resample. All arrays are zero when
        ``returns`` holds no non-NaN value; a Sharpe entry stays zero when the
        resample has zero standard deviation.
    """
    # Arrays to store bootstrap results
    boot_returns = np.zeros(n_bootstrap)
    boot_sharpes = np.zeros(n_bootstrap)
    boot_max_dd = np.zeros(n_bootstrap)
    boot_win_rates = np.zeros(n_bootstrap)

    # Remove NaN values
    clean_returns = returns[~np.isnan(returns)]
    n_clean = len(clean_returns)

    if n_clean == 0:
        return boot_returns, boot_sharpes, boot_max_dd, boot_win_rates

    # Loop-invariant annualisation factor (presumably 252 sessions x 78
    # five-minute bars per session -- confirm against the data frequency).
    annualization = np.sqrt(252.0 * 78.0)

    # Bootstrap iterations
    for i in prange(n_bootstrap):
        sample = np.empty(n_clean)
        equity = 1.0
        peak = 0.0
        worst_drawdown = 0.0
        wins = 0

        # One pass draws the resample and tracks equity, running peak,
        # drawdown and wins. The explicit loop replaces
        # np.maximum.accumulate (ufunc methods are not available in nopython
        # mode) and the `size=` form of np.random.randint (not accepted by
        # every Numba release).
        for j in range(n_clean):
            r = clean_returns[np.random.randint(0, n_clean)]
            sample[j] = r

            equity *= 1.0 + r
            if equity > peak:
                peak = equity
            if peak > 0.0:  # drawdown is undefined until a positive peak exists
                drawdown = (equity - peak) / peak
                if drawdown < worst_drawdown:
                    worst_drawdown = drawdown

            if r > 0.0:
                wins += 1

        boot_returns[i] = equity - 1.0

        std_ret = np.std(sample)
        if std_ret > 0:
            boot_sharpes[i] = np.mean(sample) / std_ret * annualization

        boot_max_dd[i] = worst_drawdown
        boot_win_rates[i] = wins / n_clean

    return boot_returns, boot_sharpes, boot_max_dd, boot_win_rates

# Section banner
print(
    "\n" + "=" * 80,
    "STATISTICAL VALIDATION & ROBUSTNESS TESTING",
    "=" * 80,
    sep="\n",
)

# Bootstrap analysis, timed end-to-end (the first call includes JIT compilation
# unless a cached build is available)
print("\nRunning bootstrap analysis (10,000 iterations)...")
boot_start = time.time()

returns_array = returns.values
(
    boot_returns,
    boot_sharpes,
    boot_max_dd,
    boot_win_rates,
) = bootstrap_confidence_intervals(returns_array)

boot_time = time.time() - boot_start
print(f"Bootstrap completed in {boot_time:.3f} seconds")

# Calculate confidence intervals
def calculate_ci(data, confidence=0.95):
    """Return the ``(lower, upper)`` bounds of the central interval of ``data``.

    The bounds are the empirical percentiles that leave
    ``(1 - confidence) / 2`` of the observations in each tail.
    """
    tail_percentiles = (
        (1 - confidence) / 2 * 100,
        (1 + confidence) / 2 * 100,
    )
    return tuple(np.percentile(data, q) for q in tail_percentiles)

# Display results
print("\n95% Confidence Intervals:", "-" * 50, sep="\n")

# Derive every interval first, then report the four lines together
ret_lower, ret_upper = calculate_ci(boot_returns)
sharpe_lower, sharpe_upper = calculate_ci(boot_sharpes)
dd_lower, dd_upper = calculate_ci(boot_max_dd)
wr_lower, wr_upper = calculate_ci(boot_win_rates)

print(
    f"Total Return: [{ret_lower*100:.2f}%, {ret_upper*100:.2f}%]",
    "",
    f"Sharpe Ratio: [{sharpe_lower:.2f}, {sharpe_upper:.2f}]",
    "",
    f"Max Drawdown: [{dd_lower*100:.2f}%, {dd_upper*100:.2f}%]",
    "",
    f"Win Rate: [{wr_lower*100:.2f}%, {wr_upper*100:.2f}%]",
    sep="\n",
).__class__ if False else print(
    f"Total Return: [{ret_lower*100:.2f}%, {ret_upper*100:.2f}%]",
    f"Sharpe Ratio: [{sharpe_lower:.2f}, {sharpe_upper:.2f}]",
    f"Max Drawdown: [{dd_lower*100:.2f}%, {dd_upper*100:.2f}%]",
    f"Win Rate: [{wr_lower*100:.2f}%, {wr_upper*100:.2f}%]",
    sep="\n",
)

# Statistical significance tests
import math  # stdlib; local to this cell because only erfc/sqrt are needed here

print("\n" + "-" * 50)
print("STATISTICAL SIGNIFICANCE")
print("-" * 50)

# Test whether the mean return differs from zero.
# The NaN mask is applied once and the standard error uses the sample standard
# deviation (ddof=1), as the t-statistic requires.
valid_returns = returns_array[~np.isnan(returns_array)]
n_obs = len(valid_returns)
sample_std = np.std(valid_returns, ddof=1) if n_obs > 1 else 0.0

if sample_std > 0:
    t_stat = np.mean(valid_returns) / (sample_std / np.sqrt(n_obs))
else:
    # Fewer than two observations or zero variance: the statistic is undefined,
    # so report "no evidence" instead of dividing by zero.
    t_stat = 0.0

# Two-sided p-value under the normal approximation:
# 2 * (1 - Phi(|t|)) == erfc(|t| / sqrt(2)). This replaces the former
# hard-coded 0.05 fallback, which was always taken because the name `scipy`
# is never bound in this file.
p_value = math.erfc(abs(t_stat) / math.sqrt(2.0))

print(f"T-statistic: {t_stat:.3f}")
print(f"P-value (two-sided, normal approx.): {p_value:.4g}")
print(f"Returns significantly positive: {'Yes' if t_stat > 1.96 else 'No'}")

# Risk-adjusted performance percentiles
# NOTE(review): boot_sharpes is resampled from the strategy's own returns, so
# this ranks the reported Sharpe against its own bootstrap distribution --
# confirm that is the intended benchmark and that both use the same scaling.
actual_sharpe = stats['Sharpe Ratio']
sharpe_percentile = np.sum(boot_sharpes <= actual_sharpe) / len(boot_sharpes) * 100

print(f"\nStrategy Sharpe ratio percentile: {sharpe_percentile:.1f}%")

# Tiers are checked from the highest cutoff down; the first one exceeded wins.
_assessment_tiers = (
    (90, "EXCELLENT - Top 10% performance"),
    (75, "VERY GOOD - Top 25% performance"),
    (50, "GOOD - Above median performance"),
)
_assessment = next(
    (label for cutoff, label in _assessment_tiers if sharpe_percentile > cutoff),
    "NEEDS IMPROVEMENT - Below median performance",
)
print("Performance assessment: " + _assessment)

# Stability analysis
print("\n" + "-" * 50)
print("STABILITY ANALYSIS")
print("-" * 50)

# Rolling performance
# One year of 5-minute bars, using the same 252 * 78 bars-per-year convention as
# the Sharpe annualisation in this notebook. The previous value, 252 * 5, was
# only 1,260 bars and did not match the "1-year" labels printed below.
bars_per_year = 252 * 78
window = bars_per_year

if len(returns) < window:
    print(f"Note: only {len(returns):,} bars available (< {window:,}); rolling 1-year metrics will be NaN")

# Vectorised equivalents of the former per-window Python lambdas:
#   compounded return: prod(1 + r) - 1 == expm1(sum(log1p(r)))
#     (a bar return <= -100% has no finite log and propagates as NaN/-inf)
#   Sharpe: rolling mean / rolling std (ddof=1, as Series.std) * sqrt(bars/year)
rolling_returns = np.expm1(np.log1p(returns).rolling(window).sum())

_rolling = returns.rolling(window)
_rolling_std = _rolling.std()
# .mask keeps NaN for incomplete windows and writes 0 only where std is exactly 0
rolling_sharpe = (_rolling.mean() / _rolling_std * np.sqrt(bars_per_year)).mask(_rolling_std == 0, 0.0)

print(f"Rolling 1-year return volatility: {rolling_returns.std()*100:.2f}%")
print(f"Rolling Sharpe stability: {rolling_sharpe.std():.2f}")
print(f"Minimum rolling Sharpe: {rolling_sharpe.min():.2f}")
print(f"Maximum rolling Sharpe: {rolling_sharpe.max():.2f}")

In [None]:
# Cell 11: Final Summary and Recommendations

print(
    "\n" + "=" * 80,
    "FINAL SUMMARY - MLMI → NW-RQK → FVG SYNERGY",
    "=" * 80,
    sep="\n",
)

# Performance summary
print("\nPERFORMANCE SUMMARY:", "-" * 50, sep="\n")

# One row per reported line: (label, key in `stats`, format spec, unit suffix).
# Lines are still printed one at a time, in the original order.
for _label, _key, _spec, _unit in (
    ("Total Return", 'Total Return [%]', ".2f", "%"),
    ("Sharpe Ratio", 'Sharpe Ratio', ".2f", ""),
    ("Total Trades", 'Total Trades', ",.0f", ""),
    ("Win Rate", 'Win Rate [%]', ".2f", "%"),
    ("Average Trade", 'Expectancy [%]', ".3f", "%"),
):
    print(f"{_label}: {stats[_key]:{_spec}}{_unit}")

# Execution summary
total_time = calc_time + mlmi_time + nwrqk_time + align_time + signal_time + backtest_time + boot_time

# Each count is printed more than once below, so compute every sum a single time.
synergy_signal_count = long_entries.sum() + short_entries.sum()
mlmi_signal_count = df_30m['mlmi_bull'].sum() + df_30m['mlmi_bear'].sum()
nwrqk_signal_count = df_30m['nwrqk_bull'].sum() + df_30m['nwrqk_bear'].sum()
fvg_zone_count = fvg_bull.sum() + fvg_bear.sum()

# Throughput figures are undefined for a zero elapsed time; report NaN instead
# of dividing by zero.
bars_per_second = len(df_5m_aligned) / total_time if total_time > 0 else float('nan')
signals_per_second = synergy_signal_count / signal_time if signal_time > 0 else float('nan')

print("\nEXECUTION PERFORMANCE:")
print("-" * 50)
print(f"Total execution time: {total_time:.2f} seconds")
print(f"Bars processed per second: {bars_per_second:,.0f}")
print(f"Signals detected per second: {signals_per_second:,.0f}")

# Signal analysis
print("\nSIGNAL CHARACTERISTICS:")
print("-" * 50)
print("Base indicators (30m):")
print(f"  - MLMI signals: {mlmi_signal_count:,}")
print(f"  - NW-RQK signals: {nwrqk_signal_count:,}")
print(f"FVG zones (5m): {fvg_zone_count:,}")
print(f"\nSynergy signals: {synergy_signal_count:,}")

# Reduction is measured relative to the MLMI signal count, so it is only
# defined when at least one MLMI signal exists.
if mlmi_signal_count > 0:
    signal_reduction = (1 - synergy_signal_count / mlmi_signal_count) * 100
    print(f"Signal reduction: {signal_reduction:.1f}%")
else:
    print("Signal reduction: n/a (no MLMI signals)")

# Strengths and weaknesses
# Read each headline metric once; the same values drive both lists below.
summary_sharpe = stats['Sharpe Ratio']
summary_win_rate = stats['Win Rate [%]']
summary_trades = stats['Total Trades']
summary_max_dd = stats['Max Drawdown [%]']

print("\nKEY STRENGTHS:")
print("-" * 50)
strengths = []
if summary_sharpe > 1.0:
    strengths.append(f"Strong risk-adjusted returns (Sharpe: {summary_sharpe:.2f})")
if summary_win_rate > 45:
    strengths.append(f"Solid win rate ({summary_win_rate:.1f}%)")
if summary_trades > 1000:
    strengths.append(f"Good trade frequency ({summary_trades:,.0f} trades)")
if abs(summary_max_dd) < 20:
    strengths.append(f"Controlled drawdown ({summary_max_dd:.1f}%)")
if total_time < 10:
    strengths.append(f"Ultra-fast execution ({total_time:.1f} seconds)")

for i, strength in enumerate(strengths, 1):
    print(f"{i}. {strength}")
if not strengths:
    # Without this line the header would sit above an empty section.
    print("None identified at the current thresholds")

print("\nAREAS FOR IMPROVEMENT:")
print("-" * 50)
improvements = []
if summary_sharpe < 0.5:
    improvements.append("Improve risk-adjusted returns")
if summary_win_rate < 40:
    improvements.append("Increase win rate through better entry timing")
if summary_trades < 500:
    improvements.append("Consider relaxing signal criteria for more opportunities")
if abs(summary_max_dd) > 30:
    improvements.append("Implement better risk management to reduce drawdowns")

for i, improvement in enumerate(improvements, 1):
    print(f"{i}. {improvement}")
if not improvements:
    print("None identified at the current thresholds")

# Recommendations: one print call, one argument per output line
print(
    "\nRECOMMENDATIONS:",
    "-" * 50,
    "1. Parameter optimization:",
    "   - Test different MLMI k-neighbors (100-300)",
    "   - Optimize NW-RQK kernel parameters (h: 5-15, r: 5-15)",
    "   - Adjust FVG ATR multiplier (1.0-2.0)",
    "\n2. Risk management enhancements:",
    "   - Implement dynamic position sizing based on volatility",
    "   - Add trailing stops for trend-following trades",
    "   - Consider correlation-based portfolio allocation",
    "\n3. Further testing:",
    "   - Walk-forward analysis across different market regimes",
    "   - Out-of-sample testing on different assets",
    "   - Stress testing during high volatility periods",
    sep="\n",
)

# Closing banner
print("\n" + "=" * 80, "ANALYSIS COMPLETE", "=" * 80, sep="\n")