# Synergy 2: MLMI → NW-RQK → FVG Trading Strategy

**Ultra-Fast Backtesting with VectorBT and Numba JIT Compilation**

This notebook implements the second synergy pattern where:
1. MLMI provides the primary trend signal
2. NW-RQK confirms the trend direction
3. FVG validates the final entry zone

Key differences from Synergy 1:
- NW-RQK confirmation comes before FVG
- May capture different market dynamics
- Expected to generate similar trade counts but with different timing

In [None]:
# Cell 1: Environment Setup and Imports

# Standard library imports
import os
import sys
import gc
import json
import time
import logging
import warnings
from datetime import datetime, timedelta
from pathlib import Path
from typing import Tuple, Dict as TypeDict, Optional, List, Union, Any
from dataclasses import dataclass, field
from collections import defaultdict
import pickle

# Scientific computing imports
import numpy as np
import pandas as pd
from scipy import stats
from scipy.spatial import cKDTree

# Visualization imports
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Trading and backtesting imports
import vectorbt as vbt

# Performance optimization imports
from numba import njit, prange, typed, types
from numba.typed import Dict
import numba

# Progress tracking
from tqdm import tqdm

# Suppress warnings
# NOTE(review): no category or module filter is given, so this hides every
# warning for the rest of the session, including library deprecation notices.
warnings.filterwarnings('ignore')

# Configure logging: every record is echoed to stdout and also written to
# 'synergy_strategy.log' (a relative path, resolved against the working directory).
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout),
        logging.FileHandler('synergy_strategy.log')
    ]
)
# Module-level logger used by all functions defined in this notebook
logger = logging.getLogger(__name__)

# Configure Numba for maximum performance
# Request a thread-safe threading layer and set the thread count to Numba's
# own default value.
numba.config.THREADING_LAYER = 'threadsafe'
numba.config.NUMBA_NUM_THREADS = numba.config.NUMBA_DEFAULT_NUM_THREADS

# Display settings: show all columns, no fixed console width, at most 100 rows
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_rows', 100)

# Version checks: record the interpreter and library versions in the log so a
# run can be reproduced later
logger.info("Environment Setup")
logger.info(f"Python version: {sys.version}")
logger.info(f"NumPy version: {np.__version__}")
logger.info(f"Pandas version: {pd.__version__}")
logger.info(f"VectorBT version: {vbt.__version__}")
logger.info(f"Numba version: {numba.__version__}")
logger.info(f"Numba threads: {numba.config.NUMBA_NUM_THREADS}")

# Short human-readable banner for the notebook output
print("Synergy 2: MLMI → NW-RQK → FVG Strategy")
print(f"Numba threads: {numba.config.NUMBA_NUM_THREADS}")
print(f"VectorBT version: {vbt.__version__}")
print("Environment ready for ultra-fast backtesting!")

# Configuration dataclass
@dataclass
class StrategyConfig:
    """Single bundle of tunable settings for the Synergy 2 strategy.

    Every field carries a default, so ``StrategyConfig()`` is immediately
    usable; individual values are overridden by keyword, e.g.
    ``StrategyConfig(synergy_window=20)``. Fields are grouped by the stage
    of the pipeline that reads them.
    """

    # --- Input files (5-minute and 30-minute bar CSVs) ---
    data_path_5m: str = (
        "/home/QuantNova/AlgoSpace-8/notebooks/notebook data/@CL - 5 min - ETH.csv"
    )
    data_path_30m: str = (
        "/home/QuantNova/AlgoSpace-8/notebooks/notebook data/@CL - 30 min - ETH.csv"
    )

    # --- MLMI stage ---
    mlmi_k_neighbors: int = 200
    mlmi_confidence_threshold: float = 0.3
    mlmi_forward_bars: int = 5

    # --- NW-RQK stage ---
    nwrqk_h: float = 8.0
    nwrqk_r: float = 8.0
    nwrqk_lag: int = 2
    nwrqk_strength_threshold: float = 0.2

    # --- FVG stage ---
    fvg_atr_multiplier: float = 1.5
    fvg_active_bars: int = 20  # bars an FVG zone stays active

    # --- Signal combination ---
    synergy_window: int = 30

    # --- Backtest ---
    initial_capital: float = 100_000
    position_size_base: float = 100
    stop_loss_atr: float = 2.0
    max_holding_bars: int = 100
    fees: float = 0.0001
    slippage: float = 0.0001

    # --- Resource limits ---
    chunk_size: int = 10_000
    max_memory_gb: float = 8.0  # cleanup is triggered relative to this budget

    # --- Output ---
    save_results: bool = True
    results_path: str = "./results"
    checkpoint_interval: int = 1_000


# Module-wide default configuration used by the cells below
config = StrategyConfig()

# Memory management utilities
def check_memory_usage():
    """Return the resident memory of the current process in GB.

    psutil is imported lazily; when it is not installed a warning is logged
    and 0.0 is returned instead.
    """
    try:
        import psutil
        rss_bytes = psutil.Process(os.getpid()).memory_info().rss
        # bytes -> GB (three factors of 1024)
        return rss_bytes / (1024 ** 3)
    except ImportError:
        logger.warning("psutil not installed, memory monitoring disabled")
        return 0.0

def cleanup_memory():
    """Run a garbage-collection pass, then log the resulting memory usage."""
    gc.collect()
    remaining_gb = check_memory_usage()
    logger.info(f"Memory after cleanup: {remaining_gb:.2f} GB")

In [None]:
# Cell 2: Enhanced Data Loading with Comprehensive Error Handling

class DataLoadingError(Exception):
    """Raised when a data file cannot be found, read, or loaded."""

class DataValidationError(Exception):
    """Raised when loaded data fails a structural or content check."""

def inspect_csv_columns(file_path: str, nrows: int = 5) -> None:
    """Print the column names and first-row values of a CSV file.

    Debugging aid only: at most ``nrows`` rows are read, nothing is returned,
    and any failure is reported on stdout instead of being raised.
    """
    try:
        preview = pd.read_csv(file_path, nrows=nrows)
        column_names = list(preview.columns)
        print(f"\nInspecting file: {file_path}")
        print(f"Columns found: {column_names}")
        print(f"First row data:")
        for name, values in preview.items():
            first_value = values.iloc[0]
            print(f"  {name}: {first_value}")
    except Exception as e:
        print(f"Error inspecting file: {e}")

def standardize_column_names(df: pd.DataFrame) -> pd.DataFrame:
    """Rename recognised column-name variants to canonical OHLCV names.

    Column names are stripped of surrounding whitespace and matched
    case-insensitively against a table of known aliases. Matches are renamed
    to one of ``Open``, ``High``, ``Low``, ``Close``, ``Volume`` or
    ``Timestamp``; unrecognised columns keep their (stripped) names.

    The result never gains duplicate labels from the renaming. When several
    source columns map to the same canonical name:

    * a ``date`` column plus a ``time`` column are joined (as
      ``"<date> <time>"`` strings) into a single ``Timestamp`` column;
    * otherwise the column already carrying the canonical name wins, or
      failing that the first one in file order; the others are left
      un-renamed and a warning is logged.

    Parameters:
        df: Input frame with string column labels. It is not modified.

    Returns:
        A copy of ``df`` with standardized column names.
    """
    # Work on a copy so the caller's frame is untouched
    df = df.copy()

    # Strip surrounding whitespace; case is folded per column at lookup time
    df.columns = df.columns.str.strip()

    # Comprehensive mapping dictionary (lower-case alias -> canonical name)
    column_mappings = {
        # Open variations
        'open': 'Open', 'o': 'Open', 'open_price': 'Open', 'opening': 'Open',
        'open price': 'Open', 'opening_price': 'Open', 'o_price': 'Open',

        # High variations
        'high': 'High', 'h': 'High', 'high_price': 'High', 'highest': 'High',
        'high price': 'High', 'highest_price': 'High', 'h_price': 'High',
        'max': 'High', 'maximum': 'High', 'max_price': 'High',

        # Low variations
        'low': 'Low', 'l': 'Low', 'low_price': 'Low', 'lowest': 'Low',
        'low price': 'Low', 'lowest_price': 'Low', 'l_price': 'Low',
        'min': 'Low', 'minimum': 'Low', 'min_price': 'Low',

        # Close variations
        'close': 'Close', 'c': 'Close', 'close_price': 'Close', 'closing': 'Close',
        'close price': 'Close', 'closing_price': 'Close', 'c_price': 'Close',
        'last': 'Close', 'last_price': 'Close', 'final': 'Close',

        # Volume variations
        'volume': 'Volume', 'v': 'Volume', 'vol': 'Volume', 'volume_btc': 'Volume',
        'volume_usd': 'Volume', 'volume_usdt': 'Volume', 'qty': 'Volume',
        'quantity': 'Volume', 'amount': 'Volume', 'size': 'Volume',

        # Timestamp variations
        'timestamp': 'Timestamp', 'datetime': 'Timestamp', 'date': 'Timestamp',
        'time': 'Timestamp', 'gmt time': 'Timestamp', 'gmt_time': 'Timestamp',
        'date_time': 'Timestamp', 'utc_time': 'Timestamp', 'utc': 'Timestamp',
        'index': 'Timestamp', 'date time': 'Timestamp'
    }

    # Group the recognised source columns by canonical target, in file order
    sources_by_target = {}
    for col in df.columns:
        target = column_mappings.get(col.lower().strip())
        if target is None:
            continue
        sources = sources_by_target.setdefault(target, [])
        if col not in sources:
            sources.append(col)

    # Resolve each target to exactly one source column
    rename_dict = {}
    for target, sources in sources_by_target.items():
        chosen = sources[0]
        if len(sources) > 1:
            by_lower = {name.lower(): name for name in sources}
            if target == 'Timestamp' and set(by_lower) == {'date', 'time'}:
                # Separate date and time-of-day columns: join them so the
                # intraday part is not lost when the timestamp is parsed.
                date_col, time_col = by_lower['date'], by_lower['time']
                df[date_col] = (
                    df[date_col].astype(str).str.strip()
                    + ' '
                    + df[time_col].astype(str).str.strip()
                )
                df = df.drop(columns=[time_col])
                logger.info(
                    f"Combined '{date_col}' and '{time_col}' into a single Timestamp column"
                )
                chosen = date_col
            else:
                # Renaming all of them would create duplicate labels
                chosen = target if target in sources else sources[0]
                skipped = [name for name in sources if name != chosen]
                logger.warning(
                    f"Multiple columns map to '{target}': using '{chosen}', "
                    f"leaving {skipped} unchanged"
                )
        rename_dict[chosen] = target

    if rename_dict:
        df = df.rename(columns=rename_dict)
        logger.info(f"Renamed columns: {rename_dict}")

    return df

def infer_missing_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Fill in missing OHLC/Volume columns from whatever price data exists.

    * With ``Close`` present: a missing ``Open`` is copied from ``Close``;
      a missing ``High`` / ``Low`` is set 0.1% above / below the row-wise
      max / min of ``Open`` and ``Close``.
    * Without ``Close`` but with any of ``Open``/``High``/``Low``: ``Close``
      becomes the row-wise mean of the available ones (other missing columns
      are not created in this case).
    * A missing ``Volume`` is filled with the constant 1000000.

    The 0.1% offset is applied away from the bar body for either sign of the
    price, so the synthetic ``High`` is never below and the synthetic ``Low``
    never above the body even for negative prices. For non-negative prices
    the values are ``max * 1.001`` and ``min * 0.999``.

    Parameters:
        df: Frame with canonical column names. It is not modified.

    Returns:
        A copy of ``df`` with the inferred columns added. Each inferred
        column is announced with a logged warning.
    """
    df = df.copy()

    # Check what we have and what we're missing
    has_open = 'Open' in df.columns
    has_high = 'High' in df.columns
    has_low = 'Low' in df.columns
    has_close = 'Close' in df.columns

    # If we have Close but missing others, use Close as base
    if has_close:
        if not has_open:
            logger.warning("'Open' column missing - using Close as approximation")
            df['Open'] = df['Close']

        # 'Open' is guaranteed to exist from here on
        body_cols = ['Open', 'Close']

        if not has_high:
            logger.warning("'High' column missing - creating from available price data")
            body_top = df[body_cols].max(axis=1)
            # Slightly above the body: scale up when >= 0, towards zero when < 0
            df['High'] = (body_top * 1.001).where(body_top >= 0, body_top * 0.999)

        if not has_low:
            logger.warning("'Low' column missing - creating from available price data")
            body_bottom = df[body_cols].min(axis=1)
            # Slightly below the body: scale down when >= 0, away from zero when < 0
            df['Low'] = (body_bottom * 0.999).where(body_bottom >= 0, body_bottom * 1.001)

    # If we have some OHLC but missing Close (rare but possible)
    elif has_open or has_high or has_low:
        available_price_cols = [col for col in ['Open', 'High', 'Low'] if col in df.columns]
        logger.warning("'Close' column missing - using available price data")
        df['Close'] = df[available_price_cols].mean(axis=1)

    # If Volume is missing, create dummy volume
    if 'Volume' not in df.columns:
        logger.warning("'Volume' column missing - creating dummy volume data")
        df['Volume'] = 1000000  # Default volume

    return df

def validate_dataframe(df: pd.DataFrame, required_columns: List[str]) -> None:
    """Check that a frame has the required columns and contains data.

    Hard failures raise; soft issues (fewer than 100 rows, NaNs in OHLC
    columns) are only logged as warnings.

    Parameters:
        df: Frame to validate.
        required_columns: Column names that must all be present.

    Raises:
        DataValidationError: If a required column is missing or the frame
            has no rows.
    """
    # Check for required columns
    missing_columns = set(required_columns) - set(df.columns)
    if missing_columns:
        raise DataValidationError(f"Missing required columns: {missing_columns}")

    # Check for empty dataframe
    if len(df) == 0:
        raise DataValidationError("Dataframe is empty")

    # Check for sufficient data
    if len(df) < 100:
        logger.warning(f"Limited data: only {len(df)} rows available")

    # Check for NaN values in critical columns. Only inspect the OHLC columns
    # that actually exist: the caller decides via required_columns which ones
    # are mandatory, so an absent optional one must not raise KeyError here.
    critical_columns = [col for col in ['Open', 'High', 'Low', 'Close'] if col in df.columns]
    if critical_columns:
        nan_counts = df[critical_columns].isna().sum()
        if nan_counts.any():
            logger.warning(f"NaN values found: {nan_counts.to_dict()}")

def load_data_optimized(file_path: str, timeframe: str = '5m', 
                       config: Optional[StrategyConfig] = None) -> pd.DataFrame:
    """Load an OHLCV CSV and return a cleaned, timestamp-indexed DataFrame.

    Pipeline:
      1. Read the CSV (delimiter sniffed, several encodings tried in turn).
      2. Standardize column names and locate a timestamp column; if none is
         found, a synthetic index starting 2020-01-01 at the timeframe's bar
         spacing is used.
      3. Infer missing OHLC/Volume columns, with a last-resort fallback that
         copies any price-like column into the missing OHLC fields.
      4. Validate, coerce to numeric, drop rows with a missing timestamp or
         missing OHLC value.
      5. Repair High/Low so they bound Open and Close, sort by time and drop
         duplicate timestamps (first occurrence kept).
      6. Add ``Returns``, ``LogReturns``, ``HL_Range``, ``OC_Range`` and
         ``DataQuality`` columns and store the OHLCV columns as float32.

    Parameters:
        file_path: Path of the CSV file.
        timeframe: Label used in log messages and, when no timestamp column
            exists, to pick the synthetic bar spacing ('5m', '30m', anything
            else means one minute).
        config: Accepted for call-site symmetry; not read by this function.

    Returns:
        The cleaned frame, indexed by timestamp.

    Raises:
        DataLoadingError: On any failure. The underlying exception is
            attached as ``__cause__``.
    """
    start_time = time.time()
    logger.info(f"Loading {timeframe} data from {file_path}")

    try:
        # Check if file exists
        if not os.path.exists(file_path):
            raise DataLoadingError(f"Data file not found: {file_path}")

        # First, inspect the file to understand its structure
        inspect_csv_columns(file_path, nrows=5)

        # Try multiple encoding options
        encodings = ['utf-8', 'iso-8859-1', 'cp1252']
        df = None

        for encoding in encodings:
            try:
                # Read CSV with flexible options. The deprecated
                # infer_datetime_format flag is intentionally not passed:
                # timestamps are parsed explicitly further down.
                df = pd.read_csv(
                    file_path,
                    encoding=encoding,
                    # Don't assume column names yet
                    header=0,
                    parse_dates=True,
                    # Be flexible with data types
                    dtype=None,
                    # Handle various separators
                    sep=None,
                    engine='python'
                )
                logger.info(f"Successfully read file with {encoding} encoding")
                break
            except UnicodeDecodeError:
                continue
            except Exception as e:
                logger.warning(f"Failed with {encoding}: {str(e)}")
                continue

        if df is None:
            raise DataLoadingError("Failed to read CSV with any encoding")

        # Log original columns for debugging
        logger.info(f"Original columns: {list(df.columns)}")

        # Step 1: Standardize column names
        df = standardize_column_names(df)

        # Step 2: Find and set timestamp index
        timestamp_col = None
        for col in df.columns:
            if col == 'Timestamp' or 'time' in col.lower() or 'date' in col.lower():
                timestamp_col = col
                break

        if timestamp_col and timestamp_col != 'Timestamp':
            df.rename(columns={timestamp_col: 'Timestamp'}, inplace=True)

        # Set timestamp as index
        if 'Timestamp' in df.columns:
            df['Timestamp'] = pd.to_datetime(df['Timestamp'], dayfirst=True, errors='coerce')
            df = df.set_index('Timestamp')
        else:
            logger.warning("No timestamp column found - using sequential index")
            # Create a synthetic timestamp based on timeframe. 'min' is used
            # rather than the deprecated 'T' alias.
            synthetic_freq = {'5m': '5min', '30m': '30min'}.get(timeframe, 'min')
            df.index = pd.date_range(start='2020-01-01', periods=len(df), freq=synthetic_freq)

        # Step 3: Infer missing columns
        df = infer_missing_columns(df)

        # Step 4: Final validation (more flexible)
        required_columns = ['Open', 'High', 'Low', 'Close']
        missing_critical = [col for col in required_columns if col not in df.columns]

        if missing_critical:
            # Last resort: try to find any price column
            price_patterns = ['price', 'close', 'last', 'value', 'rate']
            price_col = None

            for col in df.columns:
                if any(pattern in col.lower() for pattern in price_patterns):
                    price_col = col
                    break

            if price_col:
                logger.warning(f"Using '{price_col}' as price data for all OHLC")
                for ohlc in ['Open', 'High', 'Low', 'Close']:
                    if ohlc not in df.columns:
                        df[ohlc] = df[price_col]
            else:
                raise DataValidationError(f"No price data found in columns: {list(df.columns)}")

        # Now validate after all attempts to create required columns
        validate_dataframe(df, ['Open', 'High', 'Low', 'Close', 'Volume'])

        # Step 5: Ensure numeric types
        numeric_cols = ['Open', 'High', 'Low', 'Close', 'Volume']
        for col in numeric_cols:
            if col in df.columns:
                df[col] = pd.to_numeric(df[col], errors='coerce').astype(np.float64)

        # Step 6: Clean data (unparseable timestamps, then incomplete bars).
        # Reassign instead of mutating in place so we never write into a
        # filtered view of the previous frame.
        df = df[df.index.notnull()]
        initial_len = len(df)
        df = df.dropna(subset=['Open', 'High', 'Low', 'Close'])

        if len(df) < initial_len:
            logger.warning(f"Dropped {initial_len - len(df)} rows with NaN values")

        if df.empty:
            # Fail with a clear message rather than an IndexError further down
            raise DataValidationError("No valid rows remain after cleaning")

        # Step 7: Fix OHLC relationships
        df['High'] = df[['Open', 'High', 'Close']].max(axis=1)
        df['Low'] = df[['Open', 'Low', 'Close']].min(axis=1)

        # Step 8: Sort and remove duplicates
        df = df.sort_index()
        df = df[~df.index.duplicated(keep='first')].copy()

        # Step 9: Add calculated features
        df['Returns'] = df['Close'].pct_change().fillna(0)
        df['LogReturns'] = np.log1p(df['Returns'])
        df['HL_Range'] = df['High'] - df['Low']
        df['OC_Range'] = abs(df['Open'] - df['Close'])
        # Quality score starts at 1.0 and is discounted for zero-volume and
        # zero-range bars
        df['DataQuality'] = 1.0
        df.loc[df['Volume'] == 0, 'DataQuality'] *= 0.8
        df.loc[df['HL_Range'] == 0, 'DataQuality'] *= 0.9

        load_time = time.time() - start_time
        logger.info(f"Successfully loaded {len(df):,} rows in {load_time:.2f} seconds")
        logger.info(f"Date range: {df.index[0]} to {df.index[-1]}")
        logger.info(f"Final columns: {list(df.columns)}")
        logger.info(f"Average data quality: {df['DataQuality'].mean():.3f}")

        # Memory optimization
        df = df.astype({col: 'float32' for col in numeric_cols if col in df.columns})

        return df

    except Exception as e:
        logger.error(f"Critical error loading data: {str(e)}")
        logger.error(f"File path: {file_path}")
        # Chain the cause so the original traceback is preserved
        raise DataLoadingError(f"Failed to load {timeframe} data: {str(e)}") from e

# Pre-compile all Numba functions
print("Pre-compiling Numba functions for maximum speed...")

@njit(cache=True)
def dummy_compile():
    """Sum a tiny constant array.

    Exists only so that calling it forces Numba to compile something up
    front; it compiles this function alone, not the other jitted functions
    in the notebook.
    """
    warmup_values = np.array([1.0, 2.0, 3.0])
    return warmup_values.sum()

_ = dummy_compile()  # Trigger compilation

# Load data files with error handling
# This section defines the module-level frames df_5m and df_30m that the
# following cells read. Any failure is logged and re-raised.
print("\nLoading data files with enhanced error handling...")

try:
    # Candidate locations, tried in order: the configured path first, then a
    # few relative fallbacks
    data_paths_5m = [
        config.data_path_5m,
        "./data/@CL - 5 min - ETH.csv",
        "../data/@CL - 5 min - ETH.csv",
        "data/@CL - 5 min - ETH.csv"
    ]
    
    data_paths_30m = [
        config.data_path_30m,
        "./data/@CL - 30 min - ETH.csv",
        "../data/@CL - 30 min - ETH.csv",
        "data/@CL - 30 min - ETH.csv"
    ]
    
    # Try to load 5m data: the first existing path that loads successfully
    # wins; a path that exists but fails to load is logged and skipped
    df_5m = None
    for path in data_paths_5m:
        if os.path.exists(path):
            try:
                df_5m = load_data_optimized(path, '5m', config)
                break
            except Exception as e:
                logger.warning(f"Failed to load from {path}: {e}")
                continue
    
    if df_5m is None:
        raise DataLoadingError("Could not load 5m data from any path")
    
    # Try to load 30m data (same first-success policy as above)
    df_30m = None
    for path in data_paths_30m:
        if os.path.exists(path):
            try:
                df_30m = load_data_optimized(path, '30m', config)
                break
            except Exception as e:
                logger.warning(f"Failed to load from {path}: {e}")
                continue
    
    if df_30m is None:
        raise DataLoadingError("Could not load 30m data from any path")
    
    # Ensure time alignment: keep only the period covered by both frames
    common_start = max(df_5m.index[0], df_30m.index[0])
    common_end = min(df_5m.index[-1], df_30m.index[-1])
    
    # Label-based slicing on the index; both endpoints are inclusive
    df_5m = df_5m.loc[common_start:common_end]
    df_30m = df_30m.loc[common_start:common_end]
    
    print(f"\n5-minute data: {df_5m.index[0]} to {df_5m.index[-1]} ({len(df_5m):,} bars)")
    print(f"30-minute data: {df_30m.index[0]} to {df_30m.index[-1]} ({len(df_30m):,} bars)")
    print(f"Memory usage: {check_memory_usage():.2f} GB")
    
    # Save checkpoint: only summary metadata (shapes and date range) is
    # pickled, not the data itself
    if config.save_results:
        os.makedirs(config.results_path, exist_ok=True)
        checkpoint_path = os.path.join(config.results_path, 'data_checkpoint.pkl')
        with open(checkpoint_path, 'wb') as f:
            pickle.dump({
                'df_5m_shape': df_5m.shape,
                'df_30m_shape': df_30m.shape,
                'date_range': (df_5m.index[0], df_5m.index[-1])
            }, f)
        logger.info(f"Saved data checkpoint to {checkpoint_path}")
    
except DataLoadingError as e:
    # Expected failure mode: print troubleshooting hints, then re-raise so the
    # notebook stops here
    logger.error(f"Data loading failed: {e}")
    print(f"\nERROR: {e}")
    print("\nTroubleshooting steps:")
    print("1. Check that your CSV files have price data columns")
    print("2. Ensure the file paths are correct")
    print("3. Verify the CSV format is valid")
    print("\nYou can also manually inspect your CSV structure using:")
    print("pd.read_csv('your_file.csv', nrows=5)")
    raise
except Exception as e:
    # Anything else is logged and propagated unchanged
    logger.error(f"Unexpected error: {e}")
    raise

In [None]:
# Cell 3: Original FVG Detection and Optimized Indicators

# NOTE: fastmath is deliberately NOT enabled here. Fast-math lets LLVM assume
# that no value is NaN, which can optimise away the np.isnan() guards below,
# and this function both tests for NaN and uses NaN as its "no value" marker.
@njit(cache=True, parallel=True)
def calculate_all_indicators(close: np.ndarray, high: np.ndarray, low: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Compute WMA(5), WMA(20), RSI(5), RSI(20) and a 14-bar ATR in one pass.

    Parameters:
        close, high, low: 1-D float arrays of equal length (one entry per bar).

    Returns:
        Tuple ``(ma5, ma20, rsi5, rsi20, atr)`` of float64 arrays, each the
        same length as ``close``:

        * ``ma5`` / ``ma20``: linearly weighted moving averages of ``close``
          (weights 1..N, newest bar weighted most). NaN until N bars are
          available and wherever the window contains a NaN.
        * ``rsi5`` / ``rsi20``: RSI seeded with the simple mean of the first
          N gains/losses and then smoothed recursively as
          ``(avg * (N - 1) + x) / N``. Entries before the first computed
          value stay at the neutral default 50.0. When the average loss is
          zero the RSI is 100 if there were gains, otherwise 50.
        * ``atr``: mean of the strictly positive true-range values in the
          trailing 14-bar window; NaN for the first 14 bars and wherever the
          window has no positive true range.
    """
    n = len(close)

    # Pre-allocate arrays with default values
    ma5 = np.full(n, np.nan, dtype=np.float64)
    ma20 = np.full(n, np.nan, dtype=np.float64)
    rsi5 = np.full(n, 50.0, dtype=np.float64)
    rsi20 = np.full(n, 50.0, dtype=np.float64)
    atr = np.full(n, np.nan, dtype=np.float64)

    # Input validation
    if n == 0:
        return ma5, ma20, rsi5, rsi20, atr

    # Weighted Moving Averages with safe calculations
    weights5 = np.arange(1, 6, dtype=np.float64)
    weights20 = np.arange(1, 21, dtype=np.float64)
    sum_w5 = weights5.sum()
    sum_w20 = weights20.sum()

    # Each bar's WMA depends only on its own window, so bars are independent
    # and can be processed in parallel
    for i in prange(n):
        # 5-period WMA
        if i >= 4:
            window_data = close[i-4:i+1]
            if not np.any(np.isnan(window_data)):
                ma5[i] = np.dot(window_data, weights5) / sum_w5

        # 20-period WMA
        if i >= 19:
            window_data = close[i-19:i+1]
            if not np.any(np.isnan(window_data)):
                ma20[i] = np.dot(window_data, weights20) / sum_w20

    # RSI calculation with safe division
    if n > 1:
        deltas = np.diff(close)
        gains = np.maximum(deltas, 0)
        losses = -np.minimum(deltas, 0)

        # RSI 5
        if len(gains) >= 5:
            # Seed with the simple average of the first 5 changes
            avg_gain5 = np.mean(gains[:5])
            avg_loss5 = np.mean(losses[:5])

            if avg_loss5 > 0:
                rs5 = avg_gain5 / avg_loss5
                rsi5[5] = 100 - (100 / (1 + rs5))
            else:
                rsi5[5] = 100 if avg_gain5 > 0 else 50

            # Calculate remaining RSI values; gains[i] is the change into
            # bar i + 1, hence the i + 1 on the output index
            for i in range(5, min(n - 1, len(gains))):
                avg_gain5 = (avg_gain5 * 4 + gains[i]) / 5
                avg_loss5 = (avg_loss5 * 4 + losses[i]) / 5

                if avg_loss5 > 0:
                    rs5 = avg_gain5 / avg_loss5
                    rsi5[i + 1] = 100 - (100 / (1 + rs5))
                else:
                    rsi5[i + 1] = 100 if avg_gain5 > 0 else 50

        # RSI 20
        if len(gains) >= 20:
            # Seed with the simple average of the first 20 changes
            avg_gain20 = np.mean(gains[:20])
            avg_loss20 = np.mean(losses[:20])

            if avg_loss20 > 0:
                rs20 = avg_gain20 / avg_loss20
                rsi20[20] = 100 - (100 / (1 + rs20))
            else:
                rsi20[20] = 100 if avg_gain20 > 0 else 50

            # Calculate remaining RSI values
            for i in range(20, min(n - 1, len(gains))):
                avg_gain20 = (avg_gain20 * 19 + gains[i]) / 20
                avg_loss20 = (avg_loss20 * 19 + losses[i]) / 20

                if avg_loss20 > 0:
                    rs20 = avg_gain20 / avg_loss20
                    rsi20[i + 1] = 100 - (100 / (1 + rs20))
                else:
                    rsi20[i + 1] = 100 if avg_gain20 > 0 else 50

    # ATR calculation with safe operations
    if n > 1:
        # Calculate true range; bars with a NaN input get 0, which the
        # averaging step below treats as "no observation"
        tr = np.zeros(n, dtype=np.float64)
        tr[0] = high[0] - low[0] if not np.isnan(high[0]) and not np.isnan(low[0]) else 0

        for i in range(1, n):
            if not np.isnan(high[i]) and not np.isnan(low[i]) and not np.isnan(close[i-1]):
                hl = high[i] - low[i]
                hc = abs(high[i] - close[i-1])
                lc = abs(low[i] - close[i-1])
                tr[i] = max(hl, hc, lc)
            else:
                tr[i] = 0

        # Calculate ATR as the mean of the positive true ranges in the
        # trailing 14-bar window
        for i in range(14, n):
            window = tr[i-13:i+1]
            valid_values = window[window > 0]
            if len(valid_values) > 0:
                atr[i] = np.mean(valid_values)

    return ma5, ma20, rsi5, rsi20, atr

def detect_fvg(df, lookback_period=10, body_multiplier=1.5):
    """
    Detect Fair Value Gaps (FVGs) in historical price data.

    A gap is evaluated on every run of three consecutive candles
    (first, middle, third):

    * bullish: the third candle's low is above the first candle's high;
    * bearish: the third candle's high is below the first candle's low.

    In both cases the middle candle's body must exceed ``body_multiplier``
    times the average body size of the up to ``lookback_period`` candles
    preceding the middle candle (the average is floored at 0.001).

    Parameters:
        df (DataFrame): DataFrame with 'Open', 'High', 'Low', 'Close' columns
        lookback_period (int): Number of candles to look back for average body size
        body_multiplier (float): Multiplier to determine significant body size

    Returns:
        list: One entry per row of ``df``. ``None`` where no gap completes,
        otherwise a tuple reported at the index of the third candle:
        ``('bullish', first_high, third_low, i)`` or
        ``('bearish', first_low, third_high, i)``.

    Raises:
        KeyError: If ``df`` has at least 3 rows but lacks one of the OHLC
            columns. (Malformed input is reported instead of silently
            yielding a list of ``None``.)
    """
    row_count = len(df)

    # Create a list to store FVG results
    fvg_list = [None] * row_count

    # Can't form FVG with fewer than 3 candles
    if row_count < 3:
        print("Warning: Not enough data points to detect FVGs")
        return fvg_list

    # Pull the columns out once; indexing NumPy arrays in the loop is far
    # cheaper than repeated df[...].iloc[...] lookups
    highs = df['High'].to_numpy()
    lows = df['Low'].to_numpy()
    # Absolute candle body sizes, computed once for the whole frame
    bodies = (df['Close'] - df['Open']).abs()
    body_values = bodies.to_numpy()

    # Start from the third candle (index 2)
    for i in range(2, row_count):
        first_high = highs[i-2]
        first_low = lows[i-2]
        third_low = lows[i]
        third_high = highs[i]

        # Average body size of the candles before the middle candle
        start_idx = max(0, i-1-lookback_period)
        prev_bodies = bodies.iloc[start_idx:i-1]
        avg_body_size = prev_bodies.mean() if not prev_bodies.empty else 0.001
        avg_body_size = max(avg_body_size, 0.001)  # Keep the threshold strictly positive

        # The middle candle must be a significantly large one
        middle_body = body_values[i-1]
        is_significant = middle_body > avg_body_size * body_multiplier

        # Check for Bullish FVG (gap up)
        if third_low > first_high and is_significant:
            fvg_list[i] = ('bullish', first_high, third_low, i)

        # Check for Bearish FVG (gap down)
        elif third_high < first_low and is_significant:
            fvg_list[i] = ('bearish', first_low, third_high, i)

    return fvg_list

# The price data is passed in explicitly rather than read from a global
def process_fvg_for_strategy(df, fvg_list, n, active_bars=20):
    """Convert FVG list to boolean arrays with active zones

    Each detected gap marks its own bar and the following bars as active, for
    at most ``active_bars`` bars in total. The zone ends early once price
    trades through the gap's reference level (``level1`` of the tuple): a bar
    whose Low is below it for a bullish gap, or whose High is above it for a
    bearish gap. That invalidating bar is itself still marked active. When
    the relevant price column is absent, no invalidation is applied.

    Parameters:
        df (DataFrame): DataFrame with OHLC data ('Low' / 'High' are used)
        fvg_list (list): List of FVG tuples from detect_fvg
        n (int): Length of the data
        active_bars (int): Number of bars to keep FVG active

    Returns:
        tuple: (bull_active, bear_active) boolean arrays of length ``n``
    """
    bull_active = np.zeros(n, dtype=np.bool_)
    bear_active = np.zeros(n, dtype=np.bool_)

    # Extract the price columns once. Doing df.iloc[j][col] inside the loops
    # would build a whole row Series for every bar of every zone.
    lows = df['Low'].to_numpy() if 'Low' in df.columns else None
    highs = df['High'].to_numpy() if 'High' in df.columns else None

    for i, fvg in enumerate(fvg_list):
        if fvg is None:
            continue

        fvg_type, level1 = fvg[0], fvg[1]
        zone_end = min(i + active_bars, n)

        if fvg_type == 'bullish':
            # Mark active zone for bullish FVG
            for j in range(i, zone_end):
                bull_active[j] = True
                # Check if price breaks below the gap bottom (invalidation)
                if lows is not None and lows[j] < level1:
                    break

        elif fvg_type == 'bearish':
            # Mark active zone for bearish FVG
            for j in range(i, zone_end):
                bear_active[j] = True
                # Check if price breaks above the gap top (invalidation)
                if highs is not None and highs[j] > level1:
                    break

    return bull_active, bear_active

# Safe smoothing function
def safe_smooth(data: np.ndarray, window: int = 20, *, causal: bool = False) -> np.ndarray:
    """Apply a moving-average smoothing with NaN handling.

    NaNs are forward- then backward-filled before averaging, and positions
    that were NaN in the input are NaN again in the output. Samples outside
    the array count as zero, so values near the edges are pulled towards 0.

    WARNING (look-ahead): with ``causal=False`` (the default, kept for
    backward compatibility) the averaging window is centred on each sample,
    so every output value also depends on later samples (9 of them for the
    default window of 20). Pass ``causal=True`` when the result feeds
    trading signals: the window then covers the current sample and the
    ``window - 1`` samples before it only.

    Parameters:
        data: 1-D array to smooth.
        window: Number of samples in the averaging window.
        causal: Use a trailing window instead of a centred one.

    Returns:
        The smoothed array, or ``data`` itself (unchanged, not a copy) when
        it has fewer than ``window`` samples.
    """
    if len(data) < window:
        return data

    # Replace NaN with forward fill (then backward fill for leading NaNs)
    filled_data = pd.Series(data).ffill().bfill().values

    # Apply convolution with a flat kernel
    kernel = np.ones(window) / window
    if causal:
        # The first len(data) samples of the full convolution are exactly the
        # trailing-window averages
        smoothed = np.convolve(filled_data, kernel, mode='full')[:len(data)]
    else:
        smoothed = np.convolve(filled_data, kernel, mode='same')

    # Restore NaN where original data had NaN
    smoothed[np.isnan(data)] = np.nan

    return smoothed

# Compute the indicators for both timeframes. This section defines the
# module-level arrays (ma5, ma20, rsi5, rsi20, rsi5_smooth, rsi20_smooth,
# atr_30m, atr_5m, fvg_bull, fvg_bear, ...) from df_30m and df_5m.
print("\nCalculating all indicators with parallel processing...")
logger.info("Starting indicator calculations")
start_time = time.time()

try:
    # Calculate 30-minute indicators (inputs converted to float64 arrays)
    close_30m = df_30m['Close'].values.astype(np.float64)
    high_30m = df_30m['High'].values.astype(np.float64)
    low_30m = df_30m['Low'].values.astype(np.float64)
    
    ma5, ma20, rsi5, rsi20, atr_30m = calculate_all_indicators(
        close_30m, high_30m, low_30m
    )
    
    # Smooth both RSI series with a 20-sample window
    rsi5_smooth = safe_smooth(rsi5, 20)
    rsi20_smooth = safe_smooth(rsi20, 20)
    
    # Calculate 5-minute indicators
    close_5m = df_5m['Close'].values.astype(np.float64)
    high_5m = df_5m['High'].values.astype(np.float64)
    low_5m = df_5m['Low'].values.astype(np.float64)
    
    # Only the ATR is kept for the 5-minute series; the other outputs are discarded
    _, _, _, _, atr_5m = calculate_all_indicators(
        close_5m, high_5m, low_5m
    )
    
    # Detect FVG using original function
    print("Detecting FVGs using original logic...")
    fvg_list = detect_fvg(df_5m, lookback_period=10, body_multiplier=1.5)
    
    # Turn the per-bar FVG tuples into boolean "zone active" arrays; the
    # 5-minute frame is passed explicitly for the invalidation checks
    fvg_bull, fvg_bear = process_fvg_for_strategy(df_5m, fvg_list, len(df_5m), config.fvg_active_bars)
    
    # Elapsed time covers everything since start_time, FVG processing included
    calc_time = time.time() - start_time
    
    # Log statistics
    logger.info(f"Indicators calculated in {calc_time:.3f} seconds")
    logger.info(f"MA5 valid values: {(~np.isnan(ma5)).sum()}/{len(ma5)}")
    logger.info(f"MA20 valid values: {(~np.isnan(ma20)).sum()}/{len(ma20)}")
    logger.info(f"RSI5 range: [{np.nanmin(rsi5):.1f}, {np.nanmax(rsi5):.1f}]")
    logger.info(f"RSI20 range: [{np.nanmin(rsi20):.1f}, {np.nanmax(rsi20):.1f}]")
    logger.info(f"ATR 30m valid: {(~np.isnan(atr_30m)).sum()}/{len(atr_30m)}")
    logger.info(f"ATR 5m valid: {(~np.isnan(atr_5m)).sum()}/{len(atr_5m)}")
    
    # Count FVGs (detections), as opposed to the number of active-zone bars
    # reported further below
    fvg_count = sum(1 for fvg in fvg_list if fvg is not None)
    bull_fvg_count = sum(1 for fvg in fvg_list if fvg is not None and fvg[0] == 'bullish')
    bear_fvg_count = sum(1 for fvg in fvg_list if fvg is not None and fvg[0] == 'bearish')
    
    logger.info(f"FVGs detected - Total: {fvg_count}, Bull: {bull_fvg_count}, Bear: {bear_fvg_count}")
    logger.info(f"FVG zones - Bull: {fvg_bull.sum():,}, Bear: {fvg_bear.sum():,}")
    
    print(f"All indicators calculated in {calc_time:.3f} seconds")
    print(f"FVGs detected - Total: {fvg_count}, Bullish: {bull_fvg_count}, Bearish: {bear_fvg_count}")
    print(f"FVG zones active - Bull: {fvg_bull.sum():,}, Bear: {fvg_bear.sum():,}")
    
    # Memory cleanup: only triggered above 80% of the configured memory budget
    if check_memory_usage() > config.max_memory_gb * 0.8:
        cleanup_memory()
    
except Exception as e:
    # Log and propagate unchanged so the notebook stops at this cell
    logger.error(f"Error calculating indicators: {str(e)}")
    raise

In [None]:
# Cell 4: Original MLMI Implementation with cKDTree

import numpy as np
import pandas as pd
from numba import njit, prange, float64, int64, boolean
from numba.experimental import jitclass
from scipy.spatial import cKDTree  # Using cKDTree for fast kNN

# Define spec for jitclass (attribute name -> Numba type)
spec = [
    ('parameter1', float64[:]),
    ('parameter2', float64[:]),
    ('priceArray', float64[:]),
    ('resultArray', int64[:]),
    ('size', int64)
]

# Create a JIT-compiled MLMI data class for maximum performance
@jitclass(spec)
class MLMIDataFast:
    """Append-only store of (parameter1, parameter2, price, result) records.

    Records live in pre-allocated arrays; only the first ``size`` entries of
    each array are valid. ``resultArray[k]`` is +1 when record k's price is
    greater than or equal to the price of record k-1, -1 otherwise, and 0
    for the very first record.

    The arrays start with ``max_size`` slots and are doubled automatically
    when full, so their length may exceed ``max_size``; always slice with
    ``[:size]`` when reading.
    """

    def __init__(self, max_size=10000):
        # Pre-allocate arrays with maximum size for better performance
        self.parameter1 = np.zeros(max_size, dtype=np.float64)
        self.parameter2 = np.zeros(max_size, dtype=np.float64)
        self.priceArray = np.zeros(max_size, dtype=np.float64)
        self.resultArray = np.zeros(max_size, dtype=np.int64)
        self.size = 0

    def storePreviousTrade(self, p1, p2, close_price):
        """Append one record, labelling it against the previously stored price."""
        # Grow the buffers when full. Compiled code does no bounds checking
        # by default, so writing past the end would corrupt memory instead of
        # raising.
        capacity = self.parameter1.shape[0]
        if self.size >= capacity:
            new_capacity = max(1, 2 * capacity)

            grown_p1 = np.zeros(new_capacity, dtype=np.float64)
            grown_p1[:capacity] = self.parameter1
            self.parameter1 = grown_p1

            grown_p2 = np.zeros(new_capacity, dtype=np.float64)
            grown_p2[:capacity] = self.parameter2
            self.parameter2 = grown_p2

            grown_price = np.zeros(new_capacity, dtype=np.float64)
            grown_price[:capacity] = self.priceArray
            self.priceArray = grown_price

            grown_result = np.zeros(new_capacity, dtype=np.int64)
            grown_result[:capacity] = self.resultArray
            self.resultArray = grown_result

        if self.size > 0:
            # Compare with the last stored price before appending
            result = 1 if close_price >= self.priceArray[self.size-1] else -1
        else:
            # Neutral for first entry
            result = 0

        slot = self.size
        self.parameter1[slot] = p1
        self.parameter2[slot] = p2
        self.priceArray[slot] = close_price
        self.resultArray[slot] = result
        self.size = slot + 1

# Optimized core functions with parallel processing
@njit(fastmath=True, parallel=True, cache=True)
def wma_numba_fast(series, length):
    """
    Linearly weighted moving average over a trailing window of `length` bars.

    The newest bar in the window gets weight `length`, the oldest weight 1.
    Positions before the first full window (index < length-1) are left at 0.0.
    """
    total = len(series)
    out = np.zeros(total, dtype=np.float64)

    # The weight ramp 1..length and its sum never change, so build them once
    ramp = np.arange(1, length + 1, dtype=np.float64)
    ramp_sum = np.sum(ramp)

    # Every output position is independent, hence the parallel loop
    for end in prange(length-1, total):
        acc = 0.0
        # age = 0 is the newest bar of the window and pairs with the largest weight
        for age in range(length):
            acc += series[end-age] * ramp[length-age-1]
        out[end] = acc / ramp_sum

    return out

@njit(fastmath=True, cache=True)
def calculate_rsi_numba_fast(prices, window):
    """
    RSI with a simple-average seed followed by Wilder smoothing.

    The first RSI value is written at index window-1 and is seeded with the
    plain mean of the first `window` gain/loss entries (entry 0 is always
    zero because it has no previous bar). Earlier positions stay 0.0. An
    average loss of exactly zero yields an RSI of 100.
    """
    n = len(prices)
    rsi = np.zeros(n, dtype=np.float64)

    # Split each bar-to-bar change into its gain and loss component
    gain = np.zeros(n, dtype=np.float64)
    loss = np.zeros(n, dtype=np.float64)
    for i in range(1, n):
        change = float(prices[i] - prices[i-1])
        if change > 0:
            gain[i] = change
        else:
            loss[i] = -change

    # Running averages, carried as scalars from bar to bar
    mean_gain = 0.0
    mean_loss = 0.0

    # Seed: plain mean over the first window
    if window <= n:
        mean_gain = np.sum(gain[:window]) / window
        mean_loss = np.sum(loss[:window]) / window

        if mean_loss == 0:
            rsi[window-1] = 100
        else:
            rsi[window-1] = 100 - (100 / (1 + mean_gain / mean_loss))

    # Wilder smoothing for every later bar
    carry = window - 1
    inv_window = 1.0 / window
    for i in range(window, n):
        mean_gain = (mean_gain * carry + gain[i]) * inv_window
        mean_loss = (mean_loss * carry + loss[i]) * inv_window

        if mean_loss == 0:
            rsi[i] = 100
        else:
            rsi[i] = 100 - (100 / (1 + mean_gain / mean_loss))

    return rsi

# Use cKDTree for lightning-fast kNN queries
def fast_knn_predict(param1_array, param2_array, result_array, p1, p2, k, size):
    """
    Sum the labels of the nearest stored samples to the point (p1, p2).

    Only the first `size` entries of param1_array / param2_array are treated
    as stored samples; at most min(k, size) neighbours are used. Returns 0
    when no samples are stored.
    """
    if size == 0:
        return 0

    # One row per stored sample: (parameter1, parameter2)
    history = np.stack((param1_array[:size], param2_array[:size]), axis=1)

    # Never ask for more neighbours than there are samples
    neighbour_count = min(k, size)
    _, nearest = cKDTree(history).query(np.array([p1, p2]), k=neighbour_count)

    # The prediction is the plain sum of the neighbours' labels
    votes = result_array[nearest]
    return np.sum(votes)

def _crossed_above(values, level):
    """Mark bars where `values` goes from <= `level` on the previous bar to > `level`."""
    values = np.asarray(values, dtype=np.float64)
    level = np.broadcast_to(np.asarray(level, dtype=np.float64), values.shape)
    crossed = np.zeros(values.shape[0], dtype=np.bool_)
    if values.shape[0] > 1:
        # Comparisons involving NaN are False, so NaN bars never flag a cross
        crossed[1:] = (values[1:] > level[1:]) & (values[:-1] <= level[:-1])
    return crossed


def _crossed_below(values, level):
    """Mark bars where `values` goes from >= `level` on the previous bar to < `level`."""
    values = np.asarray(values, dtype=np.float64)
    level = np.broadcast_to(np.asarray(level, dtype=np.float64), values.shape)
    crossed = np.zeros(values.shape[0], dtype=np.bool_)
    if values.shape[0] > 1:
        crossed[1:] = (values[1:] < level[1:]) & (values[:-1] >= level[:-1])
    return crossed


def calculate_mlmi_optimized(df, num_neighbors=200, momentum_window=20):
    """
    Compute the MLMI indicator and its derived signals for a price dataframe.

    Steps:
      1. WMA(5) / WMA(20) of 'Close' and RSI(5) / RSI(20), each RSI smoothed
         with a WMA of length `momentum_window`.
      2. Every WMA(5)/WMA(20) crossover bar is stored as a training sample
         (smoothed slow RSI, smoothed quick RSI, close) in an MLMIDataFast.
      3. For each bar from `momentum_window` onwards the MLMI value is the sum
         of the labels of the `num_neighbors` nearest stored samples.
         Only samples recorded at or before the bar are used, so the value
         never depends on future crossovers. Bars before the first stored
         sample keep an MLMI of 0.
      4. WMA(20) of MLMI, rolling bands, and crossover flags are added.

    Args:
        df: DataFrame with a 'Close' column. It is not modified.
        num_neighbors: maximum number of neighbours used per prediction.
        momentum_window: WMA length applied to the RSI series; also the first
            bar index for which a prediction is made.

    Returns:
        A copy of `df` with the indicator, band and signal columns appended.
    """
    print("Preparing data for MLMI calculation...")
    # Get numpy arrays for better performance
    close_array = df['Close'].values
    n = len(close_array)

    print("Calculating RSI and moving averages...")
    ma_quick = wma_numba_fast(close_array, 5)
    ma_slow = wma_numba_fast(close_array, 20)

    rsi_quick = calculate_rsi_numba_fast(close_array, 5)
    rsi_slow = calculate_rsi_numba_fast(close_array, 20)

    # Apply WMA to RSI values
    rsi_quick_wma = wma_numba_fast(rsi_quick, momentum_window)
    rsi_slow_wma = wma_numba_fast(rsi_slow, momentum_window)

    # Detect MA crossovers
    print("Detecting moving average crossovers...")
    pos = _crossed_above(ma_quick, ma_slow)
    neg = _crossed_below(ma_quick, ma_slow)

    print("Processing crossovers and calculating MLMI values...")
    # Bars whose features are usable (same NaN guard for storing and querying)
    usable = ~(np.isnan(rsi_slow_wma) | np.isnan(rsi_quick_wma))
    sample_bars = np.flatnonzero((pos | neg) & usable)

    # Size the store to the number of samples so it can never overflow
    mlmi_data = MLMIDataFast(max_size=int(max(sample_bars.size, 1)))
    for i in sample_bars:
        mlmi_data.storePreviousTrade(
            rsi_slow_wma[i],
            rsi_quick_wma[i],
            close_array[i]
        )

    stored = int(mlmi_data.size)
    features = np.column_stack((mlmi_data.parameter1[:stored],
                                mlmi_data.parameter2[:stored]))
    labels = mlmi_data.resultArray[:stored]
    queries = np.column_stack((rsi_slow_wma, rsi_quick_wma))
    mlmi_values = np.zeros(n, dtype=np.float64)

    # Sample s becomes visible at bar sample_bars[s]. All bars from there up to
    # (but excluding) the next sample bar see exactly samples 0..s, so one tree
    # per sample serves that whole segment with a single batched query.
    for s in range(stored):
        seg_start = max(int(sample_bars[s]), momentum_window)
        seg_end = int(sample_bars[s + 1]) if s + 1 < stored else n
        if seg_start >= seg_end:
            continue
        bars = seg_start + np.flatnonzero(usable[seg_start:seg_end])
        if bars.size == 0:
            continue

        known = s + 1
        tree = cKDTree(features[:known])
        _, neighbor_idx = tree.query(queries[bars], k=min(num_neighbors, known))
        # query() drops the neighbour axis when k == 1; restore it
        neighbor_idx = np.asarray(neighbor_idx).reshape(bars.size, -1)
        mlmi_values[bars] = labels[neighbor_idx].sum(axis=1)

    # Add results to dataframe (do this all at once)
    df_result = df.copy()
    df_result['ma_quick'] = ma_quick
    df_result['ma_slow'] = ma_slow
    df_result['rsi_quick'] = rsi_quick
    df_result['rsi_slow'] = rsi_slow
    df_result['rsi_quick_wma'] = rsi_quick_wma
    df_result['rsi_slow_wma'] = rsi_slow_wma
    df_result['pos'] = pos
    df_result['neg'] = neg
    df_result['mlmi'] = mlmi_values

    # Calculate WMA of MLMI
    mlmi_ma = wma_numba_fast(mlmi_values, 20)
    df_result['mlmi_ma'] = mlmi_ma

    # Calculate bands and other derived values
    print("Calculating bands and crossovers...")

    mlmi_series = pd.Series(mlmi_values)
    highest_values = mlmi_series.rolling(window=2000, min_periods=1).max().values
    lowest_values = mlmi_series.rolling(window=2000, min_periods=1).min().values
    mlmi_std = mlmi_series.rolling(window=20).std().values
    ema_std = pd.Series(mlmi_std).ewm(span=20).mean().values

    upper_band = highest_values - ema_std
    lower_band = lowest_values + ema_std

    # Add band values to dataframe
    df_result['upper'] = highest_values
    df_result['lower'] = lowest_values
    df_result['upper_band'] = upper_band
    df_result['lower_band'] = lower_band

    # Crossover signals, computed on plain arrays instead of per-row .iloc lookups
    mlmi_bull_cross = _crossed_above(mlmi_values, mlmi_ma)
    mlmi_bear_cross = _crossed_below(mlmi_values, mlmi_ma)
    mlmi_ob_cross = _crossed_above(mlmi_values, upper_band)
    mlmi_ob_exit = _crossed_below(mlmi_values, upper_band)
    mlmi_os_cross = _crossed_below(mlmi_values, lower_band)
    mlmi_os_exit = _crossed_above(mlmi_values, lower_band)
    mlmi_mid_up = _crossed_above(mlmi_values, 0.0)
    mlmi_mid_down = _crossed_below(mlmi_values, 0.0)

    # Add crossover signals to dataframe
    df_result['mlmi_bull_cross'] = mlmi_bull_cross
    df_result['mlmi_bear_cross'] = mlmi_bear_cross
    df_result['mlmi_ob_cross'] = mlmi_ob_cross
    df_result['mlmi_ob_exit'] = mlmi_ob_exit
    df_result['mlmi_os_cross'] = mlmi_os_cross
    df_result['mlmi_os_exit'] = mlmi_os_exit
    df_result['mlmi_mid_up'] = mlmi_mid_up
    df_result['mlmi_mid_down'] = mlmi_mid_down

    # Count signals
    print(f"\nMLMI Signal Summary:")
    summary = (
        ("Bullish MA Crosses", mlmi_bull_cross),
        ("Bearish MA Crosses", mlmi_bear_cross),
        ("Overbought Crosses", mlmi_ob_cross),
        ("Overbought Exits", mlmi_ob_exit),
        ("Oversold Crosses", mlmi_os_cross),
        ("Oversold Exits", mlmi_os_exit),
        ("Zero Line Crosses Up", mlmi_mid_up),
        ("Zero Line Crosses Down", mlmi_mid_down),
    )
    for label, flags in summary:
        print(f"- {label}: {np.sum(flags)}")

    return df_result

# Run the kNN-based MLMI on the 30-minute bars and time the call
print("\nCalculating original MLMI with kNN pattern matching...")
start_time = time.time()
df_30m = calculate_mlmi_optimized(
    df_30m,
    num_neighbors=config.mlmi_k_neighbors,
    momentum_window=20,
)
mlmi_time = time.time() - start_time
print(f"Original MLMI calculated in {mlmi_time:.3f} seconds")

# Keep the raw MLMI values around as a plain array
mlmi_values = df_30m['mlmi'].values

# State-based signals (not crossover events): bullish while MLMI sits above
# its moving average, bearish while it sits below
df_30m['mlmi_bull'] = df_30m['mlmi'].gt(df_30m['mlmi_ma'])
df_30m['mlmi_bear'] = df_30m['mlmi'].lt(df_30m['mlmi_ma'])

print(f"MLMI range: [{mlmi_values.min():.1f}, {mlmi_values.max():.1f}]")
print(f"MLMI bullish signals (above MA): {df_30m['mlmi_bull'].sum():,}")
print(f"MLMI bearish signals (below MA): {df_30m['mlmi_bear'].sum():,}")

In [None]:
# Cell 5: Original NW-RQK Implementation

import numpy as np
import pandas as pd
from numba import jit, njit, prange, float64, boolean

# Define parameters (matching PineScript defaults)
# NOTE: x_0 is also read as a module-level global inside kernel_regression_numba
# below, so it must be defined before that function is compiled.
src_col = 'Close'  # Default source is close price
h = 8.0            # Lookback window
r = 8.0            # Relative weighting
x_0 = 25           # Start regression at bar
lag = 2            # Lag for crossover detection
smooth_colors = False  # Smooth colors option

# JIT-compiled kernel regression function
@njit(float64(float64[:], int64, float64, float64), cache=True)
def kernel_regression_numba(src, size, h_param, r_param):
    """
    Nadaraya-Watson estimate with a Rational Quadratic kernel.

    `src[k]` is the value k bars back from the bar being estimated. At most
    size + x_0 + 1 entries are used (x_0 is the module-level constant),
    capped at len(src). Returns NaN when the weights sum to zero.
    """
    weighted_total = 0.0
    weight_total = 0.0

    # Never read past the available history
    lookback = min(size + x_0 + 1, len(src))
    # Denominator of the kernel argument; constant across the loop
    scale = (h_param**2) * 2 * r_param

    for bars_back in range(lookback):
        # Rational Quadratic Kernel
        kernel_w = (1 + (bars_back**2 / scale))**(-r_param)
        weighted_total += src[bars_back] * kernel_w
        weight_total += kernel_w

    if weight_total == 0:
        return np.nan

    return weighted_total / weight_total

# JIT-compiled function to process the entire series
@njit(parallel=True, cache=True)
def calculate_nw_regression(prices, h_param, h_lag_param, r_param, x_0_param):
    """
    Calculate Nadaraya-Watson regression for the entire price series.

    Args:
        prices: 1-D array of source prices, oldest first.
        h_param: kernel lookback used for the primary estimate (yhat1).
        h_lag_param: kernel lookback used for the lagged estimate (yhat2).
        r_param: relative weighting of the Rational Quadratic kernel.
        x_0_param: first bar index for which estimates are produced.

    Returns:
        (yhat1, yhat2): arrays of len(prices); entries before x_0_param are NaN.
    """
    n = len(prices)
    yhat1 = np.full(n, np.nan)
    yhat2 = np.full(n, np.nan)

    # Each bar is independent of the others, so the bars run in parallel
    for i in prange(n):
        if i >= x_0_param:  # Only start calculation after x_0 bars
            # History for this bar, newest first: src[j] is the price j bars back
            window_size = i + 1
            src = np.zeros(window_size)
            for j in range(window_size):
                src[j] = prices[i-j]

            yhat1[i] = kernel_regression_numba(src, i, h_param, r_param)
            # Use the lookback the caller passed in rather than a module-level
            # `lag`, which would be frozen at compile time
            yhat2[i] = kernel_regression_numba(src, i, h_lag_param, r_param)

    return yhat1, yhat2

# JIT-compiled function to detect crossovers
@njit(cache=True)
def detect_crosses(yhat1, yhat2):
    """
    Flag bars where yhat2 crosses yhat1.

    Returns (bullish_cross, bearish_cross): bullish when yhat2 moves from
    <= yhat1 on the previous bar to > yhat1, bearish for the mirror case.
    Bars where any of the four values involved is NaN are never flagged.
    """
    n = len(yhat1)
    bullish_cross = np.zeros(n, dtype=np.bool_)
    bearish_cross = np.zeros(n, dtype=np.bool_)

    for bar in range(1, n):
        base_now, base_prev = yhat1[bar], yhat1[bar-1]
        lag_now, lag_prev = yhat2[bar], yhat2[bar-1]

        # Skip bars without a complete pair of estimates
        if np.isnan(base_now) or np.isnan(lag_now) or \
           np.isnan(base_prev) or np.isnan(lag_prev):
            continue

        # Bullish cross (yhat2 crosses above yhat1)
        if lag_now > base_now and lag_prev <= base_prev:
            bullish_cross[bar] = True

        # Bearish cross (yhat2 crosses below yhat1)
        if lag_now < base_now and lag_prev >= base_prev:
            bearish_cross[bar] = True

    return bullish_cross, bearish_cross

def calculate_nw_rqk(df, src_col='Close', h=8.0, r=8.0, x_0=25, lag=2, smooth_colors=False):
    """
    Add the Nadaraya-Watson RQK estimates and derived signal columns to `df`.

    The dataframe is modified in place and also returned. `yhat1` uses
    lookback `h`, `yhat2` uses `h - lag`. `smooth_colors` selects whether
    plot colours and alerts follow the yhat2/yhat1 crossover or the
    bar-to-bar direction of yhat1.
    """
    print("Calculating Nadaraya-Watson Regression with Rational Quadratic Kernel...")

    # Numba works on plain arrays
    source_prices = df[src_col].values
    yhat1, yhat2 = calculate_nw_regression(source_prices, h, h-lag, r, x_0)

    df['yhat1'] = yhat1
    df['yhat2'] = yhat2

    # Bar-to-bar direction of yhat1, now and one bar earlier
    est_now = df['yhat1']
    est_prev = est_now.shift(1)
    est_prev2 = est_now.shift(2)
    df['wasBearish'] = est_prev2 > est_prev
    df['wasBullish'] = est_prev2 < est_prev
    df['isBearish'] = est_prev > est_now
    df['isBullish'] = est_prev < est_now
    df['isBearishChange'] = df['isBearish'] & df['wasBullish']
    df['isBullishChange'] = df['isBullish'] & df['wasBearish']

    # yhat2 / yhat1 crossovers
    df['isBullishCross'], df['isBearishCross'] = detect_crosses(yhat1, yhat2)

    # Smooth colour conditions
    df['isBullishSmooth'] = df['yhat2'] > df['yhat1']
    df['isBearishSmooth'] = df['yhat2'] < df['yhat1']

    # Colours (matches PineScript)
    green = '#3AFF17'
    red = '#FD1707'
    df['colorByCross'] = np.where(df['isBullishSmooth'], green, red)
    df['colorByRate'] = np.where(df['isBullish'], green, red)

    # Pick the colour / alert sources according to the smoothing setting
    if smooth_colors:
        plot_source = df['colorByCross']
        bull_alert_source = df['isBearishCross']
        bear_alert_source = df['isBullishCross']
    else:
        plot_source = df['colorByRate']
        bull_alert_source = df['isBearishChange']
        bear_alert_source = df['isBullishChange']
    df['plotColor'] = plot_source
    df['alertBullish'] = bull_alert_source
    df['alertBearish'] = bear_alert_source

    # Alert stream: -1 where alertBearish, else 1 where alertBullish, else 0
    df['alertStream'] = np.where(df['alertBearish'], -1,
                                np.where(df['alertBullish'], 1, 0))

    # Count signals
    bullish_changes = df['isBullishChange'].sum()
    bearish_changes = df['isBearishChange'].sum()
    bullish_crosses = df['isBullishCross'].sum()
    bearish_crosses = df['isBearishCross'].sum()

    print(f"\nNW-RQK Signal Summary:")
    print(f"- Bullish Rate Changes: {bullish_changes}")
    print(f"- Bearish Rate Changes: {bearish_changes}")
    print(f"- Bullish Crosses: {bullish_crosses}")
    print(f"- Bearish Crosses: {bearish_crosses}")

    return df

# Run NW-RQK on the 30-minute bars, time it, and derive the trend-state columns
print("\nCalculating original NW-RQK with single Rational Quadratic kernel...")
logger.info("Starting NW-RQK calculation")
start_time = time.time()

try:
    df_30m = calculate_nw_rqk(
        df_30m,
        src_col='Close',
        h=config.nwrqk_h,
        r=config.nwrqk_r,
        x_0=25,
        lag=config.nwrqk_lag,
        smooth_colors=False,
    )
    nwrqk_time = time.time() - start_time

    # Continuous trend states: isBullish / isBearish hold on every bar the
    # estimate is rising / falling, not only on the bar where it turns
    df_30m['nwrqk_bull'] = df_30m['isBullish'].fillna(False)
    df_30m['nwrqk_bear'] = df_30m['isBearish'].fillna(False)

    logger.info(f"NW-RQK calculated in {nwrqk_time:.3f} seconds")

    print(f"Original NW-RQK calculated in {nwrqk_time:.3f} seconds")
    print(f"Bull signals (trend direction): {df_30m['nwrqk_bull'].sum():,}")
    print(f"Bear signals (trend direction): {df_30m['nwrqk_bear'].sum():,}")

    # Free memory once usage passes 80% of the configured budget
    memory_budget = config.max_memory_gb * 0.8
    if check_memory_usage() > memory_budget:
        cleanup_memory()

except Exception as e:
    logger.error(f"Error calculating NW-RQK: {str(e)}")
    # Leave the NW-RQK columns in a defined, neutral state before re-raising
    df_30m['nwrqk_bull'] = False
    df_30m['nwrqk_bear'] = False
    df_30m['yhat1'] = np.nan
    df_30m['yhat2'] = np.nan
    raise

In [None]:
# Cell 6: Smart Timeframe Alignment

@njit(fastmath=True, cache=True)
def create_alignment_map(timestamps_5m: np.ndarray, timestamps_30m: np.ndarray) -> np.ndarray:
    """
    Map every 5-minute bar to the index of a 30-minute bar.

    For each 5m timestamp the result is the index of the last 30m timestamp
    that is <= it (0 when the 5m timestamp precedes all 30m timestamps).
    Both inputs are assumed to be sorted ascending: the 30m cursor only ever
    moves forward.

    The scan is deliberately sequential. The cursor `j` is carried from one
    iteration to the next, which a parallel loop cannot do safely.

    NOTE(review): if the 30m index labels bars by their open time, a 5m bar
    is mapped to a 30m bar that has not closed yet. Confirm the labelling
    convention of df_30m before trusting the aligned signals.
    """
    n_5m = len(timestamps_5m)
    mapping = np.zeros(n_5m, dtype=np.int64)
    last_30m = len(timestamps_30m) - 1

    j = 0
    for i in range(n_5m):
        # Advance to the last 30m bar that starts at or before this 5m bar
        while j < last_30m and timestamps_30m[j + 1] <= timestamps_5m[i]:
            j += 1
        mapping[i] = j

    return mapping

print("\nPerforming smart timeframe alignment...")
start_time = time.time()

# Integer timestamps keep the index's full precision for the comparison
timestamps_5m = df_5m.index.values.astype(np.int64)
timestamps_30m = df_30m.index.values.astype(np.int64)

# For every 5m bar: index of the 30m bar it belongs to
mapping = create_alignment_map(timestamps_5m, timestamps_30m)

# Copy the 5m frame and attach the 30m indicators (looked up through the
# mapping) together with the 5m FVG zones
df_5m_aligned = df_5m.assign(
    mlmi=df_30m['mlmi'].values[mapping],
    mlmi_bull=df_30m['mlmi_bull'].values[mapping],
    mlmi_bear=df_30m['mlmi_bear'].values[mapping],
    nwrqk_bull=df_30m['nwrqk_bull'].values[mapping],
    nwrqk_bear=df_30m['nwrqk_bear'].values[mapping],
    fvg_bull=fvg_bull,
    fvg_bear=fvg_bear,
)

# Market regime: 20-bar return volatility, and the absolute 50-bar mean
# return expressed in units of that volatility
df_5m_aligned['volatility'] = df_5m_aligned['Returns'].rolling(20).std()
df_5m_aligned['trend_strength'] = (
    df_5m_aligned['Returns'].rolling(50).mean().abs() / df_5m_aligned['volatility']
)

align_time = time.time() - start_time
print(f"Smart alignment completed in {align_time:.3f} seconds")
print(f"Aligned {len(df_5m_aligned):,} 5-minute bars")

In [None]:
# Cell 7: MLMI → NW-RQK → FVG Synergy Detection (Improved Logic)

@njit(fastmath=True, cache=True)
def detect_mlmi_nwrqk_fvg_synergy(mlmi_bull: np.ndarray, mlmi_bear: np.ndarray,
                                 nwrqk_bull: np.ndarray, nwrqk_bear: np.ndarray,
                                 fvg_bull: np.ndarray, fvg_bear: np.ndarray,
                                 window: int = 30,
                                 min_bars_between: int = 10) -> Tuple[np.ndarray, np.ndarray]:
    """
    State-based MLMI -> NW-RQK -> FVG synergy detection.

    A long signal is emitted on a bar where all three bullish states are
    true and at least `min_bars_between` bars have passed since the last
    long signal; short signals mirror this with the bearish states. The
    long check runs first, so a bar never carries both signals.

    The scan is sequential because each signal depends on the bar of the
    previous one, so the function is not compiled with parallel=True.

    Args:
        mlmi_bull, mlmi_bear: MLMI state per bar.
        nwrqk_bull, nwrqk_bear: NW-RQK trend state per bar.
        fvg_bull, fvg_bear: active FVG zone per bar.
        window: only seeds the "last signal" markers at -window so early
            bars are not blocked by the cooldown.
        min_bars_between: cooldown in bars between two signals of the same
            direction (default 10).

    Returns:
        (long_signals, short_signals) boolean arrays of len(mlmi_bull).
    """
    n = len(mlmi_bull)
    long_signals = np.zeros(n, dtype=np.bool_)
    short_signals = np.zeros(n, dtype=np.bool_)

    # Track last signal bar to prevent immediate re-entry
    last_long_bar = -window
    last_short_bar = -window

    for i in range(2, n):  # The first two bars are never evaluated
        # Check for bullish synergy
        if (mlmi_bull[i] and  # MLMI is bullish (state)
            nwrqk_bull[i] and  # NW-RQK confirms bullish trend (state)
            fvg_bull[i] and  # FVG bullish zone is active
            (i - last_long_bar) >= min_bars_between):  # Cooldown since last long

            long_signals[i] = True
            last_long_bar = i

        # Check for bearish synergy
        elif (mlmi_bear[i] and  # MLMI is bearish (state)
              nwrqk_bear[i] and  # NW-RQK confirms bearish trend (state)
              fvg_bear[i] and  # FVG bearish zone is active
              (i - last_short_bar) >= min_bars_between):  # Cooldown since last short

            short_signals[i] = True
            last_short_bar = i

    return long_signals, short_signals

print("\nDetecting MLMI → NW-RQK → FVG synergy signals...")
start_time = time.time()

# Pull the six state columns out as plain arrays for the compiled detector
mlmi_bull_arr = df_5m_aligned['mlmi_bull'].to_numpy()
mlmi_bear_arr = df_5m_aligned['mlmi_bear'].to_numpy()
nwrqk_bull_arr = df_5m_aligned['nwrqk_bull'].to_numpy()
nwrqk_bear_arr = df_5m_aligned['nwrqk_bear'].to_numpy()
fvg_bull_arr = df_5m_aligned['fvg_bull'].to_numpy()
fvg_bear_arr = df_5m_aligned['fvg_bear'].to_numpy()

# Run the state-based synergy detector
long_entries, short_entries = detect_mlmi_nwrqk_fvg_synergy(
    mlmi_bull_arr,
    mlmi_bear_arr,
    nwrqk_bull_arr,
    nwrqk_bear_arr,
    fvg_bull_arr,
    fvg_bear_arr,
    window=config.synergy_window,
)

# Bars carrying a signal in either direction
all_signals = long_entries | short_entries

# Store the signals on the aligned frame
df_5m_aligned['long_entry'] = long_entries
df_5m_aligned['short_entry'] = short_entries

# The backtesting code expects a signal_quality column: 1.0 on signal bars, 0.0 elsewhere
df_5m_aligned['signal_quality'] = all_signals.astype(np.float64)

signal_time = time.time() - start_time
print(f"Synergy detection completed in {signal_time:.3f} seconds")
print(f"Long entries: {long_entries.sum():,}")
print(f"Short entries: {short_entries.sum():,}")

# Average spacing between consecutive signals (needs at least two of them)
if all_signals.sum() > 1:
    signal_indices = np.flatnonzero(all_signals)
    avg_bars_between = np.mean(np.diff(signal_indices))
    print(f"Average bars between signals: {avg_bars_between:.1f} (≈ {avg_bars_between * 5:.0f} minutes)")

In [None]:
# Cell 8: Ultra-Fast VectorBT Backtesting

@njit(fastmath=True, cache=True)
def generate_exit_signals_advanced(entries: np.ndarray, direction: np.ndarray, 
                                  close: np.ndarray, atr: np.ndarray,
                                  max_bars: int = 100, 
                                  stop_loss_atr: float = 2.0,
                                  take_profit_atr: float = 4.0) -> np.ndarray:
    """
    Walk the bars once and mark where an open position should be closed.

    At most one position is tracked at a time. It is closed on the first bar
    where the opposite direction is signalled, `max_bars` bars have elapsed,
    or the close reaches the stop / target. Stop and target sit
    `stop_loss_atr` / `take_profit_atr` times the entry-bar ATR away from the
    entry close (1% of the close is used when that ATR is NaN). A new entry
    may open on the same bar on which the previous position was closed.
    """
    n = len(entries)
    exits = np.zeros(n, dtype=np.bool_)

    in_trade = False
    side = 0
    opened_at = -1
    entry_px = 0.0
    atr_at_entry = 0.0

    for bar in range(n):
        if in_trade:
            held = bar - opened_at
            price = close[bar]

            # Distances are fixed by the ATR captured at entry
            risk = atr_at_entry * stop_loss_atr
            reward = atr_at_entry * take_profit_atr

            hit = False
            if side == 1:  # Long: stop below, target above
                hit = (direction[bar] == -1 or
                       held >= max_bars or
                       price <= entry_px - risk or
                       price >= entry_px + reward)
            elif side == -1:  # Short: stop above, target below
                hit = (direction[bar] == 1 or
                       held >= max_bars or
                       price >= entry_px + risk or
                       price <= entry_px - reward)

            if hit:
                exits[bar] = True
                in_trade = False

        # A flat book may take a new entry, including on an exit bar
        if entries[bar] and not in_trade:
            in_trade = True
            side = direction[bar]
            opened_at = bar
            entry_px = close[bar]
            atr_at_entry = atr[bar] if not np.isnan(atr[bar]) else close[bar] * 0.01

    return exits

print("\n" + "=" * 80)
print("ULTRA-FAST VECTORBT BACKTESTING")
print("=" * 80)

# Prepare data for vectorbt
close_prices = df_5m_aligned['Close'].values
# A bar is an entry bar when it carries a long or a short signal
entries = df_5m_aligned['long_entry'] | df_5m_aligned['short_entry']
entries_array = entries.values
# Per-bar trade direction: 1 on long-entry bars, -1 on short-entry bars, 0 elsewhere
direction = np.where(df_5m_aligned['long_entry'], 1, 
                    np.where(df_5m_aligned['short_entry'], -1, 0))

# Calculate ATR for dynamic stops (approximation for speed)
# This is a 14-bar rolling mean of the HL_Range column, so the first 13 bars are NaN
atr_approx = df_5m_aligned['HL_Range'].rolling(14).mean().values

# Generate exit signals
print("\nGenerating exit signals...")
exit_start = time.time()

exits = generate_exit_signals_advanced(
    entries_array, direction, close_prices, atr_approx,
    max_bars=config.max_holding_bars,
    stop_loss_atr=config.stop_loss_atr,
    take_profit_atr=config.stop_loss_atr * 2  # 2:1 reward-risk ratio
)

exit_time = time.time() - exit_start
print(f"Exit signals generated in {exit_time:.3f} seconds")
print(f"Total exits: {exits.sum():,}")

# Fixed position sizing (using base size for all trades)
# Non-entry bars get a size of 0
position_sizes = np.where(
    entries_array,
    config.position_size_base,
    0
)

print("\nRunning vectorized backtest...")
backtest_start = time.time()

try:
    # Run backtest with vectorbt
    # NOTE(review): `direction` is the 1/-1/0 integer array built above. Confirm
    # this is the encoding Portfolio.from_signals expects for `direction` in the
    # installed vectorbt version; if not, long/short handling here is wrong.
    portfolio = vbt.Portfolio.from_signals(
        close=df_5m_aligned['Close'],
        entries=entries,
        exits=exits,
        direction=direction,
        size=position_sizes,
        size_type='amount',
        init_cash=config.initial_capital,
        fees=config.fees,
        slippage=config.slippage,
        freq='5T',
        cash_sharing=True,
        call_seq='auto'
    )
    
    backtest_time = time.time() - backtest_start
    print(f"\nBacktest completed in {backtest_time:.3f} seconds!")
    
    # Calculate comprehensive metrics
    # NOTE(review): every key read from portfolio_stats below must exist in the
    # output of portfolio.stats(). A missing key raises KeyError, which the
    # handler at the bottom catches and then discards the whole portfolio.
    # Verify the key names against the installed vectorbt version.
    portfolio_stats = portfolio.stats()
    returns = portfolio.returns()
    trades = portfolio.trades.records_readable
    
    print("\n" + "-" * 50)
    print("PERFORMANCE METRICS")
    print("-" * 50)
    
    # Core metrics
    print(f"Total Return: {portfolio_stats['Total Return [%]']:.2f}%")
    print(f"Annualized Return: {portfolio_stats['Annualized Return [%]']:.2f}%")
    print(f"Sharpe Ratio: {portfolio_stats['Sharpe Ratio']:.2f}")
    print(f"Sortino Ratio: {portfolio_stats['Sortino Ratio']:.2f}")
    print(f"Calmar Ratio: {portfolio_stats['Calmar Ratio']:.2f}")
    print(f"Max Drawdown: {portfolio_stats['Max Drawdown [%]']:.2f}%")
    print(f"Max Drawdown Duration: {portfolio_stats['Max Drawdown Duration']} days")
    
    # Trade statistics
    print("\n" + "-" * 50)
    print("TRADE STATISTICS")
    print("-" * 50)
    print(f"Total Trades: {portfolio_stats['Total Trades']:,.0f}")
    print(f"Win Rate: {portfolio_stats['Win Rate [%]']:.2f}%")
    print(f"Profit Factor: {portfolio_stats['Profit Factor']:.2f}")
    print(f"Expectancy: {portfolio_stats['Expectancy [%]']:.3f}%")
    print(f"Average Win: {portfolio_stats['Avg Winning Trade [%]']:.2f}%")
    print(f"Average Loss: {portfolio_stats['Avg Losing Trade [%]']:.2f}%")
    print(f"Best Trade: {portfolio_stats['Best Trade [%]']:.2f}%")
    print(f"Worst Trade: {portfolio_stats['Worst Trade [%]']:.2f}%")
    
    # Position sizing analysis
    if len(trades) > 0:
        avg_position_size = trades['Size'].mean()
        print(f"\nAverage Position Size: {avg_position_size:.2f}")
        print(f"Position Size StdDev: {trades['Size'].std():.2f}")
        
    # Advanced metrics
    print("\n" + "-" * 50)
    print("ADVANCED METRICS")
    print("-" * 50)
    
    # Calculate additional metrics
    # Compound the per-bar returns into one return per calendar day
    daily_returns = returns.resample('D').apply(lambda x: (1 + x).prod() - 1)
    
    if len(daily_returns) > 0:
        # Value at Risk (95%)
        # 5th percentile of the daily returns
        var_95 = np.percentile(daily_returns.dropna(), 5)
        print(f"Daily VaR (95%): {var_95*100:.2f}%")
        
        # Conditional VaR (CVaR)
        # Mean of the daily returns at or below the VaR level
        cvar_95 = daily_returns[daily_returns <= var_95].mean()
        print(f"Daily CVaR (95%): {cvar_95*100:.2f}%")
        
        # Information Ratio (assuming 0 benchmark)
        # Annualised with sqrt(252)
        ir = daily_returns.mean() / daily_returns.std() * np.sqrt(252)
        print(f"Information Ratio: {ir:.2f}")
    
    # Trade analysis by direction
    # NOTE(review): relies on 'Direction', 'PnL' and 'PnL %' columns in
    # records_readable. Confirm these column names for the installed version.
    if len(trades) > 0:
        print("\n" + "-" * 50)
        print("DIRECTIONAL ANALYSIS")
        print("-" * 50)
        
        long_trades = trades[trades['Direction'] == 'Long']
        short_trades = trades[trades['Direction'] == 'Short']
        
        if len(long_trades) > 0:
            print(f"\nLong Trades: {len(long_trades)}")
            print(f"  Win Rate: {(long_trades['PnL'] > 0).mean()*100:.1f}%")
            print(f"  Avg PnL: {long_trades['PnL %'].mean():.2f}%")
            
        if len(short_trades) > 0:
            print(f"\nShort Trades: {len(short_trades)}")
            print(f"  Win Rate: {(short_trades['PnL'] > 0).mean()*100:.1f}%")
            print(f"  Avg PnL: {short_trades['PnL %'].mean():.2f}%")
    
    logger.info(f"Backtest completed successfully with {len(trades)} trades")
    
except Exception as e:
    # Best-effort: report the failure and fall back to empty placeholders
    # instead of re-raising. Note that this also discards a portfolio that was
    # built successfully if the failure happened while printing metrics.
    logger.error(f"Error running backtest: {str(e)}")
    print(f"\nError running backtest: {str(e)}")
    portfolio_stats = {}
    returns = pd.Series(dtype=float)
    trades = pd.DataFrame()
    portfolio = None

In [None]:
# Cell 9: Professional Visualizations

print("\nGenerating professional visualizations...")
logger.info("Creating performance visualizations")

# Create comprehensive dashboard: a 4x2 grid with wide panels in the left
# column and narrower distribution/summary panels in the right column.
#
# x-axes are deliberately NOT shared. shared_xaxes=True links every subplot
# in a column, but the panels in a column have no common x domain: the right
# column mixes return %, month number, a box category and duration buckets,
# and the left column mixes full-period curves with short tail samples.
# Linked axes would force one range/type onto all of them and would suppress
# the per-panel x-axis titles that are set when the axes are updated.
fig = make_subplots(
    rows=4, cols=2,
    shared_xaxes=False,
    vertical_spacing=0.05,
    horizontal_spacing=0.1,
    row_heights=[0.3, 0.2, 0.2, 0.3],
    column_widths=[0.7, 0.3],
    subplot_titles=(
        'Cumulative Returns', 'Returns Distribution',
        'Drawdown', 'Monthly Returns',
        'Signal Indicators', 'Trade PnL Distribution',
        'Price & Signals (Sample)', 'Trade Duration Analysis'
    ),
    # Rows 1 and 3 of the left column carry a secondary y-axis
    # (trade count and MLMI respectively).
    specs=[
        [{"secondary_y": True}, {"type": "histogram"}],
        [{"secondary_y": False}, {"type": "scatter"}],
        [{"secondary_y": True}, {"type": "box"}],
        [{"secondary_y": False}, {"type": "bar"}]
    ]
)

# 1. Cumulative Returns with benchmark
# Compound the per-bar strategy returns, and the close-to-close returns of the
# 5-minute price series (buy & hold), into cumulative return curves.
cumulative_returns = (1 + returns).cumprod() - 1
benchmark_returns = df_5m_aligned['Close'].pct_change().fillna(0)
cumulative_benchmark = (1 + benchmark_returns).cumprod() - 1

fig.add_trace(
    go.Scatter(
        x=cumulative_returns.index,
        y=cumulative_returns.values * 100,
        mode='lines',
        name='Strategy',
        line=dict(color='blue', width=2)
    ),
    row=1, col=1, secondary_y=False
)

fig.add_trace(
    go.Scatter(
        x=cumulative_benchmark.index,
        y=cumulative_benchmark.values * 100,
        mode='lines',
        name='Buy & Hold',
        line=dict(color='gray', width=1, dash='dash')
    ),
    row=1, col=1, secondary_y=False
)

# Add cumulative trades on secondary axis (only when a backtest produced a
# portfolio object)
if 'portfolio' in globals() and portfolio is not None:
    # Count trades 1..N so the curve reads 1 at the first entry and ends at
    # the total number of trades (0..N-1 would under-report by one).
    cumulative_trades = np.arange(1, len(trades) + 1)
    trade_times = trades['Entry Timestamp']

    # The target axis is selected by secondary_y=True below; no explicit
    # yaxis id is set on the trace because add_trace assigns it.
    fig.add_trace(
        go.Scatter(
            x=trade_times,
            y=cumulative_trades,
            mode='lines',
            name='Cumulative Trades',
            line=dict(color='green', width=1)
        ),
        row=1, col=1, secondary_y=True
    )

# 2. Returns Distribution
if len(returns) > 0:
    # Histogram of the per-bar strategy returns, expressed in percent.
    returns_histogram = go.Histogram(
        x=returns.values * 100,
        nbinsx=50,
        name='Returns',
        marker_color='lightblue',
        showlegend=False
    )
    fig.add_trace(returns_histogram, row=1, col=2)

    # Add normal distribution overlay: a normal pdf with the sample mean and
    # standard deviation, scaled by the number of observations and by the
    # return range divided by 50 so that it is drawn on the histogram's
    # count scale.
    mean_ret = returns.mean() * 100
    std_ret = returns.std() * 100
    x_range = np.linspace(returns.min() * 100, returns.max() * 100, 100)
    normal_dist = (
        stats.norm.pdf(x_range, mean_ret, std_ret)
        * len(returns)
        * (returns.max() - returns.min())
        * 100
        / 50
    )

    normal_curve = go.Scatter(
        x=x_range,
        y=normal_dist,
        mode='lines',
        name='Normal',
        line=dict(color='red', width=2)
    )
    fig.add_trace(normal_curve, row=1, col=2)

# 3. Drawdown
# Drawn only when the backtest produced a portfolio object.
if 'portfolio' in globals() and portfolio is not None:
    # Drawdown series scaled to percent; the sign is flipped for plotting and
    # the area between the curve and zero is filled.
    drawdown = portfolio.drawdown() * 100
    drawdown_area = go.Scatter(
        x=drawdown.index,
        y=-drawdown.values,
        mode='lines',
        name='Drawdown',
        fill='tozeroy',
        line=dict(color='red', width=1)
    )
    fig.add_trace(drawdown_area, row=2, col=1)

# 4. Monthly Returns (one marker per calendar month, coloured by return)
if len(returns) > 30:
    # Compound the per-bar returns inside each calendar month.
    monthly_returns = returns.resample('M').apply(lambda x: (1 + x).prod() - 1)

    # Plot exactly the months produced by the resample, in chronological
    # order. Months before the first / after the last observation are not
    # padded with 0% points, which would be indistinguishable from genuinely
    # flat months; no exception handling is needed because nothing is looked
    # up by (year, month) any more.
    monthly_pct = (monthly_returns * 100).tolist()
    month_numbers = monthly_returns.index.month.tolist()

    fig.add_trace(
        go.Scatter(
            x=month_numbers,
            y=monthly_pct,
            mode='markers',
            marker=dict(
                size=15,
                color=monthly_pct,
                colorscale='RdYlGn',
                # Colour scale is clamped to +/-10% per month
                cmin=-10,
                cmax=10,
                showscale=True,
                colorbar=dict(title="Return %", x=1.15)
            ),
            showlegend=False
        ),
        row=2, col=2
    )

# 5. Signal Indicators (30m data sample)
# Only the most recent rows (at most 500) of the 30-minute frame are drawn.
sample_size = min(500, len(df_30m))
sample_30m = df_30m.tail(sample_size)

fig.add_trace(
    go.Scatter(
        x=sample_30m.index,
        y=sample_30m['Close'],
        mode='lines',
        name='Close',
        line=dict(color='black', width=1)
    ),
    row=3, col=1, secondary_y=False
)

# MLMI goes on the secondary y-axis of this panel. The axis is selected by
# secondary_y=True; no explicit yaxis id is set on the trace because
# add_trace assigns it. The column check mirrors the 'yhat1' guard below so a
# missing indicator skips its trace instead of aborting the whole dashboard.
if 'mlmi' in sample_30m.columns:
    fig.add_trace(
        go.Scatter(
            x=sample_30m.index,
            y=sample_30m['mlmi'],
            mode='lines',
            name='MLMI',
            line=dict(color='blue', width=1)
        ),
        row=3, col=1, secondary_y=True
    )

# Add NW-RQK lines (drawn on the price axis, only if the column exists)
if 'yhat1' in sample_30m.columns:
    fig.add_trace(
        go.Scatter(
            x=sample_30m.index,
            y=sample_30m['yhat1'],
            mode='lines',
            name='NW-RQK',
            line=dict(color='orange', width=1, dash='dash')
        ),
        row=3, col=1, secondary_y=False
    )

# 6. Trade PnL Distribution
if len(trades) > 0:
    # Box plot of per-trade percentage PnL; every individual trade is also
    # drawn as a jittered point offset to the side of the box.
    pnl_point_style = dict(
        color='lightblue',
        size=4
    )
    pnl_box = go.Box(
        y=trades['PnL %'],
        name='PnL Distribution',
        boxpoints='all',
        jitter=0.3,
        pointpos=-1.8,
        marker=pnl_point_style,
        showlegend=False
    )
    fig.add_trace(pnl_box, row=3, col=2)

# 7. Price & Signals Sample
# Only the most recent rows (at most 1000) of the 5-minute frame are drawn.
sample_size_5m = min(1000, len(df_5m_aligned))
sample_5m = df_5m_aligned.tail(sample_size_5m)

fig.add_trace(
    go.Candlestick(
        x=sample_5m.index,
        open=sample_5m['Open'],
        high=sample_5m['High'],
        low=sample_5m['Low'],
        close=sample_5m['Close'],
        name='Price',
        showlegend=False
    ),
    row=4, col=1
)

# Candlestick traces switch on an x-axis range slider by default. Inside a
# multi-panel grid the slider eats into the panel's vertical space, so it is
# turned off for this subplot.
fig.update_xaxes(rangeslider_visible=False, row=4, col=1)

# Add entry signals
# NOTE(review): the 'long_entry' / 'short_entry' columns are used directly as
# row masks, so they are assumed to be boolean - confirm against the cell
# that builds df_5m_aligned.
long_entries_sample = sample_5m[sample_5m['long_entry']]
short_entries_sample = sample_5m[sample_5m['short_entry']]

# Long markers sit 0.5% below the bar low, short markers 0.5% above the bar
# high, so they do not overlap the candles.
if len(long_entries_sample) > 0:
    fig.add_trace(
        go.Scatter(
            x=long_entries_sample.index,
            y=long_entries_sample['Low'] * 0.995,
            mode='markers',
            name='Long',
            marker=dict(symbol='triangle-up', size=10, color='green')
        ),
        row=4, col=1
    )

if len(short_entries_sample) > 0:
    fig.add_trace(
        go.Scatter(
            x=short_entries_sample.index,
            y=short_entries_sample['High'] * 1.005,
            mode='markers',
            name='Short',
            marker=dict(symbol='triangle-down', size=10, color='red')
        ),
        row=4, col=1
    )

# 8. Trade Duration Analysis
if len(trades) > 0 and 'Duration' in trades.columns:
    durations = trades['Duration'].dt.total_seconds() / 3600  # Convert to hours

    # (label, membership mask) for each holding-time bucket; the lower edge of
    # every bucket is inclusive and the upper edge exclusive.
    duration_buckets = [
        ('< 1h', durations < 1),
        ('1-2h', (durations >= 1) & (durations < 2)),
        ('2-4h', (durations >= 2) & (durations < 4)),
        ('4-8h', (durations >= 4) & (durations < 8)),
        ('8-24h', (durations >= 8) & (durations < 24)),
        ('> 24h', durations >= 24),
    ]

    duration_bars = go.Bar(
        x=[bucket_label for bucket_label, _ in duration_buckets],
        y=[len(durations[bucket_mask]) for _, bucket_mask in duration_buckets],
        name='Trade Count',
        marker_color='lightblue'
    )
    fig.add_trace(duration_bars, row=4, col=2)

# Update layout: centred title, tall canvas, horizontal legend anchored above
# the plotting area on the right.
dashboard_title = dict(
    text='MLMI → NW-RQK → FVG Synergy Strategy Performance Dashboard',
    x=0.5,
    xanchor='center',
    font=dict(size=20)
)
dashboard_legend = dict(
    orientation="h",
    yanchor="bottom",
    y=1.02,
    xanchor="right",
    x=1
)
fig.update_layout(
    title=dashboard_title,
    height=1600,
    showlegend=True,
    template='plotly_white',
    legend=dashboard_legend
)

# Update axes
# (title, row, col, secondary_y); secondary_y=None is the update_yaxes default
# and is used for panels where no primary/secondary selection is made.
y_axis_titles = [
    ("Return (%)", 1, 1, False),
    ("Trades", 1, 1, True),
    ("Frequency", 1, 2, None),
    ("Drawdown (%)", 2, 1, None),
    ("Return (%)", 2, 2, None),
    ("Price", 3, 1, False),
    ("MLMI", 3, 1, True),
    ("PnL (%)", 3, 2, None),
    ("Price", 4, 1, None),
    ("Count", 4, 2, None),
]
for axis_title, axis_row, axis_col, axis_secondary in y_axis_titles:
    fig.update_yaxes(title_text=axis_title, row=axis_row, col=axis_col,
                     secondary_y=axis_secondary)

# (title, row, col) for the right-column panels that get an x-axis title
x_axis_titles = [
    ("Return (%)", 1, 2),
    ("Month", 2, 2),
    ("Duration", 4, 2),
]
for axis_title, axis_row, axis_col in x_axis_titles:
    fig.update_xaxes(title_text=axis_title, row=axis_row, col=axis_col)

# Show the figure
fig.show()

print("\nVisualization complete!")

# Save the figure if configured: written as a standalone HTML file inside the
# configured results directory.
if config.save_results:
    fig_path = os.path.join(config.results_path, 'performance_dashboard.html')
    fig.write_html(fig_path)
    logger.info(f"Saved performance dashboard to {fig_path}")
    print(f"Dashboard saved to {fig_path}")

In [None]:
# Cell 10: Statistical Validation and Robustness Testing

# NOTE: fastmath is intentionally not enabled. fastmath=True allows the
# compiler to assume that no NaN/inf values occur, which can make the
# np.isnan() filter below unreliable.
@njit(parallel=True, cache=True)
def bootstrap_confidence_intervals(returns: np.ndarray, n_bootstrap: int = 10000,
                                  confidence: float = 0.95) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Bootstrap distributions of return, Sharpe, max drawdown and win rate.

    NaN entries are removed from ``returns``; each of the ``n_bootstrap``
    iterations then draws as many returns as remain, with replacement, and
    computes four statistics on that resample.

    Args:
        returns: 1-D array of per-bar simple returns (NaN entries are ignored).
        n_bootstrap: Number of bootstrap resamples.
        confidence: Accepted for interface compatibility; not used here. The
            function returns the raw bootstrap distributions and callers
            derive percentiles from them.

    Returns:
        Four arrays of length ``n_bootstrap``:
        compounded total return, annualised Sharpe ratio
        (mean / std * sqrt(252 * 78); 0.0 when std <= 1e-10),
        maximum drawdown (<= 0) and fraction of strictly positive returns.
        All four are zero-filled when ``returns`` has no non-NaN values.
    """
    # Arrays to store bootstrap results
    boot_returns = np.zeros(n_bootstrap)
    boot_sharpes = np.zeros(n_bootstrap)
    boot_max_dd = np.zeros(n_bootstrap)
    boot_win_rates = np.zeros(n_bootstrap)

    # Remove NaN values
    clean_returns = returns[~np.isnan(returns)]
    n_clean = len(clean_returns)

    if n_clean == 0:
        return boot_returns, boot_sharpes, boot_max_dd, boot_win_rates

    annualisation = np.sqrt(252 * 78)

    # Bootstrap iterations (no seed is set inside the parallel loop)
    for i in prange(n_bootstrap):
        sample = np.empty(n_clean)
        equity = 1.0     # compounded value of the resampled path
        peak = 0.0       # running maximum of equity (only while positive)
        worst_dd = 0.0   # most negative drawdown seen so far
        wins = 0

        # Draw one index at a time and track equity, running peak and
        # drawdown with plain scalars in a single pass. This uses only the
        # two-argument randint form and avoids ufunc methods such as
        # np.maximum.accumulate, which nopython mode may not support.
        for j in range(n_clean):
            r = clean_returns[np.random.randint(0, n_clean)]
            sample[j] = r

            equity *= 1.0 + r
            if equity > peak:
                peak = equity
            # Drawdown is only defined against a positive peak; otherwise it
            # is treated as 0, which worst_dd already covers.
            if peak > 0.0:
                dd = (equity - peak) / peak
                if dd < worst_dd:
                    worst_dd = dd

            if r > 0.0:
                wins += 1

        boot_returns[i] = equity - 1.0

        mean_ret = np.mean(sample)
        std_ret = np.std(sample)
        if std_ret > 1e-10:
            boot_sharpes[i] = mean_ret / std_ret * annualisation
        else:
            boot_sharpes[i] = 0.0

        boot_max_dd[i] = worst_dd
        boot_win_rates[i] = wins / n_clean

    return boot_returns, boot_sharpes, boot_max_dd, boot_win_rates

print("\n" + "=" * 80)
print("STATISTICAL VALIDATION & ROBUSTNESS TESTING")
print("=" * 80)

# Bootstrap analysis
print("\nRunning bootstrap analysis (10,000 iterations)...")
logger.info("Starting bootstrap analysis")
boot_start = time.time()

# Reset per-run state before the try block. At notebook top level locals() is
# the global namespace, so probing it afterwards could pick up values left
# over from an earlier execution of this cell.
boot_time = 0
t_stat = None

# Bars per year used for the rolling window; the same 252 * 78 factor is used
# for Sharpe annualisation in this cell.
# NOTE(review): presumably 78 five-minute bars per session - confirm.
BARS_PER_YEAR = 252 * 78


def calculate_ci(data, confidence=0.95):
    """Return the (lower, upper) percentile bounds of a two-sided interval.

    NaN entries are ignored; (0.0, 0.0) is returned when nothing is left.
    """
    valid_data = data[~np.isnan(data)]
    if len(valid_data) == 0:
        return 0.0, 0.0
    lower = np.percentile(valid_data, (1 - confidence) / 2 * 100)
    upper = np.percentile(valid_data, (1 + confidence) / 2 * 100)
    return lower, upper


try:
    returns_array = returns.values
    boot_returns, boot_sharpes, boot_max_dd, boot_win_rates = bootstrap_confidence_intervals(returns_array)

    boot_time = time.time() - boot_start
    logger.info(f"Bootstrap completed in {boot_time:.3f} seconds")
    print(f"Bootstrap completed in {boot_time:.3f} seconds")

    # Display results
    print("\n95% Confidence Intervals:")
    print("-" * 50)

    ret_lower, ret_upper = calculate_ci(boot_returns)
    print(f"Total Return: [{ret_lower*100:.2f}%, {ret_upper*100:.2f}%]")

    sharpe_lower, sharpe_upper = calculate_ci(boot_sharpes)
    print(f"Sharpe Ratio: [{sharpe_lower:.2f}, {sharpe_upper:.2f}]")

    dd_lower, dd_upper = calculate_ci(boot_max_dd)
    print(f"Max Drawdown: [{dd_lower*100:.2f}%, {dd_upper*100:.2f}%]")

    wr_lower, wr_upper = calculate_ci(boot_win_rates)
    print(f"Win Rate: [{wr_lower*100:.2f}%, {wr_upper*100:.2f}%]")

    # Statistical significance tests
    print("\n" + "-" * 50)
    print("STATISTICAL SIGNIFICANCE")
    print("-" * 50)

    # Test if the mean return is significantly different from zero
    clean_returns = returns_array[~np.isnan(returns_array)]
    if len(clean_returns) > 1:
        mean_return = np.mean(clean_returns)
        # Sample standard deviation (ddof=1), as required for a t-statistic
        std_return = np.std(clean_returns, ddof=1)
        n_returns = len(clean_returns)

        if std_return > 0:
            t_stat = mean_return / (std_return / np.sqrt(n_returns))
            # Approximate p-value using normal distribution
            p_value_approx = 2 * (1 - stats.norm.cdf(abs(t_stat)))

            print(f"T-statistic: {t_stat:.3f}")
            print(f"Approx p-value: {p_value_approx:.4f}")
            print(f"Returns significantly positive: {'Yes' if t_stat > 1.96 else 'No'}")
        else:
            print("Cannot calculate t-statistic: zero standard deviation")

    # Risk-adjusted performance percentiles
    # A plain membership test is used because it works whether portfolio_stats
    # is a dict or a pandas Series; truth-testing a Series raises ValueError.
    has_sharpe = (
        'portfolio_stats' in globals()
        and portfolio_stats is not None
        and 'Sharpe Ratio' in portfolio_stats
    )
    actual_sharpe = portfolio_stats['Sharpe Ratio'] if has_sharpe else 0

    # Share of bootstrap Sharpe ratios at or below the backtest's Sharpe
    sharpe_percentile = np.sum(boot_sharpes <= actual_sharpe) / len(boot_sharpes) * 100

    print(f"\nStrategy Sharpe ratio percentile: {sharpe_percentile:.1f}%")
    print("Performance assessment: ", end="")
    if sharpe_percentile > 90:
        print("EXCELLENT - Top 10% performance")
    elif sharpe_percentile > 75:
        print("VERY GOOD - Top 25% performance")
    elif sharpe_percentile > 50:
        print("GOOD - Above median performance")
    else:
        print("NEEDS IMPROVEMENT - Below median performance")

    # Stability analysis
    print("\n" + "-" * 50)
    print("STABILITY ANALYSIS")
    print("-" * 50)

    # Rolling performance over one year of bars, or half the data if shorter.
    # (252 * 5 bars would be about 16 sessions, not the year the labels below
    # refer to.)
    window = min(BARS_PER_YEAR, len(returns) // 2)
    if window > 100:
        annualisation = np.sqrt(BARS_PER_YEAR)

        def _window_sharpe(window_values):
            """Annualised Sharpe of one window; 0 when the window is flat."""
            window_std = window_values.std(ddof=1)
            if window_std > 0:
                return window_values.mean() / window_std * annualisation
            return 0

        # raw=True hands each window over as a plain ndarray, avoiding the
        # construction of a Series for every window.
        rolling_returns = returns.rolling(window).apply(
            lambda x: np.prod(1 + x) - 1, raw=True
        )
        rolling_sharpe = returns.rolling(window).apply(_window_sharpe, raw=True)

        print(f"Rolling window: {window} bars")
        print(f"Rolling 1-year return volatility: {rolling_returns.std()*100:.2f}%")
        print(f"Rolling Sharpe stability: {rolling_sharpe.std():.2f}")
        print(f"Minimum rolling Sharpe: {rolling_sharpe.min():.2f}")
        print(f"Maximum rolling Sharpe: {rolling_sharpe.max():.2f}")
    else:
        print("Insufficient data for rolling analysis")

    # Save validation results
    if config.save_results:
        validation_results = {
            'confidence_intervals': {
                'return': (ret_lower, ret_upper),
                'sharpe': (sharpe_lower, sharpe_upper),
                'max_dd': (dd_lower, dd_upper),
                'win_rate': (wr_lower, wr_upper)
            },
            'significance': {
                # Converted to built-in types so they are written as a JSON
                # number / boolean rather than stringified by default=str.
                't_stat': None if t_stat is None else float(t_stat),
                'significant': bool(t_stat is not None and t_stat > 1.96)
            },
            'percentiles': {
                'sharpe_percentile': float(sharpe_percentile)
            }
        }

        validation_path = os.path.join(config.results_path, 'validation_results.json')
        with open(validation_path, 'w') as f:
            json.dump(validation_results, f, indent=2, default=str)
        logger.info(f"Saved validation results to {validation_path}")

except Exception as e:
    # Best-effort cell: report the failure and let the notebook continue.
    # boot_time keeps its initial value of 0 if the bootstrap itself failed.
    logger.error(f"Error in statistical validation: {str(e)}")
    print(f"\nError in statistical validation: {str(e)}")
    print("Continuing with limited validation...")