# Synergy 2: MLMI → NW-RQK → FVG Trading Strategy

**Ultra-Fast Backtesting with VectorBT and Numba JIT Compilation**

This notebook implements the second synergy pattern where:
1. MLMI provides the primary trend signal
2. NW-RQK confirms the trend direction
3. FVG validates the final entry zone

Key differences from Synergy 1:
- NW-RQK confirmation comes before FVG
- May capture different market dynamics
- Expected to generate similar trade counts but with different timing

In [None]:
# Cell 1: Environment Setup and Imports

import pandas as pd
import numpy as np
import vectorbt as vbt
from numba import njit, prange, typed, types
from numba.typed import Dict
import warnings
import time
from typing import Tuple, Dict as TypeDict, Optional
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import seaborn as sns
import matplotlib.pyplot as plt

# Silence every warning category for the rest of the session.
# NOTE(review): this also hides pandas / Numba deprecation and performance
# warnings that would otherwise flag problems in the cells below.
warnings.filterwarnings('ignore')

# Configure Numba for maximum performance
import numba
# Select the threading layer used by parallel=True kernels.
numba.config.THREADING_LAYER = 'threadsafe'
# Copy Numba's default thread count into the active thread-count setting.
numba.config.NUMBA_NUM_THREADS = numba.config.NUMBA_DEFAULT_NUM_THREADS

# Display settings
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_rows', 100)

# Report the runtime configuration so notebook output records it.
print("Synergy 2: MLMI → NW-RQK → FVG Strategy")
print(f"Numba threads: {numba.config.NUMBA_NUM_THREADS}")
print(f"VectorBT version: {vbt.__version__}")
print("Environment ready for ultra-fast backtesting!")

In [None]:
# Cell 2: Data Loading with Pre-compilation

def load_data_optimized(file_path: str, timeframe: str = '5m') -> pd.DataFrame:
    """Load an OHLCV CSV into a sorted, time-indexed DataFrame.

    The file must contain a ``Timestamp`` column (parsed day-first) plus
    ``Open``, ``High``, ``Low`` and ``Close`` columns; ``Volume`` is optional.
    Rows whose OHLC values are missing or non-numeric are dropped.

    Args:
        file_path: Path of the CSV file to read.
        timeframe: Label of the bar size. Currently unused; kept so existing
            callers that pass it keep working.

    Returns:
        DataFrame indexed by ascending timestamp with float64 price columns
        and the derived columns ``Returns``, ``LogReturns``, ``HL_Range`` and
        ``OC_Range``.
    """
    start_time = time.time()

    # Parse the index explicitly rather than through read_csv's
    # ``date_parser`` / ``infer_datetime_format`` arguments, which are
    # deprecated in pandas 2.x.
    df = pd.read_csv(file_path, index_col='Timestamp')
    df.index = pd.to_datetime(df.index, dayfirst=True)
    df.index.name = 'Timestamp'

    # Ensure numeric types for fast operations; unparseable cells become NaN.
    numeric_cols = ['Open', 'High', 'Low', 'Close', 'Volume']
    for col in numeric_cols:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors='coerce').astype(np.float64)

    # Drop bars with an incomplete OHLC record.
    df = df.dropna(subset=['Open', 'High', 'Low', 'Close'])

    # Chronological order is required by the rolling/shift features below.
    df = df.sort_index()

    # Pre-calculate commonly used features
    df['Returns'] = df['Close'].pct_change()
    df['LogReturns'] = np.log(df['Close'] / df['Close'].shift(1))
    df['HL_Range'] = df['High'] - df['Low']
    df['OC_Range'] = (df['Open'] - df['Close']).abs()

    load_time = time.time() - start_time
    print(f"Loaded {len(df):,} rows in {load_time:.2f} seconds")

    return df

# Warm up the Numba JIT before the real kernels are defined
print("Pre-compiling Numba functions for maximum speed...")

@njit(cache=True)
def dummy_compile():
    """Sum a tiny constant array; exists only to trigger a first compilation."""
    warmup_values = np.array([1.0, 2.0, 3.0])
    return warmup_values.sum()

_ = dummy_compile()  # First call forces the compilation

# Load data files
print("\nLoading data files...")
# Absolute, machine-specific paths to the two CSV exports.
# NOTE(review): the 5-minute file name starts with "@NQ" while the 30-minute
# one starts with "NQ" — confirm both names match the files on disk.
file_5m = "/home/QuantNova/AlgoSpace-Strategy-1/@NQ - 5 min - ETH.csv"
file_30m = "/home/QuantNova/AlgoSpace-Strategy-1/NQ - 30 min - ETH.csv"

# The second argument is only a label; load_data_optimized does not use it.
df_5m = load_data_optimized(file_5m, '5m')
df_30m = load_data_optimized(file_30m, '30m')

# Show the covered date range of each frame (first and last index value).
print(f"\n5-minute data: {df_5m.index[0]} to {df_5m.index[-1]}")
print(f"30-minute data: {df_30m.index[0]} to {df_30m.index[-1]}")

In [None]:
# Cell 3: Optimized Indicator Suite

@njit(fastmath=True, cache=True)
def _wilder_rsi(close: np.ndarray, period: int) -> np.ndarray:
    """Return the RSI of *close* using Wilder smoothing; bars without a value stay at 50."""
    n = len(close)
    rsi = np.full(n, 50.0)

    # Need `period` price changes, i.e. period + 1 bars, for the first value.
    if n <= period:
        return rsi

    deltas = np.diff(close)
    gains = np.maximum(deltas, 0)
    losses = -np.minimum(deltas, 0)

    # Seed with the simple average of the first `period` changes.
    avg_gain = np.mean(gains[:period])
    avg_loss = np.mean(losses[:period])

    if avg_loss > 0:
        rsi[period] = 100 - (100 / (1 + avg_gain / avg_loss))
    else:
        rsi[period] = 100

    # gains[i] is the change from bar i to bar i + 1, hence rsi[i + 1].
    for i in range(period, n - 1):
        avg_gain = (avg_gain * (period - 1) + gains[i]) / period
        avg_loss = (avg_loss * (period - 1) + losses[i]) / period
        if avg_loss > 0:
            rsi[i + 1] = 100 - (100 / (1 + avg_gain / avg_loss))
        else:
            rsi[i + 1] = 100

    return rsi


@njit(fastmath=True, cache=True, parallel=True)
def calculate_all_indicators(close: np.ndarray, high: np.ndarray, low: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Compute WMA(5), WMA(20), RSI(5), RSI(20) and a 14-bar ATR.

    Args:
        close: Close prices.
        high: High prices, same length as *close*.
        low: Low prices, same length as *close*.

    Returns:
        ``(ma5, ma20, rsi5, rsi20, atr)``, each the same length as *close*.
        Moving averages and ATR are NaN until their window is full; RSI
        values default to 50 where they cannot be computed. Inputs that are
        too short (including empty ones) yield those defaults instead of
        writing out of bounds.
    """
    n = len(close)

    # Pre-allocate arrays
    ma5 = np.full(n, np.nan)
    ma20 = np.full(n, np.nan)
    atr = np.full(n, np.nan)

    # Linearly weighted moving averages: the newest bar gets the largest weight.
    weights5 = np.arange(1, 6, dtype=np.float64)
    weights20 = np.arange(1, 21, dtype=np.float64)
    sum_w5 = weights5.sum()
    sum_w20 = weights20.sum()

    # Each iteration writes only its own index, so the loop is safe to run in parallel.
    for i in prange(4, n):
        ma5[i] = np.dot(close[i-4:i+1], weights5) / sum_w5
        if i >= 19:
            ma20[i] = np.dot(close[i-19:i+1], weights20) / sum_w20

    rsi5 = _wilder_rsi(close, 5)
    rsi20 = _wilder_rsi(close, 20)

    # ATR: simple mean of the true range over the last 14 bars.
    if n > 0:
        prev_close = np.roll(close, 1)
        tr = np.maximum(high - low,
                        np.maximum(np.abs(high - prev_close), np.abs(low - prev_close)))
        # np.roll wraps the last close around to index 0; the first bar has
        # no previous close, so use its plain high-low range.
        tr[0] = high[0] - low[0]

        for i in range(14, n):
            atr[i] = np.mean(tr[i-13:i+1])

    return ma5, ma20, rsi5, rsi20, atr

# fastmath is deliberately not enabled: it lets LLVM assume values are never
# NaN, which can optimise away the np.isnan() warm-up guard below.
@njit(parallel=True, cache=True)
def detect_fvg_optimized(high: np.ndarray, low: np.ndarray, atr: np.ndarray,
                        multiplier: float = 1.5) -> Tuple[np.ndarray, np.ndarray]:
    """Flag bars that sit inside an ATR-filtered fair value gap (FVG).

    A bullish gap exists at bar ``i`` when ``low[i] - high[i-3]`` exceeds
    ``atr[i] * multiplier``; a bearish gap when ``low[i-3] - high[i]`` does.
    From the gap bar onward, at most 20 bars are marked active, stopping at
    the first bar that trades back through the gap edge.

    Args:
        high: High prices.
        low: Low prices, same length as *high*.
        atr: ATR values, same length; bars where it is NaN are skipped.
        multiplier: Minimum gap size expressed in ATR multiples.

    Returns:
        ``(bull_active, bear_active)`` boolean arrays.
    """
    n = len(high)
    bull_active = np.zeros(n, dtype=np.bool_)
    bear_active = np.zeros(n, dtype=np.bool_)

    # Zones from different gap bars may overlap, so parallel iterations can
    # write the same element — but only ever the value True.
    for i in prange(3, n):
        if np.isnan(atr[i]):
            continue

        # Dynamic gap threshold based on ATR
        gap_threshold = atr[i] * multiplier
        zone_end = min(i + 20, n)

        # Bullish FVG: price has jumped above the high of three bars ago.
        if low[i] - high[i-3] > gap_threshold:
            for j in range(i, zone_end):
                if low[j] >= high[i-3]:
                    bull_active[j] = True
                else:
                    break

        # Bearish FVG: price has dropped below the low of three bars ago.
        if low[i-3] - high[i] > gap_threshold:
            for j in range(i, zone_end):
                if high[j] <= low[i-3]:
                    bear_active[j] = True
                else:
                    break

    return bull_active, bear_active

print("\nCalculating all indicators with parallel processing...")
start_time = time.time()

# Calculate 30-minute indicators
close_30m = df_30m['Close'].values
high_30m = df_30m['High'].values
low_30m = df_30m['Low'].values

ma5, ma20, rsi5, rsi20, atr_30m = calculate_all_indicators(close_30m, high_30m, low_30m)

# Smooth RSI with a trailing 20-bar mean. np.convolve(..., mode='same')
# centres the window, so each smoothed value would include future bars and
# leak lookahead into the backtest; a rolling mean only uses past bars.
rsi5_smooth = pd.Series(rsi5).rolling(20, min_periods=1).mean().to_numpy()
rsi20_smooth = pd.Series(rsi20).rolling(20, min_periods=1).mean().to_numpy()

# Calculate 5-minute indicators
close_5m = df_5m['Close'].values
high_5m = df_5m['High'].values
low_5m = df_5m['Low'].values

# Only the ATR is needed on the 5-minute frame.
_, _, _, _, atr_5m = calculate_all_indicators(close_5m, high_5m, low_5m)

# Detect FVG with ATR filter
fvg_bull, fvg_bear = detect_fvg_optimized(high_5m, low_5m, atr_5m)

calc_time = time.time() - start_time
print(f"All indicators calculated in {calc_time:.3f} seconds")
print(f"FVG zones detected - Bull: {fvg_bull.sum():,}, Bear: {fvg_bear.sum():,}")

In [None]:
# Cell 4: Advanced MLMI with Adaptive KNN

# fastmath is not enabled because the NaN guard on `volatility` must be honoured.
@njit(cache=True)
def adaptive_knn_predict(features: np.ndarray, labels: np.ndarray, query: np.ndarray,
                        k_base: int, volatility: float, size: int) -> Tuple[float, float]:
    """Predict a label by inverse-distance-weighted k-nearest-neighbour voting.

    Args:
        features: Stored patterns; only the first *size* rows and the first
            two columns are used.
        labels: Label of each stored pattern.
        query: Two-element feature vector to classify.
        k_base: Upper bound on the number of neighbours.
        volatility: Shrinks k as ``k_base * (1 - 2 * volatility)``, with a
            floor of 3; NaN is treated as 0.
        size: Number of valid rows in *features* / *labels*.

    Returns:
        ``(prediction, confidence)``. The prediction is the weighted mean of
        the neighbour labels; the confidence is its absolute value capped at
        1. With no stored patterns the result is ``(0.0, 0.5)``.
    """
    if size == 0:
        return 0.0, 0.5

    # int(NaN) is undefined, so neutralise a missing volatility first.
    vol = volatility
    if np.isnan(vol):
        vol = 0.0

    # Adjust K based on volatility
    k = max(3, min(k_base, int(k_base * (1 - vol * 2))))
    k = min(k, size)

    # Euclidean distance from the query to every stored pattern.
    distances = np.zeros(size)
    for i in range(size):
        dist = 0.0
        for j in range(2):
            diff = features[i, j] - query[j]
            dist += diff * diff
        distances[i] = np.sqrt(dist)

    # Find k nearest neighbors
    indices = np.argpartition(distances, k-1)[:k]

    # Weighted voting based on distance
    vote = 0.0
    weight_sum = 0.0

    for i in range(k):
        idx = indices[i]
        # Exact matches are skipped: their inverse-distance weight is infinite.
        if distances[idx] > 0:
            weight = 1.0 / distances[idx]
            vote += labels[idx] * weight
            weight_sum += weight

    if weight_sum > 0:
        prediction = vote / weight_sum
        # The prediction is already a normalised weighted mean. Dividing it
        # by k again (as before) kept the confidence below 1/k, so a caller
        # threshold such as 0.3 could not be met once k exceeded 3.
        confidence = min(abs(prediction), 1.0)
    else:
        prediction = 0.0
        confidence = 0.0

    return prediction, confidence

# fastmath is not enabled: the crossover comparisons and np.isnan() checks
# below must behave correctly on the NaN warm-up values of the inputs.
@njit(cache=True)
def calculate_mlmi_adaptive(ma_fast: np.ndarray, ma_slow: np.ndarray,
                           rsi_fast_smooth: np.ndarray, rsi_slow_smooth: np.ndarray,
                           close: np.ndarray, returns: np.ndarray,
                           k_neighbors: int = 200,
                           label_horizon: int = 5) -> Tuple[np.ndarray, np.ndarray]:
    """Compute the MLMI series and its confidence with an adaptive KNN.

    Every moving-average crossover becomes a training pattern (slow and fast
    smoothed RSI) labelled with the sign and capped size of the return over
    the next *label_horizon* bars. A pattern joins the training set only
    once that forward return has been observed, so the prediction at bar
    ``i`` never depends on prices after bar ``i``.

    Args:
        ma_fast: Fast moving average.
        ma_slow: Slow moving average.
        rsi_fast_smooth: Smoothed fast RSI (second feature).
        rsi_slow_smooth: Smoothed slow RSI (first feature).
        close: Close prices.
        returns: Per-bar returns, used for the 20-bar rolling volatility.
        k_neighbors: Base neighbour count passed to ``adaptive_knn_predict``.
        label_horizon: Bars ahead used to label a pattern (minimum 1).

    Returns:
        ``(mlmi_values, mlmi_confidence)``; both are 0 until more than 10
        patterns are available.
    """
    n = len(close)
    mlmi_values = np.zeros(n)
    mlmi_confidence = np.zeros(n)
    horizon = max(1, label_horizon)

    # Pre-allocate KNN storage
    max_size = min(10000, n)
    features = np.zeros((max_size, 2))
    labels = np.zeros(max_size)
    data_size = 0

    # Rolling volatility of the 20 bars before bar i; NaN windows (e.g. the
    # undefined first return) are left at 0.
    volatility = np.zeros(n)
    for i in range(20, n):
        vol = np.std(returns[i-20:i])
        if not np.isnan(vol):
            volatility[i] = vol

    for i in range(1, n):
        # Learn from the bar whose forward return completes at bar i.
        c = i - horizon
        if c >= 1:
            bull_cross = ma_fast[c] > ma_slow[c] and ma_fast[c-1] <= ma_slow[c-1]
            bear_cross = ma_fast[c] < ma_slow[c] and ma_fast[c-1] >= ma_slow[c-1]

            if (bull_cross or bear_cross) and not np.isnan(rsi_fast_smooth[c]) and not np.isnan(rsi_slow_smooth[c]):
                if data_size >= max_size:
                    # Keep most recent 75%. Copy first: source and
                    # destination slices overlap.
                    keep_size = int(max_size * 0.75)
                    recent_features = features[max_size - keep_size:].copy()
                    recent_labels = labels[max_size - keep_size:].copy()
                    features[:keep_size] = recent_features
                    labels[:keep_size] = recent_labels
                    data_size = keep_size

                features[data_size, 0] = rsi_slow_smooth[c]
                features[data_size, 1] = rsi_fast_smooth[c]

                # Signed forward return, scaled by 100 and capped at +/-1.
                fwd_ret = (close[i] - close[c]) / close[c]
                labels[data_size] = np.sign(fwd_ret) * min(abs(fwd_ret) * 100, 1.0)
                data_size += 1

        # Make prediction
        if data_size > 10 and not np.isnan(rsi_fast_smooth[i]) and not np.isnan(rsi_slow_smooth[i]):
            query = np.array([rsi_slow_smooth[i], rsi_fast_smooth[i]])
            pred, conf = adaptive_knn_predict(features, labels, query,
                                            k_neighbors, volatility[i], data_size)
            mlmi_values[i] = pred * 100  # Scale for visibility
            mlmi_confidence[i] = conf

    return mlmi_values, mlmi_confidence

# Calculate MLMI with confidence
print("\nCalculating adaptive MLMI with confidence scores...")
start_time = time.time()

# Per-bar returns of the 30-minute frame; the first element is NaN because
# pct_change has no previous bar.
returns_30m = df_30m['Returns'].values
# k_neighbors is left at its default.
mlmi_values, mlmi_confidence = calculate_mlmi_adaptive(
    ma5, ma20, rsi5_smooth, rsi20_smooth, close_30m, returns_30m
)

# Store in dataframe
df_30m['mlmi'] = mlmi_values
df_30m['mlmi_confidence'] = mlmi_confidence
# Directional flags: sign of the MLMI value, gated by confidence above 0.3.
df_30m['mlmi_bull'] = (mlmi_values > 0) & (mlmi_confidence > 0.3)
df_30m['mlmi_bear'] = (mlmi_values < 0) & (mlmi_confidence > 0.3)

mlmi_time = time.time() - start_time
print(f"Adaptive MLMI calculated in {mlmi_time:.3f} seconds")
print(f"MLMI range: [{mlmi_values.min():.1f}, {mlmi_values.max():.1f}]")
print(f"Average confidence: {mlmi_confidence.mean():.3f}")

In [None]:
# Cell 5: Enhanced NW-RQK with Multiple Kernels

@njit(fastmath=True, cache=True)
def gaussian_kernel(x: float, h: float) -> float:
    """Unnormalised Gaussian weight exp(-x^2 / (2 h^2)) for offset *x* and bandwidth *h*."""
    denominator = 2.0 * h * h
    return np.exp(-(x * x) / denominator)

@njit(fastmath=True, cache=True)
def epanechnikov_kernel(x: float, h: float) -> float:
    """Epanechnikov weight 0.75 * (1 - u^2) with u = x / h; zero when |u| > 1."""
    u = x / h
    inside_support = abs(u) <= 1
    return 0.75 * (1 - u * u) if inside_support else 0.0

# fastmath is not enabled: the ensemble step relies on np.isnan(), which
# fastmath's "no NaNs" assumption can optimise away.
@njit(parallel=True, cache=True)
def nadaraya_watson_ensemble(prices: np.ndarray, h: float, r: float,
                           min_periods: int = 25) -> Tuple[np.ndarray, np.ndarray]:
    """Causal Nadaraya-Watson regression with two kernels and their average.

    For each bar ``i >= min_periods`` the estimate is a weighted mean of the
    current and up to 499 previous prices, weighted by the bar offset ``j``
    through a rational quadratic kernel and through a Gaussian kernel.

    Args:
        prices: Price series to smooth.
        h: Kernel bandwidth (must be non-zero).
        r: Shape parameter of the rational quadratic kernel.
        min_periods: First bar index that receives an estimate.

    Returns:
        ``(ensemble, result_rq)``: the average of both kernel estimates and
        the rational quadratic estimate alone; NaN before *min_periods*.
    """
    n = len(prices)
    result_rq = np.full(n, np.nan)  # Rational Quadratic
    result_gauss = np.full(n, np.nan)  # Gaussian

    # Every iteration writes only index i, so the outer loop is parallel-safe.
    for i in prange(min_periods, n):
        weighted_sum_rq = 0.0
        weight_sum_rq = 0.0
        weighted_sum_gauss = 0.0
        weight_sum_gauss = 0.0

        # Look back over at most 500 bars; j <= i always holds here.
        window_size = min(i + 1, 500)

        for j in range(window_size):
            price = prices[i - j]

            # Rational Quadratic
            weight_rq = (1.0 + (j * j) / (h * h * 2.0 * r)) ** (-r)
            weighted_sum_rq += price * weight_rq
            weight_sum_rq += weight_rq

            # Gaussian
            weight_gauss = gaussian_kernel(float(j), h)
            weighted_sum_gauss += price * weight_gauss
            weight_sum_gauss += weight_gauss

        if weight_sum_rq > 0:
            result_rq[i] = weighted_sum_rq / weight_sum_rq
        if weight_sum_gauss > 0:
            result_gauss[i] = weighted_sum_gauss / weight_sum_gauss

    # Ensemble: average of both kernels, falling back to whichever exists.
    ensemble = np.full(n, np.nan)
    for i in range(n):
        has_rq = not np.isnan(result_rq[i])
        has_gauss = not np.isnan(result_gauss[i])
        if has_rq and has_gauss:
            ensemble[i] = (result_rq[i] + result_gauss[i]) / 2
        elif has_rq:
            ensemble[i] = result_rq[i]
        elif has_gauss:
            ensemble[i] = result_gauss[i]

    return ensemble, result_rq

# fastmath is not enabled: every branch below is gated by np.isnan(), which
# fastmath's "no NaNs" assumption can optimise away.
@njit(cache=True)
def detect_nwrqk_signals_enhanced(yhat1: np.ndarray, yhat2: np.ndarray,
                                 prices: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Derive bull/bear signals and a strength score from two regression lines.

    Two independent triggers are evaluated per bar:

    * a slope reversal of *yhat1* (sign change of its one-bar difference),
      with strength ``min(|acceleration| * 1000, 1)``;
    * a crossover of *yhat2* through *yhat1* that agrees with the sign of
      the 5-bar price momentum, with strength ``min(|momentum| * 50, 1)``.

    Args:
        yhat1: Primary regression line.
        yhat2: Secondary regression line, same length.
        prices: Prices used for the momentum filter, same length.

    Returns:
        ``(bull_signals, bear_signals, signal_strength)``. When both triggers
        fire on one bar the larger strength is kept; a bar may carry both a
        bull and a bear flag if the triggers disagree.
    """
    n = len(yhat1)
    bull_signals = np.zeros(n, dtype=np.bool_)
    bear_signals = np.zeros(n, dtype=np.bool_)
    signal_strength = np.zeros(n)

    for i in range(2, n):
        if not np.isnan(yhat1[i]) and not np.isnan(yhat1[i-1]) and not np.isnan(yhat1[i-2]):
            # Trend changes
            slope_prev = yhat1[i-1] - yhat1[i-2]
            slope_curr = yhat1[i] - yhat1[i-1]

            # Acceleration
            acceleration = slope_curr - slope_prev

            # Bullish: negative to positive slope with positive acceleration
            if slope_prev < 0 and slope_curr > 0 and acceleration > 0:
                bull_signals[i] = True
                signal_strength[i] = min(abs(acceleration) * 1000, 1.0)

            # Bearish: positive to negative slope with negative acceleration
            elif slope_prev > 0 and slope_curr < 0 and acceleration < 0:
                bear_signals[i] = True
                signal_strength[i] = min(abs(acceleration) * 1000, 1.0)

        # Crossovers with momentum
        if not np.isnan(yhat1[i]) and not np.isnan(yhat2[i]):
            if not np.isnan(yhat1[i-1]) and not np.isnan(yhat2[i-1]):
                # Price momentum filter: return over the last (up to) 5 bars.
                base_price = prices[max(0, i-5)]
                price_momentum = (prices[i] - base_price) / base_price

                if yhat2[i] > yhat1[i] and yhat2[i-1] <= yhat1[i-1] and price_momentum > 0:
                    bull_signals[i] = True
                    signal_strength[i] = max(signal_strength[i], min(abs(price_momentum) * 50, 1.0))
                elif yhat2[i] < yhat1[i] and yhat2[i-1] >= yhat1[i-1] and price_momentum < 0:
                    bear_signals[i] = True
                    signal_strength[i] = max(signal_strength[i], min(abs(price_momentum) * 50, 1.0))

    return bull_signals, bear_signals, signal_strength

# Calculate enhanced NW-RQK
print("\nCalculating enhanced NW-RQK with ensemble kernels...")
start_time = time.time()

# Parameters
h = 8.0    # kernel bandwidth of the primary line
r = 8.0    # shape parameter of the rational quadratic kernel
lag = 2    # subtracted from h to obtain the second line's bandwidth

# Calculate regression lines
# Both lines are built from the same closes; the second one only uses a
# smaller bandwidth (h - lag), it is not shifted in time.
yhat1, yhat1_rq = nadaraya_watson_ensemble(close_30m, h, r)
yhat2, yhat2_rq = nadaraya_watson_ensemble(close_30m, h - lag, r)

# Detect signals with strength
nwrqk_bull, nwrqk_bear, nwrqk_strength = detect_nwrqk_signals_enhanced(yhat1, yhat2, close_30m)

# Store in dataframe
df_30m['nwrqk_bull'] = nwrqk_bull
df_30m['nwrqk_bear'] = nwrqk_bear
df_30m['nwrqk_strength'] = nwrqk_strength
df_30m['yhat1'] = yhat1
df_30m['yhat2'] = yhat2

nwrqk_time = time.time() - start_time
print(f"Enhanced NW-RQK calculated in {nwrqk_time:.3f} seconds")
print(f"Bull signals: {nwrqk_bull.sum():,}, Bear signals: {nwrqk_bear.sum():,}")
# The mean is taken over bars with a non-zero strength only; it is NaN when
# there are none.
print(f"Average signal strength: {nwrqk_strength[nwrqk_strength > 0].mean():.3f}")

In [None]:
# Cell 6: Smart Timeframe Alignment

def create_alignment_map(timestamps_5m: np.ndarray, timestamps_30m: np.ndarray) -> np.ndarray:
    """Map every 5-minute timestamp to the index of its 30-minute bar.

    For each element of *timestamps_5m* the result is the index of the last
    element of *timestamps_30m* that is less than or equal to it, or 0 when
    no such element exists.

    The previous implementation advanced a shared pointer inside a
    ``prange`` loop; that carries state between iterations and is not valid
    in a parallel loop. A vectorised binary search gives the intended
    result without any cross-iteration state.

    Args:
        timestamps_5m: Numeric timestamps of the fine timeframe (any
            array-like, including a pandas Index).
        timestamps_30m: Numeric timestamps of the coarse timeframe, sorted
            in ascending order.

    Returns:
        int64 array with one 30-minute index per 5-minute timestamp.
    """
    fine = np.asarray(timestamps_5m)
    coarse = np.asarray(timestamps_30m)

    # side='right' returns the insertion point after equal values, so
    # subtracting one yields the last coarse bar at or before each stamp.
    positions = np.searchsorted(coarse, fine, side='right') - 1

    # Stamps that precede the first coarse bar fall back to index 0.
    return np.maximum(positions, 0).astype(np.int64)

print("\nPerforming smart timeframe alignment...")
start_time = time.time()

# Create datetime arrays for mapping
# Convert to plain NumPy integer arrays: a pandas Index cannot be passed to
# a Numba-compiled function.
timestamps_5m = (df_5m.index.astype(np.int64) // 10**9).to_numpy()
timestamps_30m = (df_30m.index.astype(np.int64) // 10**9).to_numpy()

# Create mapping
# NOTE(review): each 5-minute bar receives the values of the 30-minute bar
# whose timestamp is <= its own. If timestamps mark the bar open, that
# 30-minute bar is still forming and its close-based indicators look ahead —
# confirm the timestamp convention of the data export.
mapping = create_alignment_map(timestamps_5m, timestamps_30m)

# Align all indicators efficiently
df_5m_aligned = df_5m.copy()

# MLMI alignment with confidence
df_5m_aligned['mlmi'] = df_30m['mlmi'].values[mapping]
df_5m_aligned['mlmi_confidence'] = df_30m['mlmi_confidence'].values[mapping]
df_5m_aligned['mlmi_bull'] = df_30m['mlmi_bull'].values[mapping]
df_5m_aligned['mlmi_bear'] = df_30m['mlmi_bear'].values[mapping]

# NW-RQK alignment with strength
df_5m_aligned['nwrqk_bull'] = df_30m['nwrqk_bull'].values[mapping]
df_5m_aligned['nwrqk_bear'] = df_30m['nwrqk_bear'].values[mapping]
df_5m_aligned['nwrqk_strength'] = df_30m['nwrqk_strength'].values[mapping]

# FVG data (already on the 5-minute frame)
df_5m_aligned['fvg_bull'] = fvg_bull
df_5m_aligned['fvg_bear'] = fvg_bear

# Add market regime detection: 20-bar return volatility and the 50-bar mean
# return expressed in units of that volatility.
df_5m_aligned['volatility'] = df_5m_aligned['Returns'].rolling(20).std()
df_5m_aligned['trend_strength'] = abs(df_5m_aligned['Returns'].rolling(50).mean()) / df_5m_aligned['volatility']

align_time = time.time() - start_time
print(f"Smart alignment completed in {align_time:.3f} seconds")
print(f"Aligned {len(df_5m_aligned):,} 5-minute bars")

In [None]:
# Cell 7: MLMI → NW-RQK → FVG Synergy Detection

# NOTE(review): parallel=True is requested, but the loop below uses range
# (not prange) because every bar depends on the previous bar's state.
# NOTE(review): fastmath=True is combined with an np.isnan() test on
# volatility; fastmath may assume NaNs never occur — confirm the caller
# always passes NaN-free volatility.
@njit(parallel=True, fastmath=True, cache=True)
def detect_mlmi_nwrqk_fvg_synergy(mlmi_bull: np.ndarray, mlmi_bear: np.ndarray,
                                 mlmi_conf: np.ndarray, nwrqk_bull: np.ndarray,
                                 nwrqk_bear: np.ndarray, nwrqk_strength: np.ndarray,
                                 fvg_bull: np.ndarray, fvg_bear: np.ndarray,
                                 volatility: np.ndarray, window: int = 30) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Emit entries when MLMI, NW-RQK and FVG fire in that order.

    A per-direction state machine runs over the bars:

    1. a rising edge of the MLMI flag with confidence above 0.3 arms the
       direction;
    2. while armed, an NW-RQK signal with strength above 0.2 confirms it;
    3. while confirmed, an active FVG flag produces the entry and resets
       the direction.

    An opposite MLMI flag, or more than `window` bars on the shared timer,
    disarms a direction.

    Args:
        mlmi_bull, mlmi_bear: MLMI direction flags per bar.
        mlmi_conf: MLMI confidence per bar.
        nwrqk_bull, nwrqk_bear: NW-RQK signal flags per bar.
        nwrqk_strength: NW-RQK signal strength per bar.
        fvg_bull, fvg_bear: FVG active-zone flags per bar.
        volatility: Per-bar volatility used to scale the quality score.
        window: Maximum timer value before the armed states are cleared.

    Returns:
        (long_signals, short_signals, signal_quality), all of length n.
        Quality is the mean of MLMI confidence and NW-RQK strength at the
        entry bar, multiplied by 1 / (1 + 10 * volatility).
    """
    n = len(mlmi_bull)
    long_signals = np.zeros(n, dtype=np.bool_)
    short_signals = np.zeros(n, dtype=np.bool_)
    signal_quality = np.zeros(n)
    
    # State tracking
    mlmi_active_bull = np.zeros(n, dtype=np.bool_)
    mlmi_active_bear = np.zeros(n, dtype=np.bool_)
    nwrqk_confirmed_bull = np.zeros(n, dtype=np.bool_)
    nwrqk_confirmed_bear = np.zeros(n, dtype=np.bool_)
    # One timer is shared by both directions.
    state_timer = np.zeros(n, dtype=np.int32)
    
    for i in range(1, n):
        # Carry forward states
        mlmi_active_bull[i] = mlmi_active_bull[i-1]
        mlmi_active_bear[i] = mlmi_active_bear[i-1]
        nwrqk_confirmed_bull[i] = nwrqk_confirmed_bull[i-1]
        nwrqk_confirmed_bear[i] = nwrqk_confirmed_bear[i-1]
        state_timer[i] = state_timer[i-1] + 1
        
        # Volatility adjustment
        vol_factor = 1.0 / (1.0 + volatility[i] * 10) if not np.isnan(volatility[i]) else 1.0
        
        # Reset on opposite signal or timeout
        # The timer is zeroed on every bar where an MLMI flag is set, so the
        # timeout only counts bars since the last flagged bar.
        if mlmi_bear[i] or state_timer[i] > window:
            mlmi_active_bull[i] = False
            nwrqk_confirmed_bull[i] = False
            if mlmi_bear[i]:
                state_timer[i] = 0
        
        if mlmi_bull[i] or state_timer[i] > window:
            mlmi_active_bear[i] = False
            nwrqk_confirmed_bear[i] = False
            if mlmi_bull[i]:
                state_timer[i] = 0
        
        # Step 1: MLMI signal with confidence filter
        # Only the rising edge of the flag arms the direction.
        if mlmi_bull[i] and not mlmi_bull[i-1] and mlmi_conf[i] > 0.3:
            mlmi_active_bull[i] = True
            nwrqk_confirmed_bull[i] = False
            state_timer[i] = 0
        
        if mlmi_bear[i] and not mlmi_bear[i-1] and mlmi_conf[i] > 0.3:
            mlmi_active_bear[i] = True
            nwrqk_confirmed_bear[i] = False
            state_timer[i] = 0
        
        # Step 2: NW-RQK confirmation with strength filter
        if mlmi_active_bull[i] and not nwrqk_confirmed_bull[i] and nwrqk_bull[i] and nwrqk_strength[i] > 0.2:
            nwrqk_confirmed_bull[i] = True
        
        if mlmi_active_bear[i] and not nwrqk_confirmed_bear[i] and nwrqk_bear[i] and nwrqk_strength[i] > 0.2:
            nwrqk_confirmed_bear[i] = True
        
        # Step 3: FVG final confirmation
        if nwrqk_confirmed_bull[i] and fvg_bull[i]:
            long_signals[i] = True
            # Calculate signal quality
            # Uses the confidence and strength of the entry bar itself, not
            # of the bars where steps 1 and 2 fired.
            signal_quality[i] = (mlmi_conf[i] + nwrqk_strength[i]) / 2 * vol_factor
            # Reset states
            mlmi_active_bull[i] = False
            nwrqk_confirmed_bull[i] = False
            state_timer[i] = 0
        
        if nwrqk_confirmed_bear[i] and fvg_bear[i]:
            short_signals[i] = True
            # Calculate signal quality
            signal_quality[i] = (mlmi_conf[i] + nwrqk_strength[i]) / 2 * vol_factor
            # Reset states
            mlmi_active_bear[i] = False
            nwrqk_confirmed_bear[i] = False
            state_timer[i] = 0
    
    return long_signals, short_signals, signal_quality

print("\nDetecting MLMI → NW-RQK → FVG synergy signals...")
start_time = time.time()

# Extract arrays
mlmi_bull_arr = df_5m_aligned['mlmi_bull'].values
mlmi_bear_arr = df_5m_aligned['mlmi_bear'].values
mlmi_conf_arr = df_5m_aligned['mlmi_confidence'].values
nwrqk_bull_arr = df_5m_aligned['nwrqk_bull'].values
nwrqk_bear_arr = df_5m_aligned['nwrqk_bear'].values
nwrqk_strength_arr = df_5m_aligned['nwrqk_strength'].values
fvg_bull_arr = df_5m_aligned['fvg_bull'].values
fvg_bear_arr = df_5m_aligned['fvg_bear'].values
# Missing volatility (rolling warm-up) is replaced by a constant 0.01.
volatility_arr = df_5m_aligned['volatility'].fillna(0.01).values

# Detect synergy
# window is left at its default.
long_entries, short_entries, signal_quality = detect_mlmi_nwrqk_fvg_synergy(
    mlmi_bull_arr, mlmi_bear_arr, mlmi_conf_arr,
    nwrqk_bull_arr, nwrqk_bear_arr, nwrqk_strength_arr,
    fvg_bull_arr, fvg_bear_arr, volatility_arr
)

# Add to dataframe
df_5m_aligned['long_entry'] = long_entries
df_5m_aligned['short_entry'] = short_entries
df_5m_aligned['signal_quality'] = signal_quality

signal_time = time.time() - start_time
print(f"Synergy detection completed in {signal_time:.3f} seconds")
print(f"Long entries: {long_entries.sum():,}")
print(f"Short entries: {short_entries.sum():,}")
# Averaged over bars with a positive quality only; NaN when there are none.
print(f"Average signal quality: {signal_quality[signal_quality > 0].mean():.3f}")

In [None]:
# Cell 8: Advanced VectorBT Backtesting

print("\n" + "=" * 80)
print("ADVANCED VECTORBT BACKTESTING")
print("=" * 80)

# Prepare data
close_prices = df_5m_aligned['Close']
# Combined entry mask: True on any bar with a long or a short entry.
entries = df_5m_aligned['long_entry'] | df_5m_aligned['short_entry']
# Signed direction per bar: 1 = long entry, -1 = short entry, 0 = no entry.
# A bar flagged both long and short resolves to 1 (long is tested first).
direction = np.where(df_5m_aligned['long_entry'], 1, 
                    np.where(df_5m_aligned['short_entry'], -1, 0))

# Dynamic position sizing based on signal quality
# Entry bars scale between 50% and 100% of base_size with signal quality;
# all other bars carry base_size.
base_size = 100
position_sizes = np.where(entries, base_size * (0.5 + df_5m_aligned['signal_quality'] * 0.5), base_size)

# Advanced exit logic
# fastmath is not enabled: the stop-loss branch is guarded by np.isnan(),
# which fastmath's "no NaNs" assumption can optimise away.
@njit
def generate_exits(entries: np.ndarray, direction: np.ndarray, 
                  high: np.ndarray, low: np.ndarray, close: np.ndarray,
                  atr: np.ndarray, max_bars: int = 100) -> np.ndarray:
    """Generate exit flags for a single-position simulation.

    A position opens on the first entry bar while flat (at that bar's close)
    and is closed by whichever of these happens first on a later bar:

    * stop loss: price trades 2 * ATR (ATR of the current bar) against the
      entry price;
    * time stop: the position has been held for `max_bars` bars;
    * an entry in the opposite direction.

    Entries that occur while a position is open do not open a new position,
    including the opposite entry that closes the current one.

    Args:
        entries: True where an entry signal occurs.
        direction: Signed direction per bar (positive long, negative short).
        high, low, close: Price arrays of the same length as `entries`.
        atr: ATR per bar; the stop is not evaluated where it is NaN.
        max_bars: Maximum holding period in bars.

    Returns:
        Boolean array with True on every exit bar.
    """
    n = len(entries)
    exits = np.zeros(n, dtype=np.bool_)

    in_position = False
    entry_price = 0.0
    entry_idx = 0
    position_dir = 0

    for i in range(n):
        if not in_position and entries[i]:
            # Enter position
            in_position = True
            entry_price = close[i]
            entry_idx = i
            position_dir = direction[i]

        elif in_position:
            # Check exit conditions
            bars_held = i - entry_idx

            # Stop loss (2 ATR)
            if not np.isnan(atr[i]):
                stop_distance = 2 * atr[i]

                if position_dir > 0:  # Long position
                    if low[i] <= entry_price - stop_distance:
                        exits[i] = True
                        in_position = False
                elif position_dir < 0:  # Short position
                    if high[i] >= entry_price + stop_distance:
                        exits[i] = True
                        in_position = False

            # Time-based exit
            if bars_held >= max_bars:
                exits[i] = True
                in_position = False

            # Exit on opposite signal
            if entries[i] and direction[i] != position_dir:
                exits[i] = True
                in_position = False

    return exits

# Generate exits
print("\nGenerating advanced exit signals...")
exits = generate_exits(
    entries.values,
    direction,
    df_5m_aligned['High'].values,
    df_5m_aligned['Low'].values,
    close_prices.values,
    atr_5m
)

print("\nRunning advanced backtest...")
backtest_start = time.time()

# Run backtest
# vectorbt's `direction` argument expects its Direction enum
# (long-only / short-only / both), not a 1/-1/0 array. Long and short
# signals are therefore passed separately; the same exit flags close
# whichever side is open.
exit_signals = pd.Series(exits, index=close_prices.index)
portfolio = vbt.Portfolio.from_signals(
    close=close_prices,
    entries=df_5m_aligned['long_entry'],
    exits=exit_signals,
    short_entries=df_5m_aligned['short_entry'],
    short_exits=exit_signals,
    size=position_sizes,
    size_type='amount',
    init_cash=100000,
    fees=0.0001,
    slippage=0.0001,
    freq='5min'  # '5T' is a deprecated alias in recent pandas
)

backtest_time = time.time() - backtest_start
print(f"\nBacktest completed in {backtest_time:.3f} seconds!")

# Calculate comprehensive metrics
stats = portfolio.stats()
returns = portfolio.returns()
trades = portfolio.trades.records_readable.copy()

# Derive the per-trade columns used by the analysis below when the installed
# vectorbt version does not provide them under these names.
if 'Duration' not in trades.columns and {'Entry Timestamp', 'Exit Timestamp'} <= set(trades.columns):
    trades['Duration'] = trades['Exit Timestamp'] - trades['Entry Timestamp']
if 'PnL %' not in trades.columns and 'Return' in trades.columns:
    trades['PnL %'] = trades['Return'] * 100

# Annualise over elapsed calendar time instead of assuming 78 bars per day,
# which does not hold for extended-hours data.
elapsed_years = (df_5m_aligned.index[-1] - df_5m_aligned.index[0]).total_seconds() / (365.25 * 24 * 3600)
total_return = stats['Total Return [%]'] / 100
if elapsed_years > 0 and total_return > -1:
    annualized_return = ((1 + total_return) ** (1 / elapsed_years) - 1) * 100
else:
    annualized_return = float('nan')

print("\n" + "-" * 50)
print("PERFORMANCE METRICS")
print("-" * 50)
print(f"Total Return: {stats['Total Return [%]']:.2f}%")
print(f"Annualized Return: {annualized_return:.2f}%")
print(f"Sharpe Ratio: {stats['Sharpe Ratio']:.2f}")
print(f"Sortino Ratio: {stats['Sortino Ratio']:.2f}")
print(f"Calmar Ratio: {stats['Calmar Ratio']:.2f}")
print(f"Max Drawdown: {stats['Max Drawdown [%]']:.2f}%")
print(f"Max Drawdown Duration: {stats['Max Drawdown Duration']}")

print("\n" + "-" * 50)
print("TRADE STATISTICS")
print("-" * 50)
print(f"Total Trades: {stats['Total Trades']:,.0f}")
print(f"Win Rate: {stats['Win Rate [%]']:.2f}%")
print(f"Profit Factor: {stats['Profit Factor']:.2f}")
# vectorbt reports expectancy as an absolute value, under the key 'Expectancy'.
print(f"Expectancy: {stats.get('Expectancy', float('nan')):.3f}")
print(f"Average Win: {stats['Avg Winning Trade [%]']:.2f}%")
print(f"Average Loss: {stats['Avg Losing Trade [%]']:.2f}%")
print(f"Best Trade: {stats['Best Trade [%]']:.2f}%")
print(f"Worst Trade: {stats['Worst Trade [%]']:.2f}%")

# Additional analysis
if len(trades) > 0:
    print("\n" + "-" * 50)
    print("TRADE ANALYSIS")
    print("-" * 50)
    avg_duration = trades['Duration'].mean()
    print(f"Average Trade Duration: {avg_duration}")
    # Count the calendar days actually present in the data.
    trading_days = df_5m_aligned.index.normalize().nunique()
    print(f"Daily Trades: {len(trades) / trading_days:.1f}")
    print(f"Trade Frequency: Every {len(df_5m_aligned) / len(trades):.0f} bars")

    # Win/Loss streaks
    returns_array = trades['PnL %'].values
    wins = returns_array > 0
    max_win_streak = 0
    max_loss_streak = 0
    current_win_streak = 0
    current_loss_streak = 0

    for win in wins:
        if win:
            current_win_streak += 1
            current_loss_streak = 0
            max_win_streak = max(max_win_streak, current_win_streak)
        else:
            current_loss_streak += 1
            current_win_streak = 0
            max_loss_streak = max(max_loss_streak, current_loss_streak)

    print(f"Max Win Streak: {max_win_streak}")
    print(f"Max Loss Streak: {max_loss_streak}")

In [None]:
# Cell 9: Professional Multi-Panel Visualization

print("\nGenerating professional multi-panel visualization...")

# Per-trade series used by several panels. Fall back to vectorbt's native
# column names when the convenience columns are not present.
if len(trades) > 0:
    if 'PnL %' in trades.columns:
        trade_returns_pct = trades['PnL %']
    else:
        trade_returns_pct = trades['Return'] * 100
    if 'Duration' in trades.columns:
        trade_durations = trades['Duration']
    else:
        trade_durations = trades['Exit Timestamp'] - trades['Entry Timestamp']

# Create comprehensive dashboard
# The panels show unrelated x quantities (full history, last 500 bars,
# months, returns, hours), so x-axes are not shared between rows.
fig = make_subplots(
    rows=5, cols=2,
    shared_xaxes=False,
    vertical_spacing=0.03,
    horizontal_spacing=0.05,
    row_heights=[0.3, 0.2, 0.2, 0.2, 0.1],
    column_widths=[0.7, 0.3],
    subplot_titles=(
        'Cumulative Returns', 'Monthly Returns Heatmap',
        'Drawdown Analysis', 'Trade Distribution',
        'Signal Quality', 'Trade Count by Signal Quality',
        'Price Action with Signals', 'Trade Duration Distribution',
        'Volume Profile', ''
    ),
    specs=[
        [{"secondary_y": False}, {"type": "heatmap"}],
        [{"secondary_y": False}, {"type": "histogram"}],
        [{"secondary_y": False}, {"type": "scatter"}],
        [{"secondary_y": True}, {"type": "histogram"}],
        [{"secondary_y": False}, {"type": "scatter"}]
    ]
)

# 1. Cumulative Returns
cumulative_returns = (1 + returns).cumprod() - 1
fig.add_trace(
    go.Scatter(
        x=cumulative_returns.index,
        y=cumulative_returns.values * 100,
        mode='lines',
        name='Strategy',
        line=dict(color='blue', width=2)
    ),
    row=1, col=1
)

# Benchmark (buy and hold)
benchmark_returns = (close_prices / close_prices.iloc[0] - 1) * 100
fig.add_trace(
    go.Scatter(
        x=benchmark_returns.index,
        y=benchmark_returns.values,
        mode='lines',
        name='Buy & Hold',
        line=dict(color='gray', width=1, dash='dash')
    ),
    row=1, col=1
)

# 2. Monthly Returns Heatmap
# Build a year x month table; a plain reshape(-1, 12) fails unless the data
# covers an exact multiple of 12 months starting in January.
monthly_returns = returns.resample('M').apply(lambda x: (1 + x).prod() - 1) * 100
monthly_table = pd.DataFrame({
    'year': monthly_returns.index.year,
    'month': monthly_returns.index.month,
    'ret': monthly_returns.values,
}).pivot(index='year', columns='month', values='ret').reindex(columns=range(1, 13))
monthly_matrix = monthly_table.values
# Months without data stay blank instead of showing "nan%".
monthly_text = [['' if np.isnan(value) else f'{value:.1f}%' for value in row]
                for row in monthly_matrix]
fig.add_trace(
    go.Heatmap(
        z=monthly_matrix,
        x=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
           'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
        y=[str(year) for year in monthly_table.index],
        colorscale='RdYlGn',
        zmid=0,
        text=monthly_text,
        texttemplate='%{text}',
        showscale=False
    ),
    row=1, col=2
)

# 3. Drawdown
drawdown = portfolio.drawdown() * 100
fig.add_trace(
    go.Scatter(
        x=drawdown.index,
        y=-drawdown.values,
        mode='lines',
        name='Drawdown',
        fill='tozeroy',
        line=dict(color='red', width=1)
    ),
    row=2, col=1
)

# 4. Trade Returns Distribution
# Winners and losers are separate traces: a per-trade colour array cannot
# colour histogram bars, which are drawn per bin.
if len(trades) > 0:
    fig.add_trace(
        go.Histogram(
            x=trade_returns_pct[trade_returns_pct > 0],
            nbinsx=25,
            name='Winning trades',
            marker_color='green'
        ),
        row=2, col=2
    )
    fig.add_trace(
        go.Histogram(
            x=trade_returns_pct[trade_returns_pct <= 0],
            nbinsx=25,
            name='Losing trades',
            marker_color='red'
        ),
        row=2, col=2
    )

# 5. Signal Quality over time
signal_points = df_5m_aligned[df_5m_aligned['signal_quality'] > 0]
fig.add_trace(
    go.Scatter(
        x=signal_points.index,
        y=signal_points['signal_quality'],
        mode='markers',
        name='Signal Quality',
        marker=dict(
            size=5,
            color=signal_points['signal_quality'],
            colorscale='Viridis',
            showscale=False
        )
    ),
    row=3, col=1
)

# 6. Entry count per signal-quality quantile bin
# Quantile binning needs at least two distinct quality values.
if len(trades) > 0 and signal_points['signal_quality'].nunique() > 1:
    quality_bins = pd.qcut(signal_points['signal_quality'], q=5, duplicates='drop')
    is_entry = signal_points['long_entry'] | signal_points['short_entry']
    quality_trade_count = is_entry.groupby(quality_bins).sum()

    fig.add_trace(
        go.Scatter(
            x=list(range(len(quality_trade_count))),
            y=quality_trade_count.values,
            mode='lines+markers',
            name='Trade Count by Quality',
            line=dict(color='purple', width=2)
        ),
        row=3, col=2
    )

# 7. Price Action with Signals (zoomed to recent 500 bars)
recent_bars = min(500, len(df_5m_aligned))
recent_df = df_5m_aligned.tail(recent_bars)

fig.add_trace(
    go.Candlestick(
        x=recent_df.index,
        open=recent_df['Open'],
        high=recent_df['High'],
        low=recent_df['Low'],
        close=recent_df['Close'],
        name='Price',
        showlegend=False
    ),
    row=4, col=1
)
# The candlestick range slider would overlap the panel below it.
fig.update_xaxes(rangeslider_visible=False, row=4, col=1)

# Add signals, offset slightly from the bar so markers do not cover it.
long_signals = recent_df[recent_df['long_entry']]
short_signals = recent_df[recent_df['short_entry']]

fig.add_trace(
    go.Scatter(
        x=long_signals.index,
        y=long_signals['Low'] * 0.995,
        mode='markers',
        name='Long',
        marker=dict(symbol='triangle-up', size=8, color='green')
    ),
    row=4, col=1
)

fig.add_trace(
    go.Scatter(
        x=short_signals.index,
        y=short_signals['High'] * 1.005,
        mode='markers',
        name='Short',
        marker=dict(symbol='triangle-down', size=8, color='red')
    ),
    row=4, col=1
)

# 8. Trade Duration Distribution
if len(trades) > 0:
    durations = trade_durations.dt.total_seconds() / 3600  # Convert to hours
    fig.add_trace(
        go.Histogram(
            x=durations,
            nbinsx=30,
            name='Duration (hours)',
            marker_color='orange'
        ),
        row=4, col=2
    )

# 9. Volume Profile
fig.add_trace(
    go.Bar(
        x=recent_df.index,
        y=recent_df['Volume'],
        name='Volume',
        marker_color='lightblue'
    ),
    row=5, col=1
)

# Update layout
fig.update_layout(
    title='MLMI → NW-RQK → FVG Synergy Strategy - Comprehensive Analysis',
    height=1600,
    showlegend=True,
    template='plotly_white'
)

# Update axes
fig.update_yaxes(title_text="Return (%)", row=1, col=1)
fig.update_yaxes(title_text="Drawdown (%)", row=2, col=1)
fig.update_yaxes(title_text="Signal Quality", row=3, col=1)
fig.update_yaxes(title_text="Price", row=4, col=1)
fig.update_yaxes(title_text="Volume", row=5, col=1)

fig.show()

print("\nVisualization complete!")

In [None]:
# Cell 10: Statistical Validation and Robustness Testing

# NOTE: fastmath=True would enable the 'nnan'/'ninf' flags, which allow the
# compiler to assume NaNs never occur and can turn the np.isnan() filter below
# into a no-op. Only the NaN-safe fast-math flags are enabled here.
@njit(parallel=True, fastmath={'reassoc', 'arcp', 'contract', 'afn', 'nsz'}, cache=True)
def bootstrap_confidence_intervals(returns: np.ndarray, n_bootstrap: int = 10000,
                                  confidence: float = 0.95) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Bootstrap distributions of key performance metrics.

    NaN entries are dropped from ``returns``; each iteration then resamples
    the remaining values with replacement (same length as the cleaned input)
    and records four metrics for that resample.

    Args:
        returns: 1-D array of per-bar returns (fractions); may contain NaN.
        n_bootstrap: Number of bootstrap resamples.
        confidence: Currently unused by this function; kept so the signature
            stays compatible with existing callers. Interval bounds are
            derived by the caller from the returned distributions.

    Returns:
        Four arrays of length ``n_bootstrap``:
        compounded total return, annualized Sharpe ratio (factor
        ``sqrt(252 * 78)``; left at 0 when the resample has zero standard
        deviation), maximum drawdown (<= 0), and win rate (share of
        strictly positive returns). All four are zero-filled when
        ``returns`` has no non-NaN values.
    """
    # Arrays to store bootstrap results
    boot_returns = np.zeros(n_bootstrap)
    boot_sharpes = np.zeros(n_bootstrap)
    boot_max_dd = np.zeros(n_bootstrap)
    boot_win_rates = np.zeros(n_bootstrap)

    # Remove NaN values
    clean_returns = returns[~np.isnan(returns)]
    n_clean = len(clean_returns)

    if n_clean == 0:
        return boot_returns, boot_sharpes, boot_max_dd, boot_win_rates

    # Loop-invariant annualization factor
    annualization = np.sqrt(252 * 78)

    # Bootstrap iterations (independent, so they can run in parallel)
    for i in prange(n_bootstrap):
        # Resample with replacement, drawing one index at a time with the
        # two-argument scalar form of np.random.randint
        sample = np.empty(n_clean)
        for j in range(n_clean):
            sample[j] = clean_returns[np.random.randint(0, n_clean)]

        # Compounded total return of the resample
        boot_returns[i] = np.prod(1 + sample) - 1

        mean_ret = np.mean(sample)
        std_ret = np.std(sample)
        if std_ret > 0:
            boot_sharpes[i] = mean_ret / std_ret * annualization

        # Max drawdown: the running peak is tracked by hand because ufunc
        # methods such as np.maximum.accumulate do not compile in nopython mode
        equity = 1.0
        peak = 0.0
        worst_dd = 0.0
        for j in range(n_clean):
            equity *= 1.0 + sample[j]
            if j == 0 or equity > peak:
                peak = equity
            # Skip the ratio when the peak is not positive (avoids division by zero)
            if peak > 0.0:
                drawdown = (equity - peak) / peak
                if drawdown < worst_dd:
                    worst_dd = drawdown
        boot_max_dd[i] = worst_dd

        # Win rate
        boot_win_rates[i] = np.mean(sample > 0)

    return boot_returns, boot_sharpes, boot_max_dd, boot_win_rates

# Section banner
heavy_rule = "=" * 80
print("\n" + heavy_rule)
print("STATISTICAL VALIDATION & ROBUSTNESS TESTING")
print(heavy_rule)

# Bootstrap analysis, timed so boot_time can be reported below and added to
# the total execution time in the final summary
print("\nRunning bootstrap analysis (10,000 iterations)...")
boot_start = time.time()

# The bootstrap routine takes a plain ndarray rather than the pandas object
returns_array = returns.values
(
    boot_returns,
    boot_sharpes,
    boot_max_dd,
    boot_win_rates,
) = bootstrap_confidence_intervals(returns_array)

boot_time = time.time() - boot_start
print(f"Bootstrap completed in {boot_time:.3f} seconds")

# Calculate confidence intervals
def calculate_ci(data, confidence=0.95):
    """Return the (lower, upper) bounds of a two-sided percentile interval.

    Args:
        data: Array-like of sample values (e.g. a bootstrap distribution).
        confidence: Central coverage of the interval, as a fraction.

    Returns:
        Tuple ``(lower, upper)`` holding the ``(1 - confidence) / 2`` and
        ``(1 + confidence) / 2`` quantiles of ``data``.
    """
    lower_pct = (1 - confidence) / 2 * 100
    upper_pct = (1 + confidence) / 2 * 100
    # One call computes both tails
    lower, upper = np.percentile(data, [lower_pct, upper_pct])
    return lower, upper

# Display results
print("\n95% Confidence Intervals:")
print("-" * 50)

# Derive all interval bounds first, then report them in one group.
# Return, drawdown and win-rate bounds are fractions and are scaled to
# percent when printed; the Sharpe ratio is printed as-is.
ret_lower, ret_upper = calculate_ci(boot_returns)
sharpe_lower, sharpe_upper = calculate_ci(boot_sharpes)
dd_lower, dd_upper = calculate_ci(boot_max_dd)
wr_lower, wr_upper = calculate_ci(boot_win_rates)

print(f"Total Return: [{ret_lower*100:.2f}%, {ret_upper*100:.2f}%]")
print(f"Sharpe Ratio: [{sharpe_lower:.2f}, {sharpe_upper:.2f}]")
print(f"Max Drawdown: [{dd_lower*100:.2f}%, {dd_upper*100:.2f}%]")
print(f"Win Rate: [{wr_lower*100:.2f}%, {wr_upper*100:.2f}%]")

# Statistical significance tests
import math  # stdlib: normal tail probability via erfc, so no SciPy import is needed

print("\n" + "-" * 50)
print("STATISTICAL SIGNIFICANCE")
print("-" * 50)

# Test if the mean return is significantly different from zero.
# The NaN mask is applied once and the result reused.
valid_returns = returns_array[~np.isnan(returns_array)]
n_obs = len(valid_returns)
# Sample standard deviation (ddof=1) for the standard error of the mean
sample_std = np.std(valid_returns, ddof=1) if n_obs > 1 else 0.0

if sample_std > 0:
    t_stat = np.mean(valid_returns) / (sample_std / np.sqrt(n_obs))
    # Two-sided p-value under the normal approximation:
    # 2 * (1 - Phi(|t|)) == erfc(|t| / sqrt(2))
    p_value = math.erfc(abs(t_stat) / math.sqrt(2.0))
else:
    # Fewer than two observations or zero variance: the test is undefined
    t_stat = float('nan')
    p_value = float('nan')

print(f"T-statistic: {t_stat:.3f}")
print(f"P-value (two-sided): {p_value:.4f}")
print(f"Returns significantly positive: {'Yes' if t_stat > 1.96 else 'No'}")

# Risk-adjusted performance percentiles
# NOTE(review): boot_sharpes are resamples of this strategy's own returns, so
# the percentile ranks the realised Sharpe within its own bootstrap
# distribution rather than against other strategies. Confirm that the wording
# of the assessment below is intended, and that stats['Sharpe Ratio'] uses the
# same annualization as the bootstrap.
actual_sharpe = stats['Sharpe Ratio']
sharpe_percentile = np.mean(boot_sharpes <= actual_sharpe) * 100

print(f"\nStrategy Sharpe ratio percentile: {sharpe_percentile:.1f}%")
print("Performance assessment: ", end="")
if sharpe_percentile > 90:
    print("EXCELLENT - Top 10% performance")
elif sharpe_percentile > 75:
    print("VERY GOOD - Top 25% performance")
elif sharpe_percentile > 50:
    print("GOOD - Above median performance")
else:
    print("NEEDS IMPROVEMENT - Below median performance")

# Stability analysis
print("\n" + "-" * 50)
print("STABILITY ANALYSIS")
print("-" * 50)

# Rolling performance
# One year of 5-minute bars, using the same 252 * 78 bars-per-year factor as
# the sqrt(252 * 78) Sharpe annualization in this notebook. The previous
# value 252 * 5 covered only 1,260 bars, so the "1-year" labels were wrong.
bars_per_year = 252 * 78
window = bars_per_year

# Vectorized rolling statistics: a Python-level rolling.apply over a window
# this large is impractically slow. Windows that are incomplete or contain
# NaN stay NaN, as they did with rolling.apply.
# Compounded window return via log-returns: prod(1 + r) - 1 == expm1(sum(log1p(r))).
# NOTE: a bar return <= -100% makes log1p undefined for windows containing it.
rolling_returns = np.expm1(np.log1p(returns).rolling(window).sum())

rolling_mean = returns.rolling(window).mean()
rolling_std = returns.rolling(window).std()
rolling_sharpe = (
    (rolling_mean / rolling_std * np.sqrt(bars_per_year))
    .where(rolling_std > 0, 0.0)      # zero-volatility window -> Sharpe 0, as before
    .where(rolling_std.notna())       # keep incomplete windows as NaN
)

print(f"Rolling 1-year return volatility: {rolling_returns.std()*100:.2f}%")
print(f"Rolling Sharpe stability: {rolling_sharpe.std():.2f}")
print(f"Minimum rolling Sharpe: {rolling_sharpe.min():.2f}")
print(f"Maximum rolling Sharpe: {rolling_sharpe.max():.2f}")

In [None]:
# Cell 11: Final Summary and Recommendations

heavy_rule = "=" * 80
light_rule = "-" * 50

print("\n" + heavy_rule)
print("FINAL SUMMARY - MLMI → NW-RQK → FVG SYNERGY")
print(heavy_rule)

# Performance summary (values read from the backtest stats)
print("\nPERFORMANCE SUMMARY:")
print(light_rule)
print(f"Total Return: {stats['Total Return [%]']:.2f}%")
print(f"Sharpe Ratio: {stats['Sharpe Ratio']:.2f}")
print(f"Total Trades: {stats['Total Trades']:,.0f}")
print(f"Win Rate: {stats['Win Rate [%]']:.2f}%")
print(f"Average Trade: {stats['Expectancy [%]']:.3f}%")

# Execution summary: sum of the per-stage timings measured in earlier cells
stage_times = (
    calc_time,
    mlmi_time,
    nwrqk_time,
    align_time,
    signal_time,
    backtest_time,
    boot_time,
)
total_time = sum(stage_times)

# Signal counts reused by several lines below
synergy_signal_count = long_entries.sum() + short_entries.sum()
mlmi_signal_count = df_30m['mlmi_bull'].sum() + df_30m['mlmi_bear'].sum()
nwrqk_signal_count = df_30m['nwrqk_bull'].sum() + df_30m['nwrqk_bear'].sum()
fvg_zone_count = fvg_bull.sum() + fvg_bear.sum()

print("\nEXECUTION PERFORMANCE:")
print(light_rule)
print(f"Total execution time: {total_time:.2f} seconds")
print(f"Bars processed per second: {len(df_5m_aligned) / total_time:,.0f}")
print(f"Signals detected per second: {synergy_signal_count / signal_time:,.0f}")

# Signal analysis
print("\nSIGNAL CHARACTERISTICS:")
print(light_rule)
print(f"Base indicators (30m):")
print(f"  - MLMI signals: {mlmi_signal_count:,}")
print(f"  - NW-RQK signals: {nwrqk_signal_count:,}")
print(f"FVG zones (5m): {fvg_zone_count:,}")
print(f"\nSynergy signals: {synergy_signal_count:,}")
# Share of MLMI signals that did not become a final synergy entry
print(f"Signal reduction: {((1 - synergy_signal_count / mlmi_signal_count) * 100):.1f}%")

# Strengths and weaknesses
# Each rule pairs a threshold check with the message shown when it passes;
# rules are evaluated in the order listed.
print("\nKEY STRENGTHS:")
print("-" * 50)
strength_rules = [
    (stats['Sharpe Ratio'] > 1.0,
     f"Strong risk-adjusted returns (Sharpe: {stats['Sharpe Ratio']:.2f})"),
    (stats['Win Rate [%]'] > 45,
     f"Solid win rate ({stats['Win Rate [%]']:.1f}%)"),
    (stats['Total Trades'] > 1000,
     f"Good trade frequency ({stats['Total Trades']:,.0f} trades)"),
    (abs(stats['Max Drawdown [%]']) < 20,
     f"Controlled drawdown ({stats['Max Drawdown [%]']:.1f}%)"),
    (total_time < 10,
     f"Ultra-fast execution ({total_time:.1f} seconds)"),
]
strengths = [message for passed, message in strength_rules if passed]

for position, strength in enumerate(strengths, start=1):
    print(f"{position}. {strength}")

print("\nAREAS FOR IMPROVEMENT:")
print("-" * 50)
improvement_rules = [
    (stats['Sharpe Ratio'] < 0.5,
     "Improve risk-adjusted returns"),
    (stats['Win Rate [%]'] < 40,
     "Increase win rate through better entry timing"),
    (stats['Total Trades'] < 500,
     "Consider relaxing signal criteria for more opportunities"),
    (abs(stats['Max Drawdown [%]']) > 30,
     "Implement better risk management to reduce drawdowns"),
]
improvements = [message for triggered, message in improvement_rules if triggered]

for position, improvement in enumerate(improvements, start=1):
    print(f"{position}. {improvement}")

# Recommendations
# Static report text, printed line by line in order, followed by the closing banner.
report_lines = (
    "\nRECOMMENDATIONS:",
    "-" * 50,
    "1. Parameter optimization:",
    "   - Test different MLMI k-neighbors (100-300)",
    "   - Optimize NW-RQK kernel parameters (h: 5-15, r: 5-15)",
    "   - Adjust FVG ATR multiplier (1.0-2.0)",
    "\n2. Risk management enhancements:",
    "   - Implement dynamic position sizing based on volatility",
    "   - Add trailing stops for trend-following trades",
    "   - Consider correlation-based portfolio allocation",
    "\n3. Further testing:",
    "   - Walk-forward analysis across different market regimes",
    "   - Out-of-sample testing on different assets",
    "   - Stress testing during high volatility periods",
    "\n" + "=" * 80,
    "ANALYSIS COMPLETE",
    "=" * 80,
)
for report_line in report_lines:
    print(report_line)