## Setup & Configuration

In [None]:
# Core imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from datetime import datetime, timedelta
import warnings
import requests
warnings.filterwarnings('ignore')

# Setup path
import os
import sys

# Directory that contains the project's `python` package imported below.
# The notebook's original location stays the default so existing setups keep
# working; set the HFT_LAB_ROOT environment variable to use another checkout.
PROJECT_ROOT = os.environ.get(
    'HFT_LAB_ROOT',
    '/Users/melvinalvarez/Documents/Workspace/rust-hft-arbitrage-lab',
)

# Re-running this cell must not keep stacking the same entry onto sys.path.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

# Import project modules
from python import meanrev
from python.api_keys import get_finnhub_key
from python.universes import get_universe, get_available_universes
from python.regime_detector import RegimeDetector, AdaptiveStrategySelector, get_regime_metrics
from python.signal_monitor import SignalMonitor

# Try Rust analytics (high-performance)
# Probe for the optional `hft_py` extension; RUST_ANALYTICS records the outcome
# so later cells can choose between the Rust and the pure-Python code paths.
try:
    import hft_py
except ImportError:
    RUST_ANALYTICS = False
    print("‚ö†Ô∏è  Rust analytics not available - using Python fallbacks")
else:
    RUST_ANALYTICS = True
    print("‚úÖ Rust analytics available - using optimized implementations")

# Visualization settings
# Plot appearance: matplotlib style sheet and seaborn colour palette.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

# pandas display limits used when DataFrames are rendered in the notebook.
for option_name, option_value in (
    ('display.max_columns', 50),
    ('display.width', 120),
):
    pd.set_option(option_name, option_value)

print("‚úÖ Setup complete!")

## Universe Selection

Choose from multiple pre-configured universes:

In [None]:
# Display available universes
# Fetch the registry once and reuse it for the listing, the validation and the
# confirmation line below.
available_universes = get_available_universes()

print("üìä Available Universes:\n" + "="*60)
for name, info in available_universes.items():
    print(f"  {name:20} {info['size']:3} symbols - {info['desc']}")

# SELECT YOUR UNIVERSE HERE:
UNIVERSE = 'sp500_top100'  # Options: 'tech', 'finance', 'healthcare', 'energy', 'consumer', 'sp500_top100', 'crypto_major', 'etf_indices', etc.
DAYS_BACK = 30       # days of history generated per symbol
RESOLUTION_MIN = 5   # candle width in minutes

# Fail fast with a readable message instead of a bare KeyError on a typo.
if UNIVERSE not in available_universes:
    known = ', '.join(sorted(str(key) for key in available_universes))
    raise ValueError(f"Unknown universe {UNIVERSE!r}; choose one of: {known}")

print(f"\n‚úÖ Selected: {UNIVERSE} ({available_universes[UNIVERSE]['size']} symbols)")

## Data Fetching Functions

In [None]:
def fetch_finnhub_quote(symbol: str, api_key: str):
    """Fetch the current quote for *symbol* from the Finnhub REST API.

    Args:
        symbol: Ticker to look up; passed through as the ``symbol`` query
            parameter.
        api_key: Finnhub API token; passed as the ``token`` query parameter.

    Returns:
        The decoded JSON body of the response. Callers in this notebook read
        the price from its ``'c'`` key.

    Raises:
        requests.HTTPError: If Finnhub answers with a 4xx/5xx status (bad
            token, rate limit, server error).
        requests.RequestException: On connection failures or when the
            10-second timeout expires.
    """
    url = "https://finnhub.io/api/v1/quote"
    params = {"symbol": symbol, "token": api_key}
    response = requests.get(url, params=params, timeout=10)
    # Surface HTTP failures explicitly instead of handing an error payload
    # back to the caller as if it were a quote.
    response.raise_for_status()
    return response.json()

def generate_historical_data(symbol: str, api_key: str, days_back: int, resolution_min: int):
    """Generate synthetic OHLCV candles anchored to the current Finnhub price.

    Only the latest quote is real; the price path is simulated. The path
    starts at 95% of the current price, switches between 'trend',
    'mean_revert' and 'high_vol' regimes every five trading days, and is kept
    within +/-30% of the current price.

    Args:
        symbol: Ticker to simulate. A ':' in the symbol marks it as crypto
            (24h trading day); anything else uses a 6.5h session.
        api_key: Finnhub API token used to fetch the anchor price.
        days_back: Number of trading days to generate; must be positive.
        resolution_min: Candle width in minutes; must be positive.

    Returns:
        DataFrame with columns Close, Open, High, Low and Volume, indexed by
        evenly spaced timestamps ending at the current time.

    Raises:
        ValueError: If the arguments yield no candles or no positive price
            could be fetched for *symbol*.
    """
    import zlib  # stdlib; local import keeps this notebook cell self-contained

    if days_back <= 0 or resolution_min <= 0:
        raise ValueError("days_back and resolution_min must be positive")

    quote = fetch_finnhub_quote(symbol, api_key)
    current_price = quote.get('c') if isinstance(quote, dict) else None
    if not isinstance(current_price, (int, float)) or current_price <= 0:
        raise ValueError(f"Could not fetch price for {symbol}")

    is_crypto = ':' in symbol
    candles_per_day = int((24 if is_crypto else 6.5) * 60 / resolution_min)
    total_candles = days_back * candles_per_day
    if total_candles < 1:
        # Happens when resolution_min is longer than one trading session.
        raise ValueError(
            f"No candles to generate for {symbol} "
            f"(days_back={days_back}, resolution_min={resolution_min})"
        )

    timestamps = pd.date_range(end=datetime.now(), periods=total_candles, freq=f'{resolution_min}min')

    # Built-in hash() of a str is salted per interpreter process, so it cannot
    # give a reproducible per-symbol seed; CRC32 is stable across sessions.
    # A local Generator also leaves NumPy's global random state untouched.
    rng = np.random.default_rng(zlib.crc32(symbol.encode('utf-8')))

    # Generate with regime switches
    daily_vol = 0.03 if is_crypto else 0.02
    # Scale by the number of candles actually traded per day so the simulated
    # steps aggregate back to `daily_vol` for 6.5h sessions as well as 24h ones.
    vol_per_step = daily_vol / np.sqrt(candles_per_day)

    floor_price = current_price * 0.7
    cap_price = current_price * 1.3

    regime_length = candles_per_day * 5
    regimes = rng.choice(['trend', 'mean_revert', 'high_vol'],
                         size=total_candles // regime_length + 1, p=[0.3, 0.5, 0.2])
    shocks = rng.standard_normal(total_candles)

    prices = np.zeros(total_candles)
    prices[0] = current_price * 0.95

    # The loop stays sequential: the mean-reverting drift depends on the
    # previous simulated price.
    for i in range(1, total_candles):
        regime = regimes[i // regime_length]
        previous = prices[i - 1]

        if regime == 'trend':
            drift, vol = 0.0003, vol_per_step * 0.8
        elif regime == 'high_vol':
            drift, vol = 0, vol_per_step * 1.5
        else:
            # Pull back toward the anchor price.
            drift = -0.1 * (previous - current_price) / current_price
            vol = vol_per_step

        step_price = previous * (1 + drift + vol * shocks[i])
        prices[i] = min(max(step_price, floor_price), cap_price)

    # Each candle opens at the previous close; the first candle has no
    # predecessor, so it opens at its own close rather than wrapping around to
    # the final price of the series.
    opens = np.concatenate(([prices[0]], prices[:-1]))

    # Wicks extend beyond the candle body so High >= max(Open, Close) and
    # Low <= min(Open, Close) always hold.
    upper_wick = np.abs(rng.standard_normal(total_candles)) * 0.002
    lower_wick = np.abs(rng.standard_normal(total_candles)) * 0.002
    highs = np.maximum(opens, prices) * (1 + upper_wick)
    lows = np.minimum(opens, prices) * (1 - lower_wick)

    # Volume grows with the absolute one-step price change.
    volume = 1e6 * (1 + np.abs(np.diff(prices, prepend=prices[0])) / prices * 100)

    return pd.DataFrame({
        'Close': prices,
        'Open': opens,
        'High': highs,
        'Low': lows,
        'Volume': volume,
    }, index=timestamps)

def fetch_universe_data(universe: str, api_key: str, days_back: int, resolution_min: int):
    """Build a ``{symbol: DataFrame}`` mapping for every symbol in *universe*.

    Each symbol is generated independently via ``generate_historical_data``.
    A symbol that fails is reported on stdout and left out of the result, so
    the returned dict may contain fewer entries than the universe has symbols.
    """
    symbols = get_universe(universe)
    print(f"üì° Fetching {len(symbols)} symbols...")

    data = {}
    for i, symbol in enumerate(symbols, start=1):
        # Progress prefix; the outcome is appended on the same line.
        print(f"   [{i}/{len(symbols)}] {symbol}...", end=" ", flush=True)
        try:
            df = generate_historical_data(symbol, api_key, days_back, resolution_min)
        except Exception as e:
            # Best-effort: one bad symbol must not abort the whole universe.
            print(f"‚úó Error: {e}")
            continue
        data[symbol] = df
        print(f"‚úì ({len(df)} candles)")

    print(f"‚úÖ Fetched {len(data)}/{len(symbols)} symbols")
    return data

print("‚úÖ Data fetching functions defined")

## Fetch Market Data

In [None]:
# Get API key
api_key = get_finnhub_key()
if not api_key:
    raise ValueError("Finnhub API key not found")

# Fetch data
print(f"‚è≥ Fetching {UNIVERSE} data...")
data_dict = fetch_universe_data(UNIVERSE, api_key, DAYS_BACK, RESOLUTION_MIN)

# Every symbol may have failed (bad key, rate limit, no network). Stop here
# with a clear message instead of an IndexError on the empty frame below.
if not data_dict:
    raise RuntimeError(f"No market data could be fetched for universe {UNIVERSE!r}")

# Extract prices: one Close column per symbol, aligned on the timestamp index.
prices = pd.DataFrame({sym: df['Close'] for sym, df in data_dict.items()})
# Handle any missing data: carry the last value forward, then back-fill any
# leading gaps. `fillna(method=...)` is deprecated in recent pandas releases;
# ffill()/bfill() are the supported equivalents.
prices = prices.ffill().bfill()

print(f"\n‚úÖ Data ready!")
print(f"   Shape: {prices.shape[0]:,} timestamps √ó {prices.shape[1]} symbols")
print(f"   Total data points: {prices.size:,}")
print(f"   Date range: {prices.index[0]} to {prices.index[-1]}")
prices.head()

## Regime Detection

Detect market regimes for each asset using Hurst exponent, autocorrelation, and trend analysis:

In [None]:
# Initialize regime detector and monitoring
regime_detector = RegimeDetector(lookback_window=100)
monitor = SignalMonitor(alert_file='data/alerts.jsonl', verbose=True)

# Simple per-step returns; the first row has no predecessor and becomes 0.
returns = prices.pct_change().fillna(0)

# Classify every asset in one call.
print("üîç Detecting market regimes...")
regime_results = regime_detector.detect_multi_regime(returns)

# Regime summary: number of assets and share of the universe per regime.
regime_counts = regime_results['regime'].value_counts()
print(f"\nüìä Regime Distribution:")
for regime, count in regime_counts.items():
    pct = count / len(regime_results) * 100
    print(f"   {regime:20} {count:3} assets ({pct:.1f}%)")

# Mean-reverting assets, ordered by ascending Hurst exponent.
is_mean_reverting = regime_results['regime'] == 'mean_reverting'
mean_rev_assets = regime_results[is_mean_reverting].sort_values('hurst')
summary_columns = ['regime', 'hurst', 'autocorr', 'volatility']
print(f"\nüéØ Top 10 Mean-Reverting Assets (by Hurst exponent):")
print(mean_rev_assets[summary_columns].head(10))

regime_results.head(10)

## High-Performance Analytics with Rust

Use Rust for computationally intensive operations:

In [None]:
if RUST_ANALYTICS:
    import time

    print("‚ö° Using Rust analytics for high performance...")

    # Convert to numpy for Rust
    returns_np = returns.values

    # Compute correlation matrix (Rust). perf_counter() is the high-resolution
    # clock meant for timing short intervals; time.time() can report 0 elapsed.
    start = time.perf_counter()
    corr_matrix = hft_py.analytics.compute_correlation_matrix(returns_np)
    rust_time = time.perf_counter() - start

    # Compare with pandas
    start = time.perf_counter()
    corr_pandas = returns.corr().values
    pandas_time = time.perf_counter() - start

    print(f"   Rust time: {rust_time:.3f}s")
    print(f"   Pandas time: {pandas_time:.3f}s")
    if rust_time > 0:
        print(f"   Speedup: {pandas_time/rust_time:.1f}x faster")
    else:
        # Guard the ratio: a zero reading would raise ZeroDivisionError.
        print("   Speedup: n/a (Rust run finished below timer resolution)")

    # Compute PCA (Rust)
    print("\nüî¨ Computing PCA...")
    # Never request more components than there are assets.
    n_components = min(10, returns_np.shape[1])
    components, explained_var = hft_py.analytics.compute_pca(returns_np, n_components=n_components)
    explained_var = np.asarray(explained_var, dtype=float)

    # Dividing by explained_var.sum() (the retained components only) always
    # yields 100%. Normalise by the total variance of the data instead.
    # NOTE(review): assumes compute_pca returns eigenvalues of the sample
    # covariance of `returns_np` - confirm against the hft_py implementation.
    total_var = float(np.var(returns_np, axis=0, ddof=1).sum())
    if total_var > 0 and explained_var.sum() <= total_var * (1 + 1e-6):
        explained_ratio = explained_var / total_var
        print(f"   Top {n_components} components explain {explained_ratio.sum():.1%} of variance")
    else:
        # Scale mismatch with the assumption above: fall back to shares among
        # the computed components and say so rather than claim a total.
        explained_ratio = explained_var / explained_var.sum()
        print(f"   Variance scale unknown; shares are relative to the top {n_components} components")
    # Small universes may yield fewer than three components.
    print("   " + ", ".join(
        f"PC{rank}: {share:.1%}" for rank, share in enumerate(explained_ratio[:3], start=1)
    ))

    corr_df = pd.DataFrame(corr_matrix, index=prices.columns, columns=prices.columns)
else:
    print("‚ö†Ô∏è  Using Python analytics (slower for large datasets)")
    corr_df = returns.corr()

print("\n‚úÖ Analytics complete")

## Mean Reversion Signals with Monitoring

In [None]:
# Compute z-scores for mean reversion
window = 50  # rolling look-back, in candles

if RUST_ANALYTICS:
    print("‚ö° Computing z-scores with Rust...")
    zscores_np = hft_py.analytics.compute_zscores(prices.values, window)
    zscores = pd.DataFrame(zscores_np, index=prices.index, columns=prices.columns)
else:
    print("‚öôÔ∏è  Computing z-scores with Python...")
    rolling_prices = prices.rolling(window)
    zscores = (prices - rolling_prices.mean()) / rolling_prices.std()

# A window with zero standard deviation produces +/-inf. Those values are not
# NaN, so they would otherwise pass the check below and be reported as extreme
# signals; treat them as missing instead.
zscores = zscores.replace([np.inf, -np.inf], np.nan)

# Monitor signals using the most recent row only.
latest_zscores = zscores.iloc[-1]
print(f"\nüö® Checking for signal alerts (threshold: ¬±{monitor.thresholds['signal_strength']})...")

alerts = []
for symbol in latest_zscores.index:
    zscore = latest_zscores[symbol]
    if not np.isfinite(zscore):
        continue  # not enough history or a flat window: nothing to evaluate
    alert = monitor.check_signal_threshold(symbol, zscore, "z_score")
    if alert:
        alerts.append(alert)

print(f"\n‚úÖ Generated {len(alerts)} alerts")

# Display strongest signals, largest magnitude first (NaN never compares > 2).
strong_signals = latest_zscores[latest_zscores.abs() > 2.0].sort_values(key=abs, ascending=False)
print(f"\nüìä Strongest Signals (|z| > 2.0): {len(strong_signals)}")
if len(strong_signals) > 0:
    print(strong_signals.head(10))

## Visualization: Correlation Heatmap

In [None]:
# Show correlation heatmap for subset
# Only the leading block of the matrix is drawn (at most 30 assets).
n_show = min(30, len(corr_df))
subset_values = corr_df.iloc[:n_show, :n_show].values

heatmap = go.Heatmap(
    z=subset_values,
    x=corr_df.columns[:n_show],
    y=corr_df.index[:n_show],
    colorscale='RdBu',
    zmid=0,
    # Cell labels: the coefficient itself, rendered with two decimals.
    text=subset_values,
    texttemplate='%{text:.2f}',
    textfont=dict(size=8),
)
fig = go.Figure(data=heatmap)
fig.update_layout(
    title=f"Correlation Matrix ({UNIVERSE}, first {n_show} assets)",
    width=900,
    height=800,
)
fig.show()

# Mean pairwise correlation over the full matrix: take the strict upper
# triangle so each pair counts once and the diagonal is excluded.
all_values = corr_df.values
upper_triangle = all_values[np.triu_indices_from(all_values, k=1)]
avg_corr = upper_triangle.mean()
print(f"Average correlation: {avg_corr:.3f}")

## Alert Summary

In [None]:
# Get alert summary
alert_summary = monitor.get_alert_summary()

if len(alert_summary) == 0:
    print("No alerts generated (all signals below threshold)")
else:
    print(f"üìã Alert Summary: {len(alert_summary)} total alerts\n")

    # Breakdown by severity.
    print("By Severity:")
    severity_counts = alert_summary['severity'].value_counts()
    for severity, count in severity_counts.items():
        print(f"   {severity:10} {count:3} alerts")

    # Breakdown by alert type.
    print("\nBy Type:")
    type_counts = alert_summary['type'].value_counts()
    for alert_type, count in type_counts.items():
        print(f"   {alert_type:15} {count:3} alerts")

    # Last ten rows of the summary, limited to the descriptive columns.
    print("\nRecent Alerts:")
    recent_columns = ['symbol', 'type', 'severity', 'message', 'value']
    display(alert_summary[recent_columns].tail(10))

## Performance Summary

### Key Metrics:
- **Universe Size**: Large-scale analysis capability
- **Rust Performance**: Significant speedup for computations
- **Regime Detection**: Automated market state identification
- **Signal Monitoring**: Real-time alert system

### Next Steps:
1. Implement adaptive position sizing based on regime
2. Add portfolio optimization with risk constraints
3. Backtest strategies with transaction costs
4. Set up live monitoring with webhooks/email alerts