# In [None]:  (cell marker left over from the notebook export)
# Ensure these imports are at the top of your script
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import yfinance as yf
from datetime import datetime, timedelta
import warnings
import matplotlib.pyplot as plt


warnings.filterwarnings('ignore')


# ======== CONFIGURABLE PARAMETERS ========
# Market data parameters
# Each *_TICKER constant is the symbol string handed to the downloader for one
# instrument; START_DATE / END_DATE bound the download window (YYYY-MM-DD strings).
TICKER = 'SPY'  # Main ticker to analyze
VIX_TICKER = '^VIX'  # Volatility index
TNX_TICKER = '^TNX'  # 10-Year Treasury Yield
GLD_TICKER = 'GLD'  # Gold ETF
XLY_TICKER = 'XLY'  # Consumer Discretionary ETF
XLP_TICKER = 'XLP'  # Consumer Staples ETF
XLU_TICKER = 'XLU'  # Utilities ETF
XLF_TICKER = 'XLF'  # Financial ETF
HYG_TICKER = 'HYG'  # High Yield Corporate Bond ETF
TLT_TICKER = 'TLT'  # 20+ Year Treasury Bond ETF
VIX3M_TICKER = '^VIX3M'  # 3-Month VIX
IRX_TICKER = '^IRX'  # 13 Week TBill (~ Short Term Rate Proxy)
UUP_TICKER = 'UUP'  # US Dollar Index ETF
TIP_TICKER = 'TIP'  # TIPS ETF
IEF_TICKER = 'IEF'  # 7-10 Year Treasury ETF
START_DATE = "2018-01-01"  # Historical data start date
END_DATE = "2025-04-25"    # Data end date (updated to latest)

# Technical indicator parameters
# All windows are expressed in rows of the daily price frame (trading days).
VOL_WINDOW = 21  # Window for volatility calculation (21 days ~ 1 month); also the rolling z-score window
MOMENTUM_WINDOW = 63  # Window for momentum calculation (63 days ~ 3 months)
SMA_FAST = 20  # Fast moving average
SMA_SLOW = 50  # Slow moving average
BB_WINDOW = 20  # Bollinger Bands window
BB_STD = 2  # Bollinger Bands standard deviation multiplier
ATR_WINDOW = 14  # Average True Range window
CHOP_WINDOW = 14  # Choppiness Index window
SECTOR_WINDOW = 10  # Window for sector rotation indicators
CREDIT_MA_WINDOW = 30  # Window for credit spread MA
PERCENTILE_LOOKBACK = 252 # Lookback for calculating rolling percentiles (~1 year)

# Regime Scorecard Configuration
# Define weights, thresholds, percentiles etc. based on the final scorecard design
#
# Each regime dict maps a rule key to its settings:
#   * 'weight'  - points the rule contributes; the ten weights in each regime sum to 100.
#   * graduated rules carry a list of cut-offs ('thresholds', 'percentiles',
#     'z_thresholds', ...) and a parallel 'scores' list. Cut-offs are listed from
#     the loosest to the strictest condition, paired by position with the
#     fraction of the weight that is awarded.
#   * the remaining keys are rule-specific cut-offs; the trailing comment on each
#     line states the condition the rule is meant to test.

BULL_REGIME_CONFIG = {
    'rule1_trend_structure': {'weight': 12, 'persistence_days': 5},
    'rule2_price_strength': {'weight': 12, 'thresholds': [1.00, 1.01, 1.02], 'scores': [0.33, 0.67, 1.0]}, # Close > SMA_Slow * threshold
    'rule3_momentum': {'weight': 13, 'percentiles': [0.50, 0.60, 0.70], 'scores': [0.33, 0.67, 1.0]}, # Momentum > Momentum.rolling(252).quantile(percentile)
    'rule4_vol_env': {'weight': 10, 'vix_perc_threshold': 0.40}, # VIX < perc & Vol < 63d mean
    'rule5_credit_cond': {'weight': 12, 'z_thresholds': [0.5, 0.7, 1.0], 'scores': [0.33, 0.67, 1.0]}, # HYG_TLT_Z > z_threshold
    'rule6_broad_part': {'weight': 11, 'adv_perc_threshold': 55}, # AD_Line > 50MA & Adv_Perc > threshold
    'rule7_sector_lead': {'weight': 10, 'xly_xlp_z_threshold': 0.3, 'xlf_spy_z_threshold': 0.0}, # XLY/XLP Z > thresh & XLF/SPY Z > thresh
    'rule8_yield_curve': {'weight': 8, 'spread_threshold': 0.0, 'change_threshold': -0.05}, # Spread > thresh & 21d_Change >= change_thresh
    'rule9_risk_appetite': {'weight': 7, 'bb_perc_b_threshold': 0.7, 'uup_z_threshold': 0.3}, # BB_%B > thresh OR (GLD_Mom > 0 AND UUP_Z < thresh)
    'rule10_vol_confirm': {'weight': 5}, # Volume > 21d mean on up days
}

NEUTRAL_REGIME_CONFIG = {
    'rule1_range_bound': {'weight': 14, 'fast_thresholds': [0.05, 0.03, 0.02], 'slow_thresholds': [0.05, 0.04, 0.02], 'scores': [0.33, 0.67, 1.0]}, # ABS(Price_to_SMA) < threshold
    'rule2_trend_flat': {'weight': 12, 'thresholds': [0.03, 0.02, 0.01], 'scores': [0.33, 0.67, 1.0]}, # ABS(SMA_Ratio - 1) < threshold
    'rule3_choppiness': {'weight': 13, 'lower_threshold': 55, 'upper_threshold': 80}, # Chop Index between thresholds
    'rule4_lim_momentum': {'weight': 11, 'z_thresholds': [1.0, 0.75, 0.5], 'scores': [0.33, 0.67, 1.0]}, # ABS(Momentum_Z) < z_threshold
    'rule5_mod_vol': {'weight': 10, 'z_thresholds': [1.0, 0.8, 0.5], 'scores': [0.33, 0.67, 1.0]}, # ABS(VIX_Z_Score) < z_threshold
    'rule6_band_contract': {'weight': 9, 'lower_z': -0.8, 'upper_z': 0.5}, # BB_Width_Z between lower and upper Z
    'rule7_credit_stab': {'weight': 9, 'z_thresholds': [1.0, 0.7, 0.4], 'scores': [0.33, 0.67, 1.0]}, # ABS(HYG_TLT_Z) < z_threshold
    'rule8_mixed_breadth': {'weight': 8, 'osc_thresholds': [35, 25, 15], 'scores': [0.33, 0.67, 1.0]}, # ABS(McClellan_Oscillator_Norm) < osc_threshold
    'rule9_sector_bal': {'weight': 8, 'xly_xlp_z_threshold': 0.5, 'xlu_spy_z_threshold': 0.5}, # ABS(XLY/XLP_Z) < thresh AND ABS(XLU/SPY_Z) < thresh
    'rule10_mean_reversion': {'weight': 6, 'std_threshold': 0.15, 'mean_lower': 0.4, 'mean_upper': 0.6}, # BB_%B 10d_std > thresh AND 10d_mean between lower/upper
}

BEAR_REGIME_CONFIG = {
    'rule1_trend_structure': {'weight': 12, 'sma_threshold': 0.98, 'persistence_days': 3}, # SMA_Fast < SMA_Slow * thresh for days
    'rule2_price_weakness': {'weight': 12, 'thresholds': [0.99, 0.98, 0.97], 'scores': [0.33, 0.67, 1.0]}, # Close < SMA_Slow * threshold
    'rule3_neg_momentum': {'weight': 13, 'percentiles': [0.40, 0.35, 0.30], 'scores': [0.33, 0.67, 1.0]}, # Momentum < Momentum.rolling(252).quantile(percentile)
    'rule4_elevated_vol': {'weight': 10, 'vix_perc_threshold': 0.70, 'vix_abs_threshold': 20, 'perc_thresholds': [0.60, 0.70, 0.80], 'scores': [0.33, 0.67, 1.0]}, # VIX > perc_thresh AND VIX > abs_thresh; score based on perc
    'rule5_credit_stress': {'weight': 12, 'ratio_threshold': 0.985, 'z_thresholds': [-0.7, -0.9, -1.2], 'scores': [0.33, 0.67, 1.0]}, # Ratio < MA * ratio_thresh AND Z < z_thresh
    'rule6_def_rotation': {'weight': 10, 'xly_xlp_z_threshold': -0.5, 'xlu_spy_z_threshold': 0.3}, # XLY/XLP Z < thresh AND XLU/SPY Z > thresh
    'rule7_breadth_deter': {'weight': 11, 'decl_perc_threshold': 55}, # AD_Line < 50MA AND (Decl_Perc > thresh OR AD_Neg_Div > 0)
    'rule8_vol_structure': {'weight': 8, 'ratio_threshold': 1.03, 'z_threshold': 1.5}, # VIX/VIX3M Ratio > ratio_thresh OR Z > z_thresh
    'rule9_yield_curve_warn': {'weight': 7, 'spread_abs_threshold': 0.05, 'spread_flat_threshold': 0.15, 'change_threshold': -0.10}, # Spread < abs OR (Spread < flat AND 21d_Change < change)
    'rule10_flight_quality': {'weight': 5, 'gld_spy_ratio_threshold': 1.03, 'tlt_threshold': 0.98}, # GLD/SPY Ratio > 63dMean * thresh OR TLT > 20dMax * thresh
}

# NOTE(review): presumably bonus points for the regime assigned one and two
# rows earlier; the consumer is not in this part of the file - confirm.
PERSISTENCE_WEIGHTS = {'t-1': 8, 't-2': 4}

# Single lookup table bundling the three regime scorecards and the persistence weights.
ALL_CONFIG = {
    'Bull': BULL_REGIME_CONFIG,
    'Neutral': NEUTRAL_REGIME_CONFIG,
    'Bear': BEAR_REGIME_CONFIG,
    'Persistence': PERSISTENCE_WEIGHTS
}

# ======== DATA PREPARATION FUNCTIONS ========
# load_ad_line_data, download_market_data, calculate_bollinger_bands, calculate_atr and
# calculate_choppiness_index supply the base data and indicator columns used by the feature and scoring code below.
def load_ad_line_data(filepath="nyse_breadth_2023.csv"):
    """Load a breadth CSV and derive advance/decline indicators.

    The CSV must provide ``Date``, ``Advancers``, ``Decliners`` and ``Neutral``
    columns. ``Total Issues`` is used when present and otherwise computed as
    the sum of the other three.

    Rows are sorted chronologically before any indicator is computed, because
    the cumulative sums, EMAs, rolling windows and rate-of-change columns below
    all depend on row order.

    Args:
        filepath: Path of the CSV file to read.

    Returns:
        The enriched DataFrame (one row per date, oldest first), or ``None``
        when the file is missing or cannot be processed. Failures are reported
        with ``print`` so the caller can continue without breadth data.
    """
    print(f"Loading A/D line data from {filepath}...")
    try:
        ad_data = pd.read_csv(filepath)
        ad_data['Date'] = pd.to_datetime(ad_data['Date'])

        # Ensure required columns exist; only 'Total Issues' can be reconstructed.
        required_cols = ['Advancers', 'Decliners', 'Neutral', 'Total Issues']
        missing_cols = [col for col in required_cols if col not in ad_data.columns]
        if missing_cols == ['Total Issues']:
            ad_data['Total Issues'] = ad_data['Advancers'] + ad_data['Decliners'] + ad_data['Neutral']
        elif missing_cols:
            raise ValueError(f"Missing required columns in {filepath}. Need: {required_cols}")

        # Every indicator below is order-dependent, so enforce oldest-first rows.
        ad_data = ad_data.sort_values('Date', kind='stable').reset_index(drop=True)

        advancers = ad_data['Advancers']
        decliners = ad_data['Decliners']
        total_issues = ad_data['Total Issues']
        has_issues = total_issues != 0

        # Basic breadth metrics
        ad_data['Net_Advances'] = advancers - decliners
        ad_data['AD_Line'] = ad_data['Net_Advances'].cumsum()

        # A/D ratio and its z-score in log space (NaN where the ratio is undefined or zero)
        ad_data['AD_Ratio'] = np.where(decliners != 0, advancers / decliners, np.nan)
        ad_data['Log_AD_Ratio'] = np.log(ad_data['AD_Ratio'].replace(0, np.nan))  # Avoid log(0)
        ad_data['Log_AD_Ratio_21d_Mean'] = ad_data['Log_AD_Ratio'].rolling(window=21).mean()
        ad_data['Log_AD_Ratio_21d_StdDev'] = ad_data['Log_AD_Ratio'].rolling(window=21).std()
        ad_data['AD_Ratio_Z_Score'] = (
            (ad_data['Log_AD_Ratio'] - ad_data['Log_AD_Ratio_21d_Mean'])
            / ad_data['Log_AD_Ratio_21d_StdDev']
        )

        # McClellan Oscillator: 19-span EMA minus 39-span EMA of net advances
        ad_data['EMA19_Net_Advances'] = ad_data['Net_Advances'].ewm(span=19, adjust=False).mean()
        ad_data['EMA39_Net_Advances'] = ad_data['Net_Advances'].ewm(span=39, adjust=False).mean()
        ad_data['McClellan_Oscillator'] = ad_data['EMA19_Net_Advances'] - ad_data['EMA39_Net_Advances']
        ad_data['McClellan_Oscillator_Norm'] = np.where(
            has_issues, ad_data['McClellan_Oscillator'] / total_issues * 1000, 0
        )

        # Percentage of advancing / declining issues
        ad_data['Advancing_Percentage'] = np.where(has_issues, advancers / total_issues * 100, 0)
        ad_data['Declining_Percentage'] = np.where(has_issues, decliners / total_issues * 100, 0)
        ad_data['Advancing_Percentage_10MA'] = ad_data['Advancing_Percentage'].rolling(window=10).mean()
        adv_pct_21d = ad_data['Advancing_Percentage'].rolling(window=21)
        ad_data['Advancing_Percentage_Z'] = (
            (ad_data['Advancing_Percentage'] - adv_pct_21d.mean()) / adv_pct_21d.std()
        )

        # Breadth thrust: 10-span EMA of advancers / (advancers + decliners)
        ad_data['Daily_Thrust_Denominator'] = advancers + decliners
        ad_data['Daily_Thrust'] = np.where(
            ad_data['Daily_Thrust_Denominator'] != 0,
            advancers / ad_data['Daily_Thrust_Denominator'],
            np.nan,
        )
        ad_data['Breadth_Thrust'] = ad_data['Daily_Thrust'].ewm(span=10, adjust=False).mean()
        ad_data['Thrust_Signal'] = (ad_data['Breadth_Thrust'] > 0.65).astype(int)

        # McClellan Summation Index
        ad_data['McClellan_Summation_Index'] = ad_data['McClellan_Oscillator'].cumsum()
        ad_data['McClellan_SI_10MA'] = ad_data['McClellan_Summation_Index'].rolling(window=10).mean()

        # A/D line momentum (percentage rate of change over 5/10/20 rows)
        for periods in (5, 10, 20):
            ad_data[f'AD_Line_{periods}d_ROC'] = ad_data['AD_Line'].pct_change(periods=periods) * 100
        roc_50d = ad_data['AD_Line_10d_ROC'].rolling(window=50)
        ad_data['AD_Line_Momentum_Z'] = (ad_data['AD_Line_10d_ROC'] - roc_50d.mean()) / roc_50d.std()

        # A/D line moving averages and crossover flag
        ad_data['AD_Line_20MA'] = ad_data['AD_Line'].rolling(window=20).mean()
        ad_data['AD_Line_50MA'] = ad_data['AD_Line'].rolling(window=50).mean()
        ad_data['AD_Line_Golden_Cross'] = (ad_data['AD_Line_20MA'] > ad_data['AD_Line_50MA']).astype(int)
        ad_data['AD_Line_Dist_20MA'] = np.where(
            ad_data['AD_Line_20MA'] != 0,
            (ad_data['AD_Line'] / ad_data['AD_Line_20MA'] - 1) * 100,
            0,
        )

        # A composite breadth indicator was sketched here previously but is disabled.

        print("A/D line data loaded and prepared.")
        return ad_data

    except FileNotFoundError:
        print(f"Error: A/D data file not found at {filepath}. Proceeding without breadth data.")
        return None
    except Exception as e:
        # Deliberate best-effort: breadth data is optional for the rest of the pipeline.
        print(f"Error loading or processing A/D data from {filepath}: {e}. Proceeding without breadth data.")
        return None


def download_market_data(ticker, vix_ticker, tnx_ticker, gld_ticker, xly_ticker, xlp_ticker, xlu_ticker, xlf_ticker, hyg_ticker, tlt_ticker, vix3m_ticker, irx_ticker, uup_ticker, tip_ticker, ief_ticker, start_date, end_date=None):
    """Download the main ticker and all auxiliary series and merge them by date.

    The main ticker contributes ``Open``/``High``/``Low``/``Close``/``Volume``
    (downloaded with ``auto_adjust=True``). Every auxiliary ticker contributes a
    single price column named after its short label (``VIX``, ``TNX``, ...),
    left-joined on the main ticker's dates and forward-filled.

    Args:
        ticker: Symbol of the main instrument.
        vix_ticker ... ief_ticker: Symbols of the auxiliary instruments.
        start_date: First date to download (string accepted by yfinance).
        end_date: Last date to download; defaults to tomorrow's date so the
            most recent session is included.

    Returns:
        The merged DataFrame with the additional ``Log_Return`` and
        ``GLD_Log_Return`` columns (log returns scaled by 100).
    """
    if end_date is None:
        end_date = (datetime.today() + timedelta(days=1)).strftime("%Y-%m-%d")
    print(f"Downloading market data from {start_date} to {end_date}...")

    tickers_to_download = {
        'main': ticker, 'VIX': vix_ticker, 'TNX': tnx_ticker, 'GLD': gld_ticker,
        'XLY': xly_ticker, 'XLP': xlp_ticker, 'XLU': xlu_ticker, 'XLF': xlf_ticker,
        'HYG': hyg_ticker, 'TLT': tlt_ticker, 'VIX3M': vix3m_ticker, 'IRX': irx_ticker,
        'UUP': uup_ticker, 'TIP': tip_ticker, 'IEF': ief_ticker
    }
    # Derived from the mapping so the two lists cannot drift apart.
    auxiliary_names = [name for name in tickers_to_download if name != 'main']

    data_frames = {}
    for name, symbol in tickers_to_download.items():
        print(f"Downloading {name} ({symbol})...")
        # auto_adjust only for the main ticker; auxiliaries pick Adj Close below when it differs.
        df = yf.download(symbol, start=start_date, end=end_date, auto_adjust=(name == 'main'), progress=False)

        # Standardize columns (handle multi-index)
        if isinstance(df.columns, pd.MultiIndex):
            df.columns = df.columns.droplevel(1)  # Assumes second level is ticker, adjust if needed

        df = df.reset_index()

        if name == 'main':
            df = df[['Date', 'Open', 'High', 'Low', 'Close', 'Volume']]
        else:
            # Use Adj Close if available and differs from Close, otherwise use Close
            use_adjusted = 'Adj Close' in df.columns and not df['Close'].equals(df['Adj Close'])
            close_col = 'Adj Close' if use_adjusted else 'Close'
            df = df[['Date', close_col]].rename(columns={close_col: name})

        data_frames[name] = df

    # Merge all dataframes onto the main ticker's dates
    print("Merging dataframes...")
    df_merged = data_frames['main']
    for name in auxiliary_names:
        df_merged = pd.merge(df_merged, data_frames[name], on='Date', how='left')

    # Forward fill missing values for auxiliary tickers
    print("Forward filling missing values...")
    for col in auxiliary_names:
        if col in df_merged.columns:
            # .ffill() replaces the deprecated fillna(method='ffill') spelling.
            df_merged[col] = df_merged[col].ffill()
        else:
            print(f"Warning: Expected column '{col}' not found after download/merge.")

    # Basic log return calculation (in percent)
    df_merged['Log_Return'] = np.log(df_merged['Close'] / df_merged['Close'].shift(1)) * 100
    if 'GLD' in df_merged.columns:
        df_merged['GLD_Log_Return'] = np.log(df_merged['GLD'] / df_merged['GLD'].shift(1)) * 100
    else:
        df_merged['GLD_Log_Return'] = 0  # Or np.nan if preferred

    print("Market data download and initial preparation complete.")
    return df_merged


def calculate_bollinger_bands(df, window=20, num_std=2):
    """Add Bollinger Band columns to *df* in place and return it.

    Columns written: ``BB_Middle`` (rolling mean of ``Close``), ``BB_Upper`` /
    ``BB_Lower`` (middle +/- ``num_std`` rolling standard deviations),
    ``BB_Width`` (band span as a percentage of the middle band, 0 where the
    middle band is 0) and ``BB_PercentB`` (position of ``Close`` within the
    bands, 0.5 where the bands have zero span).
    """
    close_window = df['Close'].rolling(window=window)
    midline = close_window.mean()
    band_offset = close_window.std() * num_std

    df['BB_Middle'] = midline
    df['BB_Upper'] = midline + band_offset
    df['BB_Lower'] = midline - band_offset

    band_span = df['BB_Upper'] - df['BB_Lower']
    df['BB_Width'] = np.where(df['BB_Middle'] != 0, band_span / df['BB_Middle'] * 100, 0)
    # Zero-span bands would divide by zero; report the midpoint instead.
    df['BB_PercentB'] = np.where(band_span != 0, (df['Close'] - df['BB_Lower']) / band_span, 0.5)
    return df

def calculate_atr(df, window=14):
    """Add ``ATR`` and ``ATR_Normalized`` columns to *df* and return it.

    True range is the largest of high-low, |high - previous close| and
    |low - previous close|; on the first row only high-low is available. ATR is
    the exponentially weighted mean of true range with ``alpha = 1 / window``.
    ``ATR_Normalized`` is ATR as a percentage of ``Close`` (0 where ``Close``
    is 0).

    The true-range components are kept in local variables, so no scratch
    columns are written to the caller's frame.
    """
    prev_close = df['Close'].shift(1)
    true_range = pd.concat(
        [
            (df['High'] - df['Low']).abs(),
            (df['High'] - prev_close).abs(),
            (df['Low'] - prev_close).abs(),
        ],
        axis=1,
    ).max(axis=1)  # row-wise max skips the NaNs produced by the missing first previous close

    df['ATR'] = true_range.ewm(alpha=1 / window, adjust=False).mean()
    df['ATR_Normalized'] = np.where(df['Close'] != 0, df['ATR'] / df['Close'] * 100, 0)
    return df

def calculate_choppiness_index(df, window=14):
    """Add a ``Choppiness_Index`` column to *df* and return it.

    The index is ``100 * log10(sum(ATR, window) / (max(High, window) -
    min(Low, window))) / log10(window)``. Rows where that expression is not
    computable (warm-up rows, zero range, non-positive ATR sum) get the neutral
    value 50.

    If *df* has no ``ATR`` column it is computed first with the same *window*.
    Intermediate series are kept local so no scratch columns are written to the
    caller's frame.
    """
    if 'ATR' not in df.columns:
        # ATR calculation dependency (same window as the index itself)
        df = calculate_atr(df, window=window)

    highest_high = df['High'].rolling(window=window).max()
    lowest_low = df['Low'].rolling(window=window).min()
    atr_sum = df['ATR'].rolling(window=window).sum()
    price_range = highest_high - lowest_low

    # Only take the logarithm where its argument is well defined and positive.
    computable = (price_range > 0) & (atr_sum > 0)
    choppiness = pd.Series(50.0, index=df.index)
    choppiness[computable] = (
        100 * np.log10(atr_sum[computable] / price_range[computable]) / np.log10(window)
    )

    df['Choppiness_Index'] = choppiness
    return df

def _rolling_zscore(series, window):
    """Return the rolling z-score of *series* over *window* observations."""
    stats = series.rolling(window=window)
    return (series - stats.mean()) / stats.std()


def calculate_features(data, ad_data=None, percentile_lookback=PERCENTILE_LOOKBACK):
    """Calculate all features used for regime classification.

    Works on a copy of *data* and adds trend, volatility, intermarket, volume
    and rolling-percentile columns. Columns for an auxiliary instrument are
    only produced when its price column is present in *data*.

    When *ad_data* is given, selected breadth columns are merged in by ``Date``
    and ``AD_Negative_Divergence`` is computed on the merged, date-aligned
    frame. *ad_data* itself is not modified. When breadth data is absent (or a
    breadth column is missing) the corresponding columns are filled with NaN so
    the scoring code can always look them up.

    Args:
        data: Market data frame with at least ``Date``, ``High``, ``Low``,
            ``Close``, ``Volume`` and ``Log_Return``.
        ad_data: Optional breadth frame with a ``Date`` column.
        percentile_lookback: Window (rows) for the rolling percentile columns.

    Returns:
        A new DataFrame containing the features, with rows dropped where
        ``SMA_Slow``, ``Momentum``, ``Volatility`` or ``BB_Width_Z`` is NaN and
        the index reset.
    """
    print("Calculating technical features...")
    df = data.copy()

    # --- Basic Price & Trend ---
    df['Momentum'] = df['Close'].pct_change(periods=MOMENTUM_WINDOW) * 100
    df['SMA_Fast'] = df['Close'].rolling(window=SMA_FAST).mean()
    df['SMA_Slow'] = df['Close'].rolling(window=SMA_SLOW).mean()
    df['SMA_Ratio'] = np.where(df['SMA_Slow'] != 0, df['SMA_Fast'] / df['SMA_Slow'], 1)
    df['Price_to_SMA_Fast'] = np.where(df['SMA_Fast'] != 0, df['Close'] / df['SMA_Fast'] - 1, 0)
    df['Price_to_SMA_Slow'] = np.where(df['SMA_Slow'] != 0, df['Close'] / df['SMA_Slow'] - 1, 0)
    # Previous close, read by the Bull volume-confirmation rule.
    df['Close_Shift1'] = df['Close'].shift(1)
    # Read by the Neutral limited-momentum rule.
    # NOTE(review): the z-score window is assumed to be VOL_WINDOW, matching the
    # other *_Z features in this function - confirm against the scorecard design.
    df['Momentum_Z'] = _rolling_zscore(df['Momentum'], VOL_WINDOW)

    # --- Volatility & Risk ---
    df['Volatility'] = df['Log_Return'].rolling(window=VOL_WINDOW).std() * np.sqrt(252)  # Annualized
    # 63-row mean of volatility, read by the Bull volatility-environment rule.
    df['Volatility_63d_Mean'] = df['Volatility'].rolling(window=63).mean()
    df = calculate_bollinger_bands(df, window=BB_WINDOW, num_std=BB_STD)
    df = calculate_atr(df, window=ATR_WINDOW)
    df = calculate_choppiness_index(df, window=CHOP_WINDOW)
    if 'VIX' in df.columns:
        df['VIX_Z_Score'] = _rolling_zscore(df['VIX'], VOL_WINDOW)

    if 'VIX' in df.columns and 'VIX3M' in df.columns:
        df['VIX_VIX3M_Ratio'] = np.where(df['VIX3M'] != 0, df['VIX'] / df['VIX3M'], 1)
        df['VIX_VIX3M_Ratio_Z'] = _rolling_zscore(df['VIX_VIX3M_Ratio'], VOL_WINDOW)

    df['BB_Width_Z'] = _rolling_zscore(df['BB_Width'], VOL_WINDOW)

    # --- Intermarket & Macro ---
    # Yield Curve
    if 'TNX' in df.columns and 'IRX' in df.columns:
        df['Yield_Curve_Spread'] = df['TNX'] - df['IRX']
        df['Yield_Curve_Spread_Change'] = df['Yield_Curve_Spread'].diff(21)  # Using 21d change per rules

    # Gold
    if 'GLD' in df.columns:
        df['GLD_Momentum'] = df['GLD'].pct_change(periods=MOMENTUM_WINDOW) * 100
        df['GLD_Z_Score'] = _rolling_zscore(df['GLD'], VOL_WINDOW)
        df['GLD_SPY_Ratio'] = np.where(df['Close'] != 0, df['GLD'] / df['Close'], np.nan)
        df['GLD_SPY_Ratio_63d_Mean'] = df['GLD_SPY_Ratio'].rolling(window=63).mean()  # For Bear Rule 10

    # Sector Ratios & Z-Scores
    if 'XLY' in df.columns and 'XLP' in df.columns:
        df['XLY_XLP_Ratio'] = np.where(df['XLP'] != 0, df['XLY'] / df['XLP'], np.nan)
        df['XLY_XLP_Z'] = _rolling_zscore(df['XLY_XLP_Ratio'], VOL_WINDOW)

    if 'XLU' in df.columns:
        df['XLU_SPY_Ratio'] = np.where(df['Close'] != 0, df['XLU'] / df['Close'], np.nan)
        df['XLU_SPY_Z'] = _rolling_zscore(df['XLU_SPY_Ratio'], VOL_WINDOW)

    if 'XLF' in df.columns:
        df['XLF_SPY_Ratio'] = np.where(df['Close'] != 0, df['XLF'] / df['Close'], np.nan)
        df['XLF_SPY_Z'] = _rolling_zscore(df['XLF_SPY_Ratio'], VOL_WINDOW)

    # Credit Spread (HYG/TLT); scored against the fixed z thresholds in the config
    if 'HYG' in df.columns and 'TLT' in df.columns:
        df['HYG_TLT_Ratio'] = np.where(df['TLT'] != 0, df['HYG'] / df['TLT'], np.nan)
        df['HYG_TLT_MA'] = df['HYG_TLT_Ratio'].rolling(window=CREDIT_MA_WINDOW).mean()
        df['HYG_TLT_Z'] = _rolling_zscore(df['HYG_TLT_Ratio'], VOL_WINDOW)

    # Dollar (UUP)
    if 'UUP' in df.columns:
        df['UUP_Z_Score'] = _rolling_zscore(df['UUP'], VOL_WINDOW)

    # TIPS (inflation expectations proxy): a TIP/IEF ratio feature is currently disabled.

    # Treasuries for Flight-to-Quality Rule
    if 'TLT' in df.columns:
        df['TLT_20d_Max'] = df['TLT'].rolling(window=20).max()

    # --- Volume ---
    df['Volume_MA21'] = df['Volume'].rolling(window=21).mean()

    # --- Rolling Percentiles (needed for graduated scoring) ---
    print("Calculating rolling percentiles...")
    momentum_window = df['Momentum'].rolling(window=percentile_lookback)
    for label, quantile in ((50, 0.50), (60, 0.60), (70, 0.70), (40, 0.40), (35, 0.35), (30, 0.30)):
        df[f'Momentum_Perc_{label}'] = momentum_window.quantile(quantile)

    if 'VIX' in df.columns:
        vix_window = df['VIX'].rolling(window=percentile_lookback)
        for label, quantile in ((40, 0.40), (60, 0.60), (70, 0.70), (80, 0.80)):
            df[f'VIX_Perc_{label}'] = vix_window.quantile(quantile)

    # --- Add A/D LINE FEATURES if available ---
    ad_feature_cols = [
        'AD_Line', 'AD_Line_50MA', 'Advancing_Percentage', 'Declining_Percentage',
        'McClellan_Oscillator_Norm', 'AD_Negative_Divergence',
    ]
    if ad_data is not None and not ad_data.empty:
        print("Merging A/D line features...")
        breadth = ad_data.copy()  # never mutate the caller's frame

        # Ensure 'Date' is datetime in both dataframes before merge
        df['Date'] = pd.to_datetime(df['Date'])
        breadth['Date'] = pd.to_datetime(breadth['Date'])

        merge_cols = ['Date'] + [col for col in ad_feature_cols[:-1] if col in breadth.columns]
        if 'AD_Line' in breadth.columns:
            # Evaluate "A/D line below its prior 20-row high" on the breadth
            # calendar, then carry the flag through the date merge so it lines
            # up with the price rows by Date rather than by row position.
            prior_ad_high = breadth['AD_Line'].rolling(window=20).max().shift(1)
            breadth['_AD_Not_Confirming'] = (breadth['AD_Line'] < prior_ad_high).astype(float)
            merge_cols.append('_AD_Not_Confirming')

        df = pd.merge(df, breadth[merge_cols], on='Date', how='left')

        if '_AD_Not_Confirming' in df.columns:
            price_new_high = df['Close'] > df['Close'].rolling(window=20).max().shift(1)
            ad_not_confirming = df.pop('_AD_Not_Confirming') == 1.0
            divergence = (price_new_high & ad_not_confirming).astype(float)
            # Leave NaN on dates with no breadth observation.
            df['AD_Negative_Divergence'] = divergence.where(df['AD_Line'].notna())
        # Note: merged breadth columns are not forward-filled; NaNs are handled in scoring.
    else:
        print("Skipping A/D line feature merge as data is not available.")

    # Placeholder columns so scoring never hits a missing breadth column.
    for col in ad_feature_cols:
        if col not in df.columns:
            df[col] = np.nan

    # --- Final Cleanup ---
    # Drop rows with NaNs essential for core calculations (e.g., SMAs).
    # The percentile columns are deliberately excluded because of their long lookback.
    initial_len = len(df)
    required_cols_for_calc = ['SMA_Slow', 'Momentum', 'Volatility', 'BB_Width_Z']  # Add more if critical
    df = df.dropna(subset=required_cols_for_calc).reset_index(drop=True)
    print(f"Dropped {initial_len - len(df)} rows due to NaNs in essential features.")

    print("Feature calculation complete.")
    return df

# ======== REGIME SCORING FUNCTIONS ========
def get_graduated_score(value, thresholds, scores, ascending=True):
    """Return the score of the strictest threshold that *value* satisfies.

    *thresholds* and *scores* are parallel lists ordered from the loosest to
    the strictest condition (e.g. ``[1.00, 1.01, 1.02]`` for "greater than", or
    ``[1.0, 0.75, 0.5]`` for "less than"). The tiers are checked from the
    strictest one down, so the highest applicable score is returned.

    Args:
        value: The measurement to grade. NaN satisfies no threshold.
        thresholds: Cut-offs, loosest first.
        scores: Score awarded for each cut-off, same order as *thresholds*.
        ascending: If True a tier is met when ``value > threshold``; if False
            when ``value < threshold``.

    Returns:
        The matching score, or 0 when no threshold is met.
    """
    # Walk from the strictest tier to the loosest in BOTH directions; scanning
    # the "less than" case from the loosest tier would always stop at scores[0].
    for threshold, tier_score in reversed(list(zip(thresholds, scores))):
        tier_met = value > threshold if ascending else value < threshold
        if tier_met:
            return tier_score
    return 0  # Default score if no threshold is met


def score_bull_regime(data_row, config):
    """Calculate the raw Bull-regime score for one row of features.

    Each of the ten rules adds its configured weight (or a graduated fraction
    of it) when its condition holds. A rule whose input columns are missing
    from *data_row* or are NaN simply scores 0; it does not affect the
    evaluation of the other rules.

    Args:
        data_row: Mapping-like row (e.g. a pandas Series) supporting ``.get``.
        config: Bull scorecard configuration keyed by rule name.

    Returns:
        The summed score, capped at 100. If an unexpected error occurs (for
        example a missing configuration key) it is printed and the score
        accumulated so far is returned.
    """
    def field(name):
        # Absent columns are treated exactly like NaN values.
        return data_row.get(name, np.nan)

    def present(*names):
        return all(pd.notna(field(name)) for name in names)

    def percentile_column(prefix, fraction):
        # round() rather than int(): e.g. 0.29 * 100 is 28.999... in floating point.
        return f'{prefix}_Perc_{int(round(fraction * 100))}'

    score = 0

    try:
        # Rule 1: Trend Structure
        # Simplified for row-only scoring: checks SMA_Fast > SMA_Slow on this row.
        # The configured persistence_days needs prior rows and is not applied here.
        rule = config['rule1_trend_structure']
        if present('SMA_Fast', 'SMA_Slow') and field('SMA_Fast') > field('SMA_Slow'):
            score += rule['weight']

        # Rule 2: Price Strength (graduated on Close / SMA_Slow)
        rule = config['rule2_price_strength']
        if present('Close', 'SMA_Slow') and field('SMA_Slow') > 0:
            ratio = field('Close') / field('SMA_Slow')
            score += rule['weight'] * get_graduated_score(ratio, rule['thresholds'], rule['scores'], ascending=True)

        # Rule 3: Medium-Term Momentum (graduated against rolling percentiles)
        rule = config['rule3_momentum']
        if present('Momentum'):
            percentile_levels = [field(percentile_column('Momentum', p)) for p in rule['percentiles']]
            if not any(pd.isna(level) for level in percentile_levels):  # Check if percentiles were calculated
                score += rule['weight'] * get_graduated_score(
                    field('Momentum'), percentile_levels, rule['scores'], ascending=True
                )

        # Rule 4: Volatility Environment (VIX below its percentile and volatility below its 63d mean)
        rule = config['rule4_vol_env']
        vix_perc_col = percentile_column('VIX', rule['vix_perc_threshold'])
        if (present('VIX', vix_perc_col, 'Volatility', 'Volatility_63d_Mean')
                and field('VIX') < field(vix_perc_col)
                and field('Volatility') < field('Volatility_63d_Mean')):
            score += rule['weight']

        # Rule 5: Credit Conditions (graduated on HYG/TLT z-score)
        rule = config['rule5_credit_cond']
        if present('HYG_TLT_Z'):
            score += rule['weight'] * get_graduated_score(
                field('HYG_TLT_Z'), rule['z_thresholds'], rule['scores'], ascending=True
            )

        # Rule 6: Broad Participation
        rule = config['rule6_broad_part']
        if (present('AD_Line', 'AD_Line_50MA', 'Advancing_Percentage')
                and field('AD_Line') > field('AD_Line_50MA')
                and field('Advancing_Percentage') > rule['adv_perc_threshold']):
            score += rule['weight']

        # Rule 7: Sector Leadership
        rule = config['rule7_sector_lead']
        if (present('XLY_XLP_Z', 'XLF_SPY_Z')
                and field('XLY_XLP_Z') > rule['xly_xlp_z_threshold']
                and field('XLF_SPY_Z') > rule['xlf_spy_z_threshold']):
            score += rule['weight']

        # Rule 8: Yield Curve Health
        rule = config['rule8_yield_curve']
        if (present('Yield_Curve_Spread', 'Yield_Curve_Spread_Change')
                and field('Yield_Curve_Spread') > rule['spread_threshold']
                and field('Yield_Curve_Spread_Change') >= rule['change_threshold']):
            score += rule['weight']

        # Rule 9: Risk Appetite (%B strong, OR gold momentum positive with a soft dollar)
        rule = config['rule9_risk_appetite']
        strong_percent_b = present('BB_PercentB') and field('BB_PercentB') > rule['bb_perc_b_threshold']
        gold_up_dollar_soft = (present('GLD_Momentum', 'UUP_Z_Score')
                               and field('GLD_Momentum') > 0
                               and field('UUP_Z_Score') < rule['uup_z_threshold'])
        if strong_percent_b or gold_up_dollar_soft:
            score += rule['weight']

        # Rule 10: Volume Confirmation (up day on above-average volume)
        rule = config['rule10_vol_confirm']
        if (present('Close', 'Close_Shift1', 'Volume', 'Volume_MA21')
                and field('Close') > field('Close_Shift1')
                and field('Volume') > field('Volume_MA21')):
            score += rule['weight']

    except Exception as e:
        # Best effort: report the problem and keep whatever was scored so far.
        print(f"Error scoring Bull regime for date {data_row.get('Date', 'Unknown')}: {e}")

    # Ensure score doesn't exceed maximum possible weight (due to potential float precision issues)
    return min(score, 100)  # Assuming weights sum to 100


def score_neutral_regime(data_row, config):
    """Calculate the raw score for the Neutral regime for a given data row.

    Ten rules are evaluated one after another. Each rule reads its own entry
    in ``config`` plus the feature values it needs from ``data_row`` and
    returns the points it earns (0 when its condition fails or an input is
    NaN). Rules are isolated from each other: a missing column or config key
    zeroes only the rule that needed it, instead of silently skipping every
    rule that follows.

    Args:
        data_row: Mapping-like row of feature values (for example a pandas
            Series); it must support ``[]`` lookup and ``.get``.
        config: Neutral-regime configuration keyed by rule name. Every rule
            entry provides a ``'weight'`` and the thresholds read below.

    Returns:
        The sum of the rule contributions, capped at 100.
    """

    def range_bound():
        # Rule 1: Range-Bound Price. The lower of the two graduated scores
        # is used, so price must be close to both moving averages.
        cfg = config['rule1_range_bound']
        weight = cfg['weight']
        if pd.notna(data_row['Price_to_SMA_Fast']) and pd.notna(data_row['Price_to_SMA_Slow']):
            scores = cfg['scores']
            score_fast = get_graduated_score(
                abs(data_row['Price_to_SMA_Fast']), cfg['fast_thresholds'], scores, ascending=False)
            score_slow = get_graduated_score(
                abs(data_row['Price_to_SMA_Slow']), cfg['slow_thresholds'], scores, ascending=False)
            return weight * min(score_fast, score_slow)
        return 0

    def graduated_abs(rule_key, column, thresholds_key, offset=0):
        # Shared shape of rules 2, 4, 5, 7 and 8: grade the absolute
        # distance of one feature from ``offset``.
        cfg = config[rule_key]
        weight = cfg['weight']
        if pd.notna(data_row[column]):
            return weight * get_graduated_score(
                abs(data_row[column] - offset), cfg[thresholds_key], cfg['scores'], ascending=False)
        return 0

    def within_band(rule_key, column, lower_key, upper_key):
        # Shared shape of rules 3 and 6: full weight when the feature lies
        # strictly inside an open interval.
        cfg = config[rule_key]
        weight = cfg['weight']
        lower = cfg[lower_key]
        upper = cfg[upper_key]
        if pd.notna(data_row[column]) and lower < data_row[column] < upper:
            return weight
        return 0

    def sector_balance():
        # Rule 9: Sector Balance. Both sector-ratio z-scores must be small
        # in absolute value.
        cfg = config['rule9_sector_bal']
        weight = cfg['weight']
        xly_xlp_z_thresh = cfg['xly_xlp_z_threshold']
        xlu_spy_z_thresh = cfg['xlu_spy_z_threshold']
        if (pd.notna(data_row['XLY_XLP_Z']) and abs(data_row['XLY_XLP_Z']) < xly_xlp_z_thresh and
                pd.notna(data_row['XLU_SPY_Z']) and abs(data_row['XLU_SPY_Z']) < xlu_spy_z_thresh):
            return weight
        return 0

    def mean_reversion():
        # Rule 10: Mean-Reversion Character. Requires the 10-day %B
        # std/mean helper columns added in calculate_all_regimes.
        cfg = config['rule10_mean_reversion']
        weight = cfg['weight']
        std_thresh = cfg['std_threshold']
        mean_lower = cfg['mean_lower']
        mean_upper = cfg['mean_upper']
        if (pd.notna(data_row['BB_PercentB_10d_Std']) and data_row['BB_PercentB_10d_Std'] > std_thresh and
                pd.notna(data_row['BB_PercentB_10d_Mean']) and
                mean_lower < data_row['BB_PercentB_10d_Mean'] < mean_upper):
            return weight
        return 0

    rules = (
        range_bound,                                                            # Rule 1
        lambda: graduated_abs('rule2_trend_flat', 'SMA_Ratio', 'thresholds', offset=1),  # Rule 2: Trend Flatness
        lambda: within_band('rule3_choppiness', 'Choppiness_Index',
                            'lower_threshold', 'upper_threshold'),             # Rule 3: Choppiness
        lambda: graduated_abs('rule4_lim_momentum', 'Momentum_Z', 'z_thresholds'),       # Rule 4: Limited Momentum
        lambda: graduated_abs('rule5_mod_vol', 'VIX_Z_Score', 'z_thresholds'),           # Rule 5: Moderate Volatility
        lambda: within_band('rule6_band_contract', 'BB_Width_Z', 'lower_z', 'upper_z'),  # Rule 6: Band Contraction
        lambda: graduated_abs('rule7_credit_stab', 'HYG_TLT_Z', 'z_thresholds'),         # Rule 7: Credit Market Stability
        lambda: graduated_abs('rule8_mixed_breadth', 'McClellan_Oscillator_Norm',
                              'osc_thresholds'),                                # Rule 8: Mixed Breadth
        sector_balance,                                                         # Rule 9
        mean_reversion,                                                         # Rule 10
    )

    score = 0
    for rule in rules:
        try:
            score += rule()
        except KeyError:
            # Missing column or config key: this rule contributes nothing,
            # the remaining rules are still evaluated.
            continue
        except Exception as e:
            print(f"Error scoring Neutral regime for date {data_row.get('Date', 'Unknown')}: {e}")

    return min(score, 100)


def score_bear_regime(data_row, config):
    """Calculate the raw score for the Bear regime for a given data row.

    Ten rules are evaluated one after another. Each rule reads its own entry
    in ``config`` plus the feature values it needs from ``data_row`` and
    returns the points it earns (0 when its condition fails or an input is
    NaN). Rules are isolated from each other: a missing column or config key
    zeroes only the rule that needed it, instead of silently skipping every
    rule that follows.

    Args:
        data_row: Mapping-like row of feature values (for example a pandas
            Series); it must support ``[]`` lookup and ``.get``.
        config: Bear-regime configuration keyed by rule name. Every rule
            entry provides a ``'weight'`` and the thresholds read below.

    Returns:
        The sum of the rule contributions, capped at 100.
    """

    def trend_structure():
        # Rule 1: Trend Structure. Only the same-day condition is checked
        # here; calculate_all_regimes removes these points again when its
        # pre-computed persistence flag is not set.
        cfg = config['rule1_trend_structure']
        weight = cfg['weight']
        sma_thresh = cfg['sma_threshold']
        if (pd.notna(data_row['SMA_Fast']) and pd.notna(data_row['SMA_Slow']) and
                data_row['SMA_Slow'] > 0 and
                data_row['SMA_Fast'] < (data_row['SMA_Slow'] * sma_thresh)):
            return weight
        return 0

    def price_weakness():
        # Rule 2: Price Weakness. Lower Close/SMA_Slow ratio = higher score.
        cfg = config['rule2_price_weakness']
        weight = cfg['weight']
        if pd.notna(data_row['Close']) and pd.notna(data_row['SMA_Slow']) and data_row['SMA_Slow'] > 0:
            ratio = data_row['Close'] / data_row['SMA_Slow']
            return weight * get_graduated_score(ratio, cfg['thresholds'], cfg['scores'], ascending=False)
        return 0

    def negative_momentum():
        # Rule 3: Negative Momentum. Graded against the row's own rolling
        # percentile columns; lower momentum = higher score.
        cfg = config['rule3_neg_momentum']
        weight = cfg['weight']
        if pd.notna(data_row['Momentum']):
            percentiles = [data_row.get(f'Momentum_Perc_{int(p*100)}', np.nan) for p in cfg['percentiles']]
            if not any(pd.isna(p) for p in percentiles):
                return weight * get_graduated_score(
                    data_row['Momentum'], percentiles, cfg['scores'], ascending=False)
        return 0

    def elevated_volatility():
        # Rule 4: Elevated Volatility. VIX must exceed both its percentile
        # column and an absolute level before it is graded; higher VIX =
        # higher score.
        cfg = config['rule4_elevated_vol']
        weight = cfg['weight']
        vix_perc_thresh = cfg['vix_perc_threshold']
        vix_abs_thresh = cfg['vix_abs_threshold']
        vix_perc_col = f'VIX_Perc_{int(vix_perc_thresh*100)}'
        if (pd.notna(data_row['VIX']) and pd.notna(data_row[vix_perc_col]) and
                data_row['VIX'] > data_row[vix_perc_col] and data_row['VIX'] > vix_abs_thresh):
            perc_levels = [data_row.get(f'VIX_Perc_{int(p*100)}', np.nan) for p in cfg['perc_thresholds']]
            if not any(pd.isna(p) for p in perc_levels):
                return weight * get_graduated_score(
                    data_row['VIX'], perc_levels, cfg['scores'], ascending=True)
        return 0

    def credit_stress():
        # Rule 5: Credit Stress. Ratio below its scaled moving average, then
        # graded on the z-score; lower Z = higher score.
        cfg = config['rule5_credit_stress']
        weight = cfg['weight']
        ratio_thresh = cfg['ratio_threshold']
        if (pd.notna(data_row['HYG_TLT_Ratio']) and pd.notna(data_row['HYG_TLT_MA']) and
                data_row['HYG_TLT_MA'] > 0 and
                data_row['HYG_TLT_Ratio'] < (data_row['HYG_TLT_MA'] * ratio_thresh) and
                pd.notna(data_row['HYG_TLT_Z'])):
            return weight * get_graduated_score(
                data_row['HYG_TLT_Z'], cfg['z_thresholds'], cfg['scores'], ascending=False)
        return 0

    def defensive_rotation():
        # Rule 6: Defensive Rotation.
        cfg = config['rule6_def_rotation']
        weight = cfg['weight']
        xly_xlp_z_thresh = cfg['xly_xlp_z_threshold']
        xlu_spy_z_thresh = cfg['xlu_spy_z_threshold']
        if (pd.notna(data_row['XLY_XLP_Z']) and data_row['XLY_XLP_Z'] < xly_xlp_z_thresh and
                pd.notna(data_row['XLU_SPY_Z']) and data_row['XLU_SPY_Z'] > xlu_spy_z_thresh):
            return weight
        return 0

    def breadth_deterioration():
        # Rule 7: Breadth Deterioration. A/D line under its 50MA plus either
        # a high declining percentage or a negative divergence flag.
        cfg = config['rule7_breadth_deter']
        weight = cfg['weight']
        decl_perc_thresh = cfg['decl_perc_threshold']
        below_ma = (pd.notna(data_row['AD_Line']) and pd.notna(data_row['AD_Line_50MA']) and
                    data_row['AD_Line'] < data_row['AD_Line_50MA'])
        many_decliners = (pd.notna(data_row['Declining_Percentage']) and
                          data_row['Declining_Percentage'] > decl_perc_thresh)
        divergence = pd.notna(data_row['AD_Negative_Divergence']) and data_row['AD_Negative_Divergence'] > 0
        if below_ma and (many_decliners or divergence):
            return weight
        return 0

    def volatility_structure():
        # Rule 8: Volatility Structure. Either the VIX/VIX3M ratio or its
        # z-score is above its threshold.
        cfg = config['rule8_vol_structure']
        weight = cfg['weight']
        ratio_thresh = cfg['ratio_threshold']
        z_thresh = cfg['z_threshold']
        ratio_high = pd.notna(data_row['VIX_VIX3M_Ratio']) and data_row['VIX_VIX3M_Ratio'] > ratio_thresh
        z_high = pd.notna(data_row['VIX_VIX3M_Ratio_Z']) and data_row['VIX_VIX3M_Ratio_Z'] > z_thresh
        if ratio_high or z_high:
            return weight
        return 0

    def yield_curve_warning():
        # Rule 9: Yield Curve Warning. Spread below the absolute threshold,
        # or below the "flat" threshold while its change is below
        # change_threshold.
        cfg = config['rule9_yield_curve_warn']
        weight = cfg['weight']
        spread_abs_thresh = cfg['spread_abs_threshold']
        spread_flat_thresh = cfg['spread_flat_threshold']
        change_thresh = cfg['change_threshold']
        below_abs = pd.notna(data_row['Yield_Curve_Spread']) and data_row['Yield_Curve_Spread'] < spread_abs_thresh
        flattening = (pd.notna(data_row['Yield_Curve_Spread']) and
                      data_row['Yield_Curve_Spread'] < spread_flat_thresh and
                      pd.notna(data_row['Yield_Curve_Spread_Change']) and
                      data_row['Yield_Curve_Spread_Change'] < change_thresh)
        if below_abs or flattening:
            return weight
        return 0

    def flight_to_quality():
        # Rule 10: Flight-to-Quality. GLD/SPY ratio above its scaled 63-day
        # mean, or TLT above its scaled 20-day max.
        cfg = config['rule10_flight_quality']
        weight = cfg['weight']
        gld_spy_ratio_thresh = cfg['gld_spy_ratio_threshold']
        tlt_thresh = cfg['tlt_threshold']
        gold_bid = (pd.notna(data_row['GLD_SPY_Ratio']) and pd.notna(data_row['GLD_SPY_Ratio_63d_Mean']) and
                    data_row['GLD_SPY_Ratio_63d_Mean'] > 0 and
                    data_row['GLD_SPY_Ratio'] > (data_row['GLD_SPY_Ratio_63d_Mean'] * gld_spy_ratio_thresh))
        bond_bid = (pd.notna(data_row['TLT']) and pd.notna(data_row['TLT_20d_Max']) and
                    data_row['TLT_20d_Max'] > 0 and
                    data_row['TLT'] > (data_row['TLT_20d_Max'] * tlt_thresh))
        if gold_bid or bond_bid:
            return weight
        return 0

    rules = (
        trend_structure,
        price_weakness,
        negative_momentum,
        elevated_volatility,
        credit_stress,
        defensive_rotation,
        breadth_deterioration,
        volatility_structure,
        yield_curve_warning,
        flight_to_quality,
    )

    score = 0
    for rule in rules:
        try:
            score += rule()
        except KeyError:
            # Missing column or config key: this rule contributes nothing,
            # the remaining rules are still evaluated.
            continue
        except Exception as e:
            print(f"Error scoring Bear regime for date {data_row.get('Date', 'Unknown')}: {e}")

    return min(score, 100)

# ======== MAIN CALCULATION FUNCTION ========
def calculate_all_regimes(df, config):
    """Calculate raw scores, apply persistence, and determine the final regime.

    The frame is modified in place (and also returned). Besides the helper
    feature columns needed by the scoring functions, the following columns
    are written: ``raw_score_<R>`` and ``final_score_<R>`` for each regime
    ``R`` in Bull/Neutral/Bear, ``regime`` and ``confidence``.

    Per-row results are collected in positional buffers and written to the
    frame once after the loop. This avoids one label-based ``df.loc`` scalar
    read/write per value per row, which is slow and misbehaves when the
    index contains duplicate labels.

    Args:
        df: Feature DataFrame; one row per date.
        config: Dict with 'Bull', 'Neutral' and 'Bear' rule configurations
            and a 'Persistence' entry holding the 't-1' and 't-2' bonuses.

    Returns:
        The same DataFrame with the score, regime and confidence columns.
    """
    print("Calculating regime scores...")
    n_rows = len(df)
    # Initialize columns
    regime_names = ['Bull', 'Neutral', 'Bear']
    for r in regime_names:
        df[f'raw_score_{r}'] = 0.0
        df[f'final_score_{r}'] = 0.0
    df['regime'] = 'Neutral'  # Default for rows that are never scored
    df['confidence'] = 'Low'

    # --- Add helper columns needed for scoring functions ---
    # Shifted close for Volume Confirmation rule
    df['Close_Shift1'] = df['Close'].shift(1)
    # Rolling mean of Volatility for Bull Rule 4
    df['Volatility_63d_Mean'] = df['Volatility'].rolling(window=63).mean()
    # Rolling metrics for Neutral Rule 10
    df['BB_PercentB_10d_Std'] = df['BB_PercentB'].rolling(window=10).std()
    df['BB_PercentB_10d_Mean'] = df['BB_PercentB'].rolling(window=10).mean()
    # Momentum Z for Neutral Rule 4
    df['Momentum_Mean'] = df['Momentum'].rolling(window=VOL_WINDOW).mean()
    df['Momentum_Std'] = df['Momentum'].rolling(window=VOL_WINDOW).std()
    df['Momentum_Z'] = np.where(df['Momentum_Std'] != 0, (df['Momentum'] - df['Momentum_Mean']) / df['Momentum_Std'], 0)

    # --- Pre-calculate trend persistence flags ---
    # A flag is True when the trend condition held on each of the last
    # ``persistence_days`` rows (rolling count equals the window length).
    bull_rule1 = config['Bull']['rule1_trend_structure']
    bull_days = bull_rule1['persistence_days']
    bull_trend_cond = df['SMA_Fast'] > df['SMA_Slow']
    df['bull_trend_persistence'] = bull_trend_cond.rolling(window=bull_days).sum() >= bull_days

    bear_rule1 = config['Bear']['rule1_trend_structure']
    bear_days = bear_rule1['persistence_days']
    bear_sma_thresh = bear_rule1['sma_threshold']
    bear_trend_cond = df['SMA_Fast'] < (df['SMA_Slow'] * bear_sma_thresh)
    df['bear_trend_persistence'] = bear_trend_cond.rolling(window=bear_days).sum() >= bear_days

    persistence_cfg = config['Persistence']

    # Positional result buffers, pre-filled with the same defaults as the
    # columns above so unscored rows keep those defaults.
    raw_buf = {r: np.zeros(n_rows) for r in regime_names}
    final_buf = {r: np.zeros(n_rows) for r in regime_names}
    regimes = ['Neutral'] * n_rows
    confidences = ['Low'] * n_rows

    # --- Iterate and Score ---
    # Start from index 2 to allow t-1, t-2 lookback for persistence
    for i in range(2, n_rows):
        current_row = df.iloc[i]

        # Calculate raw scores
        raw_scores = {
            'Bull': score_bull_regime(current_row, config['Bull']),
            'Neutral': score_neutral_regime(current_row, config['Neutral']),
            'Bear': score_bear_regime(current_row, config['Bear'])
        }

        # --- Apply Persistence Rule Corrections ---
        # If the same-day trend check used by the scoring function passes
        # but the persistence flag is not set, take the Rule 1 points back.
        sma_fast = current_row['SMA_Fast']
        sma_slow = current_row['SMA_Slow']
        smas_valid = pd.notna(sma_fast) and pd.notna(sma_slow)
        if not current_row['bull_trend_persistence'] and smas_valid and sma_fast > sma_slow:
            raw_scores['Bull'] -= bull_rule1['weight']
        if (not current_row['bear_trend_persistence'] and smas_valid and sma_slow > 0 and
                sma_fast < (sma_slow * bear_sma_thresh)):
            raw_scores['Bear'] -= bear_rule1['weight']

        # Ensure scores are not negative after correction
        raw_scores['Bull'] = max(0, raw_scores['Bull'])
        raw_scores['Bear'] = max(0, raw_scores['Bear'])

        # Apply Persistence Bonus: regimes assigned to the previous two rows
        # get a head start.
        final_scores = raw_scores.copy()
        prev_regime = regimes[i - 1]
        prev_regime_2 = regimes[i - 2]
        if prev_regime in final_scores:
            final_scores[prev_regime] += persistence_cfg['t-1']
        if prev_regime_2 in final_scores:
            final_scores[prev_regime_2] += persistence_cfg['t-2']

        for r in regime_names:
            raw_buf[r][i] = raw_scores[r]
            final_buf[r][i] = final_scores[r]

        # Determine winning regime with tiebreaker: Bear > Neutral > Bull
        max_score = -1
        winning_regime = 'Neutral'  # Default
        if final_scores['Bear'] > max_score:
            max_score = final_scores['Bear']
            winning_regime = 'Bear'
        # Neutral replaces Bear only when strictly greater; otherwise it
        # takes over whenever it at least matches the current maximum.
        if final_scores['Neutral'] >= max_score and (
                winning_regime != 'Bear' or final_scores['Neutral'] > max_score):
            max_score = final_scores['Neutral']
            winning_regime = 'Neutral'
        if final_scores['Bull'] > max_score:  # Bull must strictly beat others
            max_score = final_scores['Bull']
            winning_regime = 'Bull'
        regimes[i] = winning_regime

        # Calculate Confidence from the gap between the two best scores
        sorted_scores = sorted(final_scores.values(), reverse=True)
        score_margin = sorted_scores[0] - sorted_scores[1] if len(sorted_scores) > 1 else sorted_scores[0]
        if score_margin > 25:
            confidences[i] = "High"
        elif score_margin > 15:
            confidences[i] = "Medium"
        else:
            confidences[i] = "Low"

    # Write all results back in one pass (positional, index-agnostic).
    for r in regime_names:
        df[f'raw_score_{r}'] = raw_buf[r]
        df[f'final_score_{r}'] = final_buf[r]
    df['regime'] = regimes
    df['confidence'] = confidences

    print("Regime scoring complete.")

    return df

# ======== ANALYSIS & VISUALIZATION FUNCTIONS (Adapted) ========
def calculate_regime_statistics(df):
    """Calculate key statistics for each market regime.

    For every regime label present in ``df['regime']`` (restricted to Bull,
    Neutral and Bear) the rows carrying that label are pooled and summarised:
    daily and annualised (252-day) return and volatility, Sharpe ratio, hit
    rate, max drawdown and the mean of a few indicator columns when present.

    ``Log_Return`` is treated as a log return expressed in percent (the
    drawdown step divides it by 100), so the equity curve is compounded as
    ``exp(cumsum(r / 100))`` rather than with the simple-return formula.

    Args:
        df: DataFrame with at least 'regime' and 'Log_Return' columns.

    Returns:
        DataFrame indexed by regime label with one column per statistic;
        an empty DataFrame when a required column is missing.
    """
    print("\nCalculating statistics per regime...")
    # Ensure required columns exist
    if 'regime' not in df.columns:
        print("Error: 'regime' column not found in DataFrame. Cannot calculate stats.")
        return pd.DataFrame()
    if 'Log_Return' not in df.columns:
        print("Error: 'Log_Return' column not found in DataFrame. Cannot calculate stats.")
        return pd.DataFrame()

    # Output key -> source column for the per-regime indicator averages.
    indicator_columns = {
        'avg_vix': 'VIX',
        'avg_realized_vol': 'Volatility',
        'avg_momentum_63d': 'Momentum',
        'avg_choppiness': 'Choppiness_Index',
        'avg_credit_z': 'HYG_TLT_Z',
    }

    # Use the labels actually present, in order of first appearance.
    unique_labels = [label for label in df['regime'].unique()
                     if pd.notna(label) and label in ['Bull', 'Neutral', 'Bear']]

    regime_stats = {}
    for label in unique_labels:
        regime_data = df[df['regime'] == label]
        returns = regime_data['Log_Return']

        # Basic stats
        return_mean = returns.mean()
        return_std = returns.std()
        ann_return = return_mean * 252
        ann_vol = return_std * np.sqrt(252)
        # NaN volatility (single row) also falls through to 0 here.
        sharpe = ann_return / ann_vol if ann_vol > 0 else 0
        hit_rate = (returns > 0).mean() * 100

        # Max drawdown over the pooled regime rows. Log returns add, so the
        # growth path is the exponential of their running sum.
        if len(regime_data) > 1:
            growth = np.exp((returns / 100).cumsum())
            peak = growth.expanding(min_periods=1).max()
            max_drawdown = ((growth / peak - 1) * 100).min()
        else:
            max_drawdown = 0

        stats = {
            'count': len(regime_data),
            'return_mean_daily': return_mean,
            'return_std_daily': return_std,
            'annualized_return': ann_return,
            'annualized_volatility': ann_vol,
            'sharpe_ratio': sharpe,
            'hit_rate_pct': hit_rate,
            'max_drawdown_pct': max_drawdown,
        }
        for stat_name, column in indicator_columns.items():
            stats[stat_name] = regime_data[column].mean() if column in regime_data.columns else np.nan
        regime_stats[label] = stats

    stats_df = pd.DataFrame.from_dict(regime_stats, orient='index')
    print("Regime statistics calculated.")
    return stats_df


def analyze_regime_transitions(df):
    """Analyze transitions between regimes.

    The 'regime' column is split into runs of consecutive identical labels.
    Each run becomes one entry in the returned list, with keys:

    * ``from_regime``: label of the preceding run (``None`` for the first)
    * ``to_regime``: label of this run
    * ``start_date`` / ``end_date``: first and last date of the run
    * ``duration_days``: inclusive calendar-day span of the run

    The transition matrix counts run-to-run changes only (the first run has
    no predecessor and is not counted) and is normalised per source regime.

    Args:
        df: DataFrame with a 'regime' column and either a DatetimeIndex or
            a 'Date' column.

    Returns:
        ``(transitions, matrix_prob)``. ``transitions`` is the list described
        above; ``matrix_prob`` is a DataFrame of transition probabilities,
        empty when there is only a single run. Both are empty when the
        regime or date information is missing.
    """
    print("\nAnalyzing regime transitions...")
    if 'regime' not in df.columns or df['regime'].isnull().all():
        print("Error: 'regime' column missing or empty. Cannot analyze transitions.")
        return [], pd.DataFrame()

    # Ensure Date is index or column
    if isinstance(df.index, pd.DatetimeIndex):
        dates = df.index
    elif 'Date' in df.columns:
        dates = df['Date']
    else:
        print("Error: Cannot find Date information.")
        return [], pd.DataFrame()

    # Work positionally so the result does not depend on the index labels.
    date_values = list(dates)
    regimes = df['regime'].tolist()
    n_rows = len(regimes)

    # A run starts at row 0 and wherever the label differs from the row
    # before it; each run ends on the row before the next run starts.
    changed = df['regime'].ne(df['regime'].shift(1)).to_numpy()
    run_starts = np.flatnonzero(changed).tolist()
    run_ends = [start - 1 for start in run_starts[1:]] + [n_rows - 1]

    transitions = []
    previous_regime = None
    for start_pos, end_pos in zip(run_starts, run_ends):
        start_date = date_values[start_pos]
        end_date = date_values[end_pos]
        current_regime = regimes[start_pos]
        transitions.append({
            'from_regime': previous_regime,
            'to_regime': current_regime,
            'start_date': start_date,
            'end_date': end_date,
            'duration_days': (end_date - start_date).days + 1  # inclusive of both ends
        })
        previous_regime = current_regime

    if len(transitions) == 1:
        print("No regime transitions detected.")
        return transitions, pd.DataFrame()  # Single regime: empty transition matrix

    # --- Create Transition Matrix ---
    pairs = [(t['from_regime'], t['to_regime']) for t in transitions[1:]]
    labels = sorted({regime for pair in pairs for regime in pair})
    matrix = pd.DataFrame(0, index=labels, columns=labels)
    for from_r, to_r in pairs:
        matrix.loc[from_r, to_r] += 1

    # Normalize to probabilities; rows with no outgoing transitions become 0.
    matrix_prob = matrix.div(matrix.sum(axis=1), axis=0).fillna(0)

    print("Regime transition analysis complete.")
    return transitions, matrix_prob


def plot_regime_time_series(df):
    """Plot the market price with regime classifications over time.

    Builds a three-row Plotly figure with a shared x-axis and shows it:

    1. 'Close' as a line, with one shaded background band per run of
       consecutive identical regime labels.
    2. The ``final_score_<regime>`` columns that are present.
    3. The 'confidence' column mapped to Low=1 / Medium=2 / High=3.

    Every run gets its own band ending on the run's last date, including a
    run that starts on the final row. Nothing is plotted (a message is
    printed) when required columns, rows, or date information are missing.

    Args:
        df: DataFrame with 'regime' and 'Close' columns and either a
            DatetimeIndex or a 'Date' column.
    """
    if 'regime' not in df.columns or 'Close' not in df.columns:
        print("Error: Missing 'regime' or 'Close' column for plotting.")
        return
    if len(df) == 0:
        print("Error: No rows available for plotting.")
        return

    # Ensure Date is index or column
    if isinstance(df.index, pd.DatetimeIndex):
        date_col = df.index
    elif 'Date' in df.columns:
        date_col = df['Date']
    else:
        print("Error: Cannot find Date information for plotting.")
        return

    # Define colors for regimes
    regime_colors = {'Bull': 'rgba(0, 128, 0, 0.3)', 'Neutral': 'rgba(255, 215, 0, 0.3)', 'Bear': 'rgba(255, 0, 0, 0.3)'}
    line_colors = {'Bull': 'green', 'Neutral': 'orange', 'Bear': 'red'}

    fig = make_subplots(rows=3, cols=1,
                        shared_xaxes=True,
                        vertical_spacing=0.03,
                        subplot_titles=('Market Price with Regime Background', 'Regime Scores', 'Regime Confidence'),
                        row_heights=[0.6, 0.2, 0.2])

    # Plot 1: Price with Regime Background
    fig.add_trace(go.Scatter(x=date_col, y=df['Close'], mode='lines', name='Price', line=dict(color='black', width=1)), row=1, col=1)

    # One rectangle per run of identical regime labels. Positional lists
    # keep this independent of the index type; the band height is the same
    # for every run, so it is computed once.
    date_values = list(date_col)
    regimes = df['regime'].tolist()
    n_rows = len(regimes)
    band_top = df['Close'].max() * 1.1
    run_start = 0
    for i in range(1, n_rows + 1):
        if i == n_rows or regimes[i] != regimes[run_start]:
            color = regime_colors.get(regimes[run_start], 'rgba(128, 128, 128, 0.3)')  # Default grey
            fig.add_shape(type="rect",
                          x0=date_values[run_start], y0=0, x1=date_values[i - 1], y1=band_top,
                          line=dict(width=0), fillcolor=color, layer='below',
                          row=1, col=1)
            run_start = i

    # Plot 2: Regime Scores
    for r in ['Bull', 'Neutral', 'Bear']:
        if f'final_score_{r}' in df.columns:
            fig.add_trace(go.Scatter(x=date_col, y=df[f'final_score_{r}'], mode='lines', name=f'{r} Score', line=dict(color=line_colors.get(r))), row=2, col=1)

    # Plot 3: Confidence
    if 'confidence' in df.columns:
        # Map confidence to numerical value for plotting
        confidence_map = {'High': 3, 'Medium': 2, 'Low': 1}
        y_confidence = df['confidence'].map(confidence_map)
        fig.add_trace(go.Scatter(x=date_col, y=y_confidence, mode='lines', name='Confidence', line=dict(color='purple')), row=3, col=1)
        fig.update_yaxes(tickvals=[1, 2, 3], ticktext=['Low', 'Medium', 'High'], range=[0.5, 3.5], row=3, col=1)

    # Update layout
    fig.update_layout(title='Market Regime Classification',
                      xaxis_title='Date',
                      yaxis_title='Price',
                      legend_title='Regimes/Scores',
                      height=900,
                      template='plotly_white',
                      xaxis_rangeslider_visible=False)
    fig.update_yaxes(title_text="Score", row=2, col=1)
    fig.update_yaxes(title_text="Confidence", row=3, col=1)

    fig.show()


def plot_regime_performance(stats_df):
    """Show a 2x2 grid of bar charts comparing regimes.

    The panels are, in order: annualized return, annualized volatility,
    Sharpe ratio and max drawdown. Bars are coloured per regime (grey for
    any label without a dedicated colour).

    Args:
        stats_df: Per-regime statistics indexed by regime label, as built
            by calculate_regime_statistics. Nothing is drawn when empty.
    """
    if stats_df.empty:
        print("Statistics DataFrame is empty. Cannot plot performance.")
        return

    table = stats_df.reset_index().rename(columns={'index': 'Regime'})
    palette = {'Bull': 'green', 'Neutral': 'gold', 'Bear': 'red'}

    # (statistics column, trace name, grid row, grid column) for each panel.
    panels = (
        ('annualized_return', 'Ann. Return', 1, 1),
        ('annualized_volatility', 'Ann. Vol', 1, 2),
        ('sharpe_ratio', 'Sharpe', 2, 1),
        ('max_drawdown_pct', 'Max Drawdown', 2, 2),
    )

    fig = make_subplots(rows=2, cols=2,
                        subplot_titles=('Annualized Return (%)', 'Annualized Volatility (%)', 'Sharpe Ratio', 'Max Drawdown (%)'),
                        vertical_spacing=0.15,
                        horizontal_spacing=0.1)

    for column, trace_name, grid_row, grid_col in panels:
        bar_colors = [palette.get(label, 'grey') for label in table['Regime']]
        bars = go.Bar(x=table['Regime'], y=table[column], name=trace_name, marker_color=bar_colors)
        fig.add_trace(bars, row=grid_row, col=grid_col)

    fig.update_layout(title='Performance by Market Regime',
                      height=700,
                      template='plotly_white',
                      showlegend=False)
    fig.show()

# ======== MAIN FUNCTION ========
def main():
    """Run the rule-based market regime analysis end to end.

    Steps: download market data, load the optional A/D line file, compute
    features, score regimes, print the regime statistics and transition
    summaries, and render the plots.

    Returns:
        A ``(df_final, regime_stats_df)`` tuple. When too few rows remain
        after feature calculation, ``(None, None)`` is returned so that
        callers unpacking two values can test the first element for
        ``None`` instead of failing on a bare ``None``.
    """

    # 1. Download and prepare market data
    df_market = download_market_data(
        TICKER, VIX_TICKER, TNX_TICKER, GLD_TICKER,
        XLY_TICKER, XLP_TICKER, XLU_TICKER, XLF_TICKER,
        HYG_TICKER, TLT_TICKER, VIX3M_TICKER, IRX_TICKER,
        UUP_TICKER, TIP_TICKER, IEF_TICKER,
        START_DATE, END_DATE
    )

    # 2. Load A/D line data (optional)
    ad_data = load_ad_line_data("nyse_breadth_2023.csv")  # Update path if needed

    # 3. Calculate all features from the base market data and the A/D data
    df_features = calculate_features(df_market, ad_data, percentile_lookback=PERCENTILE_LOOKBACK)

    # Ensure we have enough data after feature calculation and NaN drops:
    # the longest lookback window plus a small margin of 5 rows.
    min_history = max(PERCENTILE_LOOKBACK, SMA_SLOW, MOMENTUM_WINDOW)
    if len(df_features) < min_history + 5:
        print(f"Error: Insufficient data ({len(df_features)} rows) after feature calculation. Minimum needed ~{min_history}. Exiting.")
        # Keep the two-element return shape of the success path.
        return None, None

    # Make sure Date is the index for easier .loc operations
    if 'Date' in df_features.columns:
        df_features['Date'] = pd.to_datetime(df_features['Date'])
        df_features = df_features.set_index('Date')

    # 4. Calculate regimes using the rule-based system
    df_final = calculate_all_regimes(df_features, ALL_CONFIG)

    # --- Verification Steps ---
    print(f"\nFinal DataFrame head:\n{df_final.head().to_string()}")
    print(f"\nFinal DataFrame tail:\n{df_final.tail().to_string()}")
    print(f"\nRegime distribution:\n{df_final['regime'].value_counts(normalize=True) * 100}")

    # 5. Calculate regime statistics
    regime_stats_df = calculate_regime_statistics(df_final)
    if not regime_stats_df.empty:
        print("\nRegime Performance Summary:")
        print("=" * 80)
        # Format for better readability
        print(regime_stats_df.to_string(float_format="%.2f"))
    else:
        print("\nRegime statistics could not be calculated.")

    # 6. Analyze regime transitions
    transitions_list, transition_matrix = analyze_regime_transitions(df_final)
    if transitions_list:
        print("\nRegime Transitions Summary:")
        print("=" * 80)
        # Print last 10 transitions
        for t in transitions_list[-10:]:
            # A falsy 'from_regime' marks the first spell, which has no predecessor.
            from_r = t['from_regime'] if t['from_regime'] else "Start"
            print(f"{from_r} → {t['to_regime']}: {t['start_date'].strftime('%Y-%m-%d')} to {t['end_date'].strftime('%Y-%m-%d')} ({t['duration_days']} days)")

        print("\nTransition Matrix (Probability):")
        print("=" * 80)
        print(transition_matrix.to_string(float_format="%.3f"))

    # 7. Create visualizations
    plot_regime_time_series(df_final)
    if not regime_stats_df.empty:
        plot_regime_performance(regime_stats_df)

    print("\nAnalysis Complete.")
    # Return the final dataframe for further external use if needed
    return df_final, regime_stats_df

if __name__ == "__main__":
    # main() may bail out early with a bare None instead of a
    # (results, stats) pair. Unpacking that directly would raise TypeError
    # before the failure message below could be printed, so normalise first.
    outcome = main()
    results_df, stats = outcome if outcome is not None else (None, None)

    if results_df is not None:
        print("\nRule-based regime analysis finished successfully.")
        # Example: Access the latest regime (last row of the results)
        latest_regime = results_df['regime'].iloc[-1]
        latest_confidence = results_df['confidence'].iloc[-1]
        latest_scores = results_df[['final_score_Bull', 'final_score_Neutral', 'final_score_Bear']].iloc[-1]
        print(f"\nLatest Calculated Regime ({results_df.index[-1].strftime('%Y-%m-%d')}): {latest_regime} (Confidence: {latest_confidence})")
        print(f"Scores: Bull={latest_scores['final_score_Bull']:.1f}, Neutral={latest_scores['final_score_Neutral']:.1f}, Bear={latest_scores['final_score_Bear']:.1f}")
    else:
        print("\nAnalysis failed or returned no results.")



Downloading market data from 2018-01-01 to 2025-04-25...
Downloading main (SPY)...
Downloading VIX (^VIX)...
Downloading TNX (^TNX)...
Downloading GLD (GLD)...
Downloading XLY (XLY)...
Downloading XLP (XLP)...
Downloading XLU (XLU)...
Downloading XLF (XLF)...
Downloading HYG (HYG)...
Downloading TLT (TLT)...
Downloading VIX3M (^VIX3M)...
Downloading IRX (^IRX)...
Downloading UUP (UUP)...
Downloading TIP (TIP)...
Downloading IEF (IEF)...
Merging dataframes...
Forward filling missing values...
Market data download and initial preparation complete.
Loading A/D line data from nyse_breadth_2023.csv...
A/D line data loaded and prepared.
Calculating technical features...
Calculating rolling percentiles...
Merging A/D line features...
Dropped 63 rows due to NaNs in essential features.
Feature calculation complete.
Calculating regime scores...
Regime scoring complete.

Final DataFrame head:
                  Open        High         Low       Close     Volume        VIX    TNX         GLD      


Analysis Complete.

Rule-based regime analysis finished successfully.

Latest Calculated Regime (2025-04-24): Bear (Confidence: Medium)
Scores: Bull=23.0, Neutral=31.5, Bear=47.2
