In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
import plotly.figure_factory as ff
from plotly.subplots import make_subplots
from hmmlearn import hmm
import yfinance as yf
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

# Hidden Markov Model Modifiable Parameters
hidden_states = 3
em_iterations = 75

# Fixed training period 
train_start_date = "2018-01-01"
train_end_date = "2024-07-31"

# Date parameters for yfinance API - download data from 1995 to present
start_date = "2020-01-01"
end_date = datetime.today().strftime("%Y-%m-%d")

print(f"Downloading market data from {start_date} to {end_date}...")
# Download SPY and VIX data using yfinance
df_spy = yf.download('SPY', start=start_date, end=end_date, auto_adjust=True)
df_vix = yf.download('^VIX', start=start_date, end=end_date)

# Fix the multi-level column structure
df_spy.columns = df_spy.columns.droplevel(1) if len(df_spy.columns.names) > 1 else df_spy.columns
df_vix.columns = df_vix.columns.droplevel(1) if len(df_vix.columns.names) > 1 else df_vix.columns

# Reset index to make Date a column
df_spy = df_spy.reset_index()
df_vix = df_vix.reset_index()

# Keep only the Date and Close columns from VIX
df_vix = df_vix[['Date', 'Close']].rename(columns={'Close': 'VIX'})

# Merge SPY and VIX data
df = pd.merge(df_spy, df_vix, on='Date', how='left')

# Forward fill any missing VIX values
df['VIX'] = df['VIX'].fillna(method='ffill')

# Add log of VIX
df['LogVIX'] = np.log(df['VIX'])

# Display first few rows to check the structure
print("DataFrame structure:")
print("Data shape:", df.shape)
print("Data columns:", df.columns.tolist())
df.head()

Downloading market data from 2020-01-01 to 2025-04-13...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

YF.download() has changed argument auto_adjust default to True
DataFrame structure:
Data shape: (1327, 8)
Data columns: ['Date', 'Close', 'High', 'Low', 'Open', 'Volume', 'VIX', 'LogVIX']





Price,Date,Close,High,Low,Open,Volume,VIX,LogVIX
0,2020-01-02,300.291595,300.3101,298.128634,299.06223,59151200,12.47,2.523326
1,2020-01-03,298.017639,299.154592,296.806751,296.86221,77709700,14.02,2.640485
2,2020-01-06,299.154602,299.23779,296.122728,296.242897,55653900,13.85,2.628285
3,2020-01-07,298.313477,299.062193,297.860529,298.581516,40496400,13.79,2.623944
4,2020-01-08,299.903351,301.132716,298.25802,298.507583,68296000,13.45,2.598979


In [2]:
# Function to calculate technical indicators
def calculate_indicators(data):
    # Create a copy of the dataframe to avoid modifying original
    df_copy = data.copy()
    
    # Volatility is computed by obtaining variance between current close and prices of past 10 days
    volatility = []
    # MA is the 10 day SMA
    ma = []
    # Return is the single-day percentage return
    returns = []
    ma_sum = 0
    
    # Normalize LogVIX relative to its recent history (10-day window)
    log_vix_ratio = []
    
    # Warming up data for calculations
    for i in range(0, 10):
        volatility.append(0)
        ma.append(0)
        returns.append(0)
        log_vix_ratio.append(0)
        ma_sum += df_copy['Close'].iloc[i]
    
    # Filling in data for return, moving average, and volatility
    for ind in range(len(df_copy)):
        if ind >= 10:
            # SPY indicators
            tail_close = df_copy['Close'].iloc[ind-10]
            prev_close = df_copy['Close'].iloc[ind-1]
            head_close = df_copy['Close'].iloc[ind]
            ma_sum = (ma_sum - tail_close + head_close)
            ma_curr = ma_sum/10
            ma.append(ma_curr)
            returns.append(((head_close-prev_close)/prev_close)*100)
            
            # Computing Volatility
            vol_sum = 0
            for i in range(0, 10):
                curr_vol = abs(ma_curr - df_copy['Close'].iloc[ind-i])
                vol_sum += (curr_vol ** 2)
            volatility.append(vol_sum/10)
            
            # LogVIX-based indicator: current LogVIX relative to 10-day average
            log_vix_10day_avg = sum(df_copy['LogVIX'].iloc[ind-10:ind]) / 10
            log_vix_ratio.append(df_copy['LogVIX'].iloc[ind] / log_vix_10day_avg if log_vix_10day_avg > 0 else 1)
    
    # Adding columns to dataframe
    df_copy['Volatility'] = volatility
    df_copy['MA'] = ma
    df_copy['Return'] = returns
    df_copy['LogVIX_Ratio'] = log_vix_ratio
    
    return df_copy

# Calculate indicators for the entire dataset
df = calculate_indicators(df)
df.tail(5)

Price,Date,Close,High,Low,Open,Volume,VIX,LogVIX,Volatility,MA,Return,LogVIX_Ratio
1322,2025-04-07,504.380005,523.169983,481.799988,489.190002,256611400,46.98,3.849722,599.447177,549.803006,-0.178118,1.238541
1323,2025-04-08,496.480011,524.97998,489.160004,521.859985,165816600,52.330002,3.95757,755.574688,541.905005,-1.566278,1.233988
1324,2025-04-09,548.619995,548.619995,493.049988,493.440002,241867300,33.619999,3.515121,684.886736,539.908002,10.50193,1.059188
1325,2025-04-10,524.580017,533.5,509.320007,532.169983,162331200,40.720001,3.706719,616.487105,535.658002,-4.3819,1.096872
1326,2025-04-11,533.940002,536.429993,520.070007,523.01001,97741700,37.560001,3.62594,572.0568,533.486005,1.784282,1.0488


### Theoretical, useful for cross checking HMM states 

In [7]:
# %%
# Change Point Detection using Ruptures

# Install the library if not already installed
try:
    import ruptures as rpt
except ImportError:
    print("Installing ruptures...")
    !pip install ruptures
    import ruptures as rpt

import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

print("Starting Change Point Detection using Ruptures...")

# --- Data Preparation ---
# Extract data starting from where indicators are properly calculated (after index 10)
start_idx = 10  # Skip initial points where indicators were initialized to 0
df_analysis = df.iloc[start_idx:].copy().reset_index(drop=True)

# Ensure we have valid data
if len(df_analysis) == 0:
    raise ValueError("No data available for analysis after filtering")

# Prepare signal data - we'll use a combination of Return and Volatility to detect regime changes
# This captures both mean shifts and volatility shifts
signal_data_return = df_analysis['Return'].values
signal_data_vol = df_analysis['Volatility'].values

# Ensure no NaN or Inf values
if np.any(np.isnan(signal_data_return)) or np.any(np.isinf(signal_data_return)):
    print("Warning: NaN or Inf values found in return data. Replacing with 0.")
    signal_data_return = np.nan_to_num(signal_data_return)
    
if np.any(np.isnan(signal_data_vol)) or np.any(np.isinf(signal_data_vol)):
    print("Warning: NaN or Inf values found in volatility data. Replacing with 0.")
    signal_data_vol = np.nan_to_num(signal_data_vol)

# Stack return and volatility into a 2D array for multivariate analysis
# This allows detection of regime changes in either returns or volatility
# First normalize each series to have unit variance to ensure equal importance
return_std = np.std(signal_data_return) if np.std(signal_data_return) > 0 else 1
vol_std = np.std(signal_data_vol) if np.std(signal_data_vol) > 0 else 1

signal_data_return_norm = signal_data_return / return_std
signal_data_vol_norm = signal_data_vol / vol_std

# Stack the normalized data
points = np.column_stack((signal_data_return_norm, signal_data_vol_norm))

# --- Define Segmentation Function with Error Handling ---
def detect_change_points(data, min_size=20, penalty_value=None):
    """
    Detect change points in the given data using Pelt algorithm.
    
    Args:
        data: 2D array of shape (n_samples, n_features)
        min_size: Minimum segment length
        penalty_value: Custom penalty value (if None, BIC penalty is used)
        
    Returns:
        List of change point indices
    """
    # Sanity check
    if len(data) <= min_size * 2:
        print(f"Warning: Data length ({len(data)}) is too short for meaningful segmentation with min_size={min_size}")
        return []
        
    try:
        # Create and fit the model
        model = "normal"  # Normal distribution cost function (detects mean and variance changes)
        algo = rpt.Pelt(model=model, min_size=min_size).fit(data)
        
        # Calculate penalty if not provided
        if penalty_value is None:
            n_samples = len(data)
            n_dims = data.shape[1]
            
            # Basic BIC penalty: log(n) * k, where k is number of parameters per segment
            # For normal distribution, k = mean + covariance parameters
            # For 2D data, k = 2 + 3 = 5 (2 means, 2 variances, 1 covariance)
            k = n_dims + n_dims * (n_dims + 1) // 2
            
            # Scale by an empirical factor to avoid over-segmentation
            # This is somewhat arbitrary but 0.5-2x is a reasonable range
            scaling = 1.5  
            penalty_value = scaling * k * np.log(n_samples)
            
        # Predict change points
        result = algo.predict(pen=penalty_value)
        
        # Filter out the last index which just marks the end of the series
        change_points = [idx for idx in result if idx < len(data)]
        
        return change_points
        
    except Exception as e:
        print(f"Error in change point detection: {e}")
        return []

# --- Run Change Point Detection ---
change_points = detect_change_points(points, min_size=20)

# Map change points to dates
change_point_dates = df_analysis['Date'].iloc[change_points].tolist() if change_points else []

print(f"Detected {len(change_points)} change points.")

# --- Create Regime Labels ---
# Assign a regime number to each data point
regimes = np.zeros(len(df_analysis), dtype=int)
current_regime = 0
for cp in change_points:
    current_regime += 1
    regimes[cp:] = current_regime

# Create a new column in df_analysis to hold regime labels
df_analysis['Regime'] = regimes

# --- Create Visualization ---
# Create a two-panel figure
fig = make_subplots(rows=2, cols=1, 
                    shared_xaxes=True,
                    vertical_spacing=0.05,
                    subplot_titles=('SPY Price with Regime Changes', 'Return and Volatility by Regime'))

# Upper panel: SPY price with regime backgrounds
# We'll color each regime with a different background color
regime_colors = ['rgba(255,255,255,0)', 'rgba(173,216,230,0.2)', 'rgba(144,238,144,0.2)', 
                'rgba(255,182,193,0.2)', 'rgba(230,230,250,0.2)', 'rgba(255,222,173,0.2)']

# Get unique regime values
unique_regimes = sorted(df_analysis['Regime'].unique())

# Add colored backgrounds for each regime
for regime in unique_regimes:
    regime_data = df_analysis[df_analysis['Regime'] == regime]
    if not regime_data.empty:
        start_date = regime_data['Date'].iloc[0]
        end_date = regime_data['Date'].iloc[-1]
        
        # Add rectangle for regime background
        color_idx = regime % len(regime_colors)
        fig.add_shape(
            type="rect",
            xref="x",
            yref="paper",
            x0=start_date,
            y0=0,
            x1=end_date,
            y1=1,
            fillcolor=regime_colors[color_idx],
            opacity=0.5,
            layer="below",
            line_width=0,
            row=1, col=1
        )

# Add SPY price line
fig.add_trace(
    go.Scatter(
        x=df_analysis['Date'],
        y=df_analysis['Close'],
        mode='lines',
        name='SPY Price',
        line=dict(color='navy', width=1.5)
    ),
    row=1, col=1
)

# Add vertical lines at change points
for date in change_point_dates:
    fig.add_vline(
        x=date,
        line_width=1.5,
        line_dash="solid",
        line_color="red",
        row=1, col=1
    )
    # Repeat the line in the bottom panel
    fig.add_vline(
        x=date,
        line_width=1.5,
        line_dash="solid",
        line_color="red",
        row=2, col=1
    )

# Lower panel: Return and Volatility
fig.add_trace(
    go.Scatter(
        x=df_analysis['Date'],
        y=df_analysis['Return'],
        mode='lines',
        name='Daily Return %',
        line=dict(color='darkgreen', width=1)
    ),
    row=2, col=1
)

fig.add_trace(
    go.Scatter(
        x=df_analysis['Date'],
        y=df_analysis['Volatility'],
        mode='lines',
        name='Volatility',
        line=dict(color='darkorange', width=1),
        opacity=0.7
    ),
    row=2, col=1
)

# Update layout
fig.update_layout(
    title_text='SPY Market Regimes Detected by Change Point Analysis',
    height=800,
    showlegend=True,
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    template="plotly_white"
)

# Update axes
fig.update_xaxes(
    title_text="Date",
    rangeslider_visible=False,
    row=2, col=1
)

fig.update_yaxes(
    title_text="SPY Price ($)",
    row=1, col=1
)

fig.update_yaxes(
    title_text="Value",
    row=2, col=1
)

# Show the figure
fig.show()

# --- Print Regime Information ---
print("\nDetected Regime Changes:")
for i, date in enumerate(change_point_dates):
    print(f"Regime {i+1} starts on: {date.strftime('%Y-%m-%d')}")

# Calculate regime statistics
print("\nRegime Statistics:")
for regime in unique_regimes:
    regime_data = df_analysis[df_analysis['Regime'] == regime]
    if len(regime_data) > 0:
        start_date = regime_data['Date'].iloc[0].strftime('%Y-%m-%d')
        end_date = regime_data['Date'].iloc[-1].strftime('%Y-%m-%d')
        mean_return = regime_data['Return'].mean()
        std_return = regime_data['Return'].std()
        mean_vol = regime_data['Volatility'].mean()
        
        print(f"Regime {regime} ({start_date} to {end_date}):")
        print(f"  Mean Daily Return: {mean_return:.4f}%")
        print(f"  Return Volatility: {std_return:.4f}%")
        print(f"  Mean Volatility Indicator: {mean_vol:.6f}")
        print("")


Starting Change Point Detection using Ruptures...
Detected 18 change points.



Detected Regime Changes:
Regime 1 starts on: 2020-02-24
Regime 2 starts on: 2020-04-21
Regime 3 starts on: 2020-07-16
Regime 4 starts on: 2020-08-27
Regime 5 starts on: 2020-11-20
Regime 6 starts on: 2021-01-28
Regime 7 starts on: 2021-04-19
Regime 8 starts on: 2021-09-16
Regime 9 starts on: 2022-01-18
Regime 10 starts on: 2022-05-26
Regime 11 starts on: 2022-06-27
Regime 12 starts on: 2022-11-23
Regime 13 starts on: 2023-04-13
Regime 14 starts on: 2023-09-20
Regime 15 starts on: 2023-11-22
Regime 16 starts on: 2024-07-23
Regime 17 starts on: 2024-08-27
Regime 18 starts on: 2025-02-27

Regime Statistics:
Regime 0 (2020-01-16 to 2020-02-21):
  Mean Daily Return: 0.0671%
  Return Volatility: 0.8105%
  Mean Volatility Indicator: 8.825304

Regime 1 (2020-02-24 to 2020-04-20):
  Mean Daily Return: -0.3039%
  Return Volatility: 4.5919%
  Mean Volatility Indicator: 163.492588

Regime 2 (2020-04-21 to 2020-07-15):
  Mean Daily Return: 0.2421%
  Return Volatility: 1.5412%
  Mean Volatility Ind

### Live 

In [6]:
# %%
# Real-time Change Point Detection using Ruptures (No Look-ahead)

try:
    import ruptures as rpt
except ImportError:
    print("Installing ruptures...")
    !pip install ruptures
    import ruptures as rpt

import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from tqdm.notebook import tqdm  # For progress bar

print("Starting Real-time Change Point Detection Simulation...")

# --- Data Preparation ---
# Extract data starting from where indicators are properly calculated
start_idx = 10  # Skip initial points where indicators were initialized to 0
df_analysis = df.iloc[start_idx:].copy().reset_index(drop=True)

# Ensure we have valid data
if len(df_analysis) == 0:
    raise ValueError("No data available for analysis after filtering")

# --- Define Change Point Detection Function ---
def detect_change_points(data, min_size=20, penalty_scaling=1.5):
    """
    Detect change points in the given data using Pelt algorithm.
    
    Args:
        data: 2D array of shape (n_samples, n_features)
        min_size: Minimum segment length
        penalty_scaling: Scaling factor for the BIC penalty
        
    Returns:
        List of change point indices
    """
    if len(data) <= min_size * 2:
        return []
        
    try:
        # Create and fit the model
        model = "normal"  # Normal distribution cost function
        algo = rpt.Pelt(model=model, min_size=min_size).fit(data)
        
        # Calculate BIC penalty
        n_samples = len(data)
        n_dims = data.shape[1]
        
        # For normal distribution, params = means + covariance matrix elements
        k = n_dims + n_dims * (n_dims + 1) // 2
        penalty_value = penalty_scaling * k * np.log(n_samples)
        
        # Predict change points
        result = algo.predict(pen=penalty_value)
        
        # Filter out the last index which marks the end of the series
        change_points = [idx for idx in result if idx < len(data)]
        
        return change_points
        
    except Exception as e:
        print(f"Error in change point detection: {e}")
        return []

# --- Simulate Real-time Detection ---
# Parameters for simulation
min_window = 252  # Start detection after 1 year of data (trading days)
step_size = 1     # Increment by 1 day each time (can be increased for performance)
min_segment = 20  # Minimum regime length in days

# Create arrays to store real-time detection results
# At each time t, we'll store the regime label based only on data up to time t
real_time_regimes = np.zeros(len(df_analysis), dtype=int)
real_time_change_points = []  # Store the actual indices where we detected changes

# We'll also track when each change point was first detected
detection_dates = []
detection_indices = []

print(f"Starting real-time simulation with {len(df_analysis)} data points...")
print(f"Initial window: {min_window} days, step size: {step_size} days")

# Loop through time, simulating the passing of each day
# Use tqdm to show progress
for t in tqdm(range(min_window, len(df_analysis), step_size)):
    # Use data only up to current time t (expanding window)
    historical_data = df_analysis.iloc[:t+1]
    
    # Prepare signals: returns and volatility
    returns = historical_data['Return'].values
    volatility = historical_data['Volatility'].values
    
    # Normalize to have unit variance to ensure equal importance
    return_std = np.std(returns) if np.std(returns) > 0 else 1
    vol_std = np.std(volatility) if np.std(volatility) > 0 else 1
    
    returns_norm = returns / return_std
    volatility_norm = volatility / vol_std
    
    # Stack the normalized data
    points = np.column_stack((returns_norm, volatility_norm))
    
    # Detect change points using only historical data
    historical_change_points = detect_change_points(
        points, 
        min_size=min_segment,
        penalty_scaling=1.5  # Adjust as needed
    )
    
    # Use the last detected change point to assign the current regime
    if len(historical_change_points) > 0:
        last_cp = historical_change_points[-1]
        # If the last change point is new, record it
        if last_cp not in real_time_change_points and t - last_cp >= min_segment:
            real_time_change_points.append(last_cp)
            detection_dates.append(df_analysis['Date'].iloc[t])
            detection_indices.append(t)
            
    # Update the real-time regime label
    if len(real_time_change_points) == 0:
        # If no change points detected yet, everything is regime 0
        real_time_regimes[t] = 0
    else:
        # Find the most recent change point before current time
        relevant_cps = [cp for cp in real_time_change_points if cp < t]
        if len(relevant_cps) == 0:
            real_time_regimes[t] = 0
        else:
            # Increment regime by 1 after each change point
            real_time_regimes[t] = len(relevant_cps)

# Fill in any remaining days (in case step_size > 1)
if step_size > 1:
    for t in range(min_window, len(df_analysis)):
        if real_time_regimes[t] == 0 and t > min_window:
            # Propagate the previous regime label
            real_time_regimes[t] = real_time_regimes[t-1]

# --- Process Final Results ---
# Add the real-time regime labels to the dataframe
df_analysis['RealTimeRegime'] = real_time_regimes

# Convert change point indices to dates for visualization
change_point_dates = [df_analysis['Date'].iloc[cp] for cp in real_time_change_points 
                     if cp < len(df_analysis)]

print(f"\nDetected {len(change_point_dates)} change points in real-time simulation.")

# --- Create Visualization ---
# Find min and max SPY prices for proper y-axis scaling in annotations
min_spy_price = df_analysis['Close'].min()
max_spy_price = df_analysis['Close'].max()
price_range = max_spy_price - min_spy_price

# Create a two-panel figure
fig = make_subplots(rows=2, cols=1, 
                    shared_xaxes=True,
                    vertical_spacing=0.05,
                    subplot_titles=('SPY Price with Real-time Detected Regimes', 'Return and Volatility'))

# Get unique real-time regime values
unique_regimes = sorted(df_analysis['RealTimeRegime'].unique())

# Define colors for regimes
regime_colors = ['rgba(255,255,255,0)', 'rgba(173,216,230,0.2)', 'rgba(144,238,144,0.2)', 
                'rgba(255,182,193,0.2)', 'rgba(230,230,250,0.2)', 'rgba(255,222,173,0.2)',
                'rgba(255,250,205,0.2)', 'rgba(216,191,216,0.2)', 'rgba(176,224,230,0.2)']

# Add SPY price line first (so we can get the y-axis range)
fig.add_trace(
    go.Scatter(
        x=df_analysis['Date'],
        y=df_analysis['Close'],
        mode='lines',
        name='SPY Price',
        line=dict(color='navy', width=1.5)
    ),
    row=1, col=1
)

# Define regions for each regime - use paper coordinates for the background rectangles
for regime in unique_regimes:
    # Skip regime 0 (pre-detection period)
    if regime == 0 and min_window > 0:
        continue
        
    regime_data = df_analysis[df_analysis['RealTimeRegime'] == regime]
    if len(regime_data) > 0:
        start_date = regime_data['Date'].iloc[0]
        end_date = regime_data['Date'].iloc[-1]
        
        # Add a colored background for this regime
        color_idx = regime % len(regime_colors)
        fig.add_shape(
            type="rect",
            xref="x",
            yref="paper",
            x0=start_date,
            y0=0,
            x1=end_date,
            y1=1,
            fillcolor=regime_colors[color_idx],
            opacity=0.5,
            layer="below",
            line_width=0,
            row=1, col=1
        )

# Calculate y-positions for annotations in the price subplot
# Position annotations at different heights to avoid overlap
top_annotation_y = max_spy_price + price_range * 0.05  # 5% above max price
mid_annotation_y = max_spy_price - price_range * 0.15  # 15% below max
bottom_annotation_y = min_spy_price + price_range * 0.05  # 5% above min

# Add vertical lines at real-time detected change points
for i, date in enumerate(change_point_dates):
    # Get the SPY price at the change point for better annotation placement
    cp_idx = df_analysis.index[df_analysis['Date'] == date].tolist()
    if cp_idx:
        cp_price = df_analysis.loc[cp_idx[0], 'Close']
    else:
        cp_price = mid_annotation_y  # Fallback
    
    # Use add_shape for vertical lines in both panels
    # First in the top panel - using y coordinates in data space
    fig.add_shape(
        type="line",
        x0=date, 
        y0=min_spy_price,
        x1=date,
        y1=max_spy_price,
        line=dict(color="red", width=1.5, dash="solid"),
        xref="x",
        yref="y",
        row=1, col=1
    )
    
    # Add annotation for the change point - position above the price line
    annotation_y = cp_price + price_range * 0.08  # Position the label above the price
    # Alternate positioning to avoid overlaps if change points are close
    if i % 2 == 1:
        annotation_y = cp_price + price_range * 0.15
        
    fig.add_annotation(
        x=date,
        y=annotation_y,
        text=f"CP {i+1}",
        showarrow=True,
        arrowhead=2,
        arrowsize=1,
        arrowwidth=1,
        arrowcolor="red",
        ax=0,
        ay=-30,
        bgcolor="rgba(255,255,255,0.8)",
        font=dict(size=10, color="red"),
        row=1, col=1
    )
    
    # Same vertical line in the bottom panel - spanning full height
    # Find the min and max of returns and volatility for proper scaling
    min_val = min(df_analysis['Return'].min(), df_analysis['Volatility'].min())
    max_val = max(df_analysis['Return'].max(), df_analysis['Volatility'].max())
    val_range = max_val - min_val
    
    fig.add_shape(
        type="line",
        x0=date, 
        y0=min_val,
        x1=date,
        y1=max_val,
        line=dict(color="red", width=1.5, dash="solid"),
        xref="x2",
        yref="y2",
        row=2, col=1
    )
    
    # If we have detection dates (when we first identified the change point),
    # add a different marker to show the detection lag
    if i < len(detection_dates):
        detection_date = detection_dates[i]
        
        # Get price at detection date for proper positioning
        det_idx = df_analysis.index[df_analysis['Date'] == detection_date].tolist()
        if det_idx:
            det_price = df_analysis.loc[det_idx[0], 'Close']
        else:
            det_price = mid_annotation_y  # Fallback
        
        # Add vertical line at detection date
        fig.add_shape(
            type="line",
            x0=detection_date, 
            y0=min_spy_price,
            x1=detection_date,
            y1=max_spy_price,
            line=dict(color="green", width=1, dash="dot"),
            xref="x",
            yref="y",
            row=1, col=1
        )
        
        # Add annotation for when the change point was detected
        # Position below the price to distinguish from change point labels
        annotation_y = det_price - price_range * 0.08
        
        fig.add_annotation(
            x=detection_date,
            y=annotation_y,
            text="Detected",
            showarrow=True,
            arrowhead=2,
            arrowsize=1,
            arrowwidth=1,
            arrowcolor="green",
            ax=0,
            ay=30,
            bgcolor="rgba(255,255,255,0.8)",
            font=dict(size=8, color="green"),
            row=1, col=1
        )

# Add line for minimum window (where we start detection)
if min_window > 0:
    start_date = df_analysis['Date'].iloc[min_window]
    
    # Get price at start date
    start_price = df_analysis.loc[min_window, 'Close']
    
    fig.add_shape(
        type="line",
        x0=start_date, 
        y0=min_spy_price,
        x1=start_date,
        y1=max_spy_price,
        line=dict(color="blue", width=1, dash="dash"),
        xref="x",
        yref="y",
        row=1, col=1
    )
    
    # Add annotation for the start of detection
    fig.add_annotation(
        x=start_date,
        y=start_price - price_range * 0.12,
        text="Start Detection",
        showarrow=True,
        arrowhead=2,
        arrowsize=1,
        arrowwidth=1,
        arrowcolor="blue",
        ax=0,
        ay=40,
        bgcolor="rgba(255,255,255,0.8)",
        font=dict(size=10, color="blue"),
        row=1, col=1
    )

# Lower panel: Return and Volatility
fig.add_trace(
    go.Scatter(
        x=df_analysis['Date'],
        y=df_analysis['Return'],
        mode='lines',
        name='Daily Return %',
        line=dict(color='darkgreen', width=1)
    ),
    row=2, col=1
)

fig.add_trace(
    go.Scatter(
        x=df_analysis['Date'],
        y=df_analysis['Volatility'],
        mode='lines',
        name='Volatility',
        line=dict(color='darkorange', width=1),
        opacity=0.7
    ),
    row=2, col=1
)

# Update layout
fig.update_layout(
    title_text='SPY Market Regimes Detected in Real-time Simulation (No Look-ahead)',
    height=800,
    showlegend=True,
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    template="plotly_white"
)

# Update axes
fig.update_xaxes(
    title_text="Date",
    rangeslider_visible=False,
    row=2, col=1
)

fig.update_yaxes(
    title_text="SPY Price ($)",
    row=1, col=1
)

fig.update_yaxes(
    title_text="Value",
    row=2, col=1
)

# Show the figure
fig.show()

# --- Print Regime Information ---
print("\nReal-time Detected Regime Changes:")
for i, date in enumerate(change_point_dates):
    detected_date = detection_dates[i] if i < len(detection_dates) else "N/A"
    
    # Calculate detection lag safely
    if i < len(detection_dates) and isinstance(detected_date, pd.Timestamp):
        detection_lag = (detected_date - date).days
    else:
        detection_lag = "N/A"
        
    print(f"Regime {i+1}:")
    print(f"  Change occurred: {date.strftime('%Y-%m-%d')}")
    print(f"  First detected: {detected_date.strftime('%Y-%m-%d') if isinstance(detected_date, pd.Timestamp) else detected_date}")
    print(f"  Detection lag: {detection_lag} days")

# Calculate regime statistics
print("\nRegime Statistics:")
for regime in sorted([r for r in unique_regimes if r > 0]):  # Skip regime 0 (initial window)
    regime_data = df_analysis[df_analysis['RealTimeRegime'] == regime]
    if len(regime_data) > 0:
        start_date = regime_data['Date'].iloc[0].strftime('%Y-%m-%d')
        end_date = regime_data['Date'].iloc[-1].strftime('%Y-%m-%d')
        mean_return = regime_data['Return'].mean()
        std_return = regime_data['Return'].std()
        mean_vol = regime_data['Volatility'].mean()
        
        print(f"Regime {regime} ({start_date} to {end_date}):")
        print(f"  Duration: {len(regime_data)} trading days")
        print(f"  Mean Daily Return: {mean_return:.4f}%")
        print(f"  Return Volatility: {std_return:.4f}%")
        print(f"  Mean Volatility Indicator: {mean_vol:.6f}")
        
        # Calculate cumulative performance of this regime
        cum_return = ((1 + regime_data['Return']/100).cumprod() - 1) * 100
        if len(cum_return) > 0:
            print(f"  Cumulative Return: {cum_return.iloc[-1]:.2f}%")
        
        print("")


Starting Real-time Change Point Detection Simulation...
Starting real-time simulation with 1317 data points...
Initial window: 252 days, step size: 1 days


  0%|          | 0/1065 [00:00<?, ?it/s]


Detected 28 change points in real-time simulation.



Real-time Detected Regime Changes:
Regime 1:
  Change occurred: 2020-11-20
  First detected: 2021-01-15
  Detection lag: 56 days
Regime 2:
  Change occurred: 2021-01-06
  First detected: 2021-02-09
  Detection lag: 34 days
Regime 3:
  Change occurred: 2021-01-13
  First detected: 2021-02-11
  Detection lag: 29 days
Regime 4:
  Change occurred: 2021-01-21
  First detected: 2021-02-19
  Detection lag: 29 days
Regime 5:
  Change occurred: 2021-01-28
  First detected: 2021-02-26
  Detection lag: 29 days
Regime 6:
  Change occurred: 2021-04-19
  First detected: 2021-06-09
  Detection lag: 51 days
Regime 7:
  Change occurred: 2021-09-16
  First detected: 2021-10-14
  Detection lag: 28 days
Regime 8:
  Change occurred: 2021-12-27
  First detected: 2022-01-25
  Detection lag: 29 days
Regime 9:
  Change occurred: 2022-01-10
  First detected: 2022-02-08
  Detection lag: 29 days
Regime 10:
  Change occurred: 2022-01-18
  First detected: 2022-02-15
  Detection lag: 28 days
Regime 11:
  Change occ