In [12]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
import plotly.figure_factory as ff
from plotly.subplots import make_subplots
from hmmlearn import hmm
import yfinance as yf
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

# Hidden Markov Model Modifiable Parameters
hidden_states = 3    # number of hidden states (market regimes) the model is given
em_iterations = 75   # passed to the HMM as its n_iter setting

# Fixed training period 
train_start_date = "2018-01-01"
train_end_date = "2024-12-31"

# Date parameters for yfinance API - download data from 1995 to present
start_date = "1995-01-01"
end_date = datetime.today().strftime("%Y-%m-%d")

print(f"Downloading market data from {start_date} to {end_date}...")
# Download SPY and VIX data using yfinance.
# auto_adjust is passed explicitly on both calls so the result does not depend
# on the library default (leaving it out is what triggers the
# "changed argument auto_adjust default" notice).
df_spy = yf.download('SPY', start=start_date, end=end_date, auto_adjust=True)
df_vix = yf.download('^VIX', start=start_date, end=end_date, auto_adjust=True)

# Fix the multi-level column structure: when the columns carry a second level,
# drop it so plain names such as 'Close' can be used below.
if df_spy.columns.nlevels > 1:
    df_spy.columns = df_spy.columns.droplevel(1)
if df_vix.columns.nlevels > 1:
    df_vix.columns = df_vix.columns.droplevel(1)

# Reset index to make Date a column
df_spy = df_spy.reset_index()
df_vix = df_vix.reset_index()

# Keep only the Date and Close columns from VIX
df_vix = df_vix[['Date', 'Close']].rename(columns={'Close': 'VIX'})

# Merge SPY and VIX data (left join: every SPY row is kept)
df = pd.merge(df_spy, df_vix, on='Date', how='left')

# Forward fill any missing VIX values.
# Series.ffill() replaces fillna(method='ffill'), which pandas has deprecated.
# Leading gaps (before the first VIX value) remain NaN, exactly as before.
df['VIX'] = df['VIX'].ffill()

# Display first few rows to check the structure
print("DataFrame structure:")
print("Data shape:", df.shape)
print("Data columns:", df.columns.tolist())
df.head()

[*********************100%***********************]  1 of 1 completed

Downloading market data from 1995-01-01 to 2025-04-11...
YF.download() has changed argument auto_adjust default to True



[*********************100%***********************]  1 of 1 completed

DataFrame structure:
Data shape: (7620, 7)
Data columns: ['Date', 'Close', 'High', 'Low', 'Open', 'Volume', 'VIX']





Price,Date,Close,High,Low,Open,Volume,VIX
0,1995-01-03,26.815872,26.852481,26.760959,26.770111,324300,14.25
1,1995-01-04,26.944002,26.944002,26.797567,26.93485,351800,13.53
2,1995-01-05,26.944002,27.008067,26.916546,26.962307,89800,13.5
3,1995-01-06,26.971464,27.090442,26.889095,26.998921,448400,13.13
4,1995-01-09,26.998913,26.998913,26.944,26.962304,36800,13.33


In [13]:
# Function to calculate technical indicators
def calculate_indicators(data, window=10):
    """
    Add the rolling SPY/VIX features that are later used as HMM observations.

    Parameters
    ----------
    data : DataFrame
        Must contain numeric 'Close' and 'VIX' columns, one row per period in
        chronological order. The input frame is not modified.
    window : int, default 10
        Look-back length in rows. The default reproduces the original
        hard-coded 10-row window.

    Returns
    -------
    DataFrame
        A copy of ``data`` with four added columns:

        * 'MA'         - mean of the last ``window`` closes (current row included)
        * 'Volatility' - population variance of those same closes around 'MA'
        * 'Return'     - one-row percentage change of 'Close'
        * 'VIX_Ratio'  - current VIX divided by the mean VIX of the previous
                         ``window`` rows (current row excluded); 1 when that
                         mean is not positive (zero, negative or NaN)

        The first ``window`` rows are a warm-up period and hold 0 in all four
        columns. A frame with ``window`` rows or fewer is therefore all zeros
        (the loop-based version raised IndexError on fewer than 10 rows).

    Raises
    ------
    ValueError
        If ``window`` is smaller than 1.

    Notes
    -----
    Each window is evaluated independently, so a NaN close only affects the
    windows that contain it (a running sum would stay NaN for every later
    row). Values agree with the previous loop implementation up to
    floating-point rounding.
    """
    if window < 1:
        raise ValueError("window must be a positive integer")

    # Create a copy of the dataframe to avoid modifying original
    df_copy = data.copy()
    n_rows = len(df_copy)

    # Warm-up rows keep these zeros
    volatility = np.zeros(n_rows)
    ma = np.zeros(n_rows)
    returns = np.zeros(n_rows)
    vix_ratio = np.zeros(n_rows)

    if n_rows > window:
        close = df_copy['Close'].to_numpy(dtype=float)
        vix = df_copy['VIX'].to_numpy(dtype=float)

        # Row k of the view is close[k : k + window]. Dropping the first view
        # row leaves the windows that END at rows window, window + 1, ...
        close_windows = np.lib.stride_tricks.sliding_window_view(close, window)[1:]
        ma[window:] = close_windows.mean(axis=1)
        volatility[window:] = close_windows.var(axis=1)  # ddof=0: divide by window

        prev_close = close[window - 1:-1]
        returns[window:] = (close[window:] - prev_close) / prev_close * 100

        # Dropping the last view row leaves the windows that end one row
        # BEFORE rows window, window + 1, ... (the current VIX is excluded)
        vix_avg = np.lib.stride_tricks.sliding_window_view(vix, window)[:-1].mean(axis=1)
        ratio = np.ones(n_rows - window)  # neutral fallback where the mean is not > 0
        np.divide(vix[window:], vix_avg, out=ratio, where=vix_avg > 0)
        vix_ratio[window:] = ratio

    # Adding columns to dataframe
    df_copy['Volatility'] = volatility
    df_copy['MA'] = ma
    df_copy['Return'] = returns
    df_copy['VIX_Ratio'] = vix_ratio

    return df_copy

# Enrich the merged SPY/VIX frame with the engineered feature columns, then
# preview the first 15 rows
df = calculate_indicators(data=df)
df.head(n=15)

Price,Date,Close,High,Low,Open,Volume,VIX,Volatility,MA,Return,VIX_Ratio
0,1995-01-03,26.815872,26.852481,26.760959,26.770111,324300,14.25,0.0,0.0,0.0,0.0
1,1995-01-04,26.944002,26.944002,26.797567,26.93485,351800,13.53,0.0,0.0,0.0,0.0
2,1995-01-05,26.944002,27.008067,26.916546,26.962307,89800,13.5,0.0,0.0,0.0,0.0
3,1995-01-06,26.971464,27.090442,26.889095,26.998921,448400,13.13,0.0,0.0,0.0,0.0
4,1995-01-09,26.998913,26.998913,26.944,26.962304,36800,13.33,0.0,0.0,0.0,0.0
5,1995-01-10,27.026375,27.17281,27.026375,27.062984,229800,12.52,0.0,0.0,0.0,0.0
6,1995-01-11,27.044676,27.117893,26.834176,27.117893,222400,12.15,0.0,0.0,0.0,0.0
7,1995-01-12,27.053829,27.072134,26.962307,27.01722,40300,12.83,0.0,0.0,0.0,0.0
8,1995-01-13,27.374159,27.374159,27.163659,27.200268,170600,11.1,0.0,0.0,0.0,0.0
9,1995-01-16,27.538895,27.548047,27.365003,27.365003,105100,11.14,0.0,0.0,0.0,0.0


In [14]:
# Function to train HMM model on specified period
def train_hmm_model(data, start_date, end_date, n_states=3, n_iter=75, random_state=42):
    """
    Fit a Gaussian HMM on one date window and summarise the regimes it finds.

    Parameters
    ----------
    data : DataFrame
        Must contain 'Date', 'Volatility', 'Return', 'VIX_Ratio' and 'VIX'.
    start_date, end_date : str
        Inclusive bounds of the training window, compared against data['Date'].
    n_states : int, default 3
        Number of hidden states (n_components of the HMM).
    n_iter : int, default 75
        Passed to the HMM as its n_iter setting.
    random_state : int or None, default 42
        Seed forwarded to the HMM so repeated runs give the same fit and the
        same regime numbering. Pass None for the previous unseeded behaviour.

    Returns
    -------
    tuple
        (model, training, predictions): the fitted model, the filtered copy of
        ``data`` it was trained on, and the state predicted for each training
        row, in row order.

    Raises
    ------
    ValueError
        If no rows fall inside the requested window.
    """
    # Filter data to training period
    in_window = (data['Date'] >= start_date) & (data['Date'] <= end_date)
    training = data[in_window].copy()

    print(f"Training model on data from {start_date} to {end_date}")
    print(f"Training data shape: {training.shape}")

    # Fail with a clear message instead of handing the model an empty array
    if training.empty:
        raise ValueError(
            f"No training rows between {start_date} and {end_date}; cannot fit the HMM"
        )

    # Prepare observations for HMM (using Volatility, Return, and VIX_Ratio)
    obs = training[['Volatility', 'Return', 'VIX_Ratio']].to_numpy(dtype=float)

    # Create and train the HMM model
    model = hmm.GaussianHMM(
        n_components=n_states,
        covariance_type="full",
        n_iter=n_iter,
        random_state=random_state,
    )
    model.fit(obs)

    # Say so when the iteration budget ran out before convergence was reported
    if not model.monitor_.converged:
        print(f"Warning: HMM training did not converge within {n_iter} iterations")

    # Get predictions for training data
    predictions = model.predict(obs)

    # Analyze regime characteristics. A state that never occurs keeps averages
    # of 0 and a count of 0. Note the summary reports the raw VIX level,
    # whereas the model itself was fitted on VIX_Ratio.
    regime_means = (
        training.assign(_regime=predictions)
        .groupby('_regime')[['Volatility', 'Return', 'VIX']]
        .mean()
        .reindex(range(n_states), fill_value=0.0)
    )
    regime_count = np.bincount(predictions, minlength=n_states)

    # Print regime characteristics
    for i in range(n_states):
        print(f"Regime {i}")
        print(f"Avg Vol: {regime_means.loc[i, 'Volatility']:.4f}")
        print(f"Avg Return: {regime_means.loc[i, 'Return']:.4f}%")
        print(f"Avg VIX: {regime_means.loc[i, 'VIX']:.2f}")
        print(f"Occurrence: {regime_count[i]} days\n")

    return model, training, predictions

# Fit the regime model once on the fixed training window, using the state
# count and iteration setting chosen in the configuration section
model, training_data, train_predictions = train_hmm_model(
    df,
    train_start_date,
    train_end_date,
    n_states=hidden_states,
    n_iter=em_iterations,
)

Training model on data from 2018-01-01 to 2024-12-31
Training data shape: (1761, 11)
Regime 0
Avg Vol: 5.6755
Avg Return: 0.1116%
Avg VIX: 16.24
Occurrence: 811 days

Regime 1
Avg Vol: 98.6563
Avg Return: -0.1834%
Avg VIX: 30.90
Occurrence: 199 days

Regime 2
Avg Vol: 27.1617
Avg Return: 0.0656%
Avg VIX: 20.87
Occurrence: 751 days



In [15]:
# Visualize the training data with regime classifications using Plotly
# (a new frame: the training rows plus the state predicted for each of them)
training_with_predictions = training_data.assign(Regime=train_predictions)

# One colour per hidden state, taken from a qualitative palette
regime_colors = px.colors.qualitative.Set2[:hidden_states]

# Two stacked panels sharing the date axis: SPY on top, VIX underneath
fig = make_subplots(
    rows=2,
    cols=1,
    shared_xaxes=True,
    vertical_spacing=0.1,
    subplot_titles=('SPY Close Price by Regime', 'VIX by Regime'),
)

# Every SPY trace is added before any VIX trace; only the SPY traces create
# legend entries, so each regime is listed once
for panel_row, value_column in enumerate(('Close', 'VIX'), start=1):
    for regime in range(hidden_states):
        regime_data = training_with_predictions.loc[
            training_with_predictions['Regime'] == regime
        ]
        regime_trace = go.Scatter(
            x=regime_data['Date'],
            y=regime_data[value_column],
            mode='markers',
            marker=dict(color=regime_colors[regime], size=6),
            name=f'Regime {regime}',
            showlegend=(panel_row == 1),
        )
        fig.add_trace(regime_trace, row=panel_row, col=1)

fig.update_layout(
    title_text=f'Market Regimes with SPY and VIX (Training Period: {train_start_date} to {train_end_date})',
    height=800,
    template='plotly_white',
    hovermode='closest',
    legend_title='Regime',
)

fig.update_yaxes(title_text="SPY Price", row=1, col=1)
fig.update_yaxes(title_text="VIX", row=2, col=1)
fig.update_xaxes(title_text="Date", row=2, col=1)

fig.show()

# Visualize transition probabilities with Plotly
transition_matrix = model.transmat_
regime_labels = ["Regime {}".format(state) for state in range(hidden_states)]

# Heatmap cell (row, column) shows the probability of moving from the row
# regime to the column regime, annotated to two decimals
heatmap_trace = go.Heatmap(
    z=transition_matrix,
    x=regime_labels,
    y=regime_labels,
    colorscale='Blues',
    text=np.round(transition_matrix, 2),
    texttemplate="%{text:.2f}",
    textfont={"size": 14},
)
fig_heatmap = go.Figure(data=heatmap_trace)

fig_heatmap.update_layout(
    title='Regime Transition Probabilities',
    xaxis_title='To Regime',
    yaxis_title='From Regime',
    template='plotly_white',
    width=700,
    height=600,
)

fig_heatmap.show()

# Show stationary distribution
stationary_dist = model.get_stationary_distribution()
print("\nStationary Distribution:")
for state_idx in range(hidden_states):
    state_share = stationary_dist[state_idx] * 100
    print(f"Regime {state_idx}: {state_share:.2f}%")

# Same numbers again as a pie chart, drawn with the regime colours
fig_pie = px.pie(
    names=regime_labels,
    values=stationary_dist * 100,
    color_discrete_sequence=regime_colors,
    title='Stationary Distribution of Regimes',
)

fig_pie.update_traces(textposition='inside', textinfo='percent+label')
fig_pie.update_layout(uniformtext_mode='hide', uniformtext_minsize=12)
fig_pie.show()


Stationary Distribution:
Regime 0: 44.77%
Regime 1: 11.51%
Regime 2: 43.71%


In [16]:
# Function to predict regimes for a specific date range
def predict_regimes(model, data, start_date, end_date):
    """
    Predict market regimes for a specific date range using the trained HMM model

    Parameters:
    -----------
    model : hmm.GaussianHMM
        The trained HMM model (only its ``predict`` method is used here)
    data : DataFrame
        The full dataset with calculated indicators
    start_date : str
        Start date for prediction period in 'YYYY-MM-DD' format
    end_date : str
        End date for prediction period in 'YYYY-MM-DD' format

    Returns:
    --------
    DataFrame with Date, Close, VIX, Volatility, Return and Predicted_Regime.
    The frame is empty (same columns, no rows) when there is nothing to
    predict: the window lies in a gap or before the data, or a future window
    contains no weekday. In that case ``model.predict`` is not called.

    Notes:
    ------
    When the window has no historical rows and ends after the last available
    date, placeholder rows are generated for every weekday (Mon-Fri) after
    the last date: prices are held at the last close, Return is 0, VIX_Ratio
    is 1, and VIX / Volume / Volatility are the means of the last 30 rows.
    This is a flat-line assumption, not a forecast, so regimes predicted for
    such rows mainly reflect recent average conditions.
    """
    result_columns = ['Date', 'Close', 'VIX', 'Volatility', 'Return', 'Predicted_Regime']

    # Filter data for prediction period
    pred_data = data[(data['Date'] >= start_date) & (data['Date'] <= end_date)].copy()

    if pred_data.empty:
        print(f"No data available for period {start_date} to {end_date}")
        last_date = data['Date'].max()
        # ISO 'YYYY-MM-DD' strings sort chronologically, so a plain string
        # comparison is enough; the notna check covers an empty input frame.
        if pd.notna(last_date) and end_date > last_date.strftime('%Y-%m-%d'):
            print("NOTE: Prediction period extends into the future")
            future_end = datetime.strptime(end_date, '%Y-%m-%d')

            # Get the most recent 30 days of data for the placeholder values
            recent_data = data.tail(30)
            last_close = recent_data['Close'].iloc[-1]

            # All weekdays after the last known date, built in one step
            # rather than by appending one row at a time
            future_dates = pd.bdate_range(start=last_date + timedelta(days=1), end=future_end)
            future_data = pd.DataFrame({
                'Date': future_dates,
                'Close': last_close,  # Use the last known close price
                'High': last_close,
                'Low': last_close,
                'Open': last_close,
                'Volume': recent_data['Volume'].mean(),  # Use average volume
                'VIX': recent_data['VIX'].mean(),  # Use average VIX
                'Volatility': recent_data['Volatility'].mean(),  # Use average volatility
                'MA': recent_data['MA'].iloc[-1],  # Use the last MA
                'Return': 0.0,  # Assume zero returns for future dates
                'VIX_Ratio': 1.0,  # Assume neutral VIX ratio
            })
            pred_data = future_data[
                (future_data['Date'] >= start_date) & (future_data['Date'] <= end_date)
            ].copy()

    # Nothing to score: hand back an empty, correctly shaped frame so the
    # callers' `results.empty` checks can take over
    if pred_data.empty:
        return pred_data.reindex(columns=result_columns)

    # Prepare observations for prediction (same three features used in training)
    obs = pred_data[['Volatility', 'Return', 'VIX_Ratio']].to_numpy(dtype=float)

    # Predict regimes
    predictions = model.predict(obs)

    # Add predictions to dataframe
    pred_data['Predicted_Regime'] = predictions

    print(f"Predicted regimes for period {start_date} to {end_date}")
    print(f"Data points: {len(pred_data)}")

    # Calculate regime distribution (most frequent regime first)
    regime_counts = pd.Series(predictions).value_counts(normalize=True) * 100
    print("\nRegime Distribution:")
    for regime, percentage in regime_counts.items():
        print(f"Regime {regime}: {percentage:.2f}%")

    return pred_data[result_columns]

In [17]:
# Unified function for regime prediction and visualization
def analyze_market_regimes(start_date, end_date, title=None):
    """
    Comprehensive function to predict and visualize market regimes for any date range

    Uses the notebook-level ``model``, ``df`` and ``regime_colors`` objects
    and shows three figures: SPY/VIX time series coloured by regime, a pie
    chart of time spent per regime, and a volatility-vs-return scatter.

    Parameters:
    -----------
    start_date : str
        Start date in 'YYYY-MM-DD' format
    end_date : str
        End date in 'YYYY-MM-DD' format
    title : str, optional
        Custom title for the plots

    Returns:
    --------
    DataFrame with prediction results, or None when there is no data
    """
    # Get predictions
    results = predict_regimes(model, df, start_date, end_date)

    if results is None or results.empty:
        print("No data available for the specified period")
        return None

    # Set plot title
    if title is None:
        title = f'Market Regimes from {start_date} to {end_date}'

    # Regimes that actually occur, in ascending order, with one fixed label
    # and colour each so every chart below agrees
    present_regimes = sorted(results['Predicted_Regime'].unique())
    regime_names = [f'Regime {regime}' for regime in present_regimes]
    color_by_name = {
        name: regime_colors[regime]
        for name, regime in zip(regime_names, present_regimes)
    }

    # Create subplot figures for SPY and VIX
    fig = make_subplots(rows=2, cols=1,
                        shared_xaxes=True,
                        vertical_spacing=0.1,
                        subplot_titles=('SPY Close Price by Regime', 'VIX by Regime'))

    # (column, trend-line name, whether the regime markers get legend entries).
    # Markers are listed in the legend only for the first panel so each regime
    # appears once.
    panels = (('Close', 'SPY Price', True), ('VIX', 'VIX', False))
    for row, (column, line_name, markers_in_legend) in enumerate(panels, start=1):
        # Regime-coloured markers
        for regime in present_regimes:
            regime_data = results[results['Predicted_Regime'] == regime]
            fig.add_trace(
                go.Scatter(
                    x=regime_data['Date'],
                    y=regime_data[column],
                    mode='markers',
                    marker=dict(color=regime_colors[regime], size=6),
                    name=f'Regime {regime}',
                    showlegend=markers_in_legend
                ),
                row=row, col=1
            )

        # Faint line joining all points to show the overall trend
        fig.add_trace(
            go.Scatter(
                x=results['Date'],
                y=results[column],
                mode='lines',
                line=dict(color='rgba(0,0,0,0.3)'),
                name=line_name,
                showlegend=True
            ),
            row=row, col=1
        )

    fig.update_layout(
        height=800,
        title_text=title,
        template='plotly_white',
        legend_title='Regime',
        hovermode='closest'
    )

    fig.update_xaxes(title_text="Date", row=2, col=1)
    fig.update_yaxes(title_text="SPY Price", row=1, col=1)
    fig.update_yaxes(title_text="VIX", row=2, col=1)

    fig.show()

    # Distribution of regimes pie chart.
    # value_counts() orders by frequency, so the result is re-sorted by regime
    # number to line the values up with the labels and colours; without this
    # the slices carry the wrong regime names.
    regime_percentages = (
        results['Predicted_Regime'].value_counts(normalize=True).sort_index() * 100
    )
    pie_labels = [f"Regime {i}" for i in regime_percentages.index]

    fig_pie = px.pie(
        values=regime_percentages.values,
        names=pie_labels,
        color=pie_labels,
        color_discrete_map=color_by_name,
        title=f'Percentage of Time in Each Regime ({start_date} to {end_date})'
    )

    fig_pie.update_traces(textinfo='percent+label', textposition='inside')
    fig_pie.update_layout(uniformtext_minsize=12, uniformtext_mode='hide')
    fig_pie.show()

    # Scatter plot of Volatility vs Return colored by regime.
    # The regime is passed as a text label: a numeric colour column would be
    # drawn on a continuous scale and the regime palette would be ignored.
    # A separate frame is used so the returned results keep their columns.
    scatter_frame = results.assign(
        Regime=['Regime {}'.format(regime) for regime in results['Predicted_Regime']]
    )
    fig_scatter = px.scatter(
        scatter_frame,
        x='Volatility',
        y='Return',
        color='Regime',
        color_discrete_map=color_by_name,
        category_orders={'Regime': regime_names},
        size='VIX',  # Use VIX for point size
        title=f'Volatility vs Return by Regime ({start_date} to {end_date})',
        labels={'Volatility': 'Volatility', 'Return': 'Return (%)', 'VIX': 'VIX'},
        opacity=0.8
    )

    fig_scatter.update_layout(
        legend_title='Regime',
        hovermode='closest',
        template='plotly_white'
    )

    fig_scatter.update_xaxes(rangemode='tozero')
    fig_scatter.show()

    return results

# Example: Analyze the most recent 3 months
# datetime and timedelta are already imported at the top of the notebook.
# The window gets its own variable names so the notebook-level
# start_date / end_date download settings are not overwritten by this example.
today = datetime.today()
three_months_ago = today - timedelta(days=90)

recent_start_date = three_months_ago.strftime('%Y-%m-%d')
recent_end_date = today.strftime('%Y-%m-%d')

recent_results = analyze_market_regimes(
    recent_start_date, 
    recent_end_date, 
    title=f'Market Regimes in Recent 3 Months ({recent_start_date} to {recent_end_date})'
)

Predicted regimes for period 2025-01-11 to 2025-04-11
Data points: 62

Regime Distribution:
Regime 2: 43.55%
Regime 1: 40.32%
Regime 0: 16.13%


In [18]:
# Create a comprehensive regime analysis dashboard
def create_regime_dashboard():
    """
    Creates a comprehensive dashboard for analyzing the HMM model and regime characteristics

    Reads the notebook-level ``hidden_states``, ``train_predictions``,
    ``training_data``, ``transition_matrix`` and ``regime_colors`` objects.
    Prints a per-regime summary table and a rule-based interpretation of each
    regime, then shows a 3x2 figure: transition network, regime shares on the
    training data, box plots of Return / Volatility / VIX per regime, and a
    bubble chart of the regime averages.

    Returns:
    --------
    (regime_summary, regime_types): the summary DataFrame (one row per
    regime) and a list with one interpretation string per regime, indexed by
    regime number.
    """
    regimes = range(hidden_states)
    predicted = np.asarray(train_predictions)
    feature_values = {
        column: training_data[column].to_numpy()
        for column in ('Volatility', 'Return', 'VIX')
    }

    def values_for(column, regime):
        # Training values of one feature on the rows assigned to one regime
        return feature_values[column][predicted == regime]

    def average_for(column, regime):
        # A regime that never occurs keeps an average of 0
        selected = values_for(column, regime)
        return selected.mean() if selected.size else 0

    # Gather regime statistics
    regime_count = [int((predicted == i).sum()) for i in regimes]
    regime_vol = [average_for('Volatility', i) for i in regimes]
    regime_ret = [average_for('Return', i) for i in regimes]
    regime_vix = [average_for('VIX', i) for i in regimes]
    total_count = sum(regime_count)

    # Create a summary dataframe
    regime_summary = pd.DataFrame({
        'Regime': regimes,
        'Average_Volatility': regime_vol,
        'Average_Return': regime_ret,
        'Average_VIX': regime_vix,
        'Occurrence_Count': regime_count,
        'Occurrence_Percentage': [
            count / total_count * 100 if total_count else 0 for count in regime_count
        ]
    })

    # Display the summary
    print("HMM Market Regime Summary")
    display(regime_summary)

    # Interpret the regimes based on their characteristics.
    # Thresholds: average VIX above 25 counts as high VIX, average daily
    # return above 0.05% as positive, average Volatility of 10 separates
    # low from high.
    print("\nMarket Regime Interpretation:")
    regime_types = []
    for i in regimes:
        if regime_vix[i] > 25:  # High VIX
            if regime_ret[i] < 0:
                regime_type = "Crisis Regime (High VIX, Negative Returns)"
            else:
                regime_type = "Volatile Recovery (High VIX, Positive Returns)"
        elif regime_ret[i] > 0.05:
            if regime_vol[i] < 10:
                regime_type = "Stable Growth (Low Volatility, Positive Returns)"
            else:
                regime_type = "Volatile Growth (High Volatility, Positive Returns)"
        elif regime_ret[i] < 0:
            if regime_vol[i] > 10:
                regime_type = "Bear Market (High Volatility, Negative Returns)"
            else:
                regime_type = "Correction (Moderate Volatility, Negative Returns)"
        else:
            regime_type = "Sideways Market (Moderate Volatility, Flat Returns)"

        regime_types.append(regime_type)
        print(f"Regime {i}: {regime_type}")

    # Create a dashboard layout with 6 subplots
    fig = make_subplots(
        rows=3, cols=2,
        subplot_titles=(
            "Regime Transition Network", 
            "Training Data Regime Distribution", 
            "Regime Return Distribution", 
            "Volatility by Regime",
            "VIX by Regime",
            "Regime Characteristics"
        ),
        specs=[
            [{"type": "scatter"}, {"type": "pie"}],
            [{"type": "box"}, {"type": "box"}],
            [{"type": "box"}, {"type": "scatter"}]
        ],
        vertical_spacing=0.1,
        horizontal_spacing=0.05
    )

    # 1. Regime Transition Network (top left)
    # Nodes are spread evenly around the unit circle
    angles = np.linspace(0, 2 * np.pi, hidden_states, endpoint=False)
    node_x = np.cos(angles).tolist()
    node_y = np.sin(angles).tolist()

    # Edges are added before the nodes so the node markers and their labels
    # are drawn on top of the lines.
    loop_radius = 0.25
    loop_angles = np.linspace(0, 2 * np.pi, 40)
    for i in regimes:
        for j in regimes:
            probability = transition_matrix[i, j]
            if probability <= 0.05:  # Only show significant transitions
                continue
            if i == j:
                # Self-transition: a small circle just outside the node,
                # touching it (a zero-length segment would be invisible)
                edge_x = node_x[i] * (1 + loop_radius) + loop_radius * np.cos(loop_angles)
                edge_y = node_y[i] * (1 + loop_radius) + loop_radius * np.sin(loop_angles)
            else:
                # Two endpoints with no None between them: a None value
                # breaks the line and leaves nothing to draw
                edge_x = [node_x[i], node_x[j]]
                edge_y = [node_y[i], node_y[j]]
            fig.add_trace(
                go.Scatter(
                    x=edge_x,
                    y=edge_y,
                    mode='lines',
                    line=dict(
                        width=probability * 10,  # Line width based on probability
                        color='rgba(150,150,150,0.8)',
                    ),
                    hoverinfo='text',
                    hovertext=f'P({i}->{j}) = {probability:.2f}',
                    showlegend=False
                ),
                row=1, col=1
            )

    # Add nodes
    for i in regimes:
        fig.add_trace(
            go.Scatter(
                x=[node_x[i]], 
                y=[node_y[i]],
                mode='markers+text',
                marker=dict(size=30, color=regime_colors[i]),
                text=[f'{i}'],
                textposition="middle center",
                textfont=dict(color='white', size=14),
                name=f'Regime {i}',
                hoverinfo='text',
                hovertext=f'Regime {i}'
            ),
            row=1, col=1
        )

    # 2. Training Data Regime Distribution (top right)
    fig.add_trace(
        go.Pie(
            labels=[f"Regime {i}" for i in regimes],
            values=[count / total_count * 100 if total_count else 0 for count in regime_count],
            textinfo='percent',
            marker=dict(colors=regime_colors)
        ),
        row=1, col=2
    )

    # 3.-5. One box per regime for Return (middle left), Volatility (middle
    # right) and VIX (bottom left); regimes with no rows are skipped
    box_panels = (('Return', 2, 1), ('Volatility', 2, 2), ('VIX', 3, 1))
    for column, panel_row, panel_col in box_panels:
        for i in regimes:
            regime_values = values_for(column, i)
            if regime_values.size:
                fig.add_trace(
                    go.Box(
                        y=regime_values,
                        name=f'Regime {i}',
                        marker_color=regime_colors[i],
                        boxmean=True  # Show mean as a dashed line
                    ),
                    row=panel_row, col=panel_col
                )

    # 6. Regime Characteristics (bottom right)
    # 2D bubble chart: average Volatility (x) against average Return (y),
    # with the marker size scaled by the regime's average VIX
    fig.add_trace(
        go.Scatter(
            x=regime_summary['Average_Volatility'],
            y=regime_summary['Average_Return'],
            mode='markers',
            marker=dict(
                size=regime_summary['Average_VIX'] * 1.5,  # Scale by VIX
                color=[regime_colors[i] for i in regimes],
                line=dict(width=2, color='DarkSlateGrey')
            ),
            text=[f'Regime {i}<br>VIX: {vix:.2f}' for i, vix in enumerate(regime_summary['Average_VIX'])],
            hoverinfo='text',
            showlegend=False
        ),
        row=3, col=2
    )

    # Update layout
    fig.update_layout(
        height=1200,
        width=1200,
        title_text='HMM Market Regime Analysis Dashboard',
        template='plotly_white',
        showlegend=False
    )

    # Update axes for specific subplots (the network panel has no meaningful axes)
    fig.update_xaxes(title_text="", showticklabels=False, row=1, col=1)
    fig.update_yaxes(title_text="", showticklabels=False, row=1, col=1)

    fig.update_xaxes(title_text="Regime", row=2, col=1)
    fig.update_yaxes(title_text="Return (%)", row=2, col=1)

    fig.update_xaxes(title_text="Regime", row=2, col=2)
    fig.update_yaxes(title_text="Volatility", row=2, col=2)

    fig.update_xaxes(title_text="Regime", row=3, col=1)
    fig.update_yaxes(title_text="VIX", row=3, col=1)

    fig.update_xaxes(title_text="Volatility", row=3, col=2)
    fig.update_yaxes(title_text="Return (%)", row=3, col=2)

    # Show the dashboard
    fig.show()

    return regime_summary, regime_types

# Build the dashboard and keep both of its return values for later cells
# (regime_types is read by predict_future_regimes)
dashboard_outputs = create_regime_dashboard()
regime_summary, regime_types = dashboard_outputs

HMM Market Regime Summary


Unnamed: 0,Regime,Average_Volatility,Average_Return,Average_VIX,Occurrence_Count,Occurrence_Percentage
0,0,5.675468,0.111624,16.235808,811,46.053379
1,1,98.65626,-0.183387,30.902111,199,11.300398
2,2,27.161723,0.065611,20.870559,751,42.646224



Market Regime Interpretation:
Regime 0: Stable Growth (Low Volatility, Positive Returns)
Regime 1: Crisis Regime (High VIX, Negative Returns)
Regime 2: Volatile Growth (High Volatility, Positive Returns)


In [19]:
# Function to predict regimes for future periods
def predict_future_regimes(days=30):
    """
    Predict market regimes for the coming periods

    Uses the notebook-level ``model``, ``df``, ``regime_colors`` and
    ``regime_types`` objects. The window runs from today to ``days`` calendar
    days ahead and is passed to ``predict_regimes``.

    Parameters:
    -----------
    days : int
        Number of days to predict into the future

    Returns:
    --------
    DataFrame with the ``predict_regimes`` columns plus 'Day', 'Month' and
    'Weekday', or None when no predictions could be generated
    """
    today = datetime.today()
    future_date = today + timedelta(days=days)

    # Format dates
    start_date = today.strftime('%Y-%m-%d')
    end_date = future_date.strftime('%Y-%m-%d')

    print(f"Predicting regimes from {start_date} to {end_date}")

    # Use our existing function to generate predictions
    results = predict_regimes(model, df, start_date, end_date)

    if results is None or results.empty:
        print("Failed to generate predictions")
        return None

    # Work on a copy so the calendar columns are not written into the frame
    # handed back by predict_regimes
    results = results.copy()

    # Create a calendar view of predicted regimes
    results['Day'] = results['Date'].dt.day
    results['Month'] = results['Date'].dt.month_name()
    results['Weekday'] = results['Date'].dt.day_name()

    # One fixed label and colour per occurring regime, in ascending order
    present_regimes = sorted(results['Predicted_Regime'].unique())
    regime_names = [f'Regime {regime}' for regime in present_regimes]
    color_by_name = {
        name: regime_colors[regime]
        for name, regime in zip(regime_names, present_regimes)
    }

    # Create a calendar heatmap.
    # The regime is passed as a text label: a numeric colour column would be
    # drawn on a continuous scale and the regime palette would be ignored.
    # A separate frame is used so the label column is not part of the result.
    calendar_frame = results.assign(
        Regime=['Regime {}'.format(regime) for regime in results['Predicted_Regime']]
    )
    fig_cal = px.scatter(
        calendar_frame,
        x='Day', 
        y='Weekday',
        color='Regime',
        color_discrete_map=color_by_name,
        category_orders={'Regime': regime_names},
        title=f'Calendar View of Predicted Regimes ({start_date} to {end_date})',
        hover_data=['Date', 'Close', 'VIX'],
        size_max=15,
        size=[10] * len(calendar_frame)  # Fixed size for all points
    )

    fig_cal.update_layout(
        xaxis_title='Day of Month',
        yaxis_title='Weekday',
        yaxis=dict(
            categoryorder='array',
            categoryarray=['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']
        ),
        template='plotly_white'
    )

    fig_cal.show()

    # Show distribution of predicted regimes.
    # value_counts() orders by frequency; sorting by regime number keeps the
    # values aligned with the labels and colours used in the pie chart below.
    regime_counts = results['Predicted_Regime'].value_counts().sort_index()
    regime_percentages = regime_counts / regime_counts.sum() * 100

    print("\nPredicted Regime Distribution:")
    for regime in regime_percentages.index:
        print(f"Regime {regime}: {regime_percentages[regime]:.2f}% ({regime_counts[regime]} days)")
        # Get the interpretation (skipped when none exists for this regime)
        if 0 <= regime < len(regime_types):
            print(f"  → {regime_types[regime]}")

    # Create prediction summary visualization
    fig_summary = make_subplots(rows=1, cols=2, 
                              subplot_titles=('Predicted Regimes', 'Daily Close Prices'),
                              specs=[[{"type": "pie"}, {"type": "scatter"}]])

    # Pie chart of regime distribution
    fig_summary.add_trace(
        go.Pie(
            labels=[f"Regime {i}" for i in regime_percentages.index],
            values=regime_percentages.values,
            marker=dict(colors=[regime_colors[i] for i in regime_percentages.index])
        ),
        row=1, col=1
    )

    # Line chart of predicted prices
    for regime in present_regimes:
        regime_data = results[results['Predicted_Regime'] == regime]
        fig_summary.add_trace(
            go.Scatter(
                x=regime_data['Date'],
                y=regime_data['Close'],
                mode='markers+lines',
                marker=dict(color=regime_colors[regime]),
                name=f'Regime {regime}'
            ),
            row=1, col=2
        )

    fig_summary.update_layout(
        height=500,
        title_text=f'Future Market Regime Predictions ({start_date} to {end_date})',
        template='plotly_white'
    )

    fig_summary.show()

    return results

# Example run: look 30 calendar days ahead of today
future_predictions = predict_future_regimes(days=30)

Predicting regimes from 2025-04-11 to 2025-05-11
No data available for period 2025-04-11 to 2025-05-11
NOTE: Prediction period extends into the future
Predicted regimes for period 2025-04-11 to 2025-05-11
Data points: 21

Regime Distribution:
Regime 1: 95.24%
Regime 2: 4.76%



Predicted Regime Distribution:
Regime 1: 95.24% (20 days)
  → Crisis Regime (High VIX, Negative Returns)
Regime 2: 4.76% (1 days)
  → Volatile Growth (High Volatility, Positive Returns)


In [20]:
# Example: the same analysis on a hand-picked window. Any period of interest
# can be passed as a start date and an end date.

# Early 2022 correction (January through February)
custom_results = analyze_market_regimes(
    start_date="2022-01-01",
    end_date="2022-02-28",
    title="Market Regimes During Early 2022 Correction",
)

Predicted regimes for period 2022-01-01 to 2022-02-28
Data points: 39

Regime Distribution:
Regime 2: 56.41%
Regime 1: 41.03%
Regime 0: 2.56%
