In [None]:
# %%
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from hmmlearn import hmm
import yfinance as yf
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

# ======== CONFIGURABLE PARAMETERS ========
# Market data parameters
TICKER = 'SPY'  # Symbol whose prices are analyzed; also used in plot labels
VIX_TICKER = '^VIX'  # Volatility index symbol merged in as the VIX column
START_DATE = "2000-01-01"  # First date requested from the data provider

# HMM model parameters
# NOTE: the labelling logic only knows three names (Bull, Bear, Neutral).
HIDDEN_STATES = 3  # Number of hidden states (market regimes) in the HMM
EM_ITERATIONS = 74  # Passed as n_iter to GaussianHMM (EM iteration cap)

# High-pass filter parameters
HPF_WINDOW = 10  # Moving-average window for the high-pass filter (the original note "#21" looks like a previously tried value - TODO confirm)

# Training period (inclusive bounds, YYYY-MM-DD)
TRAIN_START_DATE = "2019-01-01"
TRAIN_END_DATE = "2024-01-01"

# Rolling window parameters
TRAINING_DELAY = 1  # Calendar days (used as a timedelta) to wait before retraining the model
USE_EXPANDING_WINDOW = False  # If True, use expanding window; if False, use fixed window

# ======== DATA PREPARATION FUNCTIONS ========
def download_market_data(ticker, vix_ticker, start_date):
    """Download price history for a ticker and attach the VIX close to it.

    Parameters:
    - ticker: Symbol of the instrument to analyze (passed to yf.download)
    - vix_ticker: Symbol of the volatility index (passed to yf.download)
    - start_date: First date to request, as a YYYY-MM-DD string

    Returns:
    - DataFrame with one row per ticker date, holding the ticker's columns
      plus Date, VIX (VIX close, forward-filled over gaps) and LogVIX
    """
    end_date = datetime.today().strftime("%Y-%m-%d")
    print(f"Downloading market data from {start_date} to {end_date}...")

    # Download ticker and VIX data
    df_ticker = yf.download(ticker, start=start_date, end=end_date, auto_adjust=True)
    df_vix = yf.download(vix_ticker, start=start_date, end=end_date)

    # When a download comes back with multi-level columns, drop the second
    # level so the remaining column names are plain field names ('Close', ...).
    if df_ticker.columns.nlevels > 1:
        df_ticker.columns = df_ticker.columns.droplevel(1)
    if df_vix.columns.nlevels > 1:
        df_vix.columns = df_vix.columns.droplevel(1)

    df_ticker = df_ticker.reset_index()
    df_vix = df_vix.reset_index()

    # Keep only Date and Close from VIX
    df_vix = df_vix[['Date', 'Close']].rename(columns={'Close': 'VIX'})

    # Left join keeps every ticker date; dates without a VIX print inherit the
    # previous VIX value.  Series.ffill() replaces the deprecated
    # fillna(method='ffill') spelling.
    df = pd.merge(df_ticker, df_vix, on='Date', how='left')
    df['VIX'] = df['VIX'].ffill()
    df['LogVIX'] = np.log(df['VIX'])

    return df

def calculate_indicators(data):
    """Calculate technical indicators for regime classification.

    Parameters:
    - data: DataFrame with at least 'Close' and 'LogVIX' columns

    Returns:
    - Copy of data with these columns added:
      Return (daily percentage change of Close),
      MA10 (10-row mean of Close),
      Volatility (10-row population variance of Close),
      LogVIX_10MA (10-row mean of LogVIX),
      LogVIX_Ratio (LogVIX divided by LogVIX_10MA).
      Remaining NaNs are forward-filled, then back-filled, so the warm-up
      rows at the top carry the first computable value.
    """
    df_copy = data.copy()
    close = df_copy['Close']

    # Calculate returns
    df_copy['Return'] = close.pct_change() * 100

    # Calculate volatility (10-day window).  ddof=0 gives the population
    # variance, i.e. sum((x - mean)**2) / len(x), without a per-window
    # Python callback.
    window = close.rolling(window=10)
    df_copy['MA10'] = window.mean()
    df_copy['Volatility'] = window.var(ddof=0)

    # LogVIX ratio compared to 10-day average
    df_copy['LogVIX_10MA'] = df_copy['LogVIX'].rolling(window=10).mean()
    df_copy['LogVIX_Ratio'] = df_copy['LogVIX'] / df_copy['LogVIX_10MA']

    # ffill()/bfill() replace the deprecated fillna(method=...) spelling
    df_copy = df_copy.ffill().bfill()

    return df_copy

def apply_high_pass_filter(data, window_size=10):
    """
    Apply high-pass filter by subtracting moving average
    from original signal to emphasize significant market movements.

    Parameters:
    - data: DataFrame with 'Return', 'LogVIX' and 'Volatility' columns
    - window_size: Number of rows in the moving-average window

    Returns:
    - Copy of data with, for each of the three signals, a '<signal>_MA'
      column (rolling mean) and an 'HPF_<signal>' column (signal minus its
      rolling mean).  NaNs are forward-filled, then back-filled.
    """
    filtered_data = data.copy()

    # Same treatment for every signal; the order fixes the column order.
    for signal in ('Return', 'LogVIX', 'Volatility'):
        moving_avg = filtered_data[signal].rolling(window=window_size).mean()
        filtered_data[f'{signal}_MA'] = moving_avg
        filtered_data[f'HPF_{signal}'] = filtered_data[signal] - moving_avg

    # Fill NaNs created by the rolling windows; ffill()/bfill() replace the
    # deprecated fillna(method=...) spelling.
    filtered_data = filtered_data.ffill().bfill()

    return filtered_data

# ======== HMM MODEL FUNCTIONS ========
def _summarize_regimes(training, predictions, n_states):
    """Average the unfiltered series per hidden state, for states that occur."""
    regime_stats = {}
    for state in range(n_states):
        regime_mask = (predictions == state)
        count = np.sum(regime_mask)
        if count > 0:
            members = training.loc[regime_mask]
            regime_stats[state] = {
                'count': count,
                'return_avg': np.mean(members['Return']),
                'vix_avg': np.mean(members['VIX']),
                'logvix_avg': np.mean(members['LogVIX']),
                'volatility_avg': np.mean(members['Volatility'])
            }
    return regime_stats


def _score_regime(stats):
    """Return (bull, bear, neutral) scores for one state; zeros if it never occurred."""
    if stats is None:
        return 0, 0, 0

    avg_return = stats['return_avg']
    avg_logvix = stats['logvix_avg']

    # Bull regime scoring: high returns, lower VIX
    bull_score = 0
    if avg_return > 0.05:
        bull_score += 2
    elif avg_return > 0:
        bull_score += 1
    if avg_logvix < 2.8:  # Log(16.5) ≈ 2.8
        bull_score += 2
    elif avg_logvix < 3.0:  # Log(20) ≈ 3.0
        bull_score += 1

    # Bear regime scoring: negative returns, higher VIX
    bear_score = 0
    if avg_return < -0.1:
        bear_score += 2
    elif avg_return < 0:
        bear_score += 1
    if avg_logvix > 3.2:  # Log(25) ≈ 3.2
        bear_score += 2
    elif avg_logvix > 3.0:
        bear_score += 1

    # Neutral regime scoring: modest returns, moderate VIX
    neutral_score = 0
    if -0.05 < avg_return < 0.05:
        neutral_score += 2
    elif -0.1 < avg_return < 0.1:
        neutral_score += 1
    if 2.8 <= avg_logvix <= 3.2:
        neutral_score += 2
    elif 2.7 <= avg_logvix <= 3.3:
        neutral_score += 1

    return bull_score, bear_score, neutral_score


def _assign_regime_labels(regime_stats, n_states):
    """Give every state a distinct label, strongest-scoring states choosing first."""
    candidates = []
    for state in range(n_states):
        bull, bear, neutral = _score_regime(regime_stats.get(state))
        # Ties resolve in the order Bull, Bear, Neutral.
        if bull >= max(bear, neutral):
            preferred = "Bull"
        elif bear >= max(bull, neutral):
            preferred = "Bear"
        else:
            preferred = "Neutral"
        candidates.append((state, max(bull, bear, neutral), preferred))

    # Stable sort: states with equal top scores keep their index order.
    candidates.sort(key=lambda item: item[1], reverse=True)

    regime_labels = [""] * n_states
    unused = ["Bull", "Bear", "Neutral"]
    for state, _, preferred in candidates:
        if preferred in unused:
            label = preferred
        elif unused:
            label = unused[0]
        else:
            # More states than named regimes: fall back to a unique
            # placeholder instead of leaving the label empty.
            label = f"State {state}"
        if label in unused:
            unused.remove(label)
        regime_labels[state] = label
    return regime_labels


def _print_regime_report(model, regime_labels, regime_stats, n_states):
    """Print the regime summary table, transition matrix and stationary distribution."""
    print("\nRegime Characteristics Summary:")
    print("=" * 60)
    print(f"{'Regime':<10} {'Label':<8} {'Count':<8} {'Return %':<10} {'VIX':<8} {'LogVIX':<8}")
    print("-" * 60)

    for i in range(n_states):
        if i in regime_stats:
            stats = regime_stats[i]
            print(f"{i:<10} {regime_labels[i]:<8} {stats['count']:<8} "
                  f"{stats['return_avg']:<10.2f} {stats['vix_avg']:<8.2f} {stats['logvix_avg']:<8.2f}")

    print("\nRegime Transition Matrix:")
    transition_matrix = model.transmat_

    print("=" * 60)
    print(f"{'From/To':<10}", end="")
    for i in range(n_states):
        print(f"{regime_labels[i]:<10}", end="")
    print()
    print("-" * 60)

    for i in range(n_states):
        print(f"{regime_labels[i]:<10}", end="")
        for j in range(n_states):
            print(f"{transition_matrix[i, j]:<10.2f}", end="")
        print()

    stationary_dist = model.get_stationary_distribution()
    print("\nStationary Distribution (Long-term regime probabilities):")
    for i in range(n_states):
        print(f"Regime {i} [{regime_labels[i]}]: {stationary_dist[i]*100:.2f}%")


def train_hmm_model(data, start_date, end_date, n_states=3, n_iter=75, random_state=None):
    """Train a Gaussian HMM on high-pass filtered features and label its states.

    Parameters:
    - data: DataFrame with 'Date', 'Return', 'VIX', 'LogVIX', 'Volatility',
      'HPF_Volatility', 'HPF_Return' and 'HPF_LogVIX' columns
    - start_date: First date of the training window (inclusive)
    - end_date: Last date of the training window (inclusive)
    - n_states: Number of hidden states for the HMM
    - n_iter: Number of EM iterations for the HMM
    - random_state: Optional seed forwarded to GaussianHMM.  Without it the
      state numbering (and possibly the fit) can differ between runs on
      identical data.

    Returns:
    - (model, training, predictions, regime_labels, regime_stats):
      the fitted model, the training rows, the decoded state per training
      row, a label per state index, and per-state averages of the
      unfiltered series (only for states that occur in the predictions)

    Raises:
    - ValueError: if no rows fall inside the training window
    """
    # Filter data to training period
    in_window = (data['Date'] >= start_date) & (data['Date'] <= end_date)
    training = data[in_window].copy()

    print(f"Training HMM model on data from {start_date} to {end_date}")
    print(f"Training data shape: {training.shape}")

    if training.empty:
        raise ValueError(f"No training data between {start_date} and {end_date}")

    # Prepare observations for HMM using HPF features
    obs = np.column_stack([
        training['HPF_Volatility'].values,
        training['HPF_Return'].values,
        training['HPF_LogVIX'].values
    ])

    # Create and train the model
    model = hmm.GaussianHMM(
        n_components=n_states,
        covariance_type="full",
        n_iter=n_iter,
        random_state=random_state,
    )
    model.fit(obs)

    # Get predictions for training data
    predictions = model.predict(obs)

    # Characterize regimes with the original (non-HPF) data for interpretability
    regime_stats = _summarize_regimes(training, predictions, n_states)
    regime_labels = _assign_regime_labels(regime_stats, n_states)
    _print_regime_report(model, regime_labels, regime_stats, n_states)

    return model, training, predictions, regime_labels, regime_stats

def predict_regimes(model, data, start_date, end_date, regime_labels):
    """Decode regimes for the rows of data dated within [start_date, end_date].

    Parameters:
    - model: Fitted model exposing predict(observations)
    - data: DataFrame with 'Date' and the three HPF_* feature columns
    - start_date: First date to include (inclusive)
    - end_date: Last date to include (inclusive)
    - regime_labels: Label for each state index returned by the model

    Returns:
    - Copy of the selected rows with 'Predicted_Regime' and 'Regime_Label'
      columns added, or None when no rows fall inside the date range
    """
    date_col = data['Date']
    pred_data = data[(date_col >= start_date) & (date_col <= end_date)].copy()

    # Nothing to decode for this period
    if len(pred_data) == 0:
        print(f"No data available for period {start_date} to {end_date}")
        return None

    # Observation matrix: one row per day, one column per HPF feature
    feature_columns = ('HPF_Volatility', 'HPF_Return', 'HPF_LogVIX')
    obs = np.column_stack([pred_data[name].values for name in feature_columns])

    states = model.predict(obs)
    pred_data['Predicted_Regime'] = states
    pred_data['Regime_Label'] = [regime_labels[state] for state in states]

    print(f"Predicted regimes for period {start_date} to {end_date}")

    # Share of days spent in each regime, in percent
    shares = pd.Series(states).value_counts(normalize=True) * 100
    print("\nRegime Distribution:")
    for regime, percentage in shares.items():
        print(f"Regime {regime} [{regime_labels[regime]}]: {percentage:.2f}%")

    # A row starts a new run when it differs from the row before it; the
    # first row always differs from its (missing) predecessor.
    decoded = pred_data['Predicted_Regime']
    regime_changes = decoded.ne(decoded.shift(1)).sum()
    if regime_changes > 0:
        avg_duration = len(pred_data) / regime_changes
    else:
        avg_duration = len(pred_data) / 1
    print(f"\nRegime persistence: {avg_duration:.2f} days average duration")

    return pred_data

# ======== VISUALIZATION FUNCTIONS ========
def plot_regimes(results, title=None):
    """Plot the price series with regime markers, then a regime pie chart.

    Parameters:
    - results: DataFrame with 'Date', 'Close' and 'Regime_Label' columns
      (e.g. the output of predict_regimes); None or empty prints a notice
    - title: Title of the price chart; defaults to the covered date range

    Returns:
    - None.  Both figures are displayed with fig.show().
    """
    if results is None or len(results) == 0:
        print("No data available to plot")
        return

    first_day = results['Date'].min().strftime('%Y-%m-%d')
    last_day = results['Date'].max().strftime('%Y-%m-%d')
    if title is None:
        title = f'Market Regimes from {first_day} to {last_day}'

    # One colour per regime label, shared by both charts.  Keying by label
    # (not by position) keeps the pie slices and scatter markers in agreement,
    # and cycling the palette avoids running out of colours.
    palette = px.colors.qualitative.Set2
    labels = sorted(results['Regime_Label'].unique())
    label_colors = {label: palette[i % len(palette)] for i, label in enumerate(labels)}

    # Create figure
    fig = go.Figure()

    # Add price line
    fig.add_trace(
        go.Scatter(
            x=results['Date'],
            y=results['Close'],
            mode='lines',
            line=dict(color='rgba(0,0,0,0.3)', width=1),
            name=f'{TICKER} Price'
        )
    )

    # Add colored markers for different regimes
    for regime_label in labels:
        regime_data = results[results['Regime_Label'] == regime_label]

        fig.add_trace(
            go.Scatter(
                x=regime_data['Date'],
                y=regime_data['Close'],
                mode='markers',
                marker=dict(color=label_colors[regime_label], size=6),
                name=f'{regime_label} Regime',
                hovertemplate='%{x}<br>Price: %{y:.2f}<br>Regime: ' + regime_label
            )
        )

    # Update layout
    fig.update_layout(
        title=title,
        xaxis_title='Date',
        yaxis_title=f'{TICKER} Price',
        template='plotly_white',
        legend_title='Market Regimes',
        hovermode='closest',
        height=600
    )

    fig.show()

    # Create pie chart showing regime distribution
    regime_distribution = results['Regime_Label'].value_counts().reset_index()
    regime_distribution.columns = ['Regime', 'Days']
    regime_distribution['Percentage'] = regime_distribution['Days'] / len(results) * 100

    fig_pie = px.pie(
        regime_distribution,
        values='Percentage',
        names='Regime',
        color='Regime',
        color_discrete_map=label_colors,
        title=f'Regime Distribution ({first_day} to {last_day})'
    )

    fig_pie.update_traces(textinfo='percent+label', textposition='inside')
    fig_pie.update_layout(uniformtext_minsize=12, uniformtext_mode='hide')

    fig_pie.show()

# ======== ROLLING WINDOW FUNCTIONS ========
def run_rolling_window_simulation(df, initial_train_start, initial_train_end, 
                                 final_eval_date=None, training_delay=5, 
                                 use_expanding_window=True, n_states=3, n_iter=75):
    """
    Run a rolling window simulation where the model is retrained periodically.

    Each date from initial_train_end through final_eval_date (both inclusive)
    is classified with the most recently trained model.  After a date has
    been classified, the model is retrained on data up to that date whenever
    the next date to classify has reached the scheduled retrain date.

    Parameters:
    - df: DataFrame containing all market data
    - initial_train_start: Initial training period start date
    - initial_train_end: Initial training period end date
    - final_eval_date: Last date to evaluate (defaults to last date in dataset)
    - training_delay: Number of calendar days to wait before retraining the model
    - use_expanding_window: If True, use expanding window; if False, use fixed window
    - n_states: Number of hidden states for HMM
    - n_iter: Number of EM iterations for HMM

    Returns:
    - DataFrame with regime predictions for the entire period plus a boolean
      'Retrain_Event' column, or None if no date could be evaluated
    """
    print(f"Running rolling window simulation with retraining every {training_delay} days")
    print(f"Using {'expanding' if use_expanding_window else 'fixed'} window approach")

    if final_eval_date is None:
        final_eval_date = df['Date'].max().strftime('%Y-%m-%d')

    # Convert dates to datetime objects for easier manipulation
    initial_train_start_dt = pd.to_datetime(initial_train_start)
    initial_train_end_dt = pd.to_datetime(initial_train_end)
    final_eval_date_dt = pd.to_datetime(final_eval_date)

    # Initialize the first model with the initial training period
    print(f"\n===== Initial Model Training: {initial_train_start} to {initial_train_end} =====")
    model, _, _, regime_labels, _ = train_hmm_model(
        df, initial_train_start, initial_train_end, n_states, n_iter
    )

    next_retrain_date = initial_train_end_dt + timedelta(days=training_delay)
    current_train_start = initial_train_start_dt
    # Length of the sliding window (only used by the fixed-window variant)
    window_length = timedelta(days=(initial_train_end_dt - initial_train_start_dt).days)

    all_results = []
    retrain_dates = [initial_train_end_dt]

    # Dates to evaluate, in order.  A DatetimeIndex yields Timestamps on every
    # pandas version, and the upper bound keeps dates after final_eval_date out.
    trading_days = pd.DatetimeIndex(pd.to_datetime(df['Date']).unique()).sort_values()
    in_eval_range = (trading_days >= initial_train_end_dt) & (trading_days <= final_eval_date_dt)
    eval_dates = list(trading_days[in_eval_range])

    # Pair every date with its successor (None after the last one) so the
    # final date is evaluated too.
    for current_date, next_date in zip(eval_dates, eval_dates[1:] + [None]):
        day_str = current_date.strftime('%Y-%m-%d')

        # Only predict one day at a time (current day)
        day_results = predict_regimes(model, df, day_str, day_str, regime_labels)
        if day_results is not None:
            all_results.append(day_results)

        # Retrain only if another date remains and it has reached the schedule
        if next_date is None or next_date < next_retrain_date:
            continue

        if not use_expanding_window:
            # Fixed window slides the start date along with the end date
            current_train_start = current_date - window_length
        current_train_end = current_date

        print(f"\n===== Retraining Model: {current_train_start.strftime('%Y-%m-%d')} to {current_train_end.strftime('%Y-%m-%d')} =====")
        model, _, _, regime_labels, _ = train_hmm_model(
            df,
            current_train_start.strftime('%Y-%m-%d'),
            current_train_end.strftime('%Y-%m-%d'),
            n_states, n_iter
        )

        # Set the next retrain date
        next_retrain_date = next_date + timedelta(days=training_delay)
        retrain_dates.append(current_date)

    if not all_results:
        print("No results generated during simulation.")
        return None

    # Combine all results
    combined_results = pd.concat(all_results, ignore_index=True)
    combined_results = combined_results.sort_values('Date')

    # Create additional column to indicate model retraining dates
    combined_results['Retrain_Event'] = combined_results['Date'].isin(retrain_dates)

    print(f"\n===== Rolling Window Simulation Complete =====")
    print(f"Evaluated period: {combined_results['Date'].min().strftime('%Y-%m-%d')} to {combined_results['Date'].max().strftime('%Y-%m-%d')}")
    print(f"Total days evaluated: {len(combined_results)}")
    print(f"Number of model retrainings: {len(retrain_dates) - 1}")  # Subtract 1 for initial training

    return combined_results

def plot_rolling_window_results(results):
    """Plot the results of a rolling window simulation, highlighting retraining events.

    Parameters:
    - results: DataFrame with 'Date', 'Close', 'Regime_Label' and
      'Retrain_Event' columns; None or empty prints a notice

    Returns:
    - None.  A price/regime chart and a monthly regime distribution chart
      are displayed with fig.show().  results is not modified.
    """
    if results is None or len(results) == 0:
        print("No data available to plot")
        return

    # Group by label rather than by state index: each retraining produces its
    # own index-to-label mapping, so the same index can mean different regimes
    # on different days.  The colour map is shared by both charts.
    palette = px.colors.qualitative.Set2
    labels = sorted(results['Regime_Label'].unique())
    label_colors = {label: palette[i % len(palette)] for i, label in enumerate(labels)}

    # Create figure
    fig = go.Figure()

    # Add price line
    fig.add_trace(
        go.Scatter(
            x=results['Date'],
            y=results['Close'],
            mode='lines',
            line=dict(color='rgba(0,0,0,0.3)', width=1),
            name=f'{TICKER} Price'
        )
    )

    # Add colored markers for different regimes
    for regime_label in labels:
        regime_data = results[results['Regime_Label'] == regime_label]

        fig.add_trace(
            go.Scatter(
                x=regime_data['Date'],
                y=regime_data['Close'],
                mode='markers',
                marker=dict(color=label_colors[regime_label], size=6),
                name=f'{regime_label} Regime',
                hovertemplate='%{x}<br>Price: %{y:.2f}<br>Regime: ' + regime_label
            )
        )

    # Add vertical lines for retraining events using scatter traces
    retrain_dates = results[results['Retrain_Event']]['Date'].unique()
    y_min = results['Close'].min() * 0.95  # Slightly below min price
    y_max = results['Close'].max() * 1.05  # Slightly above max price

    for i, date in enumerate(retrain_dates):
        # Add a scatter trace with a vertical line
        fig.add_trace(
            go.Scatter(
                x=[date, date],
                y=[y_min, y_max],
                mode='lines',
                line=dict(color='red', width=1, dash='dash'),
                name='Retrain' if i == 0 else None,  # Only label the first one for legend
                showlegend=(i == 0),
                hoverinfo='none'
            )
        )

        # Add text annotation
        fig.add_annotation(
            x=date,
            y=y_max,
            text="Retrain",
            showarrow=False,
            yshift=10
        )

    # Update layout
    fig.update_layout(
        title=f'Rolling Window Regime Predictions ({results["Date"].min().strftime("%Y-%m-%d")} to {results["Date"].max().strftime("%Y-%m-%d")})',
        xaxis_title='Date',
        yaxis_title=f'{TICKER} Price',
        template='plotly_white',
        legend_title='Market Regimes',
        hovermode='closest',
        height=600
    )

    fig.show()

    # Count days per month and regime on a derived frame so the caller's
    # DataFrame does not gain a 'YearMonth' column as a side effect.
    by_month = results.assign(YearMonth=results['Date'].dt.strftime('%Y-%m'))
    monthly_regimes = by_month.groupby(['YearMonth', 'Regime_Label']).size().unstack().fillna(0)

    # Convert to percentages
    monthly_total = monthly_regimes.sum(axis=1)
    monthly_pct = monthly_regimes.div(monthly_total, axis=0) * 100

    # Plot monthly regime distribution
    fig_monthly = px.bar(
        monthly_pct.reset_index().melt(id_vars='YearMonth', var_name='Regime', value_name='Percentage'),
        x='YearMonth',
        y='Percentage',
        color='Regime',
        color_discrete_map=label_colors,
        title='Monthly Regime Distribution',
        barmode='stack'
    )

    fig_monthly.update_layout(
        xaxis_title='Month',
        yaxis_title='Percentage (%)',
        legend_title='Regime',
        hovermode='x unified'
    )

    fig_monthly.show()


# ======== MAIN EXECUTION ========
def main():
    """Run the whole analysis using the module-level configuration.

    Downloads and prepares the data, trains a model on the configured
    training period and plots it, then runs and plots the rolling window
    simulation.

    Returns:
    - dict with keys 'model', 'data', 'regime_labels', 'training_results'
      and 'rolling_results' ('model' and 'regime_labels' come from the
      training-period fit, not from the simulation's retrained models)
    """
    # Data pipeline: raw prices -> indicators -> high-pass filtered features
    raw = download_market_data(TICKER, VIX_TICKER, START_DATE)
    with_indicators = calculate_indicators(raw)
    df = apply_high_pass_filter(with_indicators, HPF_WINDOW)

    # Fit on the configured training period
    fitted = train_hmm_model(
        df, TRAIN_START_DATE, TRAIN_END_DATE, HIDDEN_STATES, EM_ITERATIONS
    )
    model, training_data, train_predictions, regime_labels, regime_stats = fitted

    # Attach the decoded states and their labels to a copy of the training rows
    training_results = training_data.copy()
    training_results['Predicted_Regime'] = train_predictions
    training_results['Regime_Label'] = [regime_labels[state] for state in train_predictions]

    print("\nVisualization of training period regimes:")
    training_title = f'Market Regimes - Training Period ({TRAIN_START_DATE} to {TRAIN_END_DATE})'
    plot_regimes(training_results, training_title)

    # Walk forward from the end of the training period over all available data
    rolling_results = run_rolling_window_simulation(
        df,
        TRAIN_START_DATE,
        TRAIN_END_DATE,
        final_eval_date=None,
        training_delay=TRAINING_DELAY,
        use_expanding_window=USE_EXPANDING_WINDOW,
        n_states=HIDDEN_STATES,
        n_iter=EM_ITERATIONS,
    )

    if rolling_results is not None:
        plot_rolling_window_results(rolling_results)

    return dict(
        model=model,
        data=df,
        regime_labels=regime_labels,
        training_results=training_results,
        rolling_results=rolling_results,
    )

# Function to analyze any time period
def analyze_period(model_objects, start_date, end_date, title=None):
    """Classify and plot an arbitrary date range with an already trained model.

    Parameters:
    - model_objects: dict with 'model', 'data' and 'regime_labels' entries
      (the dict returned by main() has these)
    - start_date: First date to analyze (inclusive)
    - end_date: Last date to analyze (inclusive)
    - title: Optional chart title forwarded to plot_regimes

    Returns:
    - Whatever predict_regimes returns: the labelled rows, or None when the
      range holds no data (in which case nothing is plotted)
    """
    model = model_objects['model']
    data = model_objects['data']
    labels = model_objects['regime_labels']

    results = predict_regimes(model, data, start_date, end_date, labels)
    if results is None:
        return results

    plot_regimes(results, title)
    return results

# Execute main function
if __name__ == "__main__":
    # Keep the returned dict at module level so it can be reused afterwards
    # (for example by analyze_period) without retraining.
    model_objects = main()
    
    # Example: analyze a custom period with the model trained by main().
    # Uncomment and adjust the dates and title as needed:
    # custom_results = analyze_period(model_objects, "2008-01-01", "2009-12-31", "Market Regimes - 2008 Financial Crisis")

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Downloading market data from 2000-01-01 to 2025-04-14...





Training HMM model on data from 2019-01-01 to 2024-01-01
Training data shape: (1258, 19)

Regime Characteristics Summary:
Regime     Label    Count    Return %   VIX      LogVIX  
------------------------------------------------------------
0          Neutral  490      0.16       22.22    3.06    
1          Bull     625      0.07       17.81    2.85    
2          Bear     143      -0.28      34.00    3.47    

Regime Transition Matrix:
From/To   Neutral   Bull      Bear      
------------------------------------------------------------
Neutral   0.93      0.05      0.02      
Bull      0.03      0.95      0.02      
Bear      0.13      0.02      0.86      

Stationary Distribution (Long-term regime probabilities):
Regime 0 [Neutral]: 39.04%
Regime 1 [Bull]: 49.49%
Regime 2 [Bear]: 11.47%

Visualization of training period regimes:


Running rolling window simulation with retraining every 1 days
Using fixed window approach

===== Initial Model Training: 2019-01-01 to 2024-01-01 =====
Training HMM model on data from 2019-01-01 to 2024-01-01
Training data shape: (1258, 19)

Regime Characteristics Summary:
Regime     Label    Count    Return %   VIX      LogVIX  
------------------------------------------------------------
0          Bull     625      0.07       17.81    2.85    
1          Bear     143      -0.28      34.00    3.47    
2          Neutral  490      0.16       22.22    3.06    

Regime Transition Matrix:
From/To   Bull      Bear      Neutral   
------------------------------------------------------------
Bull      0.95      0.02      0.03      
Bear      0.02      0.86      0.13      
Neutral   0.05      0.02      0.93      

Stationary Distribution (Long-term regime probabilities):
Regime 0 [Bull]: 49.48%
Regime 1 [Bear]: 11.47%
Regime 2 [Neutral]: 39.04%
Predicted regimes for period 2024-01-02 to 202