In [5]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm
from scipy.stats import chi2
from statsmodels.tsa.regime_switching.markov_autoregression import MarkovAutoregression

# Load the S&P 500 data set (path is relative to the working directory)
file_path = 'SP500.csv'
df = pd.read_csv(file_path)
print(df)

# Series to model: the log-return column.
# Missing values are dropped by content rather than by position, so the NaN in
# the first row is removed together with any other gaps the column may contain.
# NOTE(review): returns are kept in their raw scale; if the optimizer still
# struggles numerically, rescaling (e.g. to percent) is worth trying - confirm.
time_series = df['SP500 - Log-Return'].dropna()

def fit_markov_ar(time_series, max_lag):
    """Fit a two-regime Markov-switching AR model for each lag order 0..max_lag.

    Every model is built as ``MarkovAutoregression(time_series, k_regimes=2,
    order=lag, switching_ar=False)`` and fitted with ``disp=False``.

    Args:
        time_series: One-dimensional numeric data (for example a pandas
            Series). Must not contain NaN or infinite values.
        max_lag: Highest autoregressive order to fit (inclusive). A negative
            value results in an empty dictionary.

    Returns:
        dict: Maps each lag order to the results object returned by
        ``model.fit``.

    Raises:
        ValueError: If ``time_series`` contains NaN or infinite values.
    """
    # Fail fast with a clear message: the model is constructed without any
    # missing-data handling, so non-finite observations would otherwise flow
    # straight into the estimation and surface as NaN likelihoods or an
    # opaque linear-algebra error.
    values = np.asarray(time_series, dtype=float)
    finite_mask = np.isfinite(values)
    if not finite_mask.all():
        n_bad = int(values.size - finite_mask.sum())
        raise ValueError(
            f"time_series contains {n_bad} NaN or infinite value(s); "
            "remove or fill them before fitting."
        )

    results = {}
    for lag in range(max_lag + 1):
        model = MarkovAutoregression(time_series, k_regimes=2, order=lag, switching_ar=False)
        results[lag] = model.fit(disp=False)
    return results

# Fit the models with up to MAX_LAG lags
MAX_LAG = 5
model_results = fit_markov_ar(time_series, MAX_LAG)

# Choose the lag order with sequential likelihood-ratio tests: starting from
# lag 0, keep adding one lag while the larger model is a significant
# improvement over the previous one, and stop at the first lag that is not.
#
# The fitted results expose the log-likelihood (`llf`) and the parameter
# vector (`params`) but no ready-made LR-test method, so the test is computed
# by hand: LR = 2 * (llf_larger - llf_smaller), compared against a chi-squared
# distribution whose degrees of freedom equal the number of extra parameters.
# NOTE(review): models with different lag orders condition on a different
# number of initial observations, so their log-likelihoods are not computed on
# exactly the same sample; treat the test as approximate - confirm.
SIGNIFICANCE_LEVEL = 0.05  # 5% significance level
best_lag = 0
best_model = model_results[0]
for lag in range(1, MAX_LAG + 1):
    smaller = model_results[lag - 1]
    larger = model_results[lag]
    lr_stat = 2.0 * (larger.llf - smaller.llf)
    df_diff = len(larger.params) - len(smaller.params)
    p_value = chi2.sf(lr_stat, df_diff)
    if p_value < SIGNIFICANCE_LEVEL:
        best_lag = lag
        best_model = larger
    else:
        break

# Draw the observed series and the selected model's fitted values on one axes
_, ax = plt.subplots(figsize=(12, 6))
ax.plot(time_series, label='Observed')
ax.plot(best_model.fittedvalues, color='red', label='Fitted')
ax.set_title(f'Markov Switching AR Model (Lag: {best_lag})')
ax.legend()
plt.show()


      Unnamed: 0        Date  SP500 - Price  SP500 - Log-Return  \
0            1-1  1950-01-04      16.850000                 NaN   
1            2-1  1950-01-05      16.930000            0.004737   
2            3-1  1950-01-06      16.980000            0.002949   
3            4-1  1950-01-09      17.080000            0.005872   
4            5-1  1950-01-10      17.030001           -0.002932   
...          ...         ...            ...                 ...   
16528    16529-1  2015-09-11    1961.050049            0.004477   
16529    16530-1  2015-09-14    1953.030029           -0.004098   
16530    16531-1  2015-09-15    1978.089966            0.012750   
16531    16532-1  2015-09-16    1995.310059            0.008668   
16532    16533-1  2015-09-17    1990.199951           -0.002564   

       squared_log_ret  
0                  NaN  
1             0.000022  
2             0.000009  
3             0.000034  
4             0.000009  
...                ...  
16528         0.0000

  -0.5 * resid**2 / variance - 0.5 * np.log(2 * np.pi * variance))
  -0.5 * resid**2 / variance - 0.5 * np.log(2 * np.pi * variance))
  -0.5 * resid**2 / variance - 0.5 * np.log(2 * np.pi * variance))


LinAlgError: SVD did not converge