# Estimation of Hidden Markov Model


In [38]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
df = pd.read_csv('SP500.csv', sep=';')

# df['Date'] = pd.to_datetime(df['Date'])

# plt.figure(figsize=(15, 6))
# plt.plot(df['Date'], df['Return'])
# plt.xlabel('Year')
# plt.ylabel('Return')
# plt.title('Daily Returns from 1950 to Present')

# # Setting x-axis to show only the years
# plt.gca().xaxis.set_major_locator(mdates.YearLocator())
# plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
# plt.xticks(rotation=45)
# plt.tight_layout()

# plt.show()

# df

# AR, MS-AR and ARCH Estimations

In [20]:

import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.tsa.ar_model import AutoReg

from arch import arch_model
from statsmodels.tsa.regime_switching import MarkovAutoregression
# Sample data creation (as the actual data is not provided)
dates = pd.date_range(start='1950-01-01', end='2023-04-30', freq='D')
returns = np.random.normal(0, 0.05, size=len(dates))

# Creating the dataframe
df = pd.DataFrame({'Date': dates, 'Return': returns})
df.set_index('Date', inplace=True)

# 1. AR Model
ar_model = AutoReg(df['Return'], lags=1)
ar_result = ar_model.fit()

# 2. Markov Switching Model (general model used as a proxy for Markov Switching AR)
ms_model = MarkovSwitching(df['Return'], k_regimes=3, order=1, trend='c', switching_variance=True)
ms_result = ms_model.fit()

# 3. GARCH(1,1) Model with t-distributed errors
garch_model = arch_model(df['Return'], p=1, q=1, dist='StudentsT')
garch_result = garch_model.fit()

# Outputting the summary of each model
ar_summary = ar_result.summary()
ms_summary = ms_result.summary()
garch_summary = garch_result.summary()

ar_summary, ms_summary, garch_summary



ImportError: cannot import name 'MarkovAutoregression' from 'statsmodels.tsa.regime_switching' (/home/august/.venvs/jupyter/lib/python3.11/site-packages/statsmodels/tsa/regime_switching/__init__.py)

In [26]:
import yfinance as yf
# Fetch S&P 500 data from Yahoo Finance
sp500_data = yf.download('^GSPC', start='2015-09-17', end='2023-04-30')

# Extracting closing prices and processing
sp500_close = sp500_data['Close']
sp500_log_return = np.log(sp500_close).diff().dropna()

# Creating a new DataFrame for S&P 500 data
sp500_df = pd.DataFrame({'Date': sp500_log_return.index, 'Return': sp500_log_return.values})

# Assuming df is your existing DataFrame
sp500_df

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0,Date,Return
0,2015-09-18,-0.016296
1,2015-09-21,0.004555
2,2015-09-22,-0.012395
3,2015-09-23,-0.002051
4,2015-09-24,-0.003369
...,...,...
1911,2023-04-24,0.000851
1912,2023-04-25,-0.015937
1913,2023-04-26,-0.003849
1914,2023-04-27,0.019377


In [27]:

# Merge the new S&P 500 data with your existing DataFrame
df['Date'] = pd.to_datetime(df['Date'])
merged_df = pd.merge(df, sp500_df, on='Date', how='left')

# Save the merged DataFrame
merged_df.to_csv('file.csv', index=False)
merged_df

Unnamed: 0,Date,Return_x,Return_y
0,1950-01-04,0.000000,
1,1950-01-05,0.004737,
2,1950-01-06,0.002949,
3,1950-01-09,0.005872,
4,1950-01-10,-0.002932,
...,...,...,...
16528,2015-09-11,0.004477,
16529,2015-09-14,-0.004098,
16530,2015-09-15,0.012750,
16531,2015-09-16,0.008668,


In [35]:

# Concatenating
merged_df = pd.concat([df, sp500_df]).drop_duplicates(subset='Date', keep='last').reset_index(drop=True)

# Save the merged dataframe to 'file.csv'
file_path = 'file.csv'
merged_df.to_csv(file_path, index=False, sep=';')

merged_df.head(), merged_df.tail(), file_path

(                       Date,Return       Date    Return
 0  2023-04-28,0.008219331313243927        NaT       NaN
 1                              NaN 2015-09-18 -0.016296
 2                              NaN 2015-09-21  0.004555
 3                              NaN 2015-09-22 -0.012395
 4                              NaN 2015-09-23 -0.002051,
      Date,Return       Date    Return
 1912         NaN 2023-04-24  0.000851
 1913         NaN 2023-04-25 -0.015937
 1914         NaN 2023-04-26 -0.003849
 1915         NaN 2023-04-27  0.019377
 1916         NaN 2023-04-28  0.008219,
 'file.csv')

In [29]:
merged_df

Unnamed: 0,Date,Return
0,1950-01-04,0.000000
1,1950-01-05,0.004737
2,1950-01-06,0.002949
3,1950-01-09,0.005872
4,1950-01-10,-0.002932
...,...,...
18444,2023-04-24,0.000851
18445,2023-04-25,-0.015937
18446,2023-04-26,-0.003849
18447,2023-04-27,0.019377
