# Uncovering Market Regimes with Real SPY Market Data — Hidden Markov Models in Python


This version **uses real historical SPY market data** from Yahoo Finance using `yfinance`.

If the data download fails (for example, because this environment has no internet access), run the notebook locally.


In [None]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import yfinance as yf
from hmmlearn.hmm import GaussianHMM

# Seed NumPy's global random number generator so that NumPy-based randomness
# is repeatable from one run to the next.
np.random.seed(42)


## Acquire Real Market Data (SPY ~20 years)

In [None]:

# Download daily SPY bars from Yahoo Finance, from 2005-01-01 onwards.
df = yf.download("SPY", start="2005-01-01", progress=False)

# A failed download typically comes back as an empty frame instead of an
# exception, so stop here with a clear message rather than failing later.
if df.empty:
    raise RuntimeError(
        "SPY download returned no data; check the network connection and retry."
    )

# Recent yfinance releases return (field, ticker) MultiIndex columns even for
# a single ticker. Keep only the field level so that df['Close'] is a Series,
# which the return calculation and the plots rely on.
if isinstance(df.columns, pd.MultiIndex):
    df.columns = df.columns.get_level_values(0)

# Keep the two columns used below and drop rows where either is missing.
df = df[['Close','Volume']].dropna()
df.head()


## Create Daily Log Returns Feature

In [None]:

# Daily log return: the row-over-row difference of log(Close).
log_close = np.log(df['Close'])
df['Return'] = log_close.diff()

# The first row has no previous close, so its return is NaN; drop it.
df = df.dropna(how='any')
df.tail()


## Visualize Price & Volume

In [None]:

# Draw the closing price and the traded volume as two separate wide figures.
for column, title in (('Close', 'SPY Close Price'), ('Volume', 'SPY Volume')):
    fig, ax = plt.subplots(figsize=(12,3))
    ax.plot(df.index, df[column])
    ax.set_title(title)
    fig.tight_layout()
    plt.show()


## Return Distribution & Rolling Volatility

In [None]:

# Histogram of the daily log returns, normalised so the bars form a density.
plt.figure(figsize=(6,3))
plt.hist(df['Return'], bins=80, density=True)
plt.title("Daily Log Returns Distribution")
plt.tight_layout(); plt.show()

# Rolling standard deviation of returns over a 20-row window. The first 19
# rows are NaN because the window is not yet full.
df['Vol20'] = df['Return'].rolling(20).std()

plt.figure(figsize=(12,3))
plt.plot(df.index, df['Vol20'])
# The title uses a plain ASCII hyphen; the previous literal contained
# mis-decoded bytes that rendered as garbage in the figure.
plt.title("20-Day Rolling Volatility")
plt.tight_layout(); plt.show()


## Train a 2-State Gaussian HMM

In [None]:

# Observation matrix: one row per day, one column holding the log return.
X = df[['Return']].to_numpy()

# Two hidden states, each with its own full covariance matrix. A fixed
# random_state keeps the fit repeatable.
model = GaussianHMM(
    n_components=2,
    covariance_type='full',
    n_iter=200,
    random_state=42,
)
model.fit(X)

# Report whether training converged and the log-likelihood of the data.
print("Converged:", model.monitor_.converged)
print("LogL:", model.score(X))

# Most likely hidden state for every row of X.
states = model.predict(X)


## Interpret Regimes from Model Parameters

In [None]:

# Per-state mean and standard deviation of the return, as flat arrays.
means = model.means_.flatten()
stds = np.sqrt(model.covars_).flatten()

# One row per state, ordered from the calmest state to the most volatile.
summary = pd.DataFrame(
    {'State': [0,1], 'Mean Return': means, 'Volatility (Std)': stds}
)
summary = summary.sort_values('Volatility (Std)')
summary = summary.reset_index(drop=True)

# Labels follow the sorted order: lowest volatility first.
summary['Interpretation'] = ['Stable / Bullish', 'Volatile / Bearish']
summary


## Transition Matrix — Regime Persistence

In [None]:

# Label the fitted transition matrix: rows are the state being left,
# columns are the state being entered.
n_states = model.transmat_.shape[0]
A = pd.DataFrame(
    model.transmat_,
    index=[f"From {i}" for i in range(n_states)],
    columns=[f"To {j}" for j in range(n_states)],
)
A


## Visualize HMM Regimes on Price Chart

In [None]:

# Dates and prices in the same row order as `states`, which was predicted
# from the rows of `df`.
aligned_idx = df.index
price = df['Close']

# The HMM numbers its hidden states arbitrarily, so state 1 is not guaranteed
# to be the volatile one. Pick the state with the larger fitted return
# variance; covars_ holds one covariance matrix per state and the return is
# the only feature, hence element [0, 0].
volatile_state = int(np.argmax(model.covars_[:, 0, 0]))

fig, ax = plt.subplots(figsize=(14,4))
ax.plot(aligned_idx, price, label="Close Price")

# Shade each run of consecutive identical states. A span ends on the date of
# the switch so that neighbouring spans touch and a one-day regime still has
# visible width.
start = 0
for t in range(1, len(states)):
    if states[t] != states[t-1]:
        span_color = 'red' if states[t-1] == volatile_state else 'green'
        ax.axvspan(aligned_idx[start], aligned_idx[t], alpha=0.15, color=span_color)
        start = t
# The final run extends to the last available date.
last_color = 'red' if states[-1] == volatile_state else 'green'
ax.axvspan(aligned_idx[start], aligned_idx[-1], alpha=0.15, color=last_color)

# The title uses a plain ASCII hyphen; the previous literal contained
# mis-decoded bytes that rendered as garbage in the figure.
ax.set_title("SPY with HMM-Detected Regimes (Green=Stable, Red=Volatile)")
ax.legend()
plt.tight_layout(); plt.show()


## Next Steps


✅ We revealed two interpretable market regimes:
- **Low-volatility, positive-return (Bullish)**
- **High-volatility, negative-return (Bearish)**

📌 Try:
- More hidden states (3–4), choosing the number with **BIC**
- Add features (Volume, VIX)
- Apply to TSLA, NVDA, BTC
