# HMM Model Comparison: Small vs Large Models

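This Jupyter Notebook compares two Gaussian Hidden Markov Models (HMMs) fitted to the log returns of the BNP Paribas closing price: a small model with 2 hidden states and a large model with 100 hidden states. Both models are evaluated on the same return series, by log-likelihood and by their decoded hidden-state sequences.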

In [None]:
import numpy as np
import pandas as pd
from hmmlearn import hmm
import matplotlib.pyplot as plt

# Read the BNP Paribas price history and put it in chronological order
data_bnp = pd.read_csv('../data/BNPPA.csv')
data_bnp['Date'] = pd.to_datetime(data_bnp['Date'])
data_bnp = data_bnp.sort_values('Date')

# Keep only rows with a strictly positive close so the logarithm is defined
has_valid_close = data_bnp['Close'] > 0
data_bnp = data_bnp[has_valid_close]

# Log return = difference of consecutive log closes; the first remaining row
# has no predecessor, so its NaN return is dropped
log_close = np.log(data_bnp['Close'])
data_bnp['LogRet'] = log_close.diff()
data_bnp = data_bnp.dropna(subset=['LogRet'])

# Reshape the returns into a single-feature column (n_samples, 1) for the HMM
observations = data_bnp['LogRet'].to_numpy().reshape(-1, 1)


## Define and Train HMM Models

In [None]:
def train_hmm(n_components, X=None, n_iter=100, random_state=42):
    """Fit a Gaussian HMM with ``n_components`` hidden states.

    Parameters
    ----------
    n_components : int
        Number of hidden states of the model.
    X : array-like, optional
        Observation sequence passed to ``GaussianHMM.fit``. When omitted,
        the notebook-level ``observations`` array is used, which keeps
        existing ``train_hmm(n)`` calls working unchanged.
    n_iter : int, optional
        Value forwarded to ``GaussianHMM(n_iter=...)``. Defaults to 100.
    random_state : int, optional
        Value forwarded to ``GaussianHMM(random_state=...)`` so repeated
        runs use the same seed. Defaults to 42.

    Returns
    -------
    hmm.GaussianHMM
        The model after ``fit`` has been called on the observations.
    """
    if X is None:
        # Fall back to the notebook global, resolved at call time
        X = observations
    model = hmm.GaussianHMM(
        n_components=n_components,
        covariance_type='full',
        n_iter=n_iter,
        random_state=random_state,
    )
    model.fit(X)
    return model

# Fit the two models under comparison: 2 hidden states vs. 100 hidden states
hmm_small = train_hmm(n_components=2)
hmm_large = train_hmm(n_components=100)


## Compare Log-Likelihood of Models

In [None]:
# Score each fitted model on the observation sequence.
# NOTE: this is the same `observations` array the models were fitted on,
# so these are in-sample log-likelihoods.
log_likelihood_small = hmm_small.score(observations)
log_likelihood_large = hmm_large.score(observations)

# Report both scores, one per line
print(
    f'Log-Likelihood (2 states): {log_likelihood_small}',
    f'Log-Likelihood (100 states): {log_likelihood_large}',
    sep='\n',
)

## Visualize Hidden States

In [None]:
# Decode one hidden-state label per observation for each model
hidden_states_small = hmm_small.predict(observations)
hidden_states_large = hmm_large.predict(observations)

# Overlay both decoded sequences; x is the position in `observations`,
# y is the integer label of the decoded state
plt.figure(figsize=(10, 4))
plt.plot(hidden_states_small, label='2 States', linestyle='--')
plt.plot(hidden_states_large, label='100 States', alpha=0.5)
plt.title('Hidden States Comparison')
plt.xlabel('Observation index')
plt.ylabel('Hidden state index')
plt.legend()
plt.tight_layout()  # keep the added axis labels from being clipped
plt.show()