In [2]:
# --- Importing the necessary libraries ---
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

In [3]:
# --- Data load (EDA's Copy) ---
# Read the raw price history and index it by a parsed datetime "Date" column.
df = (
    pd.read_csv("../data/SPX.csv")
    .assign(Date=lambda raw: pd.to_datetime(raw["Date"]))
    .set_index("Date")
)

# Modelling window starts at the first row with a strictly positive Volume.
first_valid_volume_date = df.loc[df["Volume"] > 0].index[0]

# Build the model frame in one chain; `.assign` returns a new frame, so `df`
# is left untouched. Later keyword arguments may reference earlier ones.
df_model = (
    df.loc[first_valid_volume_date:]
    .assign(
        # One-period log return of the close.
        Log_Return=lambda d: np.log(d["Close"] / d["Close"].shift(1)),
        # 21-row rolling std of log returns, scaled by sqrt(252).
        # Remember to multiply by 100 if you want the volatility in %.
        Volatility=lambda d: d["Log_Return"].rolling(window=21).std() * np.sqrt(252),
        # Relative change of the close versus 126 rows earlier.
        Momentum_126d=lambda d: (d["Close"] - d["Close"].shift(126)) / d["Close"].shift(126),
    )
    # Drop every row containing a NaN in any column (this includes the
    # warm-up rows of the shifted / rolling features).
    .dropna()
)

In [7]:
# Columns of df_model that are fed to the HMM
features = ['Log_Return', 'Volatility', 'Momentum_126d']
X = df_model[features]

# Learn the scaling parameters from X, then apply them to the same X
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

# Wrap the scaled NumPy array in a DataFrame that keeps the original
# date index and feature names
df_scaled = pd.DataFrame(X_scaled, index=X.index, columns=features)

print(df_scaled.head())

            Log_Return  Volatility  Momentum_126d
Date                                             
1950-07-05    0.935898    1.636076       0.236710
1950-07-06    0.533881    1.570493       0.179775
1950-07-07   -1.388665    1.593439       0.001155
1950-07-10   -0.486997    1.532642      -0.071272
1950-07-11   -1.587839    1.533520      -0.274775


In [None]:
from hmmlearn.hmm import GMMHMM

# Architecture definition
K = 3                 # number of hidden states (n_components)
M = 2                 # number of mixture components per state (n_mix)
COVARIANCE = 'full'
SEED = 42             # fixed seed so the fit is reproducible on re-run

# Controlled initialization of the model:
# start probabilities and the transition matrix are set by hand below, so they
# are excluded from init_params; means ('m'), covariances ('c') and mixture
# weights ('w') are left for fit() to initialize.
model = GMMHMM(
    n_components = K,
    n_mix = M,
    covariance_type = COVARIANCE,
    init_params = 'mcw',
    n_iter = 100,
    tol = 1e-4,
    random_state = SEED,
)

# Initial probability: uniform over the K states
model.startprob_ = np.full(K, 1.0 / K)

# Configure transition matrix: prob_diag of staying in the current state, the
# remaining mass split evenly across the other K - 1 states so that every row
# sums to 1 for any K (not only K = 3).
prob_diag = 0.95
prob_off_diag = (1 - prob_diag) / (K - 1)

model.transmat_ = np.full((K, K), prob_off_diag)
np.fill_diagonal(model.transmat_, prob_diag)

# Fail early with a clear message instead of deep inside fit().
assert np.allclose(model.transmat_.sum(axis=1), 1.0), "transition rows must sum to 1"

print("Transition matrix:\n", model.transmat_)

model.fit(X_scaled)



Transition matrix:
 [[0.95  0.025 0.025]
 [0.025 0.95  0.025]
 [0.025 0.025 0.95 ]]


0,1,2
,n_components,3
,n_mix,2
,min_covar,0.001
,startprob_prior,1.0
,transmat_prior,1.0
,weights_prior,"array([[1., 1... [1., 1.]])"
,means_prior,"array([[[0., ...0., 0., 0.]]])"
,means_weight,"array([[0., 0... [0., 0.]])"
,covars_prior,"array([[[[0.,...., 0., 0.]]]])"
,covars_weight,"array([[-5., ... [-5., -5.]])"
