# This notebook formulates the GMM-HMM used in our research with a shorter window

In [20]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import plotly.express as px
import plotly.io as pio
import matplotlib.ticker as ticker
import seaborn as sns

from hmmlearn import hmm

In [21]:
# --- Load inputs -------------------------------------------------------------
# Every CSV is read with a single header row and its first column parsed as a
# date index.
yield_factors = pd.read_csv(
    '../Github/Data/Processed/yield_usage.csv',
    header=[0], index_col=[0], parse_dates=True,
)
ffr_df = pd.read_csv(
    '../Github/Data/Raw Data/US FFR.csv',
    header=[0], index_col=[0], parse_dates=True,
).sort_index()  # the only one of the three frames that is explicitly sorted

# --- Restrict to the sample period -------------------------------------------
start_date = pd.Timestamp('1989-01-01')
ffr_df = ffr_df.loc[start_date:]

# Macro table: drop the Taylor-rule column, keep rows from start_date onward,
# and give the three remaining columns short names.
macro_df = (
    pd.read_csv(
        '../Github/Data/calculated_macro.csv',
        header=[0], index_col=[0], parse_dates=True,
    )
    .drop(columns='Traditional Taylor Rule')
    .loc[start_date:]
    .set_axis(['PCE', 'RGAP', 'FFR'], axis=1)
)

For example, the PCE that represents the first-quarter macro information of the market only becomes available at the end of April; hence, we cannot use it to project the FFR of April 1st.

In other words, if we want to project the FFR on 1990-07-01 (which represents the FFR based on Q2 data), we can only use the PCE and RGAP of the first quarter, i.e. the row dated 1990-01-01. However, the dataset has already been shifted once to create `macro_df`, so here we only need to shift it once more to obtain the starting macro data.

In [22]:
# First estimation window: yield-factor rows whose dates fall between
# 1990-01-02 and 1990-03-29 (label-based slice, both ends included).
yield_data_0 = yield_factors.loc[pd.Timestamp('1990-01-02'):pd.Timestamp('1990-03-29')]

# Macro row dated 1989-10-01, reduced to its first two entries (PCE and RGAP
# under the column names assigned in the loading cell) as a plain list.
macro_0 = macro_df.loc[pd.Timestamp('1989-10-01')].to_numpy().tolist()[:2]

In [23]:
# Fit a hidden Markov model on the Level factor of the first window and
# decode the state sequence.
# NOTE(review): the notebook talks about a GMM-HMM, but the estimator built
# here is hmm.GaussianHMM, and `n_components_per_state` is what gets passed as
# its `n_components` argument. `n_hidden_states` is not used in this cell.
np.random.seed(4200)  # seed NumPy's global RNG before fitting
n_hidden_states = 1
n_components_per_state = 3

# Two-dimensional array with a single column: the Level factor only.
observed_states = yield_data_0[['Level']].to_numpy()

hidden_states_distribution = hmm.GaussianHMM(
    n_components=n_components_per_state,
    n_iter=100,
)
hidden_states_distribution.fit(observed_states)

# Decoded state label for every observation of the window.
hidden_states = hidden_states_distribution.predict(observed_states)

print("Predicted Hidden States:\n", hidden_states)
print("Last date value:", hidden_states[-1])

Predicted Hidden States:
 [2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1]
Last date value: 1




In [24]:
# Build one window of yield factors for every pair of consecutive FFR dates.
# NOTE(review): the reason for discarding the first three windows is not
# stated in the notebook -- presumably it lines the windows up with the lagged
# macro data; confirm.
N_SKIPPED_WINDOWS = 3
factor_columns = ['Level', 'Slope', 'Curvature']
ffr_dates = ffr_df.index

result_df_list = []
# Dedicated loop names are used so that the sample-start `start_date` defined
# in the loading cell is not overwritten by this loop.
for window_start, next_window_start in zip(ffr_dates[:-1], ffr_dates[1:]):
    # .loc label slices include both endpoints, so the window stops one day
    # before the next FFR date to keep windows from overlapping.
    window_end = next_window_start - pd.Timedelta(days=1)
    result_df_list.append(yield_factors.loc[window_start:window_end, factor_columns])

result_df_list = result_df_list[N_SKIPPED_WINDOWS:]

In [25]:
# Display the last window in the list.
# NOTE(review): in the saved output this window has only 10 rows, fewer than
# the 43 rows the fitting cell further down counts back from the end of each
# window.
result_df_list[-1]

Unnamed: 0_level_0,Level,Slope,Curvature
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-06-30,-0.314621,-0.141355,-0.076155
2023-07-03,-0.278038,-0.181482,-0.069674
2023-07-05,-0.154349,-0.165117,-0.013837
2023-07-06,0.000538,-0.190955,-0.028726
2023-07-07,0.042042,-0.151957,-0.076181
2023-07-10,-0.081089,-0.041318,-0.043154
2023-07-11,-0.106431,-0.056646,-0.002692
2023-07-12,-0.312172,0.063112,-0.011444
2023-07-13,-0.481945,0.170826,-0.016361
2023-07-14,-0.350077,0.046103,-0.03414


In [26]:
# For every window and every yield factor, fit a 3-state Gaussian HMM on that
# factor alone and record the decoded state STATE_LOOKBACK rows before the end
# of the window.
np.random.seed(4200)  # seed NumPy's global RNG before the fits

n_components_per_state = 3  # passed to GaussianHMM as `n_components`
n_hidden_states = 1         # not used by this cell
FACTOR_NAMES = ['Level', 'Slope', 'Curvature']
# Position, counted back from the end of each window, of the state to keep.
# NOTE(review): presumably this is the "shorter window" cut-off; confirm.
STATE_LOOKBACK = 43

all_factors = []
for window_number, df in enumerate(result_df_list):
    # A window with fewer than STATE_LOOKBACK rows cannot be indexed at
    # [-STATE_LOOKBACK]; previously this raised IndexError and aborted the
    # cell (the last window in the saved output has only 10 rows).
    # Rows of `all_factors` are matched to dates by position later on, so we
    # stop at the first short window instead of skipping it: this keeps a
    # contiguous prefix, which is what was left in `all_factors` when the
    # exception was raised.
    if len(df) < STATE_LOOKBACK:
        print(
            f"Stopping at window {window_number}: it has {len(df)} rows, "
            f"at least {STATE_LOOKBACK} are required."
        )
        break

    factors = []
    for name in FACTOR_NAMES:
        # Single-column 2-D array holding one factor of this window.
        observed_states = df[[name]].values

        hidden_states_distribution = hmm.GaussianHMM(n_components=n_components_per_state, n_iter=100)
        hidden_states_distribution.fit(observed_states)

        # Decoded state label for every row of the window.
        hidden_states = hidden_states_distribution.predict(observed_states)
        factors.append(hidden_states[-STATE_LOOKBACK])
    all_factors.append(factors)

Model is not converging.  Current: 31.991895669594758 is not greater than 31.992271599648443. Delta is -0.0003759300536856358
Model is not converging.  Current: 91.04002251630749 is not greater than 91.04526434503121. Delta is -0.0052418287237259165
Model is not converging.  Current: 119.27167742223489 is not greater than 119.2738685102992. Delta is -0.002191088064307678
Model is not converging.  Current: 116.32982914035192 is not greater than 116.43668047172648. Delta is -0.10685133137455693
Model is not converging.  Current: 111.89055189112123 is not greater than 111.95086985158311. Delta is -0.0603179604618731
Model is not converging.  Current: 91.83962247919125 is not greater than 91.84223767575493. Delta is -0.0026151965636813657
Model is not converging.  Current: 129.13702222143203 is not greater than 129.14192372755213. Delta is -0.004901506120091881
Model is not converging.  Current: 103.51323564827575 is not greater than 103.51863560759122. Delta is -0.00539995931546855
Model 

In [27]:
# Turn the per-window state labels into a DataFrame indexed by macro dates.
# NOTE(review): rows are matched to dates purely by position -- the macro
# index minus its first three and last two entries must have exactly as many
# dates as there are rows in `all_factors`, otherwise the constructor raises.
idx_another = macro_df.index[3:-2]

all_factors = pd.DataFrame(
    np.array(all_factors),
    index=idx_another,
    columns=['Level', 'Slope', 'Curvature'],
)

In [28]:
# Put the macro columns and the HMM state labels side by side (aligned on the
# date index), keep only dates where every column is present, drop the FFR
# column, and write the result to disk.
new_macro = (
    pd.concat([macro_df, all_factors], axis=1)
    .dropna(how='any')
    .drop(columns='FFR')
)
new_macro.to_csv('../Github/Data/Processed/New_Macro_GMMHMM.csv')