In [46]:
import pandas as pd
import pandas_datareader.data as web
import sklearn.mixture as mix

import numpy as np
import scipy.stats as scs

import matplotlib as mpl
from matplotlib import cm
import matplotlib.pyplot as plt
from matplotlib.dates import YearLocator, MonthLocator
%matplotlib inline

import seaborn as sns
import missingno as msno
from tqdm import tqdm


In [47]:
strategy = os.getcwd()
print("change working directory to parent")
os.chdir('/mnt/c/workspace/project_quant')
print(os.getcwd())

change working directory to parent
/mnt/c/workspace/project_quant


In [48]:
from utils.index_data import index_data

In [49]:
# get fed data

f1 = 'TEDRATE' # ted spread
f2 = 'T10Y2Y' # constant maturity ten yer - 2 year
f3 = 'T10Y3M' # constant maturity 10yr - 3m

start = pd.to_datetime('2002-01-01')
end = pd.datetime.today()

mkt = 'SPY'
MKT = (web.DataReader([mkt], 'yahoo', start, end)['Adj Close']
       .rename(columns={mkt:mkt})
       .assign(sret=lambda x: np.log(x[mkt]/x[mkt].shift(1)))
       .dropna())

data = (web.DataReader([f1, f2, f3], 'fred', start, end)
        .join(MKT, how='inner')
        .dropna()
       )

p(data.head())

# gives us a quick visual inspection of the data
msno.matrix(data)

  end = pd.datetime.today()


ValueError: Index contains duplicate entries, cannot reshape

In [5]:
# code adapted from http://hmmlearn.readthedocs.io
# for sklearn 18.1

col = 'sret'
select = data.ix[:].dropna()

ft_cols = [f1, f2, f3, 'sret']
X = select[ft_cols].values

model = mix.GaussianMixture(n_components=3, 
                            covariance_type="full", 
                            n_init=100, 
                            random_state=7).fit(X)

# Predict the optimal sequence of internal hidden state
hidden_states = model.predict(X)

print("Means and vars of each hidden state")
for i in range(model.n_components):
    print("{0}th hidden state".format(i))
    print("mean = ", model.means_[i])
    print("var = ", np.diag(model.covariances_[i]))
    print()

sns.set(font_scale=1.25)
style_kwds = {'xtick.major.size': 3, 'ytick.major.size': 3,
              'font.family':u'courier prime code', 'legend.frameon': True}
sns.set_style('white', style_kwds)

fig, axs = plt.subplots(model.n_components, sharex=True, sharey=True, figsize=(12,9))
colors = cm.rainbow(np.linspace(0, 1, model.n_components))

for i, (ax, color) in enumerate(zip(axs, colors)):
    # Use fancy indexing to plot data in each state.
    mask = hidden_states == i
    ax.plot_date(select.index.values[mask],
                 select[col].values[mask],
                 ".-", c=color)
    ax.set_title("{0}th hidden state".format(i), fontsize=16, fontweight='demi')

    # Format the ticks.
    ax.xaxis.set_major_locator(YearLocator())
    ax.xaxis.set_minor_locator(MonthLocator())
    sns.despine(offset=10)

plt.tight_layout()
fig.savefig('Hidden Markov (Mixture) Model_Regime Subplots.png')

In [6]:
return_df

Unnamed: 0,msci_acwi,msci_world,msci_emerging,msci_world_gross,msci_world_value,msci_real_estate,bb_world_agg,bb_emerging_agg,bb_corp_ig,bb_corp_hy,bb_infla_protect,snp_commodity
1992-01-31,0.000000,0.000000,0.000000,-0.031464,-0.009100,0.000000,-0.002171,0.000000,-0.002588,0.035225,0.000000,0.005368
1992-02-29,-0.017548,-0.020678,0.103012,-0.023750,-0.014170,0.000000,-0.013095,0.000000,-0.012103,0.024673,0.000000,0.025628
1992-03-31,-0.015102,-0.017357,0.054748,-0.048605,-0.049508,0.000000,0.003074,0.000000,0.006558,0.012399,0.000000,-0.023659
1992-04-30,-0.063123,-0.066034,0.029763,-0.011712,0.035704,0.000000,0.003851,0.000000,0.005622,0.003798,0.000000,0.030560
1992-05-31,0.026335,0.027579,-0.009451,0.042249,0.033392,0.000000,0.006528,0.000000,0.004180,0.014110,0.000000,0.023145
...,...,...,...,...,...,...,...,...,...,...,...,...
2021-02-28,0.009655,0.002914,0.053679,0.009748,-0.003860,0.008924,-0.006128,-0.007934,-0.010356,0.004106,0.004246,0.068418
2021-03-31,0.027554,0.031810,0.001014,0.007317,0.056323,0.007271,-0.016604,-0.013898,-0.020684,0.005491,-0.020821,0.079945
2021-04-30,0.014839,0.020293,-0.019986,-0.001131,0.040848,0.049885,-0.007515,-0.010507,-0.008003,0.000708,0.000705,0.001453
2021-05-31,0.033565,0.038195,0.002734,0.044316,0.032582,0.048829,0.005114,0.009870,0.006059,0.010157,0.017477,0.075171


In [None]:
sns.set(font_scale=1.5)
states = (pd.DataFrame(hidden_states, columns=['states'], index=select.index)
          .join(select, how='inner')
          .assign(mkt_cret=select.sret.cumsum())
          .reset_index(drop=False)
          .rename(columns={'index':'Date'}))
p(states.head())

sns.set_style('white', style_kwds)
order = [0, 1, 2]
fg = sns.FacetGrid(data=states, hue='states', hue_order=order,
                   palette=scolor, aspect=1.31, size=12)
fg.map(plt.scatter, 'Date', mkt, alpha=0.8).add_legend()
sns.despine(offset=10)
fg.fig.suptitle('Historical SPY Regimes', fontsize=24, fontweight='demi')
fg.savefig('Hidden Markov (Mixture) Model_SPY Regimes.png')