In [7]:

import yfinance as yf
import numpy as np
import pandas as pd
from hmmlearn.hmm import GaussianHMM
import plotly.graph_objects as go
from plotly.graph_objs.scatter.marker import Line
from plotly.subplots import make_subplots
import plotly.express as px
from sklearn.cluster import AgglomerativeClustering
from sklearn.mixture import GaussianMixture
import math
import seaborn as sns

import warnings
from matplotlib import pyplot as plt
warnings.filterwarnings('ignore')



In [8]:
# Fetch the 'Adj Close' series for ticker "SPY" over the requested date range.
spx = yf.download("SPY", start='1970-01-01', end='2024-05-15')['Adj Close']

# Re-wrap the series as a one-column frame: values under 'spx', dates as the
# index (named 'date').
spx_df = (
    pd.DataFrame({'date': spx.index, 'spx': spx.values})
    .set_index('date')
)


[*********************100%%**********************]  1 of 1 completed


In [9]:
def prepare_data_for_model_input(df, ma):
    '''
        Build the model input: smoothed prices and the log returns of the
        smoothed series.

        Input:
        df (pd.DataFrame or pd.Series) - close prices; the first column is the
            series that gets smoothed. The caller's object is not modified.
        ma (int) - length of the moving-average window, in rows

        Output:
        prices (pd.DataFrame) - copy of the input with two extra columns,
            'spx_ma' (rolling mean) and 'spx_log_return' (log return of the
            rolling mean); rows containing NaN are dropped
        prices_array (np.ndarray) - the 'spx_log_return' column as an array of
            shape (n_rows, 1)
    '''
    # Accept a bare Series as well as a DataFrame.
    if isinstance(df, pd.Series):
        df = df.to_frame()
    prices = df.copy(deep=True)
    # Row counts of the working copy and of the input. This used to read the
    # notebook-global `spx_df`, which made the function fail outside the
    # notebook and report the wrong size for any other input.
    print(len(prices), len(df))

    # Smooth the first (price) column only, so additional columns in `df`
    # cannot break the single-column assignment.
    smoothed = prices.iloc[:, 0].rolling(ma).mean()
    prices['spx_ma'] = smoothed
    prices['spx_log_return'] = np.log(smoothed / smoothed.shift(1))

    # At least the first `ma` rows have no complete window or no previous
    # value to compare against; drop them without mutating in place.
    prices = prices.dropna()
    # One observation per row, one feature per column; stays 2-D even when empty.
    prices_array = prices['spx_log_return'].to_numpy().reshape(-1, 1)

    return prices, prices_array

In [18]:
# Smooth with a 7-row moving average; keep both the enriched frame and the
# 2-D log-return array that the models consume.
prices, prices_array = prepare_data_for_model_input(spx_df, ma=7)
prices

7879 7879


Unnamed: 0_level_0,spx,spx_ma,spx_log_return
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1993-02-09,25.168835,25.201540,0.002299
1993-02-10,25.204058,25.239280,0.001496
1993-02-11,25.327354,25.287088,0.001892
1993-02-12,25.133608,25.269475,-0.000697
1993-02-16,24.499538,25.146184,-0.004891
...,...,...,...
2024-05-08,517.190002,509.935721,0.001999
2024-05-09,520.169983,512.534289,0.005083
2024-05-10,520.840027,515.461435,0.005695
2024-05-13,520.909973,517.730002,0.004391


In [19]:
class RegimeDetection:
    """Factory for the three regime-detection models used in this notebook.

    Each ``get_regimes_*`` method creates a default estimator, copies the
    entries of ``params`` onto it as attributes and returns it. The HMM and
    GMM variants are fitted on ``input_data`` first; the clustering variant
    is returned unfitted.
    """

    def initialise_model(self, model, params):
        """Copy every ``params`` entry onto ``model`` as an attribute.

        Attributes are written with plain ``setattr``; names are not checked
        against what the estimator actually accepts.

        Returns the same ``model`` object.
        """
        for name, setting in params.items():
            setattr(model, name, setting)
        return model

    def get_regimes_hmm(self, input_data, params):
        """Return a ``GaussianHMM`` configured from ``params`` and fitted on ``input_data``."""
        configured = self.initialise_model(GaussianHMM(), params)
        return configured.fit(input_data)

    def get_regimes_clustering(self, params):
        """Return an unfitted ``AgglomerativeClustering`` configured from ``params``."""
        return self.initialise_model(AgglomerativeClustering(), params)

    def get_regimes_gmm(self, input_data, params):
        """Return a ``GaussianMixture`` configured from ``params`` and fitted on ``input_data``."""
        configured = self.initialise_model(GaussianMixture(), params)
        return configured.fit(input_data)

In [31]:

# Step 4: Define Function to Plot Hidden States
def plot_hidden_states(hidden_states, prices_df):
    """
    Plot hidden states on top of price data.

    One marker trace is drawn per distinct state, coloured from a fixed
    palette (colours repeat when there are more states than palette entries),
    and the number of observations in each state is printed.

    Parameters:
    hidden_states (array-like): One state label per row of prices_df, matched
        by position. Lists, numpy arrays and pandas Series are accepted.
    prices_df (pd.DataFrame or pd.Series): Close prices; only the first column
        is plotted.

    Returns:
    None: Displays a plotly graph.

    Raises:
    ValueError: If hidden_states does not have one entry per row of prices_df.
    """
    colors = ['blue', 'green', 'red', 'purple', 'orange', 'brown', 'pink', 'gray']  # Extend if more states

    # Ensure prices_df is a DataFrame
    if isinstance(prices_df, pd.Series):
        prices_df = prices_df.to_frame()

    # Work on a flat ndarray: a plain list would turn `hidden_states == state`
    # into a single bool, and a pandas Series would be aligned on its index
    # instead of being matched to the price rows by position.
    hidden_states = np.asarray(hidden_states).ravel()
    if len(hidden_states) != len(prices_df):
        raise ValueError(
            f'hidden_states has {len(hidden_states)} entries but prices_df has '
            f'{len(prices_df)} rows'
        )

    fig = go.Figure()

    for state in np.unique(hidden_states):
        mask = hidden_states == state
        print(f'Number of observations for State {state}: {mask.sum()}')

        # Select the rows of this state once and reuse them for both axes.
        state_prices = prices_df[mask]
        if not state_prices.empty:
            fig.add_trace(go.Scatter(
                x=state_prices.index,
                y=state_prices.iloc[:, 0],  # first column holds the prices
                mode='markers',
                name=f'Hidden State {state}',
                # int() lets float-typed labels (e.g. 1.0) index the palette.
                marker=dict(size=4, color=colors[int(state) % len(colors)])
            ))

    fig.update_layout(
        height=400,
        width=900,
        legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01),
        margin=dict(l=20, r=20, t=20, b=20)
    ).show()



In [21]:
# Shared helper instance; the model cells below call its get_regimes_* methods.
regime_detection = RegimeDetection()

In [30]:
# Agglomerative clustering of the log-return array into two clusters.
# 'random_state' was dropped from the settings: AgglomerativeClustering has no
# such hyperparameter, and because the helper assigns settings with setattr it
# was silently ignored.
# NOTE(review): 'affinity' is the older scikit-learn name for 'metric'
# (deprecated in 1.2, removed in 1.4). Both keys are kept so the distance is
# 'manhattan' whichever name the installed version reads - confirm the target
# version and drop the obsolete key.
params = {
    'n_clusters': 2,
    'linkage': 'complete',
    'affinity': 'manhattan',
    'metric': 'manhattan',
}
clustering = regime_detection.get_regimes_clustering(params)
clustering_states = clustering.fit_predict(prices_array)

plot_hidden_states(np.array(clustering_states), prices[['spx']])

Number of observations for State 0: 1053
Number of observations for State 1: 6819


In [29]:
# Two-component Gaussian mixture fitted to the log-return array, then used to
# label every observation.
params = {
    'n_components': 2,
    'covariance_type': 'full',
    'max_iter': 100000,
    'n_init': 30,
    'init_params': 'kmeans',
    'random_state': 100,
}

gmm_model = regime_detection.get_regimes_gmm(prices_array, params)
gmm_states = gmm_model.predict(prices_array)
plot_hidden_states(np.array(gmm_states), prices[['spx']])

Number of observations for State 0: 7315
Number of observations for State 1: 557


In [26]:
# Two-state Gaussian HMM fitted to the log-return array, then used to label
# every observation.
params = {
    'n_components': 2,
    'covariance_type': "full",
    'random_state': 100,
}

hmm_model = regime_detection.get_regimes_hmm(prices_array, params)
hmm_states = hmm_model.predict(prices_array)
plot_hidden_states(np.array(hmm_states), prices[['spx']])

Number of observations for State 0: 1186
Number of observations for State 1: 6686


In [None]:
# NOTE(review): this cell repeats the HMM fit of the preceding cell with
# identical settings and overwrites hmm_model / hmm_states - candidate for removal.
params = {
    'n_components': 2,
    'covariance_type': "full",
    'random_state': 100,
}

hmm_model = regime_detection.get_regimes_hmm(prices_array, params)
hmm_states = hmm_model.predict(prices_array)
plot_hidden_states(np.array(hmm_states), prices[['spx']])

Number of observations for State 0: 1186
Number of observations for State 1: 6654


SyntaxError: invalid syntax (1311558153.py, line 1)