In [None]:
import pandas as pd
import numpy as np 

from pyhhmm.gaussian import GaussianHMM

import yfinance as yf

import matplotlib.pyplot as plt

# Data Management

In [None]:
# Data Extraction
# Download parameters: the ticker and the date window to request.
symbol = "SPY"
start_date = "2020-01-01"
end_date = "2023-05-29"

# Fetch the price history from Yahoo Finance and keep only the OHLCV columns.
ohlcv_columns = ["Open", "High", "Low", "Close", "Volume"]
data = yf.download(symbol, start=start_date, end=end_date)[ohlcv_columns]

In [None]:
# Add Returns and Range
#   Returns = close divided by the previous row's close, minus 1  (change since the prior row)
#   Range   = high divided by low of the same row, minus 1        (high-to-low spread, a volatility measure)
#
# assign() builds a new frame, so `data` is left untouched; rows containing
# NaN (e.g. the first row, which has no previous close) are then dropped.
df = data.assign(
    Returns=data["Close"] / data["Close"].shift(1) - 1,
    Range=data["High"] / data["Low"] - 1,
).dropna()

df


# HMM - Hidden States
- A Hidden Markov Model (HMM) is a model that looks for hidden states in the market
- Example states
    -  (upward), (downward), (sideways), and many more

In [None]:
# Structure Data
# Build the model input: a new DataFrame holding only the Returns and Range columns.
x_train = df.loc[:, ["Returns", "Range"]]
x_train

# HMM - Learning & Output
- Covariance Types
    - <b>Full</b> means the components may independently adopt any position and shape.

    - <b>Tied</b> means they have the same shape, but the shape may be anything.

    - <b>Diagonal</b> means the contour axes are oriented along the coordinate axes, but otherwise the eccentricities may vary between components.

    - <b>Tied Diagonal</b> is a "tied" situation where the contour axes are oriented along the coordinate axes. (I have added this because initially it was how I misinterpreted "diagonal.")

    - <b>Spherical</b> is a "diagonal" situation with circular contours (spherical in higher dimensions, whence the name).

In [None]:
# Train Model
# Fit a 4-state Gaussian HMM with full covariance matrices on the two
# feature columns held in x_train.

# Seed NumPy's global RNG so that Restart & Run All reproduces the same fit
# and therefore the same state numbering (the plot colours depend on it).
# NOTE(review): assumes pyhhmm draws its random initialisation from NumPy's
# global RNG - confirm against the pyhhmm source.
np.random.seed(42)

model = GaussianHMM(n_states=4, covariance_type="full", n_emissions=2)

# The single observation sequence is passed as a one-element list.
# The trailing ';' keeps the notebook from echoing train()'s return value.
model.train([x_train.to_numpy(copy=True)]);

In [None]:
# Check State Categorization
# predict() is given a one-element list of sequences, so the state labels
# for our single sequence are the first element of its result.
observations = x_train.to_numpy()
hidden_states = model.predict([observations])[0]
hidden_states

In [None]:
# Mean of each feature for every state (one entry per state).
# The printed header gives the feature order used in x_train.
print("[Returns avg , Range avg ]")
model.means

In [None]:
# Covariances for each state.
# The printed header names the feature order used in x_train; these values
# are covariances, not averages, so the header must not say "avg".
print("Covariance per state (feature order: Returns , Range)")
model.covars

# Data Visualization

In [None]:
def split_prices_by_state(prices, states, n_states):
    """Return one price series per hidden state, with NaN where the state is inactive.

    Args:
        prices: sequence of prices, one per observation.
        states: sequence of integer state labels, same length as ``prices``.
        n_states: number of series to build (labels ``0 .. n_states - 1``).

    Returns:
        A list of ``n_states`` lists of floats. Series ``k`` holds ``prices[i]``
        where ``states[i] == k`` and NaN elsewhere, so every series has the
        same length as ``prices`` (a label outside the range yields NaN in
        every series rather than shortening them).

    Raises:
        ValueError: if ``prices`` and ``states`` differ in length.
    """
    price_arr = np.asarray(prices, dtype=float).ravel()
    state_arr = np.asarray(states).ravel()
    if len(price_arr) != len(state_arr):
        raise ValueError(
            f"prices has {len(price_arr)} entries but states has {len(state_arr)}"
        )
    # NaN gaps make matplotlib break the line, so each series only draws the
    # stretches where its own state is active.
    return [
        np.where(state_arr == k, price_arr, np.nan).tolist()
        for k in range(n_states)
    ]


N_STATES = 4  # must match the n_states passed to GaussianHMM

prices = df["Close"].values.astype(float)

print("Check pass length of Close matches Hidden states array: " , len(prices)  ==  len(hidden_states)  )

labels_0, labels_1, labels_2, labels_3 = split_prices_by_state(
    prices, hidden_states, N_STATES
)

# Preview only - the full series has one entry per row of df.
labels_0[:10]

In [None]:
# Plot Chart
# Draw the close price as four overlaid series, one per hidden state. Each
# series is NaN wherever its state is inactive, so only that state's
# stretches appear in its colour.
figure, ax = plt.subplots(figsize=(18, 8))

state_series = [
    (labels_0, "red", "State 0"),
    (labels_1, "green", "State 1"),
    (labels_2, "orange", "State 2"),
    (labels_3, "black", "State 3"),
]
for series, color, label in state_series:
    ax.plot(series, color=color, label=label)

# Title, axis labels and legend so the figure can be read on its own.
ax.set_title("Close price colored by HMM hidden state")
ax.set_xlabel("Observation index")
ax.set_ylabel("Close price")
ax.legend()
plt.show()