# Imports

In [None]:
import pandas as pd
import numpy as np 

from pyhhmm.gaussian import GaussianHMM

import yfinance as yf

import matplotlib.pyplot as plt

# Fetch Data

In [None]:
# Download daily price history for one ticker and keep only the OHLC columns.
symbol = "SPY"
start_date = "2020-01-01"
end_date = "2023-06-01"

# Yahoo Finance download; start/end are passed by keyword for readability.
data = yf.download(symbol, start=start_date, end=end_date)
data = data[["Open", "High", "Low", "Close"]]

In [None]:
# Derive the two features fed to the HMM:
#   Returns = today's close divided by the previous row's close, minus 1
#   Range   = today's high divided by today's low, minus 1 (intraday volatility proxy)
# The first row has no previous close, so its Returns is NaN and dropna() removes it.
df = data.assign(
    Returns=lambda frame: frame["Close"] / frame["Close"].shift(1) - 1,
    Range=lambda frame: frame["High"] / frame["Low"] - 1,
).dropna()

print("Length: ", df.shape[0])

# Add Moving Average

In [None]:
# Simple moving averages of the close; each column is NaN until its window is full.
ma_windows = {"12_MA": 12, "21_MA": 21}
for ma_column, ma_window in ma_windows.items():
    df[ma_column] = df["Close"].rolling(window=ma_window).mean()

# Train/Test Split Data

In [None]:
# Chronological split: the first 500 rows train the model, the remainder is held out.
hmm_features = df[["Returns", "Range"]]
x_train, x_test = hmm_features.iloc[:500], hmm_features.iloc[500:]

# Full-column view of the held-out rows, kept for the backtest further down.
save_df = df.iloc[500:]

print(f"Train Data Length: {len(x_train)}")
print(f"Test Data Length: {len(x_test)}")

# Train HMM

In [None]:
# Fit a 4-state Gaussian HMM with full covariance on the two training features.
# train() is given a list holding a single observation array (a copy of x_train's values).
model = GaussianHMM(n_states=4, n_emissions=2, covariance_type="full")
train = model.train([x_train.to_numpy(copy=True)])

# Test HMM

In [None]:
# Decode the TRAINING observations (in-sample check): note the input is x_train, not x_test.
# predict() receives a one-element list; keep the first element of what it returns.
train_sequences = model.predict([x_train.values])
test = train_sequences[0]

# Make Predictions

In [62]:
# Backtest frame: held-out rows without the High/Low columns (drop returns a new frame).
df_main = save_df.drop(columns=["High", "Low"])

# Decode the held-out observations and store the result alongside the prices.
hmm_results = model.predict([x_test.values])[0]
df_main["HMM"] = hmm_results

# Run BackTest
- If the 12-day moving average is greater than the 21-day moving average, place a signal:
    -  1 : go long
    -  0 : do nothing
    - -1 : go short

In [None]:
# Moving-average crossover signal: 1 when the 12-row MA is above the 21-row MA, else 0.
# Rows where either average is NaN match neither mask and therefore stay NaN.
fast_above_slow = df_main["12_MA"] > df_main["21_MA"]
fast_not_above_slow = df_main["12_MA"] <= df_main["21_MA"]

df_main.loc[fast_above_slow, "MA_Signal"] = 1
df_main.loc[fast_not_above_slow, "MA_Signal"] = 0

In [63]:
# Flag each row by whether its decoded HMM state is in the favorable set (1 = yes, 0 = no).
# NOTE(review): if the model only emits states 0-3 (TODO confirm), listing all four here
# makes every row 1, so this filter currently excludes nothing.
favorable_states = [0, 1, 2, 3]
hmm_values = [int(state in favorable_states) for state in df_main["HMM"].values]

df_main["HMM_Signal"] = hmm_values

Unnamed: 0_level_0,Open,Close,Returns,Range,12_MA,21_MA,HMM,HMM_Signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2021-12-28,477.720001,476.869995,-0.000817,0.005777,467.337502,464.403335,3,1
2021-12-29,476.980011,477.480011,0.001279,0.005547,467.899170,465.016668,0,1
2021-12-30,477.929993,476.160004,-0.002765,0.007001,468.698336,465.997621,3,1
2021-12-31,475.640015,474.959991,-0.002520,0.004614,469.665003,467.162383,3,1
2022-01-03,476.299988,477.709991,0.005790,0.008441,470.257502,468.129525,0,1
...,...,...,...,...,...,...,...,...
2023-05-24,412.420013,411.089996,-0.007245,0.007173,413.984169,412.637143,3,1
2023-05-25,414.739990,414.649994,0.008660,0.009093,414.294169,413.127143,3,1
2023-05-26,415.329987,420.019989,0.012951,0.013293,414.891668,413.489524,0,1
2023-05-30,422.029999,420.179993,0.000381,0.009170,415.562500,413.691905,3,1


In [None]:
# Combined "Main" signal: 1 only where both the moving-average and HMM signals equal 1.
both_green = df_main["MA_Signal"].eq(1) & df_main["HMM_Signal"].eq(1)
df_main["Main_Signal"] = 0
df_main.loc[both_green, "Main_Signal"] = 1

# Lag by one row so each row carries the signal computed on the previous row
# (the first row becomes NaN).
df_main["Main_Signal"] = df_main["Main_Signal"].shift(1)

In [None]:
# Strategy returns (these columns describe the strategy, not a buy-and-hold benchmark).
#
# Main_Signal on a row was already lagged by one row, so it is known at that row's open.
# The position is therefore held from this row's open to the NEXT row's open, and the
# log return of that holding period is log(Open[t+1] / Open[t]).
# The previous expression, log(Open[t-1] / Open[t]), was the negated return of the prior
# interval, which turned gains into losses for a long position.
next_open = df_main["Open"].shift(-1)
df_main["lrets_strat"] = np.log(next_open / df_main["Open"]) * df_main["Main_Signal"]

# Cumulative log return, then converted back to a simple cumulative return.
# The first row (NaN signal) and last row (no next open) are NaN in lrets_strat.
df_main["lrets_prod"] = df_main["lrets_strat"].cumsum()
df_main["strat_prod_exp"] = np.exp(df_main["lrets_prod"]) - 1

# Review Results

In [None]:
# Remove every row that still has a NaN in any column, then show the last 40 rows.
df_main.dropna(axis=0, how="any", inplace=True)
df_main.tail(n=40)