In [1]:
import pandas as pd
import numpy as np

# !pip install pyhhmm
from pyhhmm.gaussian import GaussianHMM
# from hmmlearn.hmm import GaussianHMM
from pandas_datareader import data as pdr
import yfinance as yf
import matplotlib.pyplot as plt

### Structure Data

In [2]:
# Data Extraction

start_date = '2017-01-01'
end_date= '2022-06-01'
symbol = 'SPY'
yf.pdr_override()
# df = DataReader(name=symbol, data_source='yahoo', start=start_date, end=end_date)
df = pdr.get_data_yahoo(symbol, start=start_date, end=end_date)
data = df[["Open", "High", "Low", "Adj Close", "Volume"]]

[*********************100%***********************]  1 of 1 completed


In [4]:
df = data.copy()
df["Returns"] = (df["Adj Close"] / df["Adj Close"].shift(1)) - 1
df["Range"] = (df["High"]/df["Low"]) - 1
df.dropna(inplace=True)
print("Length: ", len(df))
df.head()

Length:  1361


Unnamed: 0_level_0,Open,High,Low,Adj Close,Volume,Returns,Range
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2017-01-04,225.619995,226.75,225.610001,202.535202,78744400,0.005949,0.005053
2017-01-05,226.270004,226.580002,225.479996,202.374268,78379000,-0.000795,0.004879
2017-01-06,226.529999,227.75,225.899994,203.098328,71559900,0.003578,0.008189
2017-01-09,226.910004,227.070007,226.419998,202.427933,46939700,-0.003301,0.002871
2017-01-10,226.479996,227.449997,226.009995,202.427933,63771900,0.0,0.006371


In [6]:
# Add Moving Average
df["MA_12"] = df["Adj Close"].rolling(window=12).mean()
df["MA_21"] = df["Adj Close"].rolling(window=21).mean()


In [7]:
# Structure Data
X_train = df[["Returns", "Range"]].iloc[:500]
X_test = df[["Returns", "Range"]].iloc[500:]
save_df = df.iloc[500:]


### Train HMM

In [10]:
model = GaussianHMM(n_states=4, covariance_type="full", n_emissions=2)
model.train([np.array(X_train.values)])
# model.predict([X_test.values])



(<pyhhmm.gaussian.GaussianHMM at 0x25e094b2a90>, 3646.3195296126682)

In [11]:
df_main = save_df.copy()
df_main.drop(columns=["High", "Low"], inplace=True)

In [12]:
hmm_results     = model.predict([X_test.values])[0]
df_main["HMM"]  = hmm_results
df_main.head()

Unnamed: 0_level_0,Open,Adj Close,Volume,Returns,Range,MA_12,MA_21,HMM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2018-12-31,249.559998,232.067352,144299400,0.008759,0.010991,231.581402,238.991977,3
2019-01-02,245.979996,232.308762,126925200,0.00104,0.021387,230.525205,238.009935,1
2019-01-03,248.229996,226.765244,144140700,-0.023863,0.020109,229.384014,236.690502,1
2019-01-04,247.589996,234.360916,142628800,0.033496,0.024032,229.268915,235.572308,1
2019-01-07,252.690002,236.208771,103139100,0.007885,0.016926,229.329342,234.939955,3


### Run Backtest

In [14]:
# Add MA Signals
df_main.loc[df_main["MA_12"] > df_main["MA_21"], "MA_Signal"] = 1
df_main.loc[df_main["MA_12"] <= df_main["MA_21"], "MA_Signal"] = 0


In [16]:
# Add HMM Signals
# Start with all HMM states, and we will remove the states that loss money after bactesting.
favorable_states = [0, 1, 2, 3]
hmm_values = df_main["HMM"].values
hmm_values = [1 if x in favorable_states else 0 for x in hmm_values ]
df_main["HMM_Signal"] = hmm_values

In [17]:
# Add Combines Signal
df_main["Main_Signal"] = 0
df_main.loc[(df_main["MA_Signal"] == 1) & ( df_main["HMM_Signal"] == 1), "Main_Signal"] = 1
df_main["Main_Signal"] = df_main["MA_Signal"].shift(1)


In [18]:
# Benchmark Returns
df_main["lrets_bench"] = np.log(df_main["Adj Close"]/df_main["Adj Close"].shift(1))
df_main["bench_prod"] = df_main["lrets_bench"].cumsum()
df_main["bench_prod_exp"] = np.exp(df_main["bench_prod"]) - 1

In [19]:
# Strategy Returns
# Always assume worst case senario
df_main["lrets_strat"] = np.log(df_main["Open"]/df_main["Open"].shift(1))
df_main["strat_prod"] = df_main["lrets_strat"].cumsum()
df_main["strat_prod_exp"] = np.exp(df_main["strat_prod"]) - 1

In [23]:
# Review results table
df_main.dropna(inplace=True)
df_main.tail()

Unnamed: 0_level_0,Open,Adj Close,Volume,Returns,Range,MA_12,MA_21,HMM,MA_Signal,HMM_Signal,Main_Signal,lrets_bench,bench_prod,bench_prod_exp,lrets_strat,strat_prod,strat_prod_exp
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2022-05-24,392.559998,385.894287,91448800,-0.007634,0.021165,388.144307,397.128162,1,0.0,1,0.0,-0.007663,0.508536,0.662855,-0.000688,0.45299,0.573009
2022-05-25,392.309998,389.303619,91472900,0.008835,0.019291,388.078992,396.254361,1,0.0,1,0.0,0.008796,0.517332,0.677546,-0.000637,0.452353,0.572007
2022-05-26,398.670013,397.082458,82168300,0.019981,0.021559,388.586807,395.6964,1,0.0,1,0.0,0.019784,0.537116,0.711066,0.016082,0.468435,0.597492
2022-05-27,407.910004,406.830475,84768700,0.024549,0.018837,390.424561,395.110914,1,0.0,1,0.0,0.024253,0.561369,0.753071,0.022913,0.491347,0.634517
2022-05-31,413.549988,404.54776,95937000,-0.005611,0.015682,392.105563,395.154301,1,0.0,1,0.0,-0.005627,0.555742,0.743234,0.013732,0.505079,0.657116
