In [107]:
import pandas as pd
import numpy as np

from pyhhmm.gaussian import GaussianHMM
from pandas_datareader.data import DataReader

from ta.momentum import rsi
from ta.trend import ema_indicator
from ta.volatility import  BollingerBands,average_true_range

import yfinance as yf
yf.pdr_override()

import matplotlib.pyplot as plt

In [108]:
# Data Extraction
start_date = '2017-12-30'
end_date = '2022-12-30'
symbol = 'BTC-USD'
data = DataReader(symbol, start_date, end_date) 

[*********************100%%**********************]  1 of 1 completed


In [109]:
# Implement the Indicators

# Initialize Bollinger Bands Indicator
indicator_bb = BollingerBands(close=data["Close"], window=14, window_dev=1.5)

# Add Bollinger Bands features
data['bb_middle'] = indicator_bb.bollinger_mavg()
data['bb_upper'] = indicator_bb.bollinger_hband()
data['bb_lower'] = indicator_bb.bollinger_lband()

# Initialize RSI Indicator
data['RSI'] = rsi(data["Close"], window=14)

data['returns'] =data['Close'].pct_change()

save_df=data.iloc[800:]

save_df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,bb_middle,bb_upper,bb_lower,RSI,returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2020-03-09,8111.146484,8177.793457,7690.098145,7923.644531,7923.644531,46936995808,8738.347517,9276.398546,8200.296487,30.914291,-0.022751
2020-03-10,7922.146973,8136.945312,7814.763184,7909.729492,7909.729492,42213940994,8636.063546,9199.801597,8072.325496,30.764346,-0.001756
2020-03-11,7910.089844,7950.814453,7642.8125,7911.430176,7911.430176,38682762605,8571.128383,9193.410479,7948.846287,30.808518,0.000215
2020-03-12,7913.616211,7929.116211,4860.354004,4970.788086,4970.788086,53980357243,8298.720808,9814.044597,6783.397019,14.080537,-0.371695
2020-03-13,5017.831055,5838.114746,4106.980957,5563.707031,5563.707031,74156772075,8076.667376,9911.06286,6242.271892,23.142024,0.119281


In [110]:
# Structure Data for HMM
X_train = data[['RSI','returns']].iloc[:800]
X_test = data[['RSI','returns']].iloc[800:]


In [111]:
# Train Gaussian HMM
model = GaussianHMM(n_states=4, covariance_type="full", n_emissions=2)
model.train([np.array(X_train.values)])

(<pyhhmm.gaussian.GaussianHMM at 0x21c8f55f610>, -1380.8919159783463)

In [112]:
# Prediction
df_main = save_df.copy()
df_main.drop(columns=['High', 'Low'], inplace=True)

hmm_predictions = model.predict([np.array(X_test.values)])[0]
df_main['hmm_predictions'] = hmm_predictions
df_main.tail()

Unnamed: 0_level_0,Open,Close,Adj Close,Volume,bb_middle,bb_upper,bb_lower,RSI,returns,hmm_predictions
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2022-12-25,16847.505859,16841.986328,16841.986328,11656379938,16989.24149,17576.912068,16401.570912,46.944488,-0.000342,1
2022-12-26,16842.25,16919.804688,16919.804688,11886957804,16968.767718,17549.807208,16387.728227,48.464238,0.00462,1
2022-12-27,16919.291016,16717.173828,16717.173828,15748580239,16892.757394,17370.952672,16414.562116,44.860801,-0.011976,1
2022-12-28,16716.400391,16552.572266,16552.572266,17005713920,16802.537528,17105.967852,16499.107204,42.12107,-0.009846,1
2022-12-29,16552.322266,16642.341797,16642.341797,14472237479,16750.928711,16949.376826,16552.480596,44.125248,0.005423,1


In [113]:
# Add TA Signals
df_main.loc[(df_main['Close'] <= df_main['bb_lower']) & (df_main['RSI'] <= 40), 'ta_signal'] = 1
df_main.loc[(df_main['Close'] >= df_main['bb_upper']) & (df_main['RSI'] >= 60), 'ta_signal'] = 0


In [124]:
# Add HMM Signals
favourable_states = [0,1,2,3] # [0,1,2,3] later it will be modified to fit better results.
hmm_values =df_main['hmm_predictions'].values
hmm_values = [1 if value in favourable_states else 0 for value in hmm_values]

df_main['hmm_signal'] = hmm_values

In [125]:
# Add Combined Signals
df_main['total_signal'] = 0 
df_main.loc[(df_main['ta_signal'] == 1 ) & (df_main['hmm_signal'] == 1), 'total_signal'] = 1
df_main['total_signal'] = df_main['total_signal'].shift(1) # shift to avoid look ahead bias

In [138]:
# df_main.drop(columns=['Volume','bb_upper','bb_lower','bb_middle','RSI'], inplace=True)
result = df_main.loc[(df_main['total_signal'] == 0) | (df_main['total_signal'] == 1)]
result.dropna(inplace=True,axis=0)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  result.dropna(inplace=True,axis=0)


Unnamed: 0_level_0,Open,Close,Adj Close,returns,hmm_predictions,ta_signal,hmm_signal,total_signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-03-10,7922.146973,7909.729492,7909.729492,-0.001756,1,1.0,1,1.0
2020-03-11,7910.089844,7911.430176,7911.430176,0.000215,1,1.0,1,1.0
2020-03-12,7913.616211,4970.788086,4970.788086,-0.371695,1,1.0,1,1.0
2020-03-13,5017.831055,5563.707031,5563.707031,0.119281,1,1.0,1,1.0
2020-03-14,5573.077637,5200.366211,5200.366211,-0.065306,1,1.0,1,1.0
