In [1]:
import pandas as pd
import numpy as np  
from sklearn.model_selection import train_test_split 
from sklearn import tree
from sklearn import metrics

from stock_trading_util import download_price, read_price, line_plot, add_bbvalue, add_so, add_rtrn, add_rsi, calc_bt_pl

In [2]:
# Load the SPY price history through the project helper `read_price`
# (presumably from a previously downloaded local copy -- confirm in
# stock_trading_util). The download and plotting calls are kept disabled.
#df_prc = download_price('SPY', '2010-01-01', '2019-12-31')
#line_plot(df_prc, 'SPY')
df_prc = read_price(
    'SPY',
    '2010-01-01',
    '2020-06-30',
)

In [3]:
# Rebuild the feature frame starting from the Date/SPY columns only, then
# attach one indicator at a time. Every add_* helper comes from
# stock_trading_util and returns the augmented frame; the indicator column
# is rescaled right after it is created.
df_prc = add_bbvalue(df_prc[['Date', 'SPY']], 'SPY')
df_prc['bbvalue20'] = df_prc['bbvalue20'].div(100.0)

df_prc = add_so(df_prc, 'SPY')
df_prc['so20'] = df_prc['so20'].sub(0.5).div(10.0)

df_prc = add_rsi(df_prc, 'SPY')
df_prc['rsi14'] = df_prc['rsi14'].sub(0.5).div(10.0)

# Return columns, added in this exact order (column positions are used later).
# Presumably 'b' = backward-looking and 'f' = forward-looking over N days --
# TODO confirm against add_rtrn. The ('f', 1) column is read as `rtrn_f_1d`
# when the labels are built.
for rtrn_kind, rtrn_days in (('b', 20), ('b', 5), ('b', 1), ('f', 1)):
    df_prc = add_rtrn(df_prc, 'SPY', rtrn_kind, rtrn_days)

In [4]:
# Turn the 1-day forward return into a 3-class label stored in `holding`:
#   +1 when the return is above +0.002, -1 when below -0.002, 0 otherwise
#   (rows whose return is NaN also stay at 0, since both comparisons are False).
# `pop` removes the forward-return column from the frame in place, so it cannot
# be picked up as a feature, and leaves `holding` as the last column.
fwd_rtrn = df_prc.pop('rtrn_f_1d')
df_prc['holding'] = 0
df_prc.loc[fwd_rtrn > 0.002, 'holding'] = 1
df_prc.loc[fwd_rtrn < -0.002, 'holding'] = -1

In [5]:
# Class balance check: number of rows carrying each `holding` label.
holding_groups = df_prc.groupby('holding')
df_cnt = holding_groups['holding'].count()
df_cnt

holding
-1     818
 0     720
 1    1102
Name: holding, dtype: int64

In [6]:
# Positional (row-order) train/test split.
#
# The training window and the start of the test window used to be repeated as
# bare literals; they are named here so the test set always begins exactly
# where the training set ends.
# Earlier alternatives that were left disabled in this cell: a random
# train_test_split(test_size=0.2, random_state=100) over the same window, and
# a fixed test window of the last ~120 rows.
TRAIN_START, TRAIN_END = 2016, 2516

# Column layout of df_prc: the first two columns (Date, SPY) are not features,
# the last column (`holding`) is the label, everything in between is a feature.
FEATURE_COLS = slice(2, -1)
LABEL_COL = -1

X_train = df_prc.iloc[TRAIN_START:TRAIN_END, FEATURE_COLS].values
y_train = df_prc.iloc[TRAIN_START:TRAIN_END, LABEL_COL].values


def X_y_test(idx):
    """Return the evaluation slice of the global ``df_prc`` starting at row ``idx``.

    The final row of ``df_prc`` is always excluded (presumably because its
    forward return, and therefore its label, is not known -- confirm).

    Parameters
    ----------
    idx : int
        Positional index of the first row to include.

    Returns
    -------
    tuple
        ``(X, y, frame)`` where ``X`` is the feature matrix (ndarray), ``y`` the
        label vector (ndarray) and ``frame`` the corresponding rows of ``df_prc``
        with all columns.
    """
    df_slice = df_prc.iloc[idx:-1]
    return (
        df_slice.iloc[:, FEATURE_COLS].values,
        df_slice.iloc[:, LABEL_COL].values,
        df_slice,
    )


# Evaluate on everything after the training window.
X_test, y_test, df_test = X_y_test(TRAIN_END)

In [7]:
# Fit a depth-limited decision tree on the training window.
# random_state is fixed because scikit-learn randomly permutes the features at
# every split (even with the default splitter="best"); without a seed, ties
# between equally good splits can make the tree -- and every score below --
# change from one run to the next.
clf = tree.DecisionTreeClassifier(max_depth=10, random_state=100)
clf.fit(X_train, y_train)
print('Tree depth is ' + str(clf.tree_.max_depth))

# NOTE: for a single-label multiclass target, micro-averaged F1 coincides with
# plain accuracy (see the scikit-learn f1_score documentation).
y_pred_train = clf.predict(X_train)
print('Training f1 score is ' + str(metrics.f1_score(y_train, y_pred_train, average='micro')))

# y_pred_test is reused afterwards as the trading position in the back-test.
y_pred_test = clf.predict(X_test)
print('Testing f1 score is ' + str(metrics.f1_score(y_test, y_pred_test, average='micro')))

Tree depth is 10
Training f1 score is 0.804
Testing f1 score is 0.44715447154471544


In [8]:
# Per-feature importances of the fitted tree, one value per column of X_train
# (i.e. the df_prc columns between the first two and the last, in that order).
clf.feature_importances_

array([0.2097934 , 0.13814174, 0.16849774, 0.1405208 , 0.18642281,
       0.15662351])

In [9]:
# Back-test input: true label, predicted position and price for each test row.
# The row index comes from the price Series; the two arrays are positionally
# aligned with it because all three were cut from the same slice of df_prc.
bt_columns = {
    'Actual': y_test,
    'pstn': y_pred_test,
    'price': df_test['SPY'],
}
df = pd.DataFrame(bt_columns)

# calc_bt_pl (stock_trading_util) returns the buy-and-hold figure first and
# the strategy figure second.
pl_buyhold, pl_trade = calc_bt_pl(df)

buyhold_msg = 'Buy and hold returns ${} for $1 investment.'.format(pl_buyhold)
trade_msg = 'Decision tree based trading returns ${} for $1 investment.'.format(pl_trade)
print(buyhold_msg)
print(trade_msg)

Buy and hold returns $0.9331 for $1 investment.
Decision tree based trading returns $1.7703 for $1 investment.
