# Resources
- https://www.youtube.com/watch?v=Av7rrIJvI9M&ab_channel=Quantra
- https://www.youtube.com/watch?v=hOLSGMEEwlI&ab_channel=ComputerScience

In [9]:
import pandas as pd
import numpy as np
import yfinance as yf



# IMPORT DATA
# Daily price/volume history for ticker "GS" over the 2011-01-01 .. 2021-01-01 range.
# auto_adjust is pinned to False so the frame keeps the raw 'Close' together
# with a separate 'Adj Close' column (the layout shown in the recorded output)
# instead of depending on the installed yfinance version's default.
df = yf.download("GS", start="2011-01-01", end="2021-01-01", auto_adjust=False)
print(df.keys())

[*********************100%%**********************]  1 of 1 completed

Index(['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume'], dtype='object')





In [10]:
# DEFINE PREDICTOR AND TARGET VARIABLES
import ta
import ta.momentum
import ta.trend
import ta.volume

# Predictor columns: ADX, RSI and SMA, each computed by the `ta` package.
# Window lengths are passed positionally, exactly as in the original calls,
# so the cell does not depend on the keyword name used by a given `ta` release.
ADX_WINDOW, RSI_WINDOW, SMA_WINDOW = 14, 14, 20
high, low, close = df['High'], df['Low'], df['Close']

df['ADX'] = ta.trend.adx(high, low, close, ADX_WINDOW)
df['RSI'] = ta.momentum.rsi(close, RSI_WINDOW)
df['SMA'] = ta.trend.sma_indicator(close, SMA_WINDOW)

# Show the most recent rows, including the three new columns
df.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,ADX,RSI,SMA
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020-12-24,257.040009,257.619995,253.75,256.160004,236.06221,968100,45.612611,68.209812,242.2745
2020-12-28,257.809998,262.649994,257.0,259.589996,239.223068,2793400,47.141973,70.299938,243.484
2020-12-29,260.26001,260.859985,256.5,258.01001,237.767044,1430900,48.399376,68.07949,244.855501
2020-12-30,258.809998,260.649994,257.829987,259.450012,239.09407,1566500,49.566964,69.039312,246.224001
2020-12-31,258.799988,263.929993,258.0,263.709991,243.019867,2043100,50.887893,71.745953,247.527001


In [11]:
# Label every row with the direction of the NEXT day's close-to-close return:
# 'Return' is tomorrow's percentage change shifted back onto today,
# 'Target' is 1 when that return is positive and 0 otherwise
# (a missing return compares False, so it maps to 0 before being dropped below).
df['Return'] = df['Close'].pct_change(periods=1).shift(periods=-1)
df['Target'] = (df['Return'] > 0).astype(int)

# Remove every row containing a NaN in any column; this includes the final row,
# whose next-day return does not exist.
# NOTE(review): `df` is rebound here, so re-running this cell recomputes
# 'Return' on the already-trimmed frame and drops one more trailing row each time.
df = df.dropna()

# Feature matrix
predictors_list = ['ADX', 'RSI', 'SMA']
X = df.loc[:, predictors_list]
print(X.tail())

# Label vector
y = df['Target']
print(y.tail())

                  ADX        RSI         SMA
Date                                        
2020-12-23  44.341870  68.588779  241.293500
2020-12-24  45.612611  68.209812  242.274500
2020-12-28  47.141973  70.299938  243.484000
2020-12-29  48.399376  68.079490  244.855501
2020-12-30  49.566964  69.039312  246.224001
Date
2020-12-23    0
2020-12-24    1
2020-12-28    0
2020-12-29    1
2020-12-30    1
Name: Target, dtype: int32


In [12]:
# Ordered hold-out split: the first 80% of rows are used for training and the
# remaining rows for testing. Nothing is shuffled, so row order is preserved.
split_percentage = 0.8
split = int(split_percentage * len(X))

# Explicit positional slicing at the cut-off row
X_train, X_test = X.iloc[:split], X.iloc[split:]
y_train, y_test = y.iloc[:split], y.iloc[split:]

# Sanity check: (rows, features) and (rows,) for each partition
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(1997, 3) (1997,)
(500, 3) (500,)


In [13]:
from sklearn.tree import DecisionTreeClassifier

# Create the classification model: a Gini-criterion decision tree limited to
# depth 3 with at least 5 samples per leaf.
# random_state is fixed because scikit-learn permutes the features at every
# split, so equally good candidate splits could otherwise be chosen
# differently from one run to the next.
clf = DecisionTreeClassifier(
    criterion='gini',
    max_depth=3,
    min_samples_leaf=5,
    random_state=42,
)

# fit() returns the estimator itself, so `clf` stays bound to the fitted tree
clf = clf.fit(X_train, y_train)

In [15]:
from sklearn import tree
import graphviz

# Export the fitted tree as Graphviz DOT source. With out_file=None the DOT
# text is returned as a string rather than written to disk; nodes are
# colour-filled and labelled with the predictor column names.
dot_data = tree.export_graphviz(
    decision_tree=clf,
    out_file=None,
    feature_names=predictors_list,
    filled=True,
)
# To render: graphviz.Source(dot_data) -> need to add graphviz to win executables PATH

In [17]:
from sklearn.metrics import classification_report

# PREDICT
# Predicted class label for every row of the held-out set
y_pred = clf.predict(X_test)

# REPORT
# Text summary of the per-class metrics, comparing predictions with the true labels
report = classification_report(y_true=y_test, y_pred=y_pred)
print(report)

# Metric definitions (TP/FP = true/false positives, TN/FN = true/false negatives):
#   precision   = TP / (TP + FP)  -> share of predicted positives that are correct
#   recall      = TP / (TP + FN)  -> share of actual positives that are detected
#   F1          = 2 * precision * recall / (precision + recall)
#   accuracy    = (TP + TN) / (TP + FN + TN + FP)
#   specificity = TN / (TN + FP)  -> share of actual negatives that are detected


              precision    recall  f1-score   support

           0       0.54      0.13      0.21       234
           1       0.54      0.90      0.68       266

    accuracy                           0.54       500
   macro avg       0.54      0.52      0.44       500
weighted avg       0.54      0.54      0.46       500

