# Train a Model to Trade on the Stock Market

![](https://www.xm.com/wp-content/uploads/2017/09/Stock-Charts-1.jpg)

## Efficient Market Hypothesis

Future prices cannot be predicted by analyzing prices from the past. Excess returns cannot be earned in the long run by using investment strategies based on historical share prices or other historical data. Technical analysis techniques will not be able to consistently produce excess returns.

 

## Technical Analysis


In finance, technical analysis is an analysis methodology for forecasting the direction of prices through the study of past market data, primarily price and volume. The efficacy of technical analysis is disputed by the efficient-market hypothesis, which states that stock market prices are essentially unpredictable.

 

## Can a neural net learn Technical Analysis techniques to predict the market and make good trades?

Are we about to get rich?

In [9]:
from alpha_vantage.techindicators import TechIndicators
from alpha_vantage.timeseries import TimeSeries
import random
from indicators import *
from transform import *
import pandas_datareader as web
import pandas as pd
from sklearn import preprocessing
from sklearn.preprocessing import MinMaxScaler
import h2o
import plotly.plotly as py
import plotly.graph_objs as go
from plotly.graph_objs import *
from plotly.tools import FigureFactory as FF
import cufflinks as cf
from plots import *
import numpy as np
import operator
import math

# Suppress unwanted warnings
import warnings
warnings.filterwarnings('ignore')
import logging
# Only WARNING and above from the "requests" logger.
logging.getLogger("requests").setLevel(logging.WARNING)

# Set some defaults for how pandas displays.
# NOTE: 'display.height' is intentionally no longer set. pandas removed that
# option in favour of 'display.max_rows' (set just below), and asking for it
# on a current pandas raises an OptionError before the notebook can run.
pd.set_option('display.max_rows', 50)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

### Enter These Variables

In [10]:
# User-editable settings read by the cells below.
stockSymbol = "TTM"      # ticker passed to the data reader and used in chart/print labels
market = "NASDAQ"
invest = 10000           # amount staked per simulated trade in the profit cell
window = 23              # passed to windowTransform
# pd.Timestamp replaces the pd.datetime alias, which pandas deprecated and
# later removed. Timestamp is a datetime subclass, so comparisons and
# date_range calls that received these values keep working.
start_date = pd.Timestamp("2013-01-01")        # first date requested from the data source
split_validation = pd.Timestamp("2017-01-01")  # boundary between training and validation data
split_test = pd.Timestamp("2017-10-01")        # boundary between validation and test data

### Let's use a machine learning library called H2O

In [11]:
#h2o.cluster().shutdown()

In [12]:
# Connect to H2O, passing "12G" as the max_mem_size setting (the recorded
# output below shows it attaching to an already-running local instance).
h2o.init(max_mem_size="12G")
# Clear out whatever earlier runs left on the cluster before loading new frames.
h2o.remove_all()

Checking whether there is an H2O instance running at http://localhost:54321. connected.


0,1
H2O cluster uptime:,5 hours 58 mins
H2O cluster timezone:,America/Toronto
H2O data parsing timezone:,UTC
H2O cluster version:,3.18.0.8
H2O cluster version age:,9 days
H2O cluster name:,H2O_from_python_brett_08igme
H2O cluster total nodes:,1
H2O cluster free memory:,10.66 Gb
H2O cluster total cores:,8
H2O cluster allowed cores:,8


### Connect to Morningstar and Download the Historical Data  

In [13]:
# Columns kept for the rest of the notebook, in display order.
ohlcv_columns = ['Open', 'High', 'Low', 'Close', 'Volume']

# Fetch the history for stockSymbol from the 'morningstar' source starting at
# start_date, flatten the returned index into columns, discard the 'Symbol'
# column, and key the frame by 'Date'.
df = (
    web.DataReader(stockSymbol, 'morningstar', start=start_date)
    .reset_index(drop=False)
    .drop('Symbol', axis=1)
    .set_index('Date')
)
df = df[ohlcv_columns]


### Plot.ly is a Sweet Charting Library

In [14]:
import os

# Plotly credentials. Environment variables take precedence so the secret does
# not have to live in the notebook; the literals remain only as a
# backward-compatible fallback for existing setups.
# NOTE(review): this API key is committed in plain text. It should be rotated
# and the fallback literal deleted once PLOTLY_API_KEY is set in the environment.
myPlotlyKey = os.environ.get('PLOTLY_API_KEY', 'conh5EnFad0Z9Lz6mVWr')
py.sign_in(username=os.environ.get('PLOTLY_USERNAME', 'bretto777'), api_key=myPlotlyKey)

# Build the price chart (the two split dates are handed to the chart helper)
# and render it as the cell's output.
fig = simpleStockChart(df,stockSymbol,split_validation,split_test)
py.iplot(fig, validate=False)

The draw time for this plot will be slow for clients without much RAM.


### Technical Analysis
Add technical analysis indicators used by the pros! Hopefully our model will learn how these indicators can predict tomorrow's price movements.

In [15]:
%%capture
# Move the 'Date' index back into an ordinary column; later cells filter on df['Date'].
df.reset_index(drop=False, inplace=True, col_level=0)

# Indicator helpers (star-imported) paired with their keyword arguments.
# They are applied to df in exactly this order and their return values are
# ignored, as before.
indicator_steps = [
    (typical_price, {'high_col': 'High', 'low_col': 'Low', 'close_col': 'Close'}),
    (money_flow_index, {'vol_col': 'Volume'}),
    (rsi, {'close_col': 'Close'}),
    (ema, {'period': 26, 'column': 'Close'}),
    (ema, {'period': 12, 'column': 'Close'}),
    (macd, {'period_long': 26, 'period_short': 12, 'period_signal': 9, 'column': 'Close'}),
    (on_balance_volume, {'trend_periods': 21, 'close_col': 'Close', 'vol_col': 'Volume'}),
    (price_volume_trend, {'trend_periods': 21, 'close_col': 'Close', 'vol_col': 'Volume'}),
    (bollinger_bands, {'trend_periods': 20, 'close_col': 'Close'}),
]
for indicator, options in indicator_steps:
    indicator(df, **options)

In [16]:
# Build the indicator chart from df (helper presumably comes from the
# star-imported `plots` module -- confirm).
fig = technicalAnalysisChart(df)
# Last expression of the cell, so the notebook shows the figure.
py.iplot(fig, validate=False)

In [17]:
%%capture
# Run the star-imported windowTransform helper on df with the `window` setting.
# Later cells read 'Close Tomorrow' / 'Close Tomorrow s' columns, presumably
# created here -- confirm against the helper.
# NOTE(review): the error recorded for this cell reads "global name 'window' is
# not defined"; that wording suggests the failing lookup is inside the helper
# rather than at this call site -- confirm in the transform module.
windowTransform(df, window)

NameError: global name 'window' is not defined

In [None]:
# Chronological split on the 'Date' column:
#   training   : Date <  split_validation
#   validation : split_validation <= Date <= split_test
#   test       : Date >  split_test
# The training bound is strict so a row dated exactly split_validation is not
# placed in both the training and the validation set. The validation window is
# a plain range comparison, consistent with the other two filters, instead of
# matching against an enumerated list of calendar days.
trainingDF = df.loc[df['Date'] < split_validation]
validationDF = df.loc[(df['Date'] >= split_validation) & (df['Date'] <= split_test)]
testDF = df.loc[df['Date'] > split_test]

# Training and validation rows with any missing value are discarded.
trainingDF = trainingDF.dropna(axis=0, how='any')
validationDF = validationDF.dropna(axis=0, how='any')
# Test rows are all kept; missing values become 0 (presumably so the most
# recent rows, which have no next-day close yet, can still be scored).
testDF = testDF.fillna(value=0)

# Key each subset by date again.
trainingDF.set_index('Date', inplace=True)
validationDF.set_index('Date', inplace=True)
testDF.set_index('Date', inplace=True)

In [None]:
# Response column; every other column of the training frame is a predictor.
y = "Close Tomorrow s"

# Upload the three pandas subsets to H2O under fixed frame ids.
train = h2o.H2OFrame(trainingDF, destination_frame="train.hex")
valid = h2o.H2OFrame(validationDF, destination_frame="valid.hex")
test = h2o.H2OFrame(testDF, destination_frame="test.hex")

# Convert the response column to a factor on each frame.
for frame in (train, valid, test):
    frame[y] = frame[y].asfactor()

# Set predictor variables: all training columns except the response.
x = [column for column in train.columns if column != y]

### Train the Deep Neural Net
Grid search for a good model

In [None]:
from h2o.estimators.deeplearning import H2ODeepLearningEstimator
from h2o.grid.grid_search import H2OGridSearch

# Candidate values for each hyper-parameter in the search.
hidden_opt = [
    [25, 25, 25, 25, 25],
    [50, 75, 75, 50],
    [75, 100, 100, 75],
    [50, 300, 300, 50],
    [10, 10, 100, 10, 10],
    [1000, 1000],
    [50, 50, 50],
]
l1_opt = [0.001, 0.0001, 0.00001]
l2_opt = [0.001, 0.0001]
rate_opt = [0.01, 0.001, 0.0001]
activation_opt = ["Rectifier", "TanhWithDropout"]
epochs_opt = [3500, 2500]

hyper_parameters = dict(
    hidden=hidden_opt,
    l1=l1_opt,
    l2=l2_opt,
    rate=rate_opt,
    activation=activation_opt,
    epochs=epochs_opt
)

# Random discrete search capped at 5 models, with a fixed seed.
search_criteria = dict(strategy="RandomDiscrete", max_models=5, seed=123456)

model_grid = H2OGridSearch(
    H2ODeepLearningEstimator(),
    hyper_params=hyper_parameters,
    search_criteria=search_criteria,
    grid_id='test8'
)

# Settings shared by every model in the grid.
# NOTE(review): `rate` is one of the searched hyper-parameters while
# adaptive_rate=True is passed here; H2O's documentation appears to say `rate`
# only applies when adaptive_rate is disabled -- confirm whether that search
# dimension has any effect.
train_options = dict(
    fast_mode=True,
    stopping_rounds=10,
    stopping_tolerance=0,
    stopping_metric='AUC',
    adaptive_rate=True,
    score_interval=10
)
model_grid.train(x=x, y=y, training_frame=train, validation_frame=valid, **train_options)

In [None]:
# Rank the grid's models by AUC, best first. AUC is a higher-is-better metric,
# so the sort has to be descending for index 0 to be the best model; with an
# ascending sort, index 0 is the lowest-AUC model of the grid.
model_gridperf1 = model_grid.get_grid(sort_by='AUC', decreasing=True)
bestModel = model_gridperf1[0]
#h2o.save_model(model=bestModel, path="/home/brett/")


In [None]:
#bestModel = h2o.load_model(path="/home/brett/ticker/h2ogbm.h2o")

In [None]:
# Variable importances of the selected model as a pandas frame, reduced to the
# mean relative importance per variable.
importances = (
    bestModel.varimp(use_pandas=True)[['variable', 'relative_importance']]
    .groupby('variable')
    .mean()
)
# Bar chart, most important variable first (last expression, so it is rendered).
importances.sort_values(by="relative_importance", ascending=False).iplot(kind='bar', colors='#5AC4F2', theme='white')

In [None]:
# Confusion matrix of the selected model on the validation frame, as a data frame.
cm = bestModel.confusion_matrix(valid).as_data_frame()

# Render it as a Plotly table with a fixed size and font size.
confusionMatrix = FF.create_table(cm)
confusionMatrix.layout.font.size = 17
confusionMatrix.layout.width = 800
confusionMatrix.layout.height = 300
py.iplot(confusionMatrix)

In [None]:
# Score the test frame and bring the predictions back into pandas with a fresh positional index.
predictions = bestModel.predict(test_data=test).as_data_frame(use_pandas=True).reset_index(drop=True)
# Place the test rows (with 'Date' restored as a column) beside the predictions, column-wise.
pBestModel = pd.concat([predictions, testDF.reset_index(drop=False)], axis=1)


In [None]:
# Evaluate the model on the test period (rows dated after split_test).
prices = df.loc[df['Date'] > split_test, ['Date', 'Close', 'Close Tomorrow', 'Close Tomorrow s']]

# Line the model's predictions up with the prices by position.
profit = pd.concat(
    [prices.reset_index(drop=True), pBestModel.loc[:, ['predict']].reset_index(drop=True)],
    axis=1)
profit['predict'] = profit['predict'].astype(np.float64)

# Gain from putting `invest` into the stock at today's close and valuing it at
# tomorrow's close. Computed once for every row; each strategy below keeps the
# gain on the days it trades and 0 otherwise. float() guards against integer
# division under Python 2.
dayGain = (float(invest) / profit['Close']) * profit['Close Tomorrow'] - invest

# Baseline: trade when one random draw beats another (roughly every other day).
# NOTE: the generator is not seeded, so this column differs from run to run.
coinFlip = pd.Series(
    [random.randint(1,1000) > random.randint(1,1000) for _ in range(len(profit))],
    index=profit.index, dtype=bool)

# Column order matches the original table: profit, Best Possible, Randomly Invest.
profit['profit'] = dayGain.where(profit['predict'] > 0, 0)                 # trade when the model predicts a rise
profit['Best Possible'] = dayGain.where(profit['Close Tomorrow s'] > 0, 0)  # trade only on days that actually rose
profit['Randomly Invest'] = dayGain.where(coinFlip, 0)

# print() with a single parenthesised argument behaves the same on Python 2 and 3.
print("Times " + stockSymbol + " actually went up    : " + str(int((profit['Close Tomorrow s'] > 0).sum())))
print("Times " + stockSymbol + " predicted to go up  : " + str(int((profit['predict'] > 0).sum())))
print("Profit                        : $" + str(profit['profit'].sum()))
print("Random investment             : $" + str(profit['Randomly Invest'].sum()))
print("Best possible outcome         : $" + str(profit['Best Possible'].sum()))

profit

