# INSTALL DEPENDENCIES AND IMPORT LIBRARIES

In [0]:
!pip install tensorflow==2.0.0

In [0]:
import tensorflow as tf
tf.__version__

'2.0.0'

In [0]:
!pip install pandas_datareader # this library helps us retrieve all stock market data directly to our environment

In [0]:
import math
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pandas_datareader as data_reader
from tqdm import tqdm_notebook, tqdm
from collections import deque
from tensorflow.keras.callbacks import ModelCheckpoint

# BUILD THE AI TRADER NETWORK

In [0]:
class AI_trader():
  """Deep Q-learning agent that picks one of `action_space` trading actions.

  With the default of three actions the indices are 0 = stay (do nothing),
  1 = buy and 2 = sell.

  Attributes set by ``__init__``:
    state_size:    width of the state vector fed to the network.
    action_space:  number of outputs of the network (one Q-value per action).
    memory:        replay buffer (deque, at most 2000 entries) of
                   ``(state, action, reward, next_state, done)`` tuples.
    inventory:     list the caller uses to track purchase prices.
    model_name:    label stored on the instance.
    gamma:         discount factor applied to the bootstrapped future value.
    epsilon:       current probability of taking a random action.
    epsilon_final: once epsilon is at or below this value it stops decaying.
    epsilon_decay: factor applied to epsilon after every ``batch_train`` call.
    model:         the compiled Keras network returned by ``model_builder``.
  """

  def __init__(self, state_size, action_space=3, model_name="AITrader"):
    """Store the hyperparameters and build the Q-network."""
    self.state_size = state_size
    self.action_space = action_space
    self.memory = deque(maxlen=2000)
    self.inventory = []
    self.model_name = model_name

    self.gamma = 0.95  # between 0 and 1
    self.epsilon = 1  # start fully random: every action is explored at first
    self.epsilon_final = 0.01
    self.epsilon_decay = 0.995
    self.model = self.model_builder()

  def model_builder(self):
    """Build and compile the fully connected network that estimates Q-values.

    Returns:
      A compiled ``tf.keras`` Sequential model mapping a ``state_size``-wide
      input to ``action_space`` linear outputs.
    """
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Dense(units=32, activation='relu', input_dim=self.state_size))
    model.add(tf.keras.layers.Dense(units=64, activation='relu'))
    model.add(tf.keras.layers.Dense(units=128, activation='relu'))
    # Linear output: the network regresses continuous Q-values, not classes,
    # which is also why the loss is MSE and no accuracy metric is tracked.
    model.add(tf.keras.layers.Dense(units=self.action_space, activation='linear'))
    # `learning_rate` is the documented argument name; `lr` is a deprecated alias.
    model.compile(loss="mse", optimizer=tf.keras.optimizers.Adam(learning_rate=0.001))
    return model

  def trade(self, state):
    """Choose an action for `state` with an epsilon-greedy policy.

    Args:
      state: input accepted by ``self.model.predict`` (a single-row batch).

    Returns:
      A random action index with probability ``epsilon``; otherwise the index
      of the largest predicted Q-value.
    """
    if random.random() < self.epsilon:
      return random.randrange(self.action_space)

    q_values = self.model.predict(state)
    return np.argmax(q_values[0])  # action with the highest estimated Q-value

  def batch_train(self, batch_size):
    """Fit the network on the most recent `batch_size` stored transitions.

    For every transition the target for the taken action is the reward, plus
    the discounted best predicted value of the next state unless the
    transition is terminal. Afterwards epsilon is decayed once, as long as it
    is still above ``epsilon_final``.

    No checkpoint callback is attached: ``fit`` is called without validation
    data, so a ``val_loss``-based checkpoint path cannot be formatted, and the
    previous callback construction referenced an undefined ``kwargs`` name.

    Args:
      batch_size: number of trailing memory entries to replay. If fewer are
        stored, all of them are used.
    """
    memory_len = len(self.memory)
    first = max(memory_len - batch_size, 0)
    batch = [self.memory[i] for i in range(first, memory_len)]

    for state, action, reward, next_state, done in batch:
      target_value = reward
      if not done:
        # Bootstrap with the best Q-value the network predicts for the next state.
        target_value = reward + self.gamma * np.amax(self.model.predict(next_state)[0])
      # Only the taken action's output is moved; the others keep their prediction.
      target = self.model.predict(state)
      target[0][action] = target_value
      self.model.fit(state, target, epochs=1, verbose=0)

    if self.epsilon > self.epsilon_final:
      self.epsilon = self.epsilon * self.epsilon_decay

# DATASET PREPROCESSING

# HELPER FUNCTIONS

SIGMOID

In [0]:
def sigmoid(x):
  """Return the logistic function 1 / (1 + e^-x) of a scalar `x`.

  The two branches are algebraically identical; each one only ever
  exponentiates a non-positive number, so large-magnitude inputs saturate to
  0.0 or 1.0 instead of raising OverflowError from ``math.exp``.
  """
  if x >= 0:
    return 1 / (1 + math.exp(-x))
  exp_x = math.exp(x)
  return exp_x / (1 + exp_x)

PRICE FORMAT FUNCTION

In [0]:
# function to print out the stocks bought or sold
def stocks_price_format(n):
  """Format a signed amount as a dollar string with two decimals.

  Negative amounts are rendered as "- $ 1.23", non-negative ones as "$ 1.23".
  The format spec is ``.2f`` (two decimals); the earlier ``2f`` only set a
  minimum width of 2 and printed six decimals.
  """
  amount = "$ {0:.2f}".format(abs(n))
  return "- " + amount if n < 0 else amount

DATASET LOADER FUNCTION

In [0]:
# Download the AAPL price history from Yahoo to inspect what the reader returns.
dataset = data_reader.DataReader("AAPL", data_source="yahoo")
dataset # a pandas DataFrame indexed by date
# High, Low   -> highest and lowest price of the day
# Open, Close -> price when the market opens and closes
# Volume      -> presumably the number of shares traded that day (the usual
#                meaning of this column) - it is not used in this notebook
# Adj Close   -> not used either; dataset_loader below keeps only "Close"

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,30.642857,30.340000,30.490000,30.572857,123432400.0,26.538483
2010-01-05,30.798571,30.464285,30.657143,30.625713,150476200.0,26.584366
2010-01-06,30.747143,30.107143,30.625713,30.138571,138040000.0,26.161509
2010-01-07,30.285715,29.864286,30.250000,30.082857,119282800.0,26.113146
2010-01-08,30.285715,29.865715,30.042856,30.282858,111902700.0,26.286753
...,...,...,...,...,...,...
2020-03-11,281.220001,271.859985,277.390015,275.429993,63899700.0,275.429993
2020-03-12,270.000000,248.000000,255.940002,248.229996,104618500.0,248.229996
2020-03-13,279.920013,252.949997,264.890015,277.970001,92683000.0,277.970001
2020-03-16,259.079987,240.000000,241.949997,242.210007,80605900.0,242.210007


In [0]:
# String form of the first index entry, cut at the first whitespace so that
# only the leading date part is kept.
str(dataset.index[0]).split()[0]

'2010-01-04'

In [0]:
# dataset loader function
def dataset_loader(stock_name):
  """Download the price history of `stock_name` and return its closing prices.

  Args:
    stock_name: ticker symbol passed to ``data_reader.DataReader`` with the
      "yahoo" data source.

  Returns:
    The "Close" column of the downloaded frame.

  Raises:
    ValueError: if the download contains no rows. Previously this case failed
      with an IndexError raised by unused start/end date look-ups.
  """
  dataset = data_reader.DataReader(stock_name, data_source="yahoo")
  if len(dataset) == 0:
    raise ValueError("no price data returned for {!r}".format(stock_name))
  return dataset["Close"]

STATE CREATOR FUNCTION

In [0]:
def state_creator(data, timestep, window_size):
  """Build the state observed at `timestep` from the preceding prices.

  The state is made of the `window_size - 1` consecutive price differences
  inside the window that ends at `timestep`, each squashed with `sigmoid`.
  When the window would start before the first sample, it is left-padded with
  the first price.

  Args:
    data: price sequence; a pandas Series (indexed by position through
      ``.iloc``, whatever its index labels are), a list or a NumPy array.
    timestep: position of the last price included in the window.
    window_size: number of prices in the window.

  Returns:
    NumPy array of shape ``(1, window_size - 1)``.
  """
  # Plain `series[i]` is label-based for integer indexes and a deprecated
  # positional fallback otherwise, so go through `.iloc` whenever it exists.
  prices = data.iloc if hasattr(data, "iloc") else data

  starting_id = timestep - window_size + 1
  if starting_id >= 0:
    windowed_data = list(prices[starting_id:timestep + 1])
  else:
    # Not enough history yet: repeat the first price to fill the window.
    windowed_data = -starting_id * [prices[0]] + list(prices[0:timestep + 1])

  state = [
      sigmoid(windowed_data[i + 1] - windowed_data[i])
      for i in range(window_size - 1)
  ]
  return np.array([state])

# LOADING DATASET

In [0]:
# Ticker to train on; `data` holds what dataset_loader returns for it
# (the "Close" column of the downloaded frame).
stock_name = "AAPL"
data = dataset_loader(stock_name)

In [0]:
# Display the loaded closing prices.
data

Date
2010-01-04     30.572857
2010-01-05     30.625713
2010-01-06     30.138571
2010-01-07     30.082857
2010-01-08     30.282858
                 ...    
2020-03-11    275.429993
2020-03-12    248.229996
2020-03-13    277.970001
2020-03-16    242.210007
2020-03-17    252.860001
Name: Close, Length: 2568, dtype: float64

# TRAIN THE AI TRADER

SET HYPERPARAMETERS

In [0]:
window_size = 10 # width of each state vector; also passed to AI_trader as state_size
episodes = 1000 # upper bound of the episode counter; each episode is one full pass over the price series
batch_size = 32 # passed to trader.batch_train; training starts once the memory holds more entries than this
data_samples = len(data) - 1 # steps per episode; one less than len(data) because step t also builds the state for t+1

DEFINE THE TRADER MODEL

In [0]:
# state_size is window_size: the training loop calls state_creator with
# window_size + 1 prices, which yields window_size differences per state.
trader = AI_trader(window_size)
trader.model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 32)                352       
_________________________________________________________________
dense_1 (Dense)              (None, 64)                2112      
_________________________________________________________________
dense_2 (Dense)              (None, 128)               8320      
_________________________________________________________________
dense_3 (Dense)              (None, 3)                 387       
Total params: 11,171
Trainable params: 11,171
Non-trainable params: 0
_________________________________________________________________


# TRAINING LOOP

In [0]:
# Each episode replays the whole price series once. At every step the agent
# picks an action, the resulting transition is stored in the replay memory and,
# once the memory holds more than batch_size entries, the network is trained.
for episode in range(episodes+1):
  print("Episode : {}/{}". format(episode, episodes))
  state = state_creator(data, 0, window_size + 1) # initial state
  total_profit = 0
  trader.inventory = [] # purchase prices of the stocks currently held
  for t in tqdm(range(data_samples)): # tqdm only draws the progress bar
    action = trader.trade(state)
    next_state = state_creator(data, t+1, window_size+1)
    price = data.iloc[t] # positional lookup; data is the Series from dataset_loader
    reward = 0
    if action == 1: # the agent is buying
      trader.inventory.append(price)
    elif action == 2 and len(trader.inventory) > 0: # selling needs a non-empty inventory
      buy_price = trader.inventory.pop(0) # sell the oldest position first
      profit = price - buy_price
      reward = max(profit, 0) # losses are not rewarded (clipped to 0)
      total_profit += profit # accumulate over the episode instead of keeping only the last trade
    done = t == data_samples - 1
    trader.memory.append((state, action, reward, next_state, done))
    state = next_state
    if done:
      print("####################")
      print("TOTAL PROFIT : {}".format(total_profit))
      print("####################")
    if len(trader.memory) > batch_size:
      trader.batch_train(batch_size)
  # Keep a snapshot of the model every 10 episodes.
  if episode % 10 == 0:
    trader.model.save("ai_trader_{}.h5".format(episode))