#### Installing Dependencies & Enviornment Setup

In [0]:
!pip install -q tensorflow-gpu==2.0.0-beta1

In [2]:
!pip install pandas-datareader



### Importing Project Dependencies

In [0]:
import math
import random
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas_datareader as data_reader

from tqdm import tqdm_notebook, tqdm
from collections import deque

In [4]:
tf.__version__

'2.0.0-beta1'

### Building the AI Trader network

In [0]:
class AI_Trader():

  def __init__(self, state_size, action_space=3, model_name ="AITrader"): #Stay, Buy, Sell
    
    self.state_size = state_size
    self.action_space = action_space
    self.memory = deque(maxlen=2000)
    self.inventory = []
    self.model_name = model_name
    
    self.gamma = 0.95
    self.epsilon = 1.0
    self.epsilon_final = 0.01
    self.epsilon_decay = 0.995
    
    self.model = self.model_builder()
    
  def model_builder(self):

    model = tf.keras.models.Sequential()
    
    model.add(tf.keras.layers.Dense(units = 32, activation = 'relu', input_dim = self.state_size))
    
    model.add(tf.keras.layers.Dense(units = 64, activation = 'relu'))
    
    model.add(tf.keras.layers.Dense(units = 128, activation = 'relu'))
    
    model.add(tf.keras.layers.Dense(units = self.action_space, activation = 'linear'))
    
    model.compile(loss = "mse", optimizer = tf.keras.optimizers.Adam(lr = 0.001))
    
    return model
  
  def trade(self, state):

    if random.random() <= self.epsilon:
      return random.randrange(self.action_space)
    
    actions = self.model.predict(state)
    return np.argmax(actions[0])
  
  
  def batch_train(self, batch_size):
    
    batch = []
    for i in range(len(self.memory) - batch_size + 1, len(self.memory)):
      batch.append(self.memory[i])
    
    for state, action, reward, next_state, done in batch:
      reward = reward
      if not done:
        reward = reward + self.gamma + np.amax(self.model.predict(next_state)[0])
        
      target = self.model.predict(state)
      target[0][action] = reward
      
      self.model.fit(state, target, epochs=1, verbose = 0)
      
    if self.epsilon > self.epsilon_final:
      self.epsilon *= self.epsilon_decay
      
      

## Dataset Preprocessing

### Defining helper function

**Sigmoid**

In [0]:
def sigmoid(x):
  return 1 / (1 + math.exp(-x))

### Price format function

In [0]:
def stocks_price_format(n):
  if n < 0:
    return "- $ (0:2f)".format(abs(n))
  else:
    return "$ (02f)".format(abs(n))

### Dataset loader 

In [0]:
def dataset_loader(stock_name):
  
  #Complete the dataset loader function
  dataset = data_reader.DataReader(stock_name, data_source="yahoo")
  start_date = str(dataset.index[0]).split()[0]
  end_date = str(dataset.index[-1]).split()[0]
  
  close = dataset["Close"]
  
  return close

### State Creator

In [0]:
def state_creator(data, timestep, window_size):
  
  starting_id = timestep - window_size + 1
  
  if starting_id >= 0:
    windowed_data = data[starting_id:timestep+1]
  else:
    windowed_data = - starting_id * [data[0]] + list(data[0:timestep+1])
    
  state = []
  for i in range(window_size-1):
    state.append(sigmoid(windowed_data[i+1])-windowed_data[i])
    
  return np.array([state])

### Loading a dataset

In [0]:
stock_name = "AAPL"
data = dataset_loader(stock_name)

##Stage 5: Training the AI Trader 

###Setting Hyper Parameters

In [0]:
window_size = 10 
episodes = 1000

batch_size = 32
data_samples = len(data) - 1

### Defining the Trader Model

In [0]:
trader = AI_Trader(window_size)

In [13]:
trader.model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 32)                352       
_________________________________________________________________
dense_1 (Dense)              (None, 64)                2112      
_________________________________________________________________
dense_2 (Dense)              (None, 128)               8320      
_________________________________________________________________
dense_3 (Dense)              (None, 3)                 387       
Total params: 11,171
Trainable params: 11,171
Non-trainable params: 0
_________________________________________________________________


### Training Loop

In [0]:
for episode in range(1, episodes + 1):
  
  print(f"Episode: {episode}/{episodes}")
  
  state = state_creator(data, 0, window_size + 1)
  
  total_profit = 0
  trader.inventory = []
  
  for t in tqdm(range(data_samples)):
    
    action = trader.trade(state)
    
    next_state = state_creator(data, t+1, window_size + 1)
    reward = 0
    
    if action == 1: #Buying
      trader.inventory.append(data[t])
      print("AI Trader bought: ", stocks_price_format(data[t]))
      
    elif action == 2 and len(trader.inventory) > 0: #Selling
      buy_price = trader.inventory.pop(0)
      
      reward = max(data[t] - buy_price, 0)
      total_profit += data[t] - buy_price
      print("AI Trader sold: ", stocks_price_format(data[t]), " Profit: " + stocks_price_format(data[t] - buy_price) )
      
    if t == data_samples - 1:
      done = True
    else:
      done = False
      
    trader.memory.append((state, action, reward, next_state, done))
    
    state = next_state
    
    if done:
      print("########################")
      print("TOTAL PROFIT: {}".format(total_profit))
      print("########################")
    
    if len(trader.memory) > batch_size:
      trader.batch_train(batch_size)
      
  if episode % 10 == 0:
    trader.model.save("ai_trader_{}.h5".format(episode))
    

  0%|          | 0/2416 [00:00<?, ?it/s]

Episode: 1/1000
AI Trader bought:  $ (02f)
AI Trader bought:  $ (02f)
AI Trader sold:  $ (02f)  Profit: - $ (0:2f)
AI Trader sold:  $ (02f)  Profit: - $ (0:2f)
AI Trader bought:  $ (02f)
AI Trader sold:  $ (02f)  Profit: - $ (0:2f)
AI Trader bought:  $ (02f)
AI Trader bought:  $ (02f)
AI Trader sold:  $ (02f)  Profit: $ (02f)
AI Trader sold:  $ (02f)  Profit: $ (02f)
AI Trader bought:  $ (02f)
AI Trader bought:  $ (02f)
AI Trader bought:  $ (02f)
AI Trader bought:  $ (02f)
AI Trader bought:  $ (02f)
AI Trader sold:  $ (02f)  Profit: $ (02f)
AI Trader bought:  $ (02f)


  1%|▏         | 33/2416 [00:01<01:21, 29.27it/s]

AI Trader bought:  $ (02f)


  1%|▏         | 34/2416 [00:01<03:55, 10.11it/s]

AI Trader bought:  $ (02f)


  1%|▏         | 36/2416 [00:01<06:42,  5.92it/s]

AI Trader sold:  $ (02f)  Profit: $ (02f)


  2%|▏         | 37/2416 [00:02<07:23,  5.36it/s]

AI Trader sold:  $ (02f)  Profit: $ (02f)


  2%|▏         | 39/2416 [00:02<08:17,  4.77it/s]

AI Trader bought:  $ (02f)


  2%|▏         | 40/2416 [00:02<08:35,  4.61it/s]

AI Trader bought:  $ (02f)


  2%|▏         | 41/2416 [00:03<08:43,  4.54it/s]

AI Trader bought:  $ (02f)


  2%|▏         | 42/2416 [00:03<08:51,  4.47it/s]

AI Trader bought:  $ (02f)


  2%|▏         | 43/2416 [00:03<08:55,  4.43it/s]

AI Trader sold:  $ (02f)  Profit: $ (02f)


  2%|▏         | 46/2416 [00:04<08:58,  4.40it/s]

AI Trader bought:  $ (02f)


  2%|▏         | 47/2416 [00:04<08:57,  4.41it/s]

AI Trader sold:  $ (02f)  Profit: $ (02f)


  2%|▏         | 48/2416 [00:04<08:59,  4.39it/s]

AI Trader bought:  $ (02f)


  2%|▏         | 49/2416 [00:04<09:11,  4.29it/s]

AI Trader bought:  $ (02f)


  2%|▏         | 52/2416 [00:05<08:55,  4.41it/s]

AI Trader sold:  $ (02f)  Profit: $ (02f)


  2%|▏         | 53/2416 [00:05<08:59,  4.38it/s]

AI Trader bought:  $ (02f)


  2%|▏         | 54/2416 [00:05<09:03,  4.34it/s]

AI Trader sold:  $ (02f)  Profit: $ (02f)


  2%|▏         | 56/2416 [00:06<08:57,  4.39it/s]

AI Trader bought:  $ (02f)


  2%|▏         | 57/2416 [00:06<08:58,  4.38it/s]

AI Trader bought:  $ (02f)


  2%|▏         | 58/2416 [00:06<09:05,  4.32it/s]

AI Trader bought:  $ (02f)


  2%|▏         | 59/2416 [00:07<09:05,  4.32it/s]

AI Trader bought:  $ (02f)


  2%|▏         | 60/2416 [00:07<09:04,  4.33it/s]

AI Trader sold:  $ (02f)  Profit: $ (02f)


  3%|▎         | 63/2416 [00:08<08:58,  4.37it/s]

AI Trader sold:  $ (02f)  Profit: $ (02f)


  3%|▎         | 64/2416 [00:08<09:00,  4.35it/s]

AI Trader sold:  $ (02f)  Profit: $ (02f)


  3%|▎         | 65/2416 [00:08<08:58,  4.36it/s]

AI Trader sold:  $ (02f)  Profit: $ (02f)


  3%|▎         | 67/2416 [00:08<09:03,  4.32it/s]

AI Trader bought:  $ (02f)


  3%|▎         | 70/2416 [00:09<08:58,  4.36it/s]

AI Trader bought:  $ (02f)


  3%|▎         | 71/2416 [00:09<08:56,  4.37it/s]

AI Trader sold:  $ (02f)  Profit: $ (02f)


  3%|▎         | 72/2416 [00:10<09:06,  4.29it/s]

AI Trader sold:  $ (02f)  Profit: $ (02f)


  3%|▎         | 73/2416 [00:10<09:02,  4.32it/s]

AI Trader sold:  $ (02f)  Profit: $ (02f)


  3%|▎         | 74/2416 [00:10<09:04,  4.30it/s]

AI Trader sold:  $ (02f)  Profit: $ (02f)


  3%|▎         | 78/2416 [00:11<08:55,  4.37it/s]

AI Trader sold:  $ (02f)  Profit: $ (02f)


  3%|▎         | 79/2416 [00:11<08:56,  4.36it/s]

AI Trader bought:  $ (02f)


  3%|▎         | 80/2416 [00:11<09:02,  4.31it/s]

AI Trader bought:  $ (02f)


  3%|▎         | 82/2416 [00:12<09:06,  4.27it/s]

AI Trader bought:  $ (02f)


  3%|▎         | 83/2416 [00:12<09:00,  4.32it/s]

AI Trader bought:  $ (02f)


  3%|▎         | 84/2416 [00:12<08:53,  4.37it/s]

AI Trader sold:  $ (02f)  Profit: $ (02f)


  4%|▎         | 86/2416 [00:13<08:52,  4.37it/s]

AI Trader bought:  $ (02f)


  4%|▎         | 87/2416 [00:13<08:49,  4.40it/s]

AI Trader sold:  $ (02f)  Profit: $ (02f)


  4%|▎         | 88/2416 [00:13<08:53,  4.37it/s]

AI Trader bought:  $ (02f)


  4%|▎         | 90/2416 [00:14<08:53,  4.36it/s]

AI Trader sold:  $ (02f)  Profit: $ (02f)


  4%|▍         | 92/2416 [00:14<08:50,  4.38it/s]

AI Trader bought:  $ (02f)


  4%|▍         | 96/2416 [00:15<08:46,  4.40it/s]

AI Trader sold:  $ (02f)  Profit: $ (02f)


  4%|▍         | 97/2416 [00:15<08:49,  4.38it/s]

AI Trader bought:  $ (02f)


  4%|▍         | 98/2416 [00:16<08:55,  4.33it/s]

AI Trader bought:  $ (02f)


  4%|▍         | 100/2416 [00:16<08:48,  4.38it/s]

AI Trader sold:  $ (02f)  Profit: $ (02f)


  4%|▍         | 101/2416 [00:16<08:45,  4.41it/s]

AI Trader sold:  $ (02f)  Profit: $ (02f)


  4%|▍         | 102/2416 [00:17<08:46,  4.39it/s]

AI Trader bought:  $ (02f)


  4%|▍         | 103/2416 [00:17<09:06,  4.23it/s]

AI Trader sold:  $ (02f)  Profit: $ (02f)


  4%|▍         | 104/2416 [00:17<08:59,  4.28it/s]

AI Trader sold:  $ (02f)  Profit: - $ (0:2f)


  4%|▍         | 105/2416 [00:17<08:55,  4.32it/s]

AI Trader sold:  $ (02f)  Profit: - $ (0:2f)


  4%|▍         | 106/2416 [00:17<08:49,  4.36it/s]

AI Trader sold:  $ (02f)  Profit: - $ (0:2f)


  4%|▍         | 107/2416 [00:18<08:57,  4.30it/s]

AI Trader sold:  $ (02f)  Profit: $ (02f)


  4%|▍         | 108/2416 [00:18<08:51,  4.34it/s]

AI Trader sold:  $ (02f)  Profit: - $ (0:2f)


  5%|▍         | 109/2416 [00:18<08:51,  4.34it/s]

AI Trader sold:  $ (02f)  Profit: - $ (0:2f)


  5%|▍         | 110/2416 [00:18<08:48,  4.36it/s]

AI Trader sold:  $ (02f)  Profit: $ (02f)


  5%|▍         | 111/2416 [00:19<08:41,  4.42it/s]

AI Trader sold:  $ (02f)  Profit: $ (02f)


  5%|▍         | 114/2416 [00:19<08:42,  4.41it/s]

AI Trader sold:  $ (02f)  Profit: $ (02f)


  5%|▍         | 116/2416 [00:20<08:53,  4.31it/s]

AI Trader bought:  $ (02f)


  5%|▍         | 118/2416 [00:20<08:46,  4.36it/s]

AI Trader sold:  $ (02f)  Profit: $ (02f)


  5%|▍         | 119/2416 [00:20<08:49,  4.34it/s]

AI Trader bought:  $ (02f)


  5%|▍         | 120/2416 [00:21<08:53,  4.31it/s]

AI Trader bought:  $ (02f)


  5%|▌         | 121/2416 [00:21<08:49,  4.34it/s]

AI Trader sold:  $ (02f)  Profit: - $ (0:2f)


  5%|▌         | 123/2416 [00:21<08:47,  4.35it/s]

AI Trader sold:  $ (02f)  Profit: - $ (0:2f)


  5%|▌         | 126/2416 [00:22<09:02,  4.22it/s]