<a href="https://colab.research.google.com/github/Satwikram/Deep-Learning-Implementations/blob/master/Reinforcement%20Learning/Reinforcement%20Learning%20for%20Stock%20Market%20Trading.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#### Author: Satwik Ram K

Reinforcement Learning

## Installing and Importing Dependencies

In [14]:
!pip install pandas-datareader



In [15]:
import math
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tqdm
import tensorflow as tf
from tensorflow import keras
import pandas_datareader as data_reader

from tqdm import tqdm_notebook, tqdm
from collections import deque
from tensorflow.keras import layers

## Building The AI Trader Network

In [61]:
class AI_Trader():

  def __init__(self, state_size, action_space = 3, model_name = "AITrader"):

    self.state_size = state_size
    self.action_space = action_space
    self.memory = deque(maxlen = 2000)
    self.model_name = model_name
    self.inventory = []

    self.gamma = 0.95
    self.epsilon = 1.0
    self.epsilon_final = 0.01
    self.epsilon_decay = 0.995

    self.model = self.model_builder()

  def model_builder(self):

    model = tf.keras.models.Sequential()

    model.add(layers.Dense(units = 32, activation = "relu", input_dim = self.state_size))

    model.add(layers.Dense(units = 64, activation = "relu"))

    model.add(layers.Dense(units = 128, activation = "relu"))

    model.add(layers.Dense(units = self.action_space, activation = "linear"))

    model.compile(optimizer = tf.keras.optimizers.Adam(lr = 0.001), loss = 'mse')

    return model

  def trade(self, state):

    if random.random() <= self.epsilon:
      return random.randrange(self.action_space)

    actions = self.model.predict(state)

    return np.argmax(actions[0])


  def batch_train(self, batch_size):
    
    batch = []
    for i in range(len(self.memory) - batch_size + 1, len(self.memory)):
      batch.append(self.memory[i])
      
    for state, action, reward, next_state, done in batch:
      reward = reward
      if not done:
        reward = reward + self.gamma * np.amax(self.model.predict(next_state)[0])
        
      target = self.model.predict(state)
      target[0][action] = reward
      
      self.model.fit(state, target, epochs=1, verbose=0)
      
    if self.epsilon > self.epsilon_final:
      self.epsilon *= self.epsilon_decay

## Data Preprocessing

### Sigmoid Function

In [62]:
def sigmoid(x):
  return 1 / (1 + math.exp(-x))

### Price Format Function

In [63]:
def stocks_price_format(n):
  if n < 0:
    return "- $ {0:2f}".format(abs(n))
  else:
    return "$ {0:2f}".format(abs(n))

### Dataset Loader

In [64]:
def dataset_loader(stock_name):
  
  #Complete the dataset loader function
  dataset = data_reader.DataReader(stock_name, data_source="yahoo")
  
  start_date = str(dataset.index[0]).split()[0]
  end_date = str(dataset.index[-1]).split()[0]
  
  close = dataset['Close']

  return close

### State Creater

In [65]:
def state_creator(data, timestep, window_size): 

  starting_id = timestep - window_size + 1

  if starting_id >= 0:
    windowed_data = data[starting_id:timestep+1]
  else:
    windowed_data = - starting_id * [data[0]] + list(data[0:timestep+1])
    
  state = []
  for i in range(window_size - 1):
    state.append(sigmoid(windowed_data[i+1] - windowed_data[i]))
    
  return np.array([state])

## Loading the Dataset

In [66]:
stock_name = "AAPL"
data = dataset_loader(stock_name)

In [67]:
data.head()

Date
2015-08-03    118.440002
2015-08-04    114.639999
2015-08-05    115.400002
2015-08-06    115.129997
2015-08-07    115.519997
Name: Close, dtype: float64

## Defining the model

In [68]:
window_size = 10
episodes = 20

batch_size = 32
data_samples = len(data) - 1


In [69]:
trader = AI_Trader(window_size)

In [70]:
trader.model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_16 (Dense)             (None, 32)                352       
_________________________________________________________________
dense_17 (Dense)             (None, 64)                2112      
_________________________________________________________________
dense_18 (Dense)             (None, 128)               8320      
_________________________________________________________________
dense_19 (Dense)             (None, 3)                 387       
Total params: 11,171
Trainable params: 11,171
Non-trainable params: 0
_________________________________________________________________


## Training

In [None]:
for episode in range(1, episodes + 1):
  
  print("Episode: {}/{}".format(episode, episodes))
  
  state = state_creator(data, 0, window_size + 1)
  
  total_profit = 0
  trader.inventory = []
  
  for t in tqdm(range(data_samples)):
    
    action = trader.trade(state)
    
    next_state = state_creator(data, t+1, window_size + 1)
    reward = 0
    
    if action == 1: #Buying
      trader.inventory.append(data[t])
      print("AI Trader bought: ", stocks_price_format(data[t]))
      
    elif action == 2 and len(trader.inventory) > 0: #Selling
      buy_price = trader.inventory.pop(0)
      
      reward = max(data[t] - buy_price, 0)
      total_profit += data[t] - buy_price
      print("AI Trader sold: ", stocks_price_format(data[t]), " Profit: " + stocks_price_format(data[t] - buy_price) )
      
    if t == data_samples - 1:
      done = True
    else:
      done = False
      
    trader.memory.append((state, action, reward, next_state, done))
    
    state = next_state
    
    if done:
      print("########################")
      print("TOTAL PROFIT: {}".format(total_profit))
      print("########################")
    
    if len(trader.memory) > batch_size:
      trader.batch_train(batch_size)
      
  if episode % 10 == 0:
    trader.model.save("ai_trader_{}.h5".format(episode))
    


  0%|          | 0/1256 [00:00<?, ?it/s][A

Episode: 1/20
AI Trader bought:  $ 118.440002
AI Trader sold:  $ 115.400002  Profit: - $ 3.040001
AI Trader bought:  $ 109.690002
AI Trader bought:  $ 112.919998
AI Trader bought:  $ 112.760002
AI Trader sold:  $ 107.720001  Profit: - $ 1.970001
AI Trader sold:  $ 112.339996  Profit: - $ 0.580002
AI Trader bought:  $ 109.269997
AI Trader bought:  $ 112.309998
AI Trader sold:  $ 110.150002  Profit: - $ 2.610001
AI Trader bought:  $ 114.209999
AI Trader sold:  $ 116.279999  Profit: $ 7.010002
AI Trader sold:  $ 116.410004  Profit: $ 4.100006



  3%|▎         | 33/1256 [00:02<01:47, 11.33it/s][A

AI Trader sold:  $ 113.449997  Profit: - $ 0.760002



  3%|▎         | 34/1256 [00:05<18:48,  1.08it/s][A

AI Trader bought:  $ 115.209999



  3%|▎         | 35/1256 [00:08<31:51,  1.57s/it][A

AI Trader sold:  $ 113.400002  Profit: - $ 1.809998



  3%|▎         | 36/1256 [00:11<39:35,  1.95s/it][A
  3%|▎         | 37/1256 [00:14<44:13,  2.18s/it][A

AI Trader bought:  $ 115.000000



  3%|▎         | 38/1256 [00:17<49:07,  2.42s/it][A

AI Trader bought:  $ 114.709999



  3%|▎         | 39/1256 [00:20<51:55,  2.56s/it][A

AI Trader bought:  $ 112.440002



  3%|▎         | 40/1256 [00:23<53:13,  2.63s/it][A
  3%|▎         | 41/1256 [00:26<55:43,  2.75s/it][A
  3%|▎         | 42/1256 [00:29<57:01,  2.82s/it][A

AI Trader bought:  $ 109.580002



  3%|▎         | 43/1256 [00:31<57:18,  2.83s/it][A
  4%|▎         | 44/1256 [00:34<57:28,  2.85s/it][A
  4%|▎         | 45/1256 [00:37<56:49,  2.82s/it][A

AI Trader bought:  $ 111.309998



  4%|▎         | 46/1256 [00:40<59:08,  2.93s/it][A

AI Trader sold:  $ 110.779999  Profit: - $ 4.220001



  4%|▎         | 47/1256 [00:43<59:16,  2.94s/it][A
  4%|▍         | 48/1256 [00:47<1:01:17,  3.04s/it][A
  4%|▍         | 49/1256 [00:49<59:45,  2.97s/it]  [A

AI Trader sold:  $ 111.599998  Profit: - $ 3.110001



  4%|▍         | 50/1256 [00:52<1:00:15,  3.00s/it][A

AI Trader sold:  $ 111.790001  Profit: - $ 0.650002



  4%|▍         | 51/1256 [00:55<1:00:50,  3.03s/it][A
  4%|▍         | 52/1256 [00:59<1:03:02,  3.14s/it][A
  4%|▍         | 53/1256 [01:01<59:40,  2.98s/it]  [A
  4%|▍         | 54/1256 [01:05<1:00:03,  3.00s/it][A

AI Trader sold:  $ 111.730003  Profit: $ 2.150002



  4%|▍         | 55/1256 [01:07<59:38,  2.98s/it]  [A

AI Trader sold:  $ 113.769997  Profit: $ 2.459999



  4%|▍         | 56/1256 [01:11<1:00:14,  3.01s/it][A

AI Trader bought:  $ 113.760002



  5%|▍         | 57/1256 [01:13<58:57,  2.95s/it]  [A

AI Trader bought:  $ 115.500000



  5%|▍         | 58/1256 [01:16<58:28,  2.93s/it][A
  5%|▍         | 59/1256 [01:19<57:47,  2.90s/it][A
  5%|▍         | 60/1256 [01:23<1:02:01,  3.11s/it][A

AI Trader sold:  $ 114.550003  Profit: $ 0.790001



  5%|▍         | 61/1256 [01:26<1:01:48,  3.10s/it][A
  5%|▍         | 62/1256 [01:29<1:00:31,  3.04s/it][A
  5%|▌         | 63/1256 [01:32<1:01:04,  3.07s/it][A
  5%|▌         | 64/1256 [01:35<1:02:03,  3.12s/it][A
  5%|▌         | 65/1256 [01:38<1:02:11,  3.13s/it][A
  5%|▌         | 66/1256 [01:41<1:01:10,  3.08s/it][A

AI Trader sold:  $ 122.000000  Profit: $ 6.500000



  5%|▌         | 67/1256 [01:44<1:00:43,  3.06s/it][A
  5%|▌         | 68/1256 [01:47<1:01:23,  3.10s/it][A
  5%|▌         | 69/1256 [01:50<59:54,  3.03s/it]  [A
  6%|▌         | 70/1256 [01:53<58:38,  2.97s/it][A

AI Trader bought:  $ 116.769997



  6%|▌         | 71/1256 [01:57<1:02:34,  3.17s/it][A
  6%|▌         | 72/1256 [01:59<1:00:03,  3.04s/it][A

AI Trader sold:  $ 115.720001  Profit: - $ 1.049995



  6%|▌         | 73/1256 [02:03<1:01:01,  3.10s/it][A
  6%|▌         | 74/1256 [02:06<1:00:40,  3.08s/it][A

AI Trader bought:  $ 114.180000



  6%|▌         | 75/1256 [02:09<59:18,  3.01s/it]  [A
  6%|▌         | 76/1256 [02:12<59:44,  3.04s/it][A

AI Trader bought:  $ 117.290001



  6%|▌         | 77/1256 [02:15<1:00:39,  3.09s/it][A
  6%|▌         | 78/1256 [02:18<1:03:38,  3.24s/it][A

AI Trader bought:  $ 119.300003



  6%|▋         | 79/1256 [02:22<1:05:06,  3.32s/it][A

AI Trader sold:  $ 117.750000  Profit: $ 3.570000



  6%|▋         | 80/1256 [02:25<1:02:11,  3.17s/it][A

AI Trader sold:  $ 118.879997  Profit: $ 1.589996



  6%|▋         | 81/1256 [02:28<1:03:29,  3.24s/it][A

AI Trader bought:  $ 118.029999



  7%|▋         | 82/1256 [02:32<1:06:38,  3.41s/it][A

AI Trader bought:  $ 117.809998



  7%|▋         | 83/1256 [02:36<1:09:37,  3.56s/it][A
  7%|▋         | 84/1256 [02:40<1:09:51,  3.58s/it][A

AI Trader bought:  $ 117.339996



  7%|▋         | 85/1256 [02:43<1:07:44,  3.47s/it][A

AI Trader bought:  $ 116.279999



  7%|▋         | 86/1256 [02:46<1:06:20,  3.40s/it][A
  7%|▋         | 87/1256 [02:49<1:06:28,  3.41s/it][A

AI Trader sold:  $ 119.029999  Profit: - $ 0.270004



  7%|▋         | 88/1256 [02:53<1:05:14,  3.35s/it][A
  7%|▋         | 89/1256 [02:56<1:03:10,  3.25s/it][A
  7%|▋         | 90/1256 [02:59<1:04:49,  3.34s/it][A

AI Trader bought:  $ 115.620003



  7%|▋         | 91/1256 [03:02<1:04:12,  3.31s/it][A
  7%|▋         | 92/1256 [03:06<1:05:26,  3.37s/it][A
  7%|▋         | 93/1256 [03:09<1:03:12,  3.26s/it][A
  7%|▋         | 94/1256 [03:12<1:01:07,  3.16s/it][A

AI Trader sold:  $ 110.489998  Profit: - $ 7.540001



  8%|▊         | 95/1256 [03:15<1:00:23,  3.12s/it][A

AI Trader bought:  $ 111.339996



  8%|▊         | 96/1256 [03:19<1:03:42,  3.30s/it][A

AI Trader sold:  $ 108.980003  Profit: - $ 8.829994



  8%|▊         | 97/1256 [03:22<1:04:22,  3.33s/it][A

AI Trader sold:  $ 106.029999  Profit: - $ 11.309998



  8%|▊         | 98/1256 [03:25<1:04:45,  3.36s/it][A
  8%|▊         | 99/1256 [03:29<1:03:17,  3.28s/it][A
  8%|▊         | 100/1256 [03:32<1:01:36,  3.20s/it][A

AI Trader sold:  $ 108.610001  Profit: - $ 7.669998



  8%|▊         | 101/1256 [03:35<1:01:48,  3.21s/it][A

AI Trader bought:  $ 108.029999



  8%|▊         | 102/1256 [03:38<1:00:25,  3.14s/it][A
  8%|▊         | 103/1256 [03:40<57:49,  3.01s/it]  [A

AI Trader sold:  $ 108.739998  Profit: - $ 6.880005



  8%|▊         | 104/1256 [03:43<57:58,  3.02s/it][A
  8%|▊         | 105/1256 [03:47<59:01,  3.08s/it][A
  8%|▊         | 106/1256 [03:50<1:00:01,  3.13s/it][A
  9%|▊         | 107/1256 [03:53<1:00:06,  3.14s/it][A
  9%|▊         | 108/1256 [03:56<58:58,  3.08s/it]  [A
  9%|▊         | 109/1256 [03:59<57:07,  2.99s/it][A

AI Trader sold:  $ 96.449997  Profit: - $ 14.889999



  9%|▉         | 110/1256 [04:02<56:20,  2.95s/it][A
  9%|▉         | 111/1256 [04:05<57:37,  3.02s/it][A

AI Trader sold:  $ 98.529999  Profit: - $ 9.500000



  9%|▉         | 112/1256 [04:08<57:45,  3.03s/it][A

AI Trader bought:  $ 99.959999



  9%|▉         | 113/1256 [04:11<56:19,  2.96s/it][A

AI Trader sold:  $ 97.389999  Profit: - $ 2.570000



  9%|▉         | 114/1256 [04:14<56:12,  2.95s/it][A

AI Trader bought:  $ 99.519997



  9%|▉         | 115/1256 [04:17<55:30,  2.92s/it][A

AI Trader sold:  $ 97.129997  Profit: - $ 2.389999



  9%|▉         | 116/1256 [04:20<56:01,  2.95s/it][A
  9%|▉         | 117/1256 [04:23<58:06,  3.06s/it][A
  9%|▉         | 118/1256 [04:26<56:56,  3.00s/it][A

AI Trader bought:  $ 96.300003



  9%|▉         | 119/1256 [04:29<56:19,  2.97s/it][A

AI Trader sold:  $ 101.419998  Profit: $ 5.119995



 10%|▉         | 120/1256 [04:32<56:47,  3.00s/it][A

AI Trader bought:  $ 99.440002



 10%|▉         | 121/1256 [04:35<57:06,  3.02s/it][A

AI Trader sold:  $ 99.989998  Profit: $ 0.549995



 10%|▉         | 122/1256 [04:38<59:07,  3.13s/it][A
 10%|▉         | 123/1256 [04:42<1:00:29,  3.20s/it][A
 10%|▉         | 124/1256 [04:45<59:36,  3.16s/it]  [A
 10%|▉         | 125/1256 [04:48<58:43,  3.12s/it][A
 10%|█         | 126/1256 [04:50<57:11,  3.04s/it][A
 10%|█         | 127/1256 [04:54<58:26,  3.11s/it][A

AI Trader bought:  $ 96.349998
