<a href="https://colab.research.google.com/github/Gvinay90/TensorFlow-2.0/blob/master/Stock_market_prediction_reinforcement_learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [66]:
!pip install pandas-datareader



## Import all the required libraries

In [67]:
import math
import random
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas_datareader as data_reader

from tqdm import tqdm_notebook, tqdm
from collections import deque

In [68]:
tf.__version__

'2.2.0'

### Building the Neural Network

In [69]:
class AI_Trader():
  """Q-learning trading agent backed by a small dense Keras network.

  The agent chooses between `action_space` discrete actions (by default 3:
  stay, buy, sell) with an epsilon-greedy policy and learns from the most
  recent transitions stored in a bounded replay memory.

  Attributes:
    state_size: number of features in one state vector (network input size).
    action_space: number of discrete actions (network output size).
    memory: deque holding at most 2000 transitions
      `(state, action, reward, next_state, done)`.
    inventory: list managed by the caller (the training loop stores buy prices).
    model_name: label stored on the instance; not used by the methods below.
    gamma: discount factor applied to the estimated future value.
    epsilon: current probability of picking a random action.
    epsilon_final: once epsilon is at or below this value it stops decaying.
    epsilon_decay: factor epsilon is multiplied by after each training call.
    model: the compiled Keras model returned by `model_builder`.
  """

  def __init__(self, state_size, action_space=3, model_name="AITrader"): #Stay, Buy, Sell

    self.state_size = state_size
    self.action_space = action_space
    self.memory = deque(maxlen=2000)
    self.inventory = []
    self.model_name = model_name

    self.gamma = 0.95
    self.epsilon = 1.0
    self.epsilon_final = 0.01
    self.epsilon_decay = 0.995

    self.model = self.model_builder()

  def model_builder(self):
    """Build and compile the Q-network.

    Returns:
      A `tf.keras` Sequential model: three ReLU hidden layers (32, 64, 128
      units) followed by a linear layer with one output per action, compiled
      with mean-squared-error loss and the Adam optimizer.
    """
    model = tf.keras.models.Sequential()

    model.add(tf.keras.layers.Dense(units=32, activation='relu', input_dim=self.state_size))
    model.add(tf.keras.layers.Dense(units=64, activation='relu'))
    model.add(tf.keras.layers.Dense(units=128, activation='relu'))

    # Linear outputs: one unbounded Q-value estimate per action.
    model.add(tf.keras.layers.Dense(units=self.action_space, activation='linear'))

    # `learning_rate` is the supported keyword; `lr` is only a deprecated alias.
    model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(learning_rate=0.001))

    return model

  def trade(self, state):
    """Pick an action for `state` with an epsilon-greedy policy.

    Args:
      state: model input for a single state, as accepted by `model.predict`.

    Returns:
      An int in `range(action_space)`.
    """
    # Explore: with probability epsilon return a uniformly random action.
    if random.random() <= self.epsilon:
      return random.randrange(self.action_space)

    # Exploit: return the action with the highest predicted value.
    actions = self.model.predict(state, verbose=0)
    return int(np.argmax(actions[0]))

  def batch_train(self, batch_size):
    """Fit the model on the `batch_size` most recent transitions in memory.

    Each transition is fitted individually (one epoch, one sample). After the
    batch, epsilon is multiplied by `epsilon_decay` while it is still above
    `epsilon_final`.

    Args:
      batch_size: number of most recent transitions to train on. If memory
        holds fewer transitions, all of them are used.
    """
    # Clamp the start at 0 so a short memory never produces negative indices.
    first = max(len(self.memory) - batch_size, 0)
    batch = list(self.memory)[first:]

    for state, action, reward, next_state, done in batch:
      # Target for the taken action: the reward alone on the final step,
      # otherwise reward plus the discounted best predicted value of next_state.
      q_target = reward
      if not done:
        q_target = reward + self.gamma * np.amax(self.model.predict(next_state, verbose=0)[0])

      # Keep the current predictions for the other actions so that only the
      # taken action contributes to the loss.
      target = self.model.predict(state, verbose=0)
      target[0][action] = q_target

      self.model.fit(state, target, epochs=1, verbose=0)

    if self.epsilon > self.epsilon_final:
      self.epsilon *= self.epsilon_decay

## Data Preprocessing

### Sigmoid
Sigmoid is an activation function; here it is used to squash the differences between consecutive prices into the range (0, 1).

In [70]:
 def sigmoid(x):
   """Return the logistic sigmoid ``1 / (1 + e**-x)`` of the scalar ``x``.

   The result lies in [0, 1]. The computation is split by sign so that very
   large negative inputs return 0.0 instead of raising ``OverflowError``.
   """
   if x >= 0:
     # exp(-x) is at most 1 here, so it cannot overflow.
     return 1 / (1 + math.exp(-x))
   # For negative x use the equivalent form e**x / (1 + e**x); exp(x) is
   # below 1 here, whereas exp(-x) could overflow.
   exp_x = math.exp(x)
   return exp_x / (1 + exp_x)

## Price format function

In [71]:
def stocks_price_format(n):
  """Format a dollar amount for display with two decimal places.

  Args:
    n: numeric amount; may be negative (e.g. a loss).

  Returns:
    "$ 12.34" for non-negative amounts and "- $ 12.34" for negative ones.
  """
  # ".2f" fixes the precision at two decimals; "2f" would only set a minimum
  # field width and keep the default six decimals.
  amount = "$ {0:.2f}".format(abs(n))
  if n < 0:
    return "- " + amount
  return amount

## Data Loader

In [72]:
 
def dataset_loader(stock_name):
  """Download price history for a ticker and return its closing prices.

  Args:
    stock_name: ticker symbol to request, e.g. "AAPL".

  Returns:
    The 'Close' column of the frame returned by pandas-datareader.
  """
  # Request the given ticker from the "yahoo" source. No start/end dates are
  # passed, so the date range is whatever the reader uses by default.
  # NOTE(review): this needs network access and a working Yahoo endpoint in
  # the installed pandas-datareader version - confirm before a long run.
  dataset = data_reader.DataReader(stock_name, data_source="yahoo")

  # 'Close' holds the closing price for each row.
  return dataset['Close']

## State creator


In [73]:
def state_creator(data, timestep, window_size):
  """Build the model input describing the prices up to `timestep`.

  The state is made of the sigmoid of each difference between consecutive
  prices in a window of `window_size` prices ending at `timestep`. When the
  window would start before the first price, it is left-padded with copies
  of the first price.

  Args:
    data: price sequence (list, NumPy array or pandas Series).
    timestep: position of the last price included in the window.
    window_size: number of prices in the window; the state therefore holds
      `window_size - 1` values.

  Returns:
    NumPy array of shape (1, window_size - 1).
  """
  # Always index by position. Integer `[]` on a pandas Series is label-based
  # (or a deprecated positional fallback), which breaks on a sliced Series,
  # so go through `.iloc` when the object provides it.
  positional = data.iloc if hasattr(data, "iloc") else data

  starting_id = timestep - window_size + 1

  if starting_id >= 0:
    windowed_data = list(positional[starting_id:timestep + 1])
  else:
    # Not enough history yet: repeat the first price -starting_id times.
    windowed_data = -starting_id * [positional[0]] + list(positional[0:timestep + 1])

  state = [
      sigmoid(windowed_data[i + 1] - windowed_data[i])
      for i in range(window_size - 1)
  ]

  return np.array([state])

## Loading a Dataset

In [74]:
# Ticker symbol passed to dataset_loader.
stock_name="AAPL"
# Closing-price series returned by dataset_loader; used by the training loop below.
data = dataset_loader(stock_name)

## Training The AI Trader

### Setting Hyperparameters

In [75]:
# Number of values in each state; also used as the trader's state_size.
window_size = 10
# Number of passes over the price series made by the training loop.
episodes=1000

# Passed to trader.batch_train once the replay memory holds more than this many transitions.
batch_size=32
# Steps per episode: one fewer than the number of prices, so that index t+1 always exists.
data_samples=len(data)-1


### Defining Trader Model

In [76]:
# state_size is window_size: the training loop builds states from window_size + 1
# prices, which yield window_size price differences.
trader = AI_Trader(window_size)

In [77]:
trader.model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_8 (Dense)              (None, 32)                352       
_________________________________________________________________
dense_9 (Dense)              (None, 64)                2112      
_________________________________________________________________
dense_10 (Dense)             (None, 128)               8320      
_________________________________________________________________
dense_11 (Dense)             (None, 3)                 387       
Total params: 11,171
Trainable params: 11,171
Non-trainable params: 0
_________________________________________________________________


## Training Loop

In [78]:
# Work on a plain positional array of the closing prices. Integer `[]` lookups
# on a date-indexed pandas Series rely on a deprecated positional fallback.
prices = np.asarray(data)

for episode in range(1, episodes + 1):

  print("Episode: {}/{}".format(episode, episodes))

  # Every episode restarts at the first price with an empty inventory.
  state = state_creator(prices, 0, window_size + 1)

  total_profit = 0
  trader.inventory = []

  for t in tqdm(range(data_samples)):

    action = trader.trade(state)

    next_state = state_creator(prices, t + 1, window_size + 1)
    price = prices[t]
    reward = 0

    if action == 1: #Buying
      trader.inventory.append(price)
      print("AI Trader bought: ", stocks_price_format(price))

    elif action == 2 and len(trader.inventory) > 0: #Selling
      # Sell the oldest holding first (FIFO).
      buy_price = trader.inventory.pop(0)
      profit = price - buy_price

      # Losses give zero reward but still count toward the reported total profit.
      reward = max(profit, 0)
      total_profit += profit
      print("AI Trader sold: ", stocks_price_format(price), " Profit: " + stocks_price_format(profit) )

    # The episode ends on the last step of the price series.
    done = t == data_samples - 1

    trader.memory.append((state, action, reward, next_state, done))

    state = next_state

    if done:
      print("########################")
      print("TOTAL PROFIT: {}".format(total_profit))
      print("########################")

    # Train on the latest transitions once enough experience has been collected.
    if len(trader.memory) > batch_size:
      trader.batch_train(batch_size)

  # Checkpoint the model every 10 episodes.
  if episode % 10 == 0:
    trader.model.save("ai_trader_{}.h5".format(episode))
    

  0%|          | 0/1258 [00:00<?, ?it/s]

Episode: 1/1000
AI Trader bought:  $ 120.070000
AI Trader bought:  $ 123.279999
AI Trader bought:  $ 125.660004
AI Trader bought:  $ 125.610001
AI Trader sold:  $ 128.509995  Profit: $ 8.439995
AI Trader sold:  $ 129.619995  Profit: $ 6.339996
AI Trader bought:  $ 132.070007
AI Trader sold:  $ 125.220001  Profit: - $ 0.440002
AI Trader bought:  $ 125.160004
AI Trader bought:  $ 124.500000
AI Trader sold:  $ 122.769997  Profit: - $ 2.840004
AI Trader bought:  $ 123.379997
AI Trader sold:  $ 121.300003  Profit: - $ 10.770004
AI Trader sold:  $ 118.440002  Profit: - $ 6.720001
AI Trader sold:  $ 115.519997  Profit: - $ 8.980003
AI Trader bought:  $ 119.720001
AI Trader bought:  $ 113.489998
AI Trader bought:  $ 115.150002
AI Trader sold:  $ 115.959999  Profit: - $ 7.419998
AI Trader bought:  $ 116.500000


  3%|▎         | 34/1258 [00:06<14:45,  1.38it/s]

AI Trader sold:  $ 105.760002  Profit: - $ 13.959999


  3%|▎         | 35/1258 [00:08<23:10,  1.14s/it]

AI Trader bought:  $ 103.120003


  3%|▎         | 36/1258 [00:10<29:17,  1.44s/it]

AI Trader sold:  $ 103.739998  Profit: - $ 9.750000


  3%|▎         | 37/1258 [00:12<34:14,  1.68s/it]

AI Trader sold:  $ 109.690002  Profit: - $ 5.459999


  3%|▎         | 38/1258 [00:15<37:02,  1.82s/it]

AI Trader bought:  $ 112.919998


  3%|▎         | 39/1258 [00:17<38:53,  1.91s/it]

AI Trader sold:  $ 113.290001  Profit: - $ 3.209999


  3%|▎         | 40/1258 [00:19<40:16,  1.98s/it]

AI Trader sold:  $ 112.760002  Profit: $ 9.639999


  3%|▎         | 41/1258 [00:21<41:17,  2.04s/it]

AI Trader sold:  $ 107.720001  Profit: - $ 5.199997


  3%|▎         | 43/1258 [00:25<42:56,  2.12s/it]

AI Trader bought:  $ 110.370003


  3%|▎         | 44/1258 [00:27<43:05,  2.13s/it]

AI Trader bought:  $ 109.269997


  4%|▎         | 45/1258 [00:30<43:06,  2.13s/it]

AI Trader bought:  $ 112.309998


  4%|▎         | 46/1258 [00:32<43:08,  2.14s/it]

AI Trader bought:  $ 110.150002


  4%|▎         | 47/1258 [00:34<43:20,  2.15s/it]

AI Trader bought:  $ 112.570000


  4%|▍         | 49/1258 [00:38<43:53,  2.18s/it]

AI Trader sold:  $ 115.309998  Profit: $ 4.939995


  4%|▍         | 50/1258 [00:40<43:40,  2.17s/it]

AI Trader sold:  $ 116.279999  Profit: $ 7.010002


  4%|▍         | 51/1258 [00:43<43:21,  2.16s/it]

AI Trader bought:  $ 116.410004


  4%|▍         | 53/1258 [00:47<42:52,  2.14s/it]

AI Trader bought:  $ 113.449997


  4%|▍         | 56/1258 [00:53<43:04,  2.15s/it]

AI Trader bought:  $ 114.320000


  5%|▍         | 57/1258 [00:55<43:01,  2.15s/it]

AI Trader sold:  $ 115.000000  Profit: $ 2.690002


  5%|▍         | 58/1258 [00:58<42:50,  2.14s/it]

AI Trader sold:  $ 114.709999  Profit: $ 4.559998


  5%|▍         | 59/1258 [01:00<42:45,  2.14s/it]

AI Trader sold:  $ 112.440002  Profit: - $ 0.129997


  5%|▍         | 60/1258 [01:02<42:52,  2.15s/it]

AI Trader sold:  $ 109.059998  Profit: - $ 7.350006


  5%|▍         | 62/1258 [01:06<43:21,  2.18s/it]

AI Trader bought:  $ 109.580002


  5%|▌         | 63/1258 [01:08<43:04,  2.16s/it]

AI Trader sold:  $ 110.379997  Profit: - $ 3.070000


  5%|▌         | 64/1258 [01:11<43:00,  2.16s/it]

AI Trader sold:  $ 110.779999  Profit: - $ 3.540001


  5%|▌         | 66/1258 [01:15<42:41,  2.15s/it]

AI Trader sold:  $ 110.779999  Profit: $ 1.199997


  5%|▌         | 69/1258 [01:21<42:55,  2.17s/it]

AI Trader bought:  $ 111.599998


  6%|▌         | 70/1258 [01:24<42:39,  2.15s/it]

AI Trader bought:  $ 111.790001


  6%|▌         | 71/1258 [01:26<42:28,  2.15s/it]

AI Trader bought:  $ 110.209999


  6%|▌         | 72/1258 [01:28<42:27,  2.15s/it]

AI Trader bought:  $ 111.860001


  6%|▌         | 73/1258 [01:30<43:02,  2.18s/it]

AI Trader bought:  $ 111.040001


  6%|▌         | 75/1258 [01:34<42:26,  2.15s/it]

KeyboardInterrupt: ignored