<a href="https://colab.research.google.com/github/SChoi005/DeepLearningBasics/blob/main/AI_trader_reinforcement_learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# AI Trader

## Import

In [None]:
!pip install --upgrade pandas-datareader

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
!pip install --upgrade pandas

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import math
import random
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas_datareader as data_reader

from tqdm import tqdm_notebook, tqdm
from collections import deque
import os
#resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='grpc://' + os.environ['COLAB_TPU_ADDR'])

#tf.config.experimental_connect_to_cluster(resolver)
#tf.tpu.experimental.initialize_tpu_system(resolver)
#strategy = tf.distribute.TPUStrategy(resolver)

In [None]:
# Show the installed TensorFlow version so the run environment is recorded.
tf.__version__

'2.8.2'

## Building the AI Trader network

In [None]:
class AI_Trader():
  """Deep Q-learning agent that picks one of `action_space` trading actions.

  The agent keeps a bounded replay memory of
  (state, action, reward, next_state, done) tuples, an inventory of purchase
  prices, and a small fully connected network mapping a state vector to one
  Q-value per action. Actions are chosen epsilon-greedily.
  """

  def __init__(self, state_size, action_space=3, model_name="AITrader"): #Stay, Buy, Sell
    """Create the agent and build its Q-network.

    Args:
      state_size: Number of features in one state vector (network input size).
      action_space: Number of discrete actions (network output size).
      model_name: Label stored on the instance.
    """
    self.state_size = state_size
    self.action_space = action_space
    self.memory = deque(maxlen=2000)  # replay memory; oldest entries are dropped
    self.inventory = []               # purchase prices of positions still held
    self.model_name = model_name

    self.gamma = 0.96                 # discount factor for future reward
    self.epsillon = 1.0               # current exploration probability
    self.epsillon_final = 0.01        # exploration stops decaying at this floor
    self.epsillon_decay = 0.995       # multiplicative decay per batch_train call

    self.model = self.model_builder()

  def model_builder(self):
    """Build and compile the Q-network.

    Returns:
      A compiled Keras Sequential model: Dense layers of 32, 64 and 128 ReLU
      units followed by a linear layer with one output per action, trained
      with MSE loss and Adam at learning rate 0.001.
    """
    model = tf.keras.models.Sequential()

    model.add(tf.keras.layers.Dense(units=32, activation='relu', input_dim=self.state_size))
    model.add(tf.keras.layers.Dense(units=64, activation='relu'))
    model.add(tf.keras.layers.Dense(units=128, activation='relu'))

    model.add(tf.keras.layers.Dense(units=self.action_space, activation='linear'))

    # `learning_rate` replaces the deprecated `lr` keyword.
    model.compile(loss="mse", optimizer=tf.keras.optimizers.Adam(learning_rate=0.001))

    return model

  def trade(self, state):
    """Choose an action for `state` using an epsilon-greedy policy.

    Args:
      state: Model input for a single state (one row).

    Returns:
      An int in [0, action_space): a uniformly random action with probability
      `epsillon`, otherwise the action with the highest predicted Q-value.
    """
    if random.random() <= self.epsillon:
      return random.randrange(self.action_space)

    q_values = self.model.predict(state, verbose=0)
    return int(np.argmax(q_values[0]))

  def batch_train(self, batch_size):
    """Fit the network on the most recent `batch_size` stored experiences.

    Each experience is trained individually: the Q-value of the action taken
    is replaced by the reward (plus the discounted best Q-value of the next
    state when the episode is not finished) and the model is fitted for one
    epoch on that single sample. Afterwards `epsillon` is decayed once if it
    is still above `epsillon_final`.

    Args:
      batch_size: Number of most recent experiences to train on. If fewer are
        stored, all stored experiences are used.
    """
    # Clamp the start so exactly the last `batch_size` entries (or everything,
    # when memory is shorter) are used; a negative start would wrap around.
    memory_len = len(self.memory)
    start = max(memory_len - batch_size, 0)
    batch = [self.memory[i] for i in range(start, memory_len)]

    for state, action, reward, next_state, done in batch:
      target_q = reward
      if not done:
        # Bootstrap from the best predicted value of the following state.
        target_q = reward + self.gamma * np.amax(self.model.predict(next_state, verbose=0)[0])

      # Only the taken action's Q-value changes; others keep their prediction.
      target = self.model.predict(state, verbose=0)
      target[0][action] = target_q

      self.model.fit(state, target, epochs=1, verbose=0)

    if self.epsillon > self.epsillon_final:
      self.epsillon *= self.epsillon_decay

## Dataset Preprocessing

In [None]:
def sigmoid(x):
  """Return the logistic sigmoid 1 / (1 + e^-x) of a scalar `x`.

  The result is in [0, 1]. Large-magnitude inputs saturate to 0.0 or 1.0
  instead of raising OverflowError.
  """
  if x >= 0:
    return 1 / (1 + math.exp(-x))
  # For negative x, exp(-x) can overflow; exp(x) / (1 + exp(x)) is the same
  # value and only ever exponentiates a negative number.
  exp_x = math.exp(x)
  return exp_x / (1 + exp_x)

In [None]:
def stocks_price_format(n):
  """Format a price or profit `n` as a dollar string with two decimals.

  Negative values are rendered with a leading "- " and the absolute amount,
  e.g. -1.5 -> "- $ 1.50" and 36.264999 -> "$ 36.26".
  """
  # ".2f" fixes two decimal places; the previous "2f" only set a minimum width
  # of 2 and printed six decimals.
  amount = "$ {0:.2f}".format(abs(n))
  if n < 0:
    return "- " + amount
  return amount

In [None]:
def dataset_loader(stock_name):
  """Download price history for `stock_name` and return its closing prices.

  The data is fetched with pandas-datareader from the "yahoo" source; no date
  range is passed, so the range is whatever DataReader uses by default.

  Args:
    stock_name: Ticker symbol to download, e.g. "AAPL".

  Returns:
    The 'Close' column of the downloaded table.

  Raises:
    ValueError: If the download returned no rows.
  """
  dataset = data_reader.DataReader(stock_name, data_source="yahoo")

  # Fail with a clear message instead of an IndexError further down the line.
  if len(dataset) == 0:
    raise ValueError("No price data returned for {!r}".format(stock_name))

  return dataset['Close']

## State creator

In [None]:
def state_creator(data, timestep, window_size):
  """Build the state vector for `timestep` from the preceding price window.

  The window covers the `window_size` prices ending at `timestep` (inclusive).
  When fewer prices are available, the window is left-padded with the first
  price. The state is the sigmoid of each consecutive price difference.

  Args:
    data: Sequence of prices (list, numpy array or pandas Series).
    timestep: Position of the last price in the window.
    window_size: Number of prices in the window.

  Returns:
    numpy array of shape (1, window_size - 1).
  """
  # Work on a plain array so every lookup below is positional. Integer keys
  # on a pandas Series are label lookups (or a deprecated positional
  # fallback), which breaks for sliced or integer-indexed Series.
  prices = np.asarray(data)

  starting_id = timestep - window_size + 1

  if starting_id >= 0:
    windowed_data = prices[starting_id:timestep + 1]
  else:
    # Not enough history yet: repeat the first price to fill the window.
    padding = np.full(-starting_id, prices[0])
    windowed_data = np.concatenate((padding, prices[:timestep + 1]))

  state = [
      sigmoid(windowed_data[i + 1] - windowed_data[i])
      for i in range(window_size - 1)
  ]

  return np.array([state])

## Loading a dataset

In [None]:
# Ticker to trade; `data` holds the closing prices returned by dataset_loader.
stock_name = "AAPL"
data = dataset_loader(stock_name)

In [None]:
# Display the loaded closing-price series as a quick sanity check.
data

Date
2017-07-10     36.264999
2017-07-11     36.382500
2017-07-12     36.435001
2017-07-13     36.942501
2017-07-14     37.259998
                 ...    
2022-06-29    139.229996
2022-06-30    136.720001
2022-07-01    138.929993
2022-07-05    141.559998
2022-07-06    142.919998
Name: Close, Length: 1257, dtype: float64

# Training the AI Trader

## Setting hyperparameters

In [None]:
# Number of features in each state; also passed to AI_Trader as its state size.
window_size = 10
# Number of full passes over the price history.
episodes = 1000

# Number of recent experiences handed to trader.batch_train at each step.
batch_size = 32
# Steps per episode: one fewer than the number of prices, because each step
# also builds the state for t + 1.
data_samples = len(data) - 1

## Defining the Trader model

In [None]:
# Build the agent with a network input size of `window_size`; the training
# loop creates states from window_size + 1 prices, i.e. window_size differences.
trader = AI_Trader(window_size)

  super(Adam, self).__init__(name, **kwargs)


In [None]:
# Print the layer-by-layer structure and parameter counts of the Q-network.
trader.model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 32)                352       
                                                                 
 dense_1 (Dense)             (None, 64)                2112      
                                                                 
 dense_2 (Dense)             (None, 128)               8320      
                                                                 
 dense_3 (Dense)             (None, 3)                 387       
                                                                 
Total params: 11,171
Trainable params: 11,171
Non-trainable params: 0
_________________________________________________________________


## Training loop

In [None]:
# Training: each episode is one pass over the price history. At every step the
# agent picks an action, the resulting experience is stored in replay memory,
# and the network is retrained once more than `batch_size` experiences exist.
for episode in range(1, episodes + 1):

  print("Episode: {}/{}".format(episode, episodes))
  state = state_creator(data, 0, window_size + 1)

  total_profit = 0
  trader.inventory = []

  for t in tqdm(range(data_samples)):
    # `data` is indexed by date, so use an explicit positional lookup rather
    # than data[t], which relies on pandas' deprecated integer-key fallback.
    price = data.iloc[t]

    action = trader.trade(state)

    next_state = state_creator(data, t+1, window_size + 1)
    reward = 0

    if action == 1: # buying
      trader.inventory.append(price)
      print("AI Trader bought: ", stocks_price_format(price))
    elif action == 2 and len(trader.inventory) > 0: # Selling
      # Sell the oldest open position first.
      buy_price = trader.inventory.pop(0)
      profit = price - buy_price

      # Losses are not penalised: the reward is clipped at zero.
      reward = max(profit, 0)
      total_profit += profit
      print("AI Trader sold: ", stocks_price_format(price), " Profit: " + stocks_price_format(profit) )

    # The episode ends on the last step of the price history.
    done = t == data_samples - 1

    trader.memory.append((state, action, reward, next_state, done))
    state = next_state

    if done:
      print("########################")
      print("TOTAL PROFIT: {}".format(total_profit))
      print("########################")

    if len(trader.memory) > batch_size:
      trader.batch_train(batch_size)

  # Checkpoint the network every 10 episodes.
  if episode % 10 == 0:
    trader.model.save("ai_trader_{}.h5".format(episode))

 19%|█▉        | 240/1256 [12:41<53:42,  3.17s/it]  


KeyboardInterrupt: ignored