In [1]:
# Sanity check: ask TensorFlow for the name of the GPU device it can see.
import tensorflow as tf
tf.test.gpu_device_name()

'/device:GPU:0'

In [2]:
!pip install pandas-datareader



In [3]:
import os
import warnings
warnings.filterwarnings("ignore")
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import math
import random
import numpy as np

import tensorflow as tf
import matplotlib.pyplot as plt

import pandas as pd
import pandas_datareader as data_reader 
from tqdm import tqdm_notebook, tqdm
from collections import deque

In [4]:
# Show the TensorFlow version this notebook is running against.
tf.__version__

'2.2.0'

In [5]:
class DQN_trader():
  """Deep Q-learning agent that chooses between staying, buying and selling.

  The agent keeps a bounded replay memory of
  ``(state, action, reward, next_state, done)`` tuples and a small dense
  network that maps a state vector to one Q-value per action.
  """

  def __init__(self, state_size, action_num=3, model_name="DQN_trader"): #Stay, Buy, Sell
    """Set up the hyper-parameters, the replay memory and the Q-network.

    Args:
      state_size: Number of features in one state vector (network input width).
      action_num: Number of discrete actions (network output width).
      model_name: Label stored on the agent; not otherwise used by this class.
    """
    self.state_size = state_size
    self.action_num = action_num
    self.memory = deque(maxlen=2000)  # oldest experiences are dropped once full
    self.inventory = []               # managed by the caller; this class never reads it
    self.model_name = model_name

    self.gamma = 0.95           # discount applied to the next state's best Q-value
    self.epsilon = 1.0          # probability of taking a random action
    self.epsilon_final = 0.01   # decay stops once epsilon is no longer above this
    self.epsilon_decay = 0.995  # multiplicative decay applied once per batch_train call

    self.model = self.model_dnn()

  def model_dnn(self):
    """Build and compile the dense Q-network.

    Returns:
      A compiled ``tf.keras`` Sequential model with ``state_size`` inputs and
      ``action_num`` linear outputs, trained with MSE loss and Adam.
    """
    model = tf.keras.models.Sequential()

    model.add(tf.keras.layers.Dense(units=16, activation='relu', input_dim=self.state_size))
    model.add(tf.keras.layers.Dense(units=32, activation='relu'))
    model.add(tf.keras.layers.Dense(units=64, activation='relu'))
    model.add(tf.keras.layers.Dense(units=128, activation='relu'))

    # Linear output: one unbounded Q-value estimate per action.
    model.add(tf.keras.layers.Dense(units=self.action_num, activation='linear'))

    # `learning_rate` is the supported keyword; `lr` is only a deprecated alias.
    model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3))

    return model

  def trade(self, state):
    """Pick an action for ``state`` with an epsilon-greedy policy.

    Args:
      state: Model input for a single state (first axis is the batch axis).

    Returns:
      A random action index with probability ``epsilon``; otherwise the index
      of the largest predicted Q-value.
    """
    if random.random() <= self.epsilon:
      return random.randrange(self.action_num)

    actions = self.model.predict(state, verbose=0)
    return np.argmax(actions[0])

  def batch_train(self, batch_size):
    """Fit the Q-network on the most recent ``batch_size`` experiences.

    Each experience is fitted on its own, in memory order, so every target is
    computed with the weights left by the previous fit. Afterwards epsilon is
    decayed once if it is still above ``epsilon_final``.

    Args:
      batch_size: Number of most recent experiences to replay. When the memory
        holds fewer, all stored experiences are used.
    """
    memory_len = len(self.memory)
    # Start exactly `batch_size` items from the end; clamp at 0 so a short
    # memory is replayed once instead of wrapping around via negative indexes.
    first = max(memory_len - batch_size, 0)
    batch = [self.memory[i] for i in range(first, memory_len)]

    for state, action, reward, next_state, done in batch:
      q_target = reward
      if not done:
        # Bellman backup: immediate reward plus discounted best next Q-value.
        q_target = reward + self.gamma * np.amax(self.model.predict(next_state, verbose=0)[0])

      # Only the taken action's Q-value is moved; the others keep their
      # current predictions so they contribute no error.
      target = self.model.predict(state, verbose=0)
      target[0][action] = q_target

      self.model.fit(state, target, epochs=1, verbose=0)

    if self.epsilon > self.epsilon_final:
      self.epsilon *= self.epsilon_decay

In [6]:
def sigmoid(x):
  """Return the logistic function 1 / (1 + e**-x) for a scalar ``x``.

  The computation is split by sign so that ``math.exp`` is only ever called
  with a non-positive argument; this avoids the ``OverflowError`` that
  ``math.exp(-x)`` raises for large negative ``x``.
  """
  if x >= 0:
    return 1 / (1 + math.exp(-x))
  # Algebraically identical form for x < 0 that cannot overflow.
  exp_x = math.exp(x)
  return exp_x / (1 + exp_x)

def stocks_price_format(n):
  """Format a price or profit as a dollar string with two decimals.

  Args:
    n: Numeric amount; negative values are shown with a leading "- ".

  Returns:
    "$ 12.34" for non-negative amounts and "- $ 12.34" for negative ones.
  """
  # ".2f" fixes the precision at two decimals; the previous "2f" only set a
  # minimum field width and printed six decimals.
  amount = "$ {0:.2f}".format(abs(n))
  return "- " + amount if n < 0 else amount

In [7]:
def dataset_loader(stock_name, start=None, end=None):
  """Download price history for ``stock_name`` and return its closing prices.

  Args:
    stock_name: Ticker symbol passed to ``pandas_datareader``.
    start: Optional start of the requested range, forwarded to ``DataReader``.
    end: Optional end of the requested range, forwarded to ``DataReader``.
      With both left as ``None`` the reader's own default range is used.

  Returns:
    The ``'Close'`` column of the downloaded frame.

  Raises:
    ValueError: If the download returned no rows.
  """
  # NOTE(review): whether the "yahoo" source still works depends on the
  # installed pandas-datareader version -- confirm in the target environment.
  dataset = data_reader.DataReader(stock_name, data_source="yahoo", start=start, end=end)

  # Fail here with a clear message rather than later on an empty series.
  if dataset.empty:
    raise ValueError("No price data returned for {!r}".format(stock_name))

  return dataset['Close']


def state_creator(data, timestep, window_size):
  """Build the model input for ``timestep`` from the last ``window_size`` prices.

  The window ends at ``timestep`` (inclusive). When it would start before the
  beginning of ``data`` it is left-padded with the first price. The state is
  the sigmoid of each consecutive price difference inside the window.

  Args:
    data: One-dimensional sequence of prices (list, ndarray or pandas Series).
    timestep: Position of the last price in the window.
    window_size: Number of prices in the window.

  Returns:
    ``np.ndarray`` of shape ``(1, window_size - 1)``.
  """
  # Work on a plain ndarray so every lookup below is positional; integer keys
  # on a label-indexed Series are not reliably treated as positions.
  prices = np.asarray(data, dtype=float)

  starting_id = timestep - window_size + 1

  if starting_id >= 0:
    window = prices[starting_id:timestep + 1]
  else:
    # Not enough history yet: repeat the first price to fill the gap.
    padding = np.full(-starting_id, prices[0])
    window = np.concatenate([padding, prices[0:timestep + 1]])

  state = [sigmoid(window[i + 1] - window[i]) for i in range(window_size - 1)]

  return np.array([state])

In [8]:
# Ticker to train on; `data` holds the closing prices returned by dataset_loader.
stock_name = "TSM"
data = dataset_loader(stock_name)
# Preview the first rows of the closing-price series.
data.head()

Date
2015-07-02    23.150000
2015-07-06    22.940001
2015-07-07    22.959999
2015-07-08    22.230000
2015-07-09    22.219999
Name: Close, dtype: float64

In [9]:
# Training configuration.
# window_size is the agent's state size; states are built from window_size + 1 prices.
window_size = 10
# Number of full passes over the price series.
episodes = 100

# Passed to trader.batch_train; training starts once memory holds more than this many experiences.
batch_size = 32
# Steps per episode: one fewer than the number of prices, because each step also reads the price at t+1.
data_samples = len(data) - 1

In [10]:
# state_size is window_size: state_creator is later called with window_size + 1
# prices, which yields window_size consecutive differences.
trader = DQN_trader(window_size)

In [11]:
# Print the layer-by-layer structure and parameter counts of the Q-network.
trader.model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 16)                176       
_________________________________________________________________
dense_1 (Dense)              (None, 32)                544       
_________________________________________________________________
dense_2 (Dense)              (None, 64)                2112      
_________________________________________________________________
dense_3 (Dense)              (None, 128)               8320      
_________________________________________________________________
dense_4 (Dense)              (None, 3)                 387       
Total params: 11,539
Trainable params: 11,539
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Per-episode counters collected for later inspection.
buy_list = []   # buy actions taken in each episode
sell_list = []  # sells actually executed in each episode

for episode in range(1, episodes + 1):

  print("Episode: {}/{}".format(episode, episodes))

  # window_size + 1 prices produce window_size differences, matching the
  # state_size the trader was built with.
  state = state_creator(data, 0, window_size + 1)

  total_profit = 0
  trader.inventory = []
  buy_count = 0
  sell_count = 0

  for t in range(data_samples):
    action = trader.trade(state)
    next_state = state_creator(data, t+1, window_size + 1)
    reward = 0

    # Positional lookup: `data` is a date-indexed Series, so `data[t]` would
    # depend on pandas' deprecated integer-as-position fallback.
    price = data.iloc[t]

    if action == 1:                                                     #Buying
      trader.inventory.append(price)
      print("DQN Trader bought: ", stocks_price_format(price))
      buy_count += 1
    elif action == 2 and len(trader.inventory) > 0:                     #Selling
      # First in, first out: sell the oldest share still held.
      buy_price = trader.inventory.pop(0)
      sell_count += 1
      profit = price - buy_price
      # Losses are clipped to zero in the reward but still count towards the total.
      reward = max(profit, 0)
      total_profit += profit
      print("DQN Trader sold: ", stocks_price_format(price), " Profit: " + stocks_price_format(profit) )

    # The episode ends on the last step of the series.
    done = t == data_samples - 1

    trader.memory.append((state, action, reward, next_state, done))
    state = next_state

    if done:
      print("########################")
      print("TOTAL PROFIT: {}".format(total_profit))
      print("########################")

    # Train after every step once enough experiences have been collected.
    if len(trader.memory) > batch_size:
      trader.batch_train(batch_size)

  # Optional checkpoint every 10 episodes:
  # if episode % 10 == 0:
  #   trader.model.save("ai_trader_{}.h5".format(episode))
  sell_list.append(sell_count)
  buy_list.append(buy_count)