In [1]:
import math
import random
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas_datareader as data_reader
from pandas.util.testing import assert_frame_equal

from tqdm import tqdm_notebook, tqdm
from collections import deque

  from pandas.util.testing import assert_frame_equal


In [13]:
class IA_Trader():
  def __init__(self, state_size, action_space=3, name="IA_Trader"):
    self.state_size = state_size
    self.action_space = action_space
    self.memory = deque(maxlen = 2000)
    self.name = name

    self.gamma = 0.95
    self.epsilon = 1.0
    self.epsilon_final = 0.01
    self.epsilon_decay = 0.995
    self.model = self.model_builder()

  def model_builder(self):
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Input(shape=(self.state_size, 1)))
    model.add(tf.keras.layers.LSTM(units=32, recurrent_activation='relu'))
    model.add(tf.keras.layers.Dense(units=32, activation='relu'))
    model.add(tf.keras.layers.Dense(units=64, activation='relu'))
    model.add(tf.keras.layers.Dense(units=128, activation='relu'))
    model.add(tf.keras.layers.Dense(units=self.action_space, activation='linear'))
    model.compile(loss='mse',optimizer=tf.keras.optimizers.Adam(learning_rate=0.001))
    return model

  def trade(self, state):
    if random.random() <= self.epsilon:
      return random.randrange(self.action_space)

    actions = self.model.predict(state[0])
    return np.argmax(actions[0])

  def batch_train(self, batch_size):
    batch = []
    for i in range(len(self.memory) - batch_size + 1, len(self.memory)):
      batch.append(self.memory[i])

    for state, action, reward, next_state, done in batch:
      if not done:
        reward = reward + self.gamma * np.amax(self.model.predict(next_state[0]))

      target = self.model.predict(state[0])
      target[0][action] = reward

      self.model.fit(state[0], target, epochs=1, verbose=0)

    if self.epsilon > self.epsilon_final:
      self.epsilon *= self.epsilon_decay

In [3]:
def sigmoid(x):
  return 1 / (1 + math.exp(-x))

In [4]:
def stocks_price_format(n):
  if n < 0:
    return "- $ {0:2f}".format(abs(n))
  else:
    return "$ {0:2f}".format(abs(n))

In [5]:
import yfinance as yf

In [6]:
def dataset_loader(stock_name):
  dataset = yf.download(stock_name, start='2016-06-02')
  start_date = str(dataset.index[0]).split()[0]
  end_date = str(dataset.index[-1]).split()[0]
  close = dataset['Close']
  return close

In [7]:
def state_creator(data, timestep, window_size):
  starting_id = timestep - window_size + 1

  if starting_id >= 0:
    windowed_data = np.array(data[starting_id:timestep + 1])
  else:
   windowed_data = np.array(- starting_id * [data[0]] + list(data[0:timestep + 1]))

  state = []
  for i in range(window_size - 1):
    state.append(sigmoid(windowed_data[i + 1] - windowed_data[i]))

  return np.array([state]), windowed_data

In [8]:
stock_name = "MSFT"
data = dataset_loader(stock_name)

[*********************100%***********************]  1 of 1 completed


In [9]:
window_size = 10
episodes = 1000
batch_size = 32
data_samples = len(data) - 1

In [14]:
trader = IA_Trader(window_size)



ValueError: ignored

In [11]:
trader.model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 10, 32)            64        
                                                                 
 dense_1 (Dense)             (None, 10, 64)            2112      
                                                                 
 dense_2 (Dense)             (None, 10, 128)           8320      
                                                                 
 dense_3 (Dense)             (None, 10, 3)             387       
                                                                 
Total params: 10,883
Trainable params: 10,883
Non-trainable params: 0
_________________________________________________________________


In [12]:
for episode in range(1, episodes + 1):
    print(f"Episodes {episode}/{episodes}")
    state = state_creator(data, 0, window_size + 1)
    total_profit = 0
    trader.inventory= []

    for t in tqdm(range(data_samples)):
      action = trader.trade(state)
      next_state = state_creator(data, t + 1, window_size+1)
      reward = 0

      if action == 1: # comprando ação
        trader.inventory.append(data[t])
        print("AI Trader bought: ", stocks_price_format(data[t]))
      elif action == 2 and len(trader.inventory) > 0: # Vender ação
        buy_price = trader.inventory.pop(0)

        reward = max(data[t] - buy_price, 0)
        total_profit += data[t] - buy_price
        print("AI Trader sold: ", stocks_price_format(data[t]), " Profit: " + stocks_price_format(data[t] - buy_price))

      if t == data_samples - 1:
        done = True
      else:
        done = False

      trader.memory.append([state, action, reward , next_state, done])

      statet = next_state

      if done:
        print("#"*25)
        print(f"Total Profit {total_profit}")
        print("#"*25)

      if len(trader.memory) > batch_size:
        trader.batch_train(batch_size)

      if episode % 10 == 0:
        trader.model.save(f"ai_trader_{episode}.h5")

Episodes 1/1000


  0%|          | 0/1794 [00:00<?, ?it/s]

AI Trader bought:  $ 52.480000
AI Trader sold:  $ 51.790001  Profit: - $ 0.689999
AI Trader bought:  $ 52.040001
AI Trader bought:  $ 51.480000
AI Trader bought:  $ 50.139999
AI Trader sold:  $ 50.130001  Profit: - $ 1.910000
AI Trader sold:  $ 51.189999  Profit: - $ 0.290001
AI Trader sold:  $ 50.990002  Profit: $ 0.850002
AI Trader bought:  $ 51.910000
AI Trader sold:  $ 48.430000  Profit: - $ 3.480000
AI Trader bought:  $ 50.540001
AI Trader bought:  $ 51.169998
AI Trader bought:  $ 51.160000
AI Trader bought:  $ 51.169998
AI Trader sold:  $ 51.380001  Profit: $ 0.840000
AI Trader bought:  $ 52.299999
AI Trader sold:  $ 52.590000  Profit: $ 1.420002
AI Trader sold:  $ 53.209999  Profit: $ 2.049999
AI Trader sold:  $ 53.509998  Profit: $ 2.340000
AI Trader sold:  $ 53.740002  Profit: $ 1.440002
AI Trader bought:  $ 53.700001
AI Trader sold:  $ 53.959999  Profit: $ 0.259998
AI Trader bought:  $ 53.090000


  2%|▏         | 32/1794 [00:10<09:29,  3.10it/s]


KeyboardInterrupt: ignored