In [1]:
#pip install tensorflow==1.15

In [2]:
#pip install matplotlib tqdm

In [3]:
import math
import random
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas_datareader as data_reader

In [4]:
from tqdm import tqdm_notebook, tqdm
from pandas.util.testing import *
from collections import deque

In [5]:
class AI_Trader():

  def __init__(self, state_size, action_space=3, model_name='AITrader'):
    self.state_size = state_size
    self.action_space = action_space
    self.memory = deque(maxlen = 2000)
    self.model_name = model_name
    self.gamma = 0.95
    self.epsilon = 1.0
    self.epsilon_final = 0.01
    self.epsilon_decay = 0.995
    self.model = self.model_builder()
  
  def model_builder(self):
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Dense(units=32,
                                    activation = "relu",
                                    input_dim = self.state_size))
    model.add(tf.keras.layers.Dense(units=64,
                                    activation = "relu"))
    model.add(tf.keras.layers.Dense(units=128,
                                    activation = "relu"))
    model.add(tf.keras.layers.Dense(units=self.action_space,
                                    activation = "linear"))
    model.compile(loss = "mse",
                  optimizer = tf.keras.optimizers.Adam(lr=0.001))
    return model
  
  def trade(self, state):
    if random.random() <= self.epsilon:
      return random.randrange(self.action_space)
    actions = self.model.predict(state)
    return np.argmax(actions[0])

  def batch_train(self, batch_size):
    batch = []
    for i in range(len(self.memory) - batch_size + 1, len(self.memory)):
      batch.append(self.memory[i])
    for state, action, reward, next_state, done in batch:
      if not done:
        reward = reward+self.gamma*np.amax(self.model.predict(next_state)[0])
      target = self.model.predict(state)
      target[0][action] = reward
      self.model.fit(state, target, epochs=1, verbose=0)
    if self.epsilon > self.epsilon_final:
      self.epsilon *= self.epsilon_decay

In [6]:
def sigmoid(x):
  return 1 / (1 + math.exp(-x))

In [7]:
def stocks_price_format(n):
  if n < 0:
    return "- R$ {0:2f}".format(abs(n))
  else:
    return "R$ {0:2f}".format(abs(n))

In [8]:
dataset = stocks_price_format(100)
dataset = data_reader.DataReader("AAPL", data_source="yahoo")

In [9]:
print(dataset.head())
print(str(dataset.index[0]).split()[0])
print(dataset.index[-1])
print(dataset['Close'])

                 High        Low       Open      Close       Volume  Adj Close
Date                                                                          
2015-12-30  27.174999  26.795000  27.145000  26.830000  100855200.0  24.857229
2015-12-31  26.757500  26.205000  26.752501  26.315001  163649200.0  24.380093
2016-01-04  26.342501  25.500000  25.652500  26.337500  270597600.0  24.400942
2016-01-05  26.462500  25.602501  26.437500  25.677500  223164000.0  23.789471
2016-01-06  25.592501  24.967501  25.139999  25.174999  273829600.0  23.323915
2015-12-30
2020-12-28 00:00:00
Date
2015-12-30     26.830000
2015-12-31     26.315001
2016-01-04     26.337500
2016-01-05     25.677500
2016-01-06     25.174999
                 ...    
2020-12-21    128.229996
2020-12-22    131.880005
2020-12-23    130.960007
2020-12-24    131.970001
2020-12-28    135.100006
Name: Close, Length: 1258, dtype: float64


In [10]:
def dataset_loader(stock_name):
  dataset = data_reader.DataReader(stock_name, data_source="yahoo")
  start_date = str(dataset.index[0]).split()[0]
  end_date = str(dataset.index[-1]).split()[0]
  close = dataset['Close']
  return close

In [11]:
def state_creator(data, timestep, window_size):
  starting_id = timestep - window_size + 1

  if starting_id >= 0:
    windowed_data = data[starting_id:timestep + 1]
  else:
    windowed_data = - starting_id * [data[0]] + list(data[0:timestep + 1])
  
  state = []
  for i in range(window_size - 1):
    state.append(sigmoid(windowed_data[i + 1]))
  
  return np.array([state]), windowed_data

In [12]:
stock_name = "AAPL"
data = dataset_loader(stock_name)
s = state_creator(data, 0, 5)
window_size = 10
episodes = 1000
batch_size = 32
data_samples = len(data) - 1

In [13]:
trader = AI_Trader(window_size)

print(trader.model.summary())

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 32)                352       
_________________________________________________________________
dense_1 (Dense)              (None, 64)                2112      
_________________________________________________________________
dense_2 (Dense)              (None, 128)               8320      
_________________________________________________________________
dense_3 (Dense)              (None, 3)                 387       
Total params: 11,171
Trainable params: 11,171
Non-trainable params: 0
_________________________________________________________________
None


In [15]:
for episode in range(1, episodes+1):
  print(f'Etapa: {episode} de {episodes}')
  state = state_creator(data, 0, window_size + 1)
  total_profit = 0
  trader.inventory = []
  for t in tqdm(range(data_samples)):
    action = trader.trade(state)
    next_state = state_creator(data, t + 1, window_size + 1)
    reward = 0

    if action == 1:
      trader.inventory.append(data[t])
      print("AI Trader comprou: ", stocks_price_format(data[t]))
    elif action == 2 and len(trader.inventory) > 0:
      buy_price = trader.inventory.pop(0)

      reward = max(data[t] - buy_price, 0)
      total_profit += data[t] - buy_price
      print("AI Trader vendeu: ", stocks_price_format(data[t]),
            "Lucro de: " + stocks_price_format(data[t] - buy_price))
    
    if t == data_samples - 1:
      done = True
    else:
      done = False
    
    trader.memory.append((state, action, reward, next_state, done))
    state = next_state

    if done:
      print("#"*30)
      print(f'Lucro Total Estimado: {total_profit}')
      print("#"*30)

    if len(trader.memory) > batch_size:
      trader.batch_train(batch_size)
    
  if episode % 10 == 0:
    trader.model.save("ai_trader_{}.h5".format(episode))

  0%|          | 0/1257 [00:00<?, ?it/s]Etapa: 1 de 1000



ValueError: Failed to find data adapter that can handle input: (<class 'tuple'> containing values of types {'(<class \'list\'> containing values of types {"<class \'float\'>", "<class \'numpy.float64\'>"})', "<class 'numpy.ndarray'>"}), <class 'NoneType'>