In [1]:
import math
import random
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import yfinance as yf
from pandas.util.testing import assert_frame_equal #import alterado

from tqdm import tqdm_notebook, tqdm
from collections import deque

  


In [3]:
class AI_Trader():

    def __init__(self, state_size, action_space = 3, model_name = "AITrader"):
        self.state_size = state_size
        self.action_space = action_space
        self.memory = deque(maxlen = 2000)
        self.model_name = model_name

        self.gamma = 0.95
        self.epsilon = 1.0
        self.epsilon_final = 0.01
        self.epsilon_decay = 0.995
        self.model = self.model_builder()

    def model_builder(self):
        model = tf.keras.models.Sequential()
        model.add(tf.keras.Input(shape=(self.state_size,)))
        model.add(tf.keras.layers.Dense(units = 32, activation = "relu"))
        model.add(tf.keras.layers.Dense(units = 64, activation = "relu"))
        model.add(tf.keras.layers.Dense(units = 128, activation = "relu"))
        model.add(tf.keras.layers.Dense(units = self.action_space, activation = "linear"))
        model.compile(loss = "mse", optimizer = tf.keras.optimizers.Adam(lr = 0.001))
        return model


    def trade(self, state):
        if random.random() <= self.epsilon:
            return random.randrange(self.action_space)

        actions = self.model.predict(state)
        return np.argmax(actions[0])


    def batch_train(self, batch_size):  # sourcery skip: for-append-to-extend, list-comprehension
        batch = []
        for i in range(len(self.memory) - batch_size + 1, len(self.memory)):
            batch.append(self.memory[i])

        for state, action, reward, next_state, done in batch:
            if not done:
                reward = reward + self.gamma * np.amax(self.model.predict(next_state)[0])

            target = self.model.predict(state)
            target[0][action] = reward

            self.model.fit(state, target, epochs=1, verbose=0)

        if self.epsilon > self.epsilon_final:
            self.epsilon *= self.epsilon_decay

In [4]:
def sigmoid(x):
    return 1 / (1 + math.exp(-x))

def stocks_price_format(n):
    return "- $ {0:2f}".format(abs(n)) if n < 0 else "$ {0:2f}".format(abs(n))

In [5]:
def dataset_loader(stock_name):
    #dataset = data_reader.DataReader(stock_name, data_source = "yahoo")
    dataset = yf.download(stock_name, start='2016-06-02')
    start_date = str(dataset.index[0]).split()[0]
    end_date = str(dataset.index[-1]).split()[0]
    return dataset['Close']

In [6]:
def state_creator(data, timestep, window_size):
    # sourcery skip: for-append-to-extend, list-comprehension
    starting_id = timestep - window_size + 1

    if starting_id >= 0:
        # windowed_data = data[starting_id:timestep + 1] # Atualizado 14/03/2022
        windowed_data = np.array(data[starting_id:timestep + 1]) # Atualizado 14/03/2022
    else:
        # windowed_data = - starting_id * [data[0]] + list(data[0:timestep + 1]) # Atualizado 14/03/2022
        windowed_data = np.array(- starting_id * [data[0]] + list(data[:timestep + 1]))

    state = []
    for i in range(window_size - 1):
        state.append(sigmoid(windowed_data[i + 1] - windowed_data[i]))

    return np.array([state]), windowed_data

In [7]:
stock_name = "PETR4.SA"
data = dataset_loader(stock_name)

[*********************100%***********************]  1 of 1 completed


In [8]:
window_size = 10
episodes = 1
batch_size = 32
data_samples = len(data) - 1

In [9]:
trader = AI_Trader(window_size)

In [10]:
trader.model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 32)                352       
_________________________________________________________________
dense_1 (Dense)              (None, 64)                2112      
_________________________________________________________________
dense_2 (Dense)              (None, 128)               8320      
_________________________________________________________________
dense_3 (Dense)              (None, 3)                 387       
Total params: 11,171
Trainable params: 11,171
Non-trainable params: 0
_________________________________________________________________


In [11]:
for episode in range(1, episodes + 1):
    print(f"Episode: {episode}/{episodes}")
    state = state_creator(data, 0, window_size + 1)
    total_profit = 0
    trader.inventory = []
    for t in range(data_samples):
        action = trader.trade(state)
        next_state = state_creator(data, t + 1, window_size + 1)
        reward = 0

        if action == 1: # Comprando uma ação
            trader.inventory.append(data[t])
            print("AI Trader bought: ", stocks_price_format(data[t]))
        elif action == 2 and len(trader.inventory) > 0: # Vendendo uma ação
            buy_price = trader.inventory.pop(0)

            reward = max(data[t] - buy_price, 0)
            total_profit += data[t] - buy_price
            print("AI Trader sold: ", stocks_price_format(data[t]), f" Profit: {stocks_price_format(data[t] - buy_price)}")


        done = t == data_samples - 1
        trader.memory.append((state, action, reward, next_state, done))

        state = next_state

        if done:
            print("########################")
            print(f"Total profit: {total_profit}")
            print("########################")

        if len(trader.memory) > batch_size:
            trader.batch_train(batch_size)

    if episode % 10 == 0:
        trader.model.save(f"ai_trader_{episode}.h5")

Episode: 1/1000
AI Trader bought:  $ 24.757500
AI Trader sold:  $ 24.735001  Profit: - $ 0.022499
AI Trader bought:  $ 24.365000
AI Trader sold:  $ 23.775000  Profit: - $ 0.590000
AI Trader bought:  $ 24.025000
AI Trader sold:  $ 23.350000  Profit: - $ 0.674999
AI Trader bought:  $ 23.985001
AI Trader bought:  $ 24.245001
AI Trader sold:  $ 24.697500  Profit: $ 0.712500
AI Trader bought:  $ 24.695000
AI Trader bought:  $ 24.957500
AI Trader sold:  $ 24.967501  Profit: $ 0.722500
AI Trader sold:  $ 24.990000  Profit: $ 0.295000
AI Trader sold:  $ 24.857500  Profit: - $ 0.100000
AI Trader bought:  $ 24.334999
AI Trader bought:  $ 26.084999
AI Trader sold:  $ 26.052500  Profit: $ 1.717501
AI Trader sold:  $ 26.120001  Profit: $ 0.035002
AI Trader bought:  $ 26.447500
AI Trader sold:  $ 26.467501  Profit: $ 0.020000
AI Trader bought:  $ 27.092501
AI Trader sold:  $ 27.000000  Profit: - $ 0.092501
AI Trader bought:  $ 26.982500
AI Trader bought:  $ 27.370001
AI Trader sold:  $ 27.344999  Pr

KeyboardInterrupt: 

In [13]:
total_profit

24.03750991821289