In [2]:
import math
import random
import numpy as np
import pandas as pd
from datetime import datetime


import tensorflow as tf
import matplotlib.pyplot as plt
from pandas_datareader import data as pdr
import yfinance as yf
yf.pdr_override()


from tqdm import tqdm_notebook, tqdm
from collections import deque

#Warning ignore
import warnings
warnings.filterwarnings('ignore')

In [85]:
class AI_Trader():
  
  def __init__(self, state_size, action_space=3, model_name="AITrader"): #Stay, Buy, Sell
    
    self.state_size = state_size
    self.action_space = action_space
    self.memory = deque(maxlen=2000)
    self.inventory = []
    self.model_name = model_name
    
    # Define hyperparamaters
    self.gamma = 0.95
    self.epsilon = 1.0
    self.epsilon_final = 0.01
    self.epsilon_decay = 0.995
        
    # Call a function  to build a model trought this class constructor
    # More parameters could be ustilized to programaticaly define network size (layers and neurons)
    self.model = self.model_builder()
    
    
  def model_builder(self):
    metrics = [tf.keras.metrics.BinaryAccuracy(),tf.keras.metrics.FalseNegatives(), tf.keras.metrics.Accuracy()]
    model = tf.keras.models.Sequential()    
    model.add(tf.keras.layers.Dense(units=32, activation='relu', input_dim=self.state_size))    
    model.add(tf.keras.layers.Dense(units=64, activation='relu'))    
    model.add(tf.keras.layers.Dense(units=128, activation='relu'))    
    model.add(tf.keras.layers.Dense(units=self.action_space, activation='linear'))    
    model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              metrics=metrics)
    return model
  
  
  
  
  # Trade function that takes state as an input and returns an action 
  # to perform in perticular state 
  def trade(self, state):
    
    # Should we perform a renadom generated action or action defined in model?
    
    # If value from our random generator is smaller or equal to our epsilon 
    #     then we will retun a random action from action_space [0-3)
    if random.random() <= self.epsilon:
      return random.randrange(self.action_space)
    
    # If our random is greater than epsilon then we will use model to perform action
    actions = self.model.predict(state)
    # return only a one number defining an action (#Stay - 0 , Buy - 1, Sell - 2) 
    #    that has maximum probability
    return np.argmax(actions[0])
  
  
  
  def batch_train(self, batch_size):
    
    batch = []
    
    # Iterrate in momory, we do not want to randolmy select data as we are dealing with 
    #    time constraint data. We will always sample from the end of memory size of bath
    for i in range(len(self.memory) - batch_size + 1, len(self.memory)):
      # insert data from memory to batch      
      batch.append(self.memory[i])
    
    
    # Iterate trought batch of data and train the model for each sample from batch
    # Order of variables in for loop is important
    for state, action, reward, next_state, done in batch:
      # Reward if agent is in terminal state
      reward = reward
      # Check that agent is not in terminal state
      # If not in terminal state calculate reward for actions that could be played
      if not done:
        # Discounted total reward:
        reward = reward + self.gamma * np.amax(self.model.predict(next_state)[0])        
      # Target variable that is predicted by the model (action)
      target = self.model.predict(state)
      target[0][action] = reward
      
      self.model.fit(state, target, epochs=1, verbose=0)

      
    # We will decrease epsilon parameter that is 1 as defined in __init__  so
    #    so we can stop performing random actions at some point
    if self.epsilon > self.epsilon_final:
      self.epsilon *= self.epsilon_decay

In [86]:
print(trader.model.predict(trader.memory[0][0]))

print(trader.model.predict(trader.memory[1][0]))

[[20.429379 21.630465 22.635977]]
[[20.429379 21.630465 22.635977]]


In [83]:
trader.memory

deque([(array([[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]]),
        1,
        0,
        array([[0.5       , 0.5       , 0.5       , 0.5       , 0.5       ,
                0.5       , 0.5       , 0.5       , 0.5       , 0.79248934]]),
        False),
       (array([[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]]),
        2,
        0,
        array([[0.5       , 0.5       , 0.5       , 0.5       , 0.5       ,
                0.5       , 0.5       , 0.5       , 0.5       , 0.79248934]]),
        False),
       (array([[0.5       , 0.5       , 0.5       , 0.5       , 0.5       ,
                0.5       , 0.5       , 0.5       , 0.5       , 0.79248934]]),
        1,
        0,
        array([[0.5       , 0.5       , 0.5       , 0.5       , 0.5       ,
                0.5       , 0.5       , 0.5       , 0.79248934, 0.45511914]]),
        False),
       (array([[0.5       , 0.5       , 0.5       , 0.5       , 0.5       ,
                0.5       , 0.5       , 0.5       , 

In [4]:
def sigmoid(x):
  return 1 / (1 + math.exp(-x))

  
def stocks_price_format(n):
  if n < 0:
    return "- $ {0:2f}".format(abs(n))
  else:
    return "$ {0:2f}".format(abs(n))


def dataset_loader(stock_name):
  
  #Use pandas data reader for reading stock data from warious sources like "yahoo", "google"
  dataset = pdr.get_data_yahoo("SPY", start="2017-01-01", end="2024-01-01")
   
  # Get start and end time to variables from dataset
  start_date = str(dataset.index[0]).split()[0]
  end_date = str(dataset.index[-1]).split()[0]
  
  # Model will use "Close" column for training 
  close = dataset['Close']

  return close

# Data -> dataset to predict from, gathered by data:loader()
# Timestep -> Day in the dataset that we want to predict for [0:datalength]
# window_suze -> how many days in past we want to use to predict current status[1:datalength]
#         Try different setup to see what creates best fit
def state_creator(data, timestep, window_size):
  
  # starting day of our state
  starting_id = timestep - window_size + 1
  
  if starting_id >= 0:
    windowed_data = data[starting_id:timestep+1]
  else:
    # Replicate member (data[0]) needed times
    windowed_data = - starting_id * [data[0]] + list(data[0:timestep+1])
    
  state = []
  # Iterate trough whole windowed_data minus current state (-1)
  for i in range(window_size - 1):
    # Normalize the difference from current day and the next day
    # Because the prices can be very different and we want them on same scale
    state.append(sigmoid(windowed_data[i+1] - windowed_data[i]))
    
  return np.array([state])


In [98]:
def state_creator(data, timestep, window_size):
  
  # starting day of our state
  starting_id = timestep - window_size + 1
  
  if starting_id >= 0:
    windowed_data = data[starting_id:timestep+1]
  else:
    # Replicate member (data[0]) needed times
    windowed_data = - starting_id * [data.iloc[0]] + list(data[0:timestep+1])
    
  state = []
  # Iterate trough whole windowed_data minus current state (-1)
  for i in range(window_size - 1):
    # Normalize the difference from current day and the next day
    # Because the prices can be very different and we want them on same scale
    state.append(sigmoid(windowed_data[0][i+1] - windowed_data[0][i]))
    
  return np.array([state])

In [96]:
## Carga los datos utilizados para los indicadores
data = pd.read_parquet('resultadoIndicadores.parquet')
data = data.drop(['EMA_5','EMA_63', 'EMA_63', 'WMA_63', 'MINUS_DM','PLUS_DI', 'AD', 'WMA_5'], axis=1)

# Tage data for Apple ------------------------
#stock_name = "AAPL"
#data = dataset_loader(stock_name)

In [99]:
window_size = 10
episodes = 1000 # same as epoch

batch_size = 32
data_samples = len(data) - 1 # discard last value, that we will predict on

In [73]:
trader.memory[0][0] # Primer state

array([[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]])

In [100]:
trader = AI_Trader(window_size)
trader.model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_8 (Dense)             (None, 32)                352       
                                                                 
 dense_9 (Dense)             (None, 64)                2112      
                                                                 
 dense_10 (Dense)            (None, 128)               8320      
                                                                 
 dense_11 (Dense)            (None, 3)                 387       
                                                                 
Total params: 11171 (43.64 KB)
Trainable params: 11171 (43.64 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [107]:
episode = 1
state = state_creator(data, 0, 9 + 1)
total_profit = []
trader.inventory = []


for t in tqdm(range(50)):
    action = trader.trade(state)

        # Use action to get to next state(t+)
    next_state = state_creator(data=data, timestep=(t + 1), window_size=(9 + 1))
    # As we did not calculate anything up to this point reward is 0
    reward = 0
    
    if action == 1: #Buying
      # Put buyed stock to inventory to trade with
      trader.inventory.append(data[t])
      print("AI Trader bought: ", stocks_price_format(data[t]))
      
    # To sell we need to have something in inventory  
    elif action == 2 and len(trader.inventory) > 0: #Selling
      # Check buy price, pop removes first value from list
      buy_price = trader.inventory.pop(0)
      
      # If we gain money (current price - buy price) we have reward 
      #    if we lost money then reward is 0
      reward = max(data[t] - buy_price, 0)
      total_profit += data[t] - buy_price
      print("AI Trader sold: ", stocks_price_format(data[t]), " Profit: " + stocks_price_format(data[t] - buy_price) )
      
    # if t is last sample in our dateset we are done
    #     we do not have any steps to perform in current episode
    if t == data_samples - 1:
      done = True
    else:
      done = False
    
    # Append all data to trader-agent memory, experience buffer
    trader.memory.append((state, action, reward, next_state, done))
    
    # change state to next state, so we are done with an episode
    state = next_state
    
    if done:
      print("########################")
      print("TOTAL PROFIT: {}".format(total_profit))
      print("########################")
    
    # Chekc if we have more information in our memory than batch size
    if len(trader.memory) > batch_size:
      trader.batch_train(batch_size)
  
  # Save the model every 10 episodes
    if episode % 10 == 0:
       trader.model.save("ai_trader_{}.h5".format(episode))


  0%|          | 0/50 [00:00<?, ?it/s]


KeyError: 0

In [11]:
trader.action_space

3

In [12]:
data

Date
2017-01-03    225.240005
2017-01-04    226.580002
2017-01-05    226.399994
2017-01-06    227.210007
2017-01-09    226.460007
                 ...    
2023-11-06    435.690002
2023-11-07    436.929993
2023-11-08    437.250000
2023-11-09    433.839996
2023-11-10    436.780090
Name: Close, Length: 1727, dtype: float64

In [13]:
226.580002 - 225.240005

1.339997000000011

In [14]:
#def sigmoid(x):
  #return
1 / (1 + math.exp(-1.339997000000011))

0.792489448091031

In [41]:
state

array([[0.7957598 , 0.71299779, 0.77902779, 0.7170738 , 0.77206376,
        0.45016676, 0.5914578 , 0.80218534, 0.44769043, 0.53991579]])

In [43]:
len(trader.memory)

56

In [44]:
trader.memory

deque([(array([[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]]),
        1,
        0,
        array([[0.5       , 0.5       , 0.5       , 0.5       , 0.5       ,
                0.5       , 0.5       , 0.5       , 0.5       , 0.79248934]]),
        False),
       (array([[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]]),
        2,
        0,
        array([[0.5       , 0.5       , 0.5       , 0.5       , 0.5       ,
                0.5       , 0.5       , 0.5       , 0.5       , 0.79248934]]),
        False),
       (array([[0.5       , 0.5       , 0.5       , 0.5       , 0.5       ,
                0.5       , 0.5       , 0.5       , 0.5       , 0.79248934]]),
        1,
        0,
        array([[0.5       , 0.5       , 0.5       , 0.5       , 0.5       ,
                0.5       , 0.5       , 0.5       , 0.79248934, 0.45511914]]),
        False),
       (array([[0.5       , 0.5       , 0.5       , 0.5       , 0.5       ,
                0.5       , 0.5       , 0.5       , 

In [45]:
trader.batch_train()

32

In [70]:
len(batch)

31

In [68]:
len(trader.memory)

106

In [48]:
trader.memory[52]

(array([[0.82778248, 0.39891124, 0.50250242, 0.57444326, 0.7957598 ,
         0.71299779, 0.77902779, 0.7170738 , 0.77206376, 0.45016676]]),
 0,
 0,
 array([[0.39891124, 0.50250242, 0.57444326, 0.7957598 , 0.71299779,
         0.77902779, 0.7170738 , 0.77206376, 0.45016676, 0.5914578 ]]),
 False)

In [None]:
data

0      146.202915
1      145.497950
2      144.924388
3      145.094980
4      145.688369
          ...    
881    325.840001
882    325.112858
883    324.418097
884    323.774286
885    323.648096
Name: SMA_21, Length: 886, dtype: float64

In [None]:
state

array([[0.5       , 0.5       , 0.5       , 0.5       , 0.5       ,
        0.5       , 0.5       , 0.5       , 0.5       , 0.33071239]])

In [49]:
reward

0

In [50]:
trader.model.predict(state)



array([[ 99.85104, 104.43879, 106.46851]], dtype=float32)

In [51]:
state

array([[0.7957598 , 0.71299779, 0.77902779, 0.7170738 , 0.77206376,
        0.45016676, 0.5914578 , 0.80218534, 0.44769043, 0.53991579]])

In [52]:
trader.trade(state)

1

In [53]:
trader.memory

deque([(array([[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]]),
        1,
        0,
        array([[0.5       , 0.5       , 0.5       , 0.5       , 0.5       ,
                0.5       , 0.5       , 0.5       , 0.5       , 0.79248934]]),
        False),
       (array([[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]]),
        2,
        0,
        array([[0.5       , 0.5       , 0.5       , 0.5       , 0.5       ,
                0.5       , 0.5       , 0.5       , 0.5       , 0.79248934]]),
        False),
       (array([[0.5       , 0.5       , 0.5       , 0.5       , 0.5       ,
                0.5       , 0.5       , 0.5       , 0.5       , 0.79248934]]),
        1,
        0,
        array([[0.5       , 0.5       , 0.5       , 0.5       , 0.5       ,
                0.5       , 0.5       , 0.5       , 0.79248934, 0.45511914]]),
        False),
       (array([[0.5       , 0.5       , 0.5       , 0.5       , 0.5       ,
                0.5       , 0.5       , 0.5       , 

In [21]:
batch = []


In [23]:
memory = trader.memory
memory

deque([(array([[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]]),
        1,
        0,
        array([[0.5       , 0.5       , 0.5       , 0.5       , 0.5       ,
                0.5       , 0.5       , 0.5       , 0.5       , 0.79248934]]),
        False),
       (array([[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]]),
        2,
        0,
        array([[0.5       , 0.5       , 0.5       , 0.5       , 0.5       ,
                0.5       , 0.5       , 0.5       , 0.5       , 0.79248934]]),
        False),
       (array([[0.5       , 0.5       , 0.5       , 0.5       , 0.5       ,
                0.5       , 0.5       , 0.5       , 0.5       , 0.79248934]]),
        1,
        0,
        array([[0.5       , 0.5       , 0.5       , 0.5       , 0.5       ,
                0.5       , 0.5       , 0.5       , 0.79248934, 0.45511914]]),
        False),
       (array([[0.5       , 0.5       , 0.5       , 0.5       , 0.5       ,
                0.5       , 0.5       , 0.5       , 

In [36]:
memory[1][2]

0

In [24]:
# Iterrate in momory, we do not want to randolmy select data as we are dealing with 
#    time constraint data. We will always sample from the end of memory size of bath
for i in range(len(memory) - batch_size + 1, len(memory)):
    # insert data from memory to batch      
  batch.append(memory[i])

In [66]:
len(batch)

31

In [59]:
t = 35
next_state = state_creator(data=data, timestep=(t + 1), window_size=(window_size + 1))
next_state

array([[0.71299779, 0.77902779, 0.7170738 , 0.77206376, 0.45016676,
        0.5914578 , 0.80218534, 0.44769043, 0.53991579, 0.57444326]])

In [55]:
# Iterate trought batch of data and train the model for each sample from batch
# Order of variables in for loop is important
for state, action, reward, next_state, done in batch:
    # Reward if agent is in terminal state
    reward = reward
    # Check that agent is not in terminal state
    # If not in terminal state calculate reward for actions that could be played
    if not done:
    # Discounted total reward:
        reward = reward + trader.gamma * np.amax(trader.model.predict(next_state)[0])        
        # Target variable that is predicted by the model (action)
    target = trader.model.predict(state)
    target[0][action] = reward



In [61]:
target

array([[87.92146, 90.3666 , 92.32832]], dtype=float32)

In [91]:
action

2

In [93]:
target[0][2]

92.32832

In [94]:
reward

2.0500030517578125

In [95]:
target[0][2] = reward
target

array([[87.92146 , 90.3666  ,  2.050003]], dtype=float32)

In [60]:
state

array([[0.62245933, 0.30153556, 0.69635532, 0.35663219, 0.81000031,
        0.87761124, 0.440285  , 0.41095942, 0.19466187, 0.4949991 ]])

In [64]:
trader.model.fit(state, target, epochs=1, verbose=0)

<keras.src.callbacks.History at 0x2d869d30a30>

In [65]:
trader.model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 32)                352       
                                                                 
 dense_1 (Dense)             (None, 64)                2112      
                                                                 
 dense_2 (Dense)             (None, 128)               8320      
                                                                 
 dense_3 (Dense)             (None, 3)                 387       
                                                                 
Total params: 11171 (43.64 KB)
Trainable params: 11171 (43.64 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [88]:
model.evaluate(state, target)



8133.76025390625

In [90]:
state

array([[0.53742833, 0.32960073, 0.32959736, 0.39174039, 0.57444326,
        0.69635532, 0.52996284, 0.28699909, 0.88594793, 0.38461595]])

In [84]:
model_vars = tf.get_collection(tf.GraphKeys.MODEL_VARIABLES)


AttributeError: module 'tensorflow' has no attribute 'get_collection'

In [None]:

    

    
    # Iterate trought batch of data and train the model for each sample from batch
    # Order of variables in for loop is important
    for state, action, reward, next_state, done in batch:
      # Reward if agent is in terminal state
      reward = reward
      # Check that agent is not in terminal state
      # If not in terminal state calculate reward for actions that could be played
      if not done:
        # Discounted total reward:
        reward = reward + self.gamma * np.amax(self.model.predict(next_state)[0])        
      # Target variable that is predicted by the model (action)
      target = self.model.predict(state)
      target[0][action] = reward
      
      self.model.fit(state, target, epochs=1, verbose=0)
      
    # We will decrease epsilon parameter that is 1 as defined in __init__  so
    #    so we can stop performing random actions at some point
    if self.epsilon > self.epsilon_final:
      self.epsilon *= self.epsilon_decay