In [1]:
import math
import random
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas_datareader as data_reader
import yfinance as yf
from pandas_datareader import data as pdr
yf.pdr_override()

from tqdm import tqdm_notebook, tqdm
from collections import deque

import warnings
warnings.filterwarnings('ignore')

In [2]:
class AI_Trader():
  
  def __init__(self, state_size, action_space=3, model_name="AITrader"): #Stay, Buy, Sell
    
    self.state_size = state_size
    self.action_space = action_space
    self.memory = deque(maxlen=2000)
    self.inventory = []
    self.model_name = model_name
    
    # Define hyperparamaters
    self.gamma = 0.95
    self.epsilon = 1.0
    self.epsilon_final = 0.01
    self.epsilon_decay = 0.995
        
    # Call a function  to build a model trought this class constructor
    # More parameters could be ustilized to programaticaly define network size (layers and neurons)
    self.model = self.model_builder()
    
    
  def model_builder(self):
    metrics = [tf.keras.metrics.BinaryAccuracy(),tf.keras.metrics.FalseNegatives(), tf.keras.metrics.Accuracy()]
    model = tf.keras.models.Sequential()    
    model.add(tf.keras.layers.Dense(units=32, activation='relu', input_dim=self.state_size))    
    model.add(tf.keras.layers.Dense(units=64, activation='relu'))    
    model.add(tf.keras.layers.Dense(units=128, activation='relu'))    
    model.add(tf.keras.layers.Dense(units=self.action_space, activation='linear'))    
    model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              metrics=metrics)
    return model
  
  
  
  
  # Trade function that takes state as an input and returns an action 
  # to perform in perticular state 
  def trade(self, state):
    
    # Should we perform a renadom generated action or action defined in model?
    
    # If value from our random generator is smaller or equal to our epsilon 
    #     then we will retun a random action from action_space [0-3)
    if random.random() <= self.epsilon:
      return random.randrange(self.action_space)
    
    # If our random is greater than epsilon then we will use model to perform action
    actions = self.model.predict(state)
    # return only a one number defining an action (#Stay - 0 , Buy - 1, Sell - 2) 
    #    that has maximum probability
    return np.argmax(actions[0])
  
  
  
  def batch_train(self, batch_size):
    
    batch = []
    
    # Iterrate in momory, we do not want to randolmy select data as we are dealing with 
    #    time constraint data. We will always sample from the end of memory size of bath
    for i in range(len(self.memory) - batch_size + 1, len(self.memory)):
      # insert data from memory to batch      
      batch.append(self.memory[i])
    
    
    # Iterate trought batch of data and train the model for each sample from batch
    # Order of variables in for loop is important
    for state, action, reward, next_state, done in batch:
      # Reward if agent is in terminal state
      reward = reward
      # Check that agent is not in terminal state
      # If not in terminal state calculate reward for actions that could be played
      if not done:
        # Discounted total reward:
        reward = reward + self.gamma * np.amax(self.model.predict(next_state)[0])        
      # Target variable that is predicted by the model (action)
      target = self.model.predict(state)
      target[0][action] = reward
      
      self.model.fit(state, target, epochs=1, verbose=0)
      
    # We will decrease epsilon parameter that is 1 as defined in __init__  so
    #    so we can stop performing random actions at some point
    if self.epsilon > self.epsilon_final:
      self.epsilon *= self.epsilon_decay

In [3]:
# Usually used at the end of a network for binary classifictation
# It changes range of input to scale of [0,1]
# So we can normalize input data for comparision day by day if they are on different scale
def sigmoid(x):
  return 1 / (1 + np.exp(-x))

In [4]:
def stocks_price_format(n):
  if n < 0:
    return "- $ {0:2f}".format(abs(n))
  else:
    return "$ {0:2f}".format(abs(n))

In [5]:
def state_creator(data, timestep, window_size):
  
  # starting day of our state
  starting_id = timestep - window_size + 1
  
  if starting_id >= 0:
    windowed_data = data[starting_id:timestep+1]
  else:
    # Replicate member (data[0]) needed times
    windowed_data = - starting_id * [data[0]] + list(data[0:timestep+1])
    
  state = []
  # Iterate trough whole windowed_data minus current state (-1)
  for i in range(window_size - 1):
    # Normalize the difference from current day and the next day
    # Because the prices can be very different and we want them on same scale
    state.append(sigmoid(windowed_data[i+1] - windowed_data[i]))
    
  return np.array([state])

In [6]:
activo = pdr.get_data_yahoo('AAPL')

[*********************100%%**********************]  1 of 1 completed


In [7]:
close = activo['Close']

In [6]:
def indicadores(data, indicadores):
    for i in range(0, indicadores):
        if i == 0:
            globals()['df_' + str(data.columns[i])] = data.iloc[:,0:1]
        elif i > 0:
            globals()['df_' + str(data.columns[i])] = data.iloc[:,i:i+1]

In [7]:
data = pd.read_parquet('resultadoIndicadores.parquet')
indicadores(data, 10)
data = df_EMA_21['EMA_21']

In [8]:
window_size = 10
episodes = 1000 # same as epoch

batch_size = 32
data_samples = len(data) - 1 # discard last value, that we will predict on

In [9]:
trader = AI_Trader(window_size)
trader.model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 32)                352       
                                                                 
 dense_1 (Dense)             (None, 64)                2112      
                                                                 
 dense_2 (Dense)             (None, 128)               8320      
                                                                 
 dense_3 (Dense)             (None, 3)                 387       
                                                                 
Total params: 11171 (43.64 KB)
Trainable params: 11171 (43.64 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [102]:
df_historia = pd.DataFrame({'Fecha': [],
                                  'Precio': [],
                                  'Accion': []})
for i in range(50):
    accion = np.random.randint(0,2)
    if accion == 0:
        df_compra = pd.DataFrame({'Fecha': [data.index.values[i]],
                                  'Precio': data[i],
                                  'Accion': [1]})
        df_historia = pd.concat([df_historia, df_compra])
    
    if accion == 1:
        df_venta = pd.DataFrame({'Fecha': [data.index.values[i]],
                                 'Precio': data[i],
                                 'Accion':[-1]})
            
        df_historia = pd.concat([df_historia, df_venta])
             


In [10]:
episode = 1
state = state_creator(data, timestep=0, window_size =window_size + 1)
total_profit = []
trader.inventory = []
df_historia = pd.DataFrame({'Fecha': [],
                            'Precio': [],
                            'Accion': [],
                            'Ganancia_total': []})

for t in tqdm(range(35)):
# First we will access action that is going to be taken by model 
    action = trader.trade(state)
    
    # Use action to get to next state(t+)
    next_state = state_creator(data=data, timestep=t+1, window_size=window_size + 1)
    # As we did not calculate anything up to this point reward is 0
    reward = 0
    
    if action == 1: #Buying
      # Put buyed stock to inventory to trade with
      trader.inventory.append(data[t])
      df_compra = pd.DataFrame({'Fecha': [data.index.values[t]],
                                  'Precio': [data[t]],
                                  'Accion': [action]})
      df_historia = pd.concat([df_historia, df_compra])
      print("AI Trader bought: ", stocks_price_format(data[t]))
      
    # To sell we need to have something in inventory  
    elif action == 2 and len(trader.inventory) > 0: #Selling
      # Check buy price, pop removes first value from list
      buy_price = trader.inventory.pop(0)
      
      # If we gain money (current price - buy price) we have reward 
      #    if we lost money then reward is 0
      reward = max(data[t] - buy_price, 0)
      total_profit += data[t] - buy_price
      df_compra = pd.DataFrame({'Fecha': [data.index.values[t]],
                                  'Precio': data[t],
                                  'Accion': [action],
                                  'Ganancia_total': [stocks_price_format(data[t] - buy_price)]})
      df_historia = pd.concat([df_historia, df_compra])
      print("AI Trader sold: ", stocks_price_format(data[t]), " Profit: " + stocks_price_format(data[t] - buy_price))
      
    # if t is last sample in our dateset we are done
    #     we do not have any steps to perform in current episode
    if t == data_samples - 1:
      done = True
    else:
      done = False
    
    # Append all data to trader-agent memory, experience buffer
    trader.memory.append((state, action, reward, next_state, done))
    
    # change state to next state, so we are done with an episode
    state = next_state
    
    if done:
      print("########################")
      print("TOTAL PROFIT: {}".format(total_profit))
      print("########################")
    
    # Chekc if we have more information in our memory than batch size
    if len(trader.memory) > batch_size:
      trader.batch_train(batch_size)
  
  # Save the model every 10 episodes
    if episode % 10 == 0:
      trader.model.save("ai_trader_{}.h5".format(episode))

 43%|████▎     | 15/35 [00:00<00:00, 137.84it/s]

AI Trader bought:  $ 148.507584
AI Trader bought:  $ 148.673038
AI Trader bought:  $ 148.697579
AI Trader bought:  $ 149.726850
AI Trader sold:  $ 151.358433  Profit: $ 2.850848
AI Trader bought:  $ 154.264094
AI Trader sold:  $ 156.827914  Profit: $ 8.154876
AI Trader sold:  $ 160.363555  Profit: $ 11.665976
AI Trader bought:  $ 161.612807
AI Trader bought:  $ 163.288911
AI Trader bought:  $ 164.218711
AI Trader bought:  $ 166.300277
AI Trader sold:  $ 167.249426  Profit: $ 17.522576
AI Trader bought:  $ 168.205591
AI Trader bought:  $ 169.169894


 83%|████████▎ | 29/35 [00:00<00:00, 105.05it/s]

AI Trader sold:  $ 170.227858  Profit: $ 15.963764
AI Trader sold:  $ 170.817314  Profit: $ 9.204507
AI Trader bought:  $ 171.110244
AI Trader sold:  $ 171.981204  Profit: $ 8.692293
AI Trader bought:  $ 172.622515


 83%|████████▎ | 29/35 [00:20<00:00, 105.05it/s]



 94%|█████████▍| 33/35 [00:33<00:03,  1.57s/it] 

AI Trader bought:  $ 173.092860


 97%|█████████▋| 34/35 [00:59<00:03,  3.14s/it]



100%|██████████| 35/35 [01:19<00:00,  2.28s/it]


In [11]:
df_historia

Unnamed: 0,Fecha,Precio,Accion,Ganancia_total
0,2020-04-01,148.507584,1.0,
0,2020-04-02,148.673038,1.0,
0,2020-04-03,148.697579,1.0,
0,2020-04-06,149.72685,1.0,
0,2020-04-08,151.358433,2.0,$ 2.850848
0,2020-04-14,154.264094,1.0,
0,2020-04-16,156.827914,2.0,$ 8.154876
0,2020-04-22,160.363555,2.0,$ 11.665976
0,2020-04-24,161.612807,1.0,
0,2020-04-29,163.288911,1.0,


In [29]:
datosDelModelo = {'Date': [data.index.values], 'Prices': [data.values], 'bought':[trader.inventory], 'sold':[buy_price], 'reward':[reward], 
 'profit': [total_profit]}

In [43]:
df_model = pd.DataFrame()
for i in range(50):
    df_model['Date'] = data.index[i]

df_model

Unnamed: 0,Date


In [57]:
total_profit

array([], dtype=float64)

In [51]:
df_historia  = pd.DataFrame({"fecha":[data.index.values[0]],
                        "precio":[data[0]],
                        "accion":[0]})
df_historia

Unnamed: 0,fecha,precio,accion
0,2020-04-01,148.507584,0


In [52]:
df_historia_app = pd.DataFrame({"fecha":[data.index.values[1]],
                        "precio":[data[1]],
                        "accion":[1]})

df_historia.append(df_historia_app)                


AttributeError: 'DataFrame' object has no attribute 'append'

In [55]:

# Importing pandas as pd
import pandas as pd
 
# creating the Series
series1 = pd.Series([1, 2, 3])
display('series1:', series1)
series2 = pd.Series(['A', 'B', 'C'])
display('series2:', series2)
 
# concatenating
display('After concatenating:')
display(pd.concat([series1, series2], 
                  axis = 1))

'series1:'

0    1
1    2
2    3
dtype: int64

'series2:'

0    A
1    B
2    C
dtype: object

'After concatenating:'

Unnamed: 0,0,1
0,1,A
1,2,B
2,3,C


In [None]:
for i in range(data):
    accion = np.random(0,2)
    if accion == 0:
        buy = dat
    


In [34]:
df2 = pd.DataFrame()
df2['Date'] = data.index.values
df2['Prices'] = data.values
df2['Bought'] = trader.inventory
df2['Sold'] = buy_price
df2['Reward'] = reward
df2['Profift'] = total_profit

ValueError: Length of values (3) does not match length of index (886)

In [118]:
for episode in range(1, episodes + 1):
  
  # To keep track of training process
  # .format populates {} with variables in .format(x,y)
  print("Episode: {}/{}".format(episode, episodes))
  
  # Create state
  # second parameter is timestep = 0
  state = state_creator(data, timestep=0, window_size =window_size + 1)
  
  total_profit = 0
  # Empty inventory before starting episode
  trader.inventory = []
  
  # One timestep is one day so number of timesteps we have represent data we have
  # tqdm is used for visualization
  for t in tqdm(range(data_samples)):
    
    # First we will access action that is going to be taken by model 
    action = trader.trade(state)
    
    # Use action to get to next state(t+)
    next_state = state_creator(data=data, timestep=t+1, window_size=window_size + 1)
    # As we did not calculate anything up to this point reward is 0
    reward = 0
    
    if action == 1: #Buying
      # Put buyed stock to inventory to trade with
      trader.inventory.append(data[t])
      print("AI Trader bought: ", stocks_price_format(data[t]))
      
    # To sell we need to have something in inventory  
    elif action == 2 and len(trader.inventory) > 0: #Selling
      # Check buy price, pop removes first value from list
      buy_price = trader.inventory.pop(0)
      
      # If we gain money (current price - buy price) we have reward 
      #    if we lost money then reward is 0
      reward = max(data[t] - buy_price, 0)
      total_profit += data[t] - buy_price
      print("AI Trader sold: ", stocks_price_format(data[t]), " Profit: " + stocks_price_format(data[t] - buy_price) )
      
    # if t is last sample in our dateset we are done
    #     we do not have any steps to perform in current episode
    if t == data_samples - 1:
      done = True
    else:
      done = False
    
    # Append all data to trader-agent memory, experience buffer
    trader.memory.append((state, action, reward, next_state, done))
    
    # change state to next state, so we are done with an episode
    state = next_state
    
    if done:
      print("########################")
      print("TOTAL PROFIT: {}".format(total_profit))
      print("########################")
    
    # Chekc if we have more information in our memory than batch size
    if len(trader.memory) > batch_size:
      trader.batch_train(batch_size)
  
  # Save the model every 10 episodes
  if episode % 10 == 0:
    trader.model.save("ai_trader_{}.h5".format(episode))

Episode: 1/1000


AttributeError: 'list' object has no attribute 'iloc'

In [None]:
trader.memory