<a href="https://colab.research.google.com/github/Moisito12/Tensorflow-2.0/blob/master/Reinforcement_Learning_para_problemas_del_Stock_Market_Trading_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Paso 1:** Instalar las dependencias necesarias

In [0]:
!pip install tensorflow-gpu==2.0.0

Collecting tensorflow-gpu==2.0.0
[?25l  Downloading https://files.pythonhosted.org/packages/25/44/47f0722aea081697143fbcf5d2aa60d1aee4aaacb5869aee2b568974777b/tensorflow_gpu-2.0.0-cp36-cp36m-manylinux2010_x86_64.whl (380.8MB)
[K     |████████████████████████████████| 380.8MB 38kB/s 
Collecting tensorflow-estimator<2.1.0,>=2.0.0
[?25l  Downloading https://files.pythonhosted.org/packages/fc/08/8b927337b7019c374719145d1dceba21a8bb909b93b1ad6f8fb7d22c1ca1/tensorflow_estimator-2.0.1-py2.py3-none-any.whl (449kB)
[K     |████████████████████████████████| 450kB 42.4MB/s 
Collecting tensorboard<2.1.0,>=2.0.0
[?25l  Downloading https://files.pythonhosted.org/packages/76/54/99b9d5d52d5cb732f099baaaf7740403e83fe6b0cedde940fabd2b13d75a/tensorboard-2.0.2-py3-none-any.whl (3.8MB)
[K     |████████████████████████████████| 3.8MB 46.5MB/s 
Collecting google-auth<2,>=1.6.3
[?25l  Downloading https://files.pythonhosted.org/packages/36/f8/84b5771faec3eba9fe0c91c8c5896364a8ba08852c0dea5ad2025026dd95/

In [0]:
!pip install pandas-datareader



**Paso 2:** Importar las dependencias del proyecto

In [0]:
import math
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pandas_datareader as data_reader
from tqdm import tqdm_notebook, tqdm
from collections import deque
import tensorflow as tf

**Paso 3:** Realizar la construcción de la red neuronal del AI Trader

In [0]:
class AI_Trader():
  """Deep Q-learning trading agent backed by a small dense Keras network.

  The agent picks one of ``action_space`` discrete actions with an
  epsilon-greedy policy and learns Q-values from the transitions stored in
  ``memory``.
  """

  def __init__(self, state_size, action_space=3, model_name="AITrader"):
    """Set up replay memory, exploration schedule and the Q-network.

    Args:
      state_size: Number of input features of the Q-network.
      action_space: Number of discrete actions (default 3: hold, buy, sell).
      model_name: Label stored on the instance as ``model_name``.
    """
    self.state_size = state_size
    self.action_space = action_space
    # Bounded replay memory: the oldest transitions are dropped past 2000.
    self.memory = deque(maxlen=2000)
    self.inventory = []
    self.model_name = model_name

    self.gamma = 0.95            # discount factor applied to the next-state value
    self.epsilon = 1.0           # probability of taking a random action
    self.epsilon_final = 0.01    # epsilon stops decaying once it is at or below this
    self.epsilon_decay = 0.995   # multiplicative decay applied per batch_train call

    self.model = self.model_builder()

  def model_builder(self):
    """Build and compile the Q-network.

    Returns:
      A compiled ``tf.keras`` Sequential model with dense layers of 32, 64
      and 128 ReLU units and a linear output with one unit per action.
    """
    model = tf.keras.models.Sequential()

    model.add(tf.keras.layers.Dense(units=32, activation='relu', input_dim=self.state_size))

    model.add(tf.keras.layers.Dense(units=64, activation='relu'))

    model.add(tf.keras.layers.Dense(units=128, activation='relu'))

    # Linear output: the network regresses unbounded Q-values, one per action.
    model.add(tf.keras.layers.Dense(units=self.action_space, activation='linear'))

    # `learning_rate` is the supported keyword; `lr` is only a deprecated alias.
    model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(learning_rate=0.001))

    return model

  def trade(self, state):
    """Choose an action for ``state`` with an epsilon-greedy policy.

    Args:
      state: Model input, passed unchanged to ``self.model.predict``.

    Returns:
      A random action index with probability ``epsilon``; otherwise the index
      of the highest predicted Q-value for the first row of ``state``.
    """
    if random.random() <= self.epsilon:
      return random.randrange(self.action_space)

    actions = self.model.predict(state)
    return np.argmax(actions[0])

  def batch_train(self, batch_size):
    """Fit the Q-network on the ``batch_size`` most recent transitions.

    Each transition is a ``(state, action, reward, next_state, done)`` tuple.
    After the updates, ``epsilon`` is decayed once if it is still above
    ``epsilon_final``.

    Args:
      batch_size: Number of most recent transitions to train on. If the
        memory holds fewer, all stored transitions are used.
    """
    # Take exactly `batch_size` items. A `[-0:]` slice would return the whole
    # memory, so non-positive sizes are mapped to an empty batch explicitly.
    recent = list(self.memory)[-batch_size:] if batch_size > 0 else []

    for state, action, reward, next_state, done in recent:
      q_target = reward
      if not done:
        # Bellman target: immediate reward plus discounted best next Q-value.
        q_target = reward + self.gamma * np.amax(self.model.predict(next_state)[0])

      # Only the taken action's Q-value is moved; the others keep their
      # current prediction so they contribute no error.
      target = self.model.predict(state)
      target[0][action] = q_target

      self.model.fit(state, target, epochs=1, verbose=0)

    if self.epsilon > self.epsilon_final:
      self.epsilon *= self.epsilon_decay

**Paso 4:** Proceder con el Pre-Procesado de datos

**Definir las siguientes funciones**

**Sigmoide**

In [0]:
def sigmoid(x):
  """Return the logistic sigmoid ``1 / (1 + e**-x)`` of a scalar ``x``.

  The computation is split by sign so that ``math.exp`` is only ever called
  with a non-positive argument. This avoids the ``OverflowError`` that
  ``math.exp(-x)`` raises for large negative ``x``.
  """
  if x >= 0:
    return 1 / (1 + math.exp(-x))
  # Equivalent form for x < 0: e**x / (1 + e**x); e**x underflows safely to 0.
  z = math.exp(x)
  return z / (1 + z)

**Función de formato de precios**

In [0]:
def stock_price_format(n):
  """Format a price or profit as a dollar amount with two decimals.

  Args:
    n: Numeric amount; may be negative (for example, a loss).

  Returns:
    ``"$<amount>"`` for non-negative values and ``"- $<amount>"`` for
    negative ones, where ``<amount>`` is ``abs(n)`` with two decimal places.
  """
  # `.2f` fixes the precision; the previous `2f` only set a minimum width of 2
  # and therefore printed six decimals.
  if n < 0:
    return "- ${0:.2f}".format(abs(n))
  else:
    return "${0:.2f}".format(abs(n))

**Carga del Dataset**

In [0]:
def dataset_loader(stock_name):
  """Download the price history for ``stock_name`` and return its closing prices.

  Args:
    stock_name: Ticker symbol passed to ``data_reader.DataReader``.

  Returns:
    The ``'Close'`` column of the downloaded dataset.
  """
  # No start/end dates are passed, so the reader's default date range applies.
  dataset = data_reader.DataReader(stock_name, data_source="yahoo")

  return dataset['Close']

**State Creator**

In [0]:
def statate_creator(data, timestap, window_size):
  """Build the model input for time step ``timestap``.

  The state is the sigmoid of each consecutive price change inside a window
  of ``window_size`` prices ending at ``timestap``. When the window would
  start before the first sample, it is left-padded with the first price.

  Args:
    data: Sequence of prices supporting positional slicing (list, array or
      pandas Series).
    timestap: Index of the last price included in the window.
    window_size: Number of prices in the window.

  Returns:
    A NumPy array of shape ``(1, window_size - 1)``.
  """
  starting_id = timestap - window_size + 1

  # Materialise the window as a plain list so the indexing below is always
  # positional, whatever index type `data` carries.
  if starting_id >= 0:
    windowed_data = list(data[starting_id:timestap + 1])
  else:
    available = list(data[0:timestap + 1])
    windowed_data = -starting_id * [available[0]] + available

  # Difference against the previous price (index i), not a fixed element.
  state = [
    sigmoid(windowed_data[i + 1] - windowed_data[i])
    for i in range(window_size - 1)
  ]

  return np.array([state])

**Cargar una acción del mercado**

In [0]:
# Ticker symbol to download; its closing prices become the training series.
stock_name = "AAPL"
data = dataset_loader(stock_name)

In [66]:
# Preview the first 20 closing prices.
data.head(20)

Date
2010-01-04    30.572857
2010-01-05    30.625713
2010-01-06    30.138571
2010-01-07    30.082857
2010-01-08    30.282858
2010-01-11    30.015715
2010-01-12    29.674286
2010-01-13    30.092857
2010-01-14    29.918571
2010-01-15    29.418571
2010-01-19    30.719999
2010-01-20    30.247143
2010-01-21    29.724285
2010-01-22    28.250000
2010-01-25    29.010000
2010-01-26    29.420000
2010-01-27    29.697144
2010-01-28    28.469999
2010-01-29    27.437143
2010-02-01    27.818571
Name: Close, dtype: float64

**Paso 5:** Entrenar al AI Trader

**Configurar los hiperparámetros**

In [0]:
# Number of input features of the Q-network (passed to AI_Trader as state_size).
window_size = 10
# Number of full passes over the price series.
episodes = 1000

# Number of recent transitions requested on each batch_train call.
batch_size = 32
# Steps per episode; one less than the series length because every step also
# builds the state for t + 1.
data_simples = len(data) -1

**Definir el modelo del AI Trader**

In [0]:
# Create the agent; window_size is used as the network's input size.
trader = AI_Trader(window_size)

In [69]:
# Print the layers and parameter counts of the agent's network.
trader.model.summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_24 (Dense)             (None, 32)                352       
_________________________________________________________________
dense_25 (Dense)             (None, 64)                2112      
_________________________________________________________________
dense_26 (Dense)             (None, 128)               8320      
_________________________________________________________________
dense_27 (Dense)             (None, 3)                 387       
Total params: 11,171
Trainable params: 11,171
Non-trainable params: 0
_________________________________________________________________


**Bucle de entrenamiento**

In [0]:
# Training loop: each episode replays the whole price series once. At every
# step the agent acts, the transition is stored, and the network is trained
# on recent memory.
for episode in range(1, episodes + 1):

  print("Episodio: {}/{}".format(episode, episodes))

  # Every episode starts from the first price with no holdings and no profit.
  total_profit = 0
  trader.inventory = []
  state = statate_creator(data, 0, window_size + 1)

  for t in tqdm(range(data_simples)):

    action = trader.trade(state)
    next_state = statate_creator(data, t + 1, window_size + 1)

    reward = 0

    if action == 1:  # Buy: record the purchase price
      price = data[t]
      trader.inventory.append(price)
      print("AI Trader compró: ", stock_price_format(price))

    elif action == 2 and len(trader.inventory) > 0:  # Sell the oldest holding
      price = data[t]
      buy_price = trader.inventory.pop(0)
      profit = price - buy_price

      # Losses are clipped to zero in the reward but still counted in the
      # episode's total profit.
      reward = max(profit, 0)
      total_profit += profit
      print("AI Trader vendió: ", stock_price_format(price), " Beneficio: " + stock_price_format(profit) )

    # The episode ends on the last step of the series.
    done = t == data_simples - 1

    trader.memory.append((state, action, reward, next_state, done))
    state = next_state

    if done:
      print("########################")
      print("BENEFICIO TOTAL: {}".format(total_profit))
      print("########################")

    # Train only once the memory holds more than one batch of transitions.
    if len(trader.memory) > batch_size:
      trader.batch_train(batch_size)

  # Checkpoint the model every 10 episodes.
  if not episode % 10:
    trader.model.save("ai_trader_{}.h5".format(episode))