<a href="https://colab.research.google.com/github/Argentan/DMA_LAB2/blob/master/tutoriales/08_Ejemplo_RNN_Trading.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

import plotly.graph_objects as go
import matplotlib.pyplot as plt

from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, LSTM
from tensorflow.keras.layers.experimental.preprocessing import Normalization

In [None]:
# Load a single ticker; each row is one 3-minute observation
intrad = pd.read_csv(
    'https://github.com/Argentan/DMA_LAB2/blob/master/data/AAPL.csv.gz?raw=true',
    compression='gzip',
    parse_dates=[0],
    index_col=[0],
)

In [None]:
# Preview the first rows of the raw intraday frame
intrad.head()

In [None]:
# Size of the raw intraday frame as (rows, columns)
intrad.shape

(337010, 8)

In [None]:
# Define Función para Agrupar datos por Hora (H), Dia (D), Semana (W) - Ver al final más opciones

def agrupar_temporalmente(dataset, frecuencia = 'D'):
  """Aggregate price bars into coarser bars at the requested frequency.

  Args:
    dataset: DataFrame whose index can be resampled (e.g. a DatetimeIndex)
      and that has the columns Open, High, Low, Close, Volume, WAP and Count.
    frecuencia: pandas offset alias passed to `resample` ('H', 'D', 'W', ...).

  Returns:
    DataFrame with the columns Open, High, Low, Close, Volume, WAP and Count,
    one row per period that has a non-null Open. WAP is the volume-weighted
    average of the input WAP values within each period.
  """
  # Use the frame that was passed in (not a notebook-level global) so the
  # function works for any ticker.
  por_periodo = dataset.resample(frecuencia)
  volumen = por_periodo['Volume'].sum()

  df = pd.DataFrame()
  # The first column fixes the output index: periods with no data have a NaN
  # Open and are dropped; later columns are aligned to this index.
  df['Open']   = por_periodo['Open'].first().dropna()
  df['High']   = por_periodo['High'].max().dropna()
  df['Low']    = por_periodo['Low'].min().dropna()
  df['Close']  = por_periodo['Close'].last().dropna()
  df['Volume'] = volumen
  # Volume-weighted average price: sum(WAP * Volume) / sum(Volume) per period.
  df['WAP']    = (dataset['WAP'] * dataset['Volume']).resample(frecuencia).sum() / volumen
  df['Count']  = por_periodo['Count'].sum()
  return df

In [None]:
# Aggregate the intraday bars into daily bars
diario = agrupar_temporalmente(intrad, frecuencia='D')

In [None]:
# Preview the last rows of the daily aggregation
diario.tail()

In [None]:
# Plot the daily bars as a candlestick chart
fig = go.Figure(
    data=[
        go.Candlestick(
            x=diario.index,
            open=diario.Open,
            high=diario.High,
            low=diario.Low,
            close=diario.Close,
        )
    ]
)
fig.update_layout(autosize=True)

# Feature Engineering

Ver `tf.keras.utils.normalize`.

In [None]:
# Creamos una función para normalizar y armar train y test por ventanas de tiempo

def multivariate_data(dataset, target_col, train_perc, history_size, future_target, step = 1):
  """Scale a multivariate series and slice it into train/validation windows.

  Args:
    dataset: 2-D table of numeric features (rows = time steps), e.g. a
      DataFrame or array.
    target_col: positional index of the column to predict.
    train_perc: fraction of the rows assigned to training; the rest is
      used for validation.
    history_size: number of past rows covered by each input window.
    future_target: number of consecutive future values in each label.
    step: stride used when sampling rows inside a history window.

  Returns:
    Tuple (x_train, y_train, x_valid, y_valid) of numpy arrays. Each x sample
    holds the rows [i - history_size, i) taken every `step` rows, and the
    matching y sample holds target[i : i + future_target].

  Raises:
    ValueError: if the split leaves no rows to fit the scaler on.
  """
  values = np.asarray(dataset, dtype=float)
  train_split = int(round(values.shape[0] * train_perc, 0))
  if train_split < 1:
    raise ValueError("train_perc leaves no training rows to fit the scaler on")

  # Fit the scaler on the training rows only, so that validation min/max
  # values do not leak into the features the model is trained on. Validation
  # values may therefore fall outside [0, 1].
  scaler = MinMaxScaler(feature_range=(0,1))
  scaler.fit(values[:train_split])
  scaled = scaler.transform(values)
  target = scaled[:, target_col]

  def _windows(first, last):
    # Build (inputs, labels) for every window position i in [first, last).
    xs, ys = [], []
    for i in range(first, last):
      xs.append(scaled[i - history_size:i:step])
      ys.append(target[i : i + future_target])
    return np.array(xs), np.array(ys)

  # Training labels must end before train_split; for future_target == 1 this
  # is the same range as range(history_size, train_split).
  x_train, y_train = _windows(history_size, train_split - future_target + 1)

  # The last usable position is len - future_target (its label slice is still
  # complete), hence the +1 on the exclusive upper bound.
  x_valid, y_valid = _windows(train_split + history_size,
                              len(scaled) - future_target + 1)

  return x_train, y_train, x_valid, y_valid

In [None]:
# Parameters used to build the windowed dataset

target_col = 3      # Position of the target column (Close in the daily frame)
train_perc = 0.8    # Fraction of observations used for training (the rest is validation)
history_size = 10   # Number of past observations in each input window
future_target = 1   # Number of future observations to predict
step = 1            # Stride between the observations sampled inside each window

x_train, y_train, x_val, y_val = multivariate_data(
    dataset=diario,
    target_col=target_col,
    train_perc=train_perc,
    history_size=history_size,
    future_target=future_target,
    step=step,
)


In [None]:
# Inspect the training labels returned by multivariate_data
y_train

In [None]:
# Build the model: an LSTM layer followed by a single-unit dense output
model = Sequential([
    LSTM(32, input_shape=x_train.shape[-2:]),
    Dense(1),
])

# Compile with the Adam optimizer and a mean-squared-error loss
model.compile(loss='mean_squared_error', optimizer='adam')

In [None]:
# Train the model. steps_per_epoch is intentionally not set, so Keras derives
# the number of batches per epoch from len(x_train) and batch_size. The
# previous hard-coded 500 steps of 8 samples requested more batches than the
# arrays provide (the recorded run stops at step 50/500 of epoch 6).
# The History object is kept so the loss curves can be inspected afterwards.
history = model.fit(x_train, y_train, batch_size=8, epochs=10,
                    validation_data=(x_val, y_val))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
 50/500 [==>...........................] - 0s 6ms/step - loss: 7.0726e-05 - val_loss: 2.5175e-04


<tensorflow.python.keras.callbacks.History at 0x7fcc8b391e48>



---



# Notas

In [None]:
# Opciones de resample
# B         business day frequency
# C         custom business day frequency (experimental)
# D         calendar day frequency
# W         weekly frequency
# M         month end frequency
# SM        semi-month end frequency (15th and end of month)
# BM        business month end frequency
# CBM       custom business month end frequency
# MS        month start frequency
# SMS       semi-month start frequency (1st and 15th)
# BMS       business month start frequency
# CBMS      custom business month start frequency
# Q         quarter end frequency
# BQ        business quarter end frequency
# QS        quarter start frequency
# BQS       business quarter start frequency
# A         year end frequency
# BA, BY    business year end frequency
# AS, YS    year start frequency
# BAS, BYS  business year start frequency
# BH        business hour frequency
# H         hourly frequency
# T, min    minutely frequency
# S         secondly frequency
# L, ms     milliseconds
# U, us     microseconds
# N         nanoseconds