In [None]:
#@title Imports basicos
import pandas as pd
import time
import matplotlib.pyplot as plt
import datetime
import gc
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import TimeSeriesSplit

In [None]:
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a time series as a supervised-learning table of lagged columns.

    Args:
        data: Observations ordered in time. Anything ``pd.DataFrame`` accepts:
            a flat list, a list of rows, a 1-D/2-D array, a Series or a DataFrame.
        n_in: Number of lagged steps (t-n_in ... t-1) to include as inputs.
        n_out: Number of steps (t ... t+n_out-1) to include as outputs.
        dropnan: When true, rows containing NaN (created by the shifts) are removed.

    Returns:
        A DataFrame whose columns are named ``var<j>(t-<i>)``, ``var<j>(t)`` and
        ``var<j>(t+<i>)``, one group of columns per shifted copy of the input.
    """
    df = pd.DataFrame(data)
    # Count the variables on the frame itself: the previous check assumed every
    # list was univariate (a list of rows then failed with a column-count
    # mismatch) and indexed ``data.shape[1]``, which fails for 1-D arrays/Series.
    n_vars = df.shape[1]
    cols, names = [], []
    # input sequence (t-n, ... t-1)
    for lag in range(n_in, 0, -1):
        cols.append(df.shift(lag))
        names += ['var%d(t-%d)' % (j + 1, lag) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for step in range(n_out):
        cols.append(df.shift(-step))
        if step == 0:
            names += ['var%d(t)' % (j + 1) for j in range(n_vars)]
        else:
            names += ['var%d(t+%d)' % (j + 1, step) for j in range(n_vars)]
    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg

In [None]:
# Dataset identifier; it selects the CSV below and is reused for the weights file name.
name = 'vale_bov_opcao2'
# Read the pre-processed table for that identifier.
stocks = pd.read_csv('data/processed/%s.csv' % name)

In [None]:
# Preview the first rows of the table exactly as read from the CSV.
stocks.head()

In [None]:
# Parse the 'data' column (format %Y%m%d) and order it chronologically.
# The index is reset so that label k is the k-th date: later cells drop and
# look up entries by label (`dataStocks.drop(len(dataStocks)-1)`,
# `dataStocks[lineXTrain]`), which only lines up with the re-indexed `stocks`
# rows when the labels are positional.
dataStocks = (
    pd.to_datetime(stocks['data'], format='%Y%m%d')
    .sort_values(ascending=True)
    .reset_index(drop=True)
)

In [None]:
# Put the rows in ascending order of the 'data' column and renumber them from 0.
stocks = (
    stocks
    .sort_values('data')
    .reset_index(drop=True)
)

In [None]:
# Preview the first rows again, now that the table is sorted by 'data'.
stocks.head()

In [None]:
# Replace every missing value in the table with 0.
stocks = stocks.fillna(value=0)

In [None]:
# Draw the seven selected columns of `stocks` against the parsed dates.
plt.figure(figsize=(16, 6))
plt.plot(
    dataStocks,
    stocks[['preabe', 'premax', 'premin', 'premed', 'preult', 'preofc', 'preofv']],
)
# The trailing semicolon hides the Text repr in the cell output.
plt.title('Valores da Ação variando conforme a data');

## Definição de X e Y


In [None]:
# Target: the 'premed' column without its first row, renumbered from 0, so that
# row k of y holds the value found in row k+1 of `stocks`.
y = (
    stocks[['premed']]
    .drop(index=0)
    .reset_index(drop=True)
)

In [None]:
# Features: every column except 'data'.
x = stocks.drop(columns='data')
# Remove the row labelled len(x)-1 so x has the same number of rows as y.
x = x.drop(index=len(x) - 1)

In [None]:
# Trim the last date so there is one date per row of x. The index is reset
# first and the trim is positional, so the result does not depend on whatever
# labels dataStocks carried before this cell.
dataStocks = dataStocks.reset_index(drop=True).iloc[:-1]

# Number of lagged days fed to the model in addition to day t.
defasagem = 0
for i in range(defasagem):
    # One target row and one date are removed per lagged day.
    y = y.drop(i)
    dataStocks = dataStocks.iloc[:-1]

y = y.reset_index(drop=True)
y.shape

# 80% of the rows go to train+validation, the remainder to test ...
auxTrain = int(x.shape[0] * 0.8)
testSize = x.shape[0] - auxTrain

# ... and that 80% is split again 80/20 into train and validation.
trainSize = int(auxTrain * 0.8)
valSize = auxTrain - trainSize

print("Train Size: {}".format(trainSize))
print("Val Size  : {}".format(valSize))
print("Test Size : {}".format(testSize))
print("Somatorio dos sizes: {}".format(trainSize + valSize + testSize))
print("Tamanho do dataset : {}".format(x.shape[0]))

# Contiguous slices, in row order: train, then validation, then test.
xTrain = x[:trainSize]
xTest = x[trainSize + valSize:]
xVal = x[trainSize : trainSize + valSize]

xTrain.shape[0], xVal.shape[0], xTest.shape[0]

yTrain = y[:trainSize]
yVal = y[trainSize : trainSize + valSize]
yTest = y[trainSize + valSize:]

yTrain.shape[0], yVal.shape[0], yTest.shape[0]

xScaler = MinMaxScaler(feature_range = (0,1))
yScaler = MinMaxScaler(feature_range = (0,1))

# Both scalers are fitted on the training slice only and then applied to the
# validation and test slices.
x_train = xScaler.fit_transform(xTrain)
x_val = xScaler.transform(xVal)
x_test = xScaler.transform(xTest)

y_train = yScaler.fit_transform(yTrain)
y_val = yScaler.transform(yVal)
y_test = yScaler.transform(yTest)

# Re-assemble the scaled slices into one frame before building the lag columns.
# pd.concat replaces DataFrame.append, which no longer exists in pandas 2.x;
# ignore_index gives the combined frame a single 0..n-1 index.
x = pd.concat(
    [pd.DataFrame(x_train), pd.DataFrame(x_val), pd.DataFrame(x_test)],
    ignore_index=True,
)

x_defasado = series_to_supervised(x, defasagem)
x_defasado.shape

x_train = x_defasado[:trainSize].values
x_test = x_defasado[trainSize + valSize:].values
x_val = x_defasado[trainSize : trainSize + valSize].values

inputVar = x.shape[1]

# Reshape each row to (defasagem + 1, inputVar): one group of inputVar values
# per time step, matching the column grouping of series_to_supervised.
x_train = x_train.reshape(x_train.shape[0], (defasagem+1), inputVar)
print('x_train shape {}'.format(x_train.shape))

x_val = x_val.reshape(x_val.shape[0], (defasagem+1), inputVar)
print('x_val shape {}'.format(x_val.shape))

x_test = x_test.reshape(x_test.shape[0], (defasagem+1), inputVar)
print('x_test shape {}'.format(x_test.shape))

In [None]:
#@title imports RNN

import keras
from keras.models import Sequential
from keras.layers import Activation, Dense
from keras.layers import LSTM
from keras.layers import Dropout
from matplotlib import pyplot
from keras.callbacks import TensorBoard
from time import time
from keras.callbacks import ModelCheckpoint
from keras.utils.vis_utils import plot_model
from keras import backend as K

def rmse(y_true, y_pred):
    """Return the square root of the mean squared difference along the last axis.

    Args:
        y_true: Backend tensor with the expected values.
        y_pred: Backend tensor with the predicted values.

    Returns:
        A backend tensor with the last axis reduced.

    NOTE(review): when the last axis has length 1 the mean is taken over a
    single element, so the result equals the absolute error of each sample.
    """
    squared_error = K.square(y_pred - y_true)
    return K.sqrt(K.mean(squared_error, axis=-1))

In [None]:
#@title Configurações da rede
# Default value of build_model's `activ_func` argument.
activation_function = "tanh"
# Loss minimised during training: mean squared error.
loss = "mse"
# Optimizer name handed to model.compile.
optimizer = "adam"
# Rate used by the Dropout layer placed after each LSTM layer.
dropout = 0.25

In [None]:
# Mini-batch size and number of epochs passed to rnn_model.fit.
batch_size, epochs = 64, 50

In [None]:
def build_model(inputs, output_size, activ_func=activation_function, dropout=dropout, loss=loss, optimizer=optimizer):
    """Build, compile and summarise the stacked-LSTM regressor.

    Args:
        inputs: Array whose ``shape[1]`` and ``shape[2]`` give the (time steps,
            features) input shape of the first LSTM layer.
        output_size: Number of units of the final Dense layer.
        activ_func: Activation used by every LSTM layer.
        dropout: Rate of the Dropout layer that follows each LSTM layer.
        loss: Loss passed to ``model.compile``.
        optimizer: Optimizer passed to ``model.compile``.

    Returns:
        The compiled ``Sequential`` model (its summary is printed as a side effect).
    """
    model = Sequential()
    model.add(LSTM(100, return_sequences=True, input_shape=(inputs.shape[1], inputs.shape[2]), activation=activ_func))
    model.add(Dropout(dropout))
    # The two middle layers previously ignored `activ_func` and always used the
    # LSTM default; they now follow the argument like the first and last layers.
    model.add(LSTM(100, return_sequences=True, activation=activ_func))
    model.add(Dropout(dropout))
    model.add(LSTM(48, return_sequences=True, activation=activ_func))
    model.add(Dropout(dropout))
    # Last recurrent layer returns only its final step, feeding the Dense head.
    model.add(LSTM(50, activation=activ_func))
    model.add(Dropout(dropout))
    model.add(Dense(units=output_size))
    model.add(Activation('linear'))
    model.compile(loss=loss, optimizer=optimizer, metrics=['mae'])
    model.summary()
    return model

In [None]:
# Run a garbage-collection pass before the network is created.
gc.collect()

# Seed NumPy's global random generator.
# NOTE(review): this call seeds NumPy only; confirm whether the Keras backend
# needs its own seed for runs to be repeatable.
np.random.seed(202)

# Build the network; the input shape is taken from x_train and there is a single output.
rnn_model = build_model(x_train, output_size=1)

# tensorboard = TensorBoard(log_dir="log/{}".format(time()))
# Save the weights to this file whenever val_loss reaches a new minimum.
filepath = 'weights_%s_novo.hdf5' % name
checkpoint = ModelCheckpoint(
    filepath,
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    mode='min',
)
callbacks_list = [checkpoint]

In [None]:
# Train on the training split and evaluate on the validation split after each
# epoch; shuffle=False keeps the samples in their original order.
history = rnn_model.fit(
    x_train,
    y_train,
    epochs=epochs,
    batch_size=batch_size,
    verbose=1,
    validation_data=(x_val, y_val),
    shuffle=False,
    callbacks=[checkpoint],
)

In [None]:
# Training and validation loss recorded at each epoch.
plt.figure(figsize=(16, 6))
for key, label in (('loss', 'treino'), ('val_loss', 'validação')):
    plt.plot(history.history[key], label=label)
plt.legend();

In [None]:
# Model outputs for each split, still on the scaled target range.
encontradoTrain = rnn_model.predict(x_train)
encontradoVal = rnn_model.predict(x_val)
encontradoTest = rnn_model.predict(x_test)

In [None]:
# Undo the target scaling, split by split: first the expected values ...
targetTrain = yScaler.inverse_transform(y_train)
targetVal = yScaler.inverse_transform(y_val)
targetTest = yScaler.inverse_transform(y_test)

# ... then the model outputs.
foundTrain = yScaler.inverse_transform(encontradoTrain)
foundVal = yScaler.inverse_transform(encontradoVal)
foundTest = yScaler.inverse_transform(encontradoTest)

In [None]:
# Join the three splits back into single 1-D series, in train/val/test order.
encontrado = np.concatenate([foundTrain, foundVal, foundTest]).ravel()
esperado = np.concatenate([targetTrain, targetVal, targetTest]).ravel()
# Dates are used positionally, so the plot does not depend on dataStocks' labels.
datas = dataStocks.values
ultimo = len(datas) - 1

plt.figure(figsize=(30,12))
plt.title('Treino/Teste - Encontrado x Esperado ({}d Defasagem)'.format(defasagem))
plt.xlabel('Dia')
plt.ylabel('Valor')

plt.plot(datas, encontrado, label = 'Encontrado', color = 'blue')
plt.plot(datas, esperado, label = 'Esperado', color='green')

# Positions where the validation and test splits start.
lineXTrain = len(foundTrain)
lineXTest = len(foundVal) + lineXTrain
minEsperado = esperado.min()
maxEsperado = esperado.max()

# Vertical separators between the splits (clamped to the last available date).
for limite in (lineXTrain, lineXTest):
    dia = datas[min(limite, ultimo)]
    plt.plot([dia, dia], [minEsperado, maxEsperado], c = 'black')

# Caption each split at the middle of its own range instead of at fixed row
# numbers, so the captions stay inside their split for any dataset size.
trechos = (
    ('Treinamento', 0, lineXTrain),
    ('Validação', lineXTrain, lineXTest),
    ('Teste', lineXTest, len(datas)),
)
for rotulo, inicio, fim in trechos:
    plt.text(datas[min((inicio + fim) // 2, ultimo)], minEsperado, rotulo, fontsize = 15)

plt.legend()
plt.show()

In [None]:
from sklearn.metrics import mean_squared_error

# Both arrays are on the scaled target range (y_test comes from yScaler.transform
# and encontradoTest is the raw model output), so these errors are in scaled units.
# RMSE is derived with np.sqrt instead of the `squared=` argument, which recent
# scikit-learn releases deprecate and then remove.
mse_test = mean_squared_error(y_test, encontradoTest)
print('MSE {}'.format(mse_test))
print('RMSE {}'.format(np.sqrt(mse_test)))

## Teste com uma MLP


In [None]:
# Flatten each sample into a single feature vector for the MLP. Using -1 keeps
# this valid for any number of lagged steps and makes the cell safe to re-run
# on an array that is already 2-D.
x_train = x_train.reshape(x_train.shape[0], -1)

In [None]:
# Flatten each validation sample into a single feature vector; -1 works for any
# number of lagged steps and for an array that is already 2-D.
x_val = x_val.reshape(x_val.shape[0], -1)

In [None]:
# Display the shapes of the reshaped training and validation inputs.
x_train.shape, x_val.shape

In [None]:
# The MLP has no separate validation step here, so the validation rows are
# stacked under the training rows (inputs and targets alike).
# NOTE(review): re-running this cell stacks the validation rows again.
x_train = np.concatenate((x_train, x_val), axis=0)
y_train = np.concatenate((y_train, y_val), axis=0)

In [None]:
# Display the shapes of the merged inputs and targets.
x_train.shape, y_train.shape

In [None]:
# Flatten each test sample into a single feature vector; -1 works for any
# number of lagged steps and for an array that is already 2-D.
x_test = x_test.reshape(x_test.shape[0], -1)

In [None]:
from sklearn.neural_network import MLPRegressor

# Fit the MLP baseline. The single-column target is passed as a 1-D array so
# scikit-learn does not have to convert (and warn about) a column vector.
regr = MLPRegressor(random_state=1, max_iter=500).fit(x_train, y_train.ravel())

In [None]:
# Score of the fitted MLP on the test split (for scikit-learn regressors,
# `score` is documented as the coefficient of determination R^2).
regr.score(x_test, y_test)

In [None]:
# MLP outputs for the rows it was fitted on and for the test rows.
encontradoTrain = regr.predict(x_train)
encontradoTest = regr.predict(x_test)

In [None]:
# Set the default figure size BEFORE the first pyplot call of this cell:
# that call creates the figure, so setting it afterwards has no effect on this plot.
plt.rcParams['figure.figsize'] = (16,8)

# Join fitted and test rows into single 1-D series (values are on the scaled range).
encontrado = np.concatenate([np.ravel(encontradoTrain), np.ravel(encontradoTest)])
esperado = np.concatenate([np.ravel(y_train), np.ravel(y_test)])
# Dates are used positionally, so the plot does not depend on dataStocks' labels.
datas = dataStocks.values
ultimo = len(datas) - 1

plt.title('MLP - Treino/Teste - Encontrado x Esperado ({}d Defasagem)'.format(defasagem))
plt.xlabel('Dia')
plt.ylabel('Valor')

plt.plot(datas, encontrado, label = 'Encontrado', color = 'blue')
plt.plot(datas, esperado, label = 'Esperado', color='green')

# Position where the test rows start.
lineXTrain = len(encontradoTrain)

minEsperado = esperado.min()
maxEsperado = esperado.max()

# Vertical separator between fitted and test rows (clamped to the last date).
dia = datas[min(lineXTrain, ultimo)]
plt.plot([dia, dia], [minEsperado, maxEsperado], c = 'black')

# Caption each part at the middle of its own range instead of at fixed row
# numbers, so the captions stay inside their part for any dataset size.
for rotulo, inicio, fim in (('Treinamento', 0, lineXTrain), ('Teste', lineXTrain, len(datas))):
    plt.text(datas[min((inicio + fim) // 2, ultimo)], minEsperado, rotulo, fontsize = 15)

plt.legend()
plt.show()