# Previsão de séries temporais financeiras com memória de longo prazo _ MODIFICADO LSTM
## https://humboldt-wi.github.io/blog/research/information_systems_1718/06financialtime-series/

In [24]:
import pandas as pd
import tensorflow as tf
import numpy as np
from sklearn.preprocessing import MinMaxScaler

# Load the raw dataset, discard rows containing missing values and keep the
# result as a plain NumPy array.
# NOTE(review): no column is removed here; if a column (e.g. a date column)
# must be excluded from training, drop it before calling .values.
data = (
    pd.read_csv("/home/bene/MachineLearning/TCC/Base de Dados/dados.csv")
    .dropna()
    .values
)
print(data)

[[ 5.04032269e-04  1.00000000e+00]
 [ 3.14445891e-03  1.00000000e+00]
 [-3.77453896e-03  0.00000000e+00]
 ...
 [ 8.28794275e-03  1.00000000e+00]
 [ 3.90202099e-03  1.00000000e+00]
 [-1.08767659e-02  0.00000000e+00]]


In [15]:
# Append with timesteps
def createTimeSteps(df, lags=4):
    """
        builds a lagged version of the data and returns it as one wide frame.
        The input is wrapped in a DataFrame; copies shifted down by
        lags, lags-1, ..., 1 rows are placed side by side (oldest lag first),
        followed by the unshifted data as the right-most columns.
        Every NaN in the combined frame (e.g. the leading rows that have no
        history yet) is replaced by 0.
        @param df: data frame (or array-like) with all features
        @param lags: number of shifted copies placed before the original columns
        @return: DataFrame holding the shifted copies plus the original columns
    """
    frame = pd.DataFrame(df)
    # oldest lag first so that time runs left to right inside each row
    shifted_copies = [frame.shift(step) for step in range(lags, 0, -1)]
    combined = pd.concat(shifted_copies + [frame], axis=1)
    # rows without enough history are zero-filled rather than dropped
    return combined.fillna(0)

In [20]:
# Everything prepared...

BATCH_SIZE = 1 # batch size during training
TS = 4 # length of sequence (number of lags) used for each sample (7 = week, 30 = month)
FEATURES = 1 # number of features in data set
TRAINING_DAYS = 1250 # number of leading rows used for training; the rest is the test set

# NOTE(review): normalized_data is not defined in the visible cells --
# presumably the scaled series produced in another cell; verify before running.
full_df = createTimeSteps(normalized_data, TS)
display(full_df)
full_df = full_df.values

# Chronological split: no shuffling, the test set is the tail of the series.
train = full_df[:TRAINING_DAYS, :]
test = full_df[TRAINING_DAYS:, :]

input_var = int(TS*FEATURES) # every feature contributes one column per timestep
target = -1 # the last column is the unshifted (current) value and serves as the target
X_train, y_train = train[:, :input_var], train[:, target]
print(y_train)
X_test, y_test = test[:, :input_var], test[:, target]

# Reshape to (samples, timesteps, features) as expected by the LSTM layer.
# The sample count is taken from the arrays themselves so the reshape also
# works when the dataset holds fewer than TRAINING_DAYS rows.
X_train = X_train.reshape(X_train.shape[0], TS, FEATURES)
X_test = X_test.reshape(X_test.shape[0], TS, FEATURES)

Unnamed: 0,0,0.1,0.2,0.3,0.4
0,0.000000,0.000000,0.000000,0.000000,-0.226870
1,0.000000,0.000000,0.000000,-0.226870,-0.190287
2,0.000000,0.000000,-0.226870,-0.190287,-0.313755
3,0.000000,-0.226870,-0.190287,-0.313755,-0.313755
4,-0.226870,-0.190287,-0.313755,-0.313755,-0.318328
...,...,...,...,...,...
2545,-0.091808,-0.061064,-0.061064,0.000423,0.010672
2546,-0.061064,-0.061064,0.000423,0.010672,0.005548
2547,-0.061064,0.000423,0.010672,0.005548,-0.050817
2548,0.000423,0.010672,0.005548,-0.050817,-0.055940


[-0.22686958 -0.19028664 -0.31375504 ... -0.27060318 -0.28453445
 -0.2102375 ]


In [5]:
# Our first very easy model
def helloModel(timesteps, features, batch_size=1):
    """
        builds and compiles a minimal LSTM regression network:
        one LSTM layer with 16 units, a single-unit Dense layer and a linear
        activation, compiled with MSE loss, the adam optimizer and MSE metric.
        @param timesteps: first dimension of the per-sample input shape
        @param features: second dimension of the per-sample input shape
        @param batch_size: currently unused by this model
        @return: the compiled Sequential model
    """
    stack = [
        LSTM(16, input_shape=(timesteps, features)),
        Dense(1),
        Activation('linear'),
    ]
    network = Sequential(stack)
    network.compile(loss='mse', optimizer='adam', metrics=['mse'])
    return network

In [6]:
# Fit the model
def fitting(model, X, y, val_X, val_y, epochs, batch_size=1, state_config=False, sf=False):
    """
        fits the model to the data via keras API.
        @param model: before designed model setup
        @param X: correctly reshaped input data
        @param y: correctly reshaped target
        @param val_X, val_y: correctly reshaped test data
        @param epochs: number of epochs to repeat training
        @param batch_size: number of rows after the weights of the network are updated
        @param state_config: True/False - if true, the model is trained one epoch at a
        time and model.reset_states() is called before every epoch
        @param sf: True/False - shuffle mode. If stateless, this makes sense to increase
        generalization of the model
        @return: tuple (result, training_mse, val_mse); result is the object returned
        by the last model.fit call (None if no epoch was run in stateful mode),
        training_mse and val_mse are flat lists with one entry per epoch in both modes
    """
    validation = (val_X, val_y)

    if not state_config:
        result = model.fit(X, y, batch_size=batch_size, epochs=epochs,
                           validation_data=validation, shuffle=sf)
        return result, result.history['mse'], result.history['val_mse']

    # Stateful mode: run the epochs manually so the states can be reset between them.
    result = None  # stays None when epochs == 0 instead of raising at the return
    training_mse = []
    val_mse = []
    for _ in range(epochs):
        model.reset_states()
        result = model.fit(X, y, batch_size=batch_size, epochs=1,
                           validation_data=validation, shuffle=sf)
        # history values are lists (one entry per epoch); extend keeps the
        # collected curves flat, matching the stateless branch
        training_mse.extend(result.history['mse'])
        val_mse.extend(result.history['val_mse'])

    return result, training_mse, val_mse

In [7]:
# Almost forgot... your libraries
import keras
from keras import Sequential
from keras.models import load_model
from keras.models import Sequential
from keras.layers import Dense, Activation, LSTM, Dropout, TimeDistributed, RepeatVector
from sklearn.metrics import mean_squared_error

from sklearn.preprocessing import MinMaxScaler

from pandas import read_csv, DataFrame
import pandas as pd

from datetime import datetime
import numpy as np
import matplotlib.pyplot as plt
import os

# Run configuration
EPOCHS = 30 # number of training epochs
STATEFUL = True # stateless/stateful
SF = False # activate shuffle
RETURN_SEQ = False # many to many prediction (outputs results of every TS)
# NOTE(review): STATEFUL, SF and RETURN_SEQ are defined here but are not
# passed to helloModel or fitting below, so they currently have no effect.

# Build the network
model = helloModel(TS, FEATURES, batch_size=BATCH_SIZE)

# Train it, using the test split as validation data
result, training_mse, val_mse = fitting(
    model, X_train, y_train, X_test, y_test, EPOCHS, batch_size=BATCH_SIZE
)

# Predict on the test split and compare the output shapes
yhat = model.predict(X_test, batch_size=BATCH_SIZE)
print(yhat.shape)
print(y_test.shape)

# Figure 1: targets against predictions
for series, label in ((y_test, 'y'), (yhat, 'yhat')):
    plt.plot(series, label=label)
plt.legend()
plt.show()

# Figure 2: learning curves
for series, label in ((training_mse, 'Training: MSE'), (val_mse, 'Test: MSE')):
    plt.plot(series, label=label)
plt.legend()
plt.show()

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30

KeyboardInterrupt: 