# Implementação do modelo LSTM

Implementar modelo LSTM para predição de subidas.

Problema: De acordo com os 10 últimos valores de fechamento, o próximo valor irá subir ou descer?

Entradas: close das 10 últimas operações

Saída: Subiu

In [1]:
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Mount Google Drive at /content/drive so files stored there can be read by path.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Load the tab-separated WINN_M1 file from the mounted Drive into a DataFrame.
data_frame = pd.read_csv(
    "/content/drive/MyDrive/9P/DSC/datasets/WINN_M1.csv",
    sep='\t',
)

In [4]:
from torch.utils.data import Dataset 

In [5]:
# Preview the first rows to check how the columns were parsed.
data_frame.head()

Unnamed: 0,<DATE>,<TIME>,<OPEN>,<HIGH>,<LOW>,<CLOSE>,<TICKVOL>,<VOL>,<SPREAD>
0,2020.10.15,11:49:00,98565,98575,98530,98575,2373,9001,5
1,2020.10.15,11:50:00,98570,98625,98570,98600,5483,19111,5
2,2020.10.15,11:51:00,98595,98600,98535,98580,4572,17663,5
3,2020.10.15,11:52:00,98575,98620,98535,98615,3536,13143,5
4,2020.10.15,11:53:00,98620,98625,98575,98610,3729,13702,5


In [6]:
# Label each row by comparing its open and close prices:
#   0 -> open above close, 1 -> open below close, 2 -> open equal to close.
data_frame.loc[data_frame['<OPEN>'].gt(data_frame['<CLOSE>']), "Subiu"] = 0
data_frame.loc[data_frame['<OPEN>'].lt(data_frame['<CLOSE>']), "Subiu"] = 1
data_frame.loc[data_frame['<OPEN>'].eq(data_frame['<CLOSE>']), "Subiu"] = 2
data_frame.head()

Unnamed: 0,<DATE>,<TIME>,<OPEN>,<HIGH>,<LOW>,<CLOSE>,<TICKVOL>,<VOL>,<SPREAD>,Subiu
0,2020.10.15,11:49:00,98565,98575,98530,98575,2373,9001,5,1.0
1,2020.10.15,11:50:00,98570,98625,98570,98600,5483,19111,5,1.0
2,2020.10.15,11:51:00,98595,98600,98535,98580,4572,17663,5,0.0
3,2020.10.15,11:52:00,98575,98620,98535,98615,3536,13143,5,1.0
4,2020.10.15,11:53:00,98620,98625,98575,98610,3729,13702,5,0.0


In [9]:
# Show the rows whose label is 2 (open equal to close).
data_frame[data_frame['Subiu'].eq(2)]

Unnamed: 0,<DATE>,<TIME>,<OPEN>,<HIGH>,<LOW>,<CLOSE>,<TICKVOL>,<VOL>,<SPREAD>,Subiu
22,2020.10.15,12:11:00,98900,98950,98895,98900,3952,14171,5,2.0
34,2020.10.15,12:23:00,98900,98935,98880,98900,5152,19660,5,2.0
52,2020.10.15,12:41:00,98975,98995,98960,98975,2317,9072,5,2.0
57,2020.10.15,12:46:00,99005,99015,98985,99005,2099,8235,5,2.0
74,2020.10.15,13:03:00,98985,98995,98950,98985,3688,14706,5,2.0
...,...,...,...,...,...,...,...,...,...,...
170998,2022.02.07,17:46:00,112275,112330,112255,112275,1449,30182,0,2.0
171021,2022.02.07,18:09:00,112160,112185,112130,112160,739,9037,0,2.0
171029,2022.02.07,18:17:00,112195,112210,112175,112195,414,4867,0,2.0
171032,2022.02.07,18:20:00,112235,112275,112235,112235,613,6713,0,2.0


In [10]:
# Discard the rows labelled 2 (open equal to close) and renumber the remaining rows from 0.
data_frame = data_frame[data_frame['Subiu'].ne(2)].reset_index(drop=True)

In [None]:
class DadosFinanceiros(Dataset):
    """Expose a DataFrame as (features, label) pairs of float32 tensors.

    Features are taken from every column except the last one; the target is
    read from the column named ``label``.
    """

    def __init__(self, dataframe):
        """Keep the source table and convert features and labels to tensors."""
        self.tabela = dataframe
        atributos = dataframe.iloc[:, :-1].values
        rotulos = dataframe['label'].values
        self.x = torch.tensor(atributos, dtype=torch.float32)
        self.y = torch.tensor(rotulos, dtype=torch.float32)

    def __len__(self):
        """Return the number of samples (rows of the feature tensor)."""
        return self.x.shape[0]

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.x[index], self.y[index]

In [None]:
# Create the dataset
# NOTE(review): DadosFinanceiros reads a 'label' column, while the labelling cell
# above writes its result to 'Subiu' — confirm the frame passed here has the
# columns the class expects.
operacoes = DadosFinanceiros(data_frame)

In [None]:
# Split into train / validation / test subsets: 70% / 10% / whatever remains.
tamanho_treino = int(len(operacoes) * 0.7)
tamanho_validacao = int(len(operacoes) * 0.1)
tamanho_teste = len(operacoes) - (tamanho_treino + tamanho_validacao)

ds_treino, ds_validacao, ds_teste = torch.utils.data.random_split(
    operacoes,
    [tamanho_treino, tamanho_validacao, tamanho_teste],
)

In [None]:
# Build one shuffled DataLoader per subset, all sharing the same batch size.
batch_size = 30

loader_treino, loader_validacao, loader_teste = (
    torch.utils.data.DataLoader(subconjunto, batch_size=batch_size, shuffle=True)
    for subconjunto in (ds_treino, ds_validacao, ds_teste)
)

In [None]:
# Pull a single batch from the training loader to inspect its contents and shape.
dataiter = iter(loader_treino)
# Use the built-in next(): the `.next()` method is not part of the iterator
# protocol and is not available on DataLoader iterators in recent PyTorch releases.
dados, labels = next(dataiter)

print(dados, labels)
print(dados.shape)

In [None]:
# Plot every sample of the batch labelled 0 (one line per sample) and, on top,
# their mean at each position drawn as a thick line.
classe_0 = dados[labels == 0, :].T
plt.plot(classe_0);
plt.plot(classe_0.mean(axis=1), linewidth=7);

In [None]:
# Plot every sample of the batch labelled 1 (one line per sample) and, on top,
# their mean at each position drawn as a thick line.
classe_1 = dados[labels == 1, :].T
plt.plot(classe_1);
plt.plot(classe_1.mean(axis=1), linewidth=7);

In [None]:
from torch import nn
import torch.nn.functional as F

In [None]:
# Detect whether a CUDA device is available and report where training will run.
tem_gpu = torch.cuda.is_available()

if tem_gpu:
    # Message fixed from "da GPU" to "na GPU" to match the CPU message.
    print("Treinaremos na GPU")
else:
    print("Treinaremos na CPU")

In [None]:
import torch.nn as nn

class Modelo_RNN(nn.Module):
    """
    Embedding -> LSTM -> dropout -> linear classifier over the last time step.

    Inputs are cast to integer indices and looked up in an embedding table,
    so every input value must lie in ``[0, vocab_size)``.
    """

    def __init__(self, vocab_size, output_size, embedding_dim, hidden_dim, n_layers,
                 drop_prob=0.5, softmax=False):
        """
        Initialize the model by setting up the layers.

        Args:
            vocab_size: number of rows in the embedding table.
            output_size: number of units produced by the final linear layer.
            embedding_dim: size of each embedding vector (LSTM input size).
            hidden_dim: size of the LSTM hidden state.
            n_layers: number of stacked LSTM layers.
            drop_prob: dropout probability passed to the LSTM.
            softmax: when False a sigmoid is applied to the linear output;
                when True the raw linear output is returned.
        """
        super().__init__()

        self.softmax = softmax
        self.output_size = output_size
        self.n_layers = n_layers
        self.hidden_dim = hidden_dim

        # embedding and LSTM layers
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, n_layers,
                            dropout=drop_prob, batch_first=True)

        # dropout applied to the LSTM output before the linear layer
        self.dropout = nn.Dropout(0.3)

        # linear layer and (optional) sigmoid activation
        self.fc = nn.Linear(hidden_dim, output_size)
        if not softmax:
            self.act = nn.Sigmoid()

    def forward(self, x, hidden):
        """
        Perform a forward pass of the model on some input and hidden state.

        Args:
            x: batch-first tensor of indices; it is cast to ``long`` before
                the embedding lookup.
            hidden: ``(h_0, c_0)`` tuple for the LSTM, e.g. from ``init_hidden``.

        Returns:
            ``(out, hidden)``: the output computed from the last time step and
            the updated LSTM state.
        """
        # embeddings and lstm_out
        embeds = self.embedding(x.long())
        lstm_out, hidden = self.lstm(embeds, hidden)

        # keep only the output of the last time step
        lstm_out = lstm_out[:, -1, :]

        # dropout and fully-connected layer
        out = self.fc(self.dropout(lstm_out))

        # activation function (skipped when the softmax flag is set)
        if not self.softmax:
            out = self.act(out)

        return out, hidden

    def init_hidden(self, batch_size):
        """
        Create the zero-initialized ``(hidden state, cell state)`` tuple.

        Each tensor has shape ``(n_layers, batch_size, hidden_dim)`` and is
        allocated with the dtype and device of the model's parameters, so no
        external GPU flag is required.
        """
        weight = next(self.parameters()).data
        shape = (self.n_layers, batch_size, self.hidden_dim)
        return (weight.new_zeros(shape), weight.new_zeros(shape))