In [10]:
from sklearn.preprocessing import MinMaxScaler
import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt
import math
import numpy as np
import torch

# Download the price history for Itaú Unibanco shares (ticker "ITUB4").
df_ITUB4 = yf.download(
    'ITUB4.SA',
    start="2016-01-01",
    end="2024-03-31",
)
# Reduced dataset: only the High and Low columns (the index is kept as-is).
df_reduced = df_ITUB4.loc[:, ['High', 'Low']]

[*********************100%%**********************]  1 of 1 completed


In [None]:
# Sort the rows by date.
df_reduced = df_reduced.sort_values(by='Date')
# Average of the daily high and low, stored in the 'Median' column.
# The source columns are named explicitly so that re-running this cell after
# other columns have been added (e.g. 'Median' or 'Stand') still averages
# only High and Low instead of every column in the frame.
df_reduced['Median'] = df_reduced[['High', 'Low']].mean(axis=1)


In [None]:
# Plot the High/Low average price of the stock.
plt.plot(df_reduced['Median'])
# show has to be called; a bare `plt.show` only evaluates to the function
# object and never asks matplotlib to render the figure.
plt.show()

In [None]:
# Rescale the High/Low average with MinMaxScaler (default feature_range=(0, 1)).
# NOTE(review): the scaler is fitted on the whole series, i.e. before the
# train/test split done in a later cell, so test-period values influence the
# scaling. Consider fitting on the training rows only.
norm = MinMaxScaler()
# Select the column by name instead of by position so this cell does not
# depend on the column order of df_reduced.
normalized = df_reduced[['Median']]
df_reduced['Stand'] = norm.fit_transform(normalized.values)

In [None]:
# Drop any rows that contain missing values.
df_reduced = df_reduced.dropna()

# The first 80% of the observations (rounded up) form the training set and
# the remaining rows form the test set.
split_index = math.ceil(len(df_reduced) * 0.8)
stand_values = df_reduced['Stand']
df_training = stand_values.iloc[:split_index].values
df_testing = stand_values.iloc[split_index:].values

In [None]:
# Turn each one-dimensional array into a two-dimensional, single-column array.
df_training = df_training.reshape(-1, 1)
df_testing = df_testing.reshape(-1, 1)

# Print the first five rows of each split as a quick sanity check.
for split_rows in (df_training, df_testing):
    print(*split_rows[:5])

In [14]:
# Build the input/target sequences used to train and evaluate the model.
training_steps_backpropagation = 100
test_steps_backpropagation = 50


def _make_windows(series, steps):
    """Slice `series` into overlapping windows of length `steps`.

    Returns a pair of arrays (inputs, targets); each target window is the
    matching input window shifted one position forward.
    """
    window_count = len(series) - steps
    inputs = np.array([series[k:k + steps] for k in range(window_count)])
    targets = np.array([series[k + 1:k + steps + 1] for k in range(window_count)])
    return inputs, targets


x_training, y_training = _make_windows(df_training, training_steps_backpropagation)
x_test, y_test = _make_windows(df_testing, test_steps_backpropagation)

In [15]:
# Convert the sequence arrays into float32 PyTorch tensors.
x_training, y_training = (
    torch.tensor(windows, dtype=torch.float32)
    for windows in (x_training, y_training)
)
x_test, y_test = (
    torch.tensor(windows, dtype=torch.float32)
    for windows in (x_test, y_test)
)

# Last expression of the cell: displays the shapes of the test tensors.
x_test.shape, y_test.shape

(torch.Size([360, 50, 1]), torch.Size([360, 50, 1]))

In [16]:
# LSTM model class definition.
class LSTMModel(torch.nn.Module):
    """Stacked LSTM followed by a linear layer applied to every time step.

    Args:
        input_size: number of features per time step.
        hidden_size: size of the LSTM hidden state.
        num_layers: number of stacked LSTM layers.
        output_size: number of values produced per time step. Defaults to 1,
            the value that used to be hard-coded, so existing three-argument
            calls behave exactly as before.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size=1) -> None:
        super().__init__()
        # batch_first=True: batched inputs are laid out as (batch, seq_len, input_size).
        self.lstm = torch.nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.linear = torch.nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return one prediction per time step of `x`.

        The LSTM output for every step is passed through the linear layer;
        the final hidden/cell states are discarded.
        """
        out, _ = self.lstm(x)
        return self.linear(out)

In [19]:
# Check the resources of the machine that will run the LSTM model:
# use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cpu


In [29]:
# Hyper-parameters of the LSTM model.
input_size = 1
num_layers = 12
hidden_size = 24
# Defined for reference only: it is not passed to LSTMModel below.
output_size = 1

# Build the model and move it to the selected device.
model = LSTMModel(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
)
model = model.to(device)

# Loss function: mean squared error averaged over all elements.
loss_func = torch.nn.MSELoss(reduction='mean')

# Optimizer: Adam over all model parameters.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

In [30]:
# Create the DataLoaders used for training and testing.
import torch.utils.data  # importing the submodule also imports torch.utils

batch_size = 10

# Training DataLoader: batches are drawn in shuffled order.
training_data = torch.utils.data.TensorDataset(x_training, y_training)
training_loader = torch.utils.data.DataLoader(
    dataset=training_data,
    batch_size=batch_size,
    shuffle=True,
)

# Test DataLoader: batches keep the original order.
test_data = torch.utils.data.TensorDataset(x_test, y_test)
test_loader = torch.utils.data.DataLoader(
    dataset=test_data,
    batch_size=batch_size,
    shuffle=False,
)

In [31]:
# Training phase, with an evaluation pass over the test set after every epoch.
epochs = 50
training_hist = []  # mean training loss per epoch
test_hist = []      # mean test loss per epoch

for epoch in range(epochs):
    perda_total_treino = 0.0

    # ---- Training pass ----
    model.train()
    for batch_x_training, batch_y_training in training_loader:
        batch_x_training, batch_y_training = batch_x_training.to(device), batch_y_training.to(device)

        predictions_training = model(batch_x_training)
        loss_training = loss_func(predictions_training, batch_y_training)

        optimizer.zero_grad()
        loss_training.backward()
        # step() has to be *called*: a bare `optimizer.step` is just an
        # attribute lookup, so the weights would never be updated.
        optimizer.step()

        perda_total_treino += loss_training.item()

    # Mean training loss over the batches of this epoch.
    loss_medio_treino = perda_total_treino / len(training_loader)
    training_hist.append(loss_medio_treino)

    # ---- Evaluation pass on the test set (no gradient tracking) ----
    model.eval()
    perda_total_teste = 0.0
    with torch.no_grad():
        for batch_x_test, batch_y_test in test_loader:
            batch_x_test, batch_y_test = batch_x_test.to(device), batch_y_test.to(device)

            predictions_test = model(batch_x_test)
            loss_test = loss_func(predictions_test, batch_y_test)

            perda_total_teste += loss_test.item()

    # Mean test loss over the batches of this epoch.
    loss_medio_teste = perda_total_teste / len(test_loader)
    test_hist.append(loss_medio_teste)

    # Report the mean training and test losses every 10 epochs.
    if (epoch + 1) % 10 == 0:
        print(f'Epoca[{epoch+1}/{epochs}] - Perda de treino médio: {loss_medio_treino:.4f}, Perda de teste médio: {loss_medio_teste:.4f}')