In [None]:
import numpy as np
import pandas as pd
import os
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.layers import Dropout

In [None]:
# Configurações iniciais
previsao_dias = 15
root_path = os.getcwd()
minmaxcaler_5y = root_path + '/minmaxscaler_5y'
lista = [a[2] for a in os.walk(minmaxcaler_5y)]

In [None]:
# Preparação dos dados
total_mse_high = []
total_mse_low = []
count_files = 0

In [None]:
for arquivo in lista[0]:
    df = pd.read_csv(os.path.join(minmaxcaler_5y, arquivo))
    df = df.drop(columns=['Datetime'])
    codigo = arquivo.split('_')[0]  # assumindo que o código da ação está no nome do arquivo

    # Ajustando scalers
    scaler_high = MinMaxScaler()
    scaler_low = MinMaxScaler()
    high_data = scaler_high.fit_transform(df[['High']])
    low_data = scaler_low.fit_transform(df[['Low']])
    combined_data = np.hstack((high_data, low_data))

    training_size = int(len(combined_data) * 0.75)
    train_data = combined_data[:training_size]
    test_data = combined_data[training_size - previsao_dias:]

    x_train, y_train, x_test, y_test = [], [], [], []

    for i in range(previsao_dias, len(train_data)):
        x_train.append(train_data[i-previsao_dias:i])
        y_train.append(train_data[i])

    for i in range(previsao_dias, len(test_data)):
        x_test.append(test_data[i-previsao_dias:i])
        y_test.append(test_data[i])

    x_train, y_train = np.array(x_train), np.array(y_train)
    x_test, y_test = np.array(x_test), np.array(y_test)

    # Modelo
    model = Sequential()
    model.add(LSTM(100, return_sequences=False, input_shape=(previsao_dias, 2)))
    model.add(Dropout(0.2))
    model.add(Dense(2))
    model.compile(loss='mean_squared_error', optimizer='adam')
    model.fit(x_train, y_train, epochs=15, batch_size=64, validation_split=0.1)

    # Baixando dados reais
    dados_reais = yf.download([codigo + ".SA"], period = "5y", interval="1d")
    dados_reais = dados_reais.dropna(axis = 0)
    dados = pd.DataFrame()
    dados["Datetime"] = dados_reais.index[:-1]

    for colunas in ["High", "Low"]:
      dados[colunas] = dados_reais[colunas].values[:-1]
    reais_high = dados_reais['High'].values
    reais_low = dados_reais['Low'].values


    # Previsão e cálculo do MSE
    predictions = model.predict(x_test)
    highs_predictions_rescaled = scaler_high.inverse_transform(predictions[:, 0].reshape(-1, 1)).flatten()
    lows_predictions_rescaled = scaler_low.inverse_transform(predictions[:, 1].reshape(-1, 1)).flatten()

    print(highs_predictions_rescaled)

    mse_high = mean_squared_error(reais_high[-len(highs_predictions_rescaled):], highs_predictions_rescaled)
    mse_low = mean_squared_error(reais_low[-len(lows_predictions_rescaled):], lows_predictions_rescaled)

    total_mse_high.append(mse_high)
    total_mse_low.append(mse_low)

    max_mse_high = max(total_mse_high)
    max_mse_low = max(total_mse_low)

    total_mse_high_normalized = [mse / max_mse_high for mse in total_mse_high]
    total_mse_low_normalized = [mse / max_mse_low for mse in total_mse_low]

print(total_mse_high_normalized)
print(total_mse_low_normalized)

In [24]:
print(str(sum(total_mse_high_normalized)))
print(str(sum(total_mse_low_normalized)))

5.48674881391877
5.420316517125867
