In [1]:
import pandas as pd
import numpy as np
import yfinance as yfin
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Dense

In [None]:
def carregarDados(ticker, data_inicio, data_fim):
    """Download market data for ``ticker`` through yfinance.

    Args:
        ticker: Symbol (or symbols) forwarded to ``yfin.download`` as ``tickers``.
        data_inicio: Start of the requested period, forwarded as ``start``.
        data_fim: End of the requested period, forwarded as ``end``.

    Returns:
        The object returned by ``yfin.download`` (other cells index it with
        ``['Adj Close']``, so presumably a DataFrame that has that column).
    """
    return yfin.download(tickers=ticker, start=data_inicio, end=data_fim)

In [3]:
def normalizar(df):
    """Fit a default ``MinMaxScaler`` on the 'Adj Close' column and scale it.

    Args:
        df: Object indexable by ``'Adj Close'`` whose result exposes ``.values``.

    Returns:
        Tuple ``(scaled_data, scaler)``: the scaled column as returned by
        ``fit_transform`` on a single-column 2-D array, and the fitted scaler
        (needed later to transform test data and invert predictions).
    """
    scaler = MinMaxScaler()
    # The scaler expects 2-D input, hence the reshape to a single column.
    adj_close_column = df['Adj Close'].values.reshape(-1, 1)
    return scaler.fit_transform(adj_close_column), scaler

In [4]:
def prepararDados(df, days):
    """Turn a sequence into sliding-window samples and next-step targets.

    For every position ``end`` from ``days`` up to ``len(df) - 1`` the sample
    is ``df[end - days:end]`` and the target is ``df[end]``.

    Args:
        df: Sliceable sequence (the caller passes the scaled price array).
        days: Window length, i.e. how many previous rows form one sample.

    Returns:
        Tuple ``(x_train, y_train)`` of numpy arrays built from the samples
        and the targets respectively.
    """
    positions = range(days, len(df))
    windows = [df[end - days:end] for end in positions]
    targets = [df[end] for end in positions]
    return np.array(windows), np.array(targets)


In [5]:
def modeloLSTM(input_shape):
    """Build the (uncompiled) stacked-LSTM regression network.

    The stack is three LSTM layers of 50 units, each followed by
    ``Dropout(0.2)``, and a final ``Dense`` layer with a single output unit.

    Args:
        input_shape: Passed as ``input_shape`` to the first LSTM layer.

    Returns:
        A ``Sequential`` model; compilation is left to the caller.
    """
    layers = [
        # The first two LSTM layers return full sequences so the next LSTM
        # receives a sequence as input.
        LSTM(units=50, return_sequences=True, input_shape=input_shape),
        Dropout(0.2),
        LSTM(units=50, return_sequences=True),
        Dropout(0.2),
        # The last LSTM layer uses the default return_sequences setting.
        LSTM(units=50),
        Dropout(0.2),
        Dense(units=1),
    ]
    return Sequential(layers)

In [6]:
def treinarModelo(model, x_train, y_train, epochs=25, batch_size=32):
    """Compile ``model`` with Adam / mean squared error and fit it.

    Args:
        model: Object exposing ``compile`` and ``fit`` (a Keras model here).
        x_train: Training inputs, forwarded to ``fit``.
        y_train: Training targets, forwarded to ``fit``.
        epochs: Number of epochs forwarded to ``fit``.
        batch_size: Batch size forwarded to ``fit``.

    Returns:
        The same ``model`` object, after ``fit`` has been called on it.
    """
    model.compile(loss='mean_squared_error', optimizer='adam')
    fit_options = dict(epochs=epochs, batch_size=batch_size, verbose=1)
    model.fit(x_train, y_train, **fit_options)
    return model

In [7]:
def avaliarTreino(model, x_train, df, scaler, days):
    """Score the model's in-sample predictions in the original price scale.

    Args:
        model: Object exposing ``predict``.
        x_train: Inputs passed to ``model.predict``.
        df: Object indexable by ``'Adj Close'``; its values are the ground truth.
        scaler: Fitted scaler used to invert the predictions.
        days: Number of leading rows of ``df`` skipped before comparing, so
            the ground truth starts at row ``days``.

    Returns:
        Tuple ``(mae, mse, rmse, r2)`` comparing ``df['Adj Close']`` from row
        ``days`` onward against the inverse-transformed predictions.
    """
    y_pred = scaler.inverse_transform(model.predict(x_train))
    y_true = df['Adj Close'].values[days:]

    mse_train = mean_squared_error(y_true, y_pred)
    return (
        mean_absolute_error(y_true, y_pred),
        mse_train,
        np.sqrt(mse_train),
        r2_score(y_true, y_pred),
    )

In [8]:
def prepararDadosTeste(total_dataset, df_teste, scaler, days):
    """Build the windowed test inputs from the tail of the full series.

    The slice starts ``len(df_teste) + days`` rows before the end of
    ``total_dataset``, is scaled with ``scaler.transform``, and is then cut
    into one window of ``days`` consecutive values per remaining row.

    Args:
        total_dataset: Series-like object supporting slicing and ``.values``.
        df_teste: Test data; only its length is used here.
        scaler: Fitted scaler exposing ``transform``.
        days: Window length.

    Returns:
        Array of shape ``(n_windows, days, 1)``.
    """
    first_row = len(total_dataset) - len(df_teste) - days
    tail_values = total_dataset[first_row:].values
    # Scalers work on 2-D input, so reshape to a single column first.
    scaled_tail = scaler.transform(tail_values.reshape(-1, 1))

    windows = [
        scaled_tail[stop - days:stop, 0]
        for stop in range(days, len(scaled_tail))
    ]
    x_test = np.array(windows)
    n_windows, window_len = x_test.shape[0], x_test.shape[1]
    # Append a trailing feature axis of size 1.
    return x_test.reshape(n_windows, window_len, 1)


In [9]:
def avaliarTeste(model, x_test, df_teste, scaler, days):
    """Score the model's out-of-sample predictions in the original price scale.

    Args:
        model: Object exposing ``predict``.
        x_test: Inputs passed to ``model.predict``.
        df_teste: Object indexable by ``'Adj Close'``; its values are the
            ground truth.
        scaler: Fitted scaler used to invert the predictions.
        days: Window length; accepted for signature compatibility, not used.

    Returns:
        Tuple ``(mae, mse, rmse, r2, actual_test, predicted_test)`` where the
        two arrays are the aligned ground truth and predictions that were
        scored.
    """
    predicted_test = scaler.inverse_transform(model.predict(x_test))
    actual_test = df_teste['Adj Close'].values

    # prepararDadosTeste yields one window per test row, and window k ends
    # right before test row k, so prediction k already lines up with actual k.
    # Unconditionally dropping the last prediction (as was done before) threw
    # away the final test day; only trim when the lengths actually differ.
    n_common = min(len(actual_test), len(predicted_test))
    predicted_test = predicted_test[:n_common]
    actual_test = actual_test[:n_common]

    mae = mean_absolute_error(actual_test, predicted_test)
    mse = mean_squared_error(actual_test, predicted_test)
    rmse = np.sqrt(mse)
    r2 = r2_score(actual_test, predicted_test)

    return mae, mse, rmse, r2, actual_test, predicted_test

In [10]:
def plotarResultados(df_treino, predicted_train, df_teste, predicted_test, days, ticker):
    """Plot training data, test data and both prediction curves on one axis.

    The x axis is a plain row counter: training rows occupy
    ``0 .. len(df_treino) - 1`` and the test rows follow immediately after.
    The figure is written to ``prediction_plot_{days}_days.png`` and shown.

    Args:
        df_treino: Training data, indexable by ``'Adj Close'``.
        predicted_train: Curve drawn starting at x = ``days``.
        df_teste: Test data, indexable by ``'Adj Close'``.
        predicted_test: Curve drawn starting at x = ``len(df_treino)``.
        days: Window length; used for the x offset, the title and the file name.
        ticker: Symbol used in the labels and the title.
    """
    fig, ax = plt.subplots(figsize=(14, 7))

    test_start = len(df_treino)
    train_pred_x = range(days, days + len(predicted_train))
    test_real_x = range(test_start, test_start + len(df_teste))
    test_pred_x = range(test_start, test_start + len(predicted_test))

    ax.plot(df_treino['Adj Close'].values, color='blue', label=f'Dados de Treinamento da {ticker}')
    ax.plot(train_pred_x, predicted_train, color='red', label=f'Previsão do Treinamento {ticker}')
    ax.plot(test_real_x, df_teste['Adj Close'].values, color='black', label=f'Preços Reais da {ticker}')
    ax.plot(test_pred_x, predicted_test, color='green', label=f'Previsão do Teste da {ticker}')

    ax.set_title(f'{ticker} - Janela: {days} dias')
    ax.set_xlabel('Tempo')
    ax.set_ylabel(f'Preço de {ticker}')
    ax.legend()

    # Save before show so the written file has the fully drawn figure.
    fig.savefig(f'prediction_plot_{days}_days.png')
    plt.show()


In [11]:
def executarModelo(ticket, data, max_days=21, teste_inicio='2021-01-01', teste_fim='2024-01-01'):
    """Train and evaluate one LSTM per window length and collect the metrics.

    For every window length ``days`` in ``range(1, max_days)`` a fresh model
    is trained on ``data``, scored on ``data`` and on a separately downloaded
    test period, and plotted. All metrics are written to
    ``model_performance_all_days.csv``.

    Args:
        ticket: Ticker symbol used to download the test period and to label plots.
        data: Training data, indexable by ``'Adj Close'``.
        max_days: Exclusive upper bound of the window lengths that are tried.
        teste_inicio: Start date of the test download.
        teste_fim: End date of the test download.

    Returns:
        DataFrame with one row of train/test metrics per window length
        (the same content that is written to the CSV file).
    """
    all_results = []

    scaled_data, scaler = normalizar(data)

    # The test period does not depend on the window length, so download and
    # concatenate it once instead of hitting the network on every iteration.
    df_teste = carregarDados(ticket, teste_inicio, teste_fim)
    total_dataset = pd.concat((data['Adj Close'], df_teste['Adj Close']), axis=0)

    for days in range(1, max_days):
        x_train, y_train = prepararDados(scaled_data, days)
        x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))

        model = modeloLSTM((x_train.shape[1], 1))
        model = treinarModelo(model, x_train, y_train)

        mae_train, mse_train, rmse_train, r2_train = avaliarTreino(model, x_train, data, scaler, days)

        x_test = prepararDadosTeste(total_dataset, df_teste, scaler, days)

        mae, mse, rmse, r2, actual_test, predicted_test = avaliarTeste(model, x_test, df_teste, scaler, days)

        # Plot what the model actually predicts on the training windows.
        # Previously the inverse-scaled y_train (the true targets) was passed
        # here, so the "training prediction" curve was just the real prices.
        predicted_train = scaler.inverse_transform(model.predict(x_train))
        plotarResultados(data, predicted_train, df_teste, predicted_test, days, ticket)

        all_results.append({
            'Days': days,
            'MAE_Train': mae_train,
            'MSE_Train': mse_train,
            'RMSE_Train': rmse_train,
            'R²_Train': r2_train,
            'MAE_Test': mae,
            'MSE_Test': mse,
            'RMSE_Test': rmse,
            'R²_Test': r2
        })

    all_results_df = pd.DataFrame(all_results)
    all_results_df.to_csv('model_performance_all_days.csv', index=False)
    return all_results_df

In [None]:
# Ticker symbol handed to carregarDados (which forwards it to yfinance).
ticket = 'PETR4.SA'
# Download the history requested for 2005-01-01 .. 2021-01-01.
# NOTE(review): presumably this frame is later passed to executarModelo as
# the training data; that call is not visible in this cell, so confirm.
data = carregarDados(ticket, '2005-01-01', '2021-01-01')
# Bare last expression so the notebook renders the frame as the cell output.
data


[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2005-01-03,12.187500,12.237500,11.773750,11.775000,2.700688,30044800
2005-01-04,11.873750,11.950000,11.625000,11.635000,2.668579,37667200
2005-01-05,11.662500,11.772500,11.475000,11.590000,2.658257,30873600
2005-01-06,11.652500,11.687500,11.487500,11.625000,2.666286,28224000
2005-01-07,11.648750,11.787500,11.563750,11.676250,2.678040,25980800
...,...,...,...,...,...,...
2020-12-22,27.200001,27.469999,27.049999,27.280001,9.438391,46513200
2020-12-23,27.430000,28.250000,27.350000,27.950001,9.670202,49038900
2020-12-28,28.360001,28.520000,28.180000,28.180000,9.749776,36313300
2020-12-29,28.379999,28.430000,27.990000,28.270000,9.780915,29891100


: 