In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import LSTM, Dense

In [2]:
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

In [3]:
# Função para carregar e normalizar os dados
def load_and_normalize_data(path):
    df = pd.read_csv(path, index_col=0)
    scalers = {}
    df_scaled = pd.DataFrame(index=df.index)
    
    for column in df.columns:
        scaler = MinMaxScaler()
        df_scaled[column] = scaler.fit_transform(df[[column]])
        scalers[column] = scaler
    
    return df, df_scaled, scalers

In [4]:
# Função para preparar os dados para a rede neural (sequências)
def create_sequences(data, seq_length):
    xs = []
    ys = []
    for i in range(len(data) - seq_length):
        x = data.iloc[i:i + seq_length].values
        y = data.iloc[i + seq_length].values
        xs.append(x)
        ys.append(y)
    return np.array(xs), np.array(ys)

In [5]:
# Função para dividir os dados em treinamento, validação e teste
def split_data(data, train_pct, val_pct):
    train_size = int(len(data) * train_pct)
    val_size = int(len(data) * val_pct)
    train_data = data[:train_size]
    val_data = data[train_size:train_size + val_size]
    test_data = data[train_size + val_size:]
    return train_data, val_data, test_data

In [7]:
# Função para criar o modelo LSTM
def create_lstm_model(input_shape):
    model = Sequential()
    model.add(LSTM(125, activation='relu', input_shape=input_shape, return_sequences=True))
    model.add(LSTM(75, activation='relu'))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse')
   return model

IndentationError: unindent does not match any outer indentation level (<tokenize>, line 8)

In [None]:
# Função para treinar o modelo
def train_model(model, X_train, y_train, X_val, y_val, epochs=250):
    history = model.fit(X_train, y_train, epochs=epochs, verbose=1, validation_data=(X_val, y_val))
    return history

In [None]:
# Função para fazer previsões
def make_predictions(model, X_test):
    predictions = model.predict(X_test)
    return predictions

In [None]:
# Função para avaliar o modelo
def evaluate_model(y_test, predictions):
    mse = mean_squared_error(y_test, predictions)
    return mse

In [None]:
# Função para reverter a normalização das previsões
def inverse_transform_predictions(predictions, scaler, df, column):
    # Converte previsões em DataFrame com a mesma forma que os dados originais
    predictions_df = pd.DataFrame(predictions, columns=[column])
    
    # Define os índices das previsões
    predictions_df.index = df.index[-len(predictions):]
    
    # Reverte a normalização
    try:
        predictions_df = pd.DataFrame(scaler.inverse_transform(predictions_df), columns=[column], index=predictions_df.index)
    except ValueError as e:
        print(f'Error during inverse transformation: {e}')
        print(f'Predictions: {predictions_df.head()}')
        raise
    
    return predictions_df


In [None]:
# Função para substituir dados no DataFrame original
def replace_predictions_in_df(df, predictions_df):
    df = df.copy()  # Evitar alterar o DataFrame original diretamente
    df.update(predictions_df)
    return df

In [None]:
def plot_and_save(df_real, df_previsoes, column, pdf_pages):
    #Preparar os dados para o gráfico
    df_comparacao = pd.DataFrame({'Real':df_real[column], 'Previsto':df_previsoes[column]})
    #Calcular o desvio percentual
    #df_comparacao['Desvio %'] = ((df_comparacao['Previsto'] - df_comparacao['Real']) / df_comparacao['Real']) * 100
    #Plotar os dados
    fig, ax1 = plt.subplots(figsize=(12,6))
    #Plotar dados reais e previsões
    ax1.plot(df_comparacao.index, df_comparacao['Real'], 'b-', label = 'Dados Reais')
    ax1.plot(df_comparacao.index, df_comparacao['Previsto'], 'r--', label = 'Dados Previstos')
    ax1.legend(loc = 'best')
    #ax2 = ax1.twinx()
    #ax2.plot(df_comparacao.index, df_comparacao['Desvio %'], 'g-*', label = 'Desvio (%)')
    #ax2.legend(loc = 'best')
    plt.title(f'Comparativo entre dados reais e previstos para ativo {column}')
    #plt.show()
    # Salvar o gráfico no PDF
    pdf_pages.savefig(fig)
    plt.close(fig)

In [None]:
# Função principal
def main():
    path = r'C:\Users\dotiw\Documents\UFMG\OneDrive\.vscode\cli\DividendsDownload\tabela_dividendos_por_ativo.csv'
    pdf_path = r'C:\Users\dotiw\Documents\UFMG\OneDrive\.vscode\cli\DividendsModelling\graficos.pdf'

    df, df_scaled, scalers = load_and_normalize_data(path)

    all_predictions_df = pd.DataFrame(index=df.index)
    df_comparacao_total = pd.DataFrame()

    with PdfPages(pdf_path) as pdf:
        for column in df.columns:
            data = df_scaled[[column]]
            train_data, val_data, test_data = split_data(data, train_pct=0.65, val_pct=0.175)
            
            seq_length = 3
            X_train, y_train = create_sequences(pd.DataFrame(train_data, columns=[column]), seq_length)
            X_val, y_val = create_sequences(pd.DataFrame(val_data, columns=[column]), seq_length)
            X_test, y_test = create_sequences(pd.DataFrame(test_data, columns=[column]), seq_length)

            # Ajustar as dimensões para o modelo LSTM
            X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
            X_val = X_val.reshape((X_val.shape[0], X_val.shape[1], 1))
            X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))
            
            # Criar e treinar o modelo
            model = create_lstm_model(input_shape=(X_train.shape[1], X_train.shape[2]))
            train_model(model, X_train, y_train, X_val, y_val)
            
            # Fazer previsões
            predictions = make_predictions(model, X_test)
            mse = evaluate_model(y_test, predictions)
            print(f'MSE for {column}: {mse}')
            
            # Converter previsões de volta à escala original
            scaler = scalers[column]
            predictions_df = inverse_transform_predictions(predictions, scaler, df, column)
            all_predictions_df = all_predictions_df.join(predictions_df, how='outer')

            # Plotar e salvar gráficos no PDF
            df_comparacao = plot_and_save(df, all_predictions_df, column, pdf)
    
    df_updated = replace_predictions_in_df(df, all_predictions_df)
    df_updated.to_csv(r'C:\Users\dotiw\Documents\UFMG\OneDrive\.vscode\cli\DividendsModelling\tabela_dividendos_por_ativo_atualizado.csv')

if __name__ == '__main__':
    main()

Epoch 1/250


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step - loss: 0.0025 - val_loss: 0.0033
Epoch 2/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step - loss: 0.0020 - val_loss: 0.0026
Epoch 3/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step - loss: 0.0015 - val_loss: 0.0020
Epoch 4/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step - loss: 0.0012 - val_loss: 0.0015
Epoch 5/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step - loss: 8.7897e-04 - val_loss: 0.0011
Epoch 6/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step - loss: 6.6951e-04 - val_loss: 7.3676e-04
Epoch 7/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step - loss: 5.3820e-04 - val_loss: 4.6433e-04
Epoch 8/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step - loss: 4.8021e-04 - val_loss: 2.6871e-04
Epoch 9/250
[1m1/1[0m [32m━━━━━━━━━━━━━

  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step - loss: 0.0461 - val_loss: 0.8148
Epoch 2/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step - loss: 0.0440 - val_loss: 0.7940
Epoch 3/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step - loss: 0.0420 - val_loss: 0.7733
Epoch 4/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step - loss: 0.0401 - val_loss: 0.7528
Epoch 5/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step - loss: 0.0382 - val_loss: 0.7322
Epoch 6/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step - loss: 0.0364 - val_loss: 0.7116
Epoch 7/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step - loss: 0.0347 - val_loss: 0.6909
Epoch 8/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step - loss: 0.0330 - val_loss: 0.6701
Epoch 9/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s

  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step - loss: 0.0296 - val_loss: 0.9790
Epoch 2/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step - loss: 0.0279 - val_loss: 0.9631
Epoch 3/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step - loss: 0.0262 - val_loss: 0.9472
Epoch 4/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step - loss: 0.0246 - val_loss: 0.9313
Epoch 5/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step - loss: 0.0230 - val_loss: 0.9153
Epoch 6/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step - loss: 0.0215 - val_loss: 0.8992
Epoch 7/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step - loss: 0.0201 - val_loss: 0.8828
Epoch 8/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step - loss: 0.0187 - val_loss: 0.8663
Epoch 9/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s

  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step - loss: 0.0081 - val_loss: 0.0081
Epoch 2/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 102ms/step - loss: 0.0072 - val_loss: 0.0070
Epoch 3/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 84ms/step - loss: 0.0064 - val_loss: 0.0060
Epoch 4/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 86ms/step - loss: 0.0057 - val_loss: 0.0050
Epoch 5/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 97ms/step - loss: 0.0051 - val_loss: 0.0042
Epoch 6/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 92ms/step - loss: 0.0046 - val_loss: 0.0034
Epoch 7/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 87ms/step - loss: 0.0041 - val_loss: 0.0027
Epoch 8/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 92ms/step - loss: 0.0037 - val_loss: 0.0020
Epoch 9/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s

  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step - loss: 0.0843 - val_loss: 0.3398
Epoch 2/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 106ms/step - loss: 0.0805 - val_loss: 0.3250
Epoch 3/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 145ms/step - loss: 0.0768 - val_loss: 0.3104
Epoch 4/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step - loss: 0.0732 - val_loss: 0.2961
Epoch 5/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 105ms/step - loss: 0.0698 - val_loss: 0.2818
Epoch 6/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 92ms/step - loss: 0.0664 - val_loss: 0.2677
Epoch 7/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step - loss: 0.0630 - val_loss: 0.2536
Epoch 8/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step - loss: 0.0598 - val_loss: 0.2396
Epoch 9/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m