In [1]:
# Importando as bibliotecas necessárias
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense, LSTM, GRU
from sklearn.metrics import mean_squared_error
import tensorflow as tf
from google.colab import drive
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from keras.callbacks import EarlyStopping
!pip install optuna
import optuna
import optuna.visualization as vis
from tqdm import tqdm
from optuna.importance import get_param_importances

Collecting optuna
  Downloading optuna-4.1.0-py3-none-any.whl.metadata (16 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.14.0-py3-none-any.whl.metadata (7.4 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.6-py3-none-any.whl.metadata (2.9 kB)
Downloading optuna-4.1.0-py3-none-any.whl (364 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m364.4/364.4 kB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.14.0-py3-none-any.whl (233 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.5/233.5 kB[0m [31m12.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Downloading Mako-1.3.6-py3-none-any.whl (78 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.6/78.6 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: Ma

In [2]:
# Carregar o drive

drive.mount('/content/drive/')

Mounted at /content/drive/


In [7]:
# Definição de hiperparametros
batch_size = 32
lookback = 5
learning_rate=0.002

In [3]:
# Carregar o arquivo CSV sem índice e com a coluna 'Date' no formato datetime
df = pd.read_csv(
    "/content/drive/MyDrive/Mestrado/df_indice_brasil_endogenous_exogenous_red1.csv",
    sep=',',
    parse_dates=['Date'],  # Converte a coluna 'Date' para datetime
    index_col=None         # Não define uma coluna de índice
)

# Visualizar as primeiras linhas para confirmar o carregamento
df.head()

Unnamed: 0.1,Unnamed: 0,Date,Close,High,Low,Volume,USD
0,0,2013-01-02,62550.1,62887.0,60990.0,3739800.0,2.0456
1,1,2013-01-03,63312.46,63473.0,62341.0,3355800.0,2.0456
2,2,2013-01-04,62523.06,63314.0,62415.0,6233800.0,2.0487
3,3,2013-01-07,61932.54,62699.0,61639.0,3985800.0,2.0324
4,4,2013-01-08,61127.84,62265.0,61081.0,3840600.0,2.0264


In [4]:
#carregar base de dados
# Selecionar apenas as colunas desejadas
df = df[['Date', 'Close', 'High', 'Low', 'Volume', 'USD']]

# Verificar as informações do DataFrame
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2724 entries, 0 to 2723
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   Date    2724 non-null   datetime64[ns]
 1   Close   2724 non-null   float64       
 2   High    2724 non-null   float64       
 3   Low     2724 non-null   float64       
 4   Volume  2724 non-null   float64       
 5   USD     2724 non-null   float64       
dtypes: datetime64[ns](1), float64(5)
memory usage: 127.8 KB


In [5]:
# separação dos dados em treino, validação e teste
train_size = int(len(df['Close']) * 0.75)
val_start = train_size
val_end = int(len(df['Close']) * 0.85)

df_train = df.iloc[:train_size].copy()
df_valid = df.iloc[val_start:val_end].copy()
df_test = df.iloc[val_end:].copy()

In [8]:
# Normalização das colunas 'Close' e 'High'
train_mean = df_train[['Close', 'USD']].mean().values
train_std = df_train[['Close', 'USD']].std().values

train_not_norm = df_train[['Close', 'USD']].to_numpy()
val_not_norm = df_valid[['Close']].to_numpy()

train_norm = (train_not_norm - train_mean) / train_std
val_norm = (val_not_norm - train_mean) / train_std

# Preparação dos dados de treino, validação e teste com exógena 'Volume'
train_dataset = tf.keras.utils.timeseries_dataset_from_array(
    data=train_norm[lookback:],  # Usar dados a partir de lookback
    targets=train_norm[lookback:, 0],  # Target continua sendo 'Close'
    sequence_length=lookback,
    shuffle=False,
    batch_size=batch_size
)

validation_dataset = tf.keras.utils.timeseries_dataset_from_array(
    data=val_norm[lookback:],  # Usar dados a partir de lookback
    targets=val_norm[lookback:, 0],  # Target continua sendo 'Close'
    sequence_length=lookback,
    shuffle=False,
    batch_size=batch_size
)

In [9]:
# Função para construir o modelo GRU
def build_model(n_layers, n_units_list, dropout_rates, activations, n_units_dense, activation_dense, lookback):
    model = tf.keras.models.Sequential()
    for i in range(n_layers):
        # Adiciona camadas GRU de acordo com o número de camadas especificado
        if i == 0:
            # A primeira camada GRU precisa da dimensão de entrada
            model.add(tf.keras.layers.GRU(n_units_list[i], activation=activations[i], return_sequences=True if n_layers > 1 else False, input_shape=(lookback, 2)))
        else:
            # As camadas GRU subsequentes não precisam da dimensão de entrada
            model.add(tf.keras.layers.GRU(n_units_list[i], activation=activations[i], return_sequences=True if i != n_layers - 1 else False))
        # Adiciona uma camada de Dropout após cada camada GRU
        model.add(tf.keras.layers.Dropout(dropout_rates[i]))
    # Adiciona uma camada Densa após as camadas GRU
    model.add(tf.keras.layers.Dense(n_units_dense, activation=activation_dense))
    # A última camada é uma camada Densa com uma única unidade (para previsão de série temporal univariada)
    model.add(tf.keras.layers.Dense(1))
    return model

# Função objetivo para a otimização do Optuna
def objective(trial):
  # Define os hiperparâmetros

  n_layers = trial.suggest_int('n_layers', 1, 2)#3 é demais
  n_units_list = [trial.suggest_int(f'n_units_lstm{i+1}', 32, 256) for i in range(n_layers)]
  dropout_rates = [trial.suggest_float(f'dropout_rate_lstm{i+1}', 0.0, 0.5) for i in range(n_layers)]
  activations = [trial.suggest_categorical(f'activation_lstm{i+1}', ['tanh', 'relu', 'sigmoid']) for i in range(n_layers)]
  n_units_dense = trial.suggest_int('n_units_dense', 16, 64)
  activation_dense = trial.suggest_categorical('activation_dense', ['relu', 'sigmoid', 'tanh'])
  optimizer = trial.suggest_categorical('optimizer', ['adam', 'rmsprop'])# sgd não deve dar bom resultado
  lookback = trial.suggest_int('lookback', 3, 15, step=2)
  batch_size = trial.suggest_categorical('batch_size', [16, 32, 64, 128])
  learning_rate = trial.suggest_float('learning_rate', 0.001, 0.01, log=True)

  # Constrói o modelo GRU com os hiperparâmetros sugeridos
  model = build_model(n_layers, n_units_list, dropout_rates, activations, n_units_dense, activation_dense, lookback)

  # Compila o modelo com a função de perda e o otimizador sugeridos
  model.compile(optimizer=optimizer, loss='mae')

  # Adiciona um callback para parada antecipada

  early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)

  # Treina o modelo com os dados de treino
  for epoch in range(epochs):
      history = model.fit(train_dataset, epochs=1, batch_size=batch_size, validation_data=validation_dataset, verbose=0, callbacks=[early_stopping])

      # Obtém a métrica de validação para a época atual
      val_loss = history.history['val_loss'][0]

      # Reporta o valor intermediário
      trial.report(val_loss, epoch)

      # Verifica se o teste deve ser interrompido
      if trial.should_prune():
          raise optuna.TrialPruned()# Leva adiante o treino apenas se for promissor

  # Avalia o modelo com os dados de validação
  metric = model.evaluate(validation_dataset, verbose=0)

  # Optuna minimiza a função objetivo, então retorne a métrica que você deseja minimizar
  return metric

epochs = 200

# Cria um estudo Optuna
study = optuna.create_study(direction='minimize')

# Função de callback para atualizar a barra de progresso
def callback(study, trial):
    pbar.update(1)

# Otimiza o estudo com a função objetivo
# Utiliza o tqdm para visualizar a progressão da otimização
with tqdm(total=50, desc="Optimizing", bar_format="{l_bar}{bar} [ time left: {remaining} ]") as pbar:
    study.optimize(objective, n_trials=50, callbacks=[callback])
# Obtém os melhores hiperparâmetros do estudo
best_params = study.best_params
print("Melhores Hiperparâmetros:", best_params)

[I 2024-11-14 09:33:59,300] A new study created in memory with name: no-name-7c749b61-b509-43e3-ba15-56879dee243d
  super().__init__(**kwargs)
[I 2024-11-14 09:36:16,347] Trial 0 finished with value: 1.565255880355835 and parameters: {'n_layers': 1, 'n_units_lstm1': 38, 'dropout_rate_lstm1': 0.3295807356732453, 'activation_lstm1': 'sigmoid', 'n_units_dense': 30, 'activation_dense': 'sigmoid', 'optimizer': 'rmsprop', 'lookback': 7, 'batch_size': 16, 'learning_rate': 0.0021373294384563093}. Best is trial 0 with value: 1.565255880355835.
Optimizing:   2%|▏          [ time left: 1:51:55 ][I 2024-11-14 09:40:53,798] Trial 1 finished with value: 1.0640273094177246 and parameters: {'n_layers': 1, 'n_units_lstm1': 206, 'dropout_rate_lstm1': 0.4885298953575753, 'activation_lstm1': 'relu', 'n_units_dense': 39, 'activation_dense': 'tanh', 'optimizer': 'adam', 'lookback': 11, 'batch_size': 128, 'learning_rate': 0.002829719725404078}. Best is trial 1 with value: 1.0640273094177246.
Optimizing:   4%

Melhores Hiperparâmetros: {'n_layers': 1, 'n_units_lstm1': 206, 'dropout_rate_lstm1': 0.4885298953575753, 'activation_lstm1': 'relu', 'n_units_dense': 39, 'activation_dense': 'tanh', 'optimizer': 'adam', 'lookback': 11, 'batch_size': 128, 'learning_rate': 0.002829719725404078}





In [8]:
'''# Função para construir o modelo LSTM
def build_model(n_layers, n_units_list, dropout_rates, activations, n_units_dense, activation_dense, lookback):
    model = tf.keras.models.Sequential()
    for i in range(n_layers):
        if i == 0:
            model.add(tf.keras.layers.GRU(n_units_list[i], activation=activations[i], return_sequences=True if n_layers > 1 else False, input_shape=(lookback, 2)))
        else:
            model.add(tf.keras.layers.GRU(n_units_list[i], activation=activations[i], return_sequences=True if i != n_layers - 1 else False))
        model.add(tf.keras.layers.Dropout(dropout_rates[i]))
    model.add(tf.keras.layers.Dense(n_units_dense, activation=activation_dense))
    model.add(tf.keras.layers.Dense(1))
    return model'''

In [9]:
'''# Função objetivo para a otimização do Optuna
def objective(trial):
    n_layers = trial.suggest_int('n_layers', 1, 3)
    n_units_list = [trial.suggest_int(f'n_units_GRU{i+1}', 32, 256) for i in range(n_layers)]
    dropout_rates = [trial.suggest_float(f'dropout_rate_GRU{i+1}', 0.0, 0.5) for i in range(n_layers)]
    activations = [trial.suggest_categorical(f'activation_GRU{i+1}', ['tanh', 'relu', 'sigmoid']) for i in range(n_layers)]
    n_units_dense = trial.suggest_int('n_units_dense', 16, 64)
    activation_dense = trial.suggest_categorical('activation_dense', ['relu', 'sigmoid', 'tanh'])
    optimizer = trial.suggest_categorical('optimizer', ['adam', 'rmsprop'])
    lookback = trial.suggest_int('lookback', 3, 15, step=2)
    batch_size = trial.suggest_categorical('batch_size', [16, 32, 64, 128])
    learning_rate = trial.suggest_float('learning_rate', 0.001, 0.01, log=True)

    model = build_model(n_layers, n_units_list, dropout_rates, activations, n_units_dense, activation_dense, lookback)
    model.compile(optimizer=optimizer, loss='mae')

    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)

    history = model.fit(
        train_dataset,
        epochs=epochs,
        validation_data=validation_dataset,
        callbacks=[early_stopping],
        batch_size=batch_size,
        verbose=0
    )

    val_loss = history.history['val_loss'][-1]
    return val_loss

epochs = 200

study = optuna.create_study(direction='minimize')

def callback(study, trial):
    pbar.update(1)

with tqdm(total=50, desc="Optimizing", bar_format="{l_bar}{bar} [ time left: {remaining} ]") as pbar:
    study.optimize(objective, n_trials=50, callbacks=[callback])

best_params = study.best_params
print("Melhores Hiperparâmetros:", best_params)'''

[I 2024-11-13 19:50:46,327] A new study created in memory with name: no-name-f0e04244-0dd2-46b6-be27-648d2945b715
  super().__init__(**kwargs)
[I 2024-11-13 19:51:43,405] Trial 0 finished with value: 0.15655815601348877 and parameters: {'n_layers': 3, 'n_units_GRU1': 162, 'n_units_GRU2': 94, 'n_units_GRU3': 152, 'dropout_rate_GRU1': 0.28380584877820486, 'dropout_rate_GRU2': 0.12658416777294268, 'dropout_rate_GRU3': 0.1603231496129071, 'activation_GRU1': 'tanh', 'activation_GRU2': 'relu', 'activation_GRU3': 'tanh', 'n_units_dense': 64, 'activation_dense': 'sigmoid', 'optimizer': 'rmsprop', 'lookback': 15, 'batch_size': 16, 'learning_rate': 0.0011113182397012268}. Best is trial 0 with value: 0.15655815601348877.
Optimizing:   2%|▏          [ time left: 46:36 ][I 2024-11-13 19:52:58,894] Trial 1 finished with value: 0.08726052939891815 and parameters: {'n_layers': 2, 'n_units_GRU1': 127, 'n_units_GRU2': 138, 'dropout_rate_GRU1': 0.34200621409657883, 'dropout_rate_GRU2': 0.3961303619107070

Melhores Hiperparâmetros: {'n_layers': 1, 'n_units_GRU1': 208, 'dropout_rate_GRU1': 0.09876576801277778, 'activation_GRU1': 'relu', 'n_units_dense': 34, 'activation_dense': 'relu', 'optimizer': 'adam', 'lookback': 3, 'batch_size': 64, 'learning_rate': 0.009911782526393935}





In [10]:
#Gráfico do history das épocas
optuna.visualization.plot_optimization_history(study)

In [11]:
# Visualização os valores intermediários

optuna.visualization.plot_intermediate_values(study)

In [12]:
# Importância de cada hiperparâmetro
optuna.visualization.plot_param_importances(study)

In [13]:
# Pegar apenas hiperparametros com mais de 10% de relevancia
fig = vis.plot_contour(study, params=['dropout_rate_lstm1', 'learning_rate', 'n_units_dense', 'optimizer'])
fig.show()
