Modelo Transformer com Otimização de Hiperparâmetros
funcionando com PyTorch e Optuna

In [3]:
import os
import logging
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import optuna
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer, KNNImputer
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import matplotlib.pyplot as plt
from datetime import datetime
from copy import deepcopy as dc
from torch.utils.data import DataLoader
from torch.cuda.amp import autocast, GradScaler
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.ensemble import ExtraTreesRegressor
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.cuda.amp import autocast, GradScaler
import optuna
import math

data_hoje = datetime.now().strftime('%d-%m')
inicio_execucao = pd.Timestamp.now()

os.makedirs(f'../logs/{data_hoje}', exist_ok=True)
os.makedirs(f'../plots/{data_hoje}', exist_ok=True)
os.makedirs(f'../best_models/{data_hoje}', exist_ok=True)

logging.basicConfig(filename=f'../logs/{data_hoje}/transformer_optuna.log', level=logging.INFO, format='- %(message)s')
logging.info('-' * 50)
logging.info(f'{inicio_execucao} - Iniciando o processo de otimização e treinamento do modelo BiLSTM')

df_original = pd.read_csv('../dados_tratados/combinado/Piratininga/Piratininga_tratado_combinado.csv',
                          usecols=['PM2.5', 'Data e Hora', 'PM10', 'Monóxido de Carbono'], low_memory=False)

df_original['Data e Hora'] = pd.to_datetime(df_original['Data e Hora'], format='%Y-%m-%d %H:%M:%S')
df_original.index = df_original['Data e Hora']
df_original.sort_index(inplace=True)

colunas_selecionadas = ['PM2.5', 'PM10', 'Monóxido de Carbono']
logging.info(f"Colunas selecionadas: {colunas_selecionadas}")

df = df_original[colunas_selecionadas]
df = df.loc['2019-01-01':'2022-01-01']

logging.info(f"Período de análise: 2019-01-01 a 2022-01-01")
df = df.apply(pd.to_numeric, errors='coerce')

In [4]:
df_imputed = df.interpolate(method='linear')

In [5]:
def prepare_dataframe_for_lstm(df, n_steps):
    df = dc(df)
    for col in df.columns:
        for i in range(1, n_steps + 1):
            df[f'{col}(t-{i})'] = df[col].shift(i)
    df.dropna(inplace=True)
    return df

lookback = 8
shifted_df = prepare_dataframe_for_lstm(df_imputed, lookback)

# **Correção do possível vazamento de dados**
# Normalizar apenas os dados de treino e aplicar a transformação para validação e teste
train_split = int(len(shifted_df) * 0.7)
val_split = int(len(shifted_df) * 0.85)

train_df = shifted_df[:train_split]
val_df = shifted_df[train_split:val_split]
test_df = shifted_df[val_split:]

preprocessing_scaler = StandardScaler()
preprocessing_scaler.fit(train_df)

X_train = preprocessing_scaler.transform(train_df)[:, len(colunas_selecionadas):]
y_train = preprocessing_scaler.transform(train_df)[:, 0]

X_val = preprocessing_scaler.transform(val_df)[:, len(colunas_selecionadas):]
y_val = preprocessing_scaler.transform(val_df)[:, 0]

X_test = preprocessing_scaler.transform(test_df)[:, len(colunas_selecionadas):]
y_test = preprocessing_scaler.transform(test_df)[:, 0]

X_train = dc(np.flip(X_train, axis=1))
X_val = dc(np.flip(X_val, axis=1))
X_test = dc(np.flip(X_test, axis=1))

X_train = X_train.reshape((-1, lookback, X_train.shape[1] // lookback))
X_val = X_val.reshape((-1, lookback, X_val.shape[1] // lookback))
X_test = X_test.reshape((-1, lookback, X_test.shape[1] // lookback))
y_train = y_train.reshape((-1, 1))
y_val = y_val.reshape((-1, 1))
y_test = y_test.reshape((-1, 1))

X_train = torch.tensor(X_train).float()
y_train = torch.tensor(y_train).float()
X_val = torch.tensor(X_val).float()
y_val = torch.tensor(y_val).float()
X_test = torch.tensor(X_test).float()
y_test = torch.tensor(y_test).float()

class TimeSeriesDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, i):
        return self.X[i], self.y[i]


device = 'cuda:0' if torch.cuda.is_available() else 'cpu'


class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=5000):
        super(PositionalEncoding, self).__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        return x + self.pe[:x.size(0), :]


class TimeSeriesTransformer(nn.Module):
    def __init__(self, input_size, d_model, nhead, num_layers, dropout):
        super(TimeSeriesTransformer, self).__init__()
        self.encoder = nn.Linear(input_size, d_model)
        self.pos_encoder = PositionalEncoding(d_model)
        encoder_layers = nn.TransformerEncoderLayer(d_model, nhead, dim_feedforward=d_model * 4, dropout=dropout,
                                                    batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers)
        self.decoder = nn.Linear(d_model, 1)

    def forward(self, src):
        src = self.encoder(src)
        src = self.pos_encoder(src)
        output = self.transformer_encoder(src)
        output = self.decoder(output[:, -1, :])
        return output


def objective(trial):
    d_model = trial.suggest_int('d_model', 128, 512, step=64)
    nhead_options = [i for i in range(2, 9) if d_model % i == 0]
    nhead = trial.suggest_categorical('nhead', nhead_options)
    num_layers = trial.suggest_int('num_layers', 2, 4)
    dropout = trial.suggest_float('dropout', 0.1, 0.5)
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-3, log=True)
    batch_size = trial.suggest_categorical('batch_size', [32, 64, 128])
    weight_decay = trial.suggest_float('weight_decay', 1e-5, 1e-3, log=True)

    model = TimeSeriesTransformer(input_size=X_train.shape[2], d_model=d_model,
                                  nhead=nhead, num_layers=num_layers, dropout=dropout).to(device)

    criterion = nn.HuberLoss()
    optimizer = optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    train_loader = DataLoader(TimeSeriesDataset(X_train, y_train), batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(TimeSeriesDataset(X_val, y_val), batch_size=batch_size)

    for epoch in range(100):
        model.train()
        for batch_X, batch_y in train_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()

        model.eval()
        val_loss = 0
        with torch.no_grad():
            for batch_X, batch_y in val_loader:
                batch_X, batch_y = batch_X.to(device), batch_y.to(device)
                outputs = model(batch_X)
                val_loss += criterion(outputs, batch_y).item()

        val_loss /= len(val_loader)

        trial.report(val_loss, epoch)

        if trial.should_prune():
            raise optuna.TrialPruned()

    return val_loss


study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=3)

print('Best trial:')
trial = study.best_trial
print('Value: ', trial.value)
print('Params: ')
for key, value in trial.params.items():
    print(f'    {key}: {value}')

best_params = study.best_params
final_model = TimeSeriesTransformer(input_size=X_train.shape[2], d_model=best_params['d_model'],
                                    nhead=best_params['nhead'], num_layers=best_params['num_layers'],
                                    dropout=best_params['dropout']).to(device)

criterion = nn.HuberLoss()
optimizer = optim.AdamW(final_model.parameters(), lr=best_params['learning_rate'],
                        weight_decay=best_params['weight_decay'])

scaler = GradScaler()
num_workers = 0

train_loader = DataLoader(TimeSeriesDataset(X_train, y_train), batch_size=best_params['batch_size'], shuffle=True,
                          num_workers=num_workers, pin_memory=True)
val_loader = DataLoader(TimeSeriesDataset(X_val, y_val), batch_size=best_params['batch_size'], num_workers=num_workers,
                        pin_memory=True)

num_epochs = 500
best_val_loss = float('inf')
patience = 50
no_improve = 0

for epoch in range(num_epochs):
    final_model.train()
    for batch_X, batch_y in train_loader:
        batch_X, batch_y = batch_X.to(device), batch_y.to(device)
        optimizer.zero_grad()

        with autocast():
            outputs = final_model(batch_X)
            loss = criterion(outputs, batch_y)

        torch.nn.utils.clip_grad_norm_(final_model.parameters(), max_norm=1.0)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

    final_model.eval()
    val_loss = 0
    with torch.no_grad():
        for batch_X, batch_y in val_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
            with autocast():
                outputs = final_model(batch_X)
                val_loss += criterion(outputs, batch_y).item()

    val_loss /= len(val_loader)
    print(f'Epoch {epoch + 1}/{num_epochs}, Validation Loss: {val_loss:.4f}')

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        no_improve = 0
        torch.save(final_model.state_dict(), f'../best_models/{data_hoje}/best_model.pth')
    else:
        no_improve += 1

    if no_improve >= patience:
        print(f'Early stopping triggered after {epoch + 1} epochs')
        break

[I 2024-11-24 13:43:47,751] A new study created in memory with name: no-name-d935a98a-21d0-4be4-99ad-9329105e5847
[I 2024-11-24 13:47:54,437] Trial 0 finished with value: 0.1860526707924662 and parameters: {'d_model': 512, 'nhead': 4, 'num_layers': 2, 'dropout': 0.4279146571887482, 'learning_rate': 4.6953861598845956e-05, 'batch_size': 64, 'weight_decay': 0.000438182009847369}. Best is trial 0 with value: 0.1860526707924662.
[W 2024-11-24 13:47:54,438] Trial 1 failed with parameters: {'d_model': 448} because of the following error: ValueError('CategoricalDistribution does not support dynamic value space.').
Traceback (most recent call last):
  File "C:\dev\fast_api\venv\Lib\site-packages\optuna\study\_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "C:\Users\portes\AppData\Local\Temp\ipykernel_18228\1820543220.py", line 102, in objective
    nhead = trial.suggest_categorical('nhead', nhead_options)
            ^^^^^^^^^^^

ValueError: CategoricalDistribution does not support dynamic value space.

In [None]:
# Carregar o modelo final
final_model.load_state_dict(torch.load(f'../best_models/{data_hoje}/best_model.pth'))

# Criar o DataLoader para os dados de teste
test_loader = DataLoader(TimeSeriesDataset(X_test, y_test), batch_size=best_params['batch_size'])
final_model.eval()

test_loss = 0
predictions = []
actuals = []

with torch.no_grad():
    for batch_X, batch_y in test_loader:
        batch_X, batch_y = batch_X.to(device), batch_y.to(device)
        outputs = final_model(batch_X)
        test_loss += criterion(outputs, batch_y).item()
        predictions.extend(outputs.cpu().numpy().squeeze())
        actuals.extend(batch_y.cpu().numpy().squeeze())

test_loss /= len(test_loader)
print(f'Test Loss: {test_loss:.4f}')

# Desnormalização
def inverse_transform_data(data, preprocessing_scaler):
    if data.ndim == 1:
        data = data.reshape(-1, 1)
    # Criar dummy array para desnormalizar com a mesma escala usada anteriormente
    dummies = np.zeros((data.shape[0], preprocessing_scaler.n_features_in_))
    dummies[:, 0] = data.ravel()  # Use ravel() para garantir que o array seja 1D
    dummies = preprocessing_scaler.inverse_transform(dummies)
    return dummies[:, 0]

# Aplicar a desnormalização
predictions = inverse_transform_data(np.array(predictions), preprocessing_scaler)
actuals = inverse_transform_data(np.array(actuals), preprocessing_scaler)

# Continuar com o cálculo das métricas e a visualização dos resultados
mse = mean_squared_error(actuals, predictions)
rmse = np.sqrt(mse)
mae = mean_absolute_error(actuals, predictions)
r2 = r2_score(actuals, predictions)
mape = np.mean(np.abs((actuals - predictions) / actuals)) * 100

print(f'Mean Squared Error: {mse:.4f}')
print(f'Root Mean Squared Error: {rmse:.4f}')
print(f'Mean Absolute Error: {mae:.4f}')
print(f'R-squared: {r2:.4f}')
print(f'Mean Absolute Percentage Error: {mape:.4f}')

logging.info(f'Mean Squared Error: {mse:.4f}')
logging.info(f'Root Mean Squared Error: {rmse:.4f}')
logging.info(f'Mean Absolute Error: {mae:.4f}')
logging.info(f'R-squared: {r2:.4f}')
logging.info(f'Mean Absolute Percentage Error: {mape:.4f}')

# Plotar os resultados
plt.figure(figsize=(12, 6))
plt.plot(actuals, label='Actual')
plt.plot(predictions, label='Predicted')
plt.title('Actual vs Predicted PM2.5 Values')
plt.xlabel('Time')
plt.ylabel('PM2.5')
plt.legend()
plt.savefig(f'../plots/{data_hoje}/actual_vs_predicted.png')
plt.close()

# Plotar scatter plot
plt.figure(figsize=(10, 10))
plt.scatter(actuals, predictions, alpha=0.5)
plt.plot([min(actuals), max(actuals)], [min(actuals), max(actuals)], 'r--', lw=2)
plt.xlabel('Actual PM2.5')
plt.ylabel('Predicted PM2.5')
plt.title('Actual vs Predicted PM2.5 Scatter Plot')
plt.savefig(f'../plots/{data_hoje}/scatter_plot.png')
plt.close()

# Salvar resultados
results = pd.DataFrame({'Actual': actuals, 'Predicted': predictions})
results.to_csv(f'../results/{data_hoje}/predictions.csv', index=False)

# Salvar métricas
with open(f'../results/{data_hoje}/metrics.txt', 'w') as f:
    f.write(f'Mean Squared Error: {mse:.4f}\n')
    f.write(f'Root Mean Squared Error: {rmse:.4f}\n')
    f.write(f'Mean Absolute Error: {mae:.4f}\n')
    f.write(f'R-squared: {r2:.4f}\n')

print("Análise concluída. Resultados salvos nos diretórios correspondentes.")
