Modelo CNN-LSTM com Otimização de Hiperparâmetros
funcionando com PyTorch e Optuna

In [84]:
import os
import logging
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import optuna
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer, KNNImputer
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import matplotlib.pyplot as plt
from datetime import datetime
from copy import deepcopy as dc
from torch.utils.data import DataLoader
from torch.cuda.amp import autocast, GradScaler
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.ensemble import ExtraTreesRegressor
data_hoje = datetime.now().strftime('%d-%m')
inicio_execucao = pd.Timestamp.now()

os.makedirs(f'../logs/{data_hoje}', exist_ok=True)
os.makedirs(f'../plots/{data_hoje}', exist_ok=True)
os.makedirs(f'../best_models/{data_hoje}', exist_ok=True)

logging.basicConfig(filename=f'../logs/{data_hoje}/bilstm_optuna.log', level=logging.INFO, format='- %(message)s')
logging.info('-' * 50)
logging.info(f'{inicio_execucao} - Iniciando o processo de otimização e treinamento do modelo BiLSTM')

df_original = pd.read_csv('../dados_tratados/combinado/Piratininga/Piratininga_tratado_combinado.csv',
                          usecols=['PM2.5', 'Data e Hora', 'PM10', 'Monóxido de Carbono'], low_memory=False)

df_original['Data e Hora'] = pd.to_datetime(df_original['Data e Hora'], format='%Y-%m-%d %H:%M:%S')
df_original.index = df_original['Data e Hora']
df_original.sort_index(inplace=True)

colunas_selecionadas = ['PM2.5','PM10', 'Monóxido de Carbono']
logging.info(f"Colunas selecionadas: {colunas_selecionadas}")

df = df_original[colunas_selecionadas]
# df = df.loc[(df.index >= '2017-01-01') & ((df.index < '2020-01-01') | (df.index > '2022-01-01'))]
df = df.loc['2019-01-01':'2022-01-01']

df = df.apply(pd.to_numeric, errors='coerce')

In [None]:
from sklearn.preprocessing import RobustScaler


def prepare_df(df, n_steps, weekly_step):
    important_features = [
        'PM2.5', 'PM10', 'Monóxido de Carbono'
    ]

    # Create a list to hold all the new columns
    new_columns = []

    for col in important_features:
        if col in df.columns:
            # Shift for n_steps
            for i in range(1, n_steps + 1):
                new_columns.append(df[col].shift(i).rename(f'{col}(t-{i})'))

            # Shift for weekly_step (by hours)
            for hour in range(24):
                new_columns.append(df[col].shift(weekly_step - hour).rename(f'{col}(t-{weekly_step - hour})'))

    # Concatenate the new columns at once
    df = pd.concat([df] + new_columns, axis=1)

    df.dropna(inplace=True)
    return df



lookback = 48
weekly_step = 24 * 7
shifted_df = prepare_df(df, lookback, weekly_step)

preprocessing_scaler = RobustScaler()
preprocessing_scaler.fit(shifted_df)
shifted_df_as_np = preprocessing_scaler.transform(shifted_df)

X = shifted_df_as_np[:, len(colunas_selecionadas):]
y = shifted_df_as_np[:, 0]

X = dc(np.flip(X, axis=1))

train_split = int(len(X) * 0.7)
val_split = int(len(X) * 0.85)

X_train, X_val, X_test = X[:train_split], X[train_split:val_split], X[val_split:]
y_train, y_val, y_test = y[:train_split], y[train_split:val_split], y[val_split:]

X_train = X_train.reshape((-1, 72, 3))
X_val = X_val.reshape((-1, 72, 3))
X_test = X_test.reshape((-1, 72, 3))
y_train = y_train.reshape((-1, 1))
y_val = y_val.reshape((-1, 1))
y_test = y_test.reshape((-1, 1))

X_train = torch.tensor(X_train).float()
y_train = torch.tensor(y_train).float()
X_val = torch.tensor(X_val).float()
y_val = torch.tensor(y_val).float()
X_test = torch.tensor(X_test).float()
y_test = torch.tensor(y_test).float()
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'


class TimeSeriesDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, i):
        return self.X[i], self.y[i]


class CNN_LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, dropout, kernel_size, out_channels, num_heads):
        super(CNN_LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.cnn = nn.Sequential(
            nn.Conv1d(in_channels=input_size, out_channels=out_channels, kernel_size=kernel_size, padding=kernel_size // 2),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2)
        )

        self.lstm = nn.LSTM(out_channels, hidden_size, num_layers, batch_first=True, dropout=dropout)

        # Multihead Attention
        self.attention = nn.MultiheadAttention(embed_dim=hidden_size, num_heads=num_heads, batch_first=True)

        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        # CNN
        x = x.permute(0, 2, 1)  # Change shape to (batch_size, input_size, sequence_length)
        x = self.cnn(x)
        x = x.permute(0, 2, 1)  # Change shape back to (batch_size, sequence_length, out_channels)

        # LSTM
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        lstm_out, _ = self.lstm(x, (h0, c0))

        # Multihead Attention
        attn_out, _ = self.attention(lstm_out, lstm_out, lstm_out)

        # Fully Connected Layer
        out = self.fc(attn_out[:, -1, :])
        return out


def objective(trial):
    hidden_size = trial.suggest_int('hidden_size', 16, 128)
    num_layers = trial.suggest_int('num_layers', 1, 5)
    dropout = trial.suggest_float('dropout', 0.0, 0.5)
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-2)
    batch_size = trial.suggest_categorical('batch_size', [128])
    weight_decay = trial.suggest_float('weight_decay', 1e-5, 1e-2, log=True)
    kernel_size = trial.suggest_categorical('kernel_size', [3, 5])
    out_channels = trial.suggest_int('out_channels', 16, 64)
    num_heads = trial.suggest_int('num_heads', 2, 4)

    model = CNN_LSTM(input_size=X_train.shape[2], hidden_size=hidden_size,
                     num_layers=num_layers, dropout=dropout,
                     kernel_size=kernel_size, out_channels=out_channels, num_heads = num_heads).to(device)

    criterion = nn.HuberLoss()
    optimizer = optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    train_loader = DataLoader(TimeSeriesDataset(X_train, y_train), batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(TimeSeriesDataset(X_val, y_val), batch_size=batch_size)

    for epoch in range(100):
        model.train()
        for batch_X, batch_y in train_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()

        model.eval()
        val_loss = 0
        with torch.no_grad():
            for batch_X, batch_y in val_loader:
                batch_X, batch_y = batch_X.to(device), batch_y.to(device)
                outputs = model(batch_X)
                val_loss += criterion(outputs, batch_y).item()

        val_loss /= len(val_loader)

        intermediate_value = val_loss
        trial.report(intermediate_value, epoch)

        if trial.should_prune():
            raise optuna.TrialPruned()

    return val_loss


study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=2)

print('Best trial:')
trial = study.best_trial
print('Value: ', trial.value)
print('Params: ')
for key, value in trial.params.items():
    print(f'    {key}: {value}')

In [86]:
best_params = study.best_params
final_model = CNN_LSTM(input_size=X_train.shape[2], hidden_size=best_params['hidden_size'],
                          num_layers=best_params['num_layers'], dropout=best_params['dropout'],
                          kernel_size=best_params['kernel_size'], out_channels=best_params['out_channels'], num_heads=best_params['num_heads']).to(device)


criterion = nn.HuberLoss()
optimizer = optim.AdamW(final_model.parameters(), lr=best_params['learning_rate'],
                        weight_decay=best_params['weight_decay'])

scaler = GradScaler()
num_workers = 0

train_loader = DataLoader(TimeSeriesDataset(X_train, y_train), batch_size=best_params['batch_size'], shuffle=True,
                          num_workers=num_workers, pin_memory=True)
val_loader = DataLoader(TimeSeriesDataset(X_val, y_val), batch_size=best_params['batch_size'], num_workers=num_workers,
                        pin_memory=True)

num_epochs = 500
best_val_loss = float('inf')
patience = 30
no_improve = 0

for epoch in range(num_epochs):
    final_model.train()
    for batch_X, batch_y in train_loader:
        batch_X, batch_y = batch_X.to(device), batch_y.to(device)
        optimizer.zero_grad()

        # Usar precisão mista para forward pass
        with autocast():
            outputs = final_model(batch_X)
            loss = criterion(outputs, batch_y)

        # Escalar a perda e fazer o backward pass
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

    final_model.eval()
    val_loss = 0
    with torch.no_grad():
        for batch_X, batch_y in val_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
            with autocast():
                outputs = final_model(batch_X)
                val_loss += criterion(outputs, batch_y).item()

    val_loss /= len(val_loader)
    print(f'Epoch {epoch + 1}/{num_epochs}, Validation Loss: {val_loss:.4f}')

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        no_improve = 0
        torch.save(final_model.state_dict(), f'../best_models/{data_hoje}/best_model.pth')
    else:
        no_improve += 1

    if no_improve >= patience:
        print(f'Early stopping triggered after {epoch + 1} epochs')
        break

  scaler = GradScaler()
  with autocast():
  with autocast():


Epoch 1/500, Validation Loss: 0.4762
Epoch 2/500, Validation Loss: 0.4669
Epoch 3/500, Validation Loss: 0.4666
Epoch 4/500, Validation Loss: 0.4153
Epoch 5/500, Validation Loss: 0.4809
Epoch 6/500, Validation Loss: 0.4576
Epoch 7/500, Validation Loss: 0.4665
Epoch 8/500, Validation Loss: 0.4817
Epoch 9/500, Validation Loss: 0.4607
Epoch 10/500, Validation Loss: 0.3442
Epoch 11/500, Validation Loss: 0.3473
Epoch 12/500, Validation Loss: 0.3119
Epoch 13/500, Validation Loss: 0.2857
Epoch 14/500, Validation Loss: 0.2621
Epoch 15/500, Validation Loss: 0.2664
Epoch 16/500, Validation Loss: 0.1883
Epoch 17/500, Validation Loss: 0.1867
Epoch 18/500, Validation Loss: 0.2014
Epoch 19/500, Validation Loss: 0.2207
Epoch 20/500, Validation Loss: 0.2404
Epoch 21/500, Validation Loss: 0.2120
Epoch 22/500, Validation Loss: 0.1833
Epoch 23/500, Validation Loss: 0.1689
Epoch 24/500, Validation Loss: 0.2020
Epoch 25/500, Validation Loss: 0.1732
Epoch 26/500, Validation Loss: 0.1908
Epoch 27/500, Validat

In [87]:
final_model.load_state_dict(torch.load(f'../best_models/{data_hoje}/best_model.pth'))

test_loader = DataLoader(TimeSeriesDataset(X_test, y_test), batch_size=best_params['batch_size'])
final_model.eval()
test_loss = 0
predictions = []
actuals = []

with torch.no_grad():
    for batch_X, batch_y in test_loader:
        batch_X, batch_y = batch_X.to(device), batch_y.to(device)
        outputs = final_model(batch_X)
        test_loss += criterion(outputs, batch_y).item()
        predictions.extend(outputs.cpu().numpy().squeeze())
        actuals.extend(batch_y.cpu().numpy().squeeze())

test_loss /= len(test_loader)
print(f'Test Loss: {test_loss:.4f}')

# Desnormalização
def inverse_transform_data(data):
    if data.ndim == 1:
        data = data.reshape(-1, 1)
    dummies = np.zeros((data.shape[0], shifted_df_as_np.shape[1]))
    dummies[:, 0] = data.ravel()  # Use ravel() to ensure 1D array
    dummies = preprocessing_scaler.inverse_transform(dummies)
    return dummies[:, 0]

# Desnormalização
predictions = inverse_transform_data(np.array(predictions))
actuals = inverse_transform_data(np.array(actuals))

# Rest of the code remains the same
mse = mean_squared_error(actuals, predictions)
rmse = np.sqrt(mse)
mae = mean_absolute_error(actuals, predictions)
r2 = r2_score(actuals, predictions)
mape = np.mean(np.abs((actuals - predictions) / actuals)) * 100

print(f'Mean Squared Error: {mse:.4f}')
print(f'Root Mean Squared Error: {rmse:.4f}')
print(f'Mean Absolute Error: {mae:.4f}')
print(f'R-squared: {r2:.4f}')
print(f'Mean Absolute Percentage Error: {mape:.4f}')

logging.info(f'Mean Squared Error: {mse:.4f}')
logging.info(f'Root Mean Squared Error: {rmse:.4f}')
logging.info(f'Mean Absolute Error: {mae:.4f}')
logging.info(f'R-squared: {r2:.4f}')
logging.info(f'Mean Absolute Percentage Error: {mape:.4f}')


# Plotar resultados
plt.figure(figsize=(12, 6))
plt.plot(actuals, label='Actual')
plt.plot(predictions, label='Predicted')
plt.title('Actual vs Predicted PM2.5 Values')
plt.xlabel('Time')
plt.ylabel('PM2.5')
plt.legend()
plt.savefig(f'../plots/{data_hoje}/actual_vs_predicted.png')
plt.close()

# Plotar scatter plot
plt.figure(figsize=(10, 10))
plt.scatter(actuals, predictions, alpha=0.5)
plt.plot([min(actuals), max(actuals)], [min(actuals), max(actuals)], 'r--', lw=2)
plt.xlabel('Actual PM2.5')
plt.ylabel('Predicted PM2.5')
plt.title('Actual vs Predicted PM2.5 Scatter Plot')
plt.savefig(f'../plots/{data_hoje}/scatter_plot.png')
plt.close()

# Salvar resultados
results = pd.DataFrame({'Actual': actuals, 'Predicted': predictions})
results.to_csv(f'../results/{data_hoje}/predictions.csv', index=False)

# Salvar métricas
with open(f'../results/{data_hoje}/metrics.txt', 'w') as f:
    f.write(f'Mean Squared Error: {mse:.4f}\n')
    f.write(f'Root Mean Squared Error: {rmse:.4f}\n')
    f.write(f'Mean Absolute Error: {mae:.4f}\n')
    f.write(f'R-squared: {r2:.4f}\n')

print("Análise concluída. Resultados salvos nos diretórios correspondentes.")


Test Loss: 0.1503
Mean Squared Error: 40.1873
Root Mean Squared Error: 6.3393
Mean Absolute Error: 4.1637
R-squared: 0.4156
Mean Absolute Percentage Error: 64.9984


OSError: Cannot save file into a non-existent directory: '..\results\24-11'