In [6]:
import os
import logging
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import optuna
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer, KNNImputer
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import matplotlib.pyplot as plt
from datetime import datetime
from copy import deepcopy as dc
from torch.utils.data import DataLoader
from torch.cuda.amp import autocast, GradScaler

data_hoje = datetime.now().strftime('%d-%m')
inicio_execucao = pd.Timestamp.now()

os.makedirs(f'../logs/{data_hoje}', exist_ok=True)
os.makedirs(f'../plots/{data_hoje}', exist_ok=True)
os.makedirs(f'../best_models/{data_hoje}', exist_ok=True)

logging.basicConfig(filename=f'../logs/{data_hoje}/bilstm_optuna.log', level=logging.INFO, format='- %(message)s')
logging.info('-' * 50)
logging.info(f'{inicio_execucao} - Iniciando o processo de otimização e treinamento do modelo BiLSTM')

df_original = pd.read_csv('../dados_tratados/combinado/Piratininga/Piratininga_tratado_combinado.csv',
                          usecols=['PM2.5', 'Data e Hora', 'PM10', 'Monóxido de Carbono'], low_memory=False)

df_original['Data e Hora'] = pd.to_datetime(df_original['Data e Hora'], format='%Y-%m-%d %H:%M:%S')
df_original.index = df_original['Data e Hora']
df_original.sort_index(inplace=True)

colunas_selecionadas = ['PM2.5', 'PM10', 'Monóxido de Carbono']
df = df_original[colunas_selecionadas]
df = df.loc['2019-01-01':'2022-01-01']

df = df.apply(pd.to_numeric, errors='coerce')

def impute_missing_data(df):
    random_imputer = SimpleImputer(strategy='mean')
    df_random_imputed = pd.DataFrame(random_imputer.fit_transform(df), columns=df.columns, index=df.index)
    df_interpolated = df_random_imputed.interpolate(method='time')
    knn_imputer = KNNImputer(n_neighbors=5)
    df_knn_imputed = pd.DataFrame(knn_imputer.fit_transform(df_interpolated), columns=df.columns, index=df.index)
    return df_interpolated

df_imputed = impute_missing_data(df)

logging.info(f"Dados ausentes antes da imputação: {df.isna().sum()}")
logging.info(f"Dados ausentes após a imputação: {df_imputed.isna().sum()}")

def add_cyclical_features(df):
    df['hour'] = df.index.hour
    df['day'] = df.index.day
    df['month'] = df.index.month
    df['day_of_week'] = df.index.dayofweek
    
    df['hour_sin'] = np.sin(2 * np.pi * df['hour'] / 24)
    df['hour_cos'] = np.cos(2 * np.pi * df['hour'] / 24)
    df['day_sin'] = np.sin(2 * np.pi * df['day'] / 31)
    df['day_cos'] = np.cos(2 * np.pi * df['day'] / 31)
    df['month_sin'] = np.sin(2 * np.pi * df['month'] / 12)
    df['month_cos'] = np.cos(2 * np.pi * df['month'] / 12)
    df['day_of_week_sin'] = np.sin(2 * np.pi * df['day_of_week'] / 7)
    df['day_of_week_cos'] = np.cos(2 * np.pi * df['day_of_week'] / 7)
    
    df.drop(['hour', 'day', 'month', 'day_of_week'], axis=1, inplace=True)
    return df

df_imputed = add_cyclical_features(df_imputed)

def prepare_dataframe_for_lstm(df, n_steps):
    df = dc(df)
    for col in df.columns:
        for i in range(1, n_steps + 1):
            df[f'{col}(t-{i})'] = df[col].shift(i)
    df.dropna(inplace=True)
    return df

lookback = 8
shifted_df = prepare_dataframe_for_lstm(df_imputed, lookback)

preprocessing_scaler = StandardScaler()
shifted_df_as_np = preprocessing_scaler.fit_transform(shifted_df)

X = shifted_df_as_np[:, len(colunas_selecionadas):]
y = shifted_df_as_np[:, 0]

X = dc(np.flip(X, axis=1))

train_split = int(len(X) * 0.7)
val_split = int(len(X) * 0.85)

X_train, X_val, X_test = X[:train_split], X[train_split:val_split], X[val_split:]
y_train, y_val, y_test = y[:train_split], y[train_split:val_split], y[val_split:]

X_train = X_train.reshape((-1, lookback, X_train.shape[1] // lookback))
X_val = X_val.reshape((-1, lookback, X_val.shape[1] // lookback))
X_test = X_test.reshape((-1, lookback, X_test.shape[1] // lookback))
y_train = y_train.reshape((-1, 1))
y_val = y_val.reshape((-1, 1))
y_test = y_test.reshape((-1, 1))

X_train = torch.tensor(X_train).float()
y_train = torch.tensor(y_train).float()
X_val = torch.tensor(X_val).float()
y_val = torch.tensor(y_val).float()
X_test = torch.tensor(X_test).float()
y_test = torch.tensor(y_test).float()

class TimeSeriesDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, i):
        return self.X[i], self.y[i]

device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

class BiLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, dropout):
        super(BiLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, bidirectional=True, dropout=dropout)
        self.fc = nn.Linear(hidden_size * 2, 1)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers * 2, x.size(0), self.hidden_size).to(device)
        c0 = torch.zeros(self.num_layers * 2, x.size(0), self.hidden_size).to(device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

def objective(trial):
    hidden_size = trial.suggest_int('hidden_size', 32, 512)
    num_layers = trial.suggest_int('num_layers', 1, 5)
    dropout = trial.suggest_float('dropout', 0.0, 0.7)
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-2)
    batch_size = trial.suggest_categorical('batch_size', [128])
    weight_decay = trial.suggest_float('weight_decay', 1e-5, 1e-2, log=True)

    model = BiLSTM(input_size=X_train.shape[2], hidden_size=hidden_size, 
                   num_layers=num_layers, dropout=dropout).to(device)

    criterion = nn.HuberLoss()
    optimizer = optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    train_loader = DataLoader(TimeSeriesDataset(X_train, y_train), batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(TimeSeriesDataset(X_val, y_val), batch_size=batch_size)

    for epoch in range(100):
        model.train()
        for batch_X, batch_y in train_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()

        model.eval()
        val_loss = 0
        with torch.no_grad():
            for batch_X, batch_y in val_loader:
                batch_X, batch_y = batch_X.to(device), batch_y.to(device)
                outputs = model(batch_X)
                val_loss += criterion(outputs, batch_y).item()

        val_loss /= len(val_loader)

        intermediate_value = val_loss
        trial.report(intermediate_value, epoch)

        if trial.should_prune():
            raise optuna.TrialPruned()

    return val_loss

study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=10)

print('Best trial:')
trial = study.best_trial
print('Value: ', trial.value)
print('Params: ')
for key, value in trial.params.items():
    print(f'    {key}: {value}')

best_params = study.best_params
final_model = BiLSTM(input_size=X_train.shape[2], 
                     hidden_size=best_params['hidden_size'],
                     num_layers=best_params['num_layers'], 
                     dropout=best_params['dropout']).to(device)

criterion = nn.HuberLoss()
optimizer = optim.AdamW(final_model.parameters(), lr=best_params['learning_rate'], weight_decay=best_params['weight_decay'])

scaler = GradScaler()
num_workers = 0

train_loader = DataLoader(TimeSeriesDataset(X_train, y_train), batch_size=best_params['batch_size'], shuffle=True, num_workers=num_workers, pin_memory=True)
val_loader = DataLoader(TimeSeriesDataset(X_val, y_val), batch_size=best_params['batch_size'], num_workers=num_workers, pin_memory=True)

num_epochs = 500
best_val_loss = float('inf')
patience = 20
no_improve = 0

for epoch in range(num_epochs):
    final_model.train()
    for batch_X, batch_y in train_loader:
        batch_X, batch_y = batch_X.to(device), batch_y.to(device)
        optimizer.zero_grad()
        
        # Usar precisão mista para forward pass
        with autocast():
            outputs = final_model(batch_X)
            loss = criterion(outputs, batch_y)
        
        # Escalar a perda e fazer o backward pass
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

    final_model.eval()
    val_loss = 0
    with torch.no_grad():
        for batch_X, batch_y in val_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
            with autocast():
                outputs = final_model(batch_X)
                val_loss += criterion(outputs, batch_y).item()

    val_loss /= len(val_loader)
    print(f'Epoch {epoch+1}/{num_epochs}, Validation Loss: {val_loss:.4f}')

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        no_improve = 0
        torch.save(final_model.state_dict(), f'../best_models/{data_hoje}/best_model.pth')
    else:
        no_improve += 1

    if no_improve >= patience:
        print(f'Early stopping triggered after {epoch+1} epochs')
        break

[I 2024-08-14 10:40:06,050] A new study created in memory with name: no-name-28b94a7f-4eaf-49c9-b369-5ef5d048589c
[I 2024-08-14 10:41:16,761] Trial 0 finished with value: 0.2220389230116721 and parameters: {'hidden_size': 34, 'num_layers': 2, 'dropout': 0.40069303125942135, 'learning_rate': 0.002079717611368995, 'batch_size': 128, 'weight_decay': 2.405878791056044e-05}. Best is trial 0 with value: 0.2220389230116721.
[I 2024-08-14 10:43:00,641] Trial 1 finished with value: 0.25118544914068713 and parameters: {'hidden_size': 240, 'num_layers': 2, 'dropout': 0.5580542313613788, 'learning_rate': 0.0017655807159163873, 'batch_size': 128, 'weight_decay': 0.00010072993364342317}. Best is trial 0 with value: 0.2220389230116721.
[I 2024-08-14 10:47:21,794] Trial 2 finished with value: 0.16410353275076037 and parameters: {'hidden_size': 466, 'num_layers': 4, 'dropout': 0.2971638901398904, 'learning_rate': 0.009022550593338795, 'batch_size': 128, 'weight_decay': 3.482675019169891e-05}. Best is t

Best trial:
Value:  0.16410353275076037
Params: 
    hidden_size: 466
    num_layers: 4
    dropout: 0.2971638901398904
    learning_rate: 0.009022550593338795
    batch_size: 128
    weight_decay: 3.482675019169891e-05
Epoch 1/500, Validation Loss: 0.2152
Epoch 2/500, Validation Loss: 0.1928
Epoch 3/500, Validation Loss: 0.1912
Epoch 4/500, Validation Loss: 0.1685
Epoch 5/500, Validation Loss: 0.1749
Epoch 6/500, Validation Loss: 0.1760
Epoch 7/500, Validation Loss: 0.1688
Epoch 8/500, Validation Loss: 0.1705
Epoch 9/500, Validation Loss: 0.1667
Epoch 10/500, Validation Loss: 0.1804
Epoch 11/500, Validation Loss: 0.1630
Epoch 12/500, Validation Loss: 0.1734
Epoch 13/500, Validation Loss: 0.1749
Epoch 14/500, Validation Loss: 0.1674
Epoch 15/500, Validation Loss: 0.1998
Epoch 16/500, Validation Loss: 0.1709
Epoch 17/500, Validation Loss: 0.1631
Epoch 18/500, Validation Loss: 0.1638
Epoch 19/500, Validation Loss: 0.1764
Epoch 20/500, Validation Loss: 0.1638
Epoch 21/500, Validation Loss

In [7]:
final_model.load_state_dict(torch.load(f'../best_models/{data_hoje}/best_model.pth'))

test_loader = DataLoader(TimeSeriesDataset(X_test, y_test), batch_size=best_params['batch_size'])
final_model.eval()
test_loss = 0
predictions = []
actuals = []

with torch.no_grad():
    for batch_X, batch_y in test_loader:
        batch_X, batch_y = batch_X.to(device), batch_y.to(device)
        outputs = final_model(batch_X)
        test_loss += criterion(outputs, batch_y).item()
        predictions.extend(outputs.cpu().numpy())
        actuals.extend(batch_y.cpu().numpy())

test_loss /= len(test_loader)
print(f'Test Loss: {test_loss:.4f}')

predictions = preprocessing_scaler.inverse_transform(np.array(predictions).reshape(-1, 1))
actuals = preprocessing_scaler.inverse_transform(np.array(actuals).reshape(-1, 1))

mse = mean_squared_error(actuals, predictions)
mae = mean_absolute_error(actuals, predictions)
r2 = r2_score(actuals, predictions)

print(f'Mean Squared Error: {mse:.4f}')
print(f'Mean Absolute Error: {mae:.4f}')
print(f'R2 Score: {r2:.4f}')

plt.figure(figsize=(12, 6))
plt.plot(actuals, label='Actual')
plt.plot(predictions, label='Predicted')
plt.legend()
plt.title('BiLSTM Model: Actual vs Predicted PM2.5 Values')
plt.xlabel('Time')
plt.ylabel('PM2.5')
plt.savefig(f'../plots/{data_hoje}/bilstm_results.png')
plt.close()

logging.info(f'Best hyperparameters: {best_params}')
logging.info(f'Test Loss: {test_loss:.4f}')
logging.info(f'Mean Squared Error: {mse:.4f}')
logging.info(f'Mean Absolute Error: {mae:.4f}')
logging.info(f'R2 Score: {r2:.4f}')

fim_execucao = pd.Timestamp.now()
logging.info(f'{fim_execucao} - Processo de otimização e treinamento do modelo BiLSTM concluído')
logging.info(f'Tempo total de execução: {fim_execucao - inicio_execucao}')
logging.info('-' * 50)

Test Loss: 0.1598


ValueError: non-broadcastable output operand with shape (3948,1) doesn't match the broadcast shape (3948,99)