Modelo BiLSTM mais complexto com Otimização de Hiperparâmetros


In [1]:
import os
import logging
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import optuna
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer, KNNImputer
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import matplotlib.pyplot as plt
from datetime import datetime
from copy import deepcopy as dc
from torch.utils.data import DataLoader
from torch.cuda.amp import autocast, GradScaler
from torch.optim.lr_scheduler import ReduceLROnPlateau

data_hoje = datetime.now().strftime('%d-%m')
inicio_execucao = pd.Timestamp.now()

os.makedirs(f'../logs/{data_hoje}', exist_ok=True)
os.makedirs(f'../plots/{data_hoje}', exist_ok=True)
os.makedirs(f'../best_models/{data_hoje}', exist_ok=True)

logging.basicConfig(filename=f'../logs/{data_hoje}/bilstm_optuna.log', level=logging.INFO, format='- %(message)s')
logging.info('-' * 50)
logging.info(f'{inicio_execucao} - Iniciando o processo de otimização e treinamento do modelo BiLSTM')

df_original = pd.read_csv('../dados_tratados/combinado/Piratininga/Piratininga_tratado_combinado.csv',
                          usecols=['PM2.5', 'Data e Hora', 'PM10', 'Monóxido de Carbono'], low_memory=False)

df_original['Data e Hora'] = pd.to_datetime(df_original['Data e Hora'], format='%Y-%m-%d %H:%M:%S')
df_original.index = df_original['Data e Hora']
df_original.sort_index(inplace=True)

colunas_selecionadas = ['PM2.5', 'PM10', 'Monóxido de Carbono']
df = df_original[colunas_selecionadas]
df = df.loc['2019-01-01':'2022-01-01']

df = df.apply(pd.to_numeric, errors='coerce')


def impute_missing_data(df):
    random_imputer = SimpleImputer(strategy='mean')
    df_random_imputed = pd.DataFrame(random_imputer.fit_transform(df), columns=df.columns, index=df.index)
    df_interpolated = df_random_imputed.interpolate(method='time')
    knn_imputer = KNNImputer(n_neighbors=5)
    df_knn_imputed = pd.DataFrame(knn_imputer.fit_transform(df_interpolated), columns=df.columns, index=df.index)
    return df_interpolated


df_imputed = impute_missing_data(df)

logging.info(f"Dados ausentes antes da imputação: {df.isna().sum()}")
logging.info(f"Dados ausentes após a imputação: {df_imputed.isna().sum()}")


def add_cyclical_features(df):
    df['hour'] = df.index.hour
    df['day'] = df.index.day
    df['month'] = df.index.month
    df['day_of_week'] = df.index.dayofweek

    df['hour_sin'] = np.sin(2 * np.pi * df['hour'] / 24)
    df['hour_cos'] = np.cos(2 * np.pi * df['hour'] / 24)
    df['day_sin'] = np.sin(2 * np.pi * df['day'] / 31)
    df['day_cos'] = np.cos(2 * np.pi * df['day'] / 31)
    df['month_sin'] = np.sin(2 * np.pi * df['month'] / 12)
    df['month_cos'] = np.cos(2 * np.pi * df['month'] / 12)
    df['day_of_week_sin'] = np.sin(2 * np.pi * df['day_of_week'] / 7)
    df['day_of_week_cos'] = np.cos(2 * np.pi * df['day_of_week'] / 7)

    df.drop(['hour', 'day', 'month', 'day_of_week'], axis=1, inplace=True)
    return df


df_imputed = add_cyclical_features(df_imputed)


def prepare_dataframe_for_lstm(df, n_steps):
    df = dc(df)
    for col in df.columns:
        for i in range(1, n_steps + 1):
            df[f'{col}(t-{i})'] = df[col].shift(i)
    df.dropna(inplace=True)
    return df


lookback = 8
shifted_df = prepare_dataframe_for_lstm(df_imputed, lookback)

preprocessing_scaler = StandardScaler()
preprocessing_scaler.fit(shifted_df)
shifted_df_as_np = preprocessing_scaler.transform(shifted_df)

X = shifted_df_as_np[:, len(colunas_selecionadas):]
y = shifted_df_as_np[:, 0]

X = dc(np.flip(X, axis=1))

train_split = int(len(X) * 0.7)
val_split = int(len(X) * 0.85)

X_train, X_val, X_test = X[:train_split], X[train_split:val_split], X[val_split:]
y_train, y_val, y_test = y[:train_split], y[train_split:val_split], y[val_split:]

X_train = X_train.reshape((-1, lookback, X_train.shape[1] // lookback))
X_val = X_val.reshape((-1, lookback, X_val.shape[1] // lookback))
X_test = X_test.reshape((-1, lookback, X_test.shape[1] // lookback))
y_train = y_train.reshape((-1, 1))
y_val = y_val.reshape((-1, 1))
y_test = y_test.reshape((-1, 1))

X_train = torch.tensor(X_train).float()
y_train = torch.tensor(y_train).float()
X_val = torch.tensor(X_val).float()
y_val = torch.tensor(y_val).float()
X_test = torch.tensor(X_test).float()
y_test = torch.tensor(y_test).float()


class TimeSeriesDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, i):
        return self.X[i], self.y[i]


device = 'cuda:0' if torch.cuda.is_available() else 'cpu'


class BiLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, dropout):
        super(BiLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, bidirectional=True, dropout=dropout)
        self.fc1 = nn.Linear(hidden_size * 2, hidden_size)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)
        self.fc2 = nn.Linear(hidden_size, 1)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers * 2, x.size(0), self.hidden_size).to(device)
        c0 = torch.zeros(self.num_layers * 2, x.size(0), self.hidden_size).to(device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc1(out[:, -1, :])
        out = self.relu(out)
        out = self.dropout(out)
        out = self.fc2(out)
        return out


def objective(trial):
    hidden_size = trial.suggest_int('hidden_size', 16, 256)
    num_layers = trial.suggest_int('num_layers', 1, 5)
    dropout = trial.suggest_float('dropout', 0.0, 0.7)
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-2, log=True)
    batch_size = trial.suggest_categorical('batch_size', [64, 128])
    weight_decay = trial.suggest_float('weight_decay', 1e-6, 1e-3, log=True)

    model = BiLSTM(input_size=X_train.shape[2], hidden_size=hidden_size,
                   num_layers=num_layers, dropout=dropout).to(device)

    criterion = nn.HuberLoss()
    optimizer = optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10, verbose=True)

    train_loader = DataLoader(TimeSeriesDataset(X_train, y_train), batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(TimeSeriesDataset(X_val, y_val), batch_size=batch_size)

    for epoch in range(100):
        model.train()
        for batch_X, batch_y in train_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()

        model.eval()
        val_loss = 0
        with torch.no_grad():
            for batch_X, batch_y in val_loader:
                batch_X, batch_y = batch_X.to(device), batch_y.to(device)
                outputs = model(batch_X)
                val_loss += criterion(outputs, batch_y).item()

        val_loss /= len(val_loader)
        scheduler.step(val_loss)

        trial.report(val_loss, epoch)

        if trial.should_prune():
            raise optuna.TrialPruned()

    return val_loss


study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=2)

print('Best trial:')
trial = study.best_trial
print('Value: ', trial.value)
print('Params: ')
for key, value in trial.params.items():
    print(f'    {key}: {value}')

best_params = study.best_params
final_model = BiLSTM(input_size=X_train.shape[2],
                     hidden_size=best_params['hidden_size'],
                     num_layers=best_params['num_layers'],
                     dropout=best_params['dropout']).to(device)

criterion = nn.HuberLoss()
optimizer = optim.AdamW(final_model.parameters(), lr=best_params['learning_rate'],
                        weight_decay=best_params['weight_decay'])
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10, verbose=True)

scaler = GradScaler()
num_workers = 4  

train_loader = DataLoader(TimeSeriesDataset(X_train, y_train), batch_size=best_params['batch_size'], shuffle=True,
                          num_workers=num_workers, pin_memory=True)
val_loader = DataLoader(TimeSeriesDataset(X_val, y_val), batch_size=best_params['batch_size'], num_workers=num_workers,
                        pin_memory=True)

num_epochs = 1000
best_val_loss = float('inf')
patience = 50  
no_improve = 0

for epoch in range(num_epochs):
    final_model.train()
    for batch_X, batch_y in train_loader:
        batch_X, batch_y = batch_X.to(device), batch_y.to(device)
        optimizer.zero_grad()

        with autocast():
            outputs = final_model(batch_X)
            loss = criterion(outputs, batch_y)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

    final_model.eval()
    val_loss = 0
    with torch.no_grad():
        for batch_X, batch_y in val_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
            with autocast():
                outputs = final_model(batch_X)
                val_loss += criterion(outputs, batch_y).item()

    val_loss /= len(val_loader)
    scheduler.step(val_loss)
    print(f'Epoch {epoch + 1}/{num_epochs}, Validation Loss: {val_loss:.4f}')

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        no_improve = 0
        torch.save(final_model.state_dict(), f'../best_models/{data_hoje}/best_model.pth')
    else:
        no_improve += 1

    if no_improve >= patience:
        print(f'Early stopping triggered after {epoch + 1} epochs')
        break

[I 2024-08-29 17:58:01,753] A new study created in memory with name: no-name-76bd532b-2d01-4431-9d05-6ddce73646c3
[I 2024-08-29 18:03:42,740] Trial 0 finished with value: 0.1575148761753113 and parameters: {'hidden_size': 125, 'num_layers': 1, 'dropout': 0.5694171095473334, 'learning_rate': 2.940107083814579e-05, 'batch_size': 128, 'weight_decay': 7.63975484900382e-05}. Best is trial 0 with value: 0.1575148761753113.
[I 2024-08-29 19:23:00,212] Trial 1 finished with value: 0.1611629225794346 and parameters: {'hidden_size': 220, 'num_layers': 4, 'dropout': 0.6425153198211533, 'learning_rate': 1.5453245737870007e-05, 'batch_size': 64, 'weight_decay': 1.0164443978050213e-06}. Best is trial 0 with value: 0.1575148761753113.
  scaler = GradScaler()


Best trial:
Value:  0.1575148761753113
Params: 
    hidden_size: 125
    num_layers: 1
    dropout: 0.5694171095473334
    learning_rate: 2.940107083814579e-05
    batch_size: 128
    weight_decay: 7.63975484900382e-05


RuntimeError: DataLoader worker (pid(s) 17712, 17856, 15244, 21272) exited unexpectedly

In [None]:
final_model.load_state_dict(torch.load(f'../best_models/{data_hoje}/best_model.pth'))

test_loader = DataLoader(TimeSeriesDataset(X_test, y_test), batch_size=best_params['batch_size'])
final_model.eval()
test_loss = 0
predictions = []
actuals = []

with torch.no_grad():
    for batch_X, batch_y in test_loader:
        batch_X, batch_y = batch_X.to(device), batch_y.to(device)
        outputs = final_model(batch_X)
        test_loss += criterion(outputs, batch_y).item()
        predictions.extend(outputs.cpu().numpy().squeeze())
        actuals.extend(batch_y.cpu().numpy().squeeze())

test_loss /= len(test_loader)
print(f'Test Loss: {test_loss:.4f}')

# Desnormalização
def inverse_transform_data(data):
    if data.ndim == 1:
        data = data.reshape(-1, 1)
    dummies = np.zeros((data.shape[0], shifted_df_as_np.shape[1]))
    dummies[:, 0] = data.ravel()  # Use ravel() to ensure 1D array
    dummies = preprocessing_scaler.inverse_transform(dummies)
    return dummies[:, 0]

# Desnormalização
predictions = inverse_transform_data(np.array(predictions))
actuals = inverse_transform_data(np.array(actuals))

# Rest of the code remains the same
mse = mean_squared_error(actuals, predictions)
rmse = np.sqrt(mse)
mae = mean_absolute_error(actuals, predictions)
r2 = r2_score(actuals, predictions)

print(f'Mean Squared Error: {mse:.4f}')
print(f'Root Mean Squared Error: {rmse:.4f}')
print(f'Mean Absolute Error: {mae:.4f}')
print(f'R-squared: {r2:.4f}')

# Plotar resultados
plt.figure(figsize=(12, 6))
plt.plot(actuals, label='Actual')
plt.plot(predictions, label='Predicted')
plt.title('Actual vs Predicted PM2.5 Values')
plt.xlabel('Time')
plt.ylabel('PM2.5')
plt.legend()
plt.savefig(f'../plots/{data_hoje}/actual_vs_predicted.png')
plt.close()

# Plotar scatter plot
plt.figure(figsize=(10, 10))
plt.scatter(actuals, predictions, alpha=0.5)
plt.plot([min(actuals), max(actuals)], [min(actuals), max(actuals)], 'r--', lw=2)
plt.xlabel('Actual PM2.5')
plt.ylabel('Predicted PM2.5')
plt.title('Actual vs Predicted PM2.5 Scatter Plot')
plt.savefig(f'../plots/{data_hoje}/scatter_plot.png')
plt.close()

# Salvar resultados
results = pd.DataFrame({'Actual': actuals, 'Predicted': predictions})
results.to_csv(f'../results/{data_hoje}/predictions.csv', index=False)

# Salvar métricas
with open(f'../results/{data_hoje}/metrics.txt', 'w') as f:
    f.write(f'Mean Squared Error: {mse:.4f}\n')
    f.write(f'Root Mean Squared Error: {rmse:.4f}\n')
    f.write(f'Mean Absolute Error: {mae:.4f}\n')
    f.write(f'R-squared: {r2:.4f}\n')

print("Análise concluída. Resultados salvos nos diretórios correspondentes.")
