# Melhor modelo LSTM padrao ate o momento R2 de .71

In [68]:
import logging
import os
from copy import deepcopy as dc
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import optuna
import pandas as pd
import torch
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from torch.utils.data import Dataset, DataLoader

In [69]:
# Configuração inicial
data_hoje = datetime.now().strftime('%d-%m')
inicio_execucao = pd.Timestamp.now()

# Criando diretórios para logs e plots
os.makedirs(f'../logs/{data_hoje}', exist_ok=True)
os.makedirs(f'../plots/{data_hoje}', exist_ok=True)

# Configuração do logging
logging.basicConfig(filename=f'../logs/{data_hoje}/lstm_optuna.log', level=logging.INFO, format='- %(message)s')
logging.info('-' * 50)
logging.info(f'{inicio_execucao} - Iniciando o processo de otimização e treinamento do modelo LSTM')

# Carregando e preparando os dados
df_original = pd.read_csv('../dados_tratados/combinado/Piratininga/Piratininga_tratado_combinado.csv',
                          usecols=['PM2.5', 'Data e Hora', 'PM10', 'Monóxido de Carbono', 'Dióxido de Enxofre',
                                   'Dióxido de Nitrogênio', 'Temperatura', 'Velocidade do Vento', 'Umidade Relativa',
                                   'Direção do Vento'], low_memory=False)

df_original['Data e Hora'] = pd.to_datetime(df_original['Data e Hora'])
df_original.set_index('Data e Hora', inplace=True)
df_original.sort_index(inplace=True)

colunas_selecionadas = ['PM2.5', 'PM10', 'Monóxido de Carbono']
logging.info(f"Colunas selecionadas: {colunas_selecionadas}")
df = df_original[colunas_selecionadas]
df = df.loc['2019-01-01':'2022-01-01']

df = df.apply(pd.to_numeric, errors='coerce')

In [70]:

def linear_interpolation_imputer(df):
    df_imputed = df.interpolate(method='linear')
    return df_imputed

df_imputed = linear_interpolation_imputer(df)


In [71]:
from copy import deepcopy as dc

def prepare_dataframe_for_lstm(df, n_steps, weekly_steps, weekly_window):
    df = dc(df)
    new_columns = []

    for col in colunas_selecionadas:
        # Últimas n_steps horas
        for i in range(1, n_steps + 1):
            new_columns.append(df[col].shift(i).rename(f'{col}(t-{i})'))
        # Janela semanal
        for j in range(0, weekly_window):
            weekly_shift = weekly_steps + j
            new_columns.append(df[col].shift(weekly_shift).rename(f'{col}(t-{weekly_shift})'))

    df = pd.concat([df] + new_columns, axis=1)
    df.dropna(inplace=True)
    return df

lookback = 72
weekly_step = 24 * 7
weekly_window = 24

shifted_df = prepare_dataframe_for_lstm(df_imputed, lookback, weekly_step, weekly_window)
shifted_df = shifted_df[[col for col in shifted_df.columns if any(c in col for c in colunas_selecionadas)]]


train_size = int(len(shifted_df) * 0.7)
val_size = int(len(shifted_df) * 0.15)

train_df = shifted_df.iloc[:train_size]
val_df = shifted_df.iloc[train_size:train_size + val_size]
test_df = shifted_df.iloc[train_size + val_size:]

from sklearn.preprocessing import StandardScaler, RobustScaler

def apply_scaler(df_train, df_val, df_test, scaler_type='Standard'):
    if scaler_type == 'Standard':
        scaler = StandardScaler()
    elif scaler_type == 'Robust':
        scaler = RobustScaler()
    else:
        raise ValueError("scaler_type deve ser 'Standard' ou 'Robust'")

    train_scaled = pd.DataFrame(scaler.fit_transform(df_train), columns=df_train.columns, index=df_train.index)
    val_scaled = pd.DataFrame(scaler.transform(df_val), columns=df_val.columns, index=df_val.index)
    test_scaled = pd.DataFrame(scaler.transform(df_test), columns=df_test.columns, index=df_test.index)

    return train_scaled, val_scaled, test_scaled

train_scaled, val_scaled, test_scaled = apply_scaler(train_df, val_df, test_df, scaler_type='Robust')

X_train, y_train = train_scaled.iloc[:, len(colunas_selecionadas):].values, train_scaled.iloc[:, 0].values
X_val, y_val = val_scaled.iloc[:, len(colunas_selecionadas):].values, val_scaled.iloc[:, 0].values
X_test, y_test = test_scaled.iloc[:, len(colunas_selecionadas):].values, test_scaled.iloc[:, 0].values

def check_and_reshape(X, lookback, n_features):
    num_samples = X.shape[0] - (X.shape[0] % (lookback + 1))
    X = X[:num_samples]
    X_reshaped = X.reshape((-1, lookback + 1, n_features))
    return X_reshaped, num_samples



X_train, num_samples_train = check_and_reshape(X_train, lookback, len(colunas_selecionadas))
X_val, num_samples_val = check_and_reshape(X_val, lookback, len(colunas_selecionadas))
X_test, num_samples_test = check_and_reshape(X_test, lookback, len(colunas_selecionadas))

y_train = y_train[:num_samples_train].reshape((-1, 1))
y_val = y_val[:num_samples_val].reshape((-1, 1))
y_test = y_test[:num_samples_test].reshape((-1, 1))

y_train = y_train.reshape((-1, 1))
y_val = y_val.reshape((-1, 1))
y_test = y_test.reshape((-1, 1))

X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
X_val = torch.tensor(X_val, dtype=torch.float32)
y_val = torch.tensor(y_val, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
X_train = X_train.to(device)
y_train = y_train.to(device)
X_val = X_val.to(device)
y_val = y_val.to(device)
X_test = X_test.to(device)
y_test = y_test.to(device)

class TimeSeriesDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, i):
        return self.X[i], self.y[i]


# Modelo LSTM
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

In [72]:
import torch
import torch.nn as nn
import torch.nn.functional as F


class FeatureAttention(nn.Module):
    def __init__(self, input_size):
        super().__init__()
        initial_weights = torch.tensor([1.0, 0.491396, 0.339309])
        self.feature_weights = nn.Parameter(initial_weights)

    def forward(self, x):
        return x * self.feature_weights.view(1, 1, -1)

class TemporalAttention(nn.Module):
    def __init__(self, hidden_size):
        super().__init__()
        self.attention = nn.Sequential(
            nn.Linear(hidden_size, hidden_size // 2),
            nn.ReLU(),
            nn.Linear(hidden_size // 2, 1)
        )

    def forward(self, lstm_output):
        attention_weights = self.attention(lstm_output)
        attention_weights = F.softmax(attention_weights, dim=1)
        return attention_weights * lstm_output



class ImprovedLSTM(nn.Module):
    def __init__(self, input_size, hidden_size=64, num_layers=2, dropout=0.2):
        super().__init__()

        self.feature_attention = FeatureAttention(input_size)

        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=True,
            dropout=dropout if num_layers > 1 else 0
        )

        self.temporal_attention = TemporalAttention(hidden_size * 2)

        self.fc_layers = nn.Sequential(
            nn.Linear(hidden_size * 2, hidden_size),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_size, hidden_size // 2),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_size // 2, 1)
        )

        self.bn1 = nn.BatchNorm1d(hidden_size)
        self.bn2 = nn.BatchNorm1d(hidden_size // 2)

    def forward(self, x):
        original_features = x[:, -1, :]

        x = self.feature_attention(x)

        lstm_out, _ = self.lstm(x)

        attended_output = self.temporal_attention(lstm_out)

        last_output = attended_output[:, -1, :]

        fc1_out = self.fc_layers[0](last_output)
        fc1_out = self.bn1(fc1_out)
        fc1_out = F.relu(fc1_out)
        fc1_out = self.fc_layers[2](fc1_out)

        fc2_out = self.fc_layers[3](fc1_out)
        fc2_out = self.bn2(fc2_out)
        fc2_out = F.relu(fc2_out)
        fc2_out = self.fc_layers[5](fc2_out)

        output = self.fc_layers[6](fc2_out)

        return output, original_features


class CustomLoss(nn.Module):
    def __init__(self, correlation_weights=[1.0, 0.491396, 0.339309]):
        super().__init__()
        self.correlation_weights = torch.tensor(correlation_weights)
        self.mse = nn.MSELoss()
        self.mae = nn.L1Loss()

    def calculate_correlation(self, x, y):
        x_centered = x - x.mean()
        y_centered = y - y.mean()

        numerator = (x_centered * y_centered).sum()
        denominator = torch.sqrt((x_centered ** 2).sum() * (y_centered ** 2).sum())

        return numerator / (denominator + 1e-8)

    def forward(self, y_pred, y_true, features=None):
        assert isinstance(y_pred, torch.Tensor), "y_pred must be a PyTorch tensor"
        assert isinstance(y_true, torch.Tensor), "y_true must be a PyTorch tensor"

        if y_pred.dim() == 1:
            y_pred = y_pred.unsqueeze(1)
        if y_true.dim() == 1:
            y_true = y_true.unsqueeze(1)

        mse_loss = self.mse(y_pred, y_true)
        mae_loss = self.mae(y_pred, y_true)

        base_loss = 0.7 * mse_loss + 0.3 * mae_loss

        if features is not None:
            try:
                y_pred_flat = y_pred.view(-1)
                correlations = []
                for i in range(features.size(-1)):
                    feature_flat = features[:, i].view(-1)
                    correlation = self.calculate_correlation(y_pred_flat, feature_flat)
                    correlations.append(correlation)

                correlations = torch.stack(correlations)

                correlation_weights = self.correlation_weights.to(y_pred.device)
                correlation_loss = torch.mean(torch.abs(correlations - correlation_weights))

                return base_loss + 0.1 * correlation_loss
            except Exception as e:
                print(f"Warning: Error in correlation calculation: {e}")
                return base_loss

        return base_loss

In [73]:
class Block(nn.Module):
    def __init__(self, units, theta_dim, backcast_length=73, forecast_length=1, share_theta=False):
        super().__init__()
        self.units = units
        self.theta_dim = theta_dim
        self.backcast_length = backcast_length
        self.forecast_length = forecast_length
        self.share_theta = share_theta

        self.fc1 = nn.Linear(backcast_length, units)
        self.fc2 = nn.Linear(units, units)
        self.fc3 = nn.Linear(units, units)
        self.fc4 = nn.Linear(units, units)

        if share_theta:
            self.theta_f = self.theta_b = nn.Linear(units, theta_dim, bias=False)
        else:
            self.theta_b = nn.Linear(units, theta_dim, bias=False)
            self.theta_f = nn.Linear(units, theta_dim, bias=False)

        self.backcast_basis = nn.Linear(theta_dim, backcast_length, bias=False)
        self.forecast_basis = nn.Linear(theta_dim, forecast_length, bias=False)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = F.relu(self.fc4(x))

        theta_b = self.theta_b(x)
        theta_f = self.theta_f(x) if not self.share_theta else theta_b

        backcast = self.backcast_basis(theta_b)
        forecast = self.forecast_basis(theta_f)

        return backcast, forecast

class Stack(nn.Module):
    def __init__(self, num_blocks, units, theta_dim, backcast_length=73, forecast_length=1, share_theta=False):
        super().__init__()
        self.blocks = nn.ModuleList([
            Block(units, theta_dim, backcast_length, forecast_length, share_theta)
            for _ in range(num_blocks)
        ])

    def forward(self, x, return_components=False):
        components = []
        total_forecast = torch.zeros(x.size(0), 1).to(x.device)

        for block in self.blocks:
            backcast, forecast = block(x)
            total_forecast = total_forecast + forecast
            x = x - backcast
            if return_components:
                components.append(forecast)

        if return_components:
            return total_forecast, components
        return total_forecast

class NBEATS(nn.Module):
    def __init__(self,
                 backcast_length=73,
                 forecast_length=1,
                 num_stacks=2,
                 num_blocks=3,
                 units=256,
                 theta_dim=16,
                 share_theta=False,
                 feature_attention=True):
        super().__init__()

        self.backcast_length = backcast_length
        self.forecast_length = forecast_length
        self.feature_attention = feature_attention

        if feature_attention:
            self.attention = nn.Parameter(torch.ones(3))  # For 3 features
            self.softmax = nn.Softmax(dim=-1)

        self.stacks = nn.ModuleList([
            Stack(num_blocks, units, theta_dim, backcast_length, forecast_length, share_theta)
            for _ in range(num_stacks)
        ])

    def forward(self, x):
        batch_size = x.size(0)

        if self.feature_attention:
            attention_weights = self.softmax(self.attention)
            x = x * attention_weights.view(1, 1, -1)

        x = x.reshape(batch_size, -1)

        forecast = torch.zeros(batch_size, self.forecast_length).to(x.device)
        for stack in self.stacks:
            stack_forecast = stack(x)
            forecast = forecast + stack_forecast

        return forecast, x[:, -3:]

class NBEATSTrainer:
    def __init__(self, model, optimizer, criterion, device):
        self.model = model
        self.optimizer = optimizer
        self.criterion = criterion
        self.device = device
        self.scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5, verbose=True)

    def train_epoch(self, train_loader):
        self.model.train()
        total_loss = 0

        for batch_x, batch_y in train_loader:
            batch_x, batch_y = batch_x.to(self.device), batch_y.to(self.device)

            self.optimizer.zero_grad()
            predictions, features = self.model(batch_x)
            loss = self.criterion(predictions, batch_y, features)

            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=1.0)
            self.optimizer.step()

            total_loss += loss.item()

        return total_loss / len(train_loader)

    def validate(self, val_loader):
        self.model.eval()
        total_loss = 0

        with torch.no_grad():
            for batch_x, batch_y in val_loader:
                batch_x, batch_y = batch_x.to(self.device), batch_y.to(self.device)
                predictions, features = self.model(batch_x)
                loss = self.criterion(predictions, batch_y, features)
                total_loss += loss.item()

        return total_loss / len(val_loader)

    def train(self, train_loader, val_loader, num_epochs=1000, patience=30):
        best_val_loss = float('inf')
        patience_counter = 0

        for epoch in range(num_epochs):
            train_loss = self.train_epoch(train_loader)
            val_loss = self.validate(val_loader)

            self.scheduler.step(val_loss)

            if val_loss < best_val_loss:
                best_val_loss = val_loss
                patience_counter = 0
                # Save best model
                torch.save({
                    'model_state_dict': self.model.state_dict(),
                    'optimizer_state_dict': self.optimizer.state_dict(),
                    'val_loss': val_loss,
                    'epoch': epoch,
                }, 'best_nbeats_model.pth')
            else:
                patience_counter += 1

            if patience_counter >= patience:
                print(f"Early stopping at epoch {epoch}")
                break

            if (epoch + 1) % 10 == 0:
                print(f"Epoch {epoch+1}: Train Loss = {train_loss:.6f}, Val Loss = {val_loss:.6f}")

def create_nbeats_model(input_size=3):
    model = NBEATS(
        backcast_length=73,
        forecast_length=1,
        num_stacks=2,
        num_blocks=3,
        units=256,
        theta_dim=16,
        share_theta=False,
        feature_attention=True
    ).to(device)

    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-5)
    criterion = CustomLoss()

    return model, optimizer, criterion

In [74]:

from torch.optim.lr_scheduler import ReduceLROnPlateau, OneCycleLR


In [75]:
import optuna
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import matplotlib.pyplot as plt
import os

def calculate_mape(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    mask = y_true != 0
    return np.mean(np.abs((y_true[mask] - y_pred[mask]) / y_true[mask])) * 100

def evaluate_predictions(y_true, y_pred):
    metrics = {
        'MSE': mean_squared_error(y_true, y_pred),
        'RMSE': np.sqrt(mean_squared_error(y_true, y_pred)),
        'MAE': mean_absolute_error(y_true, y_pred),
        'MAPE': calculate_mape(y_true, y_pred),
        'R2': r2_score(y_true, y_pred)
    }

    metrics['Pearson_Correlation'] = np.corrcoef(y_true.flatten(), y_pred.flatten())[0, 1]

    return metrics

def plot_predictions(y_true, y_pred, title, save_path=None):
    plt.figure(figsize=(12, 6))

    plt.subplot(2, 1, 1)
    plt.plot(y_true, label='Real', alpha=0.7)
    plt.plot(y_pred, label='Previsto', alpha=0.7)
    plt.title(f'{title} - Valores Reais vs Previstos')
    plt.legend()
    plt.grid(True)

    plt.subplot(2, 1, 2)
    plt.scatter(y_true, y_pred, alpha=0.5)
    plt.plot([y_true.min(), y_true.max()], [y_true.min(), y_true.max()], 'r--', alpha=0.8)
    plt.xlabel('Valores Reais')
    plt.ylabel('Valores Previstos')
    plt.title('Scatter Plot: Real vs Previsto')
    plt.grid(True)

    plt.tight_layout()
    if save_path:
        plt.savefig(save_path)
    plt.close()


In [76]:
# Modified objective function for Optuna
def objective_nbeats(trial):
    config = {
        'num_stacks': trial.suggest_int('num_stacks', 1, 3),
        'num_blocks': trial.suggest_int('num_blocks', 2, 4),
        'units': trial.suggest_int('units', 128, 512),
        'theta_dim': trial.suggest_int('theta_dim', 8, 32),
        'batch_size': trial.suggest_categorical('batch_size', [32, 64, 128, 256]),
        'learning_rate': trial.suggest_float('learning_rate', 1e-4, 1e-2, log=True)
    }

    train_dataset = TimeSeriesDataset(X_train, y_train)
    val_dataset = TimeSeriesDataset(X_val, y_val)

    train_loader = DataLoader(train_dataset, batch_size=config['batch_size'], shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=config['batch_size'], shuffle=False)

    model = NBEATS(
        num_stacks=config['num_stacks'],
        num_blocks=config['num_blocks'],
        units=config['units'],
        theta_dim=config['theta_dim']
    ).to(device)

    optimizer = torch.optim.AdamW(model.parameters(), lr=config['learning_rate'])
    criterion = CustomLoss()
    trainer = NBEATSTrainer(model, optimizer, criterion, device)

    best_val_loss = float('inf')
    patience_counter = 0
    patience = 30

    for epoch in range(100):
        train_loss = trainer.train_epoch(train_loader)
        val_loss = trainer.validate(val_loader)

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
        else:
            patience_counter += 1

        if patience_counter >= patience:
            break

    return val_loss

def train_and_evaluate_nbeats(best_params, save_dir='resultados_nbeats'):
    os.makedirs(save_dir, exist_ok=True)

    train_dataset = TimeSeriesDataset(X_train, y_train)
    val_dataset = TimeSeriesDataset(X_val, y_val)
    test_dataset = TimeSeriesDataset(X_test, y_test)

    train_loader = DataLoader(train_dataset, batch_size=best_params['batch_size'], shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=best_params['batch_size'], shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=best_params['batch_size'], shuffle=False)

    model = NBEATS(
        num_stacks=best_params['num_stacks'],
        num_blocks=best_params['num_blocks'],
        units=best_params['units'],
        theta_dim=best_params['theta_dim']
    ).to(device)

    optimizer = torch.optim.AdamW(model.parameters(), lr=best_params['learning_rate'])
    criterion = CustomLoss()
    trainer = NBEATSTrainer(model, optimizer, criterion, device)

    trainer.train(train_loader, val_loader)

    results = {}
    for name, loader in [('Train', train_loader), ('Validation', val_loader), ('Test', test_loader)]:
        predictions = []
        true_values = []

        model.eval()
        with torch.no_grad():
            for batch_x, batch_y in loader:
                batch_x, batch_y = batch_x.to(device), batch_y.to(device)
                pred, _ = model(batch_x)
                predictions.extend(pred.cpu().numpy())
                true_values.extend(batch_y.cpu().numpy())

        predictions = np.array(predictions)
        true_values = np.array(true_values)

        metrics = evaluate_predictions(true_values, predictions)
        results[name] = metrics

        plot_predictions(
            true_values,
            predictions,
            f'N-BEATS {name} Set',
            save_path=os.path.join(save_dir, f'nbeats_predictions_{name.lower()}.png')
        )

    results_df = pd.DataFrame(results)
    results_df.to_csv(os.path.join(save_dir, 'nbeats_metrics.csv'))

    return model, results

def run_nbeats_optimization(n_trials=1):
    study = optuna.create_study(direction='minimize')
    study.optimize(objective_nbeats, n_trials=n_trials)

    best_params = study.best_trial.params
    print("\nBest N-BEATS hyperparameters:")
    print(best_params)

    final_model, results = train_and_evaluate_nbeats(best_params)

    print("\nN-BEATS Final Results:")
    for dataset_name, metrics in results.items():
        print(f"\n{dataset_name}:")
        for metric_name, value in metrics.items():
            print(f"{metric_name}: {value:.4f}")

    return study, final_model, results

if __name__ == "__main__":
    study, final_model, results = run_nbeats_optimization(n_trials=1)

[I 2024-11-15 11:49:03,813] A new study created in memory with name: no-name-32f309f2-bf64-4188-b73d-7f0286d81c50
[W 2024-11-15 11:49:03,855] Trial 0 failed with parameters: {'num_stacks': 2, 'num_blocks': 2, 'units': 382, 'theta_dim': 19, 'batch_size': 64, 'learning_rate': 0.0005695517040551368} because of the following error: IndexError('index 19912 is out of bounds for dimension 0 with size 18250').
Traceback (most recent call last):
  File "C:\dev\fast_api\venv\Lib\site-packages\optuna\study\_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "C:\Users\portes\AppData\Local\Temp\ipykernel_3256\374379225.py", line 34, in objective_nbeats
    train_loss = trainer.train_epoch(train_loader)
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\portes\AppData\Local\Temp\ipykernel_3256\2791364336.py", line 114, in train_epoch
    for batch_x, batch_y in train_loader:
                            ^^^^^^^^^^^^
  File

IndexError: index 19912 is out of bounds for dimension 0 with size 18250