# Melhor modelo LSTM padrao ate o momento R2 de .71

In [24]:
import logging
import os
from copy import deepcopy as dc
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import optuna
import pandas as pd
import torch
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from torch.utils.data import Dataset, DataLoader

In [25]:
# Configuração inicial
data_hoje = datetime.now().strftime('%d-%m')
inicio_execucao = pd.Timestamp.now()

# Criando diretórios para logs e plots
os.makedirs(f'../logs/{data_hoje}', exist_ok=True)
os.makedirs(f'../plots/{data_hoje}', exist_ok=True)

# Configuração do logging
logging.basicConfig(filename=f'../logs/{data_hoje}/lstm_optuna.log', level=logging.INFO, format='- %(message)s')
logging.info('-' * 50)
logging.info(f'{inicio_execucao} - Iniciando o processo de otimização e treinamento do modelo LSTM')

# Carregando e preparando os dados
df_original = pd.read_csv('../dados_tratados/combinado/Piratininga/Piratininga_tratado_combinado.csv',
                          usecols=['PM2.5', 'Data e Hora', 'PM10', 'Monóxido de Carbono', 'Dióxido de Enxofre',
                                   'Dióxido de Nitrogênio', 'Temperatura', 'Velocidade do Vento', 'Umidade Relativa',
                                   'Direção do Vento'], low_memory=False)

df_original['Data e Hora'] = pd.to_datetime(df_original['Data e Hora'])
df_original.set_index('Data e Hora', inplace=True)
df_original.sort_index(inplace=True)

colunas_selecionadas = ['PM2.5', 'PM10', 'Monóxido de Carbono']
logging.info(f"Colunas selecionadas: {colunas_selecionadas}")
df = df_original[colunas_selecionadas]
df = df.loc['2019-01-01':'2022-01-01']

df = df.apply(pd.to_numeric, errors='coerce')

In [26]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

# fazendo o logging de qual algoritmo de imputação foi utilizado
def log_imputation(method_name, impute_function, df):
    df_imputed = impute_function(df)
    logging.info(f"Imputação realizada usando: {method_name}")
    return df_imputed

def linear_interpolation_imputer(df):
    df_imputed = df.interpolate(method='linear')
    return df_imputed


def random_forest_imputer(df):
    imputer = IterativeImputer(estimator=RandomForestRegressor(), max_iter=10, random_state=0)
    df_imputed = imputer.fit_transform(df)
    df_imputed = pd.DataFrame(df_imputed, columns=df.columns, index=df.index)
    return df_imputed


# df_imputed = log_imputation('Random Forest', random_forest_imputer, df)
df_imputed = log_imputation('linear', linear_interpolation_imputer, df)

logging.info(f"Dados ausentes antes da imputação: {df.isna().sum()}")
logging.info(f"Dados ausentes após a imputação: {df_imputed.isna().sum()}")
logging.info(f"Dados totais: {len(df_imputed)}")

In [27]:
from sklearn.preprocessing import StandardScaler


from copy import deepcopy as dc

# Preparando os dados para LSTM
def prepare_dataframe_for_lstm(df, n_steps, weekly_step):
    df = dc(df)
    for col in colunas_selecionadas:
        # Adicionar as últimas `n_steps` horas
        for i in range(1, n_steps + 1):
            df[f'{col}(t-{i})'] = df[col].shift(i)
        # Adicionar o mesmo dia da semana anterior
        df[f'{col}(t-{weekly_step})'] = df[col].shift(weekly_step)
    df.dropna(inplace=True)
    return df

# Parâmetros de janela
lookback = 24  # 48 horas
weekly_step = 24 * 7  # 7 dias

# Preparar o dataframe com a nova configuração de janelas
shifted_df = prepare_dataframe_for_lstm(df_imputed, lookback, weekly_step)
shifted_df = shifted_df[[col for col in shifted_df.columns if any(c in col for c in colunas_selecionadas)]]


# Dividindo em conjuntos de treino, validação e teste
train_size = int(len(shifted_df) * 0.7)
val_size = int(len(shifted_df) * 0.15)

train_df = shifted_df.iloc[:train_size]
val_df = shifted_df.iloc[train_size:train_size + val_size]
test_df = shifted_df.iloc[train_size + val_size:]

# Normalizando os dados de forma correta
scaler = StandardScaler()
train_scaled = pd.DataFrame(scaler.fit_transform(train_df), columns=shifted_df.columns, index=train_df.index)
val_scaled = pd.DataFrame(scaler.transform(val_df), columns=shifted_df.columns, index=val_df.index)
test_scaled = pd.DataFrame(scaler.transform(test_df), columns=shifted_df.columns, index=test_df.index)

X_train, y_train = train_scaled.iloc[:, len(colunas_selecionadas):].values, train_scaled.iloc[:, 0].values
X_val, y_val = val_scaled.iloc[:, len(colunas_selecionadas):].values, val_scaled.iloc[:, 0].values
X_test, y_test = test_scaled.iloc[:, len(colunas_selecionadas):].values, test_scaled.iloc[:, 0].values

# Reshape para LSTM
X_train = X_train.reshape((-1, 25, len(colunas_selecionadas)))
X_val = X_val.reshape((-1, 25, len(colunas_selecionadas)))
X_test = X_test.reshape((-1, 25, len(colunas_selecionadas)))
y_train = y_train.reshape((-1, 1))
y_val = y_val.reshape((-1, 1))
y_test = y_test.reshape((-1, 1))

# Convertendo para tensores PyTorch
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
X_val = torch.tensor(X_val, dtype=torch.float32)
y_val = torch.tensor(y_val, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)


# Dataset e DataLoader
class TimeSeriesDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, i):
        return self.X[i], self.y[i]


# Modelo LSTM
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

In [28]:
import torch
import torch.nn as nn
import torch.nn.functional as F


class FeatureAttention(nn.Module):
    def __init__(self, input_size):
        super().__init__()
        # Inicialização dos pesos baseada nas correlações conhecidas
        initial_weights = torch.tensor([1.0, 0.491396, 0.339309])
        self.feature_weights = nn.Parameter(initial_weights)

    def forward(self, x):
        # Aplica pesos de atenção específicos para cada feature
        return x * self.feature_weights.view(1, 1, -1)

class TemporalAttention(nn.Module):
    def __init__(self, hidden_size):
        super().__init__()
        self.attention = nn.Sequential(
            nn.Linear(hidden_size, hidden_size // 2),
            nn.ReLU(),
            nn.Linear(hidden_size // 2, 1)
        )

    def forward(self, lstm_output):
        # Calcula pesos de atenção temporal
        attention_weights = self.attention(lstm_output)
        attention_weights = F.softmax(attention_weights, dim=1)
        return attention_weights * lstm_output



class ImprovedLSTM(nn.Module):
    def __init__(self, input_size, hidden_size=64, num_layers=2, dropout=0.2):
        super().__init__()

        self.feature_attention = FeatureAttention(input_size)

        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=True,
            dropout=dropout if num_layers > 1 else 0
        )

        self.temporal_attention = TemporalAttention(hidden_size * 2)

        self.fc_layers = nn.Sequential(
            nn.Linear(hidden_size * 2, hidden_size),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_size, hidden_size // 2),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_size // 2, 1)
        )

        self.bn1 = nn.BatchNorm1d(hidden_size)
        self.bn2 = nn.BatchNorm1d(hidden_size // 2)

    def forward(self, x):
        # Guarda as features originais para uso na loss function
        original_features = x[:, -1, :]  # Último timestep

        # Aplica atenção nas features
        x = self.feature_attention(x)

        # Processa através do LSTM
        lstm_out, _ = self.lstm(x)

        # Aplica atenção temporal
        attended_output = self.temporal_attention(lstm_out)

        # Extrai o último estado com atenção
        last_output = attended_output[:, -1, :]

        # Processa através das camadas fully connected com batch norm
        fc1_out = self.fc_layers[0](last_output)
        fc1_out = self.bn1(fc1_out)
        fc1_out = F.relu(fc1_out)
        fc1_out = self.fc_layers[2](fc1_out)

        fc2_out = self.fc_layers[3](fc1_out)
        fc2_out = self.bn2(fc2_out)
        fc2_out = F.relu(fc2_out)
        fc2_out = self.fc_layers[5](fc2_out)

        # Camada final
        output = self.fc_layers[6](fc2_out)

        return output, original_features


class CustomLoss(nn.Module):
    def __init__(self, correlation_weights=[1.0, 0.491396, 0.339309]):
        super().__init__()
        self.correlation_weights = torch.tensor(correlation_weights)
        self.mse = nn.MSELoss()
        self.mae = nn.L1Loss()

    def calculate_correlation(self, x, y):
        """Calculate correlation between two vectors"""
        x_centered = x - x.mean()
        y_centered = y - y.mean()

        numerator = (x_centered * y_centered).sum()
        denominator = torch.sqrt((x_centered ** 2).sum() * (y_centered ** 2).sum())

        return numerator / (denominator + 1e-8)  # Add small epsilon to avoid division by zero

    def forward(self, y_pred, y_true, features=None):
        # Debugging the types of y_pred and y_true
        # print(f"y_pred type: {type(y_pred)}, y_true type: {type(y_true)}")
        assert isinstance(y_pred, torch.Tensor), "y_pred must be a PyTorch tensor"
        assert isinstance(y_true, torch.Tensor), "y_true must be a PyTorch tensor"

        if y_pred.dim() == 1:
            y_pred = y_pred.unsqueeze(1)
        if y_true.dim() == 1:
            y_true = y_true.unsqueeze(1)

        mse_loss = self.mse(y_pred, y_true)
        mae_loss = self.mae(y_pred, y_true)

        # Perda base
        base_loss = 0.7 * mse_loss + 0.3 * mae_loss

        if features is not None:
            try:
                # Reshape predictions and features to ensure correct dimensions
                y_pred_flat = y_pred.view(-1)

                # Calculate correlations for each feature
                correlations = []
                for i in range(features.size(-1)):
                    feature_flat = features[:, i].view(-1)
                    correlation = self.calculate_correlation(y_pred_flat, feature_flat)
                    correlations.append(correlation)

                # Stack correlations into tensor
                correlations = torch.stack(correlations)

                # Calculate correlation loss
                correlation_weights = self.correlation_weights.to(y_pred.device)
                correlation_loss = torch.mean(torch.abs(correlations - correlation_weights))

                return base_loss + 0.1 * correlation_loss
            except Exception as e:
                print(f"Warning: Error in correlation calculation: {e}")
                return base_loss

        return base_loss

In [29]:

from torch.nn.utils import clip_grad_norm_
from torch.optim.lr_scheduler import ReduceLROnPlateau

def train_model(model, train_loader, val_loader, num_epochs=1000, patience=30):
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-5)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=0.5, patience=5, verbose=True
    )
    criterion = CustomLoss()

    best_val_loss = float('inf')
    patience_counter = 0

    for epoch in range(num_epochs):
        model.train()
        train_loss = 0.0

        for batch_x, batch_y in train_loader:
            optimizer.zero_grad()

            predictions, _ = model(batch_x)
            loss = criterion(predictions, batch_y, batch_x[:, -1, :])  # Usando último timestep das features

            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()

            train_loss += loss.item()

        # Validação
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for batch_x, batch_y in val_loader:
                predictions, _ = model(batch_x)
                val_loss += criterion(predictions, batch_y, batch_x[:, -1, :]).item()

        scheduler.step(val_loss)

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
        else:
            patience_counter += 1

        if patience_counter >= patience:
            print(f"Early stopping at epoch {epoch}")
            break

    return model


In [30]:
import optuna
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import os

def calculate_mape(y_true, y_pred):
    """Calcula o Mean Absolute Percentage Error"""
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    mask = y_true != 0
    return np.mean(np.abs((y_true[mask] - y_pred[mask]) / y_true[mask])) * 100

def evaluate_predictions(y_true, y_pred):
    """Calcula múltiplas métricas de avaliação"""
    metrics = {
        'MSE': mean_squared_error(y_true, y_pred),
        'RMSE': np.sqrt(mean_squared_error(y_true, y_pred)),
        'MAE': mean_absolute_error(y_true, y_pred),
        'MAPE': calculate_mape(y_true, y_pred),
        'R2': r2_score(y_true, y_pred)
    }

    # Adiciona correlação de Pearson
    metrics['Pearson_Correlation'] = np.corrcoef(y_true.flatten(), y_pred.flatten())[0, 1]

    return metrics

def plot_predictions(y_true, y_pred, title, save_path=None):
    """Cria visualizações das previsões vs valores reais"""
    plt.figure(figsize=(12, 6))

    # Plotar valores reais e previstos
    plt.subplot(2, 1, 1)
    plt.plot(y_true, label='Real', alpha=0.7)
    plt.plot(y_pred, label='Previsto', alpha=0.7)
    plt.title(f'{title} - Valores Reais vs Previstos')
    plt.legend()
    plt.grid(True)

    # Plotar scatter plot
    plt.subplot(2, 1, 2)
    plt.scatter(y_true, y_pred, alpha=0.5)
    plt.plot([y_true.min(), y_true.max()], [y_true.min(), y_true.max()], 'r--', alpha=0.8)
    plt.xlabel('Valores Reais')
    plt.ylabel('Valores Previstos')
    plt.title('Scatter Plot: Real vs Previsto')
    plt.grid(True)

    plt.tight_layout()
    if save_path:
        plt.savefig(save_path)
    plt.close()

def objective(trial):
    """Função objetivo para otimização com Optuna"""
    config = {
        'hidden_size': trial.suggest_int('hidden_size', 32, 256),
        'num_layers': trial.suggest_int('num_layers', 1, 4),
        'dropout': trial.suggest_float('dropout', 0.1, 0.5),
        'batch_size': trial.suggest_categorical('batch_size', [32, 64, 128, 256]),
        'learning_rate': trial.suggest_float('learning_rate', 1e-4, 1e-2, log=True)
    }

    train_dataset = TimeSeriesDataset(X_train, y_train)
    val_dataset = TimeSeriesDataset(X_val, y_val)

    train_loader = DataLoader(
        train_dataset,
        batch_size=config['batch_size'],
        shuffle=True,
        num_workers=0
    )

    val_loader = DataLoader(
        val_dataset,
        batch_size=config['batch_size'],
        shuffle=False,
        num_workers=0
    )

    model = ImprovedLSTM(
        input_size=X_train.shape[2],
        hidden_size=config['hidden_size'],
        num_layers=config['num_layers'],
        dropout=config['dropout']
    ).to(device)

    optimizer = torch.optim.AdamW(model.parameters(), lr=config['learning_rate'])
    criterion = CustomLoss()

    best_val_loss = float('inf')
    patience_counter = 0
    patience = 30

    for epoch in range(100):
        model.train()
        train_loss = 0.0

        for batch_x, batch_y in train_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)

            optimizer.zero_grad()
            predictions, _ = model(batch_x)
            loss = criterion(predictions, batch_y, batch_x[:, -1, :])


            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()

            train_loss += loss.item()

        model.eval()
        val_loss = 0.0
        val_predictions = []
        val_true = []

        with torch.no_grad():
            for batch_x, batch_y in val_loader:
                batch_x, batch_y = batch_x.to(device), batch_y.to(device)
                predictions, features = model(batch_x)
                val_loss += criterion(predictions, batch_y, features).item()

                val_predictions.extend(predictions.cpu().numpy())
                val_true.extend(batch_y.cpu().numpy())

        val_loss /= len(val_loader)

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
        else:
            patience_counter += 1

        if patience_counter >= patience:
            break

    val_metrics = evaluate_predictions(
        np.array(val_true),
        np.array(val_predictions)
    )

    for metric_name, metric_value in val_metrics.items():
        trial.set_user_attr(metric_name, metric_value)

    return val_loss


def train_and_evaluate_final_model(best_params, save_dir='resultados'):
    """Treina e avalia o modelo final com os melhores parâmetros"""
    os.makedirs(save_dir, exist_ok=True)

    # Criar datasets
    train_dataset = TimeSeriesDataset(X_train, y_train)
    val_dataset = TimeSeriesDataset(X_val, y_val)
    test_dataset = TimeSeriesDataset(X_test, y_test)

    # Criar dataloaders
    train_loader = DataLoader(train_dataset, batch_size=best_params['batch_size'], shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=best_params['batch_size'], shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=best_params['batch_size'], shuffle=False)

    # Criar modelo final
    model = ImprovedLSTM(
        input_size=X_train.shape[2],
        hidden_size=best_params['hidden_size'],
        num_layers=best_params['num_layers'],
        dropout=best_params['dropout']
    ).to(device)

    # Treinar modelo final
    model = train_model(model, train_loader, val_loader)

    # Avaliar em todos os conjuntos
    results = {}
    for name, loader in [('Train', train_loader), ('Validation', val_loader), ('Test', test_loader)]:
        predictions = []
        true_values = []

        model.eval()
        with torch.no_grad():
            for batch_x, batch_y in loader:
                batch_x, batch_y = batch_x.to(device), batch_y.to(device)
                pred, _ = model(batch_x)
                predictions.extend(pred.cpu().numpy())
                true_values.extend(batch_y.cpu().numpy())

        # Converter para arrays
        predictions = np.array(predictions)
        true_values = np.array(true_values)

        # Calcular métricas
        metrics = evaluate_predictions(true_values, predictions)
        results[name] = metrics

        # Criar visualizações
        plot_predictions(
            true_values,
            predictions,
            f'Conjunto {name}',
            save_path=os.path.join(save_dir, f'predictions_{name.lower()}.png')
        )

    # Salvar resultados
    results_df = pd.DataFrame(results)
    results_df.to_csv(os.path.join(save_dir, 'metrics.csv'))

    # Salvar modelo
    torch.save({
        'model_state_dict': model.state_dict(),
        'best_params': best_params,
        'results': results
    }, os.path.join(save_dir, 'model.pth'))

    return model, results

# Executar otimização
def run_optimization(n_trials=50):
    study = optuna.create_study(direction='minimize')
    study.optimize(objective, n_trials=n_trials)

    # Treinar modelo final com melhores parâmetros
    best_params = study.best_trial.params
    print("\nMelhores hiperparâmetros encontrados:")
    print(best_params)

    # Treinar e avaliar modelo final
    final_model, results = train_and_evaluate_final_model(best_params)

    # Imprimir resultados finais
    print("\nResultados finais:")
    for dataset_name, metrics in results.items():
        print(f"\n{dataset_name}:")
        for metric_name, value in metrics.items():
            print(f"{metric_name}: {value:.4f}")

    return study, final_model, results

# Executar otimização e treinamento
if __name__ == "__main__":
    study, final_model, results = run_optimization(n_trials=1)

[I 2024-11-13 23:40:48,962] A new study created in memory with name: no-name-629a0e7c-a4b8-49ca-9917-72b9c63b12db
[I 2024-11-14 00:23:06,787] Trial 0 finished with value: 0.4495667377486825 and parameters: {'hidden_size': 148, 'num_layers': 4, 'dropout': 0.16133087132712484, 'batch_size': 256, 'learning_rate': 0.00011513821359178125}. Best is trial 0 with value: 0.4495667377486825.



Melhores hiperparâmetros encontrados:
{'hidden_size': 148, 'num_layers': 4, 'dropout': 0.16133087132712484, 'batch_size': 256, 'learning_rate': 0.00011513821359178125}
Early stopping at epoch 47

Resultados finais:

Train:
MSE: 0.4017
RMSE: 0.6338
MAE: 0.3617
MAPE: 127.3319
R2: 0.5983
Pearson_Correlation: 0.7740

Validation:
MSE: 0.4052
RMSE: 0.6365
MAE: 0.3967
MAPE: 143.8007
R2: 0.4731
Pearson_Correlation: 0.6891

Test:
MSE: 0.3371
RMSE: 0.5806
MAE: 0.3496
MAPE: 113.1328
R2: 0.7331
Pearson_Correlation: 0.8572
