# Best standard LSTM model so far: R² of 0.71

# In [2]:
import torch

# Report whether a CUDA device is usable; the device name is only queried
# when one exists, because get_device_name() raises on CPU-only machines.
print(torch.cuda.is_available())
if torch.cuda.is_available():
    print(torch.cuda.get_device_name())

# True
# NVIDIA GeForce RTX 3070


# In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from scipy import stats
import optuna
import logging
import holidays

# In [None]:
import pandas as pd
import numpy as np
from scipy import stats
from sklearn.preprocessing import RobustScaler, StandardScaler
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.ensemble import RandomForestRegressor
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils import clip_grad_norm_
from torch.optim.lr_scheduler import ReduceLROnPlateau
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import optuna
import holidays
import logging
from copy import deepcopy as dc
import os

def load_and_prepare_data(filepath, start_date='2019-01-01', end_date='2022-01-01'):
    """Load the air-quality CSV and return it indexed and sliced by timestamp.

    Only the nine measurement columns listed below plus ``'Data e Hora'`` are
    read; every measurement is parsed as ``float32``. The timestamp column is
    converted to datetimes, becomes the index, and the rows are sorted by it
    before the ``start_date``..``end_date`` label slice is taken.

    Args:
        filepath: Path (or buffer) accepted by ``pandas.read_csv``.
        start_date: First label of the slice.
        end_date: Last label of the slice.

    Returns:
        The sliced DataFrame, indexed by ``'Data e Hora'``.
    """
    timestamp_col = 'Data e Hora'
    measurement_cols = (
        'PM2.5',
        'PM10',
        'Monóxido de Carbono',
        'Dióxido de Enxofre',
        'Dióxido de Nitrogênio',
        'Temperatura',
        'Velocidade do Vento',
        'Umidade Relativa',
        'Direção do Vento',
    )
    column_types = dict.fromkeys(measurement_cols, 'float32')

    frame = pd.read_csv(
        filepath,
        usecols=[*measurement_cols, timestamp_col],
        dtype=column_types,
        low_memory=False,
    )
    frame[timestamp_col] = pd.to_datetime(frame[timestamp_col])
    frame = frame.set_index(timestamp_col).sort_index()

    return frame.loc[start_date:end_date]

def add_temporal_features(df):
    """Return a copy of ``df`` with calendar features derived from its index.

    The index must expose ``hour``, ``dayofweek``, ``month``, ``dayofyear``
    and ``date`` (i.e. a ``DatetimeIndex``). The input frame is not modified.

    Added columns:
        * ``hora``, ``dia_semana``, ``mes``, ``dia_ano``: raw calendar fields.
        * ``hora_sin/cos``, ``dia_semana_sin/cos``, ``mes_sin/cos``: cyclic
          encodings with periods 24, 7 and 12 respectively.
        * ``is_holiday``: 1 when the row's date is in ``holidays.BR()``.
        * ``periodo_*``: one-hot columns for the part of the day
          (hours 0-6, 7-12, 13-18, 19-23).

    Args:
        df: DataFrame indexed by timestamps.

    Returns:
        A new DataFrame with the extra feature columns.
    """
    df = df.copy()
    df['hora'] = df.index.hour
    df['dia_semana'] = df.index.dayofweek
    df['mes'] = df.index.month
    df['dia_ano'] = df.index.dayofyear

    # Cyclic encodings, so the last and first value of each cycle stay close.
    df['hora_sin'] = np.sin(2 * np.pi * df['hora']/24)
    df['hora_cos'] = np.cos(2 * np.pi * df['hora']/24)
    df['dia_semana_sin'] = np.sin(2 * np.pi * df['dia_semana']/7)
    df['dia_semana_cos'] = np.cos(2 * np.pi * df['dia_semana']/7)
    df['mes_sin'] = np.sin(2 * np.pi * df['mes']/12)
    df['mes_cos'] = np.cos(2 * np.pi * df['mes']/12)

    br_holidays = holidays.BR()
    # DatetimeIndex.date is a plain NumPy array of datetime.date objects and
    # has no .map(), so the membership test is evaluated element by element.
    df['is_holiday'] = np.fromiter(
        (day in br_holidays for day in df.index.date),
        dtype=int,
        count=len(df),
    )

    df['periodo_dia'] = pd.cut(df.index.hour,
                              bins=[-1, 6, 12, 18, 23],
                              labels=['madrugada', 'manha', 'tarde', 'noite'])

    df = pd.get_dummies(df, columns=['periodo_dia'], prefix='periodo')
    return df

def prepare_dataframe_for_lstm(df, lookback=48, weekly_step=24*7):
    """Return a copy of ``df`` with lagged ``PM2.5`` / ``PM10`` columns added.

    For each of the two series, one column ``'<col>(t-i)'`` is added for every
    ``i`` in ``1..lookback`` plus one column ``'<col>(t-<weekly_step>)'``.
    Rows containing any NaN (including those created by the shifts) are
    dropped. The input frame is left untouched.

    Args:
        df: DataFrame containing at least the ``PM2.5`` and ``PM10`` columns.
        lookback: Number of consecutive row lags to add per series.
        weekly_step: Extra single lag, in rows (defaults to 24 * 7).

    Returns:
        A new DataFrame with the lag columns appended after the originals.
    """
    colunas_selecionadas = ['PM2.5', 'PM10']

    # Build every lag first; inserting ~100 columns one at a time fragments
    # the frame and makes pandas emit a PerformanceWarning.
    lagged = {}
    for col in colunas_selecionadas:
        source = df[col]
        for i in range(1, lookback + 1):
            lagged[f'{col}(t-{i})'] = source.shift(i).to_numpy()
        lagged[f'{col}(t-{weekly_step})'] = source.shift(weekly_step).to_numpy()

    result = df.copy()

    # A lag column that already exists is overwritten in place, as before.
    for name in [n for n in lagged if n in result.columns]:
        result[name] = lagged.pop(name)

    if lagged:
        new_columns = pd.DataFrame(lagged, index=result.index)
        result = pd.concat([result, new_columns], axis=1)

    result.dropna(inplace=True)
    return result

class TimeSeriesDataset(Dataset):
    """Pair an indexable collection of inputs with its targets.

    Item ``i`` is the tuple ``(X[i], y[i])``; the dataset length is
    ``len(X)``.

    Args:
        X: Indexable, sized collection of input samples.
        y: Indexable, sized collection of targets, one per sample.

    Raises:
        ValueError: If ``X`` and ``y`` do not have the same length.
    """

    def __init__(self, X, y):
        # Fail early: a length mismatch would otherwise only surface as an
        # IndexError (or silently ignored targets) in the middle of training.
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same length, got {len(X)} and {len(y)}"
            )
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, i):
        return self.X[i], self.y[i]

class Attention(nn.Module):
    """Pool a sequence into one vector using a single learned scoring vector.

    The only parameter, ``attention_weights``, has ``hidden_size`` entries and
    is initialised from a standard normal distribution.
    """

    def __init__(self, hidden_size):
        super().__init__()
        self.attention_weights = nn.Parameter(torch.randn(hidden_size))

    def forward(self, lstm_output):
        """Return the attention-weighted sum of ``lstm_output`` over dim 1.

        Each position is scored by the tanh of its dot product with
        ``attention_weights``; the scores are normalised with a softmax over
        dim 1 and used to weight the positions before summing them. The last
        dimension of ``lstm_output`` must therefore equal ``hidden_size``.
        """
        raw_scores = (lstm_output @ self.attention_weights).tanh()
        position_weights = raw_scores.softmax(dim=1)
        return (lstm_output * position_weights.unsqueeze(-1)).sum(dim=1)

class LSTM(nn.Module):
    """Stack of single-layer LSTMs followed by attention pooling and a linear head.

    Layer ``i`` has ``hidden_sizes[i]`` units. Between consecutive layers the
    configured activation and dropout are applied; the output sequence of the
    last layer is pooled by ``Attention`` and mapped to one value by ``fc``.

    Args:
        input_size: Number of features per time step.
        hidden_sizes: Non-empty sequence with the hidden size of each layer.
        activation: One of ``'relu'``, ``'leaky_relu'``, ``'elu'``,
            ``'sigmoid'``; any other value means "no activation".
        dropout: Dropout probability between layers. With a single layer
            there is nothing in between, so dropout is an identity.
    """

    def __init__(self, input_size, hidden_sizes, activation='relu', dropout=0.2):
        super().__init__()
        self.hidden_sizes = hidden_sizes
        self.num_layers = len(hidden_sizes)
        self.activation = activation

        # Separate nn.LSTM modules (rather than num_layers=N) so each layer
        # can have its own width and an activation in between.
        self.lstm_layers = nn.ModuleList([
            nn.LSTM(input_size if i == 0 else hidden_sizes[i-1],
                   hidden_sizes[i],
                   num_layers=1,
                   batch_first=True)
            for i in range(self.num_layers)
        ])

        self.attention = Attention(hidden_sizes[-1])
        self.dropout = nn.Dropout(dropout) if self.num_layers > 1 else nn.Identity()
        self.fc = nn.Linear(hidden_sizes[-1], 1)

    def forward(self, x):
        """Run ``x`` (batch-first sequence) through the stack; returns ``fc`` output."""
        last_layer = self.num_layers - 1

        for i, lstm in enumerate(self.lstm_layers):
            # nn.LSTM starts from zero hidden/cell states when none are given,
            # already on the input's device and dtype. Building float32 zeros
            # on the CPU every call was redundant and broke non-float32 models.
            x, _ = lstm(x)

            if i < last_layer:
                x = self.apply_activation(x)
                x = self.dropout(x)

        x = self.attention(x)
        return self.fc(x)

    def apply_activation(self, x):
        """Apply the configured activation; unknown names leave ``x`` unchanged."""
        if self.activation == 'relu':
            return F.relu(x)
        elif self.activation == 'leaky_relu':
            return F.leaky_relu(x)
        elif self.activation == 'elu':
            return F.elu(x)
        elif self.activation == 'sigmoid':
            # torch.sigmoid replaces the deprecated F.sigmoid alias.
            return torch.sigmoid(x)
        return x

def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, patience, device):
    """Train ``model`` with early stopping and return it with its best weights.

    Each epoch runs one pass over ``train_loader`` (gradients clipped to a
    total norm of 1.0) and one evaluation pass over ``val_loader``. The mean
    validation loss drives a ``ReduceLROnPlateau`` scheduler (factor 0.5,
    patience 5) and the early-stopping counter.

    Args:
        model: Module to train; it is updated in place.
        train_loader: Sized iterable of batches; ``batch[0]`` is the input
            and ``batch[1]`` the target.
        val_loader: Sized iterable of validation batches, same layout.
        criterion: Loss callable ``criterion(output, target)``.
        optimizer: Optimizer bound to ``model``'s parameters.
        num_epochs: Maximum number of epochs.
        patience: Stop after this many consecutive epochs without a new best
            validation loss.
        device: Device the batches are moved to.

    Returns:
        ``model``, loaded with the weights of the epoch that achieved the
        lowest validation loss (unchanged if no epoch ever improved).
    """
    # `verbose` is deprecated in recent PyTorch releases, so learning-rate
    # changes are reported through logging below instead.
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5)
    best_val_loss = float('inf')
    epochs_without_improvement = 0
    best_model_state = None

    for epoch in range(num_epochs):
        model.train()
        train_loss = 0.0

        for batch in train_loader:
            x_batch, y_batch = batch[0].to(device), batch[1].to(device)
            optimizer.zero_grad()
            output = model(x_batch)
            loss = criterion(output, y_batch)
            loss.backward()
            clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            train_loss += loss.item()

        train_loss /= len(train_loader)

        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for batch in val_loader:
                x_batch, y_batch = batch[0].to(device), batch[1].to(device)
                output = model(x_batch)
                val_loss += criterion(output, y_batch).item()
        val_loss /= len(val_loader)

        logging.debug("Epoch %d: train_loss=%.6f val_loss=%.6f",
                      epoch + 1, train_loss, val_loss)

        lrs_before = [group['lr'] for group in optimizer.param_groups]
        scheduler.step(val_loss)
        lrs_after = [group['lr'] for group in optimizer.param_groups]
        if lrs_after != lrs_before:
            logging.info("Taxa de aprendizado reduzida para %s na época %d",
                         lrs_after, epoch + 1)

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            epochs_without_improvement = 0
            # state_dict() returns references to the live tensors; without a
            # copy the "best" snapshot would silently follow later updates.
            best_model_state = {
                name: tensor.detach().clone()
                for name, tensor in model.state_dict().items()
            }
        else:
            epochs_without_improvement += 1

        if epochs_without_improvement >= patience:
            logging.info("Early stopping ativado na época %d", epoch + 1)
            break

    # No snapshot exists when num_epochs is 0 or the loss was never finite.
    if best_model_state is not None:
        model.load_state_dict(best_model_state)
    return model

def objective(trial, X_train, y_train, X_val, y_val, device):
    """Optuna objective: train one sampled configuration, return its validation loss.

    Sampled hyperparameters: number of LSTM layers (1-4), hidden size per
    layer (16-64), batch size, learning rate and weight decay (log-uniform in
    1e-5..1e-2), activation name and dropout (0-0.5). The model is trained
    with AdamW and Huber loss for up to 1000 epochs with an early-stopping
    patience of 20.

    Args:
        trial: Optuna trial used to sample the hyperparameters.
        X_train, y_train: Training inputs and targets; ``X_train.shape[2]``
            is used as the model's input size.
        X_val, y_val: Validation inputs and targets.
        device: Device on which the model is trained.

    Returns:
        Mean Huber loss per validation batch of the trained model.
    """
    num_layers = trial.suggest_int('num_layers', 1, 4)
    hidden_sizes = [trial.suggest_int(f'hidden_size_{i}', 16, 64) for i in range(num_layers)]
    batch_size = trial.suggest_categorical('batch_size', [32, 64, 128, 256])
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-2, log=True)
    activation = trial.suggest_categorical('activation', ['relu', 'leaky_relu', 'elu', 'sigmoid'])
    dropout = trial.suggest_float('dropout', 0.0, 0.5)
    weight_decay = trial.suggest_float('weight_decay', 1e-5, 1e-2, log=True)

    train_dataset = TimeSeriesDataset(X_train, y_train)
    val_dataset = TimeSeriesDataset(X_val, y_val)

    # Pinned host memory only helps host-to-GPU copies; requesting it on a
    # CPU-only run just triggers a warning.
    pin_memory = torch.device(device).type == 'cuda'
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False, pin_memory=pin_memory)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, pin_memory=pin_memory)

    model = LSTM(X_train.shape[2], hidden_sizes, activation, dropout).to(device)
    optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    criterion = nn.HuberLoss()

    model = train_model(model, train_loader, val_loader, criterion, optimizer,
                       num_epochs=1000, patience=20, device=device)

    # Score the weights returned by train_model on the validation set.
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for batch in val_loader:
            x_batch, y_batch = batch[0].to(device), batch[1].to(device)
            output = model(x_batch)
            val_loss += criterion(output, y_batch).item()

    return val_loss / len(val_loader)

def _build_lag_sequences(scaled, columns, target='PM2.5', series=('PM2.5', 'PM10'),
                         lookback=48, weekly_step=24 * 7):
    """Turn scaled rows into ``(X, y)`` float tensors for the LSTM.

    ``X`` has shape ``(rows, steps, len(series))``: one step per lag, ordered
    from the oldest lag to ``t-1``, each holding the lagged value of every
    series. ``y`` has shape ``(rows, 1)`` and holds the ``target`` column.
    Columns are looked up by name, so their order in ``columns`` is irrelevant.
    """
    lags = sorted(set(range(1, lookback + 1)) | {weekly_step}, reverse=True)
    position = {name: i for i, name in enumerate(columns)}
    lag_idx = np.array([[position[f'{name}(t-{lag})'] for name in series]
                        for lag in lags])

    X = torch.tensor(scaled[:, lag_idx]).float()
    y = torch.tensor(scaled[:, [position[target]]]).float()
    return X, y


def main(filepath='../dados_tratados/combinado/Piratininga/Piratininga_tratado_combinado.csv',
         n_trials=1):
    """Run the full pipeline: load data, tune with Optuna, train and save the model.

    The data is split chronologically into 70 % train, 15 % validation and
    the remainder as test; the scaler is fitted on the training split only.
    The final model's weights are written to ``best_model.pth``.

    Args:
        filepath: CSV file passed to ``load_and_prepare_data``.
        n_trials: Number of Optuna trials.

    Returns:
        Tuple ``(final_model, scaler)``.
    """
    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    torch.backends.cudnn.benchmark = True

    lookback = 48
    weekly_step = 24 * 7

    # Load and prepare the data
    df = load_and_prepare_data(filepath)
    df = add_temporal_features(df)
    df = prepare_dataframe_for_lstm(df, lookback=lookback, weekly_step=weekly_step)

    # Chronological train / validation / test split
    train_size = int(len(df) * 0.7)
    val_size = int(len(df) * 0.15)

    train_df = df.iloc[:train_size]
    val_df = df.iloc[train_size:train_size + val_size]
    test_df = df.iloc[train_size + val_size:]

    # Normalise using statistics from the training split only
    scaler = StandardScaler()
    train_scaled = scaler.fit_transform(train_df)
    val_scaled = scaler.transform(val_df)
    test_scaled = scaler.transform(test_df)

    # Build the LSTM tensors from the lag columns by name. The former
    # reshape((-1, 25, 2)) of "every column but the first" matched neither the
    # number of columns produced above nor a guaranteed target position.
    # NOTE(review): the model input is the PM2.5/PM10 lags only (2 features
    # per step, as the old reshape implied) - confirm the calendar features
    # are not meant to be fed to the network.
    columns = list(df.columns)
    X_train, y_train = _build_lag_sequences(train_scaled, columns,
                                            lookback=lookback, weekly_step=weekly_step)
    X_val, y_val = _build_lag_sequences(val_scaled, columns,
                                        lookback=lookback, weekly_step=weekly_step)
    X_test, y_test = _build_lag_sequences(test_scaled, columns,
                                          lookback=lookback, weekly_step=weekly_step)

    # Hyperparameter search with Optuna
    study = optuna.create_study(direction='minimize', sampler=optuna.samplers.TPESampler())
    study.optimize(lambda trial: objective(trial, X_train, y_train, X_val, y_val, device),
                   n_trials=n_trials)

    # Train the final model with the best parameters
    best_params = study.best_params
    best_hidden_sizes = [best_params[f'hidden_size_{i}'] for i in range(best_params['num_layers'])]

    final_model = LSTM(X_train.shape[2], best_hidden_sizes,
                      best_params['activation'],
                      best_params['dropout']).to(device)

    optimizer = torch.optim.AdamW(final_model.parameters(),
                                 lr=best_params['learning_rate'],
                                 weight_decay=best_params['weight_decay'])

    train_dataset = TimeSeriesDataset(X_train, y_train)
    val_dataset = TimeSeriesDataset(X_val, y_val)
    test_dataset = TimeSeriesDataset(X_test, y_test)

    train_loader = DataLoader(train_dataset, batch_size=best_params['batch_size'], shuffle=False)
    val_loader = DataLoader(val_dataset, batch_size=best_params['batch_size'], shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=best_params['batch_size'], shuffle=False)

    criterion = nn.HuberLoss()
    final_model = train_model(final_model, train_loader, val_loader,
                            criterion, optimizer, 1000, 20, device)

    # Report the held-out loss so the test split is actually used.
    if len(test_loader) > 0:
        final_model.eval()
        test_loss = 0.0
        with torch.no_grad():
            for batch in test_loader:
                x_batch, y_batch = batch[0].to(device), batch[1].to(device)
                test_loss += criterion(final_model(x_batch), y_batch).item()
        logging.info("Loss no conjunto de teste: %.6f", test_loss / len(test_loader))

    # Save the model
    torch.save(final_model.state_dict(), 'best_model.pth')

    return final_model, scaler

if __name__ == "__main__":
    main()