In [7]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from matplotlib import pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset

In [8]:
# Run on the GPU when PyTorch can see one, otherwise on the CPU.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)
print(f'Using {device}')

# Load only the PM2.5 readings and their timestamps, discarding incomplete rows.
df = pd.read_csv(
    '../dados_tratados/combinado/Piratininga/Piratininga_tratado_combinado.csv',
    usecols=['PM2.5', 'Data e Hora'],
)
df.dropna(inplace=True)

# Index the frame by timestamp and keep a separate parsed copy of the dates.
df.index = pd.to_datetime(df['Data e Hora'], format='%Y-%m-%d %H:%M:%S')
train_dates = pd.to_datetime(df['Data e Hora'], format='%Y-%m-%d %H:%M:%S')

# Expand the timestamp into calendar features, one column per component.
df['Data e Hora'] = pd.to_datetime(df['Data e Hora'])
timestamp = df['Data e Hora'].dt
for field in ('hour', 'minute', 'year', 'month', 'day'):
    df[field] = getattr(timestamp, field)
df['day_of_week'] = timestamp.dayofweek
df['day_of_year'] = timestamp.dayofyear
df['week'] = timestamp.isocalendar().week

# The raw timestamp column is no longer needed once its components exist.
df.drop(columns='Data e Hora', inplace=True)

Using cuda


In [9]:
# Rescale the PM2.5 readings to the [0, 1] interval.
# NOTE(review): the scaler is fitted on the whole series before the data is
# split into train/validation/test, so held-out values influence the scaling.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_values = scaler.fit_transform(df[['PM2.5']].to_numpy())

df['PM2.5'] = scaled_values


def create_sequences(data, seq_length):
    """Build sliding-window samples for one-step-ahead forecasting.

    Window ``i`` is made of rows ``i .. i + seq_length - 1`` of ``data`` and its
    target is column 0 of the row that immediately follows the window.

    Args:
        data: 2-D NumPy array of shape (n_rows, n_features); column 0 holds the
            value to predict.
        seq_length: number of consecutive rows in each input window.

    Returns:
        Tuple ``(xs, ys)`` where ``xs`` is a float32 array of shape
        (n_windows, seq_length, n_features) and ``ys`` has shape (n_windows,),
        with ``n_windows = max(len(data) - seq_length, 0)``.
    """
    # Every start index whose target row (start + seq_length) still exists is
    # valid, so there are len(data) - seq_length windows. The previous bound
    # subtracted one more and silently dropped the final sample.
    n_windows = len(data) - seq_length
    if n_windows <= 0:
        # Not enough rows for a single window: return correctly shaped empties
        # so downstream code can still rely on the array ranks.
        n_features = data.shape[1]
        return np.empty((0, seq_length, n_features), dtype=np.float32), np.empty((0,))

    xs = [data[start:start + seq_length, :].astype(np.float32) for start in range(n_windows)]
    ys = [data[start + seq_length, 0] for start in range(n_windows)]
    return np.array(xs), np.array(ys)


# Number of past time steps fed to the model for each prediction.
seq_length = 8
X, y = create_sequences(df.values, seq_length)

# Split chronologically (70% train / 15% validation / 15% test).
# Consecutive windows overlap in all but one row and each target also appears
# as an input of the following windows, so a shuffled split would place
# near-duplicates of the held-out samples in the training set and make the
# validation/test scores optimistic. shuffle=False keeps the original order.
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, shuffle=False)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, shuffle=False)

# Convert to float32 tensors; targets become column vectors of shape (n, 1)
# to match the model output.
X_train = torch.from_numpy(X_train).float()
y_train = torch.from_numpy(y_train).float().reshape(-1, 1)
X_val = torch.from_numpy(X_val).float()
y_val = torch.from_numpy(y_val).float().reshape(-1, 1)
X_test = torch.from_numpy(X_test).float()
y_test = torch.from_numpy(y_test).float().reshape(-1, 1)

train_data = TensorDataset(X_train, y_train)
val_data = TensorDataset(X_val, y_val)
test_data = TensorDataset(X_test, y_test)

batch_size = 64
# Only the training batches are shuffled; evaluation loaders keep a fixed
# order so predictions stay aligned with the underlying time series.
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

In [None]:
from skopt import BayesSearchCV
from skopt.space import Real, Integer, Categorical
import torch
import torch.nn as nn
import numpy as np
from torch.utils.data import DataLoader, TensorDataset
from sklearn.base import BaseEstimator, RegressorMixin

# Number of features per time step (must match the data being used).
# Assumes X_train is a tensor shaped (n_samples, seq_len, n_features).
input_size = X_train.size(2)
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

class Attention(nn.Module):
    """Additive-score attention pooling over the time axis of an LSTM output.

    A single linear layer assigns one score to every time step; the scores are
    softmax-normalised along dimension 1 and used to average the hidden states.
    """

    def __init__(self, hidden_layer_size):
        super().__init__()
        # Maps each hidden state to a single unnormalised score.
        self.attention = nn.Linear(hidden_layer_size, 1)

    def forward(self, lstm_output):
        """Pool ``lstm_output`` into one vector per sample.

        Args:
            lstm_output: tensor whose dimension 1 is the one being attended
                over and whose last dimension has ``hidden_layer_size`` entries.

        Returns:
            Tuple ``(context_vector, attention_weights)``: the weighted sum of
            ``lstm_output`` over dimension 1, and the softmax weights used.
        """
        scores = self.attention(lstm_output).squeeze(-1)
        attention_weights = scores.softmax(dim=1)
        weighted_states = attention_weights.unsqueeze(-1) * lstm_output
        context_vector = weighted_states.sum(dim=1)
        return context_vector, attention_weights

class LSTMWithAttention(nn.Module):
    """LSTM encoder followed by attention pooling and a linear output head.

    Args:
        input_size: number of features per time step.
        hidden_layer_size: size of the LSTM hidden state.
        output_size: number of values predicted per sample.
        num_layers: number of stacked LSTM layers.
        drop_prob: dropout probability applied between stacked LSTM layers.
        activation_function: ``'relu'``, ``'tanh'`` or ``'sigmoid'`` to apply
            that function to the output; any other value leaves the linear
            output unchanged.
    """

    def __init__(self, input_size, hidden_layer_size, output_size, num_layers, drop_prob, activation_function):
        super().__init__()
        # nn.LSTM only applies dropout between stacked layers, so with a single
        # layer a non-zero value has no effect and just triggers a UserWarning.
        # Pass 0 in that case; the resulting model is the same.
        lstm_dropout = drop_prob if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(input_size, hidden_layer_size, num_layers, batch_first=True, dropout=lstm_dropout)
        self.attention = Attention(hidden_layer_size)
        self.linear = nn.Linear(hidden_layer_size, output_size)
        self.activation_function = activation_function

    def forward(self, input_seq):
        """Return predictions for a batch of sequences.

        Args:
            input_seq: batch-first tensor fed to the LSTM
                (batch, seq_len, input_size).

        Returns:
            Tensor of shape (batch, output_size).
        """
        lstm_out, _ = self.lstm(input_seq)
        # The attention weights are not needed for the prediction itself.
        context_vector, _ = self.attention(lstm_out)
        predictions = self.linear(context_vector)
        # Functional activations avoid building a new module on every call.
        if self.activation_function == 'relu':
            return torch.relu(predictions)
        if self.activation_function == 'tanh':
            return torch.tanh(predictions)
        if self.activation_function == 'sigmoid':
            return torch.sigmoid(predictions)
        return predictions

def train_and_evaluate_lstm(hidden_layer_size, num_layers, lr, batch_size, drop_prob, activation_function, weight_decay, num_epochs, patience):
    """Train an LSTMWithAttention with early stopping and report its best validation loss.

    The model is trained on the notebook-level ``X_train``/``y_train`` tensors
    and evaluated on ``X_val``/``y_val`` after every epoch. Whenever the
    validation loss improves, the weights are written to ``best_model.pth``.

    Args:
        hidden_layer_size: size of the LSTM hidden state.
        num_layers: number of stacked LSTM layers.
        lr: Adam learning rate.
        batch_size: mini-batch size for both loaders.
        drop_prob: dropout probability passed to the model.
        activation_function: output activation name passed to the model.
        weight_decay: Adam weight decay.
        num_epochs: maximum number of training epochs.
        patience: number of consecutive epochs without improvement after
            which training stops.

    Returns:
        The lowest validation MSE observed (``np.inf`` if no epoch was run).
        This is the loss of the weights stored in ``best_model.pth``; the
        model object itself is not returned.
    """
    model = LSTMWithAttention(
        input_size=input_size,
        hidden_layer_size=hidden_layer_size,
        num_layers=num_layers,
        drop_prob=drop_prob,
        output_size=1,
        activation_function=activation_function
    ).to(device)
    loss_function = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    train_data = TensorDataset(X_train, y_train)
    val_data = TensorDataset(X_val, y_val)
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    # The validation loss is a sample-weighted mean, so ordering is irrelevant;
    # no need to shuffle.
    val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)

    checkpoint_path = 'best_model.pth'
    best_val_loss = np.inf
    checkpoint_saved = False
    epochs_no_improve = 0

    def train_model():
        """Run one optimisation pass over the training loader."""
        model.train()
        for seq, labels in train_loader:
            seq, labels = seq.to(device), labels.to(device)
            optimizer.zero_grad()
            y_pred = model(seq)
            single_loss = loss_function(y_pred, labels)
            single_loss.backward()
            optimizer.step()

    def evaluate_model():
        """Return the mean per-sample loss over the validation set."""
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for seq, labels in val_loader:
                seq, labels = seq.to(device), labels.to(device)
                y_pred = model(seq)
                single_loss = loss_function(y_pred, labels)
                # Weight each batch by its size so a short last batch does not
                # distort the average.
                val_loss += single_loss.item() * seq.size(0)
        val_loss /= len(val_loader.dataset)
        return val_loss

    for epoch in range(num_epochs):
        train_model()
        val_loss = evaluate_model()
        print(f"Epoch {epoch + 1}/{num_epochs}, Validation Loss: {val_loss}")

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            epochs_no_improve = 0
            torch.save(model.state_dict(), checkpoint_path)
            checkpoint_saved = True
        else:
            epochs_no_improve += 1
            # ">=" so that patience values of 0 or below still stop training
            # instead of disabling early stopping by accident.
            if epochs_no_improve >= patience:
                print("Early stopping")
                break

    # Only reload a checkpoint written by this run; otherwise a stale file from
    # an earlier configuration could be loaded (or the load could fail).
    if checkpoint_saved:
        model.load_state_dict(torch.load(checkpoint_path))
    # Report the loss of the restored weights, not the loss of whichever epoch
    # happened to run last.
    return best_val_loss

class PyTorchLSTMAttentionRegressor(BaseEstimator, RegressorMixin):
    """scikit-learn style estimator wrapping ``LSTMWithAttention``.

    Args:
        hidden_layer_size: size of the LSTM hidden state.
        num_layers: number of stacked LSTM layers.
        lr: Adam learning rate.
        batch_size: mini-batch size used for training and prediction.
        drop_prob: dropout probability passed to the model.
        activation_function: output activation name passed to the model.
        weight_decay: Adam weight decay.
        num_epochs: maximum number of training epochs.
        patience: number of consecutive epochs without validation improvement
            after which training stops. ``None`` or a value <= 0 disables
            early stopping.
        validation_fraction: share of the samples given to ``fit`` (taken from
            the end, without shuffling) that is held out to monitor the
            validation loss. ``0`` or ``None`` disables early stopping and
            trains on every sample.
    """

    def __init__(self, hidden_layer_size=100, num_layers=2, lr=0.001, batch_size=64, drop_prob=0.2,
                 activation_function='relu', weight_decay=1e-8, num_epochs=50, patience=5,
                 validation_fraction=0.1):
        # Hyperparameters are stored exactly as received (required for
        # scikit-learn cloning); type conversions happen in fit/predict.
        self.hidden_layer_size = hidden_layer_size
        self.num_layers = num_layers
        self.lr = lr
        self.drop_prob = drop_prob
        self.batch_size = batch_size
        self.activation_function = activation_function
        self.weight_decay = weight_decay
        self.num_epochs = num_epochs
        self.patience = patience
        self.validation_fraction = validation_fraction

    def fit(self, X, y):
        """Train a fresh model on ``X`` and ``y``.

        Args:
            X: array-like of shape (n_samples, seq_len, n_features).
            y: array-like with one target per sample.

        Returns:
            self

        Raises:
            ValueError: if ``X`` is not 3-D or ``X`` and ``y`` differ in length.
        """
        X = np.asarray(X, dtype=np.float32)
        y = np.asarray(y, dtype=np.float32).reshape(-1, 1)
        if X.ndim != 3:
            raise ValueError(f"X must have shape (n_samples, seq_len, n_features); got {X.shape}")
        if len(X) != len(y):
            raise ValueError(f"X and y have inconsistent lengths: {len(X)} != {len(y)}")

        # Hyperparameter searches may hand over NumPy scalars; convert them to
        # plain Python numbers before passing them to PyTorch.
        batch_size = int(self.batch_size)
        self.model = LSTMWithAttention(
            # Take the feature count from the data actually passed in rather
            # than from a notebook-level global.
            input_size=X.shape[2],
            hidden_layer_size=int(self.hidden_layer_size),
            num_layers=int(self.num_layers),
            drop_prob=float(self.drop_prob),
            output_size=1,
            activation_function=self.activation_function
        ).to(device)
        self.loss_function = nn.MSELoss()
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=float(self.lr),
                                          weight_decay=float(self.weight_decay))

        # Hold out the tail of the data so that `patience` has a validation
        # loss to act on; with n_val == 0 every sample is used for training.
        n_val = self._validation_size(len(X))
        n_fit = len(X) - n_val
        train_data = TensorDataset(torch.from_numpy(X[:n_fit]), torch.from_numpy(y[:n_fit]))
        self.train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
        if n_val > 0:
            val_data = TensorDataset(torch.from_numpy(X[n_fit:]), torch.from_numpy(y[n_fit:]))
            self.val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)
        else:
            self.val_loader = None

        self.train_model()
        return self

    def _validation_size(self, n_samples):
        """Return how many trailing samples to hold out for early stopping (0 = none)."""
        if self.patience is None or self.patience <= 0:
            return 0
        if not self.validation_fraction or self.validation_fraction <= 0:
            return 0
        n_val = int(n_samples * self.validation_fraction)
        # Never hold out everything, and skip the hold-out when it would be empty.
        return n_val if 0 < n_val < n_samples else 0

    def _validation_loss(self, loader):
        """Return the mean per-sample loss of the current model over ``loader``."""
        self.model.eval()
        total = 0.0
        with torch.no_grad():
            for seq, labels in loader:
                seq, labels = seq.to(device), labels.to(device)
                total += self.loss_function(self.model(seq), labels).item() * seq.size(0)
        return total / len(loader.dataset)

    def train_model(self):
        """Optimise ``self.model`` on ``self.train_loader``.

        Runs up to ``num_epochs`` epochs. When a validation loader is present,
        training stops after ``patience`` consecutive epochs without a lower
        validation loss and the best weights seen are restored.
        """
        val_loader = getattr(self, 'val_loader', None)
        best_val_loss = np.inf
        best_state = None
        epochs_no_improve = 0

        for epoch in range(int(self.num_epochs)):
            # Re-enter training mode each epoch because validation switches
            # the model to eval mode.
            self.model.train()
            for seq, labels in self.train_loader:
                seq, labels = seq.to(device), labels.to(device)
                self.optimizer.zero_grad()
                y_pred = self.model(seq)
                single_loss = self.loss_function(y_pred, labels)
                single_loss.backward()
                self.optimizer.step()

            if val_loader is None:
                continue

            val_loss = self._validation_loss(val_loader)
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                epochs_no_improve = 0
                # Clone the tensors: state_dict() returns references that later
                # optimiser steps would otherwise overwrite.
                best_state = {name: tensor.detach().clone()
                              for name, tensor in self.model.state_dict().items()}
            else:
                epochs_no_improve += 1
                if epochs_no_improve >= self.patience:
                    break

        if best_state is not None:
            self.model.load_state_dict(best_state)

    def predict(self, X):
        """Predict targets for ``X``.

        Args:
            X: array-like of shape (n_samples, seq_len, n_features).

        Returns:
            float32 NumPy array of shape (n_samples, 1).
        """
        X = np.asarray(X, dtype=np.float32)
        if len(X) == 0:
            # np.concatenate cannot handle an empty list of batches.
            return np.empty((0, 1), dtype=np.float32)

        self.model.eval()
        test_loader = DataLoader(torch.from_numpy(X), batch_size=int(self.batch_size), shuffle=False)

        predictions = []
        with torch.no_grad():
            for seq in test_loader:
                seq = seq.to(device)
                y_pred = self.model(seq)
                predictions.append(y_pred.cpu().numpy())

        return np.concatenate(predictions, axis=0)

# Hyperparameter ranges explored by the Bayesian search.
param_space = dict(
    hidden_layer_size=Integer(64, 512),
    num_layers=Integer(1, 5),
    lr=Real(0.0001, 0.01, prior='log-uniform'),
    batch_size=Categorical([32, 64, 96, 128]),
    num_epochs=Categorical([100, 200, 300, 400]),
    activation_function=Categorical(['relu', 'tanh', 'sigmoid']),
    drop_prob=Categorical([0.1, 0.2, 0.3]),
    weight_decay=Categorical([1e-6, 1e-4, 0.01]),
    patience=Categorical([3, 10, 15]),
)

# 30 search iterations, each scored with 3-fold cross-validation on the
# training data.
bayes_search = BayesSearchCV(
    estimator=PyTorchLSTMAttentionRegressor(),
    search_spaces=param_space,
    scoring='neg_mean_squared_error',
    cv=3,
    n_iter=30,
    random_state=42,
    verbose=3,
)
bayes_search.fit(X_train.numpy(), y_train.numpy())

# The scorer is the negated MSE, so flip the sign to print the MSE itself.
print("Best Parameters:", bayes_search.best_params_)
print("Best Score:", -bayes_search.best_score_)


Fitting 3 folds for each of 1 candidates, totalling 3 fits
