In [36]:
# %%
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
import tensorflow as tf
import tensorflow_probability as tfp
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import (
    Input, Dense, Dropout, LayerNormalization, LSTM, Conv1D, MaxPooling1D, Flatten
)
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.regularizers import l2
from keras_tuner import HyperParameters, Hyperband
import os

tfd = tfp.distributions

# Carregar dados
data = pd.read_csv("dataset.csv")

# Converter 'id' em datetime e definir como índice
data['timestamp'] = pd.to_datetime(data['id'], errors='coerce')
data.set_index('timestamp', inplace=True)

# Selecionar variáveis
variables = data[['ws100', 'humid', 'wdisp100', 'hour', 'wdir100']]

# Remover valores ausentes
variables = variables.dropna()

# Padronização dos dados usando MinMaxScaler
scaler = MinMaxScaler()
variables_scaled = scaler.fit_transform(variables)

# Parâmetros
sequence_length = 36  # janela de aprendizado de 36
pred_step = 6         # Previsão do sexto passo à frente
split_ratio = 0.8     # 80% treinamento, 20% teste

# Divisão dos dados em treinamento e teste
split_index = int(len(variables_scaled) * split_ratio)
train_data = variables_scaled[:split_index]
test_data = variables_scaled[split_index:]

# Função para preparar sequências de dados
def create_sequences(data, seq_length, pred_step):
    X, y = [], []
    for i in range(len(data) - seq_length - pred_step + 1):
        X.append(data[i:i+seq_length])
        y.append(data[i+seq_length+pred_step-1, 0])  # ws100 no sexto passo à frente
    return np.array(X), np.array(y)

# Criar sequências para treinamento e teste
X_train, y_train = create_sequences(train_data, sequence_length, pred_step)
X_test, y_test = create_sequences(test_data, sequence_length, pred_step)

# Ajustar as formas de y_train e y_test
y_train = y_train.reshape(-1, 1)
y_test = y_test.reshape(-1, 1)

# Valores mínimos e máximos de 'ws100' para inversão da escala
ws100_min = scaler.data_min_[0]
ws100_max = scaler.data_max_[0]

# Definição de métricas customizadas
def custom_mae(y_true, y_pred):
    return tf.reduce_mean(tf.abs(y_true - y_pred))

def custom_rmse(y_true, y_pred):
    return tf.sqrt(tf.reduce_mean(tf.square(y_true - y_pred)))

def custom_mape(y_true, y_pred):
    epsilon = tf.keras.backend.epsilon()
    return tf.reduce_mean(tf.abs((y_true - y_pred) / tf.maximum(tf.abs(y_true), epsilon))) * 100

custom_objects = {
    'custom_mae': custom_mae,
    'custom_rmse': custom_rmse,
    'custom_mape': custom_mape
}

# Cálculo de pesos baseados na frequência (mesmo código que antes)
ws100_train = train_data[:, 0]  # ws100 é a primeira coluna
hist, bin_edges = np.histogram(ws100_train, bins=100)
freq = hist / np.sum(hist)
weights = 1 / (freq + 1e-6)
weights = weights / np.max(weights)

def get_weight(value):
    bin_index = np.searchsorted(bin_edges, value) - 1
    bin_index = np.clip(bin_index, 0, len(weights) - 1)
    return weights[bin_index]

def weighted_mse(y_true, y_pred):
    weights_vector = tf.numpy_function(get_weight, [y_true], tf.float64)
    weights_vector = tf.cast(weights_vector, tf.float32)
    loss = tf.reduce_mean(weights_vector * tf.square(y_true - y_pred))
    return loss

def negative_log_likelihood(y_true, y_pred):
    return -y_pred.log_prob(y_true)

# Definir a função de distribuição separadamente
def make_normal_distribution(params):
    loc = params[..., 0]
    scale = 1e-3 + tf.math.softplus(params[..., 1])
    return tfd.Normal(loc=loc, scale=scale)

# Função para construir o modelo
def build_model_cnn_lstm_probabilistic(hp):
    num_features = X_train.shape[2]
    units = hp.Int('units', min_value=64, max_value=256, step=64)
    learning_rate = hp.Choice('learning_rate', [1e-4, 1e-3])
    dropout_rate_initial = hp.Float('dropout_rate_initial', min_value=0.1, max_value=0.3, step=0.1)
    optimizer_choice = hp.Choice('optimizer', ['adam', 'rmsprop'])
    l2_reg = hp.Float('l2_reg', min_value=1e-4, max_value=1e-2, sampling='log')

    # Entrada
    inputs = Input(shape=(sequence_length, num_features), name='input_layer')

    # Camadas CNN com Regularização L2
    x = Conv1D(filters=64, kernel_size=3, activation='relu', padding='same',
               kernel_regularizer=l2(l2_reg))(inputs)
    x = MaxPooling1D(pool_size=2)(x)
    x = Dropout(rate=dropout_rate_initial)(x)

    x = Conv1D(filters=128, kernel_size=3, activation='relu', padding='same',
               kernel_regularizer=l2(l2_reg))(x)
    x = MaxPooling1D(pool_size=2)(x)
    x = Dropout(rate=dropout_rate_initial)(x)

    # Camadas LSTM com Regularização L2
    x = LSTM(units=units, return_sequences=True, dropout=dropout_rate_initial,
             kernel_regularizer=l2(l2_reg))(x)
    x = LSTM(units=units, dropout=dropout_rate_initial,
             kernel_regularizer=l2(l2_reg))(x)

    # Ajuste de Dropout Dinâmico
    dropout_rate_final = hp.Float('dropout_rate_final', min_value=0.3, max_value=0.5, step=0.1)
    x = Dropout(rate=dropout_rate_final)(x)

    # Camada de Saída Probabilística
    params = Dense(2)(x)  # Saída dos parâmetros da distribuição
    outputs = tfp.layers.DistributionLambda(make_distribution_fn=make_normal_distribution)(params)

    # Definição do Modelo
    model = Model(inputs=inputs, outputs=outputs, name='CNN_LSTM_Probabilistic_Model')

    # Seleção do Otimizador
    if optimizer_choice == 'adam':
        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    elif optimizer_choice == 'rmsprop':
        optimizer = tf.keras.optimizers.RMSprop(learning_rate=learning_rate)

    # Compilação do Modelo com Negative Log-Likelihood
    model.compile(
        optimizer=optimizer,
        loss=negative_log_likelihood,
        metrics=[
            custom_mae,
            custom_rmse,
            custom_mape
        ]
    )

    return model

# Testar o modelo sem o Keras Tuner
model = build_model_cnn_lstm_probabilistic(HyperParameters())
model.summary()

# Treinar o modelo
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

history = model.fit(
    X_train, y_train,
    epochs=10,
    validation_data=(X_test, y_test),
    callbacks=[early_stopping],
    verbose=1
)

# Salvar o modelo
model_save_path = 'best_cinn_model.h5'
model.save(model_save_path)

# Carregar o modelo final
loaded_model = load_model(model_save_path, custom_objects={
    'negative_log_likelihood': negative_log_likelihood,
    'make_normal_distribution': make_normal_distribution
})

# Avaliação no conjunto de teste
y_pred_distribution = loaded_model(X_test)

# Extraindo a média das previsões
y_pred_mean = y_pred_distribution.mean().numpy()

# Inversão da padronização
y_pred_inv = y_pred_mean * (ws100_max - ws100_min) + ws100_min
y_test_inv = y_test * (ws100_max - ws100_min) + ws100_min

# Avaliação das métricas
mae = mean_absolute_error(y_test_inv, y_pred_inv)
rmse = np.sqrt(mean_squared_error(y_test_inv, y_pred_inv))
mape = np.mean(np.abs((y_test_inv - y_pred_inv) / np.maximum(y_test_inv, 1e-6))) * 100  # Evitar divisão por zero

print("Desempenho no Conjunto de Teste:")
print(f"MAE: {mae:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"MAPE: {mape:.2f}%\n")

# Plotagem das previsões
plt.figure(figsize=(12, 6))
plt.plot(y_test_inv, label='Valor Real')
plt.plot(y_pred_inv, label='Previsão (Média)')
plt.fill_between(range(len(y_pred_inv)),
                 y_pred_inv - y_pred_distribution.stddev().numpy() * (ws100_max - ws100_min),
                 y_pred_inv + y_pred_distribution.stddev().numpy() * (ws100_max - ws100_min),
                 color='gray', alpha=0.2, label='Incerteza (Desvio Padrão)')
plt.legend()
plt.title('Comparação entre Valores Reais e Previstos (Sexto Passo à Frente) - Modelo Probabilístico')
plt.xlabel('Amostras')
plt.ylabel('Velocidade do Vento a 100 metros')
plt.show()


ValueError: Exception encountered when calling layer 'distribution_lambda_4' (type DistributionLambda).

A KerasTensor cannot be used as input to a TensorFlow function. A KerasTensor is a symbolic placeholder for a shape and dtype, used when constructing Keras Functional models or Keras Functions. You can only use it as input to a Keras layer or a Keras operation (from the namespaces `keras.layers` and `keras.operations`). You are likely doing something like:

```
x = Input(...)
...
tf_fn(x)  # Invalid.
```

What you should do instead is wrap `tf_fn` in a layer:

```
class MyLayer(Layer):
    def call(self, x):
        return tf_fn(x)

x = MyLayer()(x)
```


Call arguments received by layer 'distribution_lambda_4' (type DistributionLambda):
  • inputs=<KerasTensor shape=(None, 2), dtype=float32, sparse=False, name=keras_tensor_57>
  • args=<class 'inspect._empty'>
  • kwargs={'training': 'None'}