# In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# 1. Data Preparation
# Load the raw series and index it by timestamp.
df = pd.read_csv('dataset.csv')
df['date'] = pd.to_datetime(df['date'])
df.set_index('date', inplace=True)  # Set 'date' as index

# Features to use. The explicit copy makes `df` an independent frame, so the
# per-column assignments below never write into a slice of the loaded frame.
features = ['temperature', 'humidity', 'wind_speed', 'power']
df = df[features].copy()

# Define n_past, n_future, and n_features
n_past = 10  # Number of past time steps to use
n_future = 5  # Number of future time steps to predict
n_features = len(features)

# Work out the temporal 80:20 split *before* scaling so the scalers can be
# fitted on training rows only. Fitting them on the full series would leak the
# test period's min/max into the training inputs.
n_windows = max(len(df) - n_past - n_future + 1, 0)
train_size = int(n_windows * 0.8)
if train_size < 1:
    raise ValueError(
        "dataset.csv has too few rows to build a training window of "
        f"{n_past} past + {n_future} future steps"
    )
# Window k reads rows [k, k + n_past + n_future), so the training windows
# (k < train_size) touch exactly this many leading rows.
n_train_rows = train_size + n_past + n_future - 1

# Scaling: one MinMaxScaler per feature, kept for the inverse transform later.
scalers = {}
for column in df.columns:
    scaler = MinMaxScaler()
    scaler.fit(df.iloc[:n_train_rows][[column]])
    # Test rows are transformed with the training statistics and may therefore
    # fall slightly outside [0, 1].
    df[column] = scaler.transform(df[[column]]).ravel()
    scalers['scaler_' + column] = scaler

# Create sequences (sliding window approach). The frame is converted to an
# array once instead of on every iteration.
values = df.to_numpy()
window_ends = range(n_past, len(values) - n_future + 1)
X = np.array([values[i - n_past:i] for i in window_ends])
y = np.array([values[i:i + n_future] for i in window_ends])

# Train/Test Split (Temporal split - 80:20)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]


# E1D1 model: one bidirectional-LSTM encoder feeding one bidirectional-LSTM decoder

# Encoder: consumes (n_past, n_features) windows. With return_state=True the
# call result is indexable: entry 0 is the layer output, the remaining entries
# are the returned states.
encoder_inputs = tf.keras.layers.Input(shape=(n_past, n_features))
e1d1_encoder_layer = tf.keras.layers.Bidirectional(
    tf.keras.layers.LSTM(units=64, return_state=True)
)
encoder_bilstm = e1d1_encoder_layer(encoder_inputs)
encoder_states1 = encoder_bilstm[1:]

# Decoder input: the encoder output repeated once per step to predict.
decoder_inputs = tf.keras.layers.RepeatVector(n_future)(encoder_bilstm[0])

# Decoder: starts from the encoder states and emits one vector per future step.
e1d1_decoder_layer = tf.keras.layers.Bidirectional(
    tf.keras.layers.LSTM(units=64, return_sequences=True)
)
decoder_bilstm = e1d1_decoder_layer(decoder_inputs, initial_state=encoder_states1)

# Per-step projection back to n_features values.
e1d1_output_layer = tf.keras.layers.TimeDistributed(
    tf.keras.layers.Dense(units=n_features)
)
decoder_outputs1 = e1d1_output_layer(decoder_bilstm)

model_e1d1_bilstm = tf.keras.models.Model(
    inputs=encoder_inputs,
    outputs=decoder_outputs1,
)

# E2D2 model: two stacked bidirectional-LSTM encoders and two stacked decoders

encoder_inputs = tf.keras.layers.Input(shape=(n_past, n_features))

# Encoder level 1 returns the full sequence (for level 2) plus its states.
e2d2_encoder_level1 = tf.keras.layers.Bidirectional(
    tf.keras.layers.LSTM(units=64, return_sequences=True, return_state=True)
)
encoder_bilstm1 = e2d2_encoder_level1(encoder_inputs)
encoder_states1 = encoder_bilstm1[1:]

# Encoder level 2 reads the level-1 sequence and returns its output plus states.
e2d2_encoder_level2 = tf.keras.layers.Bidirectional(
    tf.keras.layers.LSTM(units=64, return_state=True)
)
encoder_bilstm2 = e2d2_encoder_level2(encoder_bilstm1[0])
encoder_states2 = encoder_bilstm2[1:]

# Decoder input: the level-2 encoder output repeated once per step to predict.
decoder_inputs = tf.keras.layers.RepeatVector(n_future)(encoder_bilstm2[0])

# Each decoder level is initialised from the states of the matching encoder level.
e2d2_decoder_level1 = tf.keras.layers.Bidirectional(
    tf.keras.layers.LSTM(units=64, return_sequences=True)
)
decoder_bilstm1 = e2d2_decoder_level1(decoder_inputs, initial_state=encoder_states1)

e2d2_decoder_level2 = tf.keras.layers.Bidirectional(
    tf.keras.layers.LSTM(units=64, return_sequences=True)
)
decoder_bilstm2 = e2d2_decoder_level2(decoder_bilstm1, initial_state=encoder_states2)

# Per-step projection back to n_features values.
e2d2_output_layer = tf.keras.layers.TimeDistributed(
    tf.keras.layers.Dense(units=n_features)
)
decoder_outputs2 = e2d2_output_layer(decoder_bilstm2)

model_e2d2_bilstm = tf.keras.models.Model(
    inputs=encoder_inputs,
    outputs=decoder_outputs2,
)

# 3. Compilation and Training
# Learning rate decays by 10% per epoch, starting from 1e-3.
reduce_lr = tf.keras.callbacks.LearningRateScheduler(lambda x: 1e-3 * 0.90 ** x)
# Stop once val_loss has not improved for 5 epochs and roll back to the best epoch.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)


def _compile_and_fit(model):
    """Compile `model` with Adam + Huber loss, train it, and return the History.

    Validation uses the last 10% of the training windows (`validation_split`
    takes the trailing samples before shuffling). The test set is deliberately
    NOT used here: early stopping restores the weights of the best validation
    epoch, so validating on X_test would tune the model on the data that is
    later reported as held-out performance.
    """
    model.compile(optimizer=tf.keras.optimizers.Adam(), loss=tf.keras.losses.Huber())
    return model.fit(
        X_train,
        y_train,
        epochs=25,
        validation_split=0.1,
        batch_size=16,
        verbose=1,
        callbacks=[reduce_lr, early_stopping],
    )


history_e1d1_bilstm = _compile_and_fit(model_e1d1_bilstm)
history_e2d2_bilstm = _compile_and_fit(model_e2d2_bilstm)


# 4. Prediction and Inverse Scaling
# Run both models on the test windows; the outputs are still in scaled units.
pred_e1d1_bilstm = model_e1d1_bilstm.predict(X_test)
pred_e2d2_bilstm = model_e2d2_bilstm.predict(X_test)

# Undo the per-feature scaling channel by channel. Every array is overwritten
# in place, so after this loop predictions and targets are in original units.
for feature_idx, feature_name in enumerate(features):
    feature_scaler = scalers['scaler_' + feature_name]
    for scaled in (pred_e1d1_bilstm, pred_e2d2_bilstm, y_train, y_test):
        scaled[:, :, feature_idx] = feature_scaler.inverse_transform(scaled[:, :, feature_idx])


# 5. Evaluation (for the whole model across all 5 predicted days)

from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# Define function for Mean Absolute Percentage Error (MAPE)
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error (MAPE) in percent.

    Args:
        y_true: Ground-truth values (array-like).
        y_pred: Predicted values (array-like, broadcastable against `y_true`).

    Returns:
        ``mean(|y_true - y_pred| / |y_true|) * 100`` taken over the entries
        whose true value is non-zero. Entries with ``y_true == 0`` are skipped
        because their percentage error is undefined and would otherwise turn
        the whole result into inf/nan. Returns ``nan`` when no entry has a
        non-zero true value.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    abs_error = np.abs(y_true - y_pred)
    # Broadcast the denominator to the error's shape so the mask lines up even
    # when one of the inputs was a scalar or a lower-rank array.
    abs_true = np.abs(np.broadcast_to(y_true, abs_error.shape))

    defined = abs_true != 0
    if not defined.any():
        return float('nan')
    return np.mean(abs_error[defined] / abs_true[defined]) * 100

# Flatten the true and predicted values for overall comparison.
# NOTE: this pools every feature and every forecast step into one vector, so
# the scale-dependent metrics (MAE/MSE/RMSE) may be dominated by whichever
# feature has the largest magnitude in original units.
y_true_all = y_test.flatten()
# Use predictions from the BiLSTM models:
y_pred_e1d1_all = pred_e1d1_bilstm.flatten()
y_pred_e2d2_all = pred_e2d2_bilstm.flatten()

# Compute metrics for E1D1 model
mae_e1d1 = mean_absolute_error(y_true_all, y_pred_e1d1_all)
mse_e1d1 = mean_squared_error(y_true_all, y_pred_e1d1_all)
rmse_e1d1 = np.sqrt(mse_e1d1)
mape_e1d1 = mean_absolute_percentage_error(y_true_all, y_pred_e1d1_all)
r2_e1d1 = r2_score(y_true_all, y_pred_e1d1_all)

# Compute metrics for E2D2 model (same set as E1D1 so the table rows are comparable)
mae_e2d2 = mean_absolute_error(y_true_all, y_pred_e2d2_all)
mse_e2d2 = mean_squared_error(y_true_all, y_pred_e2d2_all)
rmse_e2d2 = np.sqrt(mse_e2d2)
mape_e2d2 = mean_absolute_percentage_error(y_true_all, y_pred_e2d2_all)
r2_e2d2 = r2_score(y_true_all, y_pred_e2d2_all)

# Display results in a structured table
import pandas as pd
results_df = pd.DataFrame({
    "Model": ["E1D1", "E2D2"],
    "MAE": [mae_e1d1, mae_e2d2],
    "MSE": [mse_e1d1, mse_e2d2],
    "RMSE": [rmse_e1d1, rmse_e2d2],
    "MAPE (%)": [mape_e1d1, mape_e2d2],
    "R2 Score": [r2_e1d1, r2_e2d2],
})

print(results_df)

from sklearn.metrics import mean_squared_error
import numpy as np

# Collapse (samples, time steps, features) into (samples * time steps, features)
# for the targets and for both models' predictions.
y_test_2d = y_test.reshape(-1, y_test.shape[-1])
pred_e1d1_bilstm_2d = pred_e1d1_bilstm.reshape(-1, pred_e1d1_bilstm.shape[-1])
pred_e2d2_bilstm_2d = pred_e2d2_bilstm.reshape(-1, pred_e2d2_bilstm.shape[-1])

# Each model's MSE is divided by the variance of the true values, taken over
# all entries of y_test_2d at once.
y_var = np.var(y_test_2d)
normalized_mse_e1d1 = mean_squared_error(y_test_2d, pred_e1d1_bilstm_2d) / y_var
normalized_mse_e2d2 = mean_squared_error(y_test_2d, pred_e2d2_bilstm_2d) / y_var

for label, score in (
    ("Normalized MSE E1D1:", normalized_mse_e1d1),
    ("Normalized MSE E2D2:", normalized_mse_e2d2),
):
    print(label, score)

# Define the split_series function (re-added)
def split_series(series, n_past, n_future):
    """Cut a 2-D array into consecutive (past, future) window pairs.

    Args:
        series: 2-D array indexed as ``series[row_slice, :]``.
        n_past: Number of rows in each input window.
        n_future: Number of rows in each target window, taken directly after
            the input window.

    Returns:
        A tuple ``(X, y)`` of arrays built from the collected windows. Window
        starts advance one row at a time and stop as soon as a target window
        would run past the end of ``series``. Both arrays are empty when no
        complete pair fits.
    """
    total_rows = len(series)
    window_span = n_past + n_future

    past_windows = []
    future_windows = []
    start = 0
    # Stop at the first start whose target window would overrun the series.
    while start < total_rows and start + window_span <= total_rows:
        split_at = start + n_past
        past_windows.append(series[start:split_at, :])
        future_windows.append(series[split_at:start + window_span, :])
        start += 1

    return np.array(past_windows), np.array(future_windows)


n_future = 10  # Forecast horizon (time steps) for the roll-forward below

# NOTE: reassigning n_future does not change the models. Their decoder length
# was fixed when RepeatVector(n_future) was created, so a single predict() call
# still returns the original number of steps. The horizon above is therefore
# reached by feeding each prediction back in as input.


def _roll_forward_forecast(model, window, horizon):
    """Forecast `horizon` steps by repeatedly feeding predictions back as input.

    Args:
        model: Trained model mapping (1, n_past, n_features) to
            (1, steps, n_features) in scaled units.
        window: Scaled input window of shape (1, n_past, n_features).
        horizon: Total number of future steps to return.

    Returns:
        Array of shape (1, horizon, n_features) in scaled units.
    """
    window_length = window.shape[1]
    chunks = []
    steps_done = 0
    while steps_done < horizon:
        chunk = model.predict(window)
        if chunk.shape[1] < 1:
            raise ValueError("model returned no time steps; cannot roll forward")
        chunks.append(chunk)
        steps_done += chunk.shape[1]
        # Slide the window: append the new steps, keep the most recent rows.
        window = np.concatenate([window, chunk], axis=1)[:, -window_length:, :]
    return np.concatenate(chunks, axis=1)[:, :horizon, :]


# Last test window, kept in case later cells reference it. Its targets are the
# final rows of the dataset, so it is not used as the forecast input.
X_test_last = X_test[-1:].reshape(1, n_past, n_features) # Reshape to (1, n_past, n_features)

# Forecast from the most recent n_past observed rows (df is already scaled),
# so every predicted step lies beyond the end of the data.
latest_window = df.iloc[-n_past:].to_numpy().reshape(1, n_past, n_features)

pred_next_10_days_e1d1 = _roll_forward_forecast(model_e1d1_bilstm, latest_window, n_future)
pred_next_10_days_e2d2 = _roll_forward_forecast(model_e2d2_bilstm, latest_window, n_future)

# Inverse transform the predicted values
for index, i in enumerate(features):  # Use 'features' list
    scaler = scalers['scaler_' + i]
    pred_next_10_days_e1d1[:, :, index] = scaler.inverse_transform(pred_next_10_days_e1d1[:, :, index])
    pred_next_10_days_e2d2[:, :, index] = scaler.inverse_transform(pred_next_10_days_e2d2[:, :, index])
