# Forecasting Renewable Energy Output Using Long Short-Term Memory and Convolutional Neural Networks

## Import all the needed packages

In [1]:
# Basic Data Science Packages
import numpy as np
import pandas as pd

#TensorFlow packages required for building CNN + LSTM model
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Conv1D, BatchNormalization, Input, Dense, Flatten, LSTM, Reshape, TimeDistributed
from tensorflow.keras.models import Model

#Plotting graph
import matplotlib.pyplot as plt

#Plotting Keras Model
import keras.utils.vis_utils
from importlib import reload
reload(keras.utils.vis_utils)

from keras.utils.vis_utils import plot_model

#Utility Module for computing and displaying metrics
from utility_functions import metrics

import warnings
warnings.filterwarnings("ignore")

## Import the data and split into Train, Test and Validation
- Use the first 70% of the records for training and split the remaining 30% equally (15% each) between validation and test
    - 8711 * 70% ≈ 6097: records 1 to 6097 (Train)
    - The next 1307 records: 6098 to 7404 (Validation)
    - The final 1307 records: 7405 to 8711 (Test)

In [2]:
#Load the dataset; the first CSV column is parsed as dates and used as the index
thesis_dataset = pd.read_csv('data/final_thesis_dataset.csv', index_col=0, parse_dates=[0])

#Positional split in file order: train first, then validation, then test
train, valid, test = (
    thesis_dataset[:6097],
    thesis_dataset[6097:7404],
    thesis_dataset[7404:],
)

#Feature columns and target column used for each energy source
solar_features = ['SWTDN', 'SWGDN', 'T', 'p']
solar_target = 'DE_solar_generation_actual'
wind_features = ['v1', 'v2', 'v_50m', 'z0']
wind_target = 'DE_wind_generation_actual'

#Solar X, y for each split
X_solar_train, y_solar_train = train[solar_features], train[solar_target]
X_solar_valid, y_solar_valid = valid[solar_features], valid[solar_target]
X_solar_test, y_solar_test = test[solar_features], test[solar_target]

#Wind X, y for each split
X_wind_train, y_wind_train = train[wind_features], train[wind_target]
X_wind_valid, y_wind_valid = valid[wind_features], valid[wind_target]
X_wind_test, y_wind_test = test[wind_features], test[wind_target]

## Building a CNN + LSTM TensorFlow Model using Conv1D and LSTM for prediction

In [None]:
#Input layer: every sample is a flat vector of the 4 feature columns
inputs = Input(shape=(4,))
#Reshape (batch, 4) -> (batch, 4, 1) so Conv1D treats the 4 features as a
#length-4 sequence with a single channel
x = Reshape((-1, 1))(inputs)
#Three Conv1D layers of 128 filters and kernel_size 2, each followed by BatchNormalization
for _ in range(3):
    x = Conv1D(128, 2)(x)
    x = BatchNormalization()(x)
#Flatten each step separately so the tensor stays 3-D (batch, steps, features) for the LSTM
conv_features = TimeDistributed(Flatten())(x)

#LSTM layer with 1024 units and with no return_sequences.
#No input_shape is passed: the layer takes its input shape from the tensor it is
#called on, and the dataset's (rows, columns) shape is unrelated to that tensor.
out = LSTM(1024, return_sequences=False)(conv_features)
#Dense layer of 1024 units on top of the LSTM output
out = Dense(1024)(out)
#BatchNormalization followed by a single-unit Dense layer producing the prediction
out = BatchNormalization()(out)
out = Dense(1)(out)
#Create the model and print the summary
model = Model(inputs, out)
model.summary()

## Compiling the model by passing adam optimizer, loss=MAE and fetching accuracy metrics 

In [None]:
#Compile with keyword arguments so every value reaches the intended parameter
#whatever the positional order of Model.compile is in the installed Keras version.
#NOTE(review): 'accuracy' is a classification metric and says little about a
#continuous target; it is kept because the plotting cell reads history.history['accuracy'].
model.compile(optimizer='adam', loss='mean_absolute_error', metrics=['accuracy'])
#Train on the solar training split, validating on the solar validation split
history = model.fit(
    X_solar_train,
    y_solar_train,
    validation_data=(X_solar_valid, y_solar_valid),
    epochs=2,
)

In [None]:
#Evaluate the compiled loss and metrics on the held-out solar test split
model.evaluate(x=X_solar_test, y=y_solar_test)

### Plotting the keras model

In [None]:
#Render the model graph, top to bottom, into figs/3-model.png
plot_model(
    model,
    to_file='figs/3-model.png',
    rankdir='TB',
    dpi=96,
    show_layer_names=True,
    show_shapes=False,
    show_dtype=False,
    expand_nested=False,
)

### Plotting Model's Accuracy and Loss Metrics over Train and Validation sets for Solar

In [None]:
#One figure per metric: (training key, validation key, figure title)
curves = (
    ('accuracy', 'val_accuracy', 'model accuracy'),
    ('loss', 'val_loss', 'model loss'),
)
for train_key, val_key, title in curves:
    #Training curve first, validation curve second, to match the legend order
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(title)
    plt.ylabel(train_key)
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()

### Plotting Actual vs Predictions on the same plot

In [None]:
#Predict on the train, validation and test solar features, in that order
train_solar_pred, val_solar_pred, test_solar_pred = [
    model.predict(features)
    for features in (X_solar_train, X_solar_valid, X_solar_test)
]

In [None]:
solar_col = 'DE_solar_generation_actual'

#Actual solar generation over the whole dataset
df = thesis_dataset[[solar_col]]

#Build each prediction frame directly on its split's index instead of writing
#into a slice of the source frame; np.ravel turns the predictions into a 1-D column.
train_df = pd.DataFrame({solar_col: np.ravel(train_solar_pred)}, index=train.index)
valid_df = pd.DataFrame({solar_col: np.ravel(val_solar_pred)}, index=valid.index)
test_df = pd.DataFrame({solar_col: np.ravel(test_solar_pred)}, index=test.index)

# Plot all predictions on top of the actual series
plt.plot(df, label='Actual')
plt.plot(train_df, color='orange', label='Train')
plt.plot(valid_df, color='green', label='Validation')
plt.plot(test_df, color='black', label='Test')
plt.xlabel('Date')
plt.ylabel('Solar Output')
plt.legend(loc="upper right")
plt.title("Predicted vs. Actual Solar Generation")

#If the figure should be saved, do it before plt.show():
# plt.savefig('figs/3-solar.png', dpi=200)
plt.show()

### Regression Metrics for Solar

In [None]:
#Actual solar generation on the test split, kept as a one-column DataFrame
solar_test_actual = test[['DE_solar_generation_actual']]
#Compute and display the regression metrics for the solar test predictions
metrics('cnn+lstm', solar_test_actual, test_solar_pred)

## Fitting the same model for Wind Dataset

In [None]:
#Re-compiling does not reset weights, so the wind run would otherwise start from
#the solar-trained weights. clone_model rebuilds the same architecture with new
#layers (and therefore newly initialised weights).
model = tf.keras.models.clone_model(model)
#Keyword arguments keep each value on the intended parameter whatever the
#positional order of Model.compile is in the installed Keras version.
#NOTE(review): 'accuracy' is a classification metric and says little about a
#continuous target; it is kept because the plotting cell reads history.history['accuracy'].
model.compile(optimizer='adam', loss='mean_absolute_error', metrics=['accuracy'])
#Train on the wind training split, validating on the wind validation split
history = model.fit(
    X_wind_train,
    y_wind_train,
    validation_data=(X_wind_valid, y_wind_valid),
    epochs=2,
)

In [None]:
#Evaluate the compiled loss and metrics on the held-out wind test split
model.evaluate(x=X_wind_test, y=y_wind_test)

## Model's Accuracy and Loss for Wind Dataset

In [None]:
#One figure per metric: (training key, validation key, figure title)
curves = (
    ('accuracy', 'val_accuracy', 'model accuracy'),
    ('loss', 'val_loss', 'model loss'),
)
for train_key, val_key, title in curves:
    #Training curve first, validation curve second, to match the legend order
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(title)
    plt.ylabel(train_key)
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()

## Plotting Actual vs Predicted for Wind on same graph

In [None]:
#Predict on the train, validation and test wind features, in that order
train_wind_pred, val_wind_pred, test_wind_pred = [
    model.predict(features)
    for features in (X_wind_train, X_wind_valid, X_wind_test)
]

In [None]:
wind_col = 'DE_wind_generation_actual'

#Actual wind generation over the whole dataset
df = thesis_dataset[[wind_col]]

#Build each prediction frame directly on its split's index instead of writing
#into a slice of the source frame; np.ravel turns the predictions into a 1-D column.
train_df = pd.DataFrame({wind_col: np.ravel(train_wind_pred)}, index=train.index)
valid_df = pd.DataFrame({wind_col: np.ravel(val_wind_pred)}, index=valid.index)
test_df = pd.DataFrame({wind_col: np.ravel(test_wind_pred)}, index=test.index)

# Plot all predictions on top of the actual series
plt.plot(df, label='Actual')
plt.plot(train_df, color='orange', label='Train')
plt.plot(valid_df, color='green', label='Validation')
plt.plot(test_df, color='black', label='Test')
plt.xlabel('Date')
plt.ylabel('Wind Output')
plt.legend(loc="upper right")
plt.title("Predicted vs. Actual Wind Generation")
plt.show()

### Regression Metrics for Wind

In [None]:
#Actual wind generation on the test split, kept as a one-column DataFrame
wind_test_actual = test[['DE_wind_generation_actual']]
#Compute and display the regression metrics for the wind test predictions
metrics('cnn+lstm', wind_test_actual, test_wind_pred)

## The weaker wind results arise because wind energy production is highly random in nature.
### The CNN + LSTM model therefore could not map it as accurately as it did solar energy production. To address this, we use another method, WD, which decomposes this variability so that the LSTM can predict it well.