In [18]:
# A python library that allows to work with multidimensional arrays and mathematical functions.
import numpy as np
# A Python library that is widely used for reading, processing and analyzing data.
import pandas as pd
# pickle loads serialized Python objects and csv reads/writes CSV files (both are standard-library modules).
import pickle, csv
# os is used to interact with the operating system and to build file-system paths.
import os

# Keras is a high-level API used to build deep learning models. Keras enables model creation with sequential and functional APIs.
from keras.models import Sequential, load_model, Model
# These are the layers of the model. LSTM is a type of recurrent neural network.
# Dense is a fully connected layer. Others are different types of neural network layers.
from keras.layers import LSTM, Dense, Dropout, Input, concatenate, Flatten, Reshape
# regularizers provides weight penalties that reduce overfitting by limiting the size of the weights.
from keras import regularizers

# ModelCheckPoint records the model's weights at regular intervals during training. 
# EarlyStopping stops training if a specific metric does not improve over a specified period of time.
from keras.callbacks import ModelCheckpoint, EarlyStopping
# MeanSquaredError provides the mean square error measurement.
# RootMeanSquared Error measures the root mean square error.
# Adam is a widely used gradient descent optimization algorithm.
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.metrics import RootMeanSquaredError
from tensorflow.keras.optimizers import Adam
from keras.preprocessing.sequence import TimeseriesGenerator
import tensorflow.keras.backend as K

# Importing pyplot module from the matplotlib library.
# This module is used to perform data visualization. 
# The module is accessible under the abbreviation 'plt'.
from matplotlib import pyplot as plt
# Importing the style module from the matplotlib library. 
# This module is used to determine the style of graphics and drawings.
from matplotlib import style
# Importing StandardScaler and MinMaxScaler classes of the preprocessing module from the sklearn library.
# These classes are used to scale and standardize data properties.
from sklearn.preprocessing import StandardScaler, MinMaxScaler
# Importing the train_test_split function of the model_selection module from the sklearn library.
# This function is used to separate the data set into training and testing sets.
from sklearn.model_selection import train_test_split
#  Seaborn is a Python library used for data visualization. 
# Similar to Matplotlib, it is used to create more effective and attractive visualizations.
import seaborn as sns

In [19]:
# The following block of code reads data from a file named 'PowerPrice.csv',
# parses the 'Time' column as datetimes and then drops it, and creates a data frame as a result.
# Such operations are performed during the data preprocessing stage and
# ensure that the data set is ready for analysis.

In [20]:
# Load the power-price table from CSV into the DataFrame `df_powerprice`.
df_powerprice = pd.read_csv("data/PowerPrice.csv")
# Parse the 'Time' column into datetime objects.
# `infer_datetime_format` is deprecated since pandas 2.0: a consistent format is
# now inferred by default and passing the flag only emits a warning, so it is
# no longer passed.
df_powerprice["Time"] = pd.to_datetime(df_powerprice["Time"])
# Remove 'Time' again so that only the price columns remain in the frame.
# NOTE(review): the parsed timestamps are discarded right away, so the parse
# above effectively only validates the column — confirm this is intended.
df_powerprice = df_powerprice.drop(columns=["Time"])
# Display the resulting DataFrame.
df_powerprice

Unnamed: 0,DayAhead,Spot,REBAP
0,2.83200,3.688,-4.566
1,2.37575,4.245,-13.964
2,1.91950,3.161,0.345
3,1.46325,1.664,3.048
4,1.00700,3.110,-6.679
...,...,...,...
35035,3.77625,3.342,4.856
35036,3.73900,4.425,8.561
35037,3.70175,2.473,6.076
35038,3.66450,3.241,11.797


In [21]:
# This block of code loads the files with the '.pickle' extension in the 'data/Spatial' directory and adds them
# to the list of datasets, printing the name of each file as it is loaded. The loaded arrays are then
# concatenated, transposed and wrapped in a DataFrame (df_spatial) for the later steps.

In [22]:
# Collect the array stored in every '.pickle' file under 'data/Spatial'.
datasets = []
# os.listdir() returns entries in arbitrary, platform-dependent order. The arrays
# are concatenated below, so that order decides which rows (columns after the
# transpose) belong to which file. Sorting makes the feature layout reproducible.
for filename in sorted(os.listdir("data/Spatial")):
    # Ignore anything that is not a pickle file.
    if not filename.endswith(".pickle"):
        continue
    # Pickle data is binary, hence the 'rb' mode.
    with open(os.path.join("data/Spatial", filename), "rb") as file:
        # SECURITY: pickle.load can execute arbitrary code while deserializing;
        # only load files from a trusted source.
        datasets.append(pickle.load(file))
    # Log each file as it is loaded.
    print(filename)
# Stack the per-file arrays along axis 0 (one after another, row-wise).
data = np.concatenate(datasets, axis=0)
# Transpose so that the former rows become columns and vice versa.
data = data.T
# Wrap the array in a DataFrame for the following processing steps.
df_spatial = pd.DataFrame(data)

biogas_kw_2019.pickle
brown_coal_kw_2019.pickle
completed_solar_kw_2019.pickle
completed_wind_onshore_kw_2019.pickle
demand_kw_2019.pickle
hard_coal_kw_2019.pickle
nuclear_kw_2019.pickle
oil_kw_2019.pickle
water_laufwasser_kw_2019.pickle
water_pumpspeicher_PB_kw_2019.pickle
water_pumpspeicher_TB_kw_2019.pickle
water_speicherwasser_kw_2019.pickle
wind_offshore_kw_2019.pickle


In [None]:
# The following code is used to scale data. The scaling process ensures that the data is brought within a certain range.

In [23]:
# Rescale every column to the [0, 1] range with Min-Max scaling so that features
# measured on different scales become comparable. One scaler is kept per table.
# NOTE(review): both scalers are fitted on the complete data set, before the
# train/validation/test split, so the min/max of held-out rows influence the
# scaling — confirm this is acceptable.
scaler_spatial = MinMaxScaler(feature_range=(0, 1))
scaler_spatial.fit(df_spatial)
df_spatial_scaled = scaler_spatial.transform(df_spatial)

scaler_powerprice = MinMaxScaler(feature_range=(0, 1))
scaler_powerprice.fit(df_powerprice)
df_powerprice_scaled = scaler_powerprice.transform(df_powerprice)


In [24]:
# Sanity check: both scaled arrays must have the same number of rows.
np.shape(df_powerprice_scaled), np.shape(df_spatial_scaled)

((35040, 3), (35040, 1300))

In [None]:
# It performs reshaping operations to make the data compatible with the train_test_split function and the input shape of the model.

In [25]:
# Cut the scaled series into consecutive, non-overlapping windows so that they
# match the model's (time steps, features) input layout.
WINDOW_LENGTH = 6  # number of consecutive rows that form one sample
SPOT_COLUMN = 1    # column position selected from the scaled price table
# Take the feature count from the data instead of hard-coding 1300, so this cell
# keeps working when the set of spatial input files changes.
n_spatial_features = df_spatial_scaled.shape[1]
input_1_data = df_spatial_scaled.reshape((-1, WINDOW_LENGTH, n_spatial_features))
# NOTE(review): input_2_data and output_data are built from the same column with
# the same windowing, i.e. the model receives its own target values as an input.
# Confirm whether the target should be shifted in time or a different column used.
input_2_data = df_powerprice_scaled[:, [SPOT_COLUMN]].reshape((-1, WINDOW_LENGTH, 1))
output_data = df_powerprice_scaled[:, SPOT_COLUMN].reshape((-1, WINDOW_LENGTH, 1))

In [26]:
# Verify the window shapes; the price input carries a single feature column
# (the other price columns are not part of it).
np.shape(input_2_data), np.shape(input_1_data), np.shape(output_data)

((5840, 6, 1), (5840, 6, 1300), (5840, 6, 1))

In [37]:
# Two-stage split of the windows:
#   1. hold out 20 % as the test set,
#   2. take 30 % of the remaining windows as the validation set.
# This yields roughly 56 % train / 24 % validation / 20 % test.
# random_state is fixed so the split is reproducible.
# NOTE(review): train_test_split shuffles by default, so the temporal order of the
# windows is not preserved across the sets — confirm this is acceptable for forecasting.
(input_1_rest, input_1_test,
 input_2_rest, input_2_test,
 output_rest, output_test) = train_test_split(
    input_1_data, input_2_data, output_data, test_size=0.2, random_state=30)

(input_1_train, input_1_val,
 input_2_train, input_2_val,
 output_train, output_val) = train_test_split(
    input_1_rest, input_2_rest, output_rest, test_size=0.3, random_state=30)

In [38]:
# In the code below, a deep learning model with two different inputs is created.
# This model includes fully connected layers, dropout layers, LSTM layers and an output layer.
# This architecture has been chosen to suit the characteristics of spatial and temporal datasets.

In [39]:
# Two-input network.
# spatial_input:    6 time steps x 1300 spatial feature columns.
# powerprice_input: 6 time steps x 1 price feature column.
spatial_input = Input(shape=(6, 1300))
powerprice_input = Input(shape=(6, 1))

# Spatial branch: two fully connected ReLU layers (64 and 32 units), each followed
# by a Dropout layer that disables units with probability 0.2 during training to
# limit overfitting.
fcnn_layer = spatial_input
for hidden_units in (64, 32):
    fcnn_layer = Dense(hidden_units, activation="relu")(fcnn_layer)
    fcnn_layer = Dropout(0.2)(fcnn_layer)
# Final fully connected layer of the branch: 8 units with ReLU activation.
fcnn_output = Dense(8, activation="relu")(fcnn_layer)

# Join the 8 spatial features with the price feature of each time step.
concat_layer = concatenate([fcnn_output, powerprice_input])
# The target shape is read from concat_layer itself, so this Reshape keeps the
# tensor's existing shape.
sequence_shape = (concat_layer.shape[1], concat_layer.shape[2])
reshape_layer = Reshape(sequence_shape)(concat_layer)
# LSTM with 10 units over the time steps, followed by a single-unit output layer.
# NOTE(review): the LSTM is created without return_sequences, so the model emits
# one value per sample, while the targets built earlier hold 6 values per
# sample — confirm the intended output shape.
lstm_layer = LSTM(10, activation="relu")(reshape_layer)
output_layer = Dense(1)(lstm_layer)
# Build the model from its two inputs and its single output.
model = Model(inputs=[spatial_input, powerprice_input], outputs=output_layer)

In [40]:
# Configure the model for training: mean squared error as the loss, the Adam
# optimizer with a learning rate of 1e-4, and root mean squared error as an
# additional metric reported per epoch.
model.compile(
    optimizer=Adam(learning_rate=1e-4),
    loss=MeanSquaredError(),
    metrics=[RootMeanSquaredError()],
)

In [41]:
# Inspect the symbolic shape of the merged tensor:
# (batch, 6 time steps, 8 spatial features + 1 price feature).
concat_layer.shape

TensorShape([None, 6, 9])

In [42]:
# Early stopping: watch the validation loss and stop training once it has not
# improved for 20 consecutive epochs. restore_best_weights=True puts the weights
# of the best epoch back into the model when training is stopped.
es = EarlyStopping(
    monitor='val_loss',
    patience=20,
    restore_best_weights=True,
    verbose=1,
)

# Checkpointing: whenever the validation loss reaches a new minimum, save the
# complete model (not only its weights) to the given path.
cp = ModelCheckpoint(
    filepath="model_spot/forecasting-TRY/",
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    save_weights_only=False,
    verbose=1,
)

In [None]:
# Train the model on the training windows.
# - batch_size=5: number of samples per gradient update.
# - epochs=1000: upper bound on the number of epochs; the early-stopping callback
#   can end training sooner.
# - validation_data: the validation inputs together with their targets, evaluated
#   at the end of every epoch.
# - callbacks: checkpointing (cp) and early stopping (es).
train_inputs = [input_1_train, input_2_train]
val_inputs = [input_1_val, input_2_val]

history = model.fit(
    train_inputs,
    output_train,
    validation_data=(val_inputs, output_val),
    batch_size=5,
    epochs=1000,
    callbacks=[cp, es],
)

Epoch 1/1000
Epoch 1: val_loss improved from inf to 0.00069, saving model to model_spot/forecasting-TRY\
INFO:tensorflow:Assets written to: model_spot/forecasting-TRY\assets
Epoch 2/1000
Epoch 2: val_loss improved from 0.00069 to 0.00060, saving model to model_spot/forecasting-TRY\
INFO:tensorflow:Assets written to: model_spot/forecasting-TRY\assets
Epoch 3/1000
Epoch 3: val_loss did not improve from 0.00060
Epoch 4/1000
Epoch 4: val_loss improved from 0.00060 to 0.00050, saving model to model_spot/forecasting-TRY\
INFO:tensorflow:Assets written to: model_spot/forecasting-TRY\assets
Epoch 5/1000
Epoch 5: val_loss improved from 0.00050 to 0.00042, saving model to model_spot/forecasting-TRY\
INFO:tensorflow:Assets written to: model_spot/forecasting-TRY\assets
Epoch 6/1000
Epoch 6: val_loss improved from 0.00042 to 0.00034, saving model to model_spot/forecasting-TRY\
INFO:tensorflow:Assets written to: model_spot/forecasting-TRY\assets
Epoch 7/1000
Epoch 7: val_loss did not improve from 0.

Epoch 22/1000
Epoch 22: val_loss improved from 0.00024 to 0.00024, saving model to model_spot/forecasting-TRY\
INFO:tensorflow:Assets written to: model_spot/forecasting-TRY\assets
Epoch 23/1000
Epoch 23: val_loss improved from 0.00024 to 0.00023, saving model to model_spot/forecasting-TRY\
INFO:tensorflow:Assets written to: model_spot/forecasting-TRY\assets
Epoch 24/1000
Epoch 24: val_loss did not improve from 0.00023
Epoch 25/1000
Epoch 25: val_loss did not improve from 0.00023
Epoch 26/1000
Epoch 26: val_loss improved from 0.00023 to 0.00023, saving model to model_spot/forecasting-TRY\
INFO:tensorflow:Assets written to: model_spot/forecasting-TRY\assets
Epoch 27/1000
Epoch 27: val_loss did not improve from 0.00023
Epoch 28/1000
Epoch 28: val_loss improved from 0.00023 to 0.00019, saving model to model_spot/forecasting-TRY\
INFO:tensorflow:Assets written to: model_spot/forecasting-TRY\assets
Epoch 29/1000
Epoch 29: val_loss did not improve from 0.00019
Epoch 30/1000
Epoch 30: val_loss

INFO:tensorflow:Assets written to: model_spot/forecasting-TRY\assets
Epoch 43/1000
Epoch 43: val_loss improved from 0.00012 to 0.00012, saving model to model_spot/forecasting-TRY\
INFO:tensorflow:Assets written to: model_spot/forecasting-TRY\assets
Epoch 44/1000
Epoch 44: val_loss improved from 0.00012 to 0.00011, saving model to model_spot/forecasting-TRY\
INFO:tensorflow:Assets written to: model_spot/forecasting-TRY\assets
Epoch 45/1000
Epoch 45: val_loss improved from 0.00011 to 0.00011, saving model to model_spot/forecasting-TRY\
INFO:tensorflow:Assets written to: model_spot/forecasting-TRY\assets
Epoch 46/1000
Epoch 46: val_loss improved from 0.00011 to 0.00010, saving model to model_spot/forecasting-TRY\
INFO:tensorflow:Assets written to: model_spot/forecasting-TRY\assets
Epoch 47/1000
Epoch 47: val_loss did not improve from 0.00010
Epoch 48/1000
Epoch 48: val_loss improved from 0.00010 to 0.00010, saving model to model_spot/forecasting-TRY\
INFO:tensorflow:Assets written to: mod

Epoch 62/1000
Epoch 62: val_loss did not improve from 0.00008
Epoch 63/1000
Epoch 63: val_loss did not improve from 0.00008
Epoch 64/1000
Epoch 64: val_loss improved from 0.00008 to 0.00008, saving model to model_spot/forecasting-TRY\
INFO:tensorflow:Assets written to: model_spot/forecasting-TRY\assets
Epoch 65/1000
Epoch 65: val_loss did not improve from 0.00008
Epoch 66/1000
Epoch 66: val_loss improved from 0.00008 to 0.00008, saving model to model_spot/forecasting-TRY\
INFO:tensorflow:Assets written to: model_spot/forecasting-TRY\assets
Epoch 67/1000
Epoch 67: val_loss did not improve from 0.00008
Epoch 68/1000
Epoch 68: val_loss improved from 0.00008 to 0.00008, saving model to model_spot/forecasting-TRY\
INFO:tensorflow:Assets written to: model_spot/forecasting-TRY\assets
Epoch 69/1000
Epoch 69: val_loss did not improve from 0.00008
Epoch 70/1000
Epoch 70: val_loss improved from 0.00008 to 0.00008, saving model to model_spot/forecasting-TRY\
INFO:tensorflow:Assets written to: mode

Epoch 84: val_loss did not improve from 0.00008
Epoch 85/1000
Epoch 85: val_loss improved from 0.00008 to 0.00008, saving model to model_spot/forecasting-TRY\
INFO:tensorflow:Assets written to: model_spot/forecasting-TRY\assets
Epoch 86/1000
Epoch 86: val_loss did not improve from 0.00008
Epoch 87/1000
Epoch 87: val_loss did not improve from 0.00008
Epoch 88/1000
Epoch 88: val_loss did not improve from 0.00008
Epoch 89/1000
Epoch 89: val_loss did not improve from 0.00008
Epoch 90/1000
Epoch 90: val_loss did not improve from 0.00008
Epoch 91/1000
Epoch 91: val_loss improved from 0.00008 to 0.00008, saving model to model_spot/forecasting-TRY\
INFO:tensorflow:Assets written to: model_spot/forecasting-TRY\assets
Epoch 92/1000
Epoch 92: val_loss did not improve from 0.00008
Epoch 93/1000
Epoch 93: val_loss did not improve from 0.00008
Epoch 94/1000
Epoch 94: val_loss improved from 0.00008 to 0.00008, saving model to model_spot/forecasting-TRY\
INFO:tensorflow:Assets written to: model_spot/f

Epoch 107/1000
Epoch 107: val_loss did not improve from 0.00008
Epoch 108/1000
Epoch 108: val_loss did not improve from 0.00008
Epoch 109/1000
Epoch 109: val_loss improved from 0.00008 to 0.00008, saving model to model_spot/forecasting-TRY\
INFO:tensorflow:Assets written to: model_spot/forecasting-TRY\assets
Epoch 110/1000
Epoch 110: val_loss improved from 0.00008 to 0.00008, saving model to model_spot/forecasting-TRY\
INFO:tensorflow:Assets written to: model_spot/forecasting-TRY\assets
Epoch 111/1000
Epoch 111: val_loss did not improve from 0.00008
Epoch 112/1000
Epoch 112: val_loss did not improve from 0.00008
Epoch 113/1000
Epoch 113: val_loss did not improve from 0.00008
Epoch 114/1000

In [None]:
# Score the trained model on the held-out test windows. evaluate() returns the
# loss followed by the compiled metric (RMSE); both are computed on the scaled data.
test_inputs = [input_1_test, input_2_test]
test_loss, test_rmse = model.evaluate(test_inputs, output_test)
print(f"Test loss: {test_loss}, Test RMSE: {test_rmse}")

In [None]:
# Plot the training and validation loss per epoch.
# The first epoch is left out of BOTH curves so that its (typically much larger)
# loss does not dominate the y-axis. Previously only the training curve was
# sliced, which shifted it by one epoch relative to the validation curve.
train_loss_curve = history.history['loss'][1:]
val_loss_curve = history.history['val_loss'][1:]
# Explicit 1-based epoch numbers; the curves start at epoch 2.
epoch_numbers = range(2, len(train_loss_curve) + 2)

plt.figure(figsize=(10,5))
plt.plot(epoch_numbers, train_loss_curve)
plt.plot(epoch_numbers, val_loss_curve)
plt.title('Model Loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper right')
plt.show()

In [None]:
# Training and validation loss over all epochs on a logarithmic y-axis, which
# keeps the late, small improvements visible next to the large early values.
# Requires `history` from the training cell.
fig, ax = plt.subplots(figsize=(10, 5))
for series_key in ('loss', 'val_loss'):
    ax.plot(history.history[series_key])
ax.set_title('Model Loss')
ax.set_ylabel('Loss (log10 scale)')
ax.set_xlabel('Epoch')
ax.set_yscale('log')  # logarithmic y-axis
ax.legend(['Train', 'Validation'], loc='upper right')
plt.show()

In [None]:
# Training and validation RMSE per epoch; the first epoch is skipped in both
# curves, so the x-axis position 0 corresponds to the second epoch.
fig, ax = plt.subplots(figsize=(10, 5))
for metric_key in ("root_mean_squared_error", "val_root_mean_squared_error"):
    ax.plot(history.history[metric_key][1:])
ax.set_title('Model RMSE')
ax.set_ylabel('RMSE')
ax.set_xlabel('Epoch')
ax.legend(['Train', 'Validation'], loc='upper right')
plt.show()