In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Normalised train / validation / test splits (each file has a header row).
train, validation, test = (
    pd.read_csv(f"../data/input-output-batches/{split}Set.csv", sep=",")
    for split in ("Train", "Validation", "Test")
)

# Saved model predictions. The files are read with header=None, so pandas
# labels the columns with integers 0, 1, 2, ...
carbon_mlp, carbon_lstm, carbon_gru, solar_mlp, solar_lstm, solar_gru = (
    pd.read_csv(f"../data/results/{result}.csv", sep=",", header=None)
    for result in (
        "carbon_mlp",
        "carbon_lstm",
        "carbon_gru",
        "solar_mlp",
        "solar_lstm",
        "solar_gru",
    )
)

To plot the results correctly, I need the actual values that the models tried to predict, arranged in the same format as the predictions. The functions below are taken from the model code, but the input part is ignored (i.e. the list of independent variables is empty).

In [None]:
# Only the targets are needed for plotting, so the feature list stays empty.
independent_variables = []
columns_to_predict = ["kg_CO2/kWh", "Avg solar generation"]
NUM_OF_TIMESTEPS_INPUT = 48
NUM_OF_TIMESTEPS_OUTPUT = 24


def _prepend_tail(previous, current):
    """Return `current` preceded by the last NUM_OF_TIMESTEPS_OUTPUT rows of `previous`.

    The combined frame gets a fresh 0..n-1 index.
    """
    carried_over = previous.tail(NUM_OF_TIMESTEPS_OUTPUT)
    return pd.concat([carried_over, current], ignore_index=True)


# Split X (inputs) and Y (targets) for every set.

# Train set: used as-is.
X_train_default, Y_train_default = train[independent_variables], train[columns_to_predict]

# Validation set: prefixed with the last NUM_OF_TIMESTEPS_OUTPUT rows of the
# train frames, to get more datapoints.
X_val_default = _prepend_tail(X_train_default, validation[independent_variables])
Y_val_default = _prepend_tail(Y_train_default, validation[columns_to_predict])

# Test set: prefixed with the last NUM_OF_TIMESTEPS_OUTPUT rows of the
# (already extended) validation frames, to get more datapoints.
X_test_default = _prepend_tail(X_val_default, test[independent_variables])
Y_test_default = _prepend_tail(Y_val_default, test[columns_to_predict])

NUM_OF_ROWS_TRAIN, NUM_OF_FEATURES = X_train_default.shape

In [None]:
def create_sequences(input_data, output_data, timesteps_input, timesteps_output):
    """Cut two aligned series into sliding (input window, target window) pairs.

    Window ``k`` uses ``input_data[k:k + timesteps_input]`` as the input
    sequence and the ``timesteps_output`` entries of ``output_data`` that
    directly follow that window as the target. Windows advance one step at a
    time, and their count is derived from ``len(input_data)``; when the data
    is too short for a single window, both results are empty.

    Returns:
        Tuple ``(sequences, targets)`` of NumPy arrays built from the slices.
    """
    window_count = len(input_data) - timesteps_input - timesteps_output + 1
    starts = range(window_count)  # empty when window_count <= 0

    sequences = [input_data[start:start + timesteps_input] for start in starts]
    targets = [
        output_data[start + timesteps_input:start + timesteps_input + timesteps_output]
        for start in starts
    ]

    return np.array(sequences), np.array(targets)


# Build the test windows; only Y_test (the actual target values) is used afterwards.
X_test, Y_test = create_sequences(
    input_data=X_test_default,
    output_data=Y_test_default,
    timesteps_input=NUM_OF_TIMESTEPS_INPUT,
    timesteps_output=NUM_OF_TIMESTEPS_OUTPUT,
)
# Number of target windows, i.e. how many predictions there are to plot.
print(len(Y_test))

In [None]:
# Denormalize the data: invert min-max scaling, x * (max - min) + min.
# NOTE(review): the bounds below are presumably the extrema used when the data
# was normalised - confirm against the preprocessing code.
# NOTE: everything is updated in place, so re-running this cell without
# reloading the data applies the scaling a second time.

carbon_max = 0.5560628544361648
carbon_min = 0.337515252471314
solar_max = 703.628269052724
solar_min = 0.0

carbon_span = carbon_max - carbon_min
solar_span = solar_max - solar_min

for window in range(len(Y_test)):
    # Predictions: column `window` of each result frame.
    for carbon_frame in (carbon_mlp, carbon_lstm, carbon_gru):
        carbon_frame[window] = carbon_frame[window] * carbon_span + carbon_min
    for solar_frame in (solar_mlp, solar_lstm, solar_gru):
        solar_frame[window] = solar_frame[window] * solar_span + solar_min

    # Actual values: last axis follows columns_to_predict (0 = carbon, 1 = solar).
    Y_test[window, :, 0] = Y_test[window, :, 0] * carbon_span + carbon_min
    Y_test[window, :, 1] = Y_test[window, :, 1] * solar_span + solar_min

In [None]:
# Plot every carbon-intensity prediction window next to the actual values,
# one subplot per window, three subplots per row.
n_predictions = len(Y_test)
n_cols = 3
# Derive the row count from the data (ceiling division). A fixed 9x3 grid
# raises ValueError as soon as there are more than 27 windows.
n_rows = max(1, -(-n_predictions // n_cols))

# 48 / 9 inches per row reproduces the original 20x48 figure for a 9-row grid.
fig = plt.figure(figsize=(20, n_rows * 48 / 9))

for num in range(n_predictions):
    ax = fig.add_subplot(n_rows, n_cols, num + 1)  # subplot indices are 1-based
    # Column `num` of each prediction frame holds the forecast for window `num`.
    ax.plot(carbon_mlp[num], label="MLP")
    ax.plot(carbon_lstm[num], label="LSTM")
    ax.plot(carbon_gru[num], label="GRU")
    # Index 0 on the last axis of Y_test is the carbon-intensity target.
    ax.plot(Y_test[num, :, 0], label="Actual")
    ax.set_xlabel('Timestamp', fontsize=20)
    ax.set_ylabel('Carbon Intensity (kg/kWh)', fontsize=20)
    ax.legend(fontsize=14)
    ax.set_title(f'Prediction number {num+1}', fontsize=22)
    ax.tick_params(labelsize=16)

fig.suptitle('Predicted and actual Carbon Intensity', fontsize=26, y=1)
fig.tight_layout()
plt.show()

In [None]:
# Plot every solar-generation prediction window next to the actual values,
# one subplot per window, three subplots per row.
n_predictions = len(Y_test)
n_cols = 3
# Derive the row count from the data (ceiling division). A fixed 9x3 grid
# raises ValueError as soon as there are more than 27 windows.
n_rows = max(1, -(-n_predictions // n_cols))

# 48 / 9 inches per row reproduces the original 20x48 figure for a 9-row grid.
fig = plt.figure(figsize=(20, n_rows * 48 / 9))

for num in range(n_predictions):
    ax = fig.add_subplot(n_rows, n_cols, num + 1)  # subplot indices are 1-based
    # Column `num` of each prediction frame holds the forecast for window `num`.
    ax.plot(solar_mlp[num], label="MLP")
    ax.plot(solar_lstm[num], label="LSTM")
    ax.plot(solar_gru[num], label="GRU")
    # Index 1 on the last axis of Y_test is the solar-generation target.
    ax.plot(Y_test[num, :, 1], label="Actual")
    ax.set_xlabel('Timestamp', fontsize=20)
    ax.set_ylabel('Solar Generation (W/kW)', fontsize=20)
    ax.legend(fontsize=14)
    ax.set_title(f'Prediction number {num+1}', fontsize=22)
    ax.tick_params(labelsize=16)

fig.suptitle('Predicted and actual Solar Generation', fontsize=26, y=1)
fig.tight_layout()
plt.show()