In [147]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from sklearn.metrics import mean_squared_error

import DataRetriever as dr

RETRIEVER = dr.DataRetriever()
CON_ATTRIBUTES = RETRIEVER.get_attributes(file_name='consuming_attributes.pkl')
PV_ATTRIBUTES = RETRIEVER.get_attributes(file_name='producing_attributes.pkl')
FLEX_ATTRIBUTES = ["Load_ClothesWasherPowerWithStandby", "Elec_PowerDishwasher", "Load_DryerPowerTotal"]
FIXED_ATTRIBUTES = list(set(CON_ATTRIBUTES) - set(FLEX_ATTRIBUTES))

attributes = FIXED_ATTRIBUTES

DATA = RETRIEVER.get_data(file_name='All-Subsystems-hour-Year2.pkl')[attributes].sum(axis=1).clip(lower=0) / 1000

In [148]:
def fourierExtrapolation(data: np.array, number_of_predictions: int, n_sinusoids: int) -> np.array:
    """
    Predict {number_of_predictions} observations after the index data.size of {data}, using {n_sinusoids} sinusoids.
    :param data: The data on which to train the model. Corresponds to {_x = x_0, x_1, ... x_(n-1)} in the theory
    :param number_of_predictions: The amount of predictions to output. Corresponds to {x_((n-1)+1), x_((n-1)+2), ..., x_((n-1)+{number_of_predictions})}
    :param n_sinusoids: The amount of sinusoids on which to base the predictions. I.e. the data probably contains many sinusoids, but we only wish to make predictions based on the {n_sinusoids} largest frequencies.
    :return: A numpy array of length data.size + number_of_predictions, containing the transformed original data + predictions
    """
    data_size = data.size  # n

    X_frequency_domain = list(np.fft.fft(data))  # The series of complex numbers X = {X1, X2, ...}
    frequencies = list(np.fft.fftfreq(data_size, d=1))  # Some frequencies, e.g. {4, 3, -7, 8, -5, ...}

    indexes = list(range(len(X_frequency_domain)))  # {0, 1, ..., n-1}
    indexes.sort(key=lambda idx: np.absolute(X_frequency_domain[idx]), reverse=True)  # ascendingly sort indices by amplitude
    
    sample_index = np.arange(0, data_size + number_of_predictions)  # sample_index = {0, 1, ..., n-1, n, n+1, ..., (n-1)+number_of_predictions}
    x_restored_sig = np.zeros(sample_index.size)  # Prepare a numpy array to receive x reconstructed from its Fourier Transform

    if n_sinusoids == 0:
        return x_restored_sig + data.mean()

    for i in indexes[:n_sinusoids]:
        amplitude = np.absolute(X_frequency_domain[i])
        phase = np.angle(X_frequency_domain[i])
        x_restored_sig += amplitude * np.cos(2 * np.pi * frequencies[i] * sample_index + phase)

    x_restored = 1 / data_size * x_restored_sig

    return x_restored

In [149]:
TRAIN_DATA = DATA[:int(len(DATA) * 0.80)]

In [150]:
TRAIN_DATA_SIZE = TRAIN_DATA.size

WEEK = 24 * 7

train_range = WEEK
VALIDATION_RANGE = 24 * 3

SINUSOIDS = list(range(0, 50)) + list(range(50, 210, 10))

In [151]:
np.seterr(invalid='ignore')

{'divide': 'warn', 'over': 'warn', 'under': 'ignore', 'invalid': 'ignore'}

In [152]:
cv_DataFrame = pd.DataFrame(index=SINUSOIDS)
fold = 0
while train_range + VALIDATION_RANGE <= TRAIN_DATA_SIZE:
    sinusoids_rmse = []
    for n_sinusoids in SINUSOIDS:
        fft = fourierExtrapolation(data=TRAIN_DATA[train_range - WEEK:train_range], number_of_predictions=VALIDATION_RANGE, n_sinusoids=n_sinusoids)
        prediction = pd.Series(data=fft[WEEK:], index=TRAIN_DATA[train_range:train_range + VALIDATION_RANGE].index)

        actual = TRAIN_DATA[train_range:train_range+VALIDATION_RANGE]

        prediction.reset_index(inplace=True, drop=True)

        sinusoids_rmse.append(np.sqrt(mean_squared_error(actual, prediction)))

    cv_DataFrame[f"Fold_{fold}"] = sinusoids_rmse

    train_range += 24 * 5
    fold += 1

In [153]:
cv_DataFrame

Unnamed: 0,Fold_0,Fold_1,Fold_2,Fold_3,Fold_4,Fold_5,Fold_6,Fold_7,Fold_8,Fold_9,...,Fold_47,Fold_48,Fold_49,Fold_50,Fold_51,Fold_52,Fold_53,Fold_54,Fold_55,Fold_56
0,0.572256,1.073675,0.686379,0.752570,0.812613,0.734595,0.948837,0.487269,0.761634,0.551533,...,0.543403,0.516169,0.439756,0.508708,0.579314,0.610836,0.547136,0.585801,0.409187,0.436892
1,0.572256,1.073675,0.686379,0.752570,0.812613,0.734595,0.948837,0.487269,0.761634,0.551533,...,0.543403,0.516169,0.439756,0.508708,0.579314,0.610836,0.547136,0.585801,0.409187,0.436892
2,0.526302,1.034416,0.819417,0.693076,0.799358,0.668457,0.961516,0.449180,0.745852,0.576125,...,0.501090,0.492153,0.437596,0.524521,0.638968,0.734421,0.504567,0.546389,0.373054,0.415959
3,0.527735,1.017040,0.986349,0.659445,0.830955,0.641637,0.994482,0.431096,0.741538,0.622353,...,0.476004,0.499845,0.454731,0.551130,0.705756,0.893453,0.486356,0.528518,0.370208,0.419184
4,0.484367,1.044078,0.950726,0.672569,0.757215,0.632081,0.977668,0.413415,0.743847,0.607379,...,0.504061,0.524255,0.438077,0.531075,0.639228,0.900907,0.483441,0.521676,0.380028,0.422419
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
160,0.617055,1.151393,0.960108,0.587453,0.850115,0.559415,0.934832,0.425556,0.596617,0.527102,...,0.480848,0.655596,0.439267,0.401605,0.660425,0.898645,0.423601,0.480663,0.318675,0.338877
170,0.617197,1.151133,0.960407,0.587751,0.851029,0.561535,0.934583,0.426771,0.596877,0.528684,...,0.481255,0.655565,0.438526,0.402477,0.660093,0.898737,0.423589,0.480653,0.319049,0.339234
180,0.617197,1.151133,0.960407,0.587751,0.851029,0.561535,0.934583,0.426771,0.596877,0.528684,...,0.481255,0.655565,0.438526,0.402477,0.660093,0.898737,0.423589,0.480653,0.319049,0.339234
190,0.617197,1.151133,0.960407,0.587751,0.851029,0.561535,0.934583,0.426771,0.596877,0.528684,...,0.481255,0.655565,0.438526,0.402477,0.660093,0.898737,0.423589,0.480653,0.319049,0.339234


In [154]:
cv_DataFrame.mean(axis=1).idxmin()

130

In [155]:
cv_DataFrame.mean(axis=1)

0      0.598310
1      0.598310
2      0.587236
3      0.601959
4      0.590819
         ...   
160    0.549915
170    0.550119
180    0.550119
190    0.550119
200    0.550119
Length: 66, dtype: float64

In [156]:
sinusoids_rmse = []
fold = 0
while train_range + VALIDATION_RANGE <= DATA.size:
    fft = fourierExtrapolation(data=DATA[train_range - WEEK:train_range],
                               number_of_predictions=VALIDATION_RANGE,
                               n_sinusoids=cv_DataFrame.mean(axis=1).idxmin())
    prediction = pd.Series(data=fft[WEEK:])

    actual = DATA[train_range: train_range+VALIDATION_RANGE]

    prediction.reset_index(inplace=True, drop=True)

    sinusoids_rmse.append(np.sqrt(mean_squared_error(actual, prediction)))

    train_range += 24 * 5
    fold += 1

test_rmse = sum(sinusoids_rmse) / len(sinusoids_rmse)
test_rmse

0.698240112451032

In [157]:
fft = fourierExtrapolation(data=DATA[0:WEEK], number_of_predictions=VALIDATION_RANGE, n_sinusoids=cv_DataFrame.mean(axis=1).idxmin())
prediction = pd.Series(data=fft[WEEK:], index=DATA[DATA[0:WEEK].size:DATA[0:WEEK].size + VALIDATION_RANGE].index)

pred_test = fft[:WEEK]

actual = DATA[WEEK:WEEK+VALIDATION_RANGE]

if attributes == PV_ATTRIBUTES:
    pv = prediction
elif attributes == FIXED_ATTRIBUTES:
    fix = prediction
elif attributes == FLEX_ATTRIBUTES:
    flex = actual

In [158]:
fig = go.Figure()

# fig.add_trace(go.Scatter(x=list(np.arange(start=WEEK, stop=WEEK + VALIDATION_RANGE)) + list(np.arange(start=WEEK, stop=WEEK + VALIDATION_RANGE))[::-1],
#                          y=list(prediction + epsilon) + list(prediction - epsilon)[::-1], # upper, then lower reversed
#                          fill='toself',
#                          fillcolor='rgba(0,100,80,0.2)',
#                          line=dict(color='rgba(255,255,255,0)'),
#                          hoverinfo="skip",
#                          showlegend=False
#                          ))

# fig.add_trace(go.Scatter(x=np.arange(start=0, stop=WEEK),
#                          y=pred_test,
#                          name='Training Data [Prediction]',
#                          mode='lines',
#                          line=dict(color='rgb(84, 0, 84)')))

fig.add_trace(go.Scatter(x=np.arange(start=WEEK-WEEK, stop=WEEK),
                         y=DATA[WEEK-WEEK:WEEK],
                         name='Training Data',
                         mode='lines',
                         line=dict(color='rgb(84, 84, 84)')))

fig.add_trace(go.Scatter(x=np.arange(start=WEEK, stop=WEEK + VALIDATION_RANGE),
                         y=actual,
                         name='Observed',
                         mode='lines',
                         line=dict(color='rgb(234,143,129)')))

fig.add_trace(go.Scatter(x=np.arange(start=WEEK, stop=WEEK + VALIDATION_RANGE),
                         y=prediction,
                         name='Predicted',
                         mode='lines',
                         line=dict(color='rgb(32,115,171)')))

fig.update_layout(
    xaxis_title="Index",
    yaxis_title="Photovoltaic Production [kWh]",
    legend=dict(orientation="h",
                yanchor="bottom",
                y=1,
                xanchor="left",
                x=0))

fig.show()

In [159]:
idx = list(range(0, 72, 1))

fig = go.Figure()

fig.add_trace(go.Scatter(x=idx,
                         y=pv,
                         name='PV',
                         mode='lines',
                         line=dict(color='royalblue', width=3)))

fig.add_trace(go.Scatter(x=idx,
                         y=fix,
                         name='Fixed',
                         mode='lines',
                         line=dict(color='firebrick', width=3)))

fig.add_trace(go.Scatter(x=idx,
                         y=flex,
                         name='Flexible',
                         mode='lines',
                         line=dict(color='firebrick', width=3, dash='dash')))

fig.add_trace(go.Scatter(x=idx,
                         y=np.subtract(pv, np.add(fix, flex)),
                         name='Balance',
                         mode='lines',
                         line=dict(color='black', width=4),
                         fill='tozeroy',
                         fillcolor="rgba(80, 80, 80, 0.5)"))

fig.update_layout(plot_bgcolor='rgba(0,0,0,0)',
                  legend=dict(orientation="h",
                              yanchor="top",
                              y=1.06,
                              xanchor="left",
                              x=0.0))

#x axis
# fig.update_xaxes(visible=False)

#y axis
# fig.update_yaxes(visible=False)

fig.update_xaxes(showline=True, linewidth=2, linecolor='grey', title="Index")
fig.update_yaxes(showline=True, linewidth=2, linecolor='grey', title="kWh")

fig.show()

NameError: name 'flex' is not defined

In [None]:
from sklearn.ensemble import RandomForestRegressor
weather = pd.read_csv("GAI_2015_2016.csv")
weather["Time"] = pd.to_datetime(weather["Time"])
ensemble = pd.DataFrame(DATA[0:WEEK+VALIDATION_RANGE], columns=["Target"]).merge(weather, how="left", left_index=True, right_on="Time")
ensemble

In [None]:
ensemble.index = ensemble["Time"]
ensemble["Time"] = ensemble["Time"].dt.hour
ensemble

In [None]:
from sklearn.preprocessing import OneHotEncoder
ohe = OneHotEncoder(sparse=False)
hot_np = ohe.fit_transform(ensemble[["Time", "Condition"]])
hot = pd.DataFrame(data=hot_np, columns=ohe.get_feature_names_out())
hot_test = hot[:WEEK]
hot_vali = hot[WEEK:]

In [None]:
rfr = RandomForestRegressor()
rfr.fit(hot_test, ensemble["Target"][:WEEK])
np.sqrt(mean_squared_error(rfr.predict(hot_vali), ensemble["Target"][WEEK:]))

# FIG INTRO FORECASTING

In [None]:
# idx = list(range(0, 72, 2))
# pv = [x * 1.2 for x in [0, 0, 0, 4, 20, 45, 50, 50, 34, 10, 0, 0, 0, 0, 0, 2, 15, 35, 40, 45, 30, 15, 0, 0, 0, 0, 0, 10, 25, 50, 60, 50, 44, 20, 0, 0]]
# fix = [10, 9, 10, 4, 22, 14, 12, 0, 12, 21, 13, 14, 12, 12, 32, 21, 11, 12, 21, 12, 22, 23, 11, 9, 8, 19, 17, 25, 20, 11, 15, 12, 15, 12, 18, 20]
# flex = [0, 0, 0, 20, 5, 0, 0, 0, 19, 10, 0, 8, 0, 0, 0, 5, 9, 0, 0, 0, 0, 20, 10, 0, 5, 0, 19, 0, 0, 20, 9, 7, 0, 0, 0, 4]
#
# fig = go.Figure()
#
# fig.add_trace(go.Scatter(x=idx,
#                          y=pv,
#                          name='PV',
#                          mode='lines',
#                          line=dict(color='royalblue', width=3)))
#
# fig.add_trace(go.Scatter(x=idx,
#                          y=fix,
#                          name='Fixed',
#                          mode='lines',
#                          line=dict(color='firebrick', width=3)))
#
# fig.add_trace(go.Scatter(x=idx,
#                          y=flex,
#                          name='Flexible',
#                          mode='lines',
#                          line=dict(color='firebrick', width=3, dash='dash')))
#
# fig.add_trace(go.Scatter(x=idx,
#                          y=np.subtract(pv, np.add(fix, flex)),
#                          name='Balance',
#                          mode='lines',
#                          line=dict(color='black', width=4),
#                          fill='tozeroy',
#                          fillcolor="rgba(80, 80, 80, 0.5)"))
#
# fig.update_layout(plot_bgcolor='rgba(0,0,0,0)',
#                   legend=dict(orientation="h",
#                               yanchor="top",
#                               y=1.05,
#                               xanchor="left",
#                               x=0.0))
#
# #x axis
# fig.update_xaxes(visible=False)
#
# #y axis
# fig.update_yaxes(visible=False)
#
# fig.show()