In [12]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from sklearn.metrics import mean_squared_error

import DataRetriever as dr

RETRIEVER = dr.DataRetriever()
CON_ATTRIBUTES = RETRIEVER.get_attributes(file_name='consuming_attributes.pkl')
PV_ATTRIBUTES = RETRIEVER.get_attributes(file_name='producing_attributes.pkl')
FLEX_ATTRIBUTES = ["Load_ClothesWasherPowerWithStandby", "Elec_PowerDishwasher", "Load_DryerPowerTotal"]
FIXED_ATTRIBUTES = list(set(CON_ATTRIBUTES) - set(FLEX_ATTRIBUTES))

attributes = PV_ATTRIBUTES

DATA = RETRIEVER.get_data(file_name='All-Subsystems-hour-Year2.pkl')[attributes].sum(axis=1).clip(lower=0) / 1000

In [13]:
def fourierExtrapolation(data: np.array, number_of_predictions: int, n_sinusoids: int) -> np.array:
    """
    Predict {number_of_predictions} observations after the index data.size of {data}, using {n_sinusoids} sinusoids.
    :param data: The data on which to train the model. Corresponds to {_x = x_0, x_1, ... x_(n-1)} in the theory
    :param number_of_predictions: The amount of predictions to output. Corresponds to {x_((n-1)+1), x_((n-1)+2), ..., x_((n-1)+{number_of_predictions})}
    :param n_sinusoids: The amount of sinusoids on which to base the predictions. I.e. the data probably contains many sinusoids, but we only wish to make predictions based on the {n_sinusoids} largest frequencies.
    :return: A numpy array of length data.size + number_of_predictions, containing the transformed original data + predictions
    """
    data_size = data.size  # n

    X_frequency_domain = list(np.fft.fft(data))  # The series of complex numbers X = {X1, X2, ...}
    frequencies = list(np.fft.fftfreq(data_size, d=1))  # Some frequencies, e.g. {4, 3, -7, 8, -5, ...}

    indexes = list(range(len(X_frequency_domain)))  # {0, 1, ..., n-1}
    indexes.sort(key=lambda idx: np.absolute(X_frequency_domain[idx]), reverse=True)  # ascendingly sort indices by amplitude
    
    sample_index = np.arange(0, data_size + number_of_predictions)  # sample_index = {0, 1, ..., n-1, n, n+1, ..., (n-1)+number_of_predictions}
    x_restored_sig = np.zeros(sample_index.size)  # Prepare a numpy array to receive x reconstructed from its Fourier Transform

    if n_sinusoids == 0:
        return x_restored_sig + data.mean()

    for i in indexes[:n_sinusoids]:
        amplitude = np.absolute(X_frequency_domain[i])
        phase = np.angle(X_frequency_domain[i])
        x_restored_sig += amplitude * np.cos(2 * np.pi * frequencies[i] * sample_index + phase)

    x_restored = 1 / data_size * x_restored_sig

    return x_restored

In [14]:
DATA_SIZE = DATA.size

WEEK = 24 * 7

train_range = WEEK
VALIDATION_RANGE = 24 * 3

SINUSOIDS = list(range(0, 50)) + list(range(50, 210, 10))

In [15]:
np.seterr(invalid='ignore')

{'divide': 'warn', 'over': 'warn', 'under': 'ignore', 'invalid': 'ignore'}

In [16]:
cv_DataFrame = pd.DataFrame(index=SINUSOIDS)
fold = 0
while train_range + VALIDATION_RANGE <= DATA_SIZE:
    sinusoids_rmse = []
    for n_sinusoids in SINUSOIDS:
        fft = fourierExtrapolation(data=DATA[train_range - WEEK:train_range], number_of_predictions=VALIDATION_RANGE, n_sinusoids=n_sinusoids)
        prediction = pd.Series(data=fft[WEEK:], index=DATA[train_range:train_range + VALIDATION_RANGE].index)

        actual = DATA[train_range:train_range+VALIDATION_RANGE]

        prediction.reset_index(inplace=True, drop=True)

        sinusoids_rmse.append(np.sqrt(mean_squared_error(actual, prediction)))

    cv_DataFrame[f"Fold_{fold}"] = sinusoids_rmse

    train_range += 24 * 5
    fold += 1

In [17]:
cv_DataFrame

Unnamed: 0,Fold_0,Fold_1,Fold_2,Fold_3,Fold_4,Fold_5,Fold_6,Fold_7,Fold_8,Fold_9,...,Fold_62,Fold_63,Fold_64,Fold_65,Fold_66,Fold_67,Fold_68,Fold_69,Fold_70,Fold_71
0,0.976914,2.874471,1.180418,3.336224,2.882459,1.210185,2.433843,2.771775,2.561602,1.894579,...,1.588087,1.592958,0.751028,1.337330,2.756214,1.880038,2.058219,2.250949,0.855160,2.733976
1,0.976914,2.874471,1.180418,3.336224,2.882459,1.210185,2.433843,2.771775,2.561602,1.894579,...,1.588087,1.592958,0.751028,1.337330,2.756214,1.880038,2.058219,2.250949,0.855160,2.733976
2,0.777797,2.531467,1.355898,3.229371,2.423033,1.408076,2.177182,2.182594,2.115640,1.416996,...,1.332983,1.399963,0.643549,1.242731,2.376028,1.573752,1.764520,1.812620,0.954053,2.513237
3,1.061163,2.269154,1.845257,3.127114,2.187382,1.884525,1.985631,1.829805,2.095626,1.621300,...,1.244482,1.351313,0.743767,1.169163,2.065155,1.633747,1.548696,1.552853,1.302154,2.317027
4,1.010154,2.115723,1.886456,3.107812,1.985757,1.928508,2.192910,1.590575,1.951472,1.464755,...,1.108690,1.256489,0.879796,1.113195,1.841256,1.494543,1.400203,1.300918,1.325950,2.434810
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
160,1.354763,1.994756,3.216715,3.128677,2.575589,3.292408,2.589165,1.763423,2.317410,2.176218,...,1.948775,1.356376,1.775791,1.110079,2.501525,2.061967,1.935273,1.924599,2.550310,2.899791
170,1.355323,1.994574,3.216618,3.128693,2.575609,3.292345,2.589192,1.761963,2.319878,2.176365,...,1.948794,1.359184,1.776219,1.111765,2.501652,2.060954,1.934346,1.924489,2.550460,2.900236
180,1.355323,1.994574,3.216618,3.128693,2.575609,3.292345,2.589192,1.761963,2.319878,2.176365,...,1.948794,1.359184,1.776219,1.111765,2.501652,2.060954,1.934346,1.924489,2.550460,2.900236
190,1.355323,1.994574,3.216618,3.128693,2.575609,3.292345,2.589192,1.761963,2.319878,2.176365,...,1.948794,1.359184,1.776219,1.111765,2.501652,2.060954,1.934346,1.924489,2.550460,2.900236


In [18]:
cv_DataFrame.mean(axis=1).idxmin()

5

In [19]:
cv_DataFrame.mean(axis=1)

0      2.400878
1      2.400878
2      1.845958
3      1.640829
4      1.474310
         ...   
160    1.928368
170    1.928657
180    1.928657
190    1.928657
200    1.928657
Length: 66, dtype: float64

In [20]:
fft = fourierExtrapolation(data=DATA[0:WEEK], number_of_predictions=VALIDATION_RANGE, n_sinusoids=cv_DataFrame.mean(axis=1).idxmin())
prediction = pd.Series(data=fft[WEEK:], index=DATA[DATA[0:WEEK].size:DATA[0:WEEK].size + VALIDATION_RANGE].index)

pred_test = fft[:WEEK]

actual = DATA[WEEK:WEEK+VALIDATION_RANGE]

if attributes == PV_ATTRIBUTES:
    pv = prediction
elif attributes == FIXED_ATTRIBUTES:
    fix = prediction
elif attributes == FLEX_ATTRIBUTES:
    flex = actual

In [21]:
fig = go.Figure()

# fig.add_trace(go.Scatter(x=list(np.arange(start=WEEK, stop=WEEK + VALIDATION_RANGE)) + list(np.arange(start=WEEK, stop=WEEK + VALIDATION_RANGE))[::-1],
#                          y=list(prediction + epsilon) + list(prediction - epsilon)[::-1], # upper, then lower reversed
#                          fill='toself',
#                          fillcolor='rgba(0,100,80,0.2)',
#                          line=dict(color='rgba(255,255,255,0)'),
#                          hoverinfo="skip",
#                          showlegend=False
#                          ))

# fig.add_trace(go.Scatter(x=np.arange(start=0, stop=WEEK),
#                          y=pred_test,
#                          name='Training Data [Prediction]',
#                          mode='lines',
#                          line=dict(color='rgb(84, 0, 84)')))

fig.add_trace(go.Scatter(x=np.arange(start=WEEK-WEEK, stop=WEEK),
                         y=DATA[WEEK-WEEK:WEEK],
                         name='Training Data',
                         mode='lines',
                         line=dict(color='rgb(84, 84, 84)')))

fig.add_trace(go.Scatter(x=np.arange(start=WEEK, stop=WEEK + VALIDATION_RANGE),
                         y=actual,
                         name='Observed',
                         mode='lines',
                         line=dict(color='rgb(234,143,129)')))

fig.add_trace(go.Scatter(x=np.arange(start=WEEK, stop=WEEK + VALIDATION_RANGE),
                         y=prediction,
                         name='Predicted',
                         mode='lines',
                         line=dict(color='rgb(32,115,171)')))

fig.update_layout(
    xaxis_title="Index",
    yaxis_title="Photovoltaic Production [kWh]",
    legend=dict(orientation="h",
                yanchor="bottom",
                y=1,
                xanchor="left",
                x=0))

fig.show()

In [22]:
DATA.to_frame()

Unnamed: 0_level_0,0
Timestamp,Unnamed: 1_level_1
2015-02-01 00:00:00,0.018496
2015-02-01 01:00:00,0.018332
2015-02-01 02:00:00,0.018385
2015-02-01 03:00:00,0.018502
2015-02-01 04:00:00,0.018524
...,...
2016-01-31 19:00:00,0.017710
2016-01-31 20:00:00,0.017940
2016-01-31 21:00:00,0.018149
2016-01-31 22:00:00,0.018209


In [23]:
idx = list(range(0, 72, 1))

fig = go.Figure()

fig.add_trace(go.Scatter(x=idx,
                         y=pv,
                         name='PV',
                         mode='lines',
                         line=dict(color='royalblue', width=3)))

fig.add_trace(go.Scatter(x=idx,
                         y=fix,
                         name='Fixed',
                         mode='lines',
                         line=dict(color='firebrick', width=3)))

fig.add_trace(go.Scatter(x=idx,
                         y=flex,
                         name='Flexible',
                         mode='lines',
                         line=dict(color='firebrick', width=3, dash='dash')))

fig.add_trace(go.Scatter(x=idx,
                         y=np.subtract(pv, np.add(fix, flex)),
                         name='Balance',
                         mode='lines',
                         line=dict(color='black', width=4),
                         fill='tozeroy',
                         fillcolor="rgba(80, 80, 80, 0.5)"))

fig.update_layout(plot_bgcolor='rgba(0,0,0,0)',
                  legend=dict(orientation="h",
                              yanchor="top",
                              y=1.06,
                              xanchor="left",
                              x=0.0))

#x axis
# fig.update_xaxes(visible=False)

#y axis
# fig.update_yaxes(visible=False)

fig.update_xaxes(showline=True, linewidth=2, linecolor='grey', title="Index")
fig.update_yaxes(showline=True, linewidth=2, linecolor='grey', title="kWh")

fig.show()

NameError: name 'flex' is not defined

In [None]:
from sklearn.ensemble import RandomForestRegressor
weather = pd.read_csv("GAI_2015_2016.csv")
weather["Time"] = pd.to_datetime(weather["Time"])
ensemble = pd.DataFrame(DATA[0:WEEK+VALIDATION_RANGE], columns=["Target"]).merge(weather, how="left", left_index=True, right_on="Time")
ensemble

In [None]:
ensemble.index = ensemble["Time"]
ensemble["Time"] = ensemble["Time"].dt.hour
ensemble

In [None]:
from sklearn.preprocessing import OneHotEncoder
ohe = OneHotEncoder(sparse=False)
hot_np = ohe.fit_transform(ensemble[["Time", "Condition"]])
hot = pd.DataFrame(data=hot_np, columns=ohe.get_feature_names_out())
hot_test = hot[:WEEK]
hot_vali = hot[WEEK:]

In [None]:
rfr = RandomForestRegressor()
rfr.fit(hot_test, ensemble["Target"][:WEEK])
np.sqrt(mean_squared_error(rfr.predict(hot_vali), ensemble["Target"][WEEK:]))

# FIG INTRO FORECASTING

In [None]:
# idx = list(range(0, 72, 2))
# pv = [x * 1.2 for x in [0, 0, 0, 4, 20, 45, 50, 50, 34, 10, 0, 0, 0, 0, 0, 2, 15, 35, 40, 45, 30, 15, 0, 0, 0, 0, 0, 10, 25, 50, 60, 50, 44, 20, 0, 0]]
# fix = [10, 9, 10, 4, 22, 14, 12, 0, 12, 21, 13, 14, 12, 12, 32, 21, 11, 12, 21, 12, 22, 23, 11, 9, 8, 19, 17, 25, 20, 11, 15, 12, 15, 12, 18, 20]
# flex = [0, 0, 0, 20, 5, 0, 0, 0, 19, 10, 0, 8, 0, 0, 0, 5, 9, 0, 0, 0, 0, 20, 10, 0, 5, 0, 19, 0, 0, 20, 9, 7, 0, 0, 0, 4]
#
# fig = go.Figure()
#
# fig.add_trace(go.Scatter(x=idx,
#                          y=pv,
#                          name='PV',
#                          mode='lines',
#                          line=dict(color='royalblue', width=3)))
#
# fig.add_trace(go.Scatter(x=idx,
#                          y=fix,
#                          name='Fixed',
#                          mode='lines',
#                          line=dict(color='firebrick', width=3)))
#
# fig.add_trace(go.Scatter(x=idx,
#                          y=flex,
#                          name='Flexible',
#                          mode='lines',
#                          line=dict(color='firebrick', width=3, dash='dash')))
#
# fig.add_trace(go.Scatter(x=idx,
#                          y=np.subtract(pv, np.add(fix, flex)),
#                          name='Balance',
#                          mode='lines',
#                          line=dict(color='black', width=4),
#                          fill='tozeroy',
#                          fillcolor="rgba(80, 80, 80, 0.5)"))
#
# fig.update_layout(plot_bgcolor='rgba(0,0,0,0)',
#                   legend=dict(orientation="h",
#                               yanchor="top",
#                               y=1.05,
#                               xanchor="left",
#                               x=0.0))
#
# #x axis
# fig.update_xaxes(visible=False)
#
# #y axis
# fig.update_yaxes(visible=False)
#
# fig.show()