In [45]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from sklearn.metrics import mean_squared_error

import DataRetriever as dr

RETRIEVER = dr.DataRetriever()
CON_ATTRIBUTES = RETRIEVER.get_attributes(file_name='consuming_attributes.pkl')
PV_ATTRIBUTES = RETRIEVER.get_attributes(file_name='producing_attributes.pkl')
FLEX_ATTRIBUTES = ["Load_ClothesWasherPowerWithStandby", "Elec_PowerDishwasher", "Load_DryerPowerTotal"]
FIXED_ATTRIBUTES = list(set(CON_ATTRIBUTES) - set(FLEX_ATTRIBUTES))

attributes = FIXED_ATTRIBUTES

DATA = RETRIEVER.get_data(file_name='All-Subsystems-hour-Year2.pkl')[attributes].sum(axis=1).clip(lower=0) / 1000

In [46]:
from statsmodels.tsa.seasonal import STL

prod_stl_config = STL(endog=DATA,
                      period=24,  # We expect the pattern to repeat daily
                      seasonal=7,  # Default
                      trend=None,  # Default, follows suggestion from original paper (1.5 * period) / (1 - 1.5 / seasonal)
                      low_pass=None,  # Default lowest odd integer greater than period
                      seasonal_deg=1,  # Degree of LOESS to calculate seasonal element, chosen as in original paper
                      trend_deg=1,  # Degree of LOESS to calculate trend element, chosen as in original paper
                      low_pass_deg=1,  # Degree of LOESS in the low-pass filter, chosen as in original paper
                      robust=True,  # Setting to True should make the method robust to outliers
                      # The following three attributes determines whether to skip some LOESS smoothings, using linear interpolation to estimate the skipped points. Used only to decrease computational time. Set to 1 to not skip any smoothings.
                      seasonal_jump=1,
                      trend_jump=1,
                      low_pass_jump=1)

prod_decomp = prod_stl_config.fit()
(prod_decomp.seasonal + prod_decomp.trend.mean()) - prod_decomp.observed

Timestamp
2015-02-01 00:00:00   -0.958991
2015-02-01 01:00:00   -0.999749
2015-02-01 02:00:00   -0.961100
2015-02-01 03:00:00   -0.869659
2015-02-01 04:00:00   -0.946292
                         ...   
2016-01-31 19:00:00    0.211204
2016-01-31 20:00:00    0.161497
2016-01-31 21:00:00    0.226108
2016-01-31 22:00:00    0.344384
2016-01-31 23:00:00    0.255916
Freq: H, Length: 8760, dtype: float64

In [47]:
def fourierExtrapolation(data: np.array, number_of_predictions: int, n_sinusoids: int) -> np.array:
    """
    Predict {number_of_predictions} observations after the index data.size of {data}, using {n_sinusoids} sinusoids.
    :param data: The data on which to train the model. Corresponds to {_x = x_0, x_1, ... x_(n-1)} in the theory
    :param number_of_predictions: The amount of predictions to output. Corresponds to {x_((n-1)+1), x_((n-1)+2), ..., x_((n-1)+{number_of_predictions})}
    :param n_sinusoids: The amount of sinusoids on which to base the predictions. I.e. the data probably contains many sinusoids, but we only wish to make predictions based on the {n_sinusoids} largest frequencies.
    :return: A numpy array of length data.size + number_of_predictions, containing the transformed original data + predictions
    """
    data_size = data.size  # n

    X_frequency_domain = list(np.fft.fft(data))  # The series of complex numbers X = {X1, X2, ...}
    frequencies = list(np.fft.fftfreq(data_size, d=1))  # Some frequencies, e.g. {4, 3, -7, 8, -5, ...}

    indexes = list(range(len(X_frequency_domain)))  # {0, 1, ..., n-1}
    indexes.sort(key=lambda idx: np.absolute(X_frequency_domain[idx]), reverse=True)  # ascendingly sort indices by amplitude
    
    sample_index = np.arange(0, data_size + number_of_predictions)  # sample_index = {0, 1, ..., n-1, n, n+1, ..., (n-1)+number_of_predictions}
    x_restored_sig = np.zeros(sample_index.size)  # Prepare a numpy array to receive x reconstructed from its Fourier Transform

    if n_sinusoids == 0:
        return x_restored_sig + data.mean()

    for i in indexes[:n_sinusoids]:
        amplitude = np.absolute(X_frequency_domain[i])
        phase = np.angle(X_frequency_domain[i])
        x_restored_sig += amplitude * np.cos(2 * np.pi * frequencies[i] * sample_index + phase)

    x_restored = 1 / data_size * x_restored_sig

    return x_restored

In [48]:
DATA_SIZE = DATA.size

WEEK = 24 * 7

train_range = WEEK
VALIDATION_RANGE = 24 * 3

SINUSOIDS = list(range(0, 50)) + list(range(50, 210, 10))

In [49]:
np.seterr(invalid='ignore')

{'divide': 'warn', 'over': 'warn', 'under': 'ignore', 'invalid': 'ignore'}

In [50]:
# # RANDOM FOREST
# from sklearn.ensemble import RandomForestRegressor
# from sklearn.preprocessing import OneHotEncoder
# weather = pd.read_csv("GAI_2015_2016.csv")
# weather["Time"] = pd.to_datetime(weather["Time"])
#
# fold = 0
# lst = []
# while train_range + VALIDATION_RANGE <= DATA_SIZE:
#     ensemble = pd.DataFrame(DATA[train_range-WEEK:train_range+VALIDATION_RANGE], columns=["Target"]).merge(weather, how="left", left_index=True, right_on="Time")
#
#     ensemble.index = ensemble["Time"]
#     ensemble["Time"] = ensemble["Time"].dt.hour
#
#     ohe = OneHotEncoder(sparse=False)
#     hot_np = ohe.fit_transform(ensemble[["Time", "Condition"]])
#     hot = pd.DataFrame(data=hot_np, columns=ohe.get_feature_names_out())
#     hot_test = hot[:WEEK]
#     hot_vali = hot[WEEK:]
#
#     rfr = RandomForestRegressor()
#     rfr.fit(hot_test, ensemble["Target"][:WEEK])
#
#     lst.append(np.sqrt(mean_squared_error(rfr.predict(hot_vali), ensemble["Target"][WEEK:])))
#
#     train_range += 24 * 14
#     fold += 1
#
# sum(lst) / len(lst)

In [51]:
cv_DataFrame = pd.DataFrame(index=SINUSOIDS)
fold = 0
while train_range + VALIDATION_RANGE <= DATA_SIZE:



    # ensemble = pd.DataFrame(DATA[train_range-WEEK:train_range+VALIDATION_RANGE], columns=["Target"]).merge(weather, how="left", left_index=True, right_on="Time")
    #
    # ensemble.index = ensemble["Time"]
    # ensemble["Time"] = ensemble["Time"].dt.hour
    #
    # ohe = OneHotEncoder(sparse=False)
    # hot_np = ohe.fit_transform(ensemble[["Time", "Condition"]])
    # hot = pd.DataFrame(data=hot_np, columns=ohe.get_feature_names_out())
    # hot_test = hot[:WEEK]
    # hot_vali = hot[WEEK:]
    #
    # rfr = RandomForestRegressor()
    # rfr.fit(hot_test, ensemble["Target"][:WEEK])
    # rfr_pred = pd.Series(rfr.predict(hot_vali))



    sinusoids_r2 = []
    for n_sinusoids in SINUSOIDS:
        fft = fourierExtrapolation(data=DATA[train_range - WEEK:train_range], number_of_predictions=VALIDATION_RANGE, n_sinusoids=n_sinusoids)
        prediction = pd.Series(data=fft[WEEK:], index=DATA[train_range:train_range + VALIDATION_RANGE].index)

        actual = DATA[train_range:train_range+VALIDATION_RANGE]

        prediction.reset_index(inplace=True, drop=True)

        sinusoids_r2.append(np.sqrt(mean_squared_error(actual, prediction)))
        # sinusoids_r2.append(np.sqrt(mean_squared_error(actual, np.add(rfr_pred, prediction) / 2)))

    cv_DataFrame[f"Fold_{fold}"] = sinusoids_r2

    train_range += 24 * 14
    fold += 1

In [52]:
cv_DataFrame

Unnamed: 0,Fold_0,Fold_1,Fold_2,Fold_3,Fold_4,Fold_5,Fold_6,Fold_7,Fold_8,Fold_9,...,Fold_16,Fold_17,Fold_18,Fold_19,Fold_20,Fold_21,Fold_22,Fold_23,Fold_24,Fold_25
0,0.572256,1.004369,0.895792,0.508396,0.529221,0.409703,0.495886,0.590955,0.740146,0.595604,...,0.548294,0.605748,0.619475,0.472206,0.436892,0.486230,0.594145,0.566912,0.669122,1.362211
1,0.572256,1.004369,0.895792,0.508396,0.529221,0.409703,0.495886,0.590955,0.740146,0.595604,...,0.548294,0.605748,0.619475,0.472206,0.436892,0.486230,0.594145,0.566912,0.669122,1.362211
2,0.526302,0.969065,0.882828,0.491193,0.659389,0.389461,0.481852,0.557645,0.705698,0.544173,...,0.536253,0.576579,0.612344,0.428718,0.415959,0.441925,0.555105,0.534052,0.647356,1.449158
3,0.527735,0.951193,0.896275,0.508089,0.813633,0.391169,0.477971,0.553955,0.697831,0.534631,...,0.570258,0.571161,0.614779,0.411862,0.419184,0.420312,0.545003,0.526012,0.648058,1.551863
4,0.484367,0.971413,0.859768,0.484036,0.815273,0.391405,0.475114,0.490126,0.684594,0.592166,...,0.537885,0.561462,0.616995,0.406137,0.422419,0.426161,0.507074,0.511863,0.615837,1.540819
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
160,0.617055,0.967715,0.940679,0.418385,0.923883,0.275971,0.429399,0.224806,0.681958,0.596869,...,0.380293,0.562705,0.622561,0.427304,0.338877,0.483508,0.401574,0.597233,0.658158,1.423390
170,0.617197,0.967694,0.941161,0.418008,0.924273,0.276048,0.429706,0.223836,0.682679,0.596456,...,0.380426,0.562637,0.622759,0.426890,0.339234,0.483397,0.401269,0.597178,0.658428,1.423456
180,0.617197,0.967694,0.941161,0.418008,0.924273,0.276048,0.429706,0.223836,0.682679,0.596456,...,0.380426,0.562637,0.622759,0.426890,0.339234,0.483397,0.401269,0.597178,0.658428,1.423456
190,0.617197,0.967694,0.941161,0.418008,0.924273,0.276048,0.429706,0.223836,0.682679,0.596456,...,0.380426,0.562637,0.622759,0.426890,0.339234,0.483397,0.401269,0.597178,0.658428,1.423456


In [53]:
cv_DataFrame.mean(axis=1).idxmin()

70

In [54]:
cv_DataFrame.mean(axis=1)

0      0.614090
1      0.614090
2      0.594957
3      0.603578
4      0.593170
         ...   
160    0.566070
170    0.566025
180    0.566025
190    0.566025
200    0.566025
Length: 66, dtype: float64

In [55]:
DATA = DATA[WEEK*6:]

In [56]:
fft = fourierExtrapolation(data=DATA[0:WEEK], number_of_predictions=VALIDATION_RANGE, n_sinusoids=cv_DataFrame.mean(axis=1).idxmin())
prediction = pd.Series(data=fft[WEEK:], index=DATA[DATA[0:WEEK].size:DATA[0:WEEK].size + VALIDATION_RANGE].index)

pred_test = fft[:WEEK]

actual = DATA[WEEK:WEEK+VALIDATION_RANGE]

if attributes == PV_ATTRIBUTES:
    pv = prediction
elif attributes == FIXED_ATTRIBUTES:
    fix = prediction
elif attributes == FLEX_ATTRIBUTES:
    flex = actual

In [57]:
fig = go.Figure()

# fig.add_trace(go.Scatter(x=list(np.arange(start=WEEK, stop=WEEK + VALIDATION_RANGE)) + list(np.arange(start=WEEK, stop=WEEK + VALIDATION_RANGE))[::-1],
#                          y=list(prediction + epsilon) + list(prediction - epsilon)[::-1], # upper, then lower reversed
#                          fill='toself',
#                          fillcolor='rgba(0,100,80,0.2)',
#                          line=dict(color='rgba(255,255,255,0)'),
#                          hoverinfo="skip",
#                          showlegend=False
#                          ))

# fig.add_trace(go.Scatter(x=np.arange(start=0, stop=WEEK),
#                          y=pred_test,
#                          name='Training Data [Prediction]',
#                          mode='lines',
#                          line=dict(color='rgb(84, 0, 84)')))

fig.add_trace(go.Scatter(x=np.arange(start=WEEK-WEEK, stop=WEEK),
                         y=DATA[WEEK-WEEK:WEEK],
                         name='Training Data',
                         mode='lines',
                         line=dict(color='rgb(84, 84, 84)')))

fig.add_trace(go.Scatter(x=np.arange(start=WEEK, stop=WEEK + VALIDATION_RANGE),
                         y=actual,
                         name='Observed',
                         mode='lines',
                         line=dict(color='rgb(234,143,129)')))

fig.add_trace(go.Scatter(x=np.arange(start=WEEK, stop=WEEK + VALIDATION_RANGE),
                         y=prediction,
                         name='Predicted',
                         mode='lines',
                         line=dict(color='rgb(32,115,171)')))

fig.update_layout(
    xaxis_title="Index",
    yaxis_title="Photovoltaic Production [kWh]",
    legend=dict(orientation="h",
                yanchor="bottom",
                y=1,
                xanchor="left",
                x=0))

fig.show()

In [58]:
idx = list(range(0, 72, 1))

fig = go.Figure()

fig.add_trace(go.Scatter(x=idx,
                         y=pv,
                         name='PV',
                         mode='lines',
                         line=dict(color='royalblue', width=3)))

fig.add_trace(go.Scatter(x=idx,
                         y=fix,
                         name='Fixed',
                         mode='lines',
                         line=dict(color='firebrick', width=3)))

fig.add_trace(go.Scatter(x=idx,
                         y=flex,
                         name='Flexible',
                         mode='lines',
                         line=dict(color='firebrick', width=3, dash='dash')))

fig.add_trace(go.Scatter(x=idx,
                         y=np.subtract(pv, np.add(fix, flex)),
                         name='Balance',
                         mode='lines',
                         line=dict(color='black', width=4),
                         fill='tozeroy',
                         fillcolor="rgba(80, 80, 80, 0.5)"))

fig.update_layout(plot_bgcolor='rgba(0,0,0,0)',
                  legend=dict(orientation="h",
                              yanchor="top",
                              y=1.06,
                              xanchor="left",
                              x=0.0))

#x axis
# fig.update_xaxes(visible=False)

#y axis
# fig.update_yaxes(visible=False)

fig.update_xaxes(showline=True, linewidth=2, linecolor='grey', title="Index")
fig.update_yaxes(showline=True, linewidth=2, linecolor='grey', title="kWh")

fig.show()

NameError: name 'flex' is not defined

In [None]:
from sklearn.ensemble import RandomForestRegressor
weather = pd.read_csv("GAI_2015_2016.csv")
weather["Time"] = pd.to_datetime(weather["Time"])
ensemble = pd.DataFrame(DATA[0:WEEK+VALIDATION_RANGE], columns=["Target"]).merge(weather, how="left", left_index=True, right_on="Time")
ensemble

In [None]:
ensemble.index = ensemble["Time"]
ensemble["Time"] = ensemble["Time"].dt.hour
ensemble

In [None]:
from sklearn.preprocessing import OneHotEncoder
ohe = OneHotEncoder(sparse=False)
hot_np = ohe.fit_transform(ensemble[["Time", "Condition"]])
hot = pd.DataFrame(data=hot_np, columns=ohe.get_feature_names_out())
hot_test = hot[:WEEK]
hot_vali = hot[WEEK:]

In [None]:
rfr = RandomForestRegressor()
rfr.fit(hot_test, ensemble["Target"][:WEEK])
np.sqrt(mean_squared_error(rfr.predict(hot_vali), ensemble["Target"][WEEK:]))

# FIG INTRO FORECASTING

In [None]:
# idx = list(range(0, 72, 2))
# pv = [x * 1.2 for x in [0, 0, 0, 4, 20, 45, 50, 50, 34, 10, 0, 0, 0, 0, 0, 2, 15, 35, 40, 45, 30, 15, 0, 0, 0, 0, 0, 10, 25, 50, 60, 50, 44, 20, 0, 0]]
# fix = [10, 9, 10, 4, 22, 14, 12, 0, 12, 21, 13, 14, 12, 12, 32, 21, 11, 12, 21, 12, 22, 23, 11, 9, 8, 19, 17, 25, 20, 11, 15, 12, 15, 12, 18, 20]
# flex = [0, 0, 0, 20, 5, 0, 0, 0, 19, 10, 0, 8, 0, 0, 0, 5, 9, 0, 0, 0, 0, 20, 10, 0, 5, 0, 19, 0, 0, 20, 9, 7, 0, 0, 0, 4]
#
# fig = go.Figure()
#
# fig.add_trace(go.Scatter(x=idx,
#                          y=pv,
#                          name='PV',
#                          mode='lines',
#                          line=dict(color='royalblue', width=3)))
#
# fig.add_trace(go.Scatter(x=idx,
#                          y=fix,
#                          name='Fixed',
#                          mode='lines',
#                          line=dict(color='firebrick', width=3)))
#
# fig.add_trace(go.Scatter(x=idx,
#                          y=flex,
#                          name='Flexible',
#                          mode='lines',
#                          line=dict(color='firebrick', width=3, dash='dash')))
#
# fig.add_trace(go.Scatter(x=idx,
#                          y=np.subtract(pv, np.add(fix, flex)),
#                          name='Balance',
#                          mode='lines',
#                          line=dict(color='black', width=4),
#                          fill='tozeroy',
#                          fillcolor="rgba(80, 80, 80, 0.5)"))
#
# fig.update_layout(plot_bgcolor='rgba(0,0,0,0)',
#                   legend=dict(orientation="h",
#                               yanchor="top",
#                               y=1.05,
#                               xanchor="left",
#                               x=0.0))
#
# #x axis
# fig.update_xaxes(visible=False)
#
# #y axis
# fig.update_yaxes(visible=False)
#
# fig.show()