    # Decomposition of NZERTF Data

## Load data and packages

In [17]:
import pandas as pd

import DataRetriever as dr

retriever = dr.DataRetriever()

# Read the pickled year-2 measurements and collapse them into daily sums.
# NOTE(review): the variable keeps its "hour" name although it holds daily
# totals from this line onwards.
year2_hour = retriever.get_data("All-Subsystems-hour-Year2.pkl").resample("D").sum()

# Daily house load: select the consuming columns, floor negative readings at
# zero, add the columns together per day and divide by 1 000.
# Presumably this converts Wh to kWh (the figures are labelled kWh) — confirm.
load_attributes = retriever.get_data("consuming_attributes.pkl")
load_daily_total = year2_hour[load_attributes].clip(lower=0).sum(axis=1)
load_df = pd.DataFrame(load_daily_total / 1_000, columns=['House Load'])

# Daily production: same pipeline applied to the producing columns.
prod_attributes = retriever.get_data("producing_attributes.pkl")
prod_daily_total = year2_hour[prod_attributes].clip(lower=0).sum(axis=1)
prod_df = pd.DataFrame(prod_daily_total / 1_000, columns=["Produced Energy"])

## STL
#### Packages

In [18]:
from statsmodels.tsa.seasonal import STL

### Decomposing Production

In [19]:
# STL configuration for the production series. Every argument is spelled out
# so the notebook documents the exact settings used.
prod_stl_config = STL(
    prod_df,
    # Passed explicitly, so no period is inferred from the data.
    # NOTE(review): prod_df was resampled to daily sums, so 24 means a cycle of
    # 24 daily samples — confirm this (rather than e.g. 7) is intended.
    period=24,
    # --- seasonal component ---
    seasonal=7,      # Length of the seasonal smoother (library default)
    seasonal_deg=1,  # LOESS degree for the seasonal element, as in the original paper
    # --- trend component ---
    trend=None,      # None -> default from the original paper's suggestion: (1.5 * period) / (1 - 1.5 / seasonal)
    trend_deg=1,     # LOESS degree for the trend element, as in the original paper
    # --- low-pass filter ---
    low_pass=None,   # None -> default: lowest odd integer greater than period
    low_pass_deg=1,  # LOESS degree in the low-pass filter, as in the original paper
    # Robust fitting, intended to make the method less sensitive to outliers.
    robust=True,
    # The *_jump arguments determine whether some LOESS smoothings are skipped
    # (skipped points are estimated by linear interpolation). They only serve
    # to reduce computation time; 1 means no smoothing is skipped.
    seasonal_jump=1,
    trend_jump=1,
    low_pass_jump=1,
)

# Fit the decomposition and keep the period the model actually used.
prod_decomp = prod_stl_config.fit()
prod_period = prod_stl_config.period

#### Measure how well the decomposition fits the data

In [20]:
# Observed production series next to its three STL components.
prod_decomp_data = {
    "Time Series": prod_df,
    "Trend": prod_decomp.trend,
    "Seasonality": prod_decomp.seasonal,
    "Residuals": prod_decomp.resid,
}

# Concatenate column-wise, then label the columns with the dict keys
# (same four names, in the same order).
prod_decomp_df = pd.concat(prod_decomp_data, axis=1)
prod_decomp_df.columns = list(prod_decomp_data)

# Rough fit measure: total production minus the summed absolute residuals
# is reported as the "explained" amount.
total_residual = prod_decomp_df["Residuals"].abs().sum()
total_produced = prod_df["Produced Energy"].sum()
explained_ts = total_produced - total_residual

print(
    f"Using STL decomposition, we can explain {round(explained_ts, 2)} "
    f"of the total {round(total_produced, 2)} kWh of the producing data, "
    f"with the residuals summing to {round(total_residual, 2)} kWh."
)

Using STL decomposition, we can explain 10167.68 of the total 13925.22 kWh of the producing data, with the residuals summing to 3757.54 kWh.


#### Visualization of the decomposition

In [21]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

names = list(prod_decomp_df.columns)

# One stacked panel per column of prod_decomp_df, all sharing the time axis.
fig = make_subplots(rows=4, cols=1, shared_xaxes=True, subplot_titles=names)

col = 1
for row, component in enumerate(names, start=1):
    trace = go.Scatter(x=prod_decomp_df.index, y=prod_decomp_df[component])
    fig.add_trace(trace, row=row, col=col)

fig.update_layout(title="Energy Produced [kWh]", height=600, showlegend=False)

# Optional zoom settings, left disabled:
# fig.update_yaxes(range=[0, 10], row=1)
# fig.update_yaxes(range=[-10, 10], row=4)
# fig.update_xaxes(range=['2015-05-01', '2015-05-30'])

fig.show()

### Decomposing Load

In [22]:
# STL configuration for the house-load series. Every argument is spelled out
# so the notebook documents the exact settings used.
load_stl_config = STL(
    load_df,
    # Passed explicitly, so no period is inferred from the data.
    # NOTE(review): load_df was resampled to daily sums, so 24 means a cycle of
    # 24 daily samples — confirm this (rather than e.g. 7) is intended.
    period=24,
    # --- seasonal component ---
    seasonal=7,      # Length of the seasonal smoother (library default); should usually be >= 7
    seasonal_deg=1,  # LOESS degree for the seasonal element, as in the original paper
    # --- trend component ---
    trend=None,      # None -> default from the original paper: smallest odd integer greater than (1.5 * period) / (1 - 1.5 / seasonal)
    trend_deg=1,     # LOESS degree for the trend element, as in the original paper
    # --- low-pass filter ---
    low_pass=None,   # None -> default: lowest odd integer greater than period
    low_pass_deg=1,  # LOESS degree in the low-pass filter, as in the original paper
    # Robust fitting, intended to make the method less sensitive to outliers.
    robust=True,
    # The *_jump arguments determine whether some LOESS smoothings are skipped
    # (skipped points are estimated by linear interpolation). They only serve
    # to reduce computation time; 1 means no smoothing is skipped.
    seasonal_jump=1,
    trend_jump=1,
    low_pass_jump=1,
)

# Fit the decomposition and keep the period the model actually used.
load_decomp = load_stl_config.fit()
load_period = load_stl_config.period

#### Measure how well the decomposition fits the data

In [23]:
# Observed house-load series next to its three STL components.
load_decomp_data = {"Time Series": load_df,
                    "Trend": load_decomp.trend,
                    "Seasonality": load_decomp.seasonal,
                    "Residuals": load_decomp.resid
                    }

# Concatenate column-wise and give the columns flat, readable names.
load_decomp_df = pd.concat(load_decomp_data, axis=1)
load_decomp_df.columns = ['Time Series', 'Trend', 'Seasonality', 'Residuals']

# Rough fit measure: total load minus the summed absolute residuals is
# reported as the "explained" amount.
# NOTE: `explained_ts` is also assigned by the production cell; running this
# cell rebinds it to the load value.
total_load_residual = load_decomp_df["Residuals"].abs().sum()
total_load = load_df["House Load"].sum()
explained_ts = total_load - total_load_residual

# The message refers to the load series (it previously said "producing data",
# a leftover from the production cell).
print(f"Using STL decomposition, we can explain {round(explained_ts, 2)} of the total {round(total_load, 2)} kWh of the load data, with the residuals summing to {round(total_load_residual, 2)} kWh.")

Using STL decomposition, we can explain 10367.81 of the total 12347.39 kWh of the producing data, with the residuals summing to 1979.58 kWh.


#### Visualization of the decomposition

In [24]:
names = list(load_decomp_df.columns)

# One stacked panel per column of load_decomp_df, all sharing the time axis.
fig = make_subplots(rows=4, cols=1, shared_xaxes=True, subplot_titles=names)

col = 1
for row, component in enumerate(names, start=1):
    trace = go.Scatter(x=load_decomp_df.index, y=load_decomp_df[component])
    fig.add_trace(trace, row=row, col=col)

fig.update_layout(title="Energy Load [kWh]", height=600, showlegend=False)

# Optional zoom settings, left disabled:
# fig.update_yaxes(range=[0, 5], nticks=2, row=1)
# fig.update_yaxes(range=[-5, 5], row=4)
# fig.update_xaxes(range=['2015-05-01', '2015-05-30'])

fig.show()