# Decomposition of NZERTF Data

## Load data and packages

In [38]:
import pandas as pd

import DataRetriever as dr

retriever = dr.DataRetriever()

year2_hour = retriever.get_data("All-Subsystems-hour-Year2.pkl")
year2_daily = year2_hour.resample("D").sum()


def _row_total_in_thousands(frame, attributes, column_name):
    """Return a one-column DataFrame with the per-row total of `attributes`.

    Negative readings are clipped to zero before summing, and the row sum is
    divided by 1,000 (presumably Wh -> kWh, since later cells report the
    values in kWh -- TODO confirm against the data set's units).

    Parameters
    ----------
    frame : DataFrame holding at least the columns listed in `attributes`.
    attributes : column labels to include in the total.
    column_name : name given to the single resulting column.
    """
    row_total = frame[attributes].clip(lower=0).sum(axis=1) / 1_000
    return row_total.to_frame(column_name)


# Total consumption per time step over all consuming attributes.
load_attributes = retriever.get_data("consuming_attributes.pkl")
load_df = _row_total_in_thousands(year2_hour, load_attributes, "House Load")

# Total production per time step over all producing attributes.
prod_attributes = retriever.get_data("producing_attributes.pkl")
prod_df = _row_total_in_thousands(year2_hour, prod_attributes, "Produced Energy")

## STL
#### Packages

In [39]:
from statsmodels.tsa.seasonal import STL

### Decomposing Production - DAILY

In [40]:
# STL settings for the production series, gathered in one place so they are
# easy to compare with other decompositions in this notebook.
prod_stl_options = dict(
    period=24,  # The pattern is expected to repeat every day (24 samples)
    seasonal=7,  # Library default
    trend=None,  # Library default; follows the original paper's suggestion (1.5 * period) / (1 - 1.5 / seasonal)
    low_pass=None,  # Library default: lowest odd integer greater than period
    # LOESS degrees for the seasonal, trend and low-pass steps, chosen as in the original paper.
    seasonal_deg=1,
    trend_deg=1,
    low_pass_deg=1,
    robust=True,  # Should make the fit robust to outliers
    # The three *_jump settings let STL skip some LOESS smoothings and fill
    # the gaps by linear interpolation, purely to save computation time.
    # A value of 1 means nothing is skipped.
    seasonal_jump=1,
    trend_jump=1,
    low_pass_jump=1,
)

prod_stl_config = STL(endog=prod_df, **prod_stl_options)

prod_decomp = prod_stl_config.fit()
prod_period = prod_stl_config.period

#### Measure how well the decomposition explains the data

In [41]:
# Observed production next to the three STL components.
prod_decomp_data = {
    "Time Series": prod_df,
    "Trend": prod_decomp.trend,
    "Seasonality": prod_decomp.seasonal,
    "Residuals": prod_decomp.resid,
}

prod_decomp_df = pd.concat(prod_decomp_data, axis=1)
# Replace whatever labels concat produced with the plain component names.
prod_decomp_df.columns = list(prod_decomp_data)

# "Explained" here means: total production minus the summed absolute residuals.
total_produced = prod_df["Produced Energy"].sum()
total_residual = prod_decomp_df["Residuals"].abs().sum()
explained_ts = total_produced - total_residual

print(
    f"Using STL decomposition, we can explain {round(explained_ts, 2)} "
    f"of the total {round(total_produced, 2)} kWh of the producing data, "
    f"with the residuals summing to {round(total_residual, 2)} kWh."
)

Using STL decomposition, we can explain 9643.77 of the total 13925.22 kWh of the producing data, with the residuals summing to 4281.44 kWh.


#### Visualization of the decomposition

In [42]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

names = list(prod_decomp_df.columns)

# One stacked panel per column of the decomposition frame, all sharing the x-axis.
fig = make_subplots(rows=4, cols=1, subplot_titles=names, shared_xaxes=True)

for panel_row, component in enumerate(names, start=1):
    trace = go.Scatter(x=prod_decomp_df.index, y=prod_decomp_df[component])
    fig.add_trace(trace, row=panel_row, col=1)

fig.update_layout(title="Energy Produced [kWh]", height=600, showlegend=False)

fig.show()

### Decomposing Load

In [43]:
# STL settings for the house-load series.
load_stl_options = dict(
    period=7 * 24,  # The pattern is expected to repeat every week (7 * 24 samples)
    seasonal=7,  # Library default; should usually be greater than or equal to 7
    trend=None,  # Library default; original paper suggests the smallest odd integer greater than (1.5 * period) / (1 - 1.5 / seasonal)
    low_pass=None,  # Library default: lowest odd integer greater than period
    # LOESS degrees for the seasonal, trend and low-pass steps, chosen as in the original paper.
    seasonal_deg=1,
    trend_deg=1,
    low_pass_deg=1,
    robust=True,  # Should make the fit robust to outliers
    # The three *_jump settings let STL skip some LOESS smoothings and fill
    # the gaps by linear interpolation, purely to save computation time.
    # A value of 1 means nothing is skipped.
    seasonal_jump=1,
    trend_jump=1,
    low_pass_jump=1,
)

load_stl_config = STL(endog=load_df, **load_stl_options)

load_decomp = load_stl_config.fit()
load_period = load_stl_config.period

#### Measure how well the decomposition explains the data

In [44]:
# Observed house load next to the three STL components.
load_decomp_data = {"Time Series": load_decomp.observed,
                    "Trend": load_decomp.trend,
                    "Seasonality": load_decomp.seasonal,
                    "Residuals": load_decomp.resid
                    }

load_decomp_df = pd.concat(load_decomp_data, axis=1)
# Replace whatever labels concat produced with the plain component names.
load_decomp_df.columns = ['Time Series', 'Trend', 'Seasonality', 'Residuals']

# "Explained" here means: total load minus the summed absolute residuals.
total_load_residual = load_decomp_df["Residuals"].abs().sum()
total_load = load_df["House Load"].sum()
explained_ts = total_load - total_load_residual

# This cell reports on the house load, so the message says "load data"
# (it previously said "producing data", copied from the production cell).
print(f"Using STL decomposition, we can explain {round(explained_ts, 2)} of the total {round(total_load, 2)} kWh of the load data, with the residuals summing to {round(total_load_residual, 2)} kWh.")

Using STL decomposition, we can explain 10461.29 of the total 12356.65 kWh of the producing data, with the residuals summing to 1895.36 kWh.


#### Visualization of the decomposition

In [45]:
names = list(load_decomp_df.columns)

# One stacked panel per column of the decomposition frame, all sharing the x-axis.
fig = make_subplots(rows=4, cols=1, subplot_titles=names, shared_xaxes=True)

for panel_row, component in enumerate(names, start=1):
    fig.add_trace(
        go.Scatter(x=load_decomp_df.index, y=load_decomp_df[component]),
        row=panel_row,
        col=1,
    )

fig.update_layout(title="Energy Load [kWh]", height=600, showlegend=False)

# Optional zoom: uncomment to restrict the x-axis to 2015-06-01 .. 2015-08-31.
# fig.update_xaxes(range=['2015-06-01', '2015-08-31'])

fig.show()

# Decomposition Example

In [46]:
from statsmodels.tsa.seasonal import seasonal_decompose

# NOTE(review): this is an absolute path on one machine; the cell only runs
# where that file exists. Consider a path relative to a configurable data dir.
series = pd.read_csv(
    "C:/Users/madsc/OneDrive - Aalborg Universitet/P6/Figures/Decomposition/decomposition_concept/example_data.txt",
    header=0,
).set_index("Month")  # use the "Month" column as the index
series

Unnamed: 0_level_0,Passengers
Month,Unnamed: 1_level_1
1949-01,112
1949-02,118
1949-03,132
1949-04,129
1949-05,121
...,...
1960-08,606
1960-09,508
1960-10,461
1960-11,390


In [47]:
# Classical additive decomposition with a period of 12 samples.
result = seasonal_decompose(series, period=12, model='additive')

# Panel title -> attribute of the decomposition result holding that component.
component_attributes = {
    "Time Series": "observed",
    "Trend": "trend",
    "Seasonality": "seasonal",
    "Residual": "resid",
}

result_dict = {
    label: getattr(result, attribute)
    for label, attribute in component_attributes.items()
}

In [48]:
names = list(result_dict)

# One stacked panel per entry of result_dict, all sharing the x-axis.
fig = make_subplots(rows=4, cols=1, subplot_titles=names, shared_xaxes=True)

for panel_row, component in enumerate(names, start=1):
    trace = go.Scatter(x=result.observed.index, y=result_dict[component])
    fig.add_trace(trace, row=panel_row, col=1)

fig.update_layout(height=600, showlegend=False)

fig.show()