# Decomposition of House Load

In [29]:
import pandas as pd
import numpy as np
import math
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt

import DataRetriever as dr

# Load the hourly year-2 measurements and the list of consuming (load) column names.
retriever = dr.DataRetriever()
year2_hour = retriever.get_data("All-Subsystems-hour-Year2.pkl")
load_attributes = retriever.get_data("consuming_attributes.pkl")

# Combined house load per hour:
#   1. floor every consumer reading at 1e-6 (not 0), so no value is zero or negative
#      -- presumably to keep the series strictly positive; confirm the intent,
#   2. add up all consumer columns row by row,
#   3. divide by 1000 (presumably Wh -> kWh; confirm the source units).
load_df = (
    pd.DataFrame(year2_hour[load_attributes])
    .clip(lower=0.000001)
    .sum(axis=1)
    .div(1000)
    .to_frame("House Load")
)

load_df

Unnamed: 0_level_0,House Load
Timestamp,Unnamed: 1_level_1
2015-02-01 00:00:00,1.751517
2015-02-01 01:00:00,2.219437
2015-02-01 02:00:00,1.944296
2015-02-01 03:00:00,1.753827
2015-02-01 04:00:00,1.982696
...,...
2016-01-31 19:00:00,1.019126
2016-01-31 20:00:00,0.657746
2016-01-31 21:00:00,1.339228
2016-01-31 22:00:00,0.659790


In [30]:
# The two PV power readings that together make up the generated energy.
pv_columns = ["PV_Watts3PhTotalW3PhT1", "PV_Watts3PhTotalW3PhT2"]

# Keep only the hours where both PV readings are present
# (87 rows were dropped in the recorded run).
pv_valid = year2_hour.dropna(subset=pv_columns)

print(f"A total of {len(year2_hour) - len(pv_valid)} rows have been dropped since they have NaN values.")

# Build a new single-column frame instead of assigning a column into the filtered
# slice; assigning into a boolean-indexed slice can raise SettingWithCopyWarning.
gen_year2 = (
    (pv_valid[pv_columns[0]] + pv_valid[pv_columns[1]]) / 1e3  # Convert Wh to kWh
).to_frame("Generated Energy")

# Aggregate the hourly values to daily totals.
# NOTE(review): days that lost hours to the NaN filter above are summed over fewer
# samples, so their daily total is understated -- confirm this is acceptable.
gen_year2 = gen_year2.resample("D").sum()

gen_year2

A total of 87 rows have been dropped since they have NaN values.


Unnamed: 0_level_0,Generated Energy
Timestamp,Unnamed: 1_level_1
2015-02-01,12.958986
2015-02-02,0.010302
2015-02-03,41.425352
2015-02-04,40.553641
2015-02-05,42.718180
...,...
2016-01-27,33.832924
2016-01-28,43.440532
2016-01-29,27.944539
2016-01-30,46.313858


In [31]:
from dateutil.parser import parse

# Import Data
# df = pd.read_csv('https://raw.githubusercontent.com/selva86/datasets/master/a10.csv', parse_dates=['date'], index_col='date')
# df
# type(df)

In [32]:
from statsmodels.tsa.seasonal import STL

# Default size for matplotlib figures created after this cell.
plt.rcParams['figure.figsize'] = [12, 9]

# STL decomposition of the daily generated-energy series.
# period=7 is the value statsmodels infers for a daily-frequency index when no
# period is given; stating it makes the weekly-cycle assumption explicit.
# seasonal and trend are the (odd) smoother lengths, in days.
# NOTE(review): whether PV generation really has a weekly pattern is an open
# question (the original comment flagged it with "??") -- confirm the period.
decomp = STL(
    gen_year2,
    period=7,
    seasonal=31,
    trend=183,
).fit()

In [33]:
# Put the observed series next to the three STL components, one column each.
component_names = ['Observed', 'Trend', 'Seasonal', 'Residuals']
component_series = [gen_year2, decomp.trend, decomp.seasonal, decomp.resid]

decomp_df = pd.concat(component_series, axis=1, keys=component_names)
# Replace the column labels produced by the concat with the plain component names.
decomp_df.columns = component_names
decomp_df

Unnamed: 0_level_0,Observed,Trend,Seasonal,Residuals
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2015-02-01,12.958986,22.885906,6.403685,-16.330605
2015-02-02,0.010302,23.191066,5.560217,-28.740982
2015-02-03,41.425352,23.495574,-3.289044,21.218822
2015-02-04,40.553641,23.799421,1.969438,14.784783
2015-02-05,42.718180,24.102596,-0.214167,18.829751
...,...,...,...,...
2016-01-27,33.832924,19.474089,2.103009,12.255826
2016-01-28,43.440532,19.373130,5.263933,18.803469
2016-01-29,27.944539,19.272696,-6.095381,14.767224
2016-01-30,46.313858,19.172789,1.518841,25.622228


In [34]:
# One stacked subplot per decomposition component, all sharing the time axis.
names = list(decomp_df.columns)

# The number of rows follows the number of components instead of a hard-coded 4,
# so the figure stays valid if columns are added or removed.
fig = make_subplots(rows = len(names), cols = 1,
                    subplot_titles = names,
                    shared_xaxes = True)

# Subplot rows are 1-based; enumerate replaces the hand-maintained row counter.
for row, component in enumerate(names, start = 1):
    fig.add_trace(go.Scatter(
        x = decomp_df.index,
        y = decomp_df[component],
        name = component,  # shown on hover; the legend itself is hidden below
    ), row = row, col = 1)

fig.update_layout(
    showlegend = False,
    height = 600
)

fig.show()