## Investigate motifs in the house load

In [1]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import pandas as pd
import numpy as np
import stumpy

import DataRetriever as dr

# Fetch the hourly measurements and the names of the consuming (load) columns
# through the project's DataRetriever helper.
retriever = dr.DataRetriever()
hour = retriever.get_data("All-Subsystems-hour-Year2.pkl")
load = retriever.get_data("consuming_attributes.pkl")

# Combined house load per timestamp:
#   1. keep only the consuming columns,
#   2. floor negative readings at 0,
#   3. sum across columns and divide by 1000
#      (kWh according to the original author - assumes the raw columns are in Wh; TODO confirm).
load_df = (
    pd.DataFrame(hour[load])
    .clip(lower=0)
    .sum(axis=1)
    .div(1000)
)
load_df

                     Load_LRPlugLoadsPowerUsage  Load_KPlugLoadsPowerUsage  \
Timestamp                                                                    
2015-02-01 00:00:00                    0.000000                       0.82   
2015-02-01 01:00:00                    0.000000                       0.82   
2015-02-01 02:00:00                    0.000000                       0.82   
2015-02-01 03:00:00                    0.000000                       0.82   
2015-02-01 04:00:00                    0.000000                       0.82   
...                                         ...                        ...   
2016-01-31 19:00:00                   60.341602                       0.82   
2016-01-31 20:00:00                   60.577037                       0.82   
2016-01-31 21:00:00                   61.063901                       0.82   
2016-01-31 22:00:00                   40.844162                       0.82   
2016-01-31 23:00:00                    0.000000                 

Timestamp
2015-02-01 00:00:00    1.751517
2015-02-01 01:00:00    2.219437
2015-02-01 02:00:00    1.944296
2015-02-01 03:00:00    1.753827
2015-02-01 04:00:00    1.982696
                         ...   
2016-01-31 19:00:00    1.019126
2016-01-31 20:00:00    0.657746
2016-01-31 21:00:00    1.339228
2016-01-31 22:00:00    0.659790
2016-01-31 23:00:00    0.715960
Length: 8760, dtype: float64

#### Calculate matrix profile and basic statistics

In [10]:
# Subsequence (window) length: 7 days of 24 hourly samples.
m = 7 * 24

# Self-join matrix profile of the combined load; the array is displayed as the
# cell output (column 0 holds the nearest-neighbour distances used below).
load_mp = stumpy.stump(load_df, m=m)
load_mp

array([[10.785523168365863, 336, -1, 336],
       [10.795498955509435, 337, -1, 337],
       [10.75359506248058, 338, -1, 338],
       ...,
       [11.573749217695063, 7750, 7750, -1],
       [11.580599319315446, 1534, 1534, -1],
       [11.530289392276426, 1535, 1535, -1]], dtype=object)

In [11]:
# Column 0 of the matrix profile holds the nearest-neighbour distances. The
# array is stored with dtype=object, so convert the column to float64 once
# instead of running every reduction on Python objects.
load_mp_distances = load_mp[:, 0].astype(np.float64)

load_mp_mean_distance = np.mean(load_mp_distances)

print(f"The mean nearest neighbor distance in load matrix profile is {load_mp_mean_distance}")

load_mp_median_distance = np.median(load_mp_distances)

print(f"The median nearest neighbor distance in load matrix profile is {load_mp_median_distance}")

# q=0.25 is the lower quartile (25th percentile) of the distances.
load_mp_quantile_distance = np.quantile(load_mp_distances, q=0.25)

print(f"The lower quartile nearest neighbor distance in load matrix profile is {load_mp_quantile_distance}")

The mean nearest neighbor distance in load matrix profile is 7.027556900566204
The median nearest neighbor distance in load matrix profile is 6.549800878971255
The lower quantile nearest neighbor distance in load matrix profile is 5.678206435281048


#### Locate global motif + nearest neighbor and visualise them

In [12]:
# The global motif is the subsequence with the smallest nearest-neighbour
# distance (column 0 of the matrix profile). argmin finds it directly, without
# sorting the whole column.
global_motif_idx = int(np.argmin(load_mp[:, 0].astype(np.float64)))

# The motif window covers positions [idx, idx + m). When the motif is the very
# last window of the series, idx + m equals len(load_df) and would be out of
# bounds, so the end position is clamped to the last available sample.
global_motif_end_pos = min(global_motif_idx + m, len(load_df) - 1)

print(f"The global motif starts at timestamp {load_df.index[global_motif_idx]} and ends a week later at {load_df.index[global_motif_end_pos]}")

# Column 1 of the matrix profile is the position of the nearest neighbour.
nearest_neighbor_idx = int(load_mp[global_motif_idx, 1])

print(f"The nearest neighbor to first subsequence in the global motif is located at {nearest_neighbor_idx}")

The global motif starts at timestamp 2015-07-10 11:00:00 and ends a week later at 2015-07-17 11:00:00
The nearest neighbor to first subsequence in the global motif is located at 4498


In [13]:
# Overview figure:
#   row 1 - combined house load over the whole period,
#   row 2 - matrix profile (nearest-neighbour distance per window start),
#   row 3 - zoomed-in view of the global motif (left) and its nearest neighbour (right).
load_y_max = 6       # upper y-limit of the load panel, also the height of the highlight boxes
profile_y_max = 15   # upper y-limit of the matrix-profile panel, also the height of the marker lines

# Explicit positional windows of length m for the two occurrences.
motif_window = load_df.iloc[global_motif_idx: global_motif_idx + m]
neighbor_window = load_df.iloc[nearest_neighbor_idx: nearest_neighbor_idx + m]

# Right edge of each highlight box. A window starting at len(load_df) - m ends
# at position len(load_df), which is out of bounds for the index, so clamp to
# the last available sample.
last_pos = len(load_df) - 1
motif_box_end = load_df.index[min(global_motif_idx + m, last_pos)]
neighbor_box_end = load_df.index[min(nearest_neighbor_idx + m, last_pos)]

fig = make_subplots(
    rows=3, cols=2,
    specs=[
        [{"colspan": 2}, None],
        [{"colspan": 2}, None],
        [{}, {}],
    ],
    subplot_titles=(" ", "Matrix Profile", "First Occurrence", "Second Occurrence"),
)

fig.add_trace(
    go.Scatter(x=load_df.index, y=load_df, name="Total House Load"),
    row=1, col=1)

# No x given: the matrix profile is plotted against the window start position.
fig.add_trace(
    go.Scatter(y=load_mp[:, 0], name="Distance Matrix"),
    row=2, col=1)

fig.add_trace(
    go.Scatter(x=motif_window.index, y=motif_window, name="Zoomed in view"),
    row=3, col=1)

fig.add_trace(
    go.Scatter(x=neighbor_window.index, y=neighbor_window, name="Zoomed in view"),
    row=3, col=2)

fig.update_yaxes(title_text="House Energy Load [kWh]", title_font=dict(size=12), range=[0, load_y_max], row=1, col=1)
fig.update_yaxes(title_text="Euclidean Distance", title_font=dict(size=12), range=[0, profile_y_max], row=2, col=1)

fig.update_layout(
    shapes=[
        # Boxes around both occurrences in the load panel (timestamp x-axis).
        dict(type="rect", xref="x1", yref="y1",
             x0=load_df.index[global_motif_idx], x1=motif_box_end, y0=0, y1=load_y_max),
        dict(type="rect", xref="x1", yref="y1",
             x0=load_df.index[nearest_neighbor_idx], x1=neighbor_box_end, y0=0, y1=load_y_max),
        # Dotted markers at both start positions in the matrix-profile panel (positional x-axis).
        dict(type="line", xref="x2", yref="y2",
             x0=global_motif_idx, x1=global_motif_idx, y0=0, y1=profile_y_max, line=dict(dash="dot")),
        dict(type="line", xref="x2", yref="y2",
             x0=nearest_neighbor_idx, x1=nearest_neighbor_idx, y0=0, y1=profile_y_max, line=dict(dash="dot")),
    ],
    height=700, showlegend=False)

fig.show()

#### Find all subsequences in the global load motif

In [14]:
# Use the global motif window as the query and collect every subsequence of
# the load that lies within the mean matrix-profile distance of it.
# Despite its name, the result holds the matches themselves: one
# (distance, start position) row per match, as the cell output shows.
motif_query = load_df[global_motif_idx: global_motif_idx+m]
global_load_motif_count = stumpy.match(Q=motif_query, T=load_df, max_distance=load_mp_mean_distance)

print(load_mp_mean_distance)
global_load_motif_count

7.027556900566204


array([[2.115755003607136e-06, 3826],
       [4.787721859187024, 4498],
       [5.938122531796454, 4330],
       [6.011212585983621, 4834],
       [6.125325500910484, 4162],
       [6.192459702040578, 2482],
       [6.244263747868325, 3322],
       [6.326696319594435, 2314],
       [6.336652496445835, 5506],
       [6.356678327664906, 6010],
       [6.5044725301768, 4666],
       [6.726590266407893, 3658],
       [6.7645620367315455, 1642],
       [6.770263911405876, 3994],
       [6.783804516278841, 3490],
       [6.904191762830413, 1810],
       [6.976587697952845, 3154]], dtype=object)

In [15]:
# Overlay every matched occurrence of the global motif in one figure; the
# semi-transparent colour makes overlapping shapes show up darker.
title = f"All {len(global_load_motif_count)} subsequences in the global motif"
overlay_color = "rgba(98, 111, 250, 0.2)"

fig = go.Figure()
fig.add_traces([
    # Column 1 of the match array is the start position of each occurrence.
    go.Scatter(y=load_df[start: start+m], marker={"color": overlay_color})
    for start in global_load_motif_count[:, 1]
])

fig.update_layout(
    showlegend=False,
    title=dict(text=title, xanchor="center", x=0.5, y=0.83, yanchor="top"),
)
fig.update_yaxes(title="House Energy Load [kWh]")
# The x-axis is only the position inside the window, so it is hidden.
fig.update_xaxes(title="", visible=False)
fig.show()

#### Finding all motifs in the house load

In [16]:
# Discover the top motifs of the house load.
# T and P are handed over as plain float64 NumPy arrays: passing the pandas
# Series makes stumpy index it with `obj[np.newaxis, :]`, which pandas has
# deprecated (see the warning this cell used to emit), and the matrix-profile
# column is stored with dtype=object.
top_load_motifs = stumpy.motifs(
    T=load_df.to_numpy(dtype=np.float64),
    P=load_mp[:, 0].astype(np.float64),
    max_distance=load_mp_mean_distance,  # a match must be at least this close to its motif
    min_neighbors=5,                     # a neighbour count, so an integer
    max_matches=500,
    max_motifs=50,
    cutoff=5,                            # candidates with a larger matrix-profile value are not used as motifs
)

print(load_mp_mean_distance)

top_load_motifs

7.027556900566204



Support for multi-dimensional indexing (e.g. `obj[:, None]`) is deprecated and will be removed in a future version.  Convert to a numpy array before indexing instead.



(array([[2.11575500e-06, 4.78772186e+00, 5.93812253e+00, 6.01121259e+00,
         6.12532550e+00, 6.19245970e+00, 6.24426375e+00, 6.32669632e+00,
         6.33665250e+00, 6.35667833e+00, 6.50447253e+00, 6.72659027e+00,
         6.76456204e+00, 6.77026391e+00, 6.78380452e+00, 6.90419176e+00,
         6.97658770e+00],
        [1.12619660e-06, 4.85469797e+00, 5.58849845e+00, 6.11132693e+00,
         6.62118075e+00, 6.69542394e+00, 6.98735253e+00,            nan,
                    nan,            nan,            nan,            nan,
                    nan,            nan,            nan,            nan,
                    nan],
        [1.98850957e-06, 4.98321883e+00, 5.62188538e+00, 5.80160082e+00,
         6.02770602e+00, 6.29475306e+00, 6.33285866e+00, 6.45832510e+00,
         6.54646985e+00, 6.72343878e+00, 6.78391375e+00, 6.80034673e+00,
                    nan,            nan,            nan,            nan,
                    nan],
        [1.47091897e-06, 4.98803365e+00, 5.500

In [17]:
# Element 1 of the stumpy.motifs result: the start positions of the occurrences,
# one row per motif. Rows with fewer occurrences are padded with -1 (see output).
top_load_motifs[1]

array([[3826, 4498, 4330, 4834, 4162, 2482, 3322, 2314, 5506, 6010, 4666,
        3658, 1642, 3994, 3490, 1810, 3154],
       [6816, 7320, 1439, 1943, 7152, 5807, 1775,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1],
       [3885, 4557, 2373, 2037, 4893, 3549, 2205, 5565, 4389, 4053, 5397,
        1701,   -1,   -1,   -1,   -1,   -1],
       [5191, 2503, 4015, 3343, 4351, 2335, 4687, 4855,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1]], dtype=int64)

In [18]:
def ordinal(n):
    """Return ``n`` followed by its English ordinal suffix (1 -> "1st", 12 -> "12th", 23 -> "23rd")."""
    if n // 10 % 10 == 1:
        # The teens (11th, 12th, 13th, 111th, ...) always take "th".
        suffix = "th"
    else:
        suffix = {1: "st", 2: "nd", 3: "rd"}.get(n % 10, "th")
    return f"{n}{suffix}"


# One row of start positions per motif; unused slots are padded with -1.
motif_indices = top_load_motifs[1]
n_motifs = len(motif_indices)

# Subplot titles such as "1st Motif (17 Occurrences)"; padding is not counted.
motif_names = [
    f"{ordinal(rank)} Motif ({np.count_nonzero(motif_row != -1)} Occurrences)"
    for rank, motif_row in enumerate(motif_indices, start=1)
]

fig = make_subplots(rows=n_motifs, cols=1,
                    subplot_titles=motif_names,
                    shared_xaxes=True,
                    row_heights=[10] * n_motifs)

for row, motif_row in enumerate(motif_indices, start=1):
    # Line opacity is derived from the (padded) row width, as before, so the
    # rendering of the real occurrences is unchanged.
    line_style = dict(color=f"rgba(98, 111, 250, {2/len(motif_row)})")

    # Skip the -1 padding: it is not a position, and slicing with it only
    # produced empty traces in the figure.
    for start in motif_row[motif_row != -1]:
        segment = load_df.iloc[start: start + m]
        fig.add_trace(
            go.Scatter(x=np.arange(len(segment)), y=segment, mode="lines", line=line_style),
            row=row, col=1)

fig.update_layout(showlegend=False, height=600)
fig.update_yaxes(visible=False)
fig.update_xaxes(visible=False)
fig.update_annotations(font_size=12)

fig.show()

#### Discords in house load

In [19]:
# Find the top discords: the windows with the LARGEST nearest-neighbour distance.
n_discords = 3
excl_zone = m // 4  # windows this close to a found discord are trivial matches of it

# Work on a float copy of the distance column. Found discords and their
# surroundings are masked with -inf instead of being deleted, so positions in
# this array always equal positions in load_mp. This removes the need to look
# the position up again by comparing floats for equality, and keeps every
# exclusion zone aligned with the original series.
discord_profile = load_mp[:, 0].astype(np.float64)

discord_indexes = []
for _ in range(n_discords):
    discord_index = int(np.argmax(discord_profile))

    # Kept in the nested (array([idx]),) shape that np.where used to return,
    # because the plotting cell reads each entry as entry[0][0].
    discord_indexes.append((np.array([discord_index], dtype=np.int64),))

    zone_start = max(0, discord_index - excl_zone)
    zone_stop = min(len(discord_profile), discord_index + excl_zone + 1)
    discord_profile[zone_start:zone_stop] = -np.inf

discord_indexes

[(array([826], dtype=int64),),
 (array([7974], dtype=int64),),
 (array([8591], dtype=int64),)]

In [20]:
# Nearest-neighbour distance of each discord, rounded for the subplot titles.
# Each entry of discord_indexes is a 1-tuple holding an index array.
names = [round(load_mp[entry[0][0], 0], 2) for entry in discord_indexes]

fig = make_subplots(
    rows=3, cols=1,
    subplot_titles=[f"Anomaly {k + 1} <br>Distance to NN: {names[k]}" for k in range(3)],
)

# One subplot per discord, showing the m-sample window that starts at it.
for row, entry in enumerate(discord_indexes, start=1):
    idx = entry[0][0]
    window = load_df[idx: idx+m]
    fig.add_trace(
        go.Scatter(x=window.index, y=window, mode="lines"),
        row=row, col=1)

fig.update_layout(height=600, showlegend=False)

# Only the middle subplot carries the shared y-axis label.
fig.update_yaxes(title_text='House Energy Load [kWh]', title_font=dict(size=16), row=2, col=1)

fig.show()