# Motif Discovery

## Importing relevant packages and loading the Year2 hour data.

In [336]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import pandas as pd
import numpy as np
import stumpy

import DataRetriever as dr

# DataRetriever is a project-local helper module; it is used here to load the pickled datasets.
retriever = dr.DataRetriever()
# Hourly "Year2" data for all subsystems; later cells select individual columns from this frame.
hour = retriever.get_data("All-Subsystems-hour-Year2.pkl")

In [337]:
# Keep only the rows that have a PV reading (there are 87 rows with NaN values).
# .copy() gives gen_df its own data, so the unit conversion below writes to a real
# frame instead of a slice of `hour` and no SettingWithCopyWarning is raised.
gen_df = hour[hour["PV_Watts3PhTotalW3PhT1"].notna()].copy()
gen_df["PV_Watts3PhTotalW3PhT1"] = gen_df["PV_Watts3PhTotalW3PhT1"] / 1e3  # Divide by 1000 (Wh -> kWh)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



## Calculating the matrix profile

In [338]:
m = 24*7 #Window size: 24*7 hourly samples, i.e. one week
mp = stumpy.stump(gen_df["PV_Watts3PhTotalW3PhT1"], m=m)
#Column 0 is the distance, column 1 the nearest neighbor index, column 2 the nearest neighbor to the left and column 3 the nearest neighbor to the right.
mp

array([[5.027679994938788, 6359, -1, 6359],
       [5.0276803494568325, 6360, -1, 6360],
       [5.027682060367196, 6361, -1, 6361],
       ...,
       [4.97035699278659, 7197, 7197, -1],
       [4.97035347039983, 7198, 7198, -1],
       [4.970350820351731, 7199, 7199, -1]], dtype=object)

In [339]:
mp[:, 0] #Column 0 of the matrix profile mp: the nearest neighbor distances.

array([5.027679994938788, 5.0276803494568325, 5.027682060367196, ...,
       4.97035699278659, 4.97035347039983, 4.970350820351731],
      dtype=object)

### Basic statistics for the distances, global motif and its nearest neighbor.

In [340]:
# Summary statistics of the nearest-neighbor distances (column 0 of the matrix profile).
# mp_mean_distance is reused further down as the matching threshold.
mp_median_distance = np.median(mp[:, 0])
mp_mean_distance = np.mean(mp[:, 0])
mp_quantile_distance = np.quantile(mp[:, 0], q=0.25)

print(f"The median nearest neighbor distance between sequences is {mp_median_distance}")
print(f"The mean nearest neighbor distance between sequences is {mp_mean_distance}")
print(f"The lower quartile for distance between nearest neighbor sequences is {mp_quantile_distance}")

The median nearest neighbor distance between sequences is 4.7207997178190375
The mean nearest neighbor distance between sequences is 4.811202683792316
The lower quartile for distance between nearest neighbor sequences is 4.067082661001723


In [341]:
# np.argsort gives the positions that sort the distances in ascending order, so its
# first element is the position of the smallest nearest-neighbor distance.
global_motif_idx = np.argsort(mp[:, 0])[0] # Position of the subsequence with the smallest distance to its nearest neighbor.

print(f"The global motif is located at index {global_motif_idx}. Global motif is the subsequence with smallest distance to its nearest neighbor.")

nearest_neighbor_idx = mp[global_motif_idx, 1] #Column 1 of the global motif's row: the index of its nearest neighbor.

print(f"The nearest neighbor to the global motif is located at index {nearest_neighbor_idx}")

The global motif is located at index 2635. Global motif is the subsequence with smallest distance to its nearest neighbor.
The nearest neighbor to the global motif is located at index 4099


## Global Motif

### Calculating distance vector for the global motif

In [342]:
# Use the global motif (one window of length m) as the query and compute its distance
# to every subsequence of the PV series. Despite the name, the displayed result is a
# 1-D array with one distance per subsequence.

mass_matrix = stumpy.mass(
    Q = gen_df["PV_Watts3PhTotalW3PhT1"][global_motif_idx: global_motif_idx+m],
    T = gen_df["PV_Watts3PhTotalW3PhT1"]
)

mass_matrix

array([16.88995206, 18.65377732, 20.01592288, ..., 13.65312767,
       15.54451014, 17.4494133 ])

### Visualising the global motif

In [343]:
# Overview figure for the global PV motif:
#   row 1 - the full PV series with the two motif occurrences boxed,
#   row 2 - the matrix profile with dotted lines at the two start positions,
#   row 3 - zoomed-in views of the two occurrences, side by side.
#
# Matrix-profile indices are *positions* in gen_df (the NaN rows were dropped before
# stumpy.stump ran), so every lookup here is positional (.iloc) instead of relying on
# the index type of gen_df.
timestamps = gen_df["Timestamp"]
last_pos = len(gen_df) - 1  # upper bound for idx+m, so a window ending at the last sample stays in range

motif_window = gen_df.iloc[global_motif_idx: global_motif_idx+m]
neighbor_window = gen_df.iloc[nearest_neighbor_idx: nearest_neighbor_idx+m]

fig = make_subplots(rows=3, cols=2,
                    specs=[
                        [{"colspan": 2}, None],
                        [{"colspan": 2}, None],
                        [{}, {}]
                    ],
                    subplot_titles=(" ", "Matrix Profile", "First Occurrence", "Second Occurrence")
                    )

fig.add_trace(
    go.Scatter(x=timestamps, y=gen_df["PV_Watts3PhTotalW3PhT1"], name="Power Generated"),
    row=1, col=1)

fig.add_trace(
    go.Scatter(y=mp[:, 0], name="Distance Matrix"),
    row=2, col=1)

fig.add_trace(
    go.Scatter(x=motif_window["Timestamp"], y=motif_window["PV_Watts3PhTotalW3PhT1"], name="Zoomed in view"),
    row=3, col=1)

fig.add_trace(
    go.Scatter(x=neighbor_window["Timestamp"], y=neighbor_window["PV_Watts3PhTotalW3PhT1"], name="Zoomed in view"),
    row=3, col=2)

fig.update_yaxes(title_text="Energy Generated [kWh]", title_font=dict(size=12), range=[0, 5], row=1, col=1)
fig.update_yaxes(title_text="Euclidean Distance", title_font=dict(size=12), range=[0, 12], row=2, col=1)

fig.update_layout(
    shapes=[
        # Boxes around the two occurrences in the full series (row 1 uses timestamps on x).
        dict(type="rect", xref="x1", yref="y1",
             x0=timestamps.iloc[global_motif_idx],
             x1=timestamps.iloc[min(global_motif_idx+m, last_pos)], y0=0, y1=5),
        dict(type="rect", xref="x1", yref="y1",
             x0=timestamps.iloc[nearest_neighbor_idx],
             x1=timestamps.iloc[min(nearest_neighbor_idx+m, last_pos)], y0=0, y1=5),
        # Dotted markers in the matrix profile (row 2 uses positions on x).
        dict(type="line", xref="x2", yref="y2",
             x0=global_motif_idx, x1=global_motif_idx, y0=0, y1=12, line=dict(dash="dot")),
        dict(type="line", xref="x2", yref="y2",
             x0=nearest_neighbor_idx, x1=nearest_neighbor_idx, y0=0, y1=12, line=dict(dash="dot"))
    ],
    height=700, showlegend=False)

fig.show()

### Counting all subsequences in the global motif.

In [344]:
# Find every subsequence of the PV series that matches the global motif, using the mean
# matrix-profile distance as the maximum allowed distance. As the displayed result
# shows, each row is [distance, start index]; the first row is the query itself.
global_motif_count = stumpy.match(
    Q = gen_df["PV_Watts3PhTotalW3PhT1"][global_motif_idx: global_motif_idx+m],
    T = gen_df["PV_Watts3PhTotalW3PhT1"],
    max_distance = mp_mean_distance
)

# Show the threshold that was used.
print(mp_mean_distance)

global_motif_count

4.811202683792316


array([[4.3187668163148874e-07, 2635],
       [2.2825339420155735, 4099],
       [3.4765270418642875, 4627],
       [3.6724151870739066, 3043],
       [3.7332845748469037, 4843],
       [4.015492119797373, 4579],
       [4.129827627217115, 2155],
       [4.189481215100035, 4939],
       [4.223372893037016, 4363],
       [4.495029184608503, 4987],
       [4.533827111674303, 5179],
       [4.592538397287818, 2683],
       [4.635634004990765, 3331],
       [4.736425665472354, 2995],
       [4.753860432659059, 2011],
       [4.7608501070962665, 4291],
       [4.80231690036927, 1795]], dtype=object)

In [345]:
# Overlay every occurrence of the global motif in one plot; the semi-transparent
# marker colour lets overlapping shapes show up as darker regions.
fig = go.Figure()
title = f"All {len(global_motif_count)} subsequences in the global motif"

for start in global_motif_count[:, 1]:
    window = gen_df[start: start+m]["PV_Watts3PhTotalW3PhT1"]
    fig.add_trace(go.Scatter(y=window, marker=dict(color="rgba(98, 111, 250, 0.3)")))

fig.update_layout(
    showlegend=False,
    title=dict(text=title, xanchor="center", x=0.5, y=0.83, yanchor="top"),
)
fig.update_yaxes(title="Energy Generated [kWh]")
fig.update_xaxes(title="", visible=False)
fig.show()

## Finding all top motifs

In [346]:
# Extract the top motifs of the PV series. The mean matrix-profile distance is used both
# as the cutoff for candidate motifs and as the maximum distance for their matches.
# T and P are passed as plain NumPy arrays: handing stumpy a pandas Series makes it
# index the Series multi-dimensionally, which pandas has deprecated.
top_motifs = stumpy.motifs(
    T = gen_df["PV_Watts3PhTotalW3PhT1"].to_numpy(),
    P = mp[:, 0].astype(np.float64),  # mp is an object array; the distances are floats
    max_distance=mp_mean_distance,
    min_neighbors=5,
    max_matches=500,
    max_motifs=50,
    cutoff=mp_mean_distance
    )

# Show the threshold that was used.
print(mp_mean_distance)

top_motifs

4.811202683792316



Support for multi-dimensional indexing (e.g. `obj[:, None]`) is deprecated and will be removed in a future version.  Convert to a numpy array before indexing instead.



(array([[4.31876682e-07, 2.28253394e+00, 3.47652704e+00, 3.67241519e+00,
         3.73328457e+00, 4.01549212e+00, 4.12982763e+00, 4.18948122e+00,
         4.22337289e+00, 4.49502918e+00, 4.53382711e+00, 4.59253840e+00,
         4.63563400e+00, 4.73642567e+00, 4.75386043e+00, 4.76085011e+00,
         4.80231690e+00],
        [5.11003381e-07, 2.97450505e+00, 3.37470593e+00, 4.19107739e+00,
         4.34361858e+00, 4.50308447e+00, 4.54966863e+00, 4.61436790e+00,
                    nan,            nan,            nan,            nan,
                    nan,            nan,            nan,            nan,
                    nan],
        [9.05912170e-07, 3.32276309e+00, 3.96848153e+00, 4.25013871e+00,
         4.41325414e+00, 4.48952370e+00, 4.53648067e+00, 4.61815084e+00,
         4.65773178e+00, 4.77028134e+00,            nan,            nan,
                    nan,            nan,            nan,            nan,
                    nan],
        [9.05912170e-07, 3.66372691e+00, 3.860

In [347]:
top_motifs[1] #Start indexes for all motifs: one row per motif, padded with -1 (see the output).

array([[2635, 4099, 4627, 3043, 4843, 4579, 2155, 4939, 4363, 4987, 5179,
        2683, 3331, 2995, 2011, 4291, 1795],
       [2578, 4522, 4042, 1690, 5410, 4762, 2722, 1954,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1],
       [5491, 1651, 5371, 4387, 2539, 1387, 3907, 6019, 5923, 2683,   -1,
          -1,   -1,   -1,   -1,   -1,   -1],
       [4892, 4820, 4604, 2156, 3020, 2684, 3716, 4316, 1772, 4028, 4964,
          -1,   -1,   -1,   -1,   -1,   -1],
       [5076, 1764, 2988, 5916, 2148, 4860,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1],
       [5126, 2990, 3278, 4574, 2078, 4310, 5006, 5942, 6950, 4238,   -1,
          -1,   -1,   -1,   -1,   -1,   -1]], dtype=int64)

In [348]:
top_motifs[1][0] #Start indexes of a single motif (here the first one).

array([2635, 4099, 4627, 3043, 4843, 4579, 2155, 4939, 4363, 4987, 5179,
       2683, 3331, 2995, 2011, 4291, 1795], dtype=int64)

In [349]:
def ordinal(n):
    """Return ``n`` followed by its English ordinal suffix, e.g. 1 -> "1st", 12 -> "12th"."""
    # A tens digit of 1 (11, 12, 13, 111, ...) always takes "th"; otherwise the last
    # digit decides, and only 1, 2 and 3 get a special suffix.
    if n // 10 % 10 == 1 or n % 10 >= 4:
        suffix = "th"
    else:
        suffix = ("th", "st", "nd", "rd")[n % 10]
    return "%d%s" % (n, suffix)
# One subplot per motif, with all of its occurrences overlaid.
# Titles read "<ordinal> Motif (<n> Occurrences)". The index rows of top_motifs[1] are
# padded with -1, so only real start indexes are counted.
motif_names = [
    f"{ordinal(rank)} Motif ({sum(1 for entry in indices if entry != -1)} Occurrences)"
    for rank, indices in enumerate(top_motifs[1], start=1)
]

fig = make_subplots(rows=len(top_motifs[1]), cols=1,
                    y_title="Energy Generated [kWh]",
                    subplot_titles=(motif_names),
                    shared_xaxes=True,)
row = 1
col = 1

for motif_indices in top_motifs[1]:
    # Same transparency for every row: it is derived from the padded row length.
    line_color = f"rgba(98, 111, 250, {2/len(motif_indices)})"
    for start in motif_indices:
        if start == -1:
            # Padding entry, not a real occurrence; plotting it would only add an empty trace.
            continue
        window = gen_df[start: start+m]
        fig.add_trace(go.Scatter(x=window.reset_index(drop=True).index,
                                 y=window["PV_Watts3PhTotalW3PhT1"],
                                 mode="lines",
                                 line=dict(color=line_color)), row=row, col=col)
    row += 1

fig.update_layout(showlegend=False, height=600)
fig.update_annotations(font_size=12)
fig.update_xaxes(visible=False)


fig.show()

## Longest chain

In [350]:
# Build the chains from the left (column 2) and right (column 3) nearest-neighbor
# indexes of the matrix profile; only the longest chain is used below.
all_chains, longest_chain = stumpy.allc(mp[:, 2], mp[:, 3])
longest_chain

array([1618, 3514, 5458, 5890, 6010, 6202], dtype=int64)

In [351]:
# Overlay the subsequences of the longest chain, one trace per chain link.
fig = go.Figure()

for link_start in longest_chain:
    link_values = gen_df[link_start: link_start+m]["PV_Watts3PhTotalW3PhT1"]
    fig.add_trace(go.Scatter(y=link_values))

fig.show()


In [352]:
# Show where the links of the longest chain sit in the full PV series.
fig = go.Figure()

# Faded full series as background.
background = go.Scattergl(
    x=gen_df["Timestamp"],
    y=gen_df["PV_Watts3PhTotalW3PhT1"],
    opacity=0.2,
)
fig.add_trace(background)

# One highlighted trace per chain link.
for link_start in longest_chain:
    link = gen_df[link_start: link_start+m]
    fig.add_trace(go.Scatter(x=link["Timestamp"], y=link["PV_Watts3PhTotalW3PhT1"]))


fig.show()

## Visualising the PV discords

In [353]:
# Find the three largest discords: the subsequences whose nearest-neighbor distance is
# highest. After each pick, an exclusion zone of m//4 positions on either side is
# removed from consideration so the next discord is not an overlapping window.
#
# The zone is masked with -inf instead of deleting rows, so every index keeps referring
# to the original matrix profile and no lookup by float equality is needed.
discord_indexes = []
excl_zone = m // 4

remaining = mp[:, 0].astype(np.float64)
# Non-finite distances cannot be ranked as discords, so they are never picked.
remaining[~np.isfinite(remaining)] = -np.inf

while len(discord_indexes) < 3:
    discord_index = int(np.argmax(remaining))
    if remaining[discord_index] == -np.inf:
        break  # every remaining position is excluded; fewer than three discords exist

    # Same structure as np.where(...) returns, because the plotting cell reads i[0][0].
    discord_indexes.append((np.array([discord_index], dtype=np.int64),))

    remaining[max(0, discord_index - excl_zone): discord_index + excl_zone + 1] = -np.inf

discord_indexes

[(array([8411], dtype=int64),),
 (array([372], dtype=int64),),
 (array([5769], dtype=int64),)]

In [354]:
# Nearest-neighbor distance of each discord, rounded for the subplot titles.
# Every entry of discord_indexes is a tuple holding an index array, hence the [0][0].
names = [round(mp[:, 0][found[0][0]], 2) for found in discord_indexes]

fig = make_subplots(
    rows=3, cols=1,
    subplot_titles=[
        f"Anomaly 1 <br>Distance to NN: {names[0]}",
        f"Anomaly 2 <br>Distance to NN: {names[1]}",
        f"Anomaly 3 <br>Distance to NN: {names[2]}",
    ],
)

# One subplot row per discord window.
row, col = 1, 1
for found in discord_indexes:
    start = found[0][0]
    window = gen_df[start: start+m]
    fig.add_trace(
        go.Scatter(x=window.index, y=window["PV_Watts3PhTotalW3PhT1"], mode="lines"),
        row=row, col=col,
    )
    row += 1


fig.update_layout(height=600, showlegend=False)

# Only the middle subplot carries the y-axis title.
fig.update_yaxes(title_text='Energy Generated [kWh]', title_font=dict(size=16), row=2, col=1)

fig.show()

## Motifs in house load

### Load the consuming attributes

In [355]:
# `load` is used below to select columns of `hour`; presumably it is a list of the
# consuming (load) column names - verify against the pickle.
load = retriever.get_data("consuming_attributes.pkl")

load_df = pd.DataFrame(hour[load])
load_df = load_df.clip(lower=0) #Negative readings are replaced with 0.
load_df = load_df.sum(axis = 1) / 1000 # Sum all selected columns per hour and divide by 1000 (kWh). The result is a Series.
load_df

Timestamp
2015-02-01 00:00:00    1.751517
2015-02-01 01:00:00    2.219437
2015-02-01 02:00:00    1.944296
2015-02-01 03:00:00    1.753827
2015-02-01 04:00:00    1.982696
                         ...   
2016-01-31 19:00:00    1.019126
2016-01-31 20:00:00    0.657746
2016-01-31 21:00:00    1.339228
2016-01-31 22:00:00    0.659790
2016-01-31 23:00:00    0.715960
Length: 8760, dtype: float64

### Calculate matrix profile, and find global motif and nearest neighbor for load dataframe

In [356]:
# Same one-week window (24*7 hourly samples) as for the PV series.
m = 24*7
# Matrix profile of the total house load.
load_mp = stumpy.stump(load_df, m = m)
load_mp

array([[10.785523168365863, 336, -1, 336],
       [10.795498955509435, 337, -1, 337],
       [10.75359506248058, 338, -1, 338],
       ...,
       [11.573749217695063, 7750, 7750, -1],
       [11.580599319315446, 1534, 1534, -1],
       [11.530289392276426, 1535, 1535, -1]], dtype=object)

In [357]:
# Summary statistics of the nearest-neighbor distances in the load matrix profile.
# load_mp_mean_distance is reused further down as the matching threshold.
load_mp_mean_distance = np.mean(load_mp[:, 0])
load_mp_median_distance = np.median(load_mp[:, 0])
load_mp_quantile_distance = np.quantile(load_mp[:, 0], q=0.25)

print(f"The mean nearest neighbor distance in load matrix profile is {load_mp_mean_distance}")
print(f"The median nearest neighbor distance in load matrix profile is {load_mp_median_distance}")
print(f"The lower quantile nearest neighbor distance in load matrix profile is {load_mp_quantile_distance}")

The mean nearest neighbor distance in load matrix profile is 7.027556900566204
The median nearest neighbor distance in load matrix profile is 6.549800878971255
The lower quantile nearest neighbor distance in load matrix profile is 5.678206435281048


In [358]:
# Position of the smallest nearest-neighbor distance in the load matrix profile.
# Note: this overwrites the global_motif_idx / nearest_neighbor_idx of the PV analysis.
global_motif_idx =  np.argsort(load_mp[:, 0])[0]

# load_df is indexed by timestamp, so positions can be translated to timestamps directly.
print(f"The global motif starts at timestamp {load_df.index[global_motif_idx]} and ends a week later at {load_df.index[global_motif_idx+m]}")

# Column 1 of the matrix profile holds the nearest-neighbor index.
nearest_neighbor_idx = load_mp[global_motif_idx, 1]

print(f"The nearest neighbor to first subsequence in the global motif is located at {nearest_neighbor_idx}")

The global motif starts at timestamp 2015-07-10 11:00:00 and ends a week later at 2015-07-17 11:00:00
The nearest neighbor to first subsequence in the global motif is located at 4498


## Global load motif

### Visualise the global load motif

In [359]:
# Overview figure for the global load motif:
#   row 1 - the full load series with the two motif occurrences boxed,
#   row 2 - the load matrix profile with dotted lines at the two start positions,
#   row 3 - zoomed-in views of the two occurrences, side by side.
fig = make_subplots(rows=3, cols=2,
                    specs=[
                        [{"colspan": 2}, None],
                        [{"colspan": 2}, None],
                        [{}, {}]
                    ],
                    subplot_titles=(" ", "Matrix Profile", "First Occurrence", "Second Occurrence")
                    )

# Row 1: the complete house load over time.
fig.add_trace(
    go.Scatter(x=load_df.index, y=load_df, name="Total House Load"),
    row=1, col=1)

# Row 2: nearest-neighbor distances (x axis is the subsequence position).
fig.add_trace(
    go.Scatter(y=load_mp[:, 0], name="Distance Matrix"),
    row=2, col=1)

# Row 3, left: the global motif window.
fig.add_trace(
    go.Scatter(x=load_df[global_motif_idx: global_motif_idx+m].index, y=load_df[global_motif_idx: global_motif_idx+m], name="Zoomed in view"),
    row=3, col=1)

# Row 3, right: the window of its nearest neighbor.
fig.add_trace(
    go.Scatter(x=load_df[nearest_neighbor_idx: nearest_neighbor_idx+m].index, y=load_df[nearest_neighbor_idx: nearest_neighbor_idx+m], name="Zoomed in view"),
    row=3, col=2)

fig.update_yaxes(title_text="House Energy Load [kWh]", title_font=dict(size=12), range=[0, 6], row=1, col=1)
fig.update_yaxes(title_text="Euclidean Distance", title_font=dict(size=12), range=[0, 15], row=2, col=1)

# Rectangles mark the two windows in row 1 (timestamps on x); dotted lines mark their
# start positions in row 2 (positions on x). The y extents match the axis ranges set above.
fig.update_layout(
    shapes=[
        dict(type="rect", xref="x1", yref="y1",
             x0=load_df.index[global_motif_idx], x1=load_df.index[global_motif_idx+m], y0=0, y1=6),
        dict(type="rect", xref="x1", yref="y1",
             x0=load_df.index[nearest_neighbor_idx], x1=load_df.index[nearest_neighbor_idx+m], y0=0, y1=6),
        dict(type="line", xref="x2", yref="y2",
             x0=global_motif_idx, x1=global_motif_idx, y0=0, y1=15, line=dict(dash="dot")),
        dict(type="line", xref="x2", yref="y2",
             x0=nearest_neighbor_idx, x1=nearest_neighbor_idx, y0=0, y1=15, line=dict(dash="dot"))
    ],
    height=700, showlegend=False)

fig.show()

### Find all subsequences in the global load motif

In [360]:
# Find every subsequence of the house load that matches the global load motif, using the
# mean matrix-profile distance as the maximum allowed distance. As the displayed result
# shows, each row is [distance, start index]; the first row is the query itself.
global_load_motif_count = stumpy.match(
    Q = load_df[global_motif_idx: global_motif_idx+m],
    T = load_df,
    max_distance = load_mp_mean_distance
)

# Show the threshold that was used.
print(load_mp_mean_distance)

global_load_motif_count

7.027556900566204


array([[2.115755003607136e-06, 3826],
       [4.787721859187024, 4498],
       [5.938122531796454, 4330],
       [6.011212585983621, 4834],
       [6.125325500910484, 4162],
       [6.192459702040578, 2482],
       [6.244263747868325, 3322],
       [6.326696319594435, 2314],
       [6.336652496445835, 5506],
       [6.356678327664906, 6010],
       [6.5044725301768, 4666],
       [6.726590266407893, 3658],
       [6.7645620367315455, 1642],
       [6.770263911405876, 3994],
       [6.783804516278841, 3490],
       [6.904191762830413, 1810],
       [6.976587697952845, 3154]], dtype=object)

In [361]:
# Overlay every occurrence of the global load motif in one plot; the semi-transparent
# marker colour lets overlapping shapes show up as darker regions.
fig = go.Figure()
title = f"All {len(global_load_motif_count)} subsequences in the global motif"

for start in global_load_motif_count[:, 1]:
    window = load_df[start: start+m]
    fig.add_trace(go.Scatter(y=window, marker=dict(color="rgba(98, 111, 250, 0.2)")))

fig.update_layout(
    showlegend=False,
    title=dict(text=title, xanchor="center", x=0.5, y=0.83, yanchor="top"),
)
fig.update_yaxes(title="House Energy Load [kWh]")
fig.update_xaxes(title="", visible=False)
fig.show()

## Finding all motifs in the house load

In [362]:
# Extract the top motifs of the house load. Matches may be at most the mean
# matrix-profile distance away from their motif.
# T and P are passed as plain NumPy arrays: handing stumpy a pandas Series makes it
# index the Series multi-dimensionally, which pandas has deprecated.
# NOTE(review): cutoff is a fixed 5 here, while the PV analysis uses the mean distance -
# confirm that the difference is intentional.
top_load_motifs = stumpy.motifs(
    T = load_df.to_numpy(),
    P = load_mp[:, 0].astype(np.float64),  # load_mp is an object array; the distances are floats
    max_distance=load_mp_mean_distance,
    min_neighbors=5,
    max_matches=500,
    max_motifs=50,
    cutoff=5
)

# Show the threshold that was used.
print(load_mp_mean_distance)

top_load_motifs

7.027556900566204



Support for multi-dimensional indexing (e.g. `obj[:, None]`) is deprecated and will be removed in a future version.  Convert to a numpy array before indexing instead.



(array([[2.11575500e-06, 4.78772186e+00, 5.93812253e+00, 6.01121259e+00,
         6.12532550e+00, 6.19245970e+00, 6.24426375e+00, 6.32669632e+00,
         6.33665250e+00, 6.35667833e+00, 6.50447253e+00, 6.72659027e+00,
         6.76456204e+00, 6.77026391e+00, 6.78380452e+00, 6.90419176e+00,
         6.97658770e+00],
        [1.12619660e-06, 4.85469797e+00, 5.58849845e+00, 6.11132693e+00,
         6.62118075e+00, 6.69542394e+00, 6.98735253e+00,            nan,
                    nan,            nan,            nan,            nan,
                    nan,            nan,            nan,            nan,
                    nan],
        [1.98850957e-06, 4.98321883e+00, 5.62188538e+00, 5.80160082e+00,
         6.02770602e+00, 6.29475306e+00, 6.33285866e+00, 6.45832510e+00,
         6.54646985e+00, 6.72343878e+00, 6.78391375e+00, 6.80034673e+00,
                    nan,            nan,            nan,            nan,
                    nan],
        [1.47091897e-06, 4.98803365e+00, 5.500

In [363]:
# Start indexes for all load motifs: one row per motif, padded with -1 (see the output).
top_load_motifs[1]

array([[3826, 4498, 4330, 4834, 4162, 2482, 3322, 2314, 5506, 6010, 4666,
        3658, 1642, 3994, 3490, 1810, 3154],
       [6816, 7320, 1439, 1943, 7152, 5807, 1775,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1],
       [3885, 4557, 2373, 2037, 4893, 3549, 2205, 5565, 4389, 4053, 5397,
        1701,   -1,   -1,   -1,   -1,   -1],
       [5191, 2503, 4015, 3343, 4351, 2335, 4687, 4855,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1]], dtype=int64)

In [364]:
def ordinal(n):
    """Format ``n`` with its English ordinal suffix ("1st", "2nd", "3rd", "4th", "11th", ...)."""
    last_digit = n % 10
    tens_digit = n // 10 % 10
    # 11-13 (and 111-113, ...) are irregular: they take "th" like every last digit from 4 up.
    if tens_digit != 1 and last_digit < 4:
        suffix = ("th", "st", "nd", "rd")[last_digit]
    else:
        suffix = "th"
    return "%d%s" % (n, suffix)
# One subplot per load motif, with all of its occurrences overlaid.
# Titles read "<ordinal> Motif (<n> Occurrences)". The index rows of top_load_motifs[1]
# are padded with -1, so only real start indexes are counted.
motif_names = [
    f"{ordinal(rank)} Motif ({sum(1 for entry in indices if entry != -1)} Occurrences)"
    for rank, indices in enumerate(top_load_motifs[1], start=1)
]

fig = make_subplots(rows=len(top_load_motifs[1]), cols=1,
                    subplot_titles=(motif_names),
                    shared_xaxes=True,
                    row_heights=[10]*len(top_load_motifs[1]))
row = 1
col = 1

for motif_indices in top_load_motifs[1]:
    # Same transparency for every row: it is derived from the padded row length.
    line_color = f"rgba(98, 111, 250, {2/len(motif_indices)})"
    for start in motif_indices:
        if start == -1:
            # Padding entry, not a real occurrence; plotting it would only add an empty trace.
            continue
        window = load_df[start: start+m]
        fig.add_trace(go.Scatter(x=window.reset_index(drop=True).index,
                                 y=window,
                                 mode="lines",
                                 line=dict(color=line_color)), row=row, col=col)
    row += 1

fig.update_layout(showlegend=False, height=600)
fig.update_yaxes(visible=False)
fig.update_xaxes(visible=False)
fig.update_annotations(font_size=12)

fig.show()

### Potential discords in the house load

In [365]:
# Find the three largest discords in the house load: the subsequences whose
# nearest-neighbor distance is highest. After each pick, an exclusion zone of m//4
# positions on either side is removed from consideration so the next discord is not an
# overlapping window.
#
# The zone is masked with -inf instead of deleting rows, so every index keeps referring
# to the original matrix profile and no lookup by float equality is needed.
discord_indexes = []
excl_zone = m // 4

remaining = load_mp[:, 0].astype(np.float64)
# Non-finite distances cannot be ranked as discords, so they are never picked.
remaining[~np.isfinite(remaining)] = -np.inf

while len(discord_indexes) < 3:
    discord_index = int(np.argmax(remaining))
    if remaining[discord_index] == -np.inf:
        break  # every remaining position is excluded; fewer than three discords exist

    # Same structure as np.where(...) returns, because the plotting cell reads i[0][0].
    discord_indexes.append((np.array([discord_index], dtype=np.int64),))

    remaining[max(0, discord_index - excl_zone): discord_index + excl_zone + 1] = -np.inf

discord_indexes

[(array([826], dtype=int64),),
 (array([7974], dtype=int64),),
 (array([8591], dtype=int64),)]

In [366]:
# Nearest-neighbor distance of each load discord, rounded for the subplot titles.
# Every entry of discord_indexes is a tuple holding an index array, hence the [0][0].
names = [round(load_mp[:, 0][found[0][0]], 2) for found in discord_indexes]

fig = make_subplots(
    rows=3, cols=1,
    subplot_titles=[
        f"Anomaly 1 <br>Distance to NN: {names[0]}",
        f"Anomaly 2 <br>Distance to NN: {names[1]}",
        f"Anomaly 3 <br>Distance to NN: {names[2]}",
    ],
)

# One subplot row per discord window.
row, col = 1, 1
for found in discord_indexes:
    start = found[0][0]
    window = load_df[start: start+m]
    fig.add_trace(
        go.Scatter(x=window.index, y=window, mode="lines"),
        row=row, col=col,
    )
    row += 1


fig.update_layout(height=600, showlegend=False)

# Only the middle subplot carries the y-axis title.
fig.update_yaxes(title_text='House Energy Load [kWh]', title_font=dict(size=16), row=2, col=1)

fig.show()

## Outdoor motifs

In [367]:
# Outdoor ambient temperature column of the hourly data (a Series, despite the _df name).
outenv_df = hour["OutEnv_OutdoorAmbTemp"]
outenv_df

Timestamp
2015-02-01 00:00:00    -5.053251
2015-02-01 01:00:00    -5.884319
2015-02-01 02:00:00    -5.711684
2015-02-01 03:00:00    -5.191623
2015-02-01 04:00:00    -4.249297
                         ...    
2016-01-31 19:00:00    12.626852
2016-01-31 20:00:00    12.453917
2016-01-31 21:00:00    12.223517
2016-01-31 22:00:00    12.850783
2016-01-31 23:00:00    12.321852
Name: OutEnv_OutdoorAmbTemp, Length: 8760, dtype: float64

In [368]:
# Matrix profile of the outdoor temperature; m is still the one-week window set in an earlier cell.
# NOTE(review): the last rows of the displayed result are [inf, -1, -1, -1] - presumably
# windows that contain missing temperature values; verify before interpreting them.
outenv_mp = stumpy.stump(outenv_df, m = m)
outenv_mp

array([[9.173848678759567, 673, -1, 673],
       [9.214104572747951, 674, -1, 674],
       [9.255336358104268, 675, -1, 675],
       ...,
       [inf, -1, -1, -1],
       [inf, -1, -1, -1],
       [inf, -1, -1, -1]], dtype=object)

In [369]:
# Plot the nearest-neighbor distances (column 0) of the outdoor temperature matrix profile.
fig = go.Figure(go.Scatter(
    y = outenv_mp[:, 0]
))

fig.show()