In [78]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import pandas as pd
import numpy as np
import stumpy

import DataRetriever as dr

retriever = dr.DataRetriever()

In [79]:
hour = retriever.get_data("All-Subsystems-hour-Year2.pkl")

# Calculating the matrix profile

In [80]:
m = 24*7
mp = stumpy.stump(hour["PV_Watts3PhTotalW3PhT1"], m=m)
#mp[0] = np.delete(mp[0], np.where(mp[0] == float('inf')))
mp

array([[5.027679994938704, 6431, -1, 6431],
       [5.02768034945674, 6432, -1, 6432],
       [5.027682060367114, 6433, -1, 6433],
       ...,
       [inf, -1, -1, -1],
       [inf, -1, -1, -1],
       [inf, -1, -1, -1]], dtype=object)

In [81]:
mp[:, 0] #Accessing a column in the matrix profile mp. In this case the distances.

array([5.027679994938704, 5.02768034945674, 5.027682060367114, ..., inf,
       inf, inf], dtype=object)

# Basic statistics for the distances, global motif and its nearest neighbor, and discord.

In [82]:
mp_median_distance = np.median(mp[:, 0])

print(f"The median distance between sequences is {mp_median_distance}")

mp_tenthquantile_distance = np.quantile(mp[:, 0], q=0.1)

print(f"The lower quartile for distance between sequences is {mp_tenthquantile_distance}")

The median distance between sequences is 4.761013440087671
The lower quartile for distance between sequences is 3.5437529096327514


In [83]:
first_motif_idx = np.argsort(mp[:, 0])[0] # Takes all distances in column 0, and sort them in ascending order, this returns the indexes. Then take the first index in sorted array.

print(f"The first motif is located at index {first_motif_idx}")

nearest_neighbor_idx = mp[first_motif_idx, 1] #Access the previous index, and return the index of its nearest neighbor.

print(f"The nearest neighbor to the first motif is located at index {nearest_neighbor_idx}")

discord_idx = np.argsort(mp[:, 0])[-1]

print(f"The discord is located at index {discord_idx}") #This does not work. Distance from behind is inf.

The first motif is located at index 2635
The nearest neighbor to the first motif is located at index 4099
The discord is located at index 8592


# Finding top motifs

In [84]:
top_motifs = stumpy.motifs(
    T = hour["PV_Watts3PhTotalW3PhT1"],
    P = mp[:, 0],
    max_distance=mp_tenthquantile_distance,
    min_neighbors=2.0,
    max_motifs=10,
    max_matches=100
    )

top_motifs


Support for multi-dimensional indexing (e.g. `obj[:, None]`) is deprecated and will be removed in a future version.  Convert to a numpy array before indexing instead.



(array([[5.79423371e-07, 2.28253394e+00, 3.47652704e+00],
        [7.96341253e-07, 2.29906696e+00, 3.50077927e+00]]),
 array([[2635, 4099, 4627],
        [4310, 4598, 3014]], dtype=int64))

In [85]:
top_motifs[1]

array([[2635, 4099, 4627],
       [4310, 4598, 3014]], dtype=int64)

In [86]:
top_motifs[1][0]

array([2635, 4099, 4627], dtype=int64)

In [87]:
fig = make_subplots(rows=len(top_motifs[1]), cols=1,
                    subplot_titles=("First Motif", "Second Motif"),
                    shared_xaxes=True,)
row = 1
col = 1

for i in top_motifs[1]:
    for j in i:
        fig.add_trace(go.Scatter(x=hour[j: j+m].reset_index(drop=True).index, y=hour[j: j+m]["PV_Watts3PhTotalW3PhT1"], mode="lines",
                             line=dict(color=f"rgba(98, 111, 250, {2/len(i)})")), row=row, col=col)
    row += 1

fig.update_layout(showlegend=False, height=650)

fig.show()

# Counting sequences alike to the global motif

In [88]:
motif_count = stumpy.match(
    Q = hour["PV_Watts3PhTotalW3PhT1"][first_motif_idx: first_motif_idx+m],
    T = hour["PV_Watts3PhTotalW3PhT1"],
    max_distance = mp_tenthquantile_distance
)

motif_count

array([[5.794233708150262e-07, 2635],
       [2.2825339420155735, 4099],
       [3.476527041864336, 4627]], dtype=object)

In [89]:
hour1= hour[4099: 4099+m].reset_index(drop=True)
hour1

Unnamed: 0,Timestamp,Load_LatentHeatWaterVolume,Load_RefrigeratorTemp,Load_StatusBA1Lights,Load_StatusKitchenLightsA,Load_StatusKitchenLightsB,Load_StatusKitchenLightsC,Load_StatusDRLights,Load_StatusLRLights3,Load_StatusEntryHallLights,...,SHW_GlycolFlowHXCoriolisSHW,SHW_WaterFlowHXCoriolisSHW,SHW_GlycolFlowRateHXCoriolisSHW,SHW_WaterFlowRateHXCoriolisSHW,HVAC_HeatPumpIndoorUnitPower,HVAC_HeatPumpOutdoorUnitPower,HVAC_DehumidifierPower,HVAC_DehumidifierInletAirTemp,HVAC_DehumidifierExitAirTemp,HVAC_DehumidifierAirflow
0,2015-07-21 20:00:00,0.964330,4.649213,0.000000,0.750000,0.750000,0.750000,0.483333,1.000000,0.0,...,429.349391,956.757326,0.001126,0.000424,159.189000,1019.166600,21.434367,63.148600,63.577583,3.398350
1,2015-07-21 21:00:00,1.031041,4.855373,0.000000,1.000000,1.000000,1.000000,0.000000,0.500000,0.0,...,429.349391,957.039307,0.001179,0.005070,81.117850,562.923783,226.304867,68.988283,81.641267,46.481383
2,2015-07-21 22:00:00,1.101053,4.762724,0.000000,0.483333,0.483333,0.483333,0.000000,0.483333,0.0,...,429.349391,957.039307,0.001250,0.000332,84.413433,600.634383,213.026250,68.999983,84.788717,41.925283
3,2015-07-21 23:00:00,1.178332,4.627215,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,...,429.349391,957.206000,0.001215,0.003344,60.774932,418.172729,311.861881,70.490576,89.756983,64.453237
4,2015-07-22 00:00:00,0.081241,4.829376,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,...,0.000000,0.000000,0.001255,0.000383,54.616627,368.023237,331.535407,70.389305,89.391831,67.011102
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
163,2015-07-28 15:00:00,0.606339,4.844846,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,...,521.888525,1150.440914,1.024915,2.220709,158.281500,1164.869017,4.537633,62.110717,63.190517,0.000000
164,2015-07-28 16:00:00,0.668426,4.723932,0.083333,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,...,565.913776,1246.132485,0.737170,1.594988,97.361617,807.529167,4.510483,63.984200,65.043883,0.000000
165,2015-07-28 17:00:00,0.737779,4.892834,0.000000,0.016667,0.016667,0.016667,0.000000,0.016667,0.0,...,572.028899,1259.411651,0.085298,0.180217,110.998917,883.789250,31.532967,64.706367,66.458733,6.094283
166,2015-07-28 18:00:00,0.811094,4.687619,0.066667,0.500000,0.500000,0.500000,0.000000,0.500000,0.0,...,572.028899,1259.764316,0.001043,0.005866,161.898583,1185.890767,4.528150,63.837967,63.843650,0.000000


In [90]:
hour2 = hour[4627: 4627+m].reset_index(drop=True)

In [91]:
hour3 = hour[3043: 3043+m].reset_index(drop=True)

In [92]:
hour4 = hour[4843: 4843+m].reset_index(drop=True)

In [93]:
oghour = hour[2635: 2635+m].reset_index(drop=True)

In [94]:
fig = go.Figure(data=go.Scattergl(x=hour1.index, y=hour1["PV_Watts3PhTotalW3PhT1"], mode='lines'))
fig.add_trace(go.Scatter(x=hour2.index, y=hour2["PV_Watts3PhTotalW3PhT1"], mode="lines"))
fig.add_trace(go.Scatter(x=hour3.index, y=hour3["PV_Watts3PhTotalW3PhT1"], mode="lines"))
fig.add_trace(go.Scatter(x=hour4.index, y=hour4["PV_Watts3PhTotalW3PhT1"], mode="lines"))
fig.add_trace(go.Scatter(x=oghour.index, y=oghour["PV_Watts3PhTotalW3PhT1"], mode="lines"))
fig.update_yaxes(title="Power genereated [W]")
fig.show()

# Visualisaing the global motif

In [95]:
fig = make_subplots(rows=3, cols=2,
                    specs=[
                        [{"colspan": 2}, None],
                        [{"colspan": 2}, None],
                        [{}, {}]
                    ],
                    subplot_titles=(" ", "Matrix Profile", "First Occurrence", "Second Occurrence")
                    )

fig.add_trace(
    go.Scatter(x=hour['Timestamp'], y=hour["PV_Watts3PhTotalW3PhT1"], name="Power Generated"),
    row=1, col=1)

fig.add_trace(
    go.Scatter(y=mp[:, 0], name="Distance Matrix"),
    row=2, col=1)

fig.add_trace(
    go.Scatter(x=hour[first_motif_idx: first_motif_idx+m]['Timestamp'], y=hour[first_motif_idx: first_motif_idx+m]["PV_Watts3PhTotalW3PhT1"], name="Zoomed in view"),
    row=3, col=1)

fig.add_trace(
    go.Scatter(x=hour[nearest_neighbor_idx: nearest_neighbor_idx+m]['Timestamp'], y=hour[nearest_neighbor_idx: nearest_neighbor_idx+m]["PV_Watts3PhTotalW3PhT1"], name="Zoomed in view"),
    row=3, col=2)

fig.update_yaxes(title_text="Power Generated [W]", range=[0, 4900], row=1, col=1)
fig.update_yaxes(title_text="Euclidean Distance", range=[0, 9], row=2, col=1)

fig.update_layout(
    shapes=[
        dict(type="rect", xref="x1", yref="y1",
             x0=hour["Timestamp"][first_motif_idx], x1=hour["Timestamp"][first_motif_idx+(24*7)], y0=0, y1=4900),
        dict(type="rect", xref="x1", yref="y1",
             x0=hour["Timestamp"][nearest_neighbor_idx], x1=hour["Timestamp"][nearest_neighbor_idx+(24*7)], y0=0, y1=4900),
        dict(type="line", xref="x2", yref="y2",
             x0=first_motif_idx, x1=first_motif_idx, y0=0, y1=9, line=dict(dash="dot")),
        dict(type="line", xref="x2", yref="y2",
             x0=nearest_neighbor_idx, x1=nearest_neighbor_idx, y0=0, y1=9, line=dict(dash="dot"))
    ],
    height=700, showlegend=False)

fig.show()

# Visualising the discord

In [96]:
nn_to_discord = hour[7763: 7763+m].reset_index(drop=True)

In [97]:
#Potential discords are located at [index 372, dist 8.52], [index 5796, 8.22], [7835, 8.18]

In [110]:
fig = make_subplots(rows=3, cols=1)

fig.add_trace(go.Scatter(x=hour[372: 372+m].index, y=hour[372: 372+m]["PV_Watts3PhTotalW3PhT1"],
                         mode="lines", name="Anomaly 1"),
              row=1, col=1)

# fig.add_trace(go.Scatter(x=hour[372: 372+m].index, y=hour[372: 372+m]["PV_PVInsolationHArray"], mode="lines"),
#               row=1, col=1)

fig.add_trace(go.Scatter(x=hour[5796: 5796+m].index, y=hour[5796: 5796+m]["PV_Watts3PhTotalW3PhT1"],
                         mode="lines", name="Anomaly 2"),
              row=2, col=1)
fig.add_trace(go.Scatter(x=hour[7835: 7835+m].index, y=hour[7835: 7835+m]["PV_Watts3PhTotalW3PhT1"],
                         mode="lines", name="Anomaly 3"),
              row=3, col=1)

fig.update_layout(height=600)

fig.show()

In [99]:
fig = go.Figure(data=go.Scattergl(x=hour.index, y=hour["PV_PVInsolationHArray"], mode='lines'))
fig.show()

# Multi-dim motifs

In [100]:
mps, indices = stumpy.mstump(hour[["PV_Watts3PhTotalW3PhT1", "PV_Watts3PhTotalW3PhT2"]], m)
mps

array([[5.02767999, 5.02768035, 5.02768206, ...,        inf,        inf,
               inf],
       [5.0445916 , 5.04459277, 5.04459546, ...,        inf,        inf,
               inf]])

In [101]:
motifs_idx = np.argmin(mps, axis=1)
motifs_idx

array([2635, 2635], dtype=int64)

In [102]:
nn_idx = indices[np.arange(len(motifs_idx)), motifs_idx]
nn_idx

array([4099, 4099], dtype=int64)

In [103]:
fig = make_subplots(rows=4, cols=1)

fig.add_trace(
    go.Scatter(x=hour['Timestamp'], y=hour["PV_Watts3PhTotalW3PhT1"], name="Power generated 1"),
    row=1, col=1)

fig.add_trace(
    go.Scatter(x=hour['Timestamp'], y=hour["PV_Watts3PhTotalW3PhT2"], name="Power generated 2"),
    row=2, col=1)

fig.add_trace(
    go.Scatter(y=mps[0], name="Distance matrix"),
    row=3, col=1)

fig.add_trace(
    go.Scatter(y=mps[1], name="Distance matrix"),
    row=4, col=1)
