In [1]:
import numpy as np
import pandas as pd
import os
import sys
import csv
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.image as mpimg
import optuna
import random
import joblib
import math
import logging
from msig import Motif, NullModel
from config import RESULTS_MOTIF_DIR, RESULTS_DIR, IMAGES_DIR, DATA_DIR, DATASET_PATH, VARIABLES, STUMPY_EXCL_ZONE_DENOM, TOP_K_MP, INCLUDE, NORMALIZE, SUBSQUENCES_LENGTHS

print(f"Results will be saved in: {RESULTS_DIR}")
print(f"Images will be saved in: {IMAGES_DIR}")
print(f"Data will be accessed from: {DATA_DIR}")


if '__file__' in globals():
    # For standalone scripts
    base_dir = os.path.dirname(__file__)
else:
    # For Jupyter or interactive environments
    base_dir = os.getcwd()

# Add the parent directory of `utils` to the Python path
sys.path.append(os.path.abspath(os.path.join(base_dir, "../")))

2025-01-14 18:46:05,801 - INFO - Results will be saved in: /home/mgsilva/motifpred/results/household
2025-01-14 18:46:05,801 - INFO - Images will be saved in: /home/mgsilva/motifpred/images/household
2025-01-14 18:46:05,801 - INFO - Data will be accessed from: /home/mgsilva/motifpred/data/household


Results will be saved in: /home/mgsilva/motifpred/results/household
Images will be saved in: /home/mgsilva/motifpred/images/household
Data will be accessed from: /home/mgsilva/motifpred/data/household


In [5]:
data_df = pd.read_csv(DATASET_PATH, index_col=0).astype(float)

labels = pd.read_csv(DATA_DIR  / f"labels.csv", index_col=0).astype(float)
data = data_df.values.T
data.shape

(4, 77760)

In [3]:
# motif discovery
import stumpy
from stumpy import config

config.STUMPY_EXCL_ZONE_DENOM = STUMPY_EXCL_ZONE_DENOM  

for m in SUBSQUENCES_LENGTHS:
    mp, mp_indices = stumpy.mstump(data, m, normalize=NORMALIZE)
    np.save(
        RESULTS_MOTIF_DIR / f"normalized_{NORMALIZE}_top_{TOP_K_MP}_m_{m}_mp.npy",
        mp,
        allow_pickle=True,
    )
    np.save(
        RESULTS_MOTIF_DIR / f"normalized_{NORMALIZE}_top_{TOP_K_MP}_m_{m}_mp_indices.npy",
        mp_indices,
        allow_pickle=True,
    )

2025-01-14 18:29:50,778 - INFO - init


In [4]:
def multivar_subsequence_complexity(x):
    # complexity for multivariate time series can be calculated as the sum of the complexity of each dimension
    return np.sum(np.sqrt(np.sum(np.square(np.diff(x)), axis=1)))


def table_summary_motifs(
    motif_indices,
    motif_distances,
    motif_subspaces,
    data,
    k_distances,
    m,
    normalize,
    max_allowed_dist,
):
    mp_stats_table = pd.DataFrame(
        columns=[
            "ID",
            "k_distances",
            "Features",
            "m",
            "#Matches",
            "Indices",
            "max(dists)",
            "min(dists)",
            "med(dists)",
        ]
    )

    motif_index = 0

    n_vars, n_time = data.shape

    if normalize:
        data = (data - np.mean(data, axis=1)[:, np.newaxis]) / np.std(data, axis=1)[
            :, np.newaxis
        ]

    dtypes = [float] * len(data)
    model_empirical = NullModel(data, dtypes, model="empirical")

    for motif_indice, match_indices in enumerate(motif_indices):
        dimensions = motif_subspaces[motif_indice]

        # remove filling values of -1 and Nans from motif_indices and match_distances
        match_indices = match_indices[match_indices != -1]
        match_distances = motif_distances[motif_indice]
        match_distances = match_distances[~np.isnan(match_distances)]

        # if is empty, skip
        if len(match_indices) == 0:
            continue

        excl_zone = np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)

        # remove trivial matches
        non_trivial_matches = []
        for indice in match_indices:
            trivial = False
            for indice_new in non_trivial_matches:
                if abs(indice - indice_new) <= excl_zone:
                    trivial = True
                    break
            if not trivial:
                non_trivial_matches.append(indice)
        match_indices = non_trivial_matches

        max_possible_matches = int(np.floor((n_time - m) / excl_zone + 1))

        # get the multidim time serie motif in the dimensions
        multivar_subsequence = data[dimensions][
            :, match_indices[0] : match_indices[0] + m
        ]

        # minmax normalize subsequence
        epsilon = 1e-10  # to avoid division by zero
        min_values = multivar_subsequence.min(axis=1, keepdims=True)
        max_values = multivar_subsequence.max(axis=1, keepdims=True)
        normalized_multivar_subsequence = (multivar_subsequence - min_values) / (
            max_values - min_values + epsilon
        )
        ce_norm_subsequence = multivar_subsequence_complexity(
            normalized_multivar_subsequence
        )
        norm_ce_norm_subsequence = ce_norm_subsequence / (
            np.sqrt(len(multivar_subsequence[0]) - 1) * len(dimensions)
        )

        max_dist = np.max(match_distances)
        min_dist = np.min(match_distances[1:])

        if k_distances is None:  # consider all matches
            med_dist = np.median(match_distances[1:])
        else:  # consider only the k closest matches
            med_dist = np.median(match_distances[1 : k_distances + 1])

        # np.nanmax([np.nanmean(D) - 2.0 * np.nanstd(D), np.nanmin(D)])
        if max_allowed_dist is None:
            current_data = data[dimensions]
            n_vars, n_time = current_data.shape

            # D The distance profile of `Q` with `T`. It is a 1D numpy array of size
            # `len(T)-len(Q)+1`, where `D[i]` is the distance between query `Q` and
            # `T[i : i + len(Q)]`
            D = np.empty((n_vars, n_time - m + 1))
            for i in range(n_vars):
                D[i, :] = stumpy.mass(
                    multivar_subsequence[i], current_data[i], normalize=normalize
                )
            D = np.mean(D, axis=0)
            D_copy = D.copy().astype(np.float64)
            D_copy[np.isinf(D_copy)] = np.nan
            motif_max_allowed_dist = np.nanmax(
                [np.nanmean(D_copy) - 2.0 * np.nanstd(D_copy), np.nanmin(D_copy)]
            )
        else:
            motif_max_allowed_dist = max_allowed_dist

        unified_weights = "0.33,0.33,0.33"
        w1, w2, w3 = map(float, unified_weights.split(","))
        unified = (
            w1 * (1 - (med_dist / motif_max_allowed_dist))
            + w2 * (len(match_indices) / max_possible_matches)
            + w3 * norm_ce_norm_subsequence
        )

        # remove timepoints from time series in match all indices + m
        time_series_nomatches = data.copy()
        # list of indexes to remove
        indexes_to_remove = [
            i for index in match_indices for i in range(index, index + m)
        ]
        # put zero in the indexes to remove
        time_series_nomatches[:, indexes_to_remove] = 0

        # calculate variance explained by the motif
        vars_explained = []
        for i in range(len(dimensions)):
            vars_explained.append(
                100
                * (
                    1
                    - (
                        np.mean(np.abs(time_series_nomatches[i]))
                        / np.mean(np.abs(data[i]))
                    )
                )
            )

        variance_explained = np.mean(vars_explained)

        # data features are now the ones in the dimensions
        used_features = [f"{dimension}" for dimension in dimensions]

        # max_delta = motif_max_allowed_dist # (worst case) max_dist = sqrt(max_delta^2) <=> max_delta = max_dist
        max_delta = math.sqrt(motif_max_allowed_dist**2 / m)
        delta_thresholds = [max_delta] * len(data)

        #########SIG#########
        motif = Motif(
            multivar_subsequence, dimensions, delta_thresholds, len(match_indices)
        )
        p = motif.set_pattern_probability(model_empirical, vars_indep=True)
        pvalue = motif.set_significance(
            max_possible_matches, n_vars, idd_correction=False
        )

        stats_df = {
            "ID": str(motif_index),
            "k": len(dimensions),
            "Features": ",".join(used_features),
            "m": m,
            "#Matches": len(match_indices) - 1,
            "Indices": match_indices,
            "max(dists)": np.around(max_dist, 3),
            "min(dists)": np.around(min_dist, 3),
            "med(dists)": np.around(med_dist, 3),
            "CE": np.around(norm_ce_norm_subsequence, 3),
            "Score Unified": np.around(unified, 3),
            "Explained Var(%)": np.around(variance_explained, 2),
            "P": p,
            "p-value": pvalue,
        }

        mp_stats_table = (
            pd.DataFrame.from_records([stats_df])
            if mp_stats_table.empty
            else pd.concat(
                [mp_stats_table, pd.DataFrame.from_records([stats_df])],
                ignore_index=True,
            )
        )

        motif_index += 1
    return mp_stats_table

In [5]:
k_distances = None
min_neighbors = 2
cutoff = np.inf
max_matches = 99999
max_distance = None
max_motifs = 99999
k = 1

# Initialize mp_stats_table outside the loop to accumulate results
mp_stats_table = pd.DataFrame()

for m in SUBSQUENCES_LENGTHS:
    mp = np.load(
        RESULTS_MOTIF_DIR / f"normalized_{NORMALIZE}_top_{TOP_K_MP}_m_{m}_mp.npy",
        allow_pickle=True,
    )
    indices = np.load(
        RESULTS_MOTIF_DIR / f"normalized_{NORMALIZE}_top_{TOP_K_MP}_m_{m}_mp_indices.npy",
        allow_pickle=True,
    )

    motif_distances, motif_indices, motif_subspaces, motif_mdls = stumpy.mmotifs(
        data,
        mp,
        indices,
        min_neighbors=min_neighbors,
        max_distance=max_distance,
        cutoffs=cutoff,
        max_matches=max_matches,
        max_motifs=max_motifs,
        k=k,
        include=INCLUDE,
        normalize=NORMALIZE,
    )
    if len(motif_indices[0]) == 0:
        continue

    # Create the table for the current iteration
    table = table_summary_motifs(
        motif_indices,
        motif_distances,
        motif_subspaces,
        data,
        k_distances,
        m,
        NORMALIZE,
        max_distance,
    )
    logging.info(
        "m:{}, #Motifs:{}, Sig:{}".format(
            m, len(motif_indices), np.sum(table["p-value"] < 0.001)
        )
    )

    # Apply Hochberg procedure
    p_values = table["p-value"].to_numpy()
    critical_value = NullModel.hochberg_critical_value(p_values, 0.05)
    sig = (
        table["p-value"] < critical_value
        if critical_value != 0
        else table["p-value"] <= critical_value
    )
    table["Sig_Hochber"] = sig

    print(
        "Sig after Hochberg: {}, critical value: {}".format(np.sum(sig), critical_value)
    )

    # Append the current table to mp_stats_table
    mp_stats_table = pd.concat([mp_stats_table, table], ignore_index=True)

# Save the accumulated mp_stats_table to CSV
mp_stats_table.to_csv(
    RESULTS_DIR / f"mp_stats_table_normalized_{NORMALIZE}_top_{TOP_K_MP}.csv",
    index=False,
)

2025-01-14 18:42:04,933 - INFO - p_value = 1.000E+00 (p_pattern = 1.717E-02)
2025-01-14 18:42:05,013 - INFO - p_value = 0.000E+00 (p_pattern = 3.199E-05)


2025-01-14 18:42:05,095 - INFO - p_value = 0.000E+00 (p_pattern = 1.387E-10)
2025-01-14 18:42:05,179 - INFO - p_value = 0.000E+00 (p_pattern = 1.169E-08)
2025-01-14 18:42:05,260 - INFO - p_value = 0.000E+00 (p_pattern = 7.304E-05)
2025-01-14 18:42:05,357 - INFO - p_value = 0.000E+00 (p_pattern = 2.587E-02)
2025-01-14 18:42:05,438 - INFO - p_value = 0.000E+00 (p_pattern = 5.160E-06)
2025-01-14 18:42:05,520 - INFO - p_value = 7.530E-160 (p_pattern = 3.737E-02)
2025-01-14 18:42:05,611 - INFO - p_value = 0.000E+00 (p_pattern = 6.051E-05)
2025-01-14 18:42:05,692 - INFO - p_value = 0.000E+00 (p_pattern = 7.507E-05)
2025-01-14 18:42:05,782 - INFO - p_value = 5.259E-276 (p_pattern = 5.754E-02)
2025-01-14 18:42:05,865 - INFO - p_value = 0.000E+00 (p_pattern = 2.707E-24)
2025-01-14 18:42:05,947 - INFO - p_value = 0.000E+00 (p_pattern = 6.136E-20)
2025-01-14 18:42:06,031 - INFO - p_value = 0.000E+00 (p_pattern = 1.021E-09)
2025-01-14 18:42:06,112 - INFO - p_value = 0.000E+00 (p_pattern = 1.073E-4

Sig after Hochberg: 23, critical value: 7.529842902599592e-160


2025-01-14 18:42:07,234 - INFO - p_value = 4.412E-07 (p_pattern = 1.610E-05)
2025-01-14 18:42:07,295 - INFO - p_value = 0.000E+00 (p_pattern = 2.717E-13)
2025-01-14 18:42:07,357 - INFO - p_value = 0.000E+00 (p_pattern = 4.572E-11)
2025-01-14 18:42:07,419 - INFO - p_value = 0.000E+00 (p_pattern = 6.349E-08)
2025-01-14 18:42:07,481 - INFO - p_value = 0.000E+00 (p_pattern = 1.725E-10)
2025-01-14 18:42:07,542 - INFO - p_value = 0.000E+00 (p_pattern = 5.155E-32)
2025-01-14 18:42:07,604 - INFO - p_value = 0.000E+00 (p_pattern = 4.094E-28)
2025-01-14 18:42:07,665 - INFO - p_value = 0.000E+00 (p_pattern = 5.326E-06)
2025-01-14 18:42:07,727 - INFO - p_value = 0.000E+00 (p_pattern = 2.153E-04)
2025-01-14 18:42:07,788 - INFO - p_value = 0.000E+00 (p_pattern = 2.163E-21)
2025-01-14 18:42:07,850 - INFO - p_value = 0.000E+00 (p_pattern = 2.599E-04)
2025-01-14 18:42:07,912 - INFO - p_value = 0.000E+00 (p_pattern = 7.010E-22)
2025-01-14 18:42:07,973 - INFO - p_value = 0.000E+00 (p_pattern = 4.942E-31)

Sig after Hochberg: 13, critical value: 4.412009876600158e-07


2025-01-14 18:42:08,360 - INFO - p_value = 0.000E+00 (p_pattern = 5.061E-14)
2025-01-14 18:42:08,440 - INFO - p_value = 0.000E+00 (p_pattern = 2.349E-37)
2025-01-14 18:42:08,521 - INFO - p_value = 0.000E+00 (p_pattern = 3.490E-31)
2025-01-14 18:42:08,601 - INFO - p_value = 0.000E+00 (p_pattern = 3.599E-31)
2025-01-14 18:42:08,682 - INFO - p_value = 0.000E+00 (p_pattern = 1.296E-21)
2025-01-14 18:42:08,761 - INFO - p_value = 0.000E+00 (p_pattern = 5.280E-50)
2025-01-14 18:42:08,843 - INFO - p_value = 0.000E+00 (p_pattern = 1.413E-29)
2025-01-14 18:42:08,924 - INFO - p_value = 0.000E+00 (p_pattern = 1.232E-06)
2025-01-14 18:42:09,004 - INFO - p_value = 0.000E+00 (p_pattern = 5.704E-106)
2025-01-14 18:42:09,083 - INFO - p_value = 0.000E+00 (p_pattern = 4.804E-22)
2025-01-14 18:42:09,165 - INFO - p_value = 0.000E+00 (p_pattern = 6.514E-91)
2025-01-14 18:42:09,245 - INFO - p_value = 0.000E+00 (p_pattern = 3.339E-79)
2025-01-14 18:42:09,326 - INFO - p_value = 0.000E+00 (p_pattern = 6.805E-84

Sig after Hochberg: 15, critical value: 0.0


In [6]:
mp_stats_table = pd.read_csv(
    RESULTS_DIR / f"mp_stats_table_normalized_{NORMALIZE}_top_{TOP_K_MP}.csv"
)

motif_lengths = mp_stats_table["m"].unique()
motif_stats_table = pd.DataFrame(
    columns=[
        "m",
        "#motifs",
        "avg_n_matches",
        "avg_n_features",
        "avg_probability",
        "avg_pvalue",
        "#sig_motifs(<0.01)",
        "significant",
        "#sig_hochberg",
    ]
)
for m in motif_lengths:
    table = mp_stats_table[mp_stats_table["m"] == m]
    if table.empty:
        continue
    n_motifs = table.shape[0]
    n_sig_motifs_0001 = table[table["p-value"] < 0.001].shape[0]
    n_sig_motifs_hochberg = table[table["Sig_Hochber"]].shape[0]
    avg_n_matches = (
        round(table["#Matches"].mean(), 2),
        round(table["#Matches"].std(), 3),
    )
    avg_n_features = round(table["k"].mean(), 2), round(table["k"].std(), 3)
    avg_probability = table["P"].mean(), table["P"].std()
    avg_pvalue = table["p-value"].mean(), table["p-value"].std()

    stats_df = {
        "m": m,
        "#motifs": n_motifs,
        "#sig_motifs(<0.001)": n_sig_motifs_0001,
        "significant": (n_sig_motifs_0001 * 100) / n_motifs,
        "avg_n_matches": avg_n_matches,
        "avg_n_features": avg_n_features,
    }

    motif_stats_table = (
        pd.DataFrame.from_records([stats_df])
        if motif_stats_table.empty
        else pd.concat(
            [motif_stats_table, pd.DataFrame.from_records([stats_df])],
            ignore_index=True,
        )
    )

print(motif_stats_table.to_latex(index=False, float_format="%.3f"))

\begin{tabular}{rrrrll}
\toprule
m & #motifs & #sig_motifs(<0.001) & significant & avg_n_matches & avg_n_features \\
\midrule
60 & 25 & 24 & 96.000 & (395.12, 174.647) & (2.0, 0.0) \\
180 & 14 & 14 & 100.000 & (162.14, 97.063) & (2.0, 0.0) \\
360 & 15 & 15 & 100.000 & (74.67, 39.585) & (2.0, 0.0) \\
\bottomrule
\end{tabular}



In [7]:
mp_stats_table = pd.read_csv(
    RESULTS_DIR / f"mp_stats_table_normalized_{NORMALIZE}_top_{TOP_K_MP}.csv"
)

# excluded p-value > 0.001
mp_stats_table = mp_stats_table[mp_stats_table["p-value"] < 0.001]
subsequence_lengths = mp_stats_table["m"].unique()
for m in subsequence_lengths:
    print("########## m:{} #########".format(m))
    top_motifs = mp_stats_table[mp_stats_table["m"] == m]
    top_motifs = top_motifs.sort_values(by="Score Unified", ascending=False).head(5)
    top_motifs = top_motifs[
        [
            "ID",
            "#Matches",
            "k",
            "Features",
            "CE",
            "Score Unified",
            "max(dists)",
            "min(dists)",
            "med(dists)",
            "p-value",
            "Explained Var(%)",
        ]
    ]
    top_motifs["p-value"] = top_motifs["p-value"].apply(lambda x: f"{x:.2e}")
    print(top_motifs.to_latex(index=False, float_format="%.3f"))
    print("\n")

########## m:60 #########
\begin{tabular}{rrrlrrrrrlr}
\toprule
ID & #Matches & k & Features & CE & Score Unified & max(dists) & min(dists) & med(dists) & p-value & Explained Var(%) \\
\midrule
8 & 741 & 2 & 0,3 & 0.115 & 0.258 & 6.710 & 1.359 & 4.165 & 0.00e+00 & 51.700 \\
10 & 716 & 2 & 0,3 & 0.084 & 0.249 & 6.712 & 1.805 & 4.075 & 5.26e-276 & 49.440 \\
5 & 836 & 2 & 3,0 & 0.127 & 0.237 & 7.966 & 1.144 & 5.823 & 0.00e+00 & 58.820 \\
3 & 406 & 2 & 0,3 & 0.113 & 0.195 & 5.510 & 0.563 & 3.738 & 0.00e+00 & 30.990 \\
2 & 427 & 2 & 0,3 & 0.117 & 0.193 & 6.028 & 0.530 & 4.209 & 0.00e+00 & 33.360 \\
\bottomrule
\end{tabular}



########## m:180 #########
\begin{tabular}{rrrlrrrrrlr}
\toprule
ID & #Matches & k & Features & CE & Score Unified & max(dists) & min(dists) & med(dists) & p-value & Explained Var(%) \\
\midrule
0 & 2 & 2 & 0,1 & 0.000 & 0.331 & 0.000 & 0.000 & 0.000 & 4.41e-07 & 0.330 \\
8 & 349 & 2 & 0,3 & 0.075 & 0.196 & 15.648 & 5.152 & 13.890 & 0.00e+00 & 69.610 \\
1 & 235 & 2 & 

In [8]:
def plot_motif(ts_list, features, m, motif_indexes, motif_name):
    # Generate the time range starting at 2008-09-01 00:00:00 with 77760 minutes
    start_time = pd.Timestamp("2008-09-01 00:00:00")
    time_range = pd.date_range(start=start_time, periods=77760, freq="min")

    # Define 5 equally spaced indices for xticks on the right plots
    xtick_indices = np.linspace(0, len(time_range) - 1, 5, dtype=int)
    xtick_labels = [
        time_range[idx].strftime("%d,%b \n %H:%M:%S") for idx in xtick_indices
    ]

    fig, axes = plt.subplots(
        ncols=2, nrows=len(ts_list), figsize=(10, 2 * len(ts_list)), squeeze=False
    )

    for i in range(len(ts_list)):
        ts = ts_list[i]
        # plot light grey on the right side
        axes[i, 1].plot(ts, color="black", linewidth=0.5, alpha=0.5)

        # Set unique colors for motifs
        colors = plt.cm.tab20(np.linspace(0, 1, len(motif_indexes)))
        axes[i, 0].set_prop_cycle("color", colors)
        axes[i, 1].set_prop_cycle("color", colors)

        # Plot motifs on the left and highlight on the right
        for index in motif_indexes:
            subsequence_match = ts.iloc[index : index + m]
            axes[i, 0].plot(subsequence_match.values)  # Ensure left plot is drawn
            axes[i, 1].plot(subsequence_match, linewidth=2)

        # Set y-axis labels for left plot
        # Split long y-axis labels (e.g., those with 3 words) into multiple lines
        words = features[i].split()
        if len(words) == 3:
            wrapped_label = (
                f"{' '.join(words[:-1])}\n{words[-1]}"  # Add \n before the last word
            )
        else:
            wrapped_label = features[i]  # No line break for 2 words or less

        # Set y-axis labels for left plot
        axes[i, 0].set_ylabel(wrapped_label, rotation=90, size="large")
        xticks = [0] + list(range(max(1, m // 5), m - 1, max(1, m // 5))) + [m - 1]
        xticklabels = ["i"] + [f"i+{t}" for t in xticks[1:-1]] + [f"i+{m-1}"]

        axes[i, 0].set_xticks(xticks)
        axes[i, 0].set_xticklabels(xticklabels)
        plt.setp(axes[i, 0].xaxis.get_majorticklabels(), rotation=90)

        # Add custom xticks and labels to the right plots
        axes[i, 1].set_xticks(xtick_indices)
        axes[i, 1].set_xticklabels(xtick_labels, rotation=45)

        # Only display x-axis on the last row of plots
        if i != len(ts_list) - 1:
            axes[i, 0].axes.get_xaxis().set_visible(False)
            axes[i, 1].axes.get_xaxis().set_visible(False)

    # Set titles for the two columns
    axes[0, 0].set_title("Raw Subsequences")
    axes[0, 1].set_title("Motif in TS")

    plt.tight_layout()
    plt.savefig(
        images_dir + "/m=" + str(m) + "_motif_" + str(motif_name) + ".pdf",
        bbox_inches="tight",
    )
    plt.show()

    return None


In [None]:
# Load motif statistics table
file_path = os.path.join(
    results_dir,
    f"table_motifs_normalize={normalize}_min_neighbors={min_neighbors}_max_distance={max_distance}_cutoff={cutoff}_max_matches={max_matches}_max_motifs={max_motifs}.csv",
)
mp_stats_table = pd.read_csv(file_path)

# Extract unique subsequence lengths
subsequence_lengths = mp_stats_table["m"].unique()

ts = data

# Loop over each subsequence length
for m in subsequence_lengths:
    logging.info(f"Motif length: {m}")

    # Filter motifs by current subsequence length
    top_motifs = mp_stats_table[mp_stats_table["m"] == m]
    top_motifs = top_motifs.sort_values(by="Score Unified", ascending=False).head(5)

    # Loop over each top motif
    for top_motif in top_motifs.to_dict(orient="records"):
        print(top_motif)

        # Parse dimensions and indices
        dimensions = sorted(map(int, top_motif["Features"].split(",")))
        indices = sorted(map(int, top_motif["Indices"].strip("[]").split(",")))

        # Extract feature names
        features = [df_data.columns[dimension] for dimension in dimensions]

        # Add label feature to the list
        ts_list = [df_data[feature].reset_index(drop=True) for feature in features]

        # Generate motif name
        motif_name = top_motif["ID"]

        # Create the figure and axes once, with 3 subplots for the submeters
        fig, ax = plt.subplots(figsize=(10, 5), nrows=3, sharex=True)

        # Loop over each index and plot the subsequences
        for indice in indices:
            subseq = labels.iloc[indice: indice + m]

            # Plot subsequences for each submeter in the same 3 subplots
            for i, feature in enumerate(labels.columns):
                ax[i].plot(subseq[feature].values, alpha=0.7)  # Plot with transparency to differentiate subsequences
                ax[i].set_ylabel(feature.replace("_", " "))  # Format the y-axis label for better readability

        # Set common x-axis labels and formatting
        ax[-1].set_xlabel("Time Index")  # Only set x-axis label on the bottom plot

        # Adjust layout to prevent overlap
        plt.tight_layout()

        # Show the plot (or save it, depending on your requirements)
        plt.show()

        features = [feature.replace("_", " ") for feature in features]

        # Plot and save the motif
        plot_motif(ts_list, features, m, indices, motif_name)

Motif length:  60


2025-01-14 18:42:10,663 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.


2025-01-14 18:42:11,753 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
2025-01-14 18:42:11,762 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
2025-01-14 18:42:11,764 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
2025-01-14 18:42:11,765 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
2025-01-14 18:42:11,766 - INFO - Using categorical units to plot a list of strings that are all parsable as 

KeyboardInterrupt: 