# ***Matrix Profile*** (STUMP)

In this notebook, the Matrix Profile is evaluated. The implementation is the STUMP function of the stumpy module.

## Imports

In [1]:
import math
import os
import sys
import json
from time import time, sleep

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import pickle as pkl
from tqdm import tqdm

from sklearn.preprocessing import MinMaxScaler
from pathlib import Path

In [2]:
from TSB_UAD.models.distance import Fourier
from TSB_UAD.models.feature import Window
from TSB_UAD.utils.slidingWindows import find_length, plotFig, printResult

from TSB_UAD.models.iforest import IForest

## Data Pre-Processing

In [3]:
# Load the data for the evaluation.
all_data = []

with open('dataset.pkl', 'rb') as f:
    data = pkl.load(f)

all_data.extend(data['evaluation']['single_normality'])
all_data.extend(data['evaluation']['double_normality'])
all_data.extend(data['evaluation']['triple_normality'])
all_data.extend(data['evaluation']['quadruple_normality'])
name_to_eval_series = {ts['Name']:ts for ts in all_data}

tuning_data = []
tuning_data.extend(data['tuning']['single_normality'])
tuning_data.extend(data['tuning']['double_normality'])
tuning_data.extend(data['tuning']['triple_normality'])
tuning_data.extend(data['tuning']['quadruple_normality'])
name_to_tune_series = {ts['Name']:ts for ts in tuning_data}

In [4]:
preprocessed_dict = {}

In [14]:
# Set the number of windows to be fit per batch.
from typing import Any

windows_per_batch = 150

def preprocess_series(series, slidingWindow=None, verbose=True, calculate_X_data=False):
    # === Pre-processing steps ===
    timeseries = series
    # Prepare data for unsupervised method
    name = timeseries['Name']

    data = timeseries['data']
    max_length = data.shape[0]
    label = timeseries['labels']

    if slidingWindow is None:
        slidingWindow = find_length(data)

    if calculate_X_data:
        X_data = Window(window=slidingWindow).convert(data).to_numpy()
    else:
        X_data = None

    # Take the series and batch it.
    batched_data = []

    i = 0
    flag = True
    # Keep taking batches until the point at which no new windows can be taken.
    while i < len(data) and flag:
        # The data batches begin at the index indicated. If first batch, then the beginning of the time series.
        batch_samples_begin = i

        # The data batches end at the index where `windows_per_batch` can be *completely* extracted since the batch beginning. 
        # Formula: 
        #   i: current beginning of batch / offset
        #   + slidingWindow: to have enough samples extract one window
        #   + windows_per_batch: to have enough samples to extract the rest of the windows
        #   - 1: because the first window extracted is counted twice
        batch_samples_end = i + windows_per_batch + slidingWindow - 1
        
        # Guard against the ending of the time series where a full batch cannot be formed.
        if batch_samples_end > len(data):
            batch_samples_end = len(data)
            flag = False
 
        # Guard against case where the batch cannot hold even one window.
        if len(data[batch_samples_begin:batch_samples_end]) < slidingWindow:
            break

        batched_data.append(data[batch_samples_begin:batch_samples_end])

        # The next batch starts at the point where a new window be created after the last window of the last batch.
        # So, end of the previous window - length of window = start of the last window.
        #   start of the last window + 1 = start of the first window of the next batch.
        i = batch_samples_end - slidingWindow + 1


    # Take the series and batch it for history batching: For each batch, also append have access to the data of the previous batch.
    batched_data_previous_access = []
    i = 0
    previous_window_beginning = 0
    flag = True
    # Keep taking batches until the point at which no new windows can be taken.
    while i < len(data) and flag:
        # The data batches begin at the index indicated. If first batch, then the beginning of the time series.
        batch_samples_begin = i

        # The data batches end at the index where `windows_per_batch` can be *completely* extracted since the batch beginning. 
        # Formula: 
        #   i: current beginning of batch / offset
        #   + slidingWindow: to have enough samples extract one window
        #   + windows_per_batch: to have enough samples to extract the rest of the windows
        #   - 1: because the first window extracted is counted twice
        batch_samples_end = i + windows_per_batch + slidingWindow - 1
        
        # Guard against the ending of the time series where a full batch cannot be formed.
        if batch_samples_end > len(data):
            batch_samples_end = len(data)
            flag = False
 
        # Guard against case where the batch cannot hold even one window.
        if len(data[batch_samples_begin:batch_samples_end]) < slidingWindow:
            break

        batched_data_previous_access.append(data[previous_window_beginning:batch_samples_end])

        previous_window_beginning = batch_samples_begin

        # The next batch starts at the point where a new window be created after the last window of the last batch.
        # So, end of the previous window - length of window = start of the last window.
        #   start of the last window + 1 = start of the first window of the next batch.
        i = batch_samples_end - slidingWindow + 1

    # Take the windows and batch them.
    if calculate_X_data:
        batched_X_data = []
        i = 0
        while i < len(X_data):
            begin = i
            end = i + windows_per_batch
            if end > len(X_data):
                end = len(X_data)

            batched_X_data.append(X_data[begin:end])
            i += windows_per_batch
    else:
        batched_X_data = None


    # Processing data for dynamic partitioning.
    initial_partition_length = 500
    initial_partition = data[:initial_partition_length]

    max_v = np.max(initial_partition)
    min_v = np.min(initial_partition)

    data_partitions = [initial_partition]
    current_partition = []
    change_detected = False

    p = 500
    change_point_threshold = 0.5
    exceed_threshold = 0.65
    post_change_points = []

    for point in data[initial_partition_length:]:
        
        # Check for significant change
        if (point > max_v * (1 + change_point_threshold)) or (point < min_v * (1 - change_point_threshold)):
            change_detected = True
     
        current_partition.append(point)


        # After change, collect additional points
        if change_detected:
            post_change_points.append(point)
            if len(post_change_points) == p:
                exceeds_threshold_points = [(pt > max_v * (1 + change_point_threshold) or pt < min_v * (1 - change_point_threshold)) for pt in post_change_points]
                if sum(exceeds_threshold_points) >= exceed_threshold * p:
                    max_v = np.mean([max_v] + [pt for pt in post_change_points if pt > max_v])
                    min_v = np.mean([min_v] + [pt for pt in post_change_points if pt < min_v])

                post_change_points = []

                # Add the current partition to data partitions
                data_partitions.append(np.array(current_partition))
                current_partition = []
                change_detected = False
                
        
    # Add any remaining points in current_partition to data_partitions
    if current_partition:
        data_partitions.append(np.array(current_partition))

    # Processing data for dynamic partitioning (Percentile variant)
    initial_partition_length = 500
    initial_partition = data[:initial_partition_length]

    # Compute initial percentiles
    percentile_5 = np.percentile(initial_partition, 5)
    percentile_95 = np.percentile(initial_partition, 95)

    percentile_data_partitions = []
    current_partition = []
    change_detected = False
    p = 500
    exceed_threshold = 0.5
    post_change_points = []

    for point in data[initial_partition_length:]:
        
        # Check for significant change
        if (point < percentile_5) or (point > percentile_95):
            change_detected = True
     
        current_partition.append(point)


        # After change, collect additional points
        if change_detected:
            post_change_points.append(point)
            if len(post_change_points) == p:
                exceeds_threshold_points = [(pt < percentile_5 or pt > percentile_95) for pt in post_change_points]
                if sum(exceeds_threshold_points) / p >= exceed_threshold:
                    # Update percentiles
                    percentile_5 = np.percentile(post_change_points, 5)
                    percentile_95 = np.percentile(post_change_points, 95)

                post_change_points = []
                # Add the current partition to data partitions
                percentile_data_partitions.append(np.array(current_partition))
                current_partition = []
                change_detected = False
                
        
    # Add any remaining points in current_partition to percentile_data_partitions
    if current_partition:
        percentile_data_partitions.append(np.array(current_partition))


    if verbose:
        print(f'Time-Series name: {name}')
        print("Estimated Subsequence length: ", slidingWindow)
        print()

    return {
        'name': name,
        'data': data,
        'label': label,
        'slidingWindow': slidingWindow,
        'X_data': X_data,
        'batched_X_data': batched_X_data,
        'batched_data': batched_data,
        'points_per_batch': len(batched_data[0]),
        'history_batched_data': batched_data_previous_access,
        'dynamic_partitioning_batches': data_partitions,
        'percentile_dynamic_partitioning_batches': percentile_data_partitions,
        'Time series length': len(data),
        'Number of abnormal points': list(label).count(1)
    }
    
for timeseries in all_data:
    # Store the pre-processed variables in the new dictionary
    preprocessed_dict[timeseries['Name']] = preprocess_series(series=timeseries)

Time-Series name: ECG1
Estimated Subsequence length:  100

Time-Series name: ECG1_20k
Estimated Subsequence length:  100

Time-Series name: IOPS1
Estimated Subsequence length:  288

Time-Series name: SMD1
Estimated Subsequence length:  125

Time-Series name: Occupancy1
Estimated Subsequence length:  125

Time-Series name: ECG1+IOPS1
Estimated Subsequence length:  100

Time-Series name: SMD1+Occupancy1
Estimated Subsequence length:  125

Time-Series name: ECG1+IOPS1+Occupancy1
Estimated Subsequence length:  100

Time-Series name: SMD1+ECG1+Occupancy1
Estimated Subsequence length:  125

Time-Series name: ECG1+IOPS1+SMD1+Occupancy1
Estimated Subsequence length:  100



## 1. Offline Running
The STUMP algorithm is run offline, with access to all subsequences.

In [6]:
import stumpy
modelName = 'STUMP'

In [None]:
results = []

for name in (p := tqdm(preprocessed_dict.keys())):
    p.set_description(name)
    ts = preprocessed_dict[name]
    window_size = ts['slidingWindow']
    x = ts['data']
    
    k = 1
    t0 = time()
    score_ = stumpy.stump(T_A=x, m=window_size, k=k, ignore_trivial=True, normalize=True)
    t1 = time()
    score = score_.T[k-1]
    
    score = np.array(score)
    score = MinMaxScaler(feature_range=(0,1)).fit_transform(score.reshape(-1,1)).ravel()
    score = np.array([score[0]]*math.ceil((ts['slidingWindow']-1)/2) + list(score) + [score[-1]]*((ts['slidingWindow']-1)//2))
    
    L = printResult(ts['data'], ts['label'], score, ts['slidingWindow'], ts['name'], modelName)
    results.append([name] + L + [t1-t0, len(x)])

ECG1+IOPS1+SMD1+Occupancy1: 100%|██████████| 10/10 [01:15<00:00,  7.60s/it]


In [None]:
columns = ['Name'] + ['AUC', 'Precision', 'Recall', 'F-score', 'Range-recall', 'ExistenceReward', 'OverlapReward', 'Range-precision', 'Range-Fscore', 'Precision@k', 'RangeAUC', 'Time', 'Number of Windows']
df = pd.DataFrame(results, columns=columns)

In [None]:
df['Number of anomalies'] = df['Name'].apply(lambda x: np.sum(preprocessed_dict[x]['label']))
df[['Name', 'AUC', 'Precision@k', 'Number of anomalies', 'Time', 'Number of Windows']]

Unnamed: 0,Name,AUC,Precision@k,Number of anomalies,Time,Number of Windows
0,ECG1,0.635414,0.084577,21105.0,66.287434,229900
1,ECG1_20k,0.857905,0.133333,675.0,0.611055,20000
2,IOPS1,0.719323,0.18932,206.0,0.132818,8784
3,SMD1,0.472878,0.0,2694.0,0.448497,28479
4,Occupancy1,0.169576,0.0,972.0,0.030233,2665
5,ECG1+IOPS1,0.759207,0.098751,881.0,0.47516,28784
6,SMD1+Occupancy1,0.377355,0.0,3666.0,0.516636,31144
7,ECG1+IOPS1+Occupancy1,0.765244,0.058284,1853.0,0.555183,31449
8,SMD1+ECG1+Occupancy1,0.576887,0.0,4341.0,1.140249,51144
9,ECG1+IOPS1+SMD1+Occupancy1,0.613329,0.0,4547.0,1.561258,59928


In [None]:
print(df[['Name', 'AUC', 'Precision@k', 'Number of anomalies', 'Time', 'Number of Windows']].to_latex())

\begin{tabular}{llrrrrr}
\toprule
{} &                        Name &       AUC &  Precision@k &  Number of anomalies &       Time &  Number of Windows \\
\midrule
0 &                        ECG1 &  0.635414 &     0.084577 &              21105.0 &  66.287434 &             229900 \\
1 &                    ECG1\_20k &  0.857905 &     0.133333 &                675.0 &   0.611055 &              20000 \\
2 &                       IOPS1 &  0.719323 &     0.189320 &                206.0 &   0.132818 &               8784 \\
3 &                        SMD1 &  0.472878 &     0.000000 &               2694.0 &   0.448497 &              28479 \\
4 &                  Occupancy1 &  0.169576 &     0.000000 &                972.0 &   0.030233 &               2665 \\
5 &                  ECG1+IOPS1 &  0.759207 &     0.098751 &                881.0 &   0.475160 &              28784 \\
6 &             SMD1+Occupancy1 &  0.377355 &     0.000000 &               3666.0 &   0.516636 &              31144 \\
7 &

In [None]:
df.to_csv('Results/Matrix Profile/STUMP-Offline-Static.csv', index=False)

## 2. Online Running: Variant 1
The STUMP algorithm is run online, with access to limited subsequences. In this variant, the algorithm only has access to the subsequences of a given (current) batch.

In [None]:
results = []

for name in (p := tqdm(preprocessed_dict.keys())):
    p.set_description(name)
    ts = preprocessed_dict[name]
    window_size = ts['slidingWindow']
    x = ts['data']
    
    k = 1
    score = []
    t0 = time()
    for batch in tqdm(ts['batched_data'], desc='Processing Batch'):
        score_ = stumpy.stump(T_A=batch, m=window_size, k=k, ignore_trivial=True, normalize=True)
        score.extend(score_.T[k-1])
    t1 = time()

    # In some combinations of batch size and window size, windows overlap with all closest-distance candidates and cannot be scored.
    # In this case, inf is returned. To fix this, any instances of infinite distances are replaced with zero distance.
    score = [s if s != np.inf else 0 for s in score]
    score = np.array(score)
    score = MinMaxScaler(feature_range=(0,1)).fit_transform(score.reshape(-1,1)).ravel()
    score = np.array([score[0]]*math.ceil((ts['slidingWindow']-1)/2) + list(score) + [score[-1]]*((ts['slidingWindow']-1)//2))
    
    L = printResult(ts['data'], ts['label'], score, ts['slidingWindow'], ts['name'], modelName)
    results.append([name] + L + [t1-t0, len(x)])

Processing Batch: 100%|██████████| 1533/1533 [00:07<00:00, 211.30it/s]
Processing Batch: 100%|██████████| 133/133 [00:00<00:00, 202.55it/s]
Processing Batch: 100%|██████████| 57/57 [00:00<00:00, 193.98it/s]
Processing Batch: 100%|██████████| 190/190 [00:00<00:00, 204.93it/s]
Processing Batch: 100%|██████████| 17/17 [00:00<00:00, 220.61it/s]
Processing Batch: 100%|██████████| 192/192 [00:00<00:00, 203.55it/s]
Processing Batch: 100%|██████████| 207/207 [00:00<00:00, 217.09it/s]
Processing Batch: 100%|██████████| 209/209 [00:01<00:00, 206.92it/s]]
Processing Batch: 100%|██████████| 341/341 [00:01<00:00, 207.60it/s] 
Processing Batch: 100%|██████████| 399/399 [00:01<00:00, 204.71it/s]0s/it]
ECG1+IOPS1+SMD1+Occupancy1: 100%|██████████| 10/10 [00:19<00:00,  1.99s/it]


In [None]:
columns = ['Name'] + ['AUC', 'Precision', 'Recall', 'F-score', 'Range-recall', 'ExistenceReward', 'OverlapReward', 'Range-precision', 'Range-Fscore', 'Precision@k', 'RangeAUC', 'Time', 'Number of Windows']
df = pd.DataFrame(results, columns=columns)

In [None]:
df['Number of anomalies'] = df['Name'].apply(lambda x: np.sum(preprocessed_dict[x]['label']))
df[['Name', 'AUC', 'Precision@k', 'Number of anomalies', 'Time', 'Number of Windows']]

Unnamed: 0,Name,AUC,Precision@k,Number of anomalies,Time,Number of Windows
0,ECG1,0.696418,0.0,21105.0,7.259351,229900
1,ECG1_20k,0.689407,0.0,675.0,0.659636,20000
2,IOPS1,0.596923,0.0,206.0,0.297844,8784
3,SMD1,0.556622,0.0,2694.0,0.931335,28479
4,Occupancy1,0.186049,0.0,972.0,0.081062,2665
5,ECG1+IOPS1,0.708663,0.0,881.0,0.94637,28784
6,SMD1+Occupancy1,0.441063,0.0,3666.0,0.957632,31144
7,ECG1+IOPS1+Occupancy1,0.499768,0.0,1853.0,1.014065,31449
8,SMD1+ECG1+Occupancy1,0.542541,0.0,4341.0,1.645605,51144
9,ECG1+IOPS1+SMD1+Occupancy1,0.525118,0.0,4547.0,1.953239,59928


In [None]:
print(df[['Name', 'AUC', 'Precision@k', 'Number of anomalies', 'Time', 'Number of Windows']].to_latex())

\begin{tabular}{llrrrrr}
\toprule
{} &                        Name &       AUC &  Precision@k &  Number of anomalies &      Time &  Number of Windows \\
\midrule
0 &                        ECG1 &  0.696418 &          0.0 &              21105.0 &  7.259351 &             229900 \\
1 &                    ECG1\_20k &  0.689407 &          0.0 &                675.0 &  0.659636 &              20000 \\
2 &                       IOPS1 &  0.596923 &          0.0 &                206.0 &  0.297844 &               8784 \\
3 &                        SMD1 &  0.556622 &          0.0 &               2694.0 &  0.931335 &              28479 \\
4 &                  Occupancy1 &  0.186049 &          0.0 &                972.0 &  0.081062 &               2665 \\
5 &                  ECG1+IOPS1 &  0.708663 &          0.0 &                881.0 &  0.946370 &              28784 \\
6 &             SMD1+Occupancy1 &  0.441063 &          0.0 &               3666.0 &  0.957632 &              31144 \\
7 &       E

In [None]:
df.to_csv('Results/Matrix Profile/STUMP-Variant1.csv', index=False)

## 3. Online Running: Variant 2: History Batching
The STUMP algorithm is run online, with access to limited subsequences. In this variant, the algorithm only has access to the subsequences of a given (current) batch and the previous batch.

In [None]:
results = []

for name in (p := tqdm(preprocessed_dict.keys())):
    p.set_description(name)
    ts = preprocessed_dict[name]
    window_size = ts['slidingWindow']
    x = ts['data']
    
    k = 1
    score = []
    t0 = time()

    for i, batch in enumerate(tqdm(ts['history_batched_data'], desc='Processing Batch')):
        score_ = stumpy.stump(T_A=batch, m=window_size, k=k, ignore_trivial=True, normalize=True)

        if i == 0:
            next_scores = score_.T[k-1]
        else:
            new_scores = list(score_.T[k-1])[:windows_per_batch]
            next_scores = list(score_.T[k-1])[windows_per_batch:]
        
            old_scores_new_scores_mean = list(np.add(previous_scores, new_scores)/2)
            score.extend(old_scores_new_scores_mean)

        previous_scores = next_scores
    # The final batch's scores don't have any future to average with, they are just added.
    score.extend(previous_scores)
    t1 = time()

    # In some combinations of batch size and window size, windows overlap with all closest-distance candidates and cannot be scored.
    # In this case, inf is returned. To fix this, any instances of infinite distances are replaced with zero distance.
    score = [s if s != np.inf else 0 for s in score]
    score = np.array(score)
    score = MinMaxScaler(feature_range=(0,1)).fit_transform(score.reshape(-1,1)).ravel()
    score = np.array([score[0]]*math.ceil((ts['slidingWindow']-1)/2) + list(score) + [score[-1]]*((ts['slidingWindow']-1)//2))
    
    L = printResult(ts['data'], ts['label'], score, ts['slidingWindow'], ts['name'], modelName)
    results.append([name] + L + [t1-t0, len(x)])

Processing Batch: 100%|██████████| 1533/1533 [00:09<00:00, 161.34it/s]
Processing Batch: 100%|██████████| 133/133 [00:00<00:00, 158.81it/s]
Processing Batch: 100%|██████████| 57/57 [00:00<00:00, 161.22it/s]
Processing Batch: 100%|██████████| 190/190 [00:01<00:00, 168.15it/s]
Processing Batch: 100%|██████████| 17/17 [00:00<00:00, 176.79it/s]
Processing Batch: 100%|██████████| 192/192 [00:01<00:00, 156.29it/s]
Processing Batch: 100%|██████████| 207/207 [00:01<00:00, 167.36it/s]
Processing Batch: 100%|██████████| 209/209 [00:01<00:00, 161.62it/s]]
Processing Batch: 100%|██████████| 341/341 [00:02<00:00, 158.53it/s] 
Processing Batch: 100%|██████████| 399/399 [00:02<00:00, 167.47it/s]6s/it]
ECG1+IOPS1+SMD1+Occupancy1: 100%|██████████| 10/10 [00:24<00:00,  2.46s/it]


In [None]:
columns = ['Name'] + ['AUC', 'Precision', 'Recall', 'F-score', 'Range-recall', 'ExistenceReward', 'OverlapReward', 'Range-precision', 'Range-Fscore', 'Precision@k', 'RangeAUC', 'Time', 'Number of Windows']
df = pd.DataFrame(results, columns=columns)

In [None]:
df['Number of anomalies'] = df['Name'].apply(lambda x: np.sum(preprocessed_dict[x]['label']))
df[['Name', 'AUC', 'Precision@k', 'Number of anomalies', 'Time', 'Number of Windows']]

Unnamed: 0,Name,AUC,Precision@k,Number of anomalies,Time,Number of Windows
0,ECG1,0.845447,0.114949,21105.0,9.50475,229900
1,ECG1_20k,0.968435,0.299259,675.0,0.842503,20000
2,IOPS1,0.460942,0.0,206.0,0.356547,8784
3,SMD1,0.46462,0.0,2694.0,1.133917,28479
4,Occupancy1,0.190969,0.0,972.0,0.100159,2665
5,ECG1+IOPS1,0.901091,0.138479,881.0,1.232482,28784
6,SMD1+Occupancy1,0.374143,0.0,3666.0,1.240844,31144
7,ECG1+IOPS1+Occupancy1,0.684548,0.072315,1853.0,1.29625,31449
8,SMD1+ECG1+Occupancy1,0.569588,0.0,4341.0,2.154027,51144
9,ECG1+IOPS1+SMD1+Occupancy1,0.590303,0.0,4547.0,2.386611,59928


In [None]:
print(df[['Name', 'AUC', 'Precision@k', 'Number of anomalies', 'Time', 'Number of Windows']].to_latex(index=False))

\begin{tabular}{lrrrrr}
\toprule
                      Name &      AUC &  Precision@k &  Number of anomalies &     Time &  Number of Windows \\
\midrule
                      ECG1 & 0.845447 &     0.114949 &              21105.0 & 9.504750 &             229900 \\
                  ECG1\_20k & 0.968435 &     0.299259 &                675.0 & 0.842503 &              20000 \\
                     IOPS1 & 0.460942 &     0.000000 &                206.0 & 0.356547 &               8784 \\
                      SMD1 & 0.464620 &     0.000000 &               2694.0 & 1.133917 &              28479 \\
                Occupancy1 & 0.190969 &     0.000000 &                972.0 & 0.100159 &               2665 \\
                ECG1+IOPS1 & 0.901091 &     0.138479 &                881.0 & 1.232482 &              28784 \\
           SMD1+Occupancy1 & 0.374143 &     0.000000 &               3666.0 & 1.240844 &              31144 \\
     ECG1+IOPS1+Occupancy1 & 0.684548 &     0.072315 &               

In [None]:
df.to_csv('Results/Matrix Profile/STUMP-variant2', index=False)

## 4. Online Running: Variant 3: Dynamic Partitioning.
The STUMP algorithm is run online, with access to limited subsequences. In this variant, the algorithm only has access to the subsequences of a given (current) batch. The partitioning of the original time series is being done dynamically.

In [None]:
results = []

for name in (p := tqdm(preprocessed_dict.keys())):
    p.set_description(name)
    ts = preprocessed_dict[name]
    window_size = ts['slidingWindow']
    x = ts['data']
    
    k = 1
    score = []
    t0 = time()
    for i, batch in enumerate(tqdm(ts['dynamic_partitioning_batches'], desc='Processing Batch')):
        score_ = stumpy.stump(T_A=batch, m=window_size, k=k, ignore_trivial=True, normalize=True)
        score_ = score_.T[k-1]

        # Because batches are being split in a way that doesn't allow windows to cross batch boundaries, pad predictions to account for lost
        # windows.
        score_ = np.array([score_[0]]*math.ceil((ts['slidingWindow']-1)/2) + list(score_) + [score_[-1]]*((ts['slidingWindow']-1)//2))

        score.extend(score_)
    t1 = time()

    # In some combinations of batch size and window size, windows overlap with all closest-distance candidates and cannot be scored.
    # In this case, inf is returned. To fix this, any instances of infinite distances are replaced with zero distance.
    score = [s if s != np.inf else 0 for s in score]
    score = np.array(score)
    score = MinMaxScaler(feature_range=(0,1)).fit_transform(score.reshape(-1,1)).ravel()
    # score = np.array([score[0]]*math.ceil((ts['slidingWindow']-1)/2) + list(score) + [score[-1]]*((ts['slidingWindow']-1)//2))
    
    L = printResult(ts['data'], ts['label'], score, ts['slidingWindow'], ts['name'], modelName)
    results.append([name] + L + [t1-t0, len(x)])

Processing Batch: 100%|██████████| 310/310 [00:03<00:00, 79.46it/s] 
Processing Batch: 100%|██████████| 29/29 [00:00<00:00, 115.02it/s]
Processing Batch: 100%|██████████| 2/2 [00:00<00:00, 15.89it/s]
Processing Batch: 100%|██████████| 5/5 [00:00<00:00, 15.93it/s]
Processing Batch: 100%|██████████| 2/2 [00:00<00:00, 68.35it/s]
Processing Batch: 100%|██████████| 26/26 [00:00<00:00, 67.19it/s]
Processing Batch: 100%|██████████| 6/6 [00:00<00:00, 17.30it/s]
Processing Batch: 100%|██████████| 26/26 [00:00<00:00, 66.89it/s] /s]
Processing Batch: 100%|██████████| 45/45 [00:00<00:00, 72.05it/s]/s] 
Processing Batch: 100%|██████████| 13/13 [00:01<00:00, 12.57it/s]1.27it/s]
ECG1+IOPS1+SMD1+Occupancy1: 100%|██████████| 10/10 [00:11<00:00,  1.19s/it]


In [None]:
columns = ['Name'] + ['AUC', 'Precision', 'Recall', 'F-score', 'Range-recall', 'ExistenceReward', 'OverlapReward', 'Range-precision', 'Range-Fscore', 'Precision@k', 'RangeAUC', 'Time', 'Number of Windows']
df = pd.DataFrame(results, columns=columns)

In [None]:
df['Number of anomalies'] = df['Name'].apply(lambda x: np.sum(preprocessed_dict[x]['label']))
df[['Name', 'AUC', 'Precision@k', 'Number of anomalies', 'Time', 'Number of Windows']]

Unnamed: 0,Name,AUC,Precision@k,Number of anomalies,Time,Number of Windows
0,ECG1,0.717771,0.143994,21105.0,4.510805,229900
1,ECG1_20k,0.619746,0.125926,675.0,0.407297,20000
2,IOPS1,0.690399,0.0,206.0,0.124827,8784
3,SMD1,0.517911,0.0,2694.0,0.309536,28479
4,Occupancy1,0.165914,0.001029,972.0,0.032253,2665
5,ECG1+IOPS1,0.643129,0.108967,881.0,0.592734,28784
6,SMD1+Occupancy1,0.407649,0.0,3666.0,0.336422,31144
7,ECG1+IOPS1+Occupancy1,0.643716,0.061522,1853.0,0.405919,31449
8,SMD1+ECG1+Occupancy1,0.60401,0.0,4341.0,0.69564,51144
9,ECG1+IOPS1+SMD1+Occupancy1,0.604448,0.0,4547.0,1.010408,59928


In [None]:
print(df[['Name', 'AUC', 'Precision@k', 'Number of anomalies', 'Time', 'Number of Windows']].to_latex(index=False))

\begin{tabular}{lrrrrr}
\toprule
                      Name &      AUC &  Precision@k &  Number of anomalies &     Time &  Number of Windows \\
\midrule
                      ECG1 & 0.717771 &     0.143994 &              21105.0 & 4.510805 &             229900 \\
                  ECG1\_20k & 0.619746 &     0.125926 &                675.0 & 0.407297 &              20000 \\
                     IOPS1 & 0.690399 &     0.000000 &                206.0 & 0.124827 &               8784 \\
                      SMD1 & 0.517911 &     0.000000 &               2694.0 & 0.309536 &              28479 \\
                Occupancy1 & 0.165914 &     0.001029 &                972.0 & 0.032253 &               2665 \\
                ECG1+IOPS1 & 0.643129 &     0.108967 &                881.0 & 0.592734 &              28784 \\
           SMD1+Occupancy1 & 0.407649 &     0.000000 &               3666.0 & 0.336422 &              31144 \\
     ECG1+IOPS1+Occupancy1 & 0.643716 &     0.061522 &               

In [None]:
df.to_csv('Results/Matrix Profile/STUMP-variant3', index=False)

## 5. Online Running: Variant 4: Dynamic Partitioning with Percentiles
The STUMP algorithm is run online, with access to limited subsequences. In this variant, the algorithm only has access to the subsequences of a given (current) batch. The partitioning of the original time series is being done dynamically using percentiles instead of min/max.

In [None]:
results = []

for name in (p := tqdm(preprocessed_dict.keys())):
    p.set_description(name)
    ts = preprocessed_dict[name]
    window_size = ts['slidingWindow']
    x = ts['data']
    
    k = 1
    score = []
    t0 = time()
    for i, batch in enumerate(tqdm(ts['percentile_dynamic_partitioning_batches'], desc='Processing Batch')):

        # If there are not enough points to do at least a window, pad with the last score.
        if len(batch) < window_size:
            score_ = [score[-1]] * len(batch)
        else:
            score_ = stumpy.stump(T_A=batch, m=window_size, k=k, ignore_trivial=True, normalize=True)
            score_ = score_.T[k-1]

            # Because batches are being split in a way that doesn't allow windows to cross batch boundaries, pad predictions to account for lost
            # windows.
            score_ = np.array([score_[0]]*math.ceil((ts['slidingWindow']-1)/2) + list(score_) + [score_[-1]]*((ts['slidingWindow']-1)//2))

        score.extend(score_)
    t1 = time()

    # In some combinations of batch size and window size, windows overlap with all closest-distance candidates and cannot be scored.
    # In this case, inf is returned. To fix this, any instances of infinite distances are replaced with zero distance.
    score = [s if s != np.inf else 0 for s in score]
    score = np.array(score)
    score = MinMaxScaler(feature_range=(0,1)).fit_transform(score.reshape(-1,1)).ravel()
    # score = np.array([score[0]]*math.ceil((ts['slidingWindow']-1)/2) + list(score) + [score[-1]]*((ts['slidingWindow']-1)//2))
    
    L = printResult(ts['data'], ts['label'], score, ts['slidingWindow'], ts['name'], modelName)
    results.append([name] + L + [t1-t0, len(x)])

Processing Batch: 100%|██████████| 430/430 [00:03<00:00, 136.24it/s]
Processing Batch: 100%|██████████| 39/39 [00:00<00:00, 145.76it/s]
Processing Batch: 100%|██████████| 16/16 [00:00<00:00, 177.98it/s]
Processing Batch: 100%|██████████| 51/51 [00:00<00:00, 147.70it/s]
Processing Batch: 100%|██████████| 5/5 [00:00<00:00, 148.59it/s]
Processing Batch: 100%|██████████| 55/55 [00:00<00:00, 142.11it/s]
Processing Batch: 100%|██████████| 52/52 [00:00<00:00, 137.68it/s]
Processing Batch: 100%|██████████| 58/58 [00:00<00:00, 128.93it/s]/s]
Processing Batch: 100%|██████████| 89/89 [00:00<00:00, 138.25it/s]s] 
Processing Batch: 100%|██████████| 107/107 [00:00<00:00, 126.55it/s]7it/s]
ECG1+IOPS1+SMD1+Occupancy1: 100%|██████████| 10/10 [00:11<00:00,  1.10s/it]


In [None]:
columns = ['Name'] + ['AUC', 'Precision', 'Recall', 'F-score', 'Range-recall', 'ExistenceReward', 'OverlapReward', 'Range-precision', 'Range-Fscore', 'Precision@k', 'RangeAUC', 'Time', 'Number of Windows']
df = pd.DataFrame(results, columns=columns)

In [None]:
df['Number of anomalies'] = df['Name'].apply(lambda x: np.sum(preprocessed_dict[x]['label']))
df[['Name', 'AUC', 'Precision@k', 'Number of anomalies', 'Time', 'Number of Windows']]

Unnamed: 0,Name,AUC,Precision@k,Number of anomalies,Time,Number of Windows
0,ECG1,0.700964,0.168823,21105.0,3.159311,229900
1,ECG1_20k,0.718579,0.382222,675.0,0.272559,20000
2,IOPS1,0.59298,0.0,206.0,0.093899,8784
3,SMD1,0.498453,0.0,2694.0,0.348413,28479
4,Occupancy1,0.299461,0.0,972.0,0.037769,2665
5,ECG1+IOPS1,0.682912,0.303065,881.0,0.390198,28784
6,SMD1+Occupancy1,0.407287,0.0,3666.0,0.380699,31144
7,ECG1+IOPS1+Occupancy1,0.643717,0.165138,1853.0,0.453855,31449
8,SMD1+ECG1+Occupancy1,0.588531,0.0,4341.0,0.647888,51144
9,ECG1+IOPS1+SMD1+Occupancy1,0.572299,0.0,4547.0,0.848507,59928


In [None]:
print(df[['Name', 'AUC', 'Precision@k', 'Number of anomalies', 'Time', 'Number of Windows']].to_latex(index=False))

\begin{tabular}{lrrrrr}
\toprule
                      Name &      AUC &  Precision@k &  Number of anomalies &     Time &  Number of Windows \\
\midrule
                      ECG1 & 0.700964 &     0.168823 &              21105.0 & 3.159311 &             229900 \\
                  ECG1\_20k & 0.718579 &     0.382222 &                675.0 & 0.272559 &              20000 \\
                     IOPS1 & 0.592980 &     0.000000 &                206.0 & 0.093899 &               8784 \\
                      SMD1 & 0.498453 &     0.000000 &               2694.0 & 0.348413 &              28479 \\
                Occupancy1 & 0.299461 &     0.000000 &                972.0 & 0.037769 &               2665 \\
                ECG1+IOPS1 & 0.682912 &     0.303065 &                881.0 & 0.390198 &              28784 \\
           SMD1+Occupancy1 & 0.407287 &     0.000000 &               3666.0 & 0.380699 &              31144 \\
     ECG1+IOPS1+Occupancy1 & 0.643717 &     0.165138 &               

In [None]:
df.to_csv('Results/Matrix Profile/STUMP-variant4', index=False)

## 6. Online Running: Variant 2-Tuned: History Batching with Tuning
The STUMP algorithm is run online, with access to limited subsequences. In this variant, the algorithm only has access to the subsequences of a given (current) batch and the previous batch.

Before the time series are evaluted, the algorithm's parameters are selected based on the most optimal AUC performance on another similar sequence.

In [16]:
from collections import defaultdict

results = []

# Parameters for tuning.
param_grid = {
    # Using the estimated window length from the autocorrelation, define alternate window sized as fractions/multiples of that.
    'window_length_modifier': [0.1, 0.5, 1.0, 1.5, 2.0, 5.0], 
    # What closest neighbor to get the distance for. May help with repeated similar anomalies.
    'k': [1, 2, 5, 10, 20],
    'normalized': [True, False]
}

params_to_AUC = defaultdict(dict)

total = np.product([len(pl) for pl in param_grid.values()])

for timeseries in (p := tqdm(tuning_data)):
    name = timeseries['Name']
    
    default_sliding_window = find_length(timeseries['data'])

    p.set_description(name)

    c = 0
    best_AUC = 0
    
    # Initial Best parameters are the defaults.
    best_params = (1, 1, True)
    for window_length_modifier in param_grid['window_length_modifier']:
        for k in param_grid['k']:
            for normalize in param_grid['normalized']:
                # Prevent too small windows.
                window_size = max(10, int(window_length_modifier * default_sliding_window))

                ts = preprocess_series(series=timeseries, slidingWindow=window_size, verbose=False)

                score = []
                for i, batch in enumerate(ts['history_batched_data']):
                    score_ = stumpy.stump(T_A=batch, m=window_size, k=k, ignore_trivial=True, normalize=normalize)

                    if i == 0:
                        next_scores = score_.T[k-1]
                    else:
                        new_scores = list(score_.T[k-1])[:windows_per_batch]
                        next_scores = list(score_.T[k-1])[windows_per_batch:]
                    
                        old_scores_new_scores_mean = list(np.add(previous_scores, new_scores)/2)
                        score.extend(old_scores_new_scores_mean)

                    previous_scores = next_scores
                # The final batch's scores don't have any future to average with, they are just added.
                score.extend(previous_scores)

                # In some combinations of batch size and window size, windows overlap with all closest-distance candidates and cannot be scored.
                # In this case, inf is returned. To fix this, any instances of infinite distances are replaced with zero distance.
                score = [s if s != np.inf else 0 for s in score]
                score = np.array(score)
                score = MinMaxScaler(feature_range=(0,1)).fit_transform(score.reshape(-1,1)).ravel()
                score = np.array([score[0]]*math.ceil((ts['slidingWindow']-1)/2) + list(score) + [score[-1]]*((ts['slidingWindow']-1)//2))
                
                AUC = printResult(ts['data'], ts['label'], score, window_size, ts['name'], modelName)[0]

                params_to_AUC[name][(window_length_modifier, k, normalize)] = AUC

                if AUC > best_AUC:
                    best_AUC = AUC
                    best_params = (window_length_modifier, k, normalize)

                c+=1
                print(f"\r[{c}/{total}]{name}  --  Best AUC = {best_AUC} for: {best_params}", end='')
    print()
    print(f"{name}  --  Best AUC = {best_AUC} for: {best_params}")

    # Evaluate evaluation time series with selected parameters.       
    eval_series_name = ''.join([n if n!='2' else '1' for n in name]).replace('10k', '20k')  # Replace 2s with 1s and fix 20k becoming 10k accidentally.
    default_sliding_window = find_length(name_to_eval_series[eval_series_name]['data'])
    window_size = max(10, int(default_sliding_window * best_params[0]))
    k = best_params[1]
    normalize = best_params[2]

    ts = preprocess_series(series=name_to_eval_series[eval_series_name], slidingWindow=window_size)
    x = ts['data']

    t0 = time()
    score = []
    for i, batch in enumerate(ts['history_batched_data']):
        score_ = stumpy.stump(T_A=batch, m=window_size, k=k, ignore_trivial=True, normalize=normalize)

        if i == 0:
            next_scores = score_.T[k-1]
        else:
            new_scores = list(score_.T[k-1])[:windows_per_batch]
            next_scores = list(score_.T[k-1])[windows_per_batch:]
        
            old_scores_new_scores_mean = list(np.add(previous_scores, new_scores)/2)
            score.extend(old_scores_new_scores_mean)

        previous_scores = next_scores
    # The final batch's scores don't have any future to average with, they are just added.
    score.extend(previous_scores)
    t1 = time()

    # In some combinations of batch size and window size, windows overlap with all closest-distance candidates and cannot be scored.
    # In this case, inf is returned. To fix this, any instances of infinite distances are replaced with zero distance.
    score = [s if s != np.inf else 0 for s in score]
    score = np.array(score)
    score = MinMaxScaler(feature_range=(0,1)).fit_transform(score.reshape(-1,1)).ravel()
    score = np.array([score[0]]*math.ceil((ts['slidingWindow']-1)/2) + list(score) + [score[-1]]*((ts['slidingWindow']-1)//2))
    
    L = printResult(ts['data'], ts['label'], score, window_size, ts['name'], modelName)
    print(f"{eval_series_name}  --  Eval AUC = {L[0]}")
    results.append([eval_series_name] + L + [t1-t0, len(x)])

    print()
    print('----------------------------------------------------------------')
    sleep(1)

ECG2:   0%|          | 0/10 [00:00<?, ?it/s]

[60/60]ECG2  --  Best AUC = 0.9050577268078559 for: (1.0, 1, False)
ECG2  --  Best AUC = 0.9050577268078559 for: (1.0, 1, False)
Time-Series name: ECG1
Estimated Subsequence length:  100

ECG1  --  Eval AUC = 0.9334972140968025

----------------------------------------------------------------


ECG2_20k:  10%|█         | 1/10 [14:21<2:09:10, 861.14s/it]

[60/60]ECG2_20k  --  Best AUC = 0.9463468900402492 for: (1.0, 1, False)
ECG2_20k  --  Best AUC = 0.9463468900402492 for: (1.0, 1, False)
Time-Series name: ECG1_20k
Estimated Subsequence length:  100

ECG1_20k  --  Eval AUC = 0.9820465143021417

----------------------------------------------------------------


IOPS2:  20%|██        | 2/10 [15:41<53:34, 401.76s/it]     

[60/60]IOPS2  --  Best AUC = 0.8045645399542686 for: (0.1, 2, True)
IOPS2  --  Best AUC = 0.8045645399542686 for: (0.1, 2, True)
Time-Series name: IOPS1
Estimated Subsequence length:  28

IOPS1  --  Eval AUC = 0.7100219120033864

----------------------------------------------------------------


SMD2:  30%|███       | 3/10 [16:11<27:04, 232.10s/it] 

[60/60]SMD2  --  Best AUC = 0.8807786793856283 for: (1.5, 1, False))
SMD2  --  Best AUC = 0.8807786793856283 for: (1.5, 1, False)
Time-Series name: SMD1
Estimated Subsequence length:  187

SMD1  --  Eval AUC = 0.9036190133159547

----------------------------------------------------------------


Occupancy2:  40%|████      | 4/10 [17:55<18:10, 181.68s/it]

[60/60]Occupancy2  --  Best AUC = 0.9420222217360761 for: (1.5, 2, False))
Occupancy2  --  Best AUC = 0.9420222217360761 for: (1.5, 2, False)
Time-Series name: Occupancy1
Estimated Subsequence length:  187

Occupancy1  --  Eval AUC = 0.8567348851115341

----------------------------------------------------------------


ECG2+IOPS2:  50%|█████     | 5/10 [18:06<09:59, 119.98s/it]

[60/60]ECG2+IOPS2  --  Best AUC = 0.908805949443263 for: (1.0, 1, False))
ECG2+IOPS2  --  Best AUC = 0.908805949443263 for: (1.0, 1, False)
Time-Series name: ECG1+IOPS1
Estimated Subsequence length:  100

ECG1+IOPS1  --  Eval AUC = 0.8787837979170828

----------------------------------------------------------------


SMD2+Occupancy2:  60%|██████    | 6/10 [19:49<07:37, 114.30s/it]

[60/60]SMD2+Occupancy2  --  Best AUC = 0.8688127658303799 for: (1.5, 1, False)
SMD2+Occupancy2  --  Best AUC = 0.8688127658303799 for: (1.5, 1, False)
Time-Series name: SMD1+Occupancy1
Estimated Subsequence length:  187

SMD1+Occupancy1  --  Eval AUC = 0.9038102921954685

----------------------------------------------------------------


ECG2+IOPS2+Occupancy2:  70%|███████   | 7/10 [21:45<05:44, 114.88s/it]

[60/60]ECG2+IOPS2+Occupancy2  --  Best AUC = 0.9307331697704162 for: (1.0, 1, False)
ECG2+IOPS2+Occupancy2  --  Best AUC = 0.9307331697704162 for: (1.0, 1, False)
Time-Series name: ECG1+IOPS1+Occupancy1
Estimated Subsequence length:  100

ECG1+IOPS1+Occupancy1  --  Eval AUC = 0.8947938188581223

----------------------------------------------------------------


SMD2+ECG2+Occupancy2:  80%|████████  | 8/10 [23:50<03:55, 117.87s/it] 

[60/60]SMD2+ECG2+Occupancy2  --  Best AUC = 0.7183028143690321 for: (1.0, 1, False)
SMD2+ECG2+Occupancy2  --  Best AUC = 0.7183028143690321 for: (1.0, 1, False)
Time-Series name: SMD1+ECG1+Occupancy1
Estimated Subsequence length:  125

SMD1+ECG1+Occupancy1  --  Eval AUC = 0.7861693351050948

----------------------------------------------------------------


ECG2+IOPS2+SMD2+Occupancy2:  90%|█████████ | 9/10 [27:16<02:25, 145.55s/it]

[60/60]ECG2+IOPS2+SMD2+Occupancy2  --  Best AUC = 0.6912772943108828 for: (0.5, 1, False)
ECG2+IOPS2+SMD2+Occupancy2  --  Best AUC = 0.6912772943108828 for: (0.5, 1, False)
Time-Series name: ECG1+IOPS1+SMD1+Occupancy1
Estimated Subsequence length:  50

ECG1+IOPS1+SMD1+Occupancy1  --  Eval AUC = 0.7889498262524799

----------------------------------------------------------------


ECG2+IOPS2+SMD2+Occupancy2: 100%|██████████| 10/10 [31:03<00:00, 186.31s/it]


In [17]:
columns = ['Name'] + ['AUC', 'Precision', 'Recall', 'F-score', 'Range-recall', 'ExistenceReward', 'OverlapReward', 'Range-precision', 'Range-Fscore', 'Precision@k', 'RangeAUC', 'Time', 'Number of Windows']
df = pd.DataFrame(results, columns=columns)

In [20]:
df

Unnamed: 0,Name,AUC,Precision,Recall,F-score,Range-recall,ExistenceReward,OverlapReward,Range-precision,Range-Fscore,Precision@k,RangeAUC,Time,Number of Windows
0,ECG2,0.933497,0.625213,0.173892,0.272104,0.166908,0.303502,0.13276,0.520308,0.252741,0.173892,0.991171,6.936406,229900
1,ECG2_20k,0.982047,0.464229,0.432593,0.447853,0.479407,0.666667,0.432593,0.340984,0.398518,0.432593,0.997722,0.620381,20000
2,IOPS2,0.710022,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.805067,0.489505,8784
3,SMD2,0.903619,0.945626,0.445434,0.605602,0.271161,0.625,0.182701,0.9698,0.42382,0.445434,0.966146,0.853858,28479
4,Occupancy2,0.856735,0.965217,0.114198,0.204232,0.184034,0.214286,0.176471,0.321739,0.23414,0.114198,0.978683,0.076599,2665
5,ECG2+IOPS2,0.878784,0.511002,0.474461,0.492054,0.135513,0.214286,0.11582,0.532204,0.216022,0.474461,0.795521,0.868585,28784
6,SMD2+Occupancy2,0.90381,0.556098,0.031097,0.0589,0.03035,0.045455,0.026573,0.556098,0.057558,0.031097,0.964976,0.908767,31144
7,ECG2+IOPS2+Occupancy2,0.894794,0.507692,0.035618,0.066566,0.008407,0.017857,0.006044,0.286957,0.016335,0.035618,0.826387,0.998293,31449
8,SMD2+ECG2+Occupancy2,0.786169,0.466165,0.014282,0.027716,0.014657,0.032258,0.010256,0.25,0.02769,0.014282,0.832066,1.600388,51144
9,ECG2+IOPS2+SMD2+Occupancy2,0.78895,0.744444,0.02947,0.056696,0.035394,0.078125,0.024712,0.809597,0.067823,0.02947,0.770095,1.84877,59928


In [19]:
df['Number of anomalies'] = df['Name'].apply(lambda x: np.sum(name_to_eval_series[x]['label']))
df[['Name', 'AUC', 'Precision@k', 'Number of anomalies', 'Time', 'Number of Windows']]

KeyError: 'ECG2'

In [None]:
print(df[['Name', 'AUC', 'Precision@k', 'Number of anomalies', 'Time', 'Number of Windows']].to_latex(index=False))

\begin{tabular}{lrrrrr}
\toprule
                      Name &      AUC &  Precision@k &  Number of anomalies &     Time &  Number of Windows \\
\midrule
                      ECG1 & 0.700964 &     0.168823 &              21105.0 & 3.159311 &             229900 \\
                  ECG1\_20k & 0.718579 &     0.382222 &                675.0 & 0.272559 &              20000 \\
                     IOPS1 & 0.592980 &     0.000000 &                206.0 & 0.093899 &               8784 \\
                      SMD1 & 0.498453 &     0.000000 &               2694.0 & 0.348413 &              28479 \\
                Occupancy1 & 0.299461 &     0.000000 &                972.0 & 0.037769 &               2665 \\
                ECG1+IOPS1 & 0.682912 &     0.303065 &                881.0 & 0.390198 &              28784 \\
           SMD1+Occupancy1 & 0.407287 &     0.000000 &               3666.0 & 0.380699 &              31144 \\
     ECG1+IOPS1+Occupancy1 & 0.643717 &     0.165138 &               

In [None]:
df.to_csv('Results/Matrix Profile/STUMP-variant2-TUNED', index=False)