In [None]:
import warnings
# Suppress every warning for the rest of the session (no category filter is given).
warnings.filterwarnings('ignore')

In [None]:
import sys
import os

# Put a repository path at the front of sys.path so modules found there win on import.
# The '' argument is a placeholder; os.path.abspath('') resolves to the current
# working directory until a real path is filled in.
cloned_repo_path = os.path.abspath('')#insert path here
sys.path.insert(0, cloned_repo_path)
# NOTE(review): this second insert places the current working directory at index 0,
# i.e. ahead of the path configured above, and rebinds cloned_repo_path to it.
# With the placeholder left as '' both entries are the same directory — confirm
# whether the duplicate insert is intentional.
cloned_repo_path = os.path.abspath('.')
sys.path.insert(0, cloned_repo_path)

In [None]:
import os
import pickle 
import stumpy
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import STL

## Helper Functions

In [None]:
def create_directory(directory="Results"):
    """Ensure ``directory`` exists, creating it (and any missing parents) if needed.

    Prints a one-line status message telling whether the path was created or
    was already present. Returns ``None``.
    """
    if os.path.exists(directory):
        print("Directory Already Exists")
        return

    os.makedirs(directory)
    print("Directory Created Successfully")

In [None]:
def calculate_and_visualize_mdls(final_data, mps, indices, filepath, m=2016):
    """Run ``stumpy.mdl`` on the per-dimension motifs and plot the resulting bit sizes.

    Parameters
    ----------
    final_data : pandas.DataFrame
        Data passed to ``stumpy.mdl``; its ``columns`` are used to translate the
        selected subspace into column names.
    mps : numpy.ndarray
        Multidimensional matrix profile; the argmin of each row is used as that
        row's motif start.
    indices : numpy.ndarray
        Matrix profile indices matching ``mps``; used to look up each motif's
        nearest neighbour.
    filepath : str
        Destination passed to ``plt.savefig``.
    m : int, default 2016
        Subsequence (window) length forwarded to ``stumpy.mdl``.

    Returns
    -------
    list
        Column names of the subspace with the smallest MDL value.
    """
    # Motif start per profile row and the matching nearest-neighbour start.
    motifs_idx = np.argmin(mps, axis=1)
    nn_idx = indices[np.arange(len(motifs_idx)), motifs_idx]

    mdls, subspaces = stumpy.mdl(final_data, m, motifs_idx, nn_idx)

    print(f"MDLS:\n\t {mdls}\n")
    print(f"Subspaces:\n\t {subspaces}\n")

    # The subspace with the smallest bit size is the suggested one.
    k = np.argmin(mdls)
    suggested_columns = final_data.columns[subspaces[k]]

    print(f"Suggested Columns For Multidimensional Matrix Profile:\n\t {suggested_columns}\n")

    plt.plot(np.arange(len(mdls)), mdls, c='red', linewidth=4)
    plt.xlabel('k (zero-based)', fontsize=20)
    plt.ylabel('Bit Size', fontsize=20)
    plt.xticks(range(mps.shape[0]))

    plt.savefig(filepath, transparent=False, bbox_inches='tight')
    plt.show()

    return suggested_columns.tolist()

In [None]:
def mps_calculations_mstump_m(final_data, filepath, m=2016):
    """Compute a multidimensional matrix profile via ``stumpy.mstump_m`` and plot it.

    For every profile row ``k`` the motif start (argmin of the row) and its
    nearest neighbour (looked up in the returned index matrix) are printed,
    highlighted on the series of the ``k``-th column and marked on the profile.
    The figure is saved to ``filepath`` and shown.

    Parameters
    ----------
    final_data : pandas.DataFrame
        One column per dimension. Column names are also used as keys into the
        module-level ``sensor_id_type_mapping`` for the legend text, so that
        global must exist before this function is called.
    filepath : str
        Destination passed to ``plt.savefig``.
    m : int, default 2016
        Subsequence (window) length; the default is the value the notebook uses
        for its weekly runs.

    Returns
    -------
    tuple
        ``(mps, indices)`` exactly as returned by ``stumpy.mstump_m``.
    """
    # NOTE(review): ``mstump_m`` is presumably provided by the stumpy checkout put on
    # sys.path at the top of the notebook — confirm it exists in the version in use.
    mps, indices = stumpy.mstump_m(final_data, m)
    print(f"MPS Shape: {mps.shape}")

    # Motif start of each profile row: position of the row minimum.
    motifs_idx = np.argmin(mps, axis=1)
    print(f"Motif Start Index: {motifs_idx}")

    # Nearest-neighbour start belonging to each of those motifs.
    nn_idx = indices[np.arange(len(motifs_idx)), motifs_idx]
    print(f"Nearest Start Index: {nn_idx}")

    # Drop the date index so x coordinates are plain positions, like the profile's.
    df = final_data.reset_index(drop=True)

    # Upper half of the axes: raw series; lower half: the matching profile rows.
    n_dims = mps.shape[0]
    fig, axs = plt.subplots(n_dims * 2, sharex=True, gridspec_kw={'hspace': 0}, figsize=(25, n_dims * 10))
    label = ''
    for k, dim_name in enumerate(df.columns):
        motif_start, nn_start = motifs_idx[k], nn_idx[k]
        series_ax = axs[k]
        profile_ax = axs[k + n_dims]

        series_ax.set_ylabel(dim_name, fontsize=10)
        series_ax.set_xlabel('Time', fontsize=10)
        series_ax.plot(df[dim_name], label=sensor_id_type_mapping[dim_name])
        series_ax.legend(loc="upper right")
        # Overlay the motif window and its nearest-neighbour window in red.
        for start in (motif_start, nn_start):
            series_ax.plot(range(start, start + m), df[dim_name].iloc[start : start + m], c='red', linewidth=4)

        # Legend text of profile k lists every dimension seen so far.
        label = label + ', ' + dim_name

        for start in (motif_start, nn_start):
            series_ax.axvline(x=start, linestyle="dashed", c='black')

        profile_ax.set_ylabel(f"P_{k}", fontsize=10)
        profile_ax.plot(mps[k], c='orange', label=f"{label.strip(',')}")
        profile_ax.set_xlabel('Time', fontsize=10)

        for start in (motif_start, nn_start):
            profile_ax.axvline(x=start, linestyle="dashed", c='black')

        # Markers sit one unit above the profile value at each position.
        for start in (motif_start, nn_start):
            profile_ax.plot(start, mps[k, start] + 1, marker="v", markersize=10, color='red')

        profile_ax.text(motif_start, mps[k, motif_start], "1m", fontsize="xx-large")
        # Fix: anchor the neighbour label at the neighbour's own profile value
        # (it previously reused the motif's value), matching its marker above.
        profile_ax.text(nn_start, mps[k, nn_start], "1n", fontsize="xx-large")
        profile_ax.legend(loc="upper right")

    plt.savefig(filepath, transparent=False, bbox_inches='tight')
    plt.show()

    return mps, indices



In [None]:
def mps_calculations_mstump(final_data, filepath, m=2016):
    """Compute the multidimensional matrix profile with ``stumpy.mstump`` and plot it.

    For each profile row ``k`` the function prints and draws the motif start
    (row argmin) and its nearest neighbour (from the returned index matrix):
    both windows are highlighted on the ``k``-th column's series and marked on
    the profile row. The figure is written to ``filepath`` and displayed.

    Parameters
    ----------
    final_data : pandas.DataFrame
        One column per dimension. Each column name must be a key of the
        module-level ``sensor_id_type_mapping`` (used for the legend), which
        therefore has to be defined before the call.
    filepath : str
        Destination passed to ``plt.savefig``.
    m : int, default 2016
        Subsequence (window) length.

    Returns
    -------
    tuple
        ``(mps, indices)`` as returned by ``stumpy.mstump``.
    """
    mps, indices = stumpy.mstump(final_data, m)
    print(f"MPS Shape: {mps.shape}")

    # Row-wise minimum position = motif start for that profile row.
    motifs_idx = np.argmin(mps, axis=1)
    print(f"Motif Start Index: {motifs_idx}")

    # Getting the nearest neighbour of every motif.
    nn_idx = indices[np.arange(len(motifs_idx)), motifs_idx]
    print(f"Nearest Neighbor Start Index: {nn_idx}")

    # Positional index so the series and the profile share the same x coordinates.
    df = final_data.reset_index(drop=True)

    # Plotting the dimensions (top half) and their matrix profiles (bottom half).
    n_dims = mps.shape[0]
    fig, axs = plt.subplots(n_dims * 2, sharex=True, gridspec_kw={'hspace': 0}, figsize=(25, n_dims * 10))
    label = ''
    for k, dim_name in enumerate(df.columns):
        motif_start = motifs_idx[k]
        nn_start = nn_idx[k]
        series_ax = axs[k]
        profile_ax = axs[k + n_dims]

        series_ax.set_ylabel(dim_name, fontsize=10)
        series_ax.set_xlabel('Time', fontsize=10)
        series_ax.plot(df[dim_name], label=sensor_id_type_mapping[dim_name])
        series_ax.legend(loc="upper right")
        # Motif window and nearest-neighbour window drawn over the series in red.
        series_ax.plot(range(motif_start, motif_start + m), df[dim_name].iloc[motif_start : motif_start + m], c='red', linewidth=4)
        series_ax.plot(range(nn_start, nn_start + m), df[dim_name].iloc[nn_start : nn_start + m], c='red', linewidth=4)

        # Running list of dimension names, used as the legend of profile row k.
        label = label + ', ' + dim_name

        series_ax.axvline(x=motif_start, linestyle="dashed", c='black')
        series_ax.axvline(x=nn_start, linestyle="dashed", c='black')

        profile_ax.set_ylabel(f"P_{k}", fontsize=10)
        profile_ax.plot(mps[k], c='orange', label=f"{label.strip(',')}")
        profile_ax.set_xlabel('Time', fontsize=10)

        profile_ax.axvline(x=motif_start, linestyle="dashed", c='black')
        profile_ax.axvline(x=nn_start, linestyle="dashed", c='black')

        # Triangular markers are offset by +1 above the profile value.
        profile_ax.plot(motif_start, mps[k, motif_start] + 1, marker="v", markersize=10, color='red')
        profile_ax.plot(nn_start, mps[k, nn_start] + 1, marker="v", markersize=10, color='red')

        profile_ax.text(motif_start, mps[k, motif_start], "1m", fontsize="xx-large")
        # Fix: the neighbour label used the motif's profile value as its y
        # coordinate; it now uses the neighbour's own value, like its marker.
        profile_ax.text(nn_start, mps[k, nn_start], "1n", fontsize="xx-large")
        profile_ax.legend(loc="upper right")

    plt.savefig(filepath, transparent=False, bbox_inches='tight')
    plt.show()

    return mps, indices

In [None]:
# Further processing of the motifs: keep every m-th candidate
def get_percent_motif_start_index(motifs, m=2016):
    """Return every ``m``-th index label of ``motifs``, starting with the first.

    Parameters
    ----------
    motifs : pandas.Series
        Candidate values; only its ``index`` and length are read.
    m : int, default 2016
        Stride between the picked entries. The default keeps the value that was
        previously hard-coded.

    Returns
    -------
    list
        ``motifs.index[0]``, ``motifs.index[m]``, ``motifs.index[2 * m]``, ...;
        empty when ``motifs`` is empty.
    """
    return [motifs.index[i] for i in range(0, len(motifs), m)]

In [None]:
# Further processing of the motifs
def get_motif_start_index(motifs, m=2016):
    """Pick one entry of ``motifs`` per stride of ``m`` positions.

    The sequence is walked at positions ``0, m, 2 * m, ...``. While no visited
    entry satisfies ``motifs[i] + m > len(motifs)``, the entry at the visited
    position itself is collected. Once such an entry is seen, the shortfall
    ``len(motifs) - motifs[i]`` is remembered and each later pick is read from
    the *previous* visited position plus that shortfall.

    NOTE(review): the reason for the lagging offset is not evident from the
    code; the control flow is kept exactly as it was — confirm the intent.

    Parameters
    ----------
    motifs : sequence supporting ``len`` and integer ``[]`` lookup
        In this notebook the argsort of one matrix-profile dimension.
    m : int, default 2016
        Stride / window length. The default keeps the value that was
        previously hard-coded.

    Returns
    -------
    list
        The collected entries, in visiting order.
    """
    final_motifs = []
    j = 0
    value = 0  # shortfall; only meaningful once ``check`` is True
    check = False

    for i in range(0, len(motifs), m):
        if not check:
            j = i

        final_motifs.append(motifs[j])

        if check:
            # Prepare the lookup position used by the NEXT iteration.
            j = i + value

        elif motifs[i] + m > len(motifs):
            # First entry whose window would run past the end: remember by how much.
            value = len(motifs) - motifs[i]
            j = i + value
            check = True

    return final_motifs


In [None]:
# Selects motifs for one dimension from a motif percentage and a discord percentage

def select_motifs_discords_percentage(mps, dimension, motif_percentage, discord_percentage, motifs=None, discords=None):
    """Select motif candidates of one dimension by quantile threshold.

    Parameters
    ----------
    mps : pandas.Series
        Matrix-profile values of a single dimension.
    dimension : hashable
        Key under which the result is stored in ``motifs``.
    motif_percentage : number
        Values strictly below the ``motif_percentage / 100`` quantile count as
        motif candidates.
    discord_percentage : number
        Values strictly above the ``(100 - discord_percentage) / 100`` quantile
        count as discord candidates.
    motifs, discords : dict, optional
        Accumulators that are updated in place and returned. A fresh dict is
        created for each call when omitted; the previous ``{}`` defaults were
        shared between calls, so results leaked from one call into the next.

    Returns
    -------
    tuple of dict
        ``(motifs, discords)``.
    """
    if motifs is None:
        motifs = {}
    if discords is None:
        discords = {}

    motif_threshold = mps.quantile(motif_percentage / 100)
    discord_threshold = mps.quantile((100 - discord_percentage) / 100)

    motif = mps[mps < motif_threshold]
    # NOTE(review): discord candidates are computed but never stored, so
    # ``discords`` is returned exactly as received — confirm whether discord
    # selection is still to be implemented.
    discord = mps[mps > discord_threshold]

    if len(motif):
        motifs[dimension] = get_percent_motif_start_index(motif)

    return motifs, discords

In [None]:
def select_top_k_motifs_discords(mps, dimension, k_motifs, k_discords, motifs=None, discords=None):
    """Select up to ``k_motifs`` motif starts for one dimension.

    Parameters
    ----------
    mps : array-like
        Matrix-profile values of a single dimension.
    dimension : hashable
        Key under which the result is stored in ``motifs``.
    k_motifs : int
        Maximum number of motif starts to keep.
    k_discords : int
        Accepted for interface symmetry; not used by the current code.
    motifs, discords : dict, optional
        Accumulators that are updated in place and returned. A fresh dict is
        created for each call when omitted; the previous ``{}`` defaults were
        shared between calls, so results leaked from one call into the next.

    Returns
    -------
    tuple of dict
        ``(motifs, discords)``; ``discords`` is returned as received.
    """
    if motifs is None:
        motifs = {}
    if discords is None:
        discords = {}

    # Positions ordered by ascending profile value (stable for ties), so the
    # best-matching subsequences come first.
    sorted_mps = np.argsort(mps, kind='stable')

    motifs[dimension] = get_motif_start_index(sorted_mps)[:k_motifs]

    return motifs, discords

In [None]:
# Taking user's input for function

motif_thresholds_for_all_dimensions = {}

def take_function_and_function_parameters_input():
    """Interactively ask, per dimension, which selection function to apply.

    One prompt sequence is issued for every row of the module-level ``mps``
    (``mps.shape[0]`` dimensions). Choice ``1`` asks for a motif and a discord
    percentage, choice ``2`` for top-k motif and discord counts; any other
    integer skips the dimension. Every answer is converted with ``int``, so
    non-numeric input raises ``ValueError``.

    Returns
    -------
    dict
        ``{dimension: settings}`` where ``settings`` holds ``"function"`` plus
        either ``"motif_percentage"``/``"discord_percentage"`` or
        ``"k_motifs"``/``"k_discords"``.
    """
    selections = {}
    for dimension in range(mps.shape[0]):
        answer = input(f"Select A Function To Be Applied to Dimension {dimension}\n"
                       f"\tPress 1 For Selection Based on Percentage\n "
                       f"\tPress 2 For Top K Motifs and Discords Selection: ")
        choice = int(answer)

        if choice == 1:
            # Both answers are collected before either one is converted.
            motif_answer = input(f"\tEnter a Specific Threshold Value For Motif Selection For Dimension {dimension}: ")
            discord_answer = input(f"\tEnter a Specific Threshold Value For Discord Selection For Dimension {dimension}: ")
            selections[dimension] = {
                "function": choice,
                "motif_percentage": int(motif_answer),
                "discord_percentage": int(discord_answer),
            }
        elif choice == 2:
            motif_answer = input(f"\tEnter Top K Motif Selection For Dimension {dimension}: ")
            discord_answer = input(f"\tEnter Top K Discord Selection For Dimension {dimension}: ")
            selections[dimension] = {
                "function": choice,
                "k_motifs": int(motif_answer),
                "k_discords": int(discord_answer),
            }
        # any other choice: nothing is recorded for this dimension

    return selections


In [None]:
# Calculating Motifs and Discords

def calculate_motifs_discords_for_each_dimension(mps, motif_thresholds_for_all_dimensions):
    """Apply the configured selection function to each configured dimension.

    ``mps`` is turned into a DataFrame and transposed so that column ``d``
    holds the profile of dimension ``d``. For every ``(dimension, settings)``
    entry, ``settings['function']`` decides the selector: ``1`` calls
    ``select_motifs_discords_percentage``, ``2`` calls
    ``select_top_k_motifs_discords``; other values are ignored.

    Returns
    -------
    tuple of dict
        ``(motifs, discords)`` accumulated over all dimensions.
    """
    motifs, discords = {}, {}
    profiles = pd.DataFrame(mps).T

    for dimension, settings in motif_thresholds_for_all_dimensions.items():
        selector = settings.get('function')
        if selector == 2:
            motifs, discords = select_top_k_motifs_discords(
                profiles[dimension], dimension,
                settings['k_motifs'], settings['k_discords'],
                motifs, discords,
            )
        elif selector == 1:
            motifs, discords = select_motifs_discords_percentage(
                profiles[dimension], dimension,
                settings["motif_percentage"], settings["discord_percentage"],
                motifs, discords,
            )

    return motifs, discords

In [None]:
# Selecting nearest neighbours
def calculate_nn_and_filter_motifs(motifs):
    """Pair each motif start with its nearest neighbour and drop mirrored picks.

    The nearest neighbour of a start is read from the module-level ``indices``
    array as ``indices[dimension, start]``. Within one dimension, a start that
    was already recorded as the neighbour of an earlier kept start is skipped.

    Parameters
    ----------
    motifs : dict
        ``{dimension: iterable of start indices}``.

    Returns
    -------
    tuple of dict
        ``(final_motifs, final_nn)``; both map a dimension to equally long
        lists. Dimensions for which nothing was kept do not appear.
    """
    final_motifs, final_nn = {}, {}

    for dimension, candidates in motifs.items():
        seen_neighbours = {}
        for start in candidates:
            if seen_neighbours.get(start) is not None:
                continue  # already covered as the neighbour of a kept motif

            neighbour = indices[dimension, start]
            final_nn.setdefault(dimension, []).append(neighbour)
            final_motifs.setdefault(dimension, []).append(start)
            seen_neighbours[neighbour] = 1

    return final_motifs, final_nn

In [None]:

def create_sensor_id_type_mapping(location_input):
    """Build a ``{sensor id (as str): row name}`` lookup for one location.

    Reads the ``location_input`` column of the module-level
    ``sensor_location_type_ids`` frame. Rows whose cell is not a string, or
    whose row name contains ``'list'``, are ignored. Each remaining cell is
    parsed into a sequence of ids, and every id is mapped to the row name.

    Parameters
    ----------
    location_input : hashable
        Column label in ``sensor_location_type_ids``.

    Returns
    -------
    dict
        Mapping from ``f"{id}"`` to the name of the row the id was listed in.
    """
    column = sensor_location_type_ids.loc[:,location_input].to_dict()

    mapping = {}
    for row_name, cell in column.items():
        if not isinstance(cell, str) or 'list' in row_name:
            continue

        # Spaces become commas so the cell text can be evaluated as a Python literal.
        # NOTE(security): eval() executes whatever the CSV cell contains; only use
        # with trusted data. ast.literal_eval may be a safer drop-in — confirm the
        # cell format first.
        sensor_ids = eval(cell.replace(' ', ','))
        for sensor_id in sensor_ids:
            mapping[f"{sensor_id}"] = row_name

    return mapping

In [None]:
# Plotting the dimensions together with their matrix profiles

def plot_results_of_mps(df, mps, m, final_motifs, final_nn, sensor_id_type_mapping, filepath):
    """Plot every dimension above its matrix profile and mark the chosen motifs.

    The figure has ``2 * mps.shape[0]`` stacked axes sharing the x axis: the
    first half shows the columns of ``df``, the second half the rows of
    ``mps``. For dimension ``k`` the motif / nearest-neighbour pairs from
    ``final_motifs[k]`` and ``final_nn[k]`` are highlighted (windows of length
    ``m``) and numbered ``1m``/``1n``, ``2m``/``2n``, ... in list order. The
    figure is saved to ``filepath`` and shown.

    Parameters
    ----------
    df : pandas.DataFrame
        One column per dimension.
    mps : numpy.ndarray
        Multidimensional matrix profile, one row per dimension.
    m : int
        Window length used for highlighting.
    final_motifs, final_nn : dict
        ``{dimension position: list of start indices}``; dimensions that are
        missing or empty in either dict get no highlights.
    sensor_id_type_mapping : dict
        Legend text per column name.
    filepath : str
        Destination passed to ``plt.savefig``.
    """
    n_dims = mps.shape[0]
    fig, axs = plt.subplots(n_dims * 2, sharex=True, gridspec_kw={'hspace': 0}, figsize=(25, n_dims * 10))
    label = ''
    for k, dim_name in enumerate(df.columns):
        series_ax = axs[k]
        column = df[dim_name]

        series_ax.set_ylabel(dim_name, fontsize=10)
        series_ax.set_xlabel('Time', fontsize=10)
        series_ax.plot(column, label=sensor_id_type_mapping[dim_name])
        series_ax.legend(loc="upper right")

        profile_ax = axs[k + n_dims]
        dim_motifs = final_motifs.get(k)
        dim_nn = final_nn.get(k)
        if dim_motifs and dim_nn:
            for rank, (motifs_idx, nn_idx) in enumerate(zip(dim_motifs, dim_nn), start=1):
                # Highlight both windows on the raw series.
                series_ax.plot(column.iloc[motifs_idx : motifs_idx + m], c='red', linewidth=4)
                series_ax.plot(column.iloc[nn_idx : nn_idx + m], c='red', linewidth=4)
                series_ax.axvline(x=motifs_idx, linestyle="dashed", c='black')
                series_ax.axvline(x=nn_idx, linestyle="dashed", c='black')

                # Markers are drawn one unit above the profile value.
                profile_ax.plot(motifs_idx, mps[k, motifs_idx] + 1, marker="v", markersize=10, color='red')
                profile_ax.plot(nn_idx, mps[k, nn_idx] + 1, marker="v", markersize=10, color='red')

                profile_ax.axvline(x=motifs_idx, linestyle="dashed", c='black')
                profile_ax.axvline(x=nn_idx, linestyle="dashed", c='black')

                profile_ax.text(motifs_idx, mps[k][motifs_idx], f"{rank}m", fontsize="xx-large")
                profile_ax.text(nn_idx, mps[k][nn_idx], f"{rank}n", fontsize="xx-large")

        # Legend of profile k lists all dimension names seen so far.
        label = label + ', ' + dim_name

        profile_ax.set_ylabel(f"P_{k}", fontsize=10)
        profile_ax.plot(mps[k], c='orange', label=f"{label.strip(',')}")
        profile_ax.set_xlabel('Time', fontsize=10)
        profile_ax.legend(loc="upper right")

    plt.savefig(filepath, transparent=False, bbox_inches='tight')
    plt.show()


In [None]:
def apply_functions(final_data, mps, m, filepath):
    """Interactive pipeline: ask for selection settings, pick motifs, plot them.

    Steps: collect the per-dimension settings from the user, select motifs and
    discords from ``mps``, pair the motifs with their nearest neighbours, and
    plot the result to ``filepath``.

    Besides its parameters the function relies on module-level state: it reads
    ``location_input`` directly, and the helpers it calls without the matching
    argument (``take_function_and_function_parameters_input()`` and
    ``calculate_nn_and_filter_motifs(motifs)``) read module-level names too.

    Parameters
    ----------
    final_data : pandas.DataFrame
        Series to plot; its index is replaced by a positional one for plotting.
    mps : numpy.ndarray
        Multidimensional matrix profile.
    m : int
        Window length used for highlighting.
    filepath : str
        Destination of the saved figure.
    """
    thresholds = take_function_and_function_parameters_input()
    print(f"\nMotif Threshold Dictionary:\n\t {thresholds}\n")

    motifs, discords = calculate_motifs_discords_for_each_dimension(mps, thresholds)
    print(f"Motifs Before Filtering:\n\t {motifs}\n")

    final_motifs, final_nn = calculate_nn_and_filter_motifs(motifs)

    plot_frame = final_data.reset_index(drop=True)
    legend_names = create_sensor_id_type_mapping(location_input)

    plot_results_of_mps(plot_frame, mps, m, final_motifs, final_nn, legend_names, filepath)

## Creating Folders and Reading Preprocessed Data Files

In [None]:
# Make sure the top-level output folder exists before any result is written.
create_directory(directory="Results")

In [None]:
# Lookup table indexed by its 'name' column; later cells select one column of it
# using the location entered by the user.
sensor_location_type_ids = pd.read_csv('./Processed_Data/Sensor_Location_Type_Ids.csv', index_col='name')

In [None]:

# NOTE(security): pickle.load can execute arbitrary code contained in the file;
# only load a pickle produced by a trusted preprocessing run.
# Only the keys of this dict are used further down (to build '<key>_list' row names).
with open('./Processed_Data/sensor_type_names_dict.pkl', 'rb') as f:
    sensor_type_names_dict = pickle.load(f)

In [None]:
# The entered text is used as a column label of sensor_location_type_ids and,
# with '/' replaced by '_', as part of the results folder name.
location_input = input("Enter a Specific Location For MMP: ")

In [None]:
# Per-location output folder; '/' inside the location name is replaced by '_'.
directory = "Results/" + location_input.replace('/', '_') + '_Original'
create_directory(directory)

In [None]:
# All files written below go into the 'MDL_Runs' sub-folder.
# NOTE: this cell is not idempotent — re-running it appends '/MDL_Runs' to
# ``directory`` again; re-run the previous cell first.
directory = directory + '/MDL_Runs'
create_directory(directory)

In [None]:

# Column names to load for the chosen location: all of them in ``columns``,
# plus separate lists for the 'bool' and 'decimal' sensor types.
columns = []
bool_columns = []
decimal_columns = []

for key in sensor_type_names_dict.keys():
    # Parse the '<key>_list' cell once and reuse the result.
    # NOTE(security): eval() executes whatever text the CSV cell contains; only
    # use with trusted data. ast.literal_eval may be a safer drop-in — confirm
    # the cell format first.
    type_columns = eval(sensor_location_type_ids.loc[f'{key}_list', location_input])
    if key == 'bool':
        bool_columns.extend(type_columns)
    elif key == 'decimal':
        decimal_columns.extend(type_columns)
    columns.extend(type_columns)

# The timestamp column is loaded too; it becomes the index of final_data.
columns.append('Timestamp')

In [None]:

# Load only the columns collected above; 'Timestamp' is parsed as dates and used as the index.
final_data = pd.read_csv("./Processed_Data/Final_Sensor_Time_Series_Imputed.csv", usecols=columns, 
                         index_col='Timestamp', parse_dates=True)


In [None]:

# Display the loaded frame as a quick sanity check.
final_data

In [None]:
# Persist the decimal and the bool columns of the location as separate CSV files.
final_data[decimal_columns].to_csv(f"{directory}/decimal_df.csv")
final_data[bool_columns].to_csv(f"{directory}/bool_df.csv")

In [None]:

# Raw {row name: cell} view of the location column (kept for inspection).
sensors_id_type = sensor_location_type_ids.loc[:,location_input].to_dict()

# Legend lookup read as a global by the mps_calculations_* plotting functions.
# Built with the helper defined above instead of repeating its loop inline.
sensor_id_type_mapping = create_sensor_id_type_mapping(location_input)

In [None]:
# Display the sensor-id -> row-name lookup built in the previous cell.
sensor_id_type_mapping

### MPS Calculation Using Constraint Algorithm For All Bool Dimensions (Weekly Rhythm)

In [None]:
# Matrix profile over all bool columns with window m=2016 (the weekly run).
# Rebinds the module-level ``mps`` / ``indices`` that later cells and some
# helper functions read as globals.
mps, indices = mps_calculations_mstump(final_data[bool_columns], 
                                        f"{directory}/All_Bool_Dimensions_mstump_Weekly.jpg", 
                                        m=2016)

### Dimensionality Reduction Using MDLs for Bool Dimensions (Weekly Rhythm)

In [None]:
# Uses the ``mps`` / ``indices`` of the previous cell, so run it directly after that one.
# NOTE: rebinds ``columns`` (until now the read_csv column list including
# 'Timestamp') to the MDL-selected bool column names.
columns = calculate_and_visualize_mdls(final_data[bool_columns], mps, indices, 
                                       f"{directory}/Ideal_Bool_Dimensions_Weekly.jpg", 
                                       m=2016)

In [None]:
# Save the MDL-selected bool columns (weekly window).
final_data[columns].to_csv(f"{directory}/bool_df_reduced_weekly.csv")

### MPS Calculation Using Constraint Algorithm For Reduced Bool Dimensions (Weekly Rhythm)

In [None]:
# Matrix profile restricted to the MDL-selected bool columns, m=2016; overwrites ``mps`` / ``indices``.
mps, indices = mps_calculations_mstump(final_data[columns],
                                       f"{directory}/Reduced_Bool_Dimensions_mstump_Weekly.jpg",
                                       m=2016)

### MPS Calculation Using Constraint Algorithm For All Decimal Dimensions (Weekly Rhythm)

In [None]:
# Matrix profile over all decimal columns, m=2016; overwrites ``mps`` / ``indices``.
mps, indices = mps_calculations_mstump(final_data[decimal_columns],
                                       f"{directory}/All_Decimal_Dimensions_mstump_Weekly.jpg",
                                       m=2016)

### Dimensionality Reduction Using MDLs for Decimal Dimensions (Weekly Rhythm)

In [None]:
# Uses the ``mps`` / ``indices`` of the previous (all-decimal) cell.
# Rebinds ``columns`` to the MDL-selected decimal column names.
columns = calculate_and_visualize_mdls(final_data[decimal_columns], mps, indices, 
                                       f"{directory}/Ideal_Decimal_Dimensions_Weekly.jpg",
                                       m=2016)

In [None]:
# Save the MDL-selected decimal columns (weekly window).
final_data[columns].to_csv(f"{directory}/Decimal_df_reduced_weekly.csv")

### MPS Calculation Using Constraint Algorithm For Reduced Decimal Dimensions (Weekly Rhythm)

In [None]:
# Matrix profile restricted to the MDL-selected decimal columns, m=2016; overwrites ``mps`` / ``indices``.
mps, indices = mps_calculations_mstump(final_data[columns], 
                                         f"{directory}/Reduced_Decimal_Dimensions_mstump_Weekly.jpg", 
                                         m=2016)

### MPS Calculation Using Constraint Algorithm For All Bool Dimensions (Daily Rhythm)

In [None]:
# Matrix profile over all bool columns with window m=288 (the daily run); overwrites ``mps`` / ``indices``.
mps, indices = mps_calculations_mstump(final_data[bool_columns], 
                                         f"{directory}/All_Bool_Dimensions_mstump_Daily.jpg", 
                                         m=288)

### Dimensionality Reduction Using MDLs for Bool Dimensions (Daily Rhythm)

In [None]:
# Uses the ``mps`` / ``indices`` of the previous (all-bool, m=288) cell.
# Rebinds ``columns`` to the MDL-selected bool column names for the daily window.
columns = calculate_and_visualize_mdls(final_data[bool_columns], mps, indices,
                                       f"{directory}/Ideal_Bool_Dimensions_Daily.jpg",
                                       m=288)

In [None]:
# Save the MDL-selected bool columns (daily window).
final_data[columns].to_csv(f"{directory}/bool_df_reduced_daily.csv")

### MPS Calculation Using Constraint Algorithm For Reduced Bool Dimensions (Daily Rhythm)

In [None]:
# Matrix profile restricted to the MDL-selected bool columns, m=288; overwrites ``mps`` / ``indices``.
mps, indices = mps_calculations_mstump(final_data[columns], 
                                         f"{directory}/Reduced_Bool_Dimensions_mstump_Daily.jpg", 
                                         m=288)

### MPS Calculation Using Constraint Algorithm For All Decimal Dimensions (Daily Rhythm)

In [None]:
# Matrix profile over all decimal columns, m=288; overwrites ``mps`` / ``indices``.
mps, indices = mps_calculations_mstump(final_data[decimal_columns], 
                                         f"{directory}/All_Decimal_Dimensions_mstump_Daily.jpg", 
                                         m=288)

### Dimensionality Reduction Using MDLs for Decimal Dimensions (Daily Rhythm)

In [None]:
# Uses the ``mps`` / ``indices`` of the previous (all-decimal, m=288) cell.
# Rebinds ``columns`` to the MDL-selected decimal column names for the daily window.
columns = calculate_and_visualize_mdls(final_data[decimal_columns], mps, indices, 
                                       f"{directory}/Ideal_Decimal_Dimensions_Daily.jpg", 
                                       m=288)

In [None]:
# Save the MDL-selected decimal columns (daily window).
final_data[columns].to_csv(f"{directory}/Decimal_df_reduced_daily.csv")

### MPS Calculation Using Constraint Algorithm For Reduced Decimal Dimensions (Daily Rhythm)

In [None]:
# Matrix profile restricted to the MDL-selected decimal columns, m=288; overwrites ``mps`` / ``indices``.
mps, indices = mps_calculations_mstump(final_data[columns], 
                                         f"{directory}/Reduced_Decimal_Dimensions_mstump_Daily.jpg", 
                                         m=288)