In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
import sys
import os

# Prepend the cloned repository to the import path. Replace '' with the path
# of the clone; with '' left in place os.path.abspath('') resolves to the
# current working directory.
# NOTE(review): presumably this is what makes the stumpy build providing
# `mstump_m` importable - confirm.
cloned_repo_path = os.path.abspath('')  # insert path here
sys.path.insert(0, cloned_repo_path)
# The current working directory is prepended as well; when the placeholder
# above is left empty this inserts the same path a second time.
cloned_repo_path = os.path.abspath('.')
sys.path.insert(0, cloned_repo_path)

In [None]:
import os
import glob
import json
import pickle 
import stumpy
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

import seaborn as sns
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import STL

## Helper Functions

In [None]:
def create_directory(directory="Results"):
    """Create `directory` (including parents) unless the path already exists.

    Prints a one-line status message in either case.

    Parameters
    ----------
    directory : str
        Path of the directory to create.
    """
    if os.path.exists(directory):
        print("Directory Already Exists")
        return

    os.makedirs(directory)
    print("Directory Created Successfully")

In [None]:
def calculate_and_visualize_mdls(final_data, motifs_idx, nn_idx, filepath, m=2016):
    """Run `stumpy.mdl` for one motif pair, plot the bit sizes and pick the best subspace.

    Parameters
    ----------
    final_data : pandas.DataFrame
        Multi-dimensional time series; its columns are used to name the
        selected subspace.
    motifs_idx, nn_idx
        Motif start indices and nearest-neighbour start indices, passed
        straight to `stumpy.mdl`.
    filepath : str
        Where the bit-size plot is saved.
    m : int
        Subsequence length passed to `stumpy.mdl`.

    Returns
    -------
    (list, list)
        Column names of the subspace with the smallest bit size, and the list
        of all subspaces (each converted to a plain list).
    """
    mdls, subspaces = stumpy.mdl(final_data, m, motifs_idx, nn_idx)
    subspaces = [ele.tolist() for ele in subspaces]

    print(f"Subspaces:\n\t {subspaces}\n")

    # The subspace with the smallest bit size is the suggested one.
    k = np.argmin(mdls)
    suggested_columns = final_data.columns[subspaces[k]]

    print(f"Suugested Columns For Multidimensional Matrix Profile:\n\t {suggested_columns}\n")

    plt.plot(np.arange(len(mdls)), mdls, c='red', linewidth=4)
    plt.xlabel('k (zero-based)', fontsize=20)
    plt.ylabel('Bit Size', fontsize=20)
    # One tick per plotted point. The original read the length from a global
    # `mps`, which is not an argument of this function.
    plt.xticks(range(len(mdls)))

    plt.savefig(filepath, transparent=False, bbox_inches='tight')
    plt.show()

    return suggested_columns.tolist(), subspaces

In [None]:
def mps_calculations_mstump_m(final_data, filepath, m=2016):
    """Compute the multi-dimensional matrix profile and plot one motif pair per dimension.

    For every dimension k the position of the smallest profile value is taken
    as the motif and its entry in the index matrix as the nearest neighbour.
    The figure has one panel per input column (signal with both windows
    highlighted) followed by one panel per profile row.

    Relies on the module-level `sensor_id_type_mapping` for the legend labels.
    NOTE(review): `stumpy.mstump_m` is presumably supplied by the cloned
    stumpy build placed on sys.path - confirm.

    Parameters
    ----------
    final_data : pandas.DataFrame
        One column per dimension.
    filepath : str
        Where the figure is saved.
    m : int
        Subsequence length (2016 is used for the weekly rhythm).

    Returns
    -------
    (mps, indices)
        The profile matrix and index matrix returned by `stumpy.mstump_m`.
    """
    mps, indices = stumpy.mstump_m(final_data, m)
    print(f"MPS Shape: {mps.shape}")

    # One single motif for each dimension: the minimum of each profile row.
    motifs_idx = np.argmin(mps, axis=1)
    print(f"Motif Start Index: {motifs_idx}")
    nn_idx = indices[np.arange(len(motifs_idx)), motifs_idx]
    print(f"Nearest Start Index: {nn_idx}")

    # Positional index so that start indices line up with the x axis.
    df = final_data.reset_index(drop=True)

    n_dims = mps.shape[0]
    fig, axs = plt.subplots(n_dims * 2, sharex=True, gridspec_kw={'hspace': 0}, figsize=(25, n_dims * 5))
    label = ''
    for k, dim_name in enumerate(df.columns):
        motif_start = motifs_idx[k]
        nn_start = nn_idx[k]

        signal_ax = axs[k]
        signal_ax.set_ylabel(dim_name, fontsize=10)
        signal_ax.set_xlabel('Time', fontsize=10)
        signal_ax.plot(df[dim_name], label=sensor_id_type_mapping[dim_name])
        signal_ax.legend(loc="upper right")

        signal_ax.plot(range(motif_start, motif_start + m),
                       df[dim_name].iloc[motif_start : motif_start + m], c='red', linewidth=4)
        signal_ax.plot(range(nn_start, nn_start + m),
                       df[dim_name].iloc[nn_start : nn_start + m], c='red', linewidth=4)

        # The k-th profile legend lists every column name seen so far.
        label = label + ', ' + dim_name

        signal_ax.axvline(x=motif_start, linestyle="dashed", c='black')
        signal_ax.axvline(x=nn_start, linestyle="dashed", c='black')

        profile_ax = axs[k + n_dims]
        profile_ax.set_ylabel(f"P_{k}", fontsize=10)
        profile_ax.plot(mps[k], c='orange', label=f"{label.strip(',')}")
        profile_ax.set_xlabel('Time', fontsize=10)

        profile_ax.axvline(x=motif_start, linestyle="dashed", c='black')
        profile_ax.axvline(x=nn_start, linestyle="dashed", c='black')

        profile_ax.plot(motif_start, mps[k, motif_start] + 1, marker="v", markersize=10, color='red')
        profile_ax.plot(nn_start, mps[k, nn_start] + 1, marker="v", markersize=10, color='red')

        profile_ax.text(motif_start, mps[k, motif_start], "1m", fontsize="xx-large")
        # Anchor the neighbour label at the neighbour's own profile value,
        # matching plot_results_of_mps (the original used the motif's value).
        profile_ax.text(nn_start, mps[k, nn_start], "1n", fontsize="xx-large")
        profile_ax.legend(loc="upper right")

    plt.savefig(filepath, transparent=False, bbox_inches='tight')
    plt.show()

    return mps, indices



In [None]:
  


def get_percent_motif_start_index(motifs):
    """Return every 2016th index label of `motifs`, starting with the first.

    Parameters
    ----------
    motifs : pandas.Series
        Entries selected as motif candidates; only its index is read.

    Returns
    -------
    list
        Index labels found at positions 0, 2016, 4032, ...
    """
    return [motifs.index[position] for position in range(0, len(motifs), 2016)]

In [None]:
# Further Processing the motifs 

def get_motif_start_index(motifs):
    """Pick entries of `motifs` while stepping through it 2016 positions at a time.

    The caller in this notebook passes the argsort of one matrix-profile row,
    so each entry is a position in that row.
    NOTE(review): the shift logic below presumably skips past a window that
    would run beyond the end of the series - confirm the intent.

    Returns the list of picked entries, one per step.
    """
    final_motifs = []
    j = 0  # position in `motifs` that the current step reads
    check = False  # becomes True once an out-of-range window was seen; never reset
    
    for i in range(0, len(motifs), 2016):
        # Until the shift is triggered, read exactly at the step position.
        if not check:
            j = i

        final_motifs.append(motifs[j])

        # After the trigger, the next step reads `value` positions after the
        # current step position instead of at the next step position.
        if check:
            j = i + value

        # First entry whose 2016-long window would exceed len(motifs):
        # remember the remaining length and start shifting the reads.
        elif motifs[i] + 2016 > len(motifs):
            value = len(motifs) - motifs[i]
            j = i + value
            check = True
            
                
    return final_motifs


In [None]:
# A function takes in percentage for discord and percentage for motif 


def select_motifs_discords_percentage(mps, dimension, motif_percentage, discord_percentage, motifs=None, discords=None):
    """Select motif candidates of one dimension by a percentile threshold.

    Entries of `mps` strictly below the `motif_percentage` percentile are the
    candidates; `get_percent_motif_start_index` then keeps every 2016th of
    them and the result is stored under `motifs[dimension]`.

    Parameters
    ----------
    mps : pandas.Series
        Matrix-profile values of a single dimension.
    dimension
        Key under which the selection is stored.
    motif_percentage : number
        Percentile (0-100) used as the motif threshold.
    discord_percentage : number
        Accepted for interface compatibility. No discord selection is
        performed here, so `discords` is returned as received.
    motifs, discords : dict, optional
        Accumulators updated in place. A fresh dict is created when omitted;
        the original used mutable `{}` defaults, which leaked results from
        one call into the next.

    Returns
    -------
    (dict, dict)
        The `motifs` and `discords` accumulators.
    """
    if motifs is None:
        motifs = {}
    if discords is None:
        discords = {}

    motif_threshold = mps.quantile(motif_percentage / 100)
    motif = mps[mps < motif_threshold]

    if len(motif):
        motifs[dimension] = get_percent_motif_start_index(motif)

    return motifs, discords

In [None]:
# Select the top K motifs of a dimension (the entries that come first when the profile is sorted in ascending order)

def select_top_k_motifs_discords(mps, dimension, k_motifs, k_discords, motifs=None, discords=None):
    """Select the first `k_motifs` motif candidates of one dimension.

    The profile is argsorted in ascending order (stable), reduced by
    `get_motif_start_index`, and the first `k_motifs` entries are stored
    under `motifs[dimension]`.

    Parameters
    ----------
    mps : pandas.Series
        Matrix-profile values of a single dimension.
    dimension
        Key under which the selection is stored.
    k_motifs : int
        Number of motif candidates to keep.
    k_discords : int
        Accepted for interface compatibility. No discord selection is
        performed here, so `discords` is returned as received.
    motifs, discords : dict, optional
        Accumulators updated in place. A fresh dict is created when omitted;
        the original used mutable `{}` defaults, which leaked results from
        one call into the next.

    Returns
    -------
    (dict, dict)
        The `motifs` and `discords` accumulators.
    """
    if motifs is None:
        motifs = {}
    if discords is None:
        discords = {}

    sorted_mps = np.argsort(mps, kind='stable')
    motifs[dimension] = get_motif_start_index(sorted_mps)[:k_motifs]

    return motifs, discords

In [None]:
# Taking user's input for function 


motif_thresholds_for_all_dimensions = {}

def take_function_and_function_parameters_input():
    """Interactively ask, per dimension, which selection function to use and its parameters.

    Iterates over the rows of the module-level `mps`. Choice 1 asks for a
    motif and a discord percentage, choice 2 for top-K counts; any other
    integer skips the dimension. Every answer is converted with `int`.

    Returns
    -------
    dict
        Maps dimension -> settings dict with key "function" plus either
        "motif_percentage"/"discord_percentage" or "k_motifs"/"k_discords".
    """
    thresholds_by_dimension = {}

    for dimension in range(mps.shape[0]):
        function = input(f"Select A Function To Be Applied to Dimension {dimension}\n"
                         f"\tPress 1 For Selection Based on Percentage\n "
                         f"\tPress 2 For Top K Motifs and Discords Selection: ")
        choice = int(function)

        if choice == 1:
            motif_percentage = input(f"\tEnter a Specific Threshold Value For Motif Selection For Dimension {dimension}: ")
            discord_percentage = input(f"\tEnter a Specific Threshold Value For Discord Selection For Dimension {dimension}: ")
            settings = {
                "function": choice,
                "motif_percentage": int(motif_percentage),
                "discord_percentage": int(discord_percentage),
            }
        elif choice == 2:
            k_motif = input(f"\tEnter Top K Motif Selection For Dimension {dimension}: ")
            k_discord = input(f"\tEnter Top K Discord Selection For Dimension {dimension}: ")
            settings = {
                "function": choice,
                "k_motifs": int(k_motif),
                "k_discords": int(k_discord),
            }
        else:
            # Unknown choice: leave this dimension out of the result.
            continue

        thresholds_by_dimension[dimension] = settings

    return thresholds_by_dimension


In [None]:
# calc Motifs and Discords 


def calculate_motifs_discords_for_each_dimension(mps, motif_thresholds_for_all_dimensions):
    """Apply the configured selection function to every configured dimension.

    Parameters
    ----------
    mps : array-like
        Profile matrix with one row per dimension; it is transposed into a
        DataFrame so that each dimension becomes a column.
    motif_thresholds_for_all_dimensions : dict
        Maps dimension -> settings dict. "function" 1 selects by percentage,
        2 selects the top K; any other value is ignored.

    Returns
    -------
    (dict, dict)
        Accumulated motifs and discords keyed by dimension.
    """
    motifs, discords = {}, {}
    profiles = pd.DataFrame(mps).T

    for dimension, settings in motif_thresholds_for_all_dimensions.items():
        selector = settings.get('function')

        if selector == 1:
            motifs, discords = select_motifs_discords_percentage(
                profiles[dimension], dimension,
                settings["motif_percentage"], settings["discord_percentage"],
                motifs, discords,
            )
        elif selector == 2:
            motifs, discords = select_top_k_motifs_discords(
                profiles[dimension], dimension,
                settings['k_motifs'], settings['k_discords'],
                motifs, discords,
            )

    return motifs, discords

In [None]:
# Selecting nn that correspond to each motif_idx  

def calculate_nn_and_filter_motifs(motifs):
    """Look up each motif's nearest neighbour and drop motifs that are already a neighbour.

    Within one dimension, a motif start is skipped when it equals the nearest
    neighbour of a motif accepted earlier. Neighbours are read from the
    module-level `indices` matrix as `indices[dimension, start]`.

    Parameters
    ----------
    motifs : dict
        Maps dimension -> iterable of motif start indices.

    Returns
    -------
    (dict, dict)
        Accepted motif starts and their nearest neighbours, both keyed by
        dimension. Dimensions without accepted motifs are absent.
    """
    final_nn = {}
    final_motifs = {}

    for dimension, candidates in motifs.items():
        claimed_neighbours = set()

        for start in candidates:
            if start in claimed_neighbours:
                continue

            neighbour = indices[dimension, start]
            final_nn.setdefault(dimension, []).append(neighbour)
            final_motifs.setdefault(dimension, []).append(start)
            claimed_neighbours.add(neighbour)

    return final_motifs, final_nn

In [None]:
def calculate_nns(motifs):
    """Look up the nearest neighbour of every motif, without filtering.

    Neighbours are read from the module-level `indices` matrix as
    `indices[dimension, start]`.

    Parameters
    ----------
    motifs : dict
        Maps dimension -> iterable of motif start indices.

    Returns
    -------
    (dict, dict)
        Motif starts and their nearest neighbours, both keyed by dimension.
        Dimensions with no motifs are absent.
    """
    final_nn, final_motifs = {}, {}

    for dimension, starts in motifs.items():
        for start in starts:
            final_nn.setdefault(dimension, []).append(indices[dimension, start])
            final_motifs.setdefault(dimension, []).append(start)

    return final_motifs, final_nn

In [None]:

def create_sensor_id_type_mapping(location_input):
    """Build a sensor-id -> row-name mapping for one location.

    Reads the `location_input` column of the module-level
    `sensor_location_type_ids` frame. Rows whose cell is not a string, or
    whose name contains 'list', are ignored. Each remaining cell is parsed
    into a sequence of ids after replacing spaces with commas.

    Returns
    -------
    dict
        Maps str(sensor id) -> name of the row it was listed in.
    """
    cells_by_row = sensor_location_type_ids.loc[:, location_input].to_dict()

    mapping = {}
    for row_name, cell in cells_by_row.items():
        if not isinstance(cell, str) or 'list' in row_name:
            continue

        # NOTE(review): eval() executes whatever text the CSV cell contains;
        # only use this with trusted input files.
        for sensor_id in eval(cell.replace(' ', ',')):
            mapping[f"{sensor_id}"] = row_name

    return mapping

In [None]:
# Plotting the dimensions along with their matrix profile values


def plot_results_of_mps(df, mps, m, final_motifs, final_nn, sensor_id_type_mapping, filepath):
    """Plot every dimension with its selected motifs, followed by the profile rows.

    The figure has `2 * mps.shape[0]` stacked panels: panel k shows column k
    of `df`, panel `k + mps.shape[0]` shows profile row k. Motif and
    neighbour windows of length `m` are drawn in red, marked with dashed
    lines and labelled "<n>m" / "<n>n" on the profile panel.

    Parameters
    ----------
    df : pandas.DataFrame
        Signals with a positional index.
    mps : numpy.ndarray
        Profile matrix, one row per dimension.
    m : int
        Window length to highlight.
    final_motifs, final_nn : dict
        Dimension -> list of motif starts / nearest-neighbour starts.
    sensor_id_type_mapping : dict
        Column name -> legend label.
    filepath : str
        Where the figure is saved.
    """
    n_dims = mps.shape[0]
    fig, axs = plt.subplots(n_dims * 2, sharex=True, gridspec_kw={'hspace': 0}, figsize=(25, n_dims * 5))
    label = ''

    for k, dim_name in enumerate(df.columns):
        series = df[dim_name]

        signal_ax = axs[k]
        signal_ax.set_ylabel(dim_name, fontsize=10)
        signal_ax.set_xlabel('Time', fontsize=10)
        signal_ax.plot(series, label=sensor_id_type_mapping[dim_name])
        signal_ax.legend(loc="upper right")

        motif_starts = final_motifs.get(k)
        nn_starts = final_nn.get(k)
        if motif_starts and nn_starts:
            profile_ax = axs[k + n_dims]
            for rank, (motifs_idx, nn_idx) in enumerate(zip(motif_starts, nn_starts), start=1):
                signal_ax.plot(series.iloc[motifs_idx : motifs_idx + m], c='red', linewidth=4)
                signal_ax.plot(series.iloc[nn_idx : nn_idx + m], c='red', linewidth=4)
                signal_ax.axvline(x=motifs_idx, linestyle="dashed", c='black')
                signal_ax.axvline(x=nn_idx, linestyle="dashed", c='black')

                profile_ax.plot(motifs_idx, mps[k, motifs_idx] + 1, marker="v", markersize=10, color='red')
                profile_ax.plot(nn_idx, mps[k, nn_idx] + 1, marker="v", markersize=10, color='red')

                profile_ax.axvline(x=motifs_idx, linestyle="dashed", c='black')
                profile_ax.axvline(x=nn_idx, linestyle="dashed", c='black')

                profile_ax.text(motifs_idx, mps[k][motifs_idx], f"{rank}m", fontsize="xx-large")
                profile_ax.text(nn_idx, mps[k][nn_idx], f"{rank}n", fontsize="xx-large")

        # The k-th profile legend lists every column name seen so far.
        label = label + ', ' + dim_name

        profile_ax = axs[k + n_dims]
        profile_ax.set_ylabel(f"P_{k}", fontsize=10)
        profile_ax.plot(mps[k], c='orange', label=f"{label.strip(',')}")
        profile_ax.set_xlabel('Time', fontsize=10)
        profile_ax.legend(loc="upper right")

    plt.savefig(filepath, transparent=False, bbox_inches='tight')
    plt.show()


In [None]:
def apply_functions(final_data, mps, m, filepath):
    """Interactive pipeline: ask for thresholds, select and filter motifs, plot them.

    Uses the module-level `location_input` to build the legend mapping.

    Parameters
    ----------
    final_data : pandas.DataFrame
        Signals, one column per dimension.
    mps : numpy.ndarray
        Profile matrix.
    m : int
        Window length used for highlighting.
    filepath : str
        Where the figure is saved.

    Returns
    -------
    (dict, dict)
        Filtered motif starts and their nearest neighbours per dimension.
    """
    thresholds = take_function_and_function_parameters_input()
    print(f"\nMotif Threshold Dictionary:\n\t {thresholds}\n")

    candidate_motifs, _ = calculate_motifs_discords_for_each_dimension(mps, thresholds)
    print(f"Motifs Before Filtering:\n\t {candidate_motifs}\n")

    final_motifs, final_nn = calculate_nn_and_filter_motifs(candidate_motifs)

    # Positional index so that start indices line up with the x axis.
    positional_df = final_data.reset_index(drop=True)
    plot_results_of_mps(positional_df, mps, m, final_motifs, final_nn,
                        create_sensor_id_type_mapping(location_input), filepath)

    return final_motifs, final_nn

In [None]:
def apply_functions_heatmap(final_data, mps, m, filepath, sensor_id_type_mapping, 
                            motif_thresholds_for_all_dimensions):
    """Non-interactive pipeline: select motifs from given thresholds and plot them.

    Unlike `apply_functions`, the thresholds and the legend mapping are passed
    in, and motifs are not filtered (`calculate_nns` is used).

    Returns
    -------
    (dict, dict)
        Motif starts and their nearest neighbours per dimension.
    """
    print(f"\nMotif Threshold Dictionary:\n\t {motif_thresholds_for_all_dimensions}\n")

    selected_motifs, _ = calculate_motifs_discords_for_each_dimension(mps, motif_thresholds_for_all_dimensions)
    print(f"Motifs Before Filtering:\n\t {selected_motifs}\n")

    final_motifs, final_nn = calculate_nns(selected_motifs)

    # Positional index so that start indices line up with the x axis.
    positional_df = final_data.reset_index(drop=True)
    plot_results_of_mps(positional_df, mps, m, final_motifs, final_nn, sensor_id_type_mapping, filepath)

    return final_motifs, final_nn

In [None]:
def build_json_subspaces(final_data, mps, m, filepath, json_path, sensor_id_type_mapping,
                         motif_thresholds_for_all_dimensions, sensor_type_id, device_type_id):
    """Select motifs, compute their `stumpy.subspace` per dimension and dump them to JSON.

    The JSON file holds one entry per k (column position of `final_data`):
    `{"k": k, "motif_subspaces": [...]}`, where each element carries
    "motif_id", "motif_idx", "nn_idx", "subspace_column",
    "subspace_sensor_ids", "subspace_sensor_types" and
    "subspace_device_types".

    Parameters
    ----------
    final_data : pandas.DataFrame
        Signals, one column per dimension.
    mps : numpy.ndarray
        Profile matrix.
    m : int
        Subsequence length. It is now forwarded to the plotting step and to
        `stumpy.subspace`; the original ignored it and hard-coded 2016.
    filepath : str
        Where the motif figure is saved.
    json_path : str
        Where the JSON file is written.
    sensor_id_type_mapping : dict
        Column name -> legend label.
    motif_thresholds_for_all_dimensions : dict
        Selection settings per dimension.
    sensor_type_id, device_type_id : pandas.DataFrame
        Single-row frames indexed by sensor-id columns; the first row supplies
        the type of each sensor in a subspace.
    """
    final_motifs, final_nn = apply_functions_heatmap(final_data, mps, m, filepath, sensor_id_type_mapping,
                                                     motif_thresholds_for_all_dimensions)

    json_data = []

    for k in range(final_data.shape[1]):
        subspaces = []

        if final_motifs and final_motifs.get(k):
            for motif_id, (motifs_idx, nn_idx) in enumerate(zip(final_motifs[k], final_nn[k]), start=1):
                subspace = stumpy.subspace(final_data, m, motifs_idx, nn_idx, k)
                sensor_ids = final_data.columns[subspace].tolist()

                print(f"For k = {k}, the {k + 1}-dimensional subspace includes subsequences from {subspace}")
                subspaces.append({
                    "motif_id": motif_id,
                    "motif_idx": int(motifs_idx),
                    "nn_idx": int(nn_idx),
                    "subspace_column": subspace.tolist(),
                    "subspace_sensor_ids": sensor_ids,
                    "subspace_sensor_types": sensor_type_id[sensor_ids].values.tolist()[0],
                    "subspace_device_types": device_type_id[sensor_ids].values.tolist()[0],
                })

        json_data.append({'k': k, 'motif_subspaces': subspaces})

    # Dump the motifs, nearest neighbours and subspaces.
    with open(json_path, 'w') as f:
        json.dump(json_data, f, indent=2)

In [None]:
def build_mdl_json(final_data, final_subspaces, json_path, sensor_type_id, device_type_id):
    """Write the MDL subspaces of all motifs to a JSON file, grouped by k.

    Parameters
    ----------
    final_data : pandas.DataFrame
        Signals; its columns translate column positions into sensor ids.
    final_subspaces : dict
        Maps motif number -> list of subspaces (one per k), each subspace a
        list of column positions.
    json_path : str
        Where the JSON file is written.
    sensor_type_id, device_type_id : pandas.DataFrame
        Single-row frames indexed by sensor-id columns; the first row supplies
        the type of each sensor in a subspace.
    """
    # After the transpose: rows are motifs, columns are k.
    subspaces_by_motif = pd.DataFrame(final_subspaces).T

    json_data = []
    for k in range(subspaces_by_motif.shape[1]):
        entries = []
        for motif_id, subspace in enumerate(subspaces_by_motif[k], start=1):
            sensor_ids = final_data.columns[subspace].tolist()
            entries.append({
                "motif_id": motif_id,
                "subspace_column": subspace,
                "subspace_sensor_ids": sensor_ids,
                "subspace_sensor_types": sensor_type_id[sensor_ids].values.tolist()[0],
                "subspace_device_types": device_type_id[sensor_ids].values.tolist()[0],
            })

        json_data.append({'k': k, "motif_subspaces": entries})

    with open(json_path, 'w') as f:
        json.dump(json_data, f, indent=2)

In [None]:
def top_k_motifs_subspaces_function(final_data, mps, sensor_type_id, 
                                    device_type_id, threshold_input, sensor_id_type_mapping,
                                    image_path, m=2016):
    """Run `build_json_subspaces` for top-1, top-2, ... top-N motifs.

    Run `i` (zero-based) selects the top `i + 1` motifs in every dimension and
    writes a figure and a JSON file whose names carry `i`.

    Parameters
    ----------
    threshold_input : str or int
        Number of runs N; converted with `int`.
    image_path : str
        Directory receiving the figures and JSON files.
    m : int
        Subsequence length forwarded to `build_json_subspaces`.
    """
    for i in range(int(threshold_input)):
        # Every dimension shares the same top-K settings for this run.
        top_k_settings = {"function": 2, "k_motifs": i + 1, "k_discords": i + 1}
        thresholds = dict.fromkeys(range(final_data.shape[1]), top_k_settings)

        build_json_subspaces(
            final_data, mps, m,
            f"{image_path}//All_Dimensions_mstump_m_Weekly_Top_{i}_motifs.jpg",
            f"{image_path}//Subspaces_Data_Weekly_Top_{i}_motifs.json",
            sensor_id_type_mapping, thresholds, sensor_type_id, device_type_id,
        )

In [None]:
def top_k_motifs_mdl_function(final_data, mps, sensor_type_id, device_type_id, 
                              threshold_input, mdls_path, m=2016):
    """Run the MDL analysis for top-1, top-2, ... top-N motifs and dump each to JSON.

    Run `i` (zero-based) selects the top `i + 1` motifs in every dimension,
    evaluates `calculate_and_visualize_mdls` for each of those motifs and
    writes the resulting subspaces with `build_mdl_json`.

    Parameters
    ----------
    threshold_input : str or int
        Number of runs N; converted with `int`.
    mdls_path : str
        Directory receiving the MDL figures and JSON files.
    m : int
        Subsequence length forwarded to `calculate_and_visualize_mdls`.
    """
    for i in range(int(threshold_input)):
        # Every dimension shares the same top-K settings for this run.
        top_k_settings = {"function": 2, "k_motifs": i + 1, "k_discords": i + 1}
        thresholds = dict.fromkeys(range(final_data.shape[1]), top_k_settings)

        selected_motifs, _ = calculate_motifs_discords_for_each_dimension(mps, thresholds)
        motifs_by_dim, nns_by_dim = calculate_nns(selected_motifs)

        # After the transpose: rows are dimensions, column j is the j-th motif.
        motif_table = pd.DataFrame(motifs_by_dim).T
        nn_table = pd.DataFrame(nns_by_dim).T

        final_subspaces = {}
        for j in range(i + 1):
            _, subspaces = calculate_and_visualize_mdls(
                final_data, motif_table[j], nn_table[j],
                f"{mdls_path}/Ideal_Dimensions_Motif_{j}.jpg", m,
            )
            final_subspaces[j] = subspaces

        build_mdl_json(final_data, final_subspaces,
                       f"{mdls_path}/MDL_Weekly_Top_{i}_motifs.json",
                       sensor_type_id, device_type_id)

In [None]:
def count_changed_subspaces(motif_subspaces, sort=False):
    """Count how many distinct sensor-id subspaces occur beyond the first one.

    Parameters
    ----------
    motif_subspaces : list of dict
        Each dict holds a "subspace_sensor_ids" list.
    sort : bool
        When True the order of ids inside a subspace is ignored.

    Returns
    -------
    int or None
        Number of distinct subspaces minus one, or None when
        `motif_subspaces` is empty or None.
    """
    if not motif_subspaces:
        return None

    distinct = set()
    for subspace in motif_subspaces:
        sensor_ids = subspace["subspace_sensor_ids"]
        # Compare as tuples. Concatenating the ids into one string made
        # different lists collide, e.g. ['a', 'bc'] and ['ab', 'c'].
        distinct.add(tuple(sorted(sensor_ids) if sort else sensor_ids))

    return len(distinct) - 1

In [None]:
def calculate_change_count(filespath):
    """Compute, per JSON file and per k, the fraction of changed subspaces.

    Files matching `filespath` are processed in natural (numeric-aware) order
    so that "..._Top_10_..." follows "..._Top_9_..." and not "..._Top_1_...".
    The n-th file is taken to contain n motifs per dimension.

    Parameters
    ----------
    filespath : str
        Glob pattern of the JSON files written by the build_*_json functions.

    Returns
    -------
    (pandas.DataFrame, list of dict)
        Matrix of Changed_Count / Total_Motifs (rows: files, columns: k; NaN
        where a file has no motifs for that k) and the raw counts per file.

    Raises
    ------
    FileNotFoundError
        When the pattern matches no file.
    """
    import re

    def _natural_key(path):
        # re.split with a capturing group alternates text, digits, text, ...
        # so odd positions are always the digit runs.
        return [int(part) if pos % 2 else part
                for pos, part in enumerate(re.split(r'(\d+)', path))]

    files = sorted(glob.glob(filespath), key=_natural_key)
    if not files:
        raise FileNotFoundError(f"No files match pattern: {filespath}")

    dimensions_change_count_motifs = []
    for motif_count, file in enumerate(files, start=1):
        with open(file, 'r') as f:
            json_data = json.load(f)

        dimensions_change_count_motifs.append({
            value['k']: {"Total_Motifs": motif_count,
                         "Changed_Count": count_changed_subspaces(value["motif_subspaces"])}
            for value in json_data
        })

    matrix = np.full((len(dimensions_change_count_motifs), len(dimensions_change_count_motifs[0])), np.nan)

    for i, per_dimension in enumerate(dimensions_change_count_motifs):
        for key, value in per_dimension.items():
            # count_changed_subspaces returns None for a dimension without
            # motifs; keep NaN there instead of failing on None / int.
            if value["Changed_Count"] is None:
                continue
            matrix[i][key] = value["Changed_Count"] / value["Total_Motifs"]

    df = pd.DataFrame(matrix)

    return df, dimensions_change_count_motifs

In [None]:
def create_heatmap(df, filepath, dimensions_change_count_motifs):
    """Draw `df` as an annotated seaborn heatmap, save it and show it.

    The figure is sized one unit per column (k) and one per row (motif
    count), both taken from `dimensions_change_count_motifs`; the y axis is
    inverted so the first row ends up at the bottom.
    """
    width = len(dimensions_change_count_motifs[0])
    height = len(dimensions_change_count_motifs)

    fig, ax = plt.subplots(figsize=(width, height))
    heatmap_ax = sns.heatmap(data=df, annot=True, linewidths=2, ax=ax, cmap='crest')
    heatmap_ax.set(xlabel='k', ylabel='Motifs_Count')
    ax.invert_yaxis()

    plt.savefig(filepath, transparent=False, bbox_inches='tight')
    plt.show()

In [None]:
def calculate_change_count_and_build_heatmap(jsons_path, heatmap_path):
    """Compute the change fractions for the JSON files and plot them as a heatmap.

    Parameters
    ----------
    jsons_path : str
        Glob pattern handed to `calculate_change_count`.
    heatmap_path : str
        Where the heatmap image is saved.
    """
    change_fractions, counts_per_file = calculate_change_count(jsons_path)
    create_heatmap(change_fractions, heatmap_path, counts_per_file)

In [None]:
def compare_subspace_mdl(subspace_motifs, mdl_motifs):
    """Count the positions at which the two motif lists name different sensor ids.

    The lists are compared pairwise; surplus entries of the longer list are
    ignored. The order of ids inside "subspace_sensor_ids" matters.
    """
    return sum(
        1
        for from_subspace, from_mdl in zip(subspace_motifs, mdl_motifs)
        if from_subspace["subspace_sensor_ids"] != from_mdl["subspace_sensor_ids"]
    )


In [None]:
def compare_subspaces_with_mdl_heatmap(subspaces_directory, mdls_directory):
    """Compare the subspace JSON files with the MDL JSON files, run by run.

    The number of runs is read from the module-level `threshold_input`. For
    run `i` the files "Subspaces_Data_Weekly_Top_{i}_motifs.json" and
    "MDL_Weekly_Top_{i}_motifs.json" are loaded and, for every k, the
    fraction of motifs whose sensor ids differ is computed.

    Returns
    -------
    (pandas.DataFrame, list of dict)
        Matrix of Changed_Count / Total_Motifs (rows: runs, columns: k; NaN
        where a run has no motifs for that k) and the raw counts per run.
    """
    dimensions_change_count_motifs = []
    for i in range(int(threshold_input)):
        subspace_file = f"{subspaces_directory}/Subspaces_Data_Weekly_Top_{i}_motifs.json"
        mdl_file = f"{mdls_directory}/MDL_Weekly_Top_{i}_motifs.json"

        with open(subspace_file, 'r') as f:
            subspace_json = json.load(f)
        with open(mdl_file, 'r') as f:
            mdl_json = json.load(f)

        dimensions_change_count = {}
        for subspace_value, mdl_value in zip(subspace_json, mdl_json):
            changed_count = compare_subspace_mdl(subspace_value["motif_subspaces"], mdl_value["motif_subspaces"])
            total_count = len(subspace_value["motif_subspaces"])
            dimensions_change_count[subspace_value['k']] = {"Total_Motifs": total_count, "Changed_Count": changed_count}

        dimensions_change_count_motifs.append(dimensions_change_count)

    # Zero runs yields an empty matrix instead of an IndexError.
    n_columns = len(dimensions_change_count_motifs[0]) if dimensions_change_count_motifs else 0
    matrix = np.full((len(dimensions_change_count_motifs), n_columns), np.nan)

    for i, per_dimension in enumerate(dimensions_change_count_motifs):
        for key, value in per_dimension.items():
            # A dimension without motifs has nothing to compare; keep NaN
            # instead of dividing by zero.
            if not value["Total_Motifs"]:
                continue
            matrix[i][key] = value["Changed_Count"] / value["Total_Motifs"]

    df = pd.DataFrame(matrix)

    return df, dimensions_change_count_motifs

In [None]:
def get_column_from_json_for_heatmap(motif_subspaces, column, sort=False):
    """Collect the value stored under `column` from every motif entry.

    Parameters
    ----------
    motif_subspaces : list of dict
        Motif entries as written to the subspace JSON files.
    column : str
        Key to extract from each entry.
    sort : bool
        Accepted but not used.

    Returns
    -------
    list or None
        The extracted values in input order, or None when `motif_subspaces`
        is empty or None.
    """
    if not motif_subspaces:
        return None

    return [entry[column] for entry in motif_subspaces]

In [None]:
def build_heatmap_subspaces_column(filespath, column):
    """Collect one JSON column for every file and every k into a table.

    Files matching `filespath` are processed in natural (numeric-aware) order
    so that "..._Top_10_..." follows "..._Top_9_..." and not "..._Top_1_...".

    Parameters
    ----------
    filespath : str
        Glob pattern of the subspace JSON files.
    column : str
        Key to extract from each motif entry (e.g. "subspace_sensor_ids").

    Returns
    -------
    pandas.DataFrame
        Rows are files, columns are k. Each cell holds the list of extracted
        values, None when the file has no motifs for that k, or 0 when the
        file has no entry for that k at all.

    Raises
    ------
    FileNotFoundError
        When the pattern matches no file.
    """
    import re

    def _natural_key(path):
        # re.split with a capturing group alternates text, digits, text, ...
        # so odd positions are always the digit runs.
        return [int(part) if pos % 2 else part
                for pos, part in enumerate(re.split(r'(\d+)', path))]

    files = sorted(glob.glob(filespath), key=_natural_key)
    if not files:
        raise FileNotFoundError(f"No files match pattern: {filespath}")

    values_per_file = []
    for file in files:
        with open(file, 'r') as f:
            json_data = json.load(f)

        values_per_file.append({
            value['k']: get_column_from_json_for_heatmap(value["motif_subspaces"], column)
            for value in json_data
        })

    # Plain nested lists (not a float array) because the cells hold lists.
    n_columns = len(values_per_file[0])
    matrix = [[0] * n_columns for _ in values_per_file]
    for i, per_dimension in enumerate(values_per_file):
        for key, extracted in per_dimension.items():
            matrix[i][key] = extracted

    df = pd.DataFrame(matrix)

    return df

In [None]:
def different_csvs_for_subspaces_heatmap(subspaces_path, output_path, column):
    """Export one JSON column of all subspace files as a CSV in heatmap row order.

    The table from `build_heatmap_subspaces_column` is written with its rows
    reversed (last file first) and the index named 'Motifs_Count'.

    Parameters
    ----------
    subspaces_path : str
        Glob pattern of the subspace JSON files.
    output_path : str
        Destination CSV path.
    column : str
        Key to extract from each motif entry.
    """
    table = build_heatmap_subspaces_column(subspaces_path, column)
    reversed_table = table.reindex(index=table.index[::-1])
    reversed_table.rename_axis('Motifs_Count').to_csv(output_path)

In [None]:
def plot_all_dimensions(df, filepath, sensor_id_type_mapping):
    """Plot every column of `df` in its own stacked panel, save and show the figure.

    Parameters
    ----------
    df : pandas.DataFrame
        One column per dimension.
    filepath : str
        Where the figure is saved.
    sensor_id_type_mapping : dict
        Column name -> legend label.
    """
    # squeeze=False always yields a 2-D array of axes. Without it a
    # single-column frame returns a bare Axes object and `axs[k]` fails.
    fig, axs = plt.subplots(df.shape[1], sharex=True, squeeze=False,
                            gridspec_kw={'hspace': 0}, figsize=(25, df.shape[1] * 5))
    axs = axs.ravel()

    for k, dim_name in enumerate(df.columns):
        axs[k].set_ylabel(dim_name, fontsize=10)
        axs[k].set_xlabel('Time', fontsize=10)
        axs[k].plot(df[dim_name], label=sensor_id_type_mapping[dim_name])
        axs[k].legend(loc="upper right")

    plt.savefig(filepath, transparent=False, bbox_inches='tight')
    plt.show()

## Creating Folders and Reading Preprocessed Data Files

In [None]:
create_directory(directory="Results_Subspaces")

In [None]:
sensor_location_type_ids = pd.read_csv('./Processed_Data/Sensor_Location_Type_Ids.csv', index_col='name')

In [None]:
# Reading Dictionary of sensor names 
with open('./Processed_Data/sensor_type_names_dict.pkl', 'rb') as f:
    sensor_type_names_dict = pickle.load(f)

In [None]:
location_input = input("Enter a Specific Location For MMP: ")

In [None]:
directory = "Results_Subspaces/" + location_input.replace('/', '_')
create_directory(directory)

In [None]:
subspaces_directory = directory + "/Subspaces"
create_directory(subspaces_directory)

In [None]:
aggregated_subspaces_directory = subspaces_directory + "/Aggregated"
create_directory(aggregated_subspaces_directory)

In [None]:
mdls_directory = directory + "/MDLs"
create_directory(mdls_directory)

In [None]:
aggregated_mdls_directory = mdls_directory + "/Aggregated"
create_directory(aggregated_mdls_directory)

In [None]:
# Getting list of sensor_ids/columns

# Collect the sensor ids (CSV column names) to load for the chosen location.
# `columns` receives the ids of every sensor type; the 'bool' and 'decimal'
# ids are additionally kept in their own lists.
columns = []
bool_columns = []
decimal_columns = []

for key in sensor_type_names_dict.keys():
    # NOTE(review): eval() executes whatever text the '<type>_list' cell of
    # the CSV contains; only use this with trusted input files.
    if key == 'bool':
        bool_columns.extend(eval(sensor_location_type_ids.loc[f'{key}_list', location_input]))
    elif key == 'decimal':
        decimal_columns.extend(eval(sensor_location_type_ids.loc[f'{key}_list', location_input]))   
    columns.extend(eval(sensor_location_type_ids.loc[f'{key}_list', location_input]))
    
# 'Timestamp' is loaded as well; the read_csv cell below uses it as the index.
columns.append('Timestamp')

In [None]:
# Sensor id -> value type. Decimal ids are inserted first; an id that is also
# listed as bool ends up tagged "bool".
sensor_type_id = {
    **dict.fromkeys(decimal_columns, "decimal"),
    **dict.fromkeys(bool_columns, "bool"),
}

In [None]:
final_data = pd.read_csv("./Processed_Data/Final_Sensor_Time_Series_Imputed.csv", usecols=columns, 
                         index_col='Timestamp', parse_dates=True)


In [None]:
final_data

In [None]:
# Create mapping of Sensor id to Device Type 

# Raw row-name -> cell text for the chosen location (kept for inspection).
sensors_id_type = sensor_location_type_ids.loc[:, location_input].to_dict()

# Reuse the helper defined above instead of repeating its loop here, so the
# parsing rules live in one place.
sensor_id_type_mapping = create_sensor_id_type_mapping(location_input)

In [None]:
sensor_id_type_mapping

In [None]:
plot_all_dimensions(final_data, f"{directory}/All_Dimensions.jpg", sensor_id_type_mapping)

In [None]:
# Optionally restrict the analysis to a user-chosen subset of sensor columns.
select_choice = input("Do You Want Filter Specific Columns to be used for MMP Calculation"
                      "\n\t Press 1 to Select the Columns"
                      "\n\t Press Any Other Key to Use All the Columns (No Filtering):\n")

# Work on a copy so that `final_data` keeps every loaded column; the
# aggregation cells below still read from it.
new_final_data = final_data.copy()

if select_choice == '1':
    select_dimensions = input("Enter Comma Separated Sensor Ids to be used for MMP Calculation: ")
    # Split the answer on commas and trim surrounding whitespace.
    columns_to_use = select_dimensions.split(',')
    columns_to_use = [value.strip() for value in columns_to_use]
    
    new_final_data = final_data[columns_to_use]
    plot_all_dimensions(new_final_data, f"{directory}/After_Filtering_Dimensions.jpg", sensor_id_type_mapping)

## Select The Columns Which Need to be Aggregated Using Mean

In [None]:
# Ask repeatedly for groups of columns to be replaced by their mean.
select_choice_mean = input("Do You Want to Calculate Mean over Specific Selection of Columns"
                      "\n\t Press 1 to Select the Columns to Calculate Mean"
                      "\n\t Press Any Other Key For No Mean:\n")

# 'Mean_<i>' -> list of source sensor ids; `i` numbers the groups from 1.
mean_column_pairs = {}
i = 1

while select_choice_mean == '1':
    select_dimensions_mean = input("Enter Comma Separated Sensor Ids to Calculate Mean: ")
    
    # Split the answer on commas and trim surrounding whitespace.
    columns_for_mean = select_dimensions_mean.split(',')
    columns_for_mean = [value.strip() for value in columns_for_mean]
    
    mean_column_pairs[f'Mean_{i}'] = columns_for_mean
    # The new column is labelled with the raw input string in plot legends
    # and gets its own sensor-type entry.
    sensor_id_type_mapping[f'Mean_{i}'] = select_dimensions_mean
    sensor_type_id[f"Mean_{i}"] = f"Mean Aggregated {i}"
    
    select_choice_mean = input("Do You Want to Calculate Mean for Some Other Selection of Columns"
                      "\n\t Press 1 to Select the Columns to Calculate Mean"
                      "\n\t Press Any Other Key For No More Mean Calculation:\n")
    i += 1
    
    


In [None]:
if mean_column_pairs:
    mean_columns = []

    # Add one 'Mean_<i>' column per group, computed from the unfiltered data.
    for key, value in mean_column_pairs.items():
        new_final_data[key] = final_data[value].mean(axis=1)
        mean_columns.extend(value)

    # Drop the aggregated source columns while keeping the remaining columns
    # in their existing order. The original rebuilt the column list from a
    # set difference, whose order is arbitrary, so the dimension index k
    # could differ between runs.
    aggregated_away = set(mean_columns)
    new_final_data = new_final_data[[name for name in new_final_data.columns if name not in aggregated_away]]
    plot_all_dimensions(new_final_data, f"{directory}/After_Applying_Mean_Aggregation_to_Dimensions.jpg", sensor_id_type_mapping)

## Select The Columns Which Need to be Aggregated Using Sum

In [None]:
# Ask repeatedly for groups of columns to be replaced by their sum.
select_choice_sum = input("Do You Want to Calculate Sum over Specific Selection of Columns"
                      "\n\t Press 1 to Select the Columns to Calculate Sum"
                      "\n\t Press Any Other Key For No Sum:\n")

# 'Sum_<i>' -> list of source sensor ids; `i` numbers the groups from 1.
sum_column_pairs = {}
i = 1

while select_choice_sum == '1':
    select_dimensions_sum = input("Enter Comma Separated Sensor Ids to Calculate Sum: ")
    
    # Split the answer on commas and trim surrounding whitespace.
    columns_for_sum = select_dimensions_sum.split(',')
    columns_for_sum = [value.strip() for value in columns_for_sum]
    
    sum_column_pairs[f'Sum_{i}'] = columns_for_sum
    # The new column is labelled with the raw input string in plot legends
    # and gets its own sensor-type entry.
    sensor_id_type_mapping[f'Sum_{i}'] = select_dimensions_sum
    sensor_type_id[f"Sum_{i}"] = f"Sum Aggregated {i}"
    
    select_choice_sum = input("Do You Want to Calculate Sum for Some Other Selection of Columns"
                      "\n\t Press 1 to Select the Columns to Calculate Sum"
                      "\n\t Press Any Other Key For No More Sum Calculation:\n")
        
    i += 1
    


In [None]:
if sum_column_pairs:
    sum_columns = []

    # Add one 'Sum_<i>' column per group, computed from the unfiltered data.
    for key, value in sum_column_pairs.items():
        new_final_data[key] = final_data[value].sum(axis=1)
        sum_columns.extend(value)

    # Drop the aggregated source columns while keeping the remaining columns
    # in their existing order. The original rebuilt the column list from a
    # set difference, whose order is arbitrary, so the dimension index k
    # could differ between runs.
    aggregated_away = set(sum_columns)
    new_final_data = new_final_data[[name for name in new_final_data.columns if name not in aggregated_away]]
    plot_all_dimensions(new_final_data, f"{directory}/After_Applying_Sum_Aggregation_to_Dimensions.jpg", sensor_id_type_mapping)

In [None]:
device_type_id = pd.DataFrame([sensor_id_type_mapping])

In [None]:
device_type_id 

In [None]:
sensor_type_id = pd.DataFrame([sensor_type_id])

In [None]:
sensor_type_id

In [None]:
new_final_data

In [None]:
new_final_data.to_csv(f"{directory}/Aggregated_df.csv")

### MPS Calculation Using Constraint Algorithm For Aggregated Dimensions (Weekly Rhythm)

In [None]:
mps, indices = mps_calculations_mstump_m(new_final_data, 
                                         f"{directory}/Aggregated_Dimensions_mstump_m_Weekly.jpg", 
                                         m=2016)

### Applying Subspaces Function For Aggregated Dimensions With Top K Motifs (Weekly Rhythm)

In [None]:
threshold_input = input("How Many Times Do you want to select Top K Motifs (Build Json Data): ")

In [None]:
top_k_motifs_subspaces_function(new_final_data, mps, sensor_type_id, 
                                    device_type_id, threshold_input, sensor_id_type_mapping,
                                f"{aggregated_subspaces_directory}", m=2016)

### Applying MDL Function For Aggregated Dimensions With Top K Motifs (Weekly Rhythm)

In [None]:
top_k_motifs_mdl_function(new_final_data, mps, sensor_type_id, device_type_id, 
                          threshold_input, f"{aggregated_mdls_directory}", m=2016)

### Calculating the Change in the Subspaces Using Subspaces Function For Aggregated Dimensions and Building a Heatmap 

In [None]:
calculate_change_count_and_build_heatmap(f"{aggregated_subspaces_directory}/*.json", 
                                         f"{aggregated_subspaces_directory}/heatmap.jpg")

### Calculating the Change in the Subspaces Using MDL Function For Aggregated Dimensions and Building a Heatmap 

In [None]:
calculate_change_count_and_build_heatmap(f"{aggregated_mdls_directory}/*.json", 
                                         f"{aggregated_mdls_directory}/heatmap.jpg")

### Comparison Between Subspaces of MDL and Subspaces Function For Aggregated Dimensions and Building a Heatmap

In [None]:
df, dimensions_change_count_motifs = compare_subspaces_with_mdl_heatmap(f"{aggregated_subspaces_directory}", 
                                                                        f"{aggregated_mdls_directory}")

In [None]:
create_heatmap(df, f"{directory}/Aggregated_Subspaces_vs_MDL_heatmap.jpg", dimensions_change_count_motifs)

### Getting Sensor_Ids For Each Quadrant of the Heatmap For Aggregated Dimensions

In [None]:
column = "subspace_sensor_ids"
different_csvs_for_subspaces_heatmap(f"{aggregated_subspaces_directory}/*.json", 
                                     f"{aggregated_subspaces_directory}/heatmap_{column}.csv",
                                     column)

### Getting Sensor_Types For Each Quadrant of the Heatmap For Aggregated Dimensions

In [None]:
column = "subspace_sensor_types"
different_csvs_for_subspaces_heatmap(f"{aggregated_subspaces_directory}/*.json",
                                     f"{aggregated_subspaces_directory}/heatmap_{column}.csv",
                                     column)

### Getting Device_Types For Each Quadrant of the Heatmap For Aggregated Dimensions

In [None]:
column = "subspace_device_types"
different_csvs_for_subspaces_heatmap(f"{aggregated_subspaces_directory}/*.json",
                                     f"{aggregated_subspaces_directory}/heatmap_{column}.csv",
                                     column)