In [None]:
import warnings

# Wildcard imports stay first, in their original relative order, so that the
# explicit imports below win any name clash.
from plotnine import *
from dtw import *

import dtaidistance as dd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from dtaidistance import dtw_visualisation as dtwvis
from scipy.spatial import distance
from tslearn import metrics
from tssearch import plot_alignment
from tssearch.search.query_search import time_series_search

In [None]:
def calculate_euclidean_distance(a_cpu, b_cpu, a_ram, b_ram):
    """Return the Euclidean distances between two CPU and two RAM series.

    Args:
        a_cpu, b_cpu: CPU series of the first and second sample.
        a_ram, b_ram: RAM series of the first and second sample.

    Returns:
        Tuple ``(cpu_distance, ram_distance)``.

    NOTE(review): presumably both series of a pair must have the same
    length for the point-wise distance to be defined - confirm with callers.
    """
    series_pairs = ((a_cpu, b_cpu), (a_ram, b_ram))
    return tuple(distance.euclidean(first, second) for first, second in series_pairs)

def calculate_dtw_dtw_library(a_cpu, b_cpu, a_ram, b_ram):
    """Compute DTW alignments for the CPU and RAM series with the ``dtw`` package.

    Both alignments use the ``symmetric2`` step pattern.

    Returns:
        Tuple ``(cpu_normalized, cpu_distance, ram_normalized, ram_distance)``
        taken from the ``normalizedDistance`` and ``distance`` attributes of
        the two alignment objects.
    """
    def _align(query, reference):
        # keep_internals=True keeps the alignment internals on the result, so
        # it can be inspected with e.g.
        # alignment.plot(type="twoway", offset=80) followed by plt.show().
        return dtw(query, reference, step_pattern=symmetric2, keep_internals=True)

    cpu_alignment = _align(a_cpu, b_cpu)
    ram_alignment = _align(a_ram, b_ram)
    return (
        cpu_alignment.normalizedDistance,
        cpu_alignment.distance,
        ram_alignment.normalizedDistance,
        ram_alignment.distance,
    )

def calculate_dtw_dtaidistance_library(a_cpu, b_cpu, a_ram, b_ram):
    """Compute DTW distances for the CPU and RAM series with ``dtaidistance``.

    Args:
        a_cpu, b_cpu: CPU series of the first and second sample.
        a_ram, b_ram: RAM series of the first and second sample.

    Returns:
        Tuple ``(cpu_distance, ram_distance)``.
    """
    def _as_double(series):
        # The CPU call requests the C backend, which per the dtaidistance
        # documentation expects double-precision arrays; casting up front
        # keeps integer-valued inputs usable.
        return np.array(series, dtype=np.double)

    d_cpu = dd.dtw.distance(_as_double(a_cpu), _as_double(b_cpu), use_c=True, use_pruning=True)
    # To inspect the CPU warping path:
    #   path = dd.dtw.warping_path(_as_double(a_cpu), _as_double(b_cpu))
    #   dtwvis.plot_warping(_as_double(a_cpu), _as_double(b_cpu), path)

    # NOTE(review): the RAM distance is computed with the library defaults
    # (no use_c / use_pruning), unlike the CPU distance - confirm whether
    # this asymmetry is intentional.
    d_ram = dd.dtw.distance(_as_double(a_ram), _as_double(b_ram))
    return d_cpu, d_ram

def calculate_lcss(a_cpu, b_cpu, a_ram, b_ram, eps_cpu=2.0, eps_ram=1.5):
    """Compute the LCSS scores of the CPU and RAM series with ``tslearn``.

    Args:
        a_cpu, b_cpu: CPU series of the first and second sample.
        a_ram, b_ram: RAM series of the first and second sample.
        eps_cpu: ``eps`` passed to ``metrics.lcss_path`` for the CPU pair.
        eps_ram: ``eps`` passed to ``metrics.lcss_path`` for the RAM pair.

    Returns:
        Tuple ``(cpu_score, ram_score)``: the second element returned by
        ``metrics.lcss_path`` for each pair (the matching path is discarded).

    NOTE(review): the default thresholds differ between CPU (2.0) and RAM
    (1.5); confirm that this is intentional.
    """
    _, sim_lcss_cpu = metrics.lcss_path(a_cpu, b_cpu, eps=eps_cpu)
    _, sim_lcss_ram = metrics.lcss_path(a_ram, b_ram, eps=eps_ram)
    return sim_lcss_cpu, sim_lcss_ram

def calculate_lcss2(a_cpu, b_cpu, a_ram, b_ram, eps=1.5):
    """Compute LCSS values for the CPU and RAM series with ``tssearch``.

    Args:
        a_cpu, b_cpu: CPU series of the first and second sample.
        a_ram, b_ram: RAM series of the first and second sample.
        eps: ``eps`` parameter of the ``lcss`` distance (default 1.5).

    Returns:
        Tuple ``(cpu_value, ram_value)``: the ``"distance"`` entry reported by
        ``time_series_search`` for each pair.
    """
    metric_name = "Longest Common Subsequence"
    dict_distances = {
        "elastic": {metric_name: {
            "multivariate": "yes",
            "description": "",
            "function": "lcss",
            "parameters": {"eps": eps, "report": "distance"},
            "use": "yes"}
        }
    }

    def _lcss_distance(first, second):
        # The full search result also carries a "path" entry that can be
        # visualised with
        # plot_alignment(first, second, result[metric_name]["path"][0]).
        result = time_series_search(
            dict_distances, np.array(first), np.array(second), output=("number", 1)
        )
        return result[metric_name]["distance"]

    sim_lcss_cpu = _lcss_distance(a_cpu, b_cpu)
    sim_lcss_ram = _lcss_distance(a_ram, b_ram)
    return sim_lcss_cpu, sim_lcss_ram

## Creating the comparison DataFrame

In [None]:
# Load the test dataset; the cells below read its CPU, RAM, label, udf, size
# and snapshot columns.
full_df = pd.read_csv(
    "MachineLearning/ts_datasets/Default/Default_TEST.csv"
)
# Largest value found in the `snapshot` column.
max_snapshot = full_df.loc[:, "snapshot"].max()

In [None]:
def calculate_distance_and_save_as_df(df, data, data2, normalized=0, smoothed=0):
    """Append one CPU row and one RAM row comparing ``data`` with ``data2``.

    Despite the name, nothing is written to disk: a new DataFrame containing
    the rows of ``df`` followed by the two new rows is returned.

    Args:
        df: DataFrame collecting the comparison rows (may be empty or None).
        data: Rows of the first sample; must provide the columns ``CPU``,
            ``RAM``, ``label``, ``snapshot``, ``udf`` and ``size``.
        data2: Rows of the second sample, with the same columns.
        normalized: Flag stored verbatim in the ``is_normalized`` column.
        smoothed: Flag stored verbatim in the ``is_smoothed`` column.

    Returns:
        DataFrame with the two new rows appended and a fresh 0..n-1 index.
    """
    a_cpu = list(data['CPU'])
    b_cpu = list(data2['CPU'])
    a_ram = list(data['RAM'])
    b_ram = list(data2['RAM'])

    euc_cpu_dist, euc_ram_dist = calculate_euclidean_distance(a_cpu, b_cpu, a_ram, b_ram)
    dtw_cpu_dist_normalized, dtw_cpu_dist, dtw_ram_dist_normalized, dtw_ram_dist = \
        calculate_dtw_dtw_library(a_cpu, b_cpu, a_ram, b_ram)
    # The dtaidistance-based DTW variant is currently not part of the rows.
    lcss_distance_cpu, lcss_distance_ram = calculate_lcss(a_cpu, b_cpu, a_ram, b_ram)

    # Metadata shared by both rows; computed once instead of once per row.
    function1, snapshot1, udf1, size1 = (
        data[name].max() for name in ('label', 'snapshot', 'udf', 'size')
    )
    function2, snapshot2, udf2, size2 = (
        data2[name].max() for name in ('label', 'snapshot', 'udf', 'size')
    )

    def _build_row(data_type, euclidean, dtw_distance, dtw_normalized, lcss_similarity):
        # Key order is kept stable because it determines the column order
        # when ``df`` has no columns yet.
        return {
            'dataType': data_type,
            'function1': function1,
            'snapshot1': snapshot1,
            'udf1': udf1,
            'function2': function2,
            'snapshot2': snapshot2,
            'udf2': udf2,
            'euclidean': euclidean,
            'dtw-python-distance': dtw_distance,
            'dtw-python-normalizedDistance': dtw_normalized,
            'lcss_similarity': lcss_similarity,
            'is_normalized': normalized,
            'is_smoothed': smoothed,
            'size1': size1,
            'size2': size2,
        }

    new_rows = pd.DataFrame([
        # The CPU row must carry the CPU normalized DTW distance (the RAM
        # value was stored here before).
        _build_row('CPU', euc_cpu_dist, dtw_cpu_dist, dtw_cpu_dist_normalized, lcss_distance_cpu),
        _build_row('RAM', euc_ram_dist, dtw_ram_dist, dtw_ram_dist_normalized, lcss_distance_ram),
    ])

    if df is None or df.empty:
        # Nothing to concatenate with: keep the existing column order of
        # ``df`` and put any columns it does not know yet at the end.
        existing = [] if df is None else list(df.columns)
        ordered = existing + [name for name in new_rows.columns if name not in existing]
        return new_rows.reindex(columns=ordered)

    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
    return pd.concat([df, new_rows], ignore_index=True)

In [None]:
# One row per distinct `snapshot` value of full_df, re-indexed from 0.
full_df_snapshots = (
    full_df
    .groupby('snapshot')[['snapshot']]
    .max()
    .reset_index(drop=True)
)
full_df_snapshots

In [None]:
# Column layout of the comparison table / CSV header.
# 'udf1' and 'udf2' are part of every row built by
# calculate_distance_and_save_as_df; they are listed last because that is
# where they end up when rows are added to a frame created from this list.
columns = [
    'dataType', 'function1', 'snapshot1', 'function2', 'snapshot2',
    'euclidean', 'dtw-python-distance', 'dtw-python-normalizedDistance',
    'lcss_similarity', 'is_normalized', 'is_smoothed', 'size1', 'size2',
    'udf1', 'udf2',
]

In [None]:
# Compare every snapshot with itself and with every later snapshot, and
# stream the results to one CSV file, one chunk per outer snapshot.
warnings.filterwarnings('ignore')

output_path = "SimilaritiesData/comparison_distance_all.csv"

# Create (or truncate) the output file so that it exists even when there is
# nothing to compare.
dataFrame2 = pd.DataFrame([], columns = columns)
dataFrame2.to_csv(output_path, index=False)

# Filter each snapshot's rows once instead of once per compared pair.
data_by_snapshot = {
    snapshot: full_df[full_df["snapshot"] == snapshot]
    for snapshot in full_df_snapshots['snapshot']
}

# The header is written together with the first chunk so that it always
# matches the columns actually present in the data rows.
header_pending = True
for i in full_df_snapshots.index:
    dataFrame2 = pd.DataFrame([], columns = columns)
    s1 = full_df_snapshots.loc[i, 'snapshot']
    data_1 = data_by_snapshot[s1]
    for j in full_df_snapshots.index:
        if j < i:
            # Pair (j, i) was already covered when j was the outer snapshot.
            continue
        s2 = full_df_snapshots.loc[j, 'snapshot']
        data_2 = data_by_snapshot[s2]
        dataFrame2 = calculate_distance_and_save_as_df(
            dataFrame2, data_1, data_2, normalized=0, smoothed=1
        )
    # index=False: the running row index is not a declared column and would
    # shift every data row by one field relative to the header.
    dataFrame2.to_csv(
        output_path,
        mode='w' if header_pending else 'a',
        header=header_pending,
        index=False,
    )
    header_pending = False
    print(f"Przetworzono {i}")
