In [None]:
from plotnine import *
from scipy.spatial import distance
import pandas as pd
from dtw import *
import dtaidistance as dd
from dtaidistance import dtw_visualisation as dtwvis

In [None]:
%matplotlib inline
joined_aggregation_all = pd.read_csv("Organised/aggregation/joined_aggregation.csv")
joined_aggregation = joined_aggregation_all[joined_aggregation_all["snapshot"] == 1]

(ggplot(joined_aggregation)         # defining what data to use
  + aes(x='epoch', y = "CPU")    # defining what variable to use
  + geom_line(size=0.5, color = "red") # defining the type of plot to use
 )

In [None]:
# Load the "6_smooth" aggregation measurements, then keep only the rows of snapshot 1.
joined_aggregation_smooth_all = pd.read_csv("Organised/aggregation/6_smooth_aggregation.csv")
is_first_smooth_snapshot = joined_aggregation_smooth_all["snapshot"] == 1
joined_aggregation_smooth = joined_aggregation_smooth_all.loc[is_first_smooth_snapshot]

# CPU against epoch for snapshot 1 of the smoothed data, drawn as a thin red line.
(
    ggplot(joined_aggregation_smooth, aes(x="epoch", y="CPU"))
    + geom_line(size=0.5, color="red")
)

In [None]:
def calculate_euclidean_distance(data, snapshot1, snapshot2, max_length=None):
    """Return the Euclidean distance between two snapshots' CPU and RAM series.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with at least the columns ``snapshot``, ``CPU`` and ``RAM``.
    snapshot1, snapshot2 : scalar
        Values of the ``snapshot`` column selecting the two series.
    max_length : int, optional
        Upper bound on the number of leading rows taken from each series.
        ``None`` means "no explicit bound".

    Returns
    -------
    tuple of float
        ``(cpu_distance, ram_distance)``.

    Raises
    ------
    ValueError
        If ``data`` holds no rows for ``snapshot1`` or for ``snapshot2``.
    """
    a = data[data["snapshot"] == snapshot1]
    b = data[data["snapshot"] == snapshot2]
    if a.empty or b.empty:
        raise ValueError(
            "No rows found for snapshot {!r} and/or snapshot {!r}".format(snapshot1, snapshot2)
        )
    if max_length is not None:
        a = a.head(max_length)
        b = b.head(max_length)
    # The Euclidean distance is only defined for vectors of equal length, so
    # both series are cut to the shorter one instead of letting scipy fail
    # when max_length exceeds the length of either series.
    common_length = min(len(a), len(b))
    a = a.iloc[:common_length]
    b = b.iloc[:common_length]
    cpu_dst = distance.euclidean(a['CPU'].to_numpy(), b['CPU'].to_numpy())
    ram_dst = distance.euclidean(a['RAM'].to_numpy(), b['RAM'].to_numpy())
    return cpu_dst, ram_dst

def calculate_dtw_dtw_library(data, snapshot1, snapshot2, data2=pd.DataFrame()):
    """Compute DTW distances between two snapshots with the ``dtw`` library.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame providing the first series; needs the columns ``snapshot``,
        ``CPU`` and ``RAM``.
    snapshot1 : scalar
        Value of ``snapshot`` selecting the first series in ``data``.
    snapshot2 : scalar
        Value of ``snapshot`` selecting the second series in ``data2``.
    data2 : pandas.DataFrame, optional
        Frame providing the second series. When omitted, ``None`` or empty,
        the second series is taken from ``data`` as well.

    Returns
    -------
    tuple
        ``(cpu_normalized_distance, cpu_distance,
        ram_normalized_distance, ram_distance)``.
    """
    if data2 is None or data2.empty:
        data2 = data
    a = data[data["snapshot"] == snapshot1]
    b = data2[data2["snapshot"] == snapshot2]
    # Only the scalar distances are returned, so the alignments do not need
    # keep_internals=True (which would retain the cost matrices and inputs).
    alignment_cpu = dtw(a['CPU'], b['CPU'])
    alignment_ram = dtw(a['RAM'], b['RAM'])
    return (
        alignment_cpu.normalizedDistance,
        alignment_cpu.distance,
        alignment_ram.normalizedDistance,
        alignment_ram.distance,
    )

def calculate_dtw_dtaidistance_library(data, snapshot1, snapshot2, data2=pd.DataFrame()):
    """Compute DTW distances between two snapshots with the ``dtaidistance`` library.

    The first series is the ``snapshot1`` rows of ``data``; the second one is
    the ``snapshot2`` rows of ``data2`` (or of ``data`` when ``data2`` is
    empty). Returns ``(cpu_distance, ram_distance)``.
    """
    other = data if data2.empty else data2
    first_rows = data.loc[data["snapshot"] == snapshot1]
    second_rows = other.loc[other["snapshot"] == snapshot2]
    # Same computation for both metrics, CPU first and RAM second.
    return tuple(
        dd.dtw.distance(first_rows[column].to_numpy(), second_rows[column].to_numpy())
        for column in ("CPU", "RAM")
    )

In [None]:
def draw_comparison_diagram(data, snapshot1, snapshot2, title="Title", y_data = "CPU", data2=pd.DataFrame(), max_length=496):
    """Plot two snapshot series and report their distances in the plot title.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame providing the first series; needs the columns ``snapshot``,
        ``epoch``, ``CPU`` and ``RAM``.
    snapshot1 : scalar
        Value of ``snapshot`` selecting the first series in ``data``.
    snapshot2 : scalar
        Value of ``snapshot`` selecting the second series in ``data2``.
    title : str
        First line of the plot title; the distance lines are appended to it.
    y_data : {"CPU", "RAM"}
        Column drawn on the y axis and metric whose distances are reported.
    data2 : pandas.DataFrame, optional
        Frame providing the second series. When omitted, ``None`` or empty,
        the second series is taken from ``data`` as well.
    max_length : int
        Number of leading rows used for the Euclidean distance.

    Returns
    -------
    The plotnine plot built by ``qplot`` with a fixed figure size.

    Raises
    ------
    ValueError
        If ``y_data`` is neither ``"CPU"`` nor ``"RAM"``, or if one of the two
        selected series has no rows.

    Notes
    -----
    The plot is faceted and coloured by the ``snapshot`` column, so two series
    that share the same snapshot id end up in the same facet.
    """
    if y_data not in ("CPU", "RAM"):
        # Distances are only computed for CPU and RAM; any other column would
        # be plotted under a title reporting the distances of another metric.
        raise ValueError("y_data must be 'CPU' or 'RAM', got {!r}".format(y_data))
    if data2 is None or data2.empty:
        data2 = data
    a = data[data["snapshot"] == snapshot1]
    b = data2[data2["snapshot"] == snapshot2]
    if a.empty or b.empty:
        # An empty frame would be mistaken by the DTW helpers for "data2 not given".
        raise ValueError(
            "No rows found for snapshot {!r} in data and/or snapshot {!r} in data2".format(
                snapshot1, snapshot2
            )
        )
    comparison_data = pd.concat([a, b])

    # The Euclidean helper selects both series from a single frame by snapshot
    # id. Relabel the two series with distinct ids so that they stay separate
    # even when snapshot1 == snapshot2 but the rows come from different frames.
    euclidean_input = pd.concat([a.assign(snapshot=0), b.assign(snapshot=1)])
    euc_cpu_dist, euc_ram_dist = calculate_euclidean_distance(euclidean_input, 0, 1, max_length)
    dtw_cpu_dist_normalized, dtw_cpu_dist, dtw_ram_dist_normalized, dtw_ram_dist = \
        calculate_dtw_dtw_library(a, snapshot1, snapshot2, b)
    dtaidistance_cpu, dtaidistance_ram = calculate_dtw_dtaidistance_library(a, snapshot1, snapshot2, b)

    distances_by_metric = {
        "CPU": (euc_cpu_dist, dtw_cpu_dist_normalized, dtw_cpu_dist, dtaidistance_cpu),
        "RAM": (euc_ram_dist, dtw_ram_dist_normalized, dtw_ram_dist, dtaidistance_ram),
    }
    euc_dist, dtw_dist_normalized, dtw_dist, dtaidistance_dist = distances_by_metric[y_data]

    full_title = (
        title
        + "\nEuclidean distance:" + str(round(euc_dist, 2))
        + "\nDTW normalized distance - dtw library:" + str(round(dtw_dist_normalized, 2))
        + "\nDTW distance - dtw library:" + str(round(dtw_dist, 2))
        + "\nDTW distance - dtaidistance library:" + str(round(dtaidistance_dist, 2))
    )
    plot = qplot(
        data=comparison_data, x="epoch", y=y_data,
        facets="snapshot",
        color="factor(snapshot)",
        geom="line",
        xlab="Epoch", ylab=y_data,
        main=full_title,
    )
    return plot + theme(figure_size=(11,6))

In [None]:
# Sanity check: compare snapshot 1 of the aggregation data with itself.
print(draw_comparison_diagram(
    joined_aggregation_all,
    snapshot1=1,
    snapshot2=1,
    title="Two identical data",
))

In [None]:
# Pairwise comparison of aggregation snapshots 0, 1 and 2:
# all CPU diagrams first, then all RAM diagrams.
for metric in ("CPU", "RAM"):
    for first, second in ((0, 1), (0, 2), (1, 2)):
        print(draw_comparison_diagram(
            joined_aggregation_all, first, second,
            f"Aggregation {first} {second}", metric,
        ))

In [None]:
# Pairwise comparison of the smoothed aggregation snapshots 0, 1 and 2:
# all CPU diagrams first, then all RAM diagrams.
for metric in ("CPU", "RAM"):
    for first, second in ((0, 1), (0, 2), (1, 2)):
        print(draw_comparison_diagram(
            joined_aggregation_smooth_all, first, second,
            f"Smooth aggregation {first} {second}", metric,
        ))

In [None]:
# Compare two different functions
joined_filtration_all = pd.read_csv("Organised/filtration/6_smooth_filtration.csv")

# (snapshot taken from the aggregation frame, snapshot taken from the
# filtration frame, max_length used for the Euclidean distance).
# NOTE(review): 151 / 139 are presumably the lengths of the shorter series — confirm.
comparison_cases = ((0, 1, 151), (0, 2, 139), (1, 2, 139))

# All CPU diagrams first, then all RAM diagrams.
for metric in ("CPU", "RAM"):
    for first, second, length in comparison_cases:
        print(draw_comparison_diagram(
            joined_aggregation_smooth_all, first, second,
            f"Aggregation vs Filtration {first} {second}", metric,
            joined_filtration_all, length,
        ))

In [None]:
# Print the built-in help text for `DTW`. The name is not defined in this
# notebook, so it presumably comes from a star import (`from dtw import *`) —
# confirm. Exploratory cell with no effect on the analysis above.
help(DTW)