In [None]:
from helper import *

import pandas as pd
import numpy as np
import os
import csv
import sys
import matplotlib
import matplotlib.pyplot as plt
# Default figure size as [width, height]; matplotlib interprets figsize in inches.
plt.rcParams['figure.figsize'] = [16, 8]
# use LaTeX fonts in the plot: select the PGF backend and let LaTeX render all text
matplotlib.use("pgf")
matplotlib.rcParams.update({
    "pgf.texsystem": "pdflatex",  # TeX engine used for PGF output
    'font.family': 'serif',
    'text.usetex': True,  # render text through LaTeX
    'pgf.rcfonts': False,  # do not set up fonts from matplotlib's rc parameters
})
%matplotlib inline

In [None]:
# Evaluation configuration: where result files are read from and which
# variants are compared against each other.
OUTPUT = "single"  # prefix of every generated .pgf / .tex file
EVAL_DIR = "/home/felix/todo/algohol/single"
AREAS = ["baden-wuerttemberg"]
SKIP_COUNTS = False  # set to True to skip the cells of the "Count" section

MLP_METHODS = ["kmeans", "gonzalez", "merge"]
# One single-entry level list per power of two: 2^9, 2^10, 2^11, 2^12.
MLP_LEVELS = [[2 ** exponent] for exponent in range(9, 13)]

FAST_QUERY_METHODS = ["pcrp", "pch", "prp"]
QUERY_METHODS = ["normal", *FAST_QUERY_METHODS]

print(MLP_METHODS, "with", MLP_LEVELS)

# Time

In [None]:
# Load the timing results of every evaluated configuration into `df_query`.
#
# Baseline queries ("normal" / "bi") have one file per area; the other query
# methods have one file per (area, MLP method, partition levels). Each frame is
# tagged with the configuration it belongs to. Baseline frames carry no
# "Mlp" / "Mlp_partitions" tags, so those columns are NaN for their rows after
# concatenation.
#
# The frames are collected in a list and concatenated once: DataFrame.append
# was removed in pandas 2.0 and growing a frame inside a loop is quadratic.
query_frames = []
for area in AREAS:
    for query in QUERY_METHODS:
        if query == "normal" or query == "bi":
            tmp = pd.read_json(EVAL_DIR + "/" + area + "-" + query + "-time.json")
            tmp["Area"] = area
            tmp["Query"] = query
            query_frames.append(tmp)
        else:
            for mlp in MLP_METHODS:
                for partitions in MLP_LEVELS:
                    partition_tag = "_".join(map(str, partitions))
                    tmp = pd.read_json(EVAL_DIR + "/" + area + "-" + mlp + "-" + partition_tag + "-" + query + "-time.json")
                    tmp["Area"] = area
                    tmp["Query"] = query
                    tmp["Mlp"] = mlp
                    tmp["Mlp_partitions"] = partition_tag
                    query_frames.append(tmp)

# pd.concat raises on an empty list; fall back to an empty frame like the
# original accumulator did when nothing was loaded.
df_query = pd.concat(query_frames, ignore_index=True) if query_frames else pd.DataFrame()

In [None]:
# Box plot of the raw query times: one box per combination of area, fast query
# method, MLP method and partition levels.
configurations = [
    (area, query, mlp, "_".join(map(str, partitions)))
    for area in AREAS
    for query in FAST_QUERY_METHODS
    for mlp in MLP_METHODS
    for partitions in MLP_LEVELS
]

data = []
labels = []
for area, query, mlp, partition_tag in configurations:
    selected = df_query[
        (df_query.Area == area)
        & (df_query.Query == query)
        & (df_query.Mlp == mlp)
        & (df_query.Mlp_partitions == partition_tag)
    ]
    data.append(selected["time"])
    labels.append(query.upper() + "-" + area + "-" + partition_tag + "-" + mlp_title(mlp))

fig, ax = plt.subplots()
ax.boxplot(data, labels=labels)
# Tilt the long configuration labels so they stay readable.
plt.xticks(rotation=40, ha='right')
fig.savefig(OUTPUT + "-comparison.pgf", bbox_inches="tight")

In [None]:
# Line plot of the mean query time per partition size, one line per
# (query method, MLP method). While iterating, one record per configuration is
# appended to `speedups` holding the mean time (converted with ns_to_ms) and
# the speedup relative to the mean "normal" query time of the same area.
fig, ax = plt.subplots()
speedups = []
for area in AREAS:
    in_area = df_query.Area == area
    dijkstra = df_query[(df_query.Query == "normal") & in_area]["time"].mean()
    # Mean of the "bi" baseline; not used further in this cell.
    bidijkstra = df_query[(df_query.Query == "bi") & in_area]["time"].mean()
    for query in FAST_QUERY_METHODS:
        for mlp in MLP_METHODS:
            partition_sizes = []
            mean_times_ms = []
            for partitions in MLP_LEVELS:
                partition_tag = "_".join(map(str, partitions))
                rows = df_query[
                    in_area
                    & (df_query.Query == query)
                    & (df_query.Mlp == mlp)
                    & (df_query.Mlp_partitions == partition_tag)
                ]
                mean_time = rows["time"].mean()
                mean_time_ms = ns_to_ms(mean_time)
                partition_sizes.append(partitions[0])
                mean_times_ms.append(mean_time_ms)
                speedups.append({
                    "Query": query.upper(),
                    "MLP": mlp_title(mlp),
                    partition_tag + "-time": mean_time_ms,
                    partition_tag + "-speedup": dijkstra / mean_time,
                })
            ax.plot(
                partition_sizes,
                mean_times_ms,
                marker=plot_get(query),
                color=plot_get(mlp),
                label=query.upper() + "-" + mlp_title(mlp),
                alpha=0.7,
            )
ax.set_xlabel("MLP partition size")
ax.set_ylabel("Query time [ms]")
# Place the legend just above the axes.
ax.legend(ncol=3, loc=(0.005, 1.02))
fig.savefig(OUTPUT + "-query.pgf", bbox_inches="tight")

In [None]:
# Turn the per-configuration records into one table row per (Query, MLP).
#
# Every record fills only the "<partitions>-time" / "<partitions>-speedup"
# columns of its own partition setting, so within a (Query, MLP) group each
# column must hold exactly one non-null value. The `.first()` below then merges
# the group's rows without discarding any measurement.
df_speedups = pd.DataFrame(speedups)

# The previous check `(count().all() == 1).iloc[0]` only verified that the
# first column's counts were non-zero; verify the invariant for every column.
group_counts = df_speedups.groupby(["Query", "MLP"]).count()
assert (group_counts == 1).all().all(), (
    "expected exactly one value per column for every (Query, MLP) group; "
    "groupby(...).first() would otherwise silently drop measurements"
)

# sort=False keeps the groups in the order in which the records were produced.
df_speedups = df_speedups.groupby(["Query", "MLP"], sort = False).first()
latex = df_speedups.to_latex(float_format="{:0.1f}".format)
df_speedups

In [None]:
# Fix up the LaTeX produced by DataFrame.to_latex so the table has a single
# header line, column separators between partition sizes, and uses tabularx.
# Backslashes in the LaTeX snippets are written as "\\" throughout: "\m" is an
# invalid escape sequence and raises a warning on recent Python versions.
latex_list = latex.splitlines()

# NOTE(review): assumes line 2 holds the column names and line 3 the index
# names, as in the two-line header to_latex emits for a named index - confirm
# against the installed pandas version.
columns = latex_list[2].split("&")
indices = latex_list[3].split("&")

# First header line: one two-column group per partition setting.
latex_list[2] = "& "
for partition in MLP_LEVELS:
    latex_list[2] += "& \\multicolumn{2}{c|}{" + "-".join(map(str, partition)) + "} "
latex_list[2] += " \\\\"
# remove last pipe from "c"
latex_list[2] = "c".join(latex_list[2].rsplit("c|", 1))

# Second header line: index names followed by the original value column names.
latex_list[3] = "&".join(indices[:2] + columns[2:])

# Shorten the per-partition column names; to_latex escapes "_" as "\_".
for partition in MLP_LEVELS:
    escaped_tag = "\\_".join(map(str, partition))
    latex_list[3] = latex_list[3].replace(escaped_tag + "-speedup", "speedup").replace(escaped_tag + "-time", "[ms]")

# Vertical rules after the index columns and between partition groups
# (all groups but the last).
latex_list[0] = latex_list[0].replace('ll', 'll|', 1)
latex_list[0] = latex_list[0].replace('rr', 'rr|', len(MLP_LEVELS) - 1)

# Switch to tabularx with a stretchable first column.
latex_list[0] = latex_list[0].replace("\\begin{tabular}{l", "\\begin{tabularx}{\\textwidth}{X")
latex_list[-1] = latex_list[-1].replace("\\end{tabular}", "\\end{tabularx}")

# Insert two rules between the row groups. The offsets are hard-coded and
# counted from the end of the table; the second insert sees the list already
# grown by one line.
# NOTE(review): presumably this separates three query groups of
# len(MLP_METHODS) rows each, with the else-branch fitting one row per group -
# confirm before changing the number of query or MLP methods.
if len(MLP_METHODS) == 3:
    latex_list.insert(len(latex_list)-8, '\\midrule')
    latex_list.insert(len(latex_list)-5, '\\midrule')
else:
    latex_list.insert(len(latex_list)-4, '\\midrule')
    latex_list.insert(len(latex_list)-3, '\\midrule')

latex_new = '\n'.join(latex_list)
with open(OUTPUT + "-speedups.tex", "w") as latex_file:
    # A single string is written; writelines() would iterate it per character.
    latex_file.write(latex_new)

# Count

In [None]:
if not SKIP_COUNTS:
    # Load the operation counters of every configuration into `df_count`.
    #
    # Baseline queries ("normal" / "bi") have a single file per area; its rows
    # are repeated for every (MLP method, partition levels) combination and
    # tagged accordingly, exactly as before. The file is now read only once
    # per (area, query) instead of once per combination.
    #
    # Frames are collected and concatenated once: DataFrame.append was removed
    # in pandas 2.0 and growing a frame inside a loop is quadratic.
    count_frames = []
    for area in AREAS:
        for query in QUERY_METHODS:
            is_baseline = query == "normal" or query == "bi"
            baseline_counts = None  # read lazily, at most once per (area, query)
            for mlp in MLP_METHODS:
                for partitions in MLP_LEVELS:
                    partition_tag = "_".join(map(str, partitions))
                    if is_baseline:
                        if baseline_counts is None:
                            baseline_counts = pd.read_json(EVAL_DIR + "/" + area + "-" + query + "-count.json")
                        raw_counts = baseline_counts
                    else:
                        raw_counts = pd.read_json(EVAL_DIR + "/" + area + "-" + mlp + "-" + partition_tag + "-" + query + "-count.json")
                    # assign() returns a new frame, so the cached baseline
                    # frame is never modified.
                    count_frames.append(
                        raw_counts.assign(
                            Query=query,
                            Area=area,
                            Mlp=mlp,
                            Mlp_partitions=partition_tag,
                        )
                    )

    # pd.concat raises on an empty list; keep the empty-frame result instead.
    df_count = pd.concat(count_frames, ignore_index=True) if count_frames else pd.DataFrame()

In [None]:
if not SKIP_COUNTS:
    # Mean number of heap pops per partition size, one line per
    # (query method, MLP method). No legend is drawn on this figure.
    fig_width = set_pgf_size(TEXT_WIDTH * 0.95)[0]
    fig_height = set_pgf_size(TEXT_WIDTH * 1.0)[1]
    fig, ax = plt.subplots(1, 1, figsize=(fig_width, fig_height))

    for area in AREAS:
        for query in FAST_QUERY_METHODS:
            for mlp in MLP_METHODS:
                partition_sizes = []
                mean_heap_pops = []
                for partitions in MLP_LEVELS:
                    partition_tag = "_".join(map(str, partitions))
                    rows = df_count[
                        (df_count.Area == area)
                        & (df_count.Query == query)
                        & (df_count.Mlp == mlp)
                        & (df_count.Mlp_partitions == partition_tag)
                    ]
                    partition_sizes.append(partitions[0])
                    mean_heap_pops.append(rows["heap_pops"].mean())
                ax.plot(
                    partition_sizes,
                    mean_heap_pops,
                    marker=plot_get(query),
                    color=plot_get(mlp),
                    label=query.upper() + "-" + mlp_title(mlp),
                    alpha=0.7,
                )

    ax.set_xlabel("MLP partition size")
    ax.set_ylabel("Average heap-pops")

    # Fixed scientific notation on the y axis with exponent 4.
    ax.ticklabel_format(style='sci', axis='y', scilimits=(4,4))
    fig.savefig(OUTPUT + "-query-heap_pops.pgf", bbox_inches="tight")

In [None]:
if not SKIP_COUNTS:
    # Mean number of relaxed edges per partition size, one line per
    # (query method, MLP method), with the legend placed above the axes.
    fig_width = set_pgf_size(TEXT_WIDTH * 0.95)[0]
    fig_height = set_pgf_size(TEXT_WIDTH * 1.0)[1]
    fig, ax = plt.subplots(1, 1, figsize=(fig_width, fig_height))

    for area in AREAS:
        for query in FAST_QUERY_METHODS:
            for mlp in MLP_METHODS:
                partition_sizes = []
                mean_relaxed_edges = []
                for partitions in MLP_LEVELS:
                    partition_tag = "_".join(map(str, partitions))
                    rows = df_count[
                        (df_count.Area == area)
                        & (df_count.Query == query)
                        & (df_count.Mlp == mlp)
                        & (df_count.Mlp_partitions == partition_tag)
                    ]
                    partition_sizes.append(partitions[0])
                    mean_relaxed_edges.append(rows["relaxed_edges"].mean())
                ax.plot(
                    partition_sizes,
                    mean_relaxed_edges,
                    marker=plot_get(query),
                    color=plot_get(mlp),
                    label=query.upper() + "-" + mlp_title(mlp),
                    alpha=0.7,
                )

    ax.set_xlabel("MLP partition size")
    ax.set_ylabel("Average relaxed-edges")
    ax.legend(ncol=3, loc=(-0.06, 1.07))
    # Always use scientific notation on the y axis.
    ax.ticklabel_format(style='sci', axis='y', scilimits=(0,0))
    fig.savefig(OUTPUT + "-query-relaxed_edges.pgf", bbox_inches="tight")