In [None]:
from helper import *

import pandas as pd
import numpy as np
import os
import csv
import sys
import matplotlib
import matplotlib.pyplot as plt
# Global matplotlib configuration for every figure produced by this notebook.
# Default figure size (width, height) for all figures created below.
plt.rcParams['figure.figsize'] = [16, 8]
# use LaTeX fonts in the plot
# Select the PGF backend so figures can be exported as .pgf for inclusion in LaTeX.
matplotlib.use("pgf")
matplotlib.rcParams.update({
    # TeX engine used by the PGF backend.
    "pgf.texsystem": "pdflatex",
    'font.family': 'serif',
    # Render all text through LaTeX; because of this, literal '%' characters in
    # labels must be escaped as '\%'.
    'text.usetex': True,
    # Do not set up fonts from matplotlib's rc parameters in the PGF preamble.
    'pgf.rcfonts': False,
})
# NOTE(review): this magic runs after matplotlib.use("pgf") and presumably
# switches the active backend to the inline one for display in the notebook —
# confirm that saving to *.pgf still goes through the PGF backend as intended.
%matplotlib inline

In [None]:
# --- Notebook configuration -------------------------------------------------
# Directory holding the evaluation results (*.json timing files and *.csv
# files). The original absolute path stays the default so existing runs are
# unchanged; set the EVAL_DIR environment variable to run the notebook on
# another machine without editing this cell.
EVAL_DIR = os.environ.get("EVAL_DIR", "/home/felix/todo/algohol/single")
# Method names as they appear in the result file names.
MLP_METHODS = ["kmeans", "gonzalez", "merge"]
# One single-level configuration per power of two: [[512], [1024], [2048], [4096]].
# Integer exponents are used directly instead of stepping through a float range.
MLP_LEVELS = [[2 ** exponent] for exponent in range(9, 13)]
# Query methods that are evaluated once per (MLP method, level) combination.
FAST_QUERY_METHODS = ["pch", "pch-pch"]
# "normal" is the baseline the fast methods are compared against.
QUERY_METHODS = ["normal"] + FAST_QUERY_METHODS
AREAS = ["baden-wuerttemberg"]
print(MLP_METHODS, "with", MLP_LEVELS)

In [None]:
# Load every timing result file into one long DataFrame (df_pch).
# Each loaded frame is tagged with the area and query method it belongs to;
# results of the MLP-based query methods additionally carry the MLP method
# ("Mlp") and the partition configuration ("Mlp_partitions", joined with "_").
#
# The frames are collected in a list and concatenated once at the end:
# DataFrame.append was removed in pandas 2.0, and appending inside the loop
# copies the whole accumulated frame on every iteration.
timing_frames = []
for area in AREAS:
    for query in QUERY_METHODS:
        if query == "normal" or query == "bi":
            # Baseline queries do not depend on an MLP: one file per area.
            tmp = pd.read_json(EVAL_DIR + "/" + area + "-" + query + "-time.json")
            tmp["Area"] = area
            tmp["Query"] = query
            timing_frames.append(tmp)
        else:
            for mlp in MLP_METHODS:
                for partitions in MLP_LEVELS:
                    partition_key = "_".join(map(str, partitions))
                    tmp = pd.read_json(EVAL_DIR + "/" + area + "-" + mlp + "-" + partition_key + "-" + query + "-time.json")
                    tmp["Area"] = area
                    tmp["Query"] = query
                    tmp["Mlp"] = mlp
                    tmp["Mlp_partitions"] = partition_key
                    timing_frames.append(tmp)

# pd.concat raises on an empty list, so fall back to an empty frame exactly
# like the original accumulator did when nothing was loaded.
df_pch = pd.concat(timing_frames, ignore_index=True) if timing_frames else pd.DataFrame()

In [None]:
# Plot mean query time against the percentage of contracted nodes: one line per
# (fast query method, MLP method), one point per partition configuration.
# The figure is written to "pch.pgf".
fig, ax = plt.subplots()
# The percentage depends only on (area, mlp, partitions), not on the query
# method, so each CSV is read once and the result is reused.
percentage_cache = {}
for area in AREAS:
    for query in FAST_QUERY_METHODS:
        for mlp in MLP_METHODS:
            x = []
            y = []
            for partitions in MLP_LEVELS:
                partition_key = "_".join(map(str, partitions))
                tmp = df_pch[(df_pch.Area == area) & (df_pch.Query == query) & (df_pch.Mlp == mlp) & (df_pch.Mlp_partitions == partition_key)]
                cache_key = (area, mlp, partition_key)
                if cache_key not in percentage_cache:
                    df_pch_csv = pd.read_csv(EVAL_DIR + "/" + area + "-" + mlp + "-" + partition_key + ".csv")
                    # Count the rows with highest_diff >= 1 directly. The previous
                    # value_counts().loc[1:] was a label slice on an index ordered
                    # by frequency, so its result depended on which value happened
                    # to be most common (and it raised KeyError when label 1 was
                    # missing from an unsorted index).
                    # NOTE(review): assumes the intent was "all rows with
                    # highest_diff of at least 1" — confirm.
                    nodes_with_diff = (df_pch_csv.highest_diff >= 1).sum()
                    percentage_cache[cache_key] = (1 - (nodes_with_diff / len(df_pch_csv))) * 100
                x.append(percentage_cache[cache_key])
                y.append(ns_to_ms(tmp["time"].mean()))
            if "pch-pch" in query:
                # text.usetex is enabled, so '%' must reach LaTeX as '\%'. The
                # backslash is written explicitly ("\\%") instead of relying on
                # the invalid escape sequence "\%".
                label = query.replace("pch-pch", "PCH") + " \\% of " + mlp_title(mlp)
            else:
                label = query.upper() + " MLP-" + mlp_title(mlp)
            # plot_get comes from the helper module; it is used here to look up
            # both the marker (by query) and the colour (by MLP method).
            ax.plot(x, y, marker=plot_get(query), color=plot_get(mlp), label=label, alpha=0.7)
ax.set_xlabel("Percentage of contracted nodes")
ax.set_ylabel("Query time [ms]")
# Place the legend above the axes.
ax.legend(ncol=2, loc=(0.120, 1.02))
fig.savefig("pch.pgf", bbox_inches="tight")

In [None]:
# Plot, per MLP method, the difference between the mean "pch" and the mean
# "pch-pch" query time against the percentage of contracted nodes.
# The figure is written to "pch-diff.pgf".
fig, ax = plt.subplots()
for area in AREAS:
    for mlp in MLP_METHODS:
        x = []
        y = []
        for partitions in MLP_LEVELS:
            partition_key = "_".join(map(str, partitions))
            tmp = df_pch[(df_pch.Area == area) & (df_pch.Mlp == mlp) & (df_pch.Mlp_partitions == partition_key)]
            # Mean time per query method for this configuration.
            means = tmp.groupby("Query")["time"].mean()
            df_pch_csv = pd.read_csv(EVAL_DIR + "/" + area + "-" + mlp + "-" + partition_key + ".csv")
            # Count the rows with highest_diff >= 1 directly. The previous
            # value_counts().loc[1:] was a label slice on an index ordered by
            # frequency, so its result depended on which value was most common.
            # NOTE(review): assumes the intent was "all rows with highest_diff
            # of at least 1" — confirm.
            nodes_with_diff = (df_pch_csv.highest_diff >= 1).sum()
            percentage = 1 - (nodes_with_diff / len(df_pch_csv))
            x.append(percentage * 100)
            y.append(ns_to_ms(means["pch"] - means["pch-pch"]))
        ax.plot(x, y, color=plot_get(mlp), marker=".", label=mlp_title(mlp), alpha=0.7)
ax.set_xlabel("Percentage of contracted nodes")
ax.set_ylabel("Difference of query time [ms]")
ax.legend(loc='lower left')
fig.savefig("pch-diff.pgf", bbox_inches="tight")

In [None]:
# Build the rows of the summary table: one dict per (area, MLP method,
# partition configuration) holding the percentage plus, for every fast query
# method, its mean time (converted with ns_to_ms) and its speed-up relative to
# the mean time of the "normal" baseline query.
table = []
for area in AREAS:
    dijkstra = df_pch[(df_pch.Query == "normal") & (df_pch.Area == area)]["time"].mean()
    for mlp in MLP_METHODS:
        for partitions in MLP_LEVELS:
            partition_key = "_".join(map(str, partitions))
            # The CSV and the percentage do not depend on the query method, so
            # they are computed once per configuration instead of once per query.
            df_pch_csv = pd.read_csv(EVAL_DIR + "/" + area + "-" + mlp + "-" + partition_key + ".csv")
            # Count the rows with highest_diff >= 1 directly. The previous
            # value_counts().loc[1:] was a label slice on an index ordered by
            # frequency, so its result depended on which value was most common.
            # NOTE(review): assumes the intent was "all rows with highest_diff
            # of at least 1" — confirm.
            nodes_with_diff = (df_pch_csv.highest_diff >= 1).sum()
            percentage = (1 - (nodes_with_diff / len(df_pch_csv))) * 100
            line = {
                "MLP": mlp_title(mlp),
                "Partitions": partition_key,
                "Percentage": "{:0.2f} %".format(percentage),
            }
            for query in FAST_QUERY_METHODS:
                tmp = df_pch[(df_pch.Area == area) & (df_pch.Query == query) & (df_pch.Mlp == mlp) & (df_pch.Mlp_partitions == partition_key)]
                mean_time = tmp["time"].mean()
                line[query] = ns_to_ms(mean_time)
                # Speed-up is a ratio of raw means, so no unit conversion is needed.
                line[query + "-speedup"] = dijkstra / mean_time
            table.append(line)

In [None]:
# Turn the collected rows into the final table, indexed by
# (MLP, Partitions, contracted), and render it as LaTeX.
df_table = pd.DataFrame(table).rename(columns={
    "pch": "mlp pch [ms]",
    "pch-speedup": "mlp pch speedup",
    "pch-pch": "pch [ms]",
    "pch-pch-speedup": "pch speedup",
    "Percentage": "contracted",
})
group_keys = ["MLP", "Partitions", "contracted"]
# Every key combination must occur exactly once with a value in every column;
# otherwise the .first() below would silently drop rows. The previous check,
# (count().all() == 1).iloc[0], only verified that the first column's counts
# were non-zero, so duplicate rows went undetected.
group_counts = df_table.groupby(group_keys).count()
assert (group_counts == 1).all().all(), "expected exactly one table row per (MLP, Partitions, contracted)"
# sort=False keeps the rows in the order in which they were generated.
df_table = df_table.groupby(group_keys, sort=False).first()
latex = df_table.to_latex(float_format="{:0.2f}".format)
df_table

In [None]:
# Fix up the generated LaTeX so the table has a single-line header with two
# grouped column pairs, then write it to "pch.tex".
# NOTE(review): the fixed line indices below (0, 2, 3) assume the layout that
# DataFrame.to_latex produced when this was written (tabular spec, rule,
# column header, index header) — confirm after a pandas upgrade.
latex_list = latex.splitlines()

# Add vertical separators between the index columns and the two column pairs.
latex_list[0] = latex_list[0].replace('lllrrrr', 'lll|rr|rr', 1)

columns = latex_list[2].split("&")
indices = latex_list[3].split("&")

# Raw strings keep the LaTeX backslashes literal; the non-raw "\multicolumn"
# and '\midrule' were invalid escape sequences. The emitted text is unchanged.
latex_list[2] = r"& & & \multicolumn{2}{c|}{with MLP} & \multicolumn{2}{c}{no MLP}  \\"
# Merge the index names and the value column names into one header row.
latex_list[3] = "&".join(indices[:3] + columns[3:])
latex_list[3] = latex_list[3].replace("mlp", "").replace("pch", "").replace("[ms]", "time [ms]")

# Insert two rules at fixed offsets from the end of the table.
# NOTE(review): the offsets 10 and 6 presumably separate the three MLP methods
# (four rows each, plus two closing lines) — confirm if MLP_METHODS or
# MLP_LEVELS change.
latex_list.insert(len(latex_list)-10, r'\midrule')
latex_list.insert(len(latex_list)-6, r'\midrule')
latex_new = '\n'.join(latex_list)
with open("pch.tex", "w") as latex_file:
    # latex_new is a single string; write() emits it in one call, whereas
    # writelines() iterated over it character by character.
    latex_file.write(latex_new)