In [None]:
from helper import *

import pandas as pd
import numpy as np
import os
import csv
import sys
import matplotlib
import matplotlib.pyplot as plt
# Default figure size applied to every figure created in this notebook.
plt.rcParams['figure.figsize'] = [16, 8]
# use LaTeX fonts in the plot
matplotlib.use("pgf")
# Configure the pgf/LaTeX text rendering: pdflatex as TeX system, serif fonts,
# text rendered through TeX, and no font setup taken from the rc parameters.
matplotlib.rcParams.update({
    "pgf.texsystem": "pdflatex",
    'font.family': 'serif',
    'text.usetex': True,
    'pgf.rcfonts': False,
})
# NOTE(review): this magic runs after matplotlib.use("pgf") above and presumably
# switches the interactive backend back to inline display — confirm the order
# is intentional.
%matplotlib inline

In [None]:
# --- Evaluation configuration ------------------------------------------------
# Prefix of every artifact (figures, LaTeX table) written by this notebook.
OUTPUT = "bicycle"
# Directory the measurement files (*-time.json, *.csv, log.json) are read from.
# NOTE(review): machine-specific absolute path — adjust before running elsewhere.
EVAL_DIR = "/home/felix/todo/algohol/bicycle"

AREAS = ["baden-wuerttemberg"]
MLP_METHODS = ["merge"]
# One single-level partitioning per power of two: 2^13, 2^14 and 2^15.
MLP_LEVELS = [[2 ** exponent] for exponent in range(13, 16)]

FAST_QUERY_METHODS = ["pcrp", "pch", "prp"]
# The baseline query comes first, followed by all accelerated variants.
QUERY_METHODS = ["normal", *FAST_QUERY_METHODS]
SKIP_COUNTS = False

print(MLP_METHODS, "with", MLP_LEVELS)

# Time

In [None]:
# Load every per-query timing file into one long-format frame. Each file's rows
# are tagged with the area and query method; files of the MLP-based queries are
# additionally tagged with the MLP method and the partitioning they belong to.
#
# The per-file frames are collected in a list and concatenated once:
# DataFrame.append was removed in pandas 2.0 and re-copied the whole frame on
# every iteration.
query_frames = list()
for area in AREAS:
    for query in QUERY_METHODS:
        if query == "normal" or query == "bi":
            # Baseline queries have one timing file per area (no partitioning).
            timings = pd.read_json(EVAL_DIR + "/" + area + "-" + query + "-time.json")
            timings["Area"] = area
            timings["Query"] = query
            query_frames.append(timings)
        else:
            for mlp in MLP_METHODS:
                for partitions in MLP_LEVELS:
                    # Partition sizes joined with "_" identify the file and the rows.
                    partition_key = "_".join(map(str, partitions))
                    timings = pd.read_json(EVAL_DIR + "/" + area + "-" + mlp + "-" + partition_key + "-" + query + "-time.json")
                    timings["Area"] = area
                    timings["Query"] = query
                    timings["Mlp"] = mlp
                    timings["Mlp_partitions"] = partition_key
                    query_frames.append(timings)

# pd.concat raises on an empty list, so keep the original "empty frame" result
# when the configuration selects no files at all.
df_query = pd.concat(query_frames, ignore_index=True) if query_frames else pd.DataFrame()

In [None]:
# Box plot of the measured query times: one box per combination of area,
# fast query method, MLP method and partitioning.
data = list()
labels = list()
for area in AREAS:
    for query in FAST_QUERY_METHODS:
        for mlp in MLP_METHODS:
            for partitions in MLP_LEVELS:
                partition_key = "_".join(str(size) for size in partitions)
                # Rows of df_query that belong to exactly this configuration.
                selected = (
                    (df_query.Area == area)
                    & (df_query.Query == query)
                    & (df_query.Mlp == mlp)
                    & (df_query.Mlp_partitions == partition_key)
                )
                data.append(df_query.loc[selected, "time"])
                labels.append(query.upper() + "-" + area + "-" + partition_key + "-" + mlp_title(mlp))

fig, ax = plt.subplots()
ax.boxplot(data, labels=labels)
# Slanted tick labels keep the long configuration names readable.
plt.xticks(rotation=40, ha='right')
fig.savefig(OUTPUT + "-comparison.pgf", bbox_inches="tight")

In [None]:
# Plot the mean query time of every fast query method against the MLP
# partition size (one line per query/MLP-method pair) and record, per
# configuration, the mean time and its ratio to the "normal" baseline.
fig, ax = plt.subplots()
speedups = list()
for area in AREAS:
    # Baseline: mean time of the "normal" query in this area.
    dijkstra = df_query[(df_query.Query == "normal") & (df_query.Area == area)]["time"].mean()
    for query in FAST_QUERY_METHODS:
        for mlp in MLP_METHODS:
            x = list()
            y = list()
            for partitions in MLP_LEVELS:
                partition_key = "_".join(map(str, partitions))
                tmp = df_query[(df_query.Area == area) & (df_query.Query == query) & (df_query.Mlp == mlp) & (df_query.Mlp_partitions == partition_key)]
                # Computed once; it feeds the plot, the time column and the speedup.
                mean_time = tmp["time"].mean()
                x.append(partitions[0])
                y.append(ns_to_ms(mean_time))
                speedups.append({"Query": query.upper(), "MLP": mlp_title(mlp), partition_key + "-time": ns_to_ms(mean_time), partition_key + "-speedup": dijkstra / mean_time})
            ax.plot(x, y, marker=plot_get(query), color=plot_get(mlp), label=query.upper() + "-" + mlp_title(mlp), alpha=0.7)
ax.set_xlabel("MLP-Partition-Size")
ax.set_ylabel("Query-time in ms")
# Only one legend call: an earlier legend(loc='upper left') was immediately
# replaced by this one and therefore never visible.
ax.legend(loc=(0.6, 0.4))
fig.savefig(OUTPUT + "-query.pgf", bbox_inches="tight")

In [None]:
# df_speedups = pd.DataFrame(speedups)
# assert((df_speedups.groupby(["Query", "MLP"]).count().all() == 1).iloc[0])
# df_speedups = df_speedups.groupby(["Query", "MLP"], sort = False).first()
# # df_speedups.reset_index(inplace=True)
# # df_speedups.to_latex('speedups.tex', float_format="{:0.1f}".format, index=False)
# latex = df_speedups.to_latex(float_format="{:0.1f}".format)
# df_speedups

In [None]:
# # fixup ugly latex code to have single line header
# latex_list = latex.splitlines()

# columns = latex_list[2].split("&")
# indices = latex_list[3].split("&")

# latex_list[2] = "& "
# for partition in MLP_LEVELS:
#     latex_list[2] += "& \multicolumn{2}{c|}{" + "-".join(map(str, partition)) + "} "
# latex_list[2] += " \\\\"
# # remove last pipe from "c"
# latex_list[2] = "c".join(latex_list[2].rsplit("c|", 1))

# latex_list[3] = "&".join(indices[:2] + columns[2:])

# for partition in MLP_LEVELS:
#     latex_list[3] = latex_list[3].replace("\\_".join(map(str, partition)) + "-speedup", "speedup").replace("\\_".join(map(str, partition)) + "-time", "[ms]")

# latex_list[0] = latex_list[0].replace('ll', 'll|', 1)
# latex_list[0] = latex_list[0].replace('rr', 'rr|', len(MLP_LEVELS) - 1)

# if len(MLP_METHODS) == 3:
#     latex_list.insert(len(latex_list)-8, '\midrule')
#     latex_list.insert(len(latex_list)-5, '\midrule')
# else:
#     latex_list.insert(len(latex_list)-4, '\midrule')
#     latex_list.insert(len(latex_list)-3, '\midrule')

# latex_new = '\n'.join(latex_list)
# with open(OUTPUT + "-speedups.tex", "w") as latex_file:
#     latex_file.writelines(latex_new)

In [None]:
# Load the run log; with orient='index' the top-level JSON keys become the rows.
# The open handle is passed straight to pandas because handing read_json a
# literal JSON string is deprecated since pandas 2.1.
with open(EVAL_DIR + "/log.json") as file:
    log = pd.read_json(file, orient='index')
# create command string to not match against list
# Series.map applies the join to each row's command separately. Series.agg with
# such a callable is deprecated and is slated to pass the whole Series instead,
# which would silently collapse all commands into one string.
log['command_string'] = log['command'].map(lambda parts: ' '.join(map(str, parts)))

In [None]:
# Build one table row per (area, MLP method, partitioning): mean query time and
# speedup over the "normal" baseline for every fast query method, plus the
# contraction percentage from the per-partitioning CSV and the runtime of the
# last matching "prp_pre" command in the log.
speedups = list()
for area in AREAS:
    # Baseline: mean time of the "normal" query in this area.
    dijkstra = df_query[(df_query.Query == "normal") & (df_query.Area == area)]["time"].mean()
    for mlp in MLP_METHODS:
        for partitions in MLP_LEVELS:
            partition_key = "_".join(map(str, partitions))
            row = dict()
            for query in FAST_QUERY_METHODS:
                tmp = df_query[(df_query.Area == area) & (df_query.Query == query) & (df_query.Mlp == mlp) & (df_query.Mlp_partitions == partition_key)]
                mean_time = tmp["time"].mean()
                row[query.upper() + "-time"] = ns_to_ms(mean_time)
                row[query.upper() + "-speedup"] = dijkstra / mean_time

            df_pch_csv = pd.read_csv(EVAL_DIR + "/" + area + "-" + mlp + "-" + partition_key + ".csv")
            # NOTE(review): value_counts() orders its index by frequency, so the
            # label slice .loc[1:] covers "label 1 and everything after it in
            # that order" — presumably meant as "all highest_diff values >= 1";
            # confirm and consider (df_pch_csv.highest_diff >= 1).sum().
            contracted_nodes = sum(df_pch_csv.highest_diff.value_counts().loc[1:])
            percentage = 1 - (contracted_nodes / len(df_pch_csv))

            # Literal substring matching (regex=False): the file name contains
            # a ".", which as a regular expression would match any character.
            is_preprocessing = log['command_string'].str.contains("prp_pre", regex=False)
            is_this_partitioning = log['command_string'].str.contains(mlp + "-" + partition_key + ".bin", regex=False)
            lines = log[is_preprocessing & is_this_partitioning]
            if lines.shape[0] == 0:
                # No preprocessing run logged for this configuration: skip the row.
                continue
            # Use the last matching log entry.
            line = lines.iloc[-1]
            runtime = int(sec_to_min(float(line['time'].replace(" seconds", ""))))

            pindex = {"partitions": int(partition_key), "contracted": "{:0.2f} %".format(percentage * 100.0), "pre [min]": runtime}
            speedups.append({**pindex, **row})

In [None]:
# Turn the collected rows into a table indexed by the partitioning columns and
# render it as LaTeX with one decimal place for floats.
group_keys = ["partitions", "contracted", "pre [min]"]
df_speedups = pd.DataFrame(speedups)
# Every index combination must occur exactly once, otherwise .first() below
# would silently drop rows. The earlier form `.count().all() == 1` only tested
# that the first column's counts were non-zero, so duplicates went unnoticed.
group_counts = df_speedups.groupby(group_keys).count()
assert (group_counts == 1).all().all(), "expected exactly one row per (partitions, contracted, pre [min]) group"
df_speedups = df_speedups.groupby(group_keys, sort=False).first()
latex = df_speedups.to_latex(float_format="{:0.1f}".format)
df_speedups

In [None]:
# fixup ugly latex code to have single line header
# NOTE(review): the fixed line indices below assume the to_latex layout where
# line 0 is the column spec, line 2 the column header and line 3 the index
# header — confirm against the pandas version in use.
latex_list = latex.splitlines()

columns = latex_list[2].split("&")
indices = latex_list[3].split("&")

# First header line: one two-column group per fast query method.
latex_list[2] = "& & "
for query in FAST_QUERY_METHODS:
    # "\\m" spells the same backslash explicitly; the bare "\m" was an invalid
    # escape sequence that newer Python versions warn about.
    latex_list[2] += "& \\multicolumn{2}{c|}{" + query.upper() + "} "
latex_list[2] += " \\\\"
# remove last pipe from "c"
latex_list[2] = "c".join(latex_list[2].rsplit("c|", 1))

# Second header line: index names followed by the per-query column names.
latex_list[3] = "&".join(indices[:3] + columns[3:])

for query in FAST_QUERY_METHODS:
    latex_list[3] = latex_list[3].replace(query.upper() + "-speedup", "speedup").replace(query.upper() + "-time", "[ms]")

# Add vertical rules to the column spec: after the first index column and
# between the per-query column pairs.
latex_list[0] = latex_list[0].replace('lr', 'l|r', 1)
latex_list[0] = latex_list[0].replace('rr', 'rr|', len(FAST_QUERY_METHODS) - 1)

# latex_list.insert(len(latex_list)-6, '\midrule')
# latex_list.insert(len(latex_list)-5, '\midrule')

latex_new = '\n'.join(latex_list)
with open(OUTPUT + "-speedups.tex", "w") as latex_file:
    # A single string is written with write(); writelines() is for iterables of lines.
    latex_file.write(latex_new)