In [None]:
from helper import *

import pandas as pd
import numpy as np
import os
import csv
import sys
import matplotlib
import matplotlib.pyplot as plt
# Default figure size (width, height) for every figure created afterwards.
plt.rcParams['figure.figsize'] = [8, 8]
# use LaTeX fonts in the plot
# NOTE(review): the `%matplotlib inline` magic right after this block may
# switch the backend again, so the "pgf" selection might not stay active
# for interactive display -- confirm this is intended.
matplotlib.use("pgf")
# Route text rendering through LaTeX (pdflatex) with a serif font family.
# 'pgf.rcfonts': False presumably leaves font setup to the LaTeX side
# instead of matplotlib's rc settings -- verify against the matplotlib docs.
matplotlib.rcParams.update({
    "pgf.texsystem": "pdflatex",
    'font.family': 'serif',
    'text.usetex': True,
    'pgf.rcfonts': False,
})
%matplotlib inline

In [None]:
# --- Configuration of the single-level evaluation ---------------------------
# Prefix of the .tex file written at the end of this section.
OUTPUT = "single"
# Directory holding the "<area>-<method>-<levels>-<query>-info.json" files.
EVAL_DIR = "/home/felix/todo/algohol/single"
MLP_METHODS = ["kmeans", "gonzalez", "merge"]
# One single-entry level list per power of two: 2^9, 2^10, 2^11, 2^12.
MLP_LEVELS = [[2 ** exponent] for exponent in range(9, 13)]
FAST_QUERY_METHODS = ["pcrp", "pch", "prp"]
# "normal" is listed first; later cells read it as the reference query.
QUERY_METHODS = ["normal", *FAST_QUERY_METHODS]
AREAS = ["baden-wuerttemberg"]
print(MLP_METHODS, "with", MLP_LEVELS)

# Evaluation: single-level partitions (output prefix `single`)

In [None]:
# Load the "-info.json" file of every (area, MLP method, level configuration,
# query method) combination; each file contributes exactly one row to df_graph.
info_frames = []
for area in AREAS:
    for mlp_method in MLP_METHODS:
        for level in MLP_LEVELS:
            # Level lists are encoded with underscores in the file names,
            # e.g. [512, 4] -> "512_4".
            levels = "_".join(map(str, level))
            for query in QUERY_METHODS:
                info_path = f"{EVAL_DIR}/{area}-{mlp_method}-{levels}-{query}-info.json"
                # typ='series' reads the JSON object as one Series, which is
                # then wrapped into a single-row frame.
                df_new = pd.DataFrame([pd.read_json(info_path, typ='series')])
                # Tag the row with the configuration it belongs to so later
                # cells can filter on it.
                df_new["Area"] = area
                df_new["MLP_method"] = mlp_method
                df_new["Levels"] = levels
                df_new["Query"] = query
                info_frames.append(df_new)

# Concatenate once instead of growing df_graph inside the loop: this avoids
# re-copying all previous rows per file and never concatenates with an empty
# seed frame (deprecated in recent pandas).
df_graph = pd.concat(info_frames, ignore_index=True) if info_frames else pd.DataFrame()

In [None]:
# Sanity check: every distinct `amount_edges` value has to show up exactly
# len(QUERY_METHODS) times, i.e. once per query method.
assert (df_graph.groupby("amount_edges").size() == len(QUERY_METHODS)).all()

In [None]:
# Display the first `amount_used_edges` entry of every
# (MLP method, level configuration, query method) group.
(
    df_graph
    .groupby(["MLP_method", "Levels", "Query"])["amount_used_edges"]
    .first()
)

In [None]:
def format_tex(float_number, exponent=6):
    r"""Format a number as LaTeX math with a fixed power of ten.

    The value is divided by ``10**exponent`` and the resulting mantissa is
    printed with one decimal place, e.g. ``1234567`` becomes
    ``$1.2\times10^{6}$``. The exponent is fixed rather than derived from
    the value, so every number formatted with the same ``exponent`` shares
    the same power of ten.

    Args:
        float_number: The value to format; anything that supports division
            and conversion via ``float()``.
        exponent: Power of ten to factor out. Defaults to 6, so the function
            can still be passed directly as a one-argument formatter
            (e.g. ``float_format=format_tex``).

    Returns:
        The LaTeX math string ``$<mantissa>\times10^{<exponent>}$``.
    """
    mantissa = float_number / 10 ** exponent
    return "${:0.1f}\\times10^{{{:}}}$".format(float(mantissa), str(int(exponent)))

In [None]:
# Make the column numeric; downcast="float" asks pandas for the smallest
# float dtype that can hold the values.
df_graph["amount_used_edges"] = pd.to_numeric(
    df_graph["amount_used_edges"],
    downcast="float",
)

In [None]:
# Build the result table: one row per (fast query method, MLP method) pair and
# one column per partition count holding the matching `amount_used_edges`.
table_rows = []
for area in AREAS:
    # Reference value: the first "normal" query row of this area.
    dijkstra = df_graph[(df_graph.Query == "normal") & (df_graph.Area == area)]["amount_used_edges"].iloc[0]
    print("original edge amount:", dijkstra)
    for query in FAST_QUERY_METHODS:
        for mlp in MLP_METHODS:
            line = {}
            for partitions in MLP_LEVELS:
                levels = "_".join(map(str, partitions))
                selected = df_graph[
                    (df_graph.Area == area)
                    & (df_graph.Query == query)
                    & (df_graph.MLP_method == mlp)
                    & (df_graph.Levels == levels)
                ]
                # The column key is the first level's partition count;
                # .values[0] raises IndexError if the combination was not loaded.
                line[partitions[0]] = selected["amount_used_edges"].values[0]
            # mlp_title is not defined in this notebook; presumably it comes
            # from `helper` and returns a display name -- verify.
            line["MLP_method"] = mlp_title(mlp)
            line["Query"] = query.upper()
            table_rows.append(line)

# Create the frame once from all records instead of concatenating one
# single-row frame per loop iteration.
df_table = pd.DataFrame(table_rows)

In [None]:
# Index the table by (Query, MLP_method), taking the first row of each group.
df_edges = (
    df_table
    .groupby(["Query", "MLP_method"])
    .first()
)
# Numbers are rendered through format_tex; escape=False turns off pandas'
# escaping of LaTeX special characters.
latex = df_edges.to_latex(escape=False, float_format=format_tex)
df_edges

In [None]:
# fixup ugly latex code to have single line header
latex_list = latex.splitlines()

# Add a vertical rule after the first 'll' of the tabular column spec.
latex_list[0] = latex_list[0].replace('ll', 'll|', 1)

# Assumes the to_latex layout: line 2 carries the column labels (partition
# counts) and line 3 the index names -- verify when upgrading pandas.
partition_header = latex_list[2]
headers = latex_list[3].split("&")
# Keep the two index names and span one centred caption over the remaining
# columns. Raw strings keep the LaTeX backslashes literal ("\m" is not a
# valid Python escape sequence).
headers = [headers[0], headers[1], r" \multicolumn{" + str(len(headers) - 2) + r"}{c}{amount of partitions} \\"]

# Swap the two header lines: caption line first, partition counts second.
latex_list[3] = partition_header
latex_list[2] = "&".join(headers)

# Insert rules 8 and then 5 lines before the end; with three body rows per
# group followed by two closing lines this separates the row groups.
latex_list.insert(len(latex_list)-8, r'\midrule')
latex_list.insert(len(latex_list)-5, r'\midrule')
latex_new = '\n'.join(latex_list)
latex_new = latex_new.replace("MLP_method", "MLP-method")
with open(OUTPUT + "-edges.tex", "w") as latex_file:
    # latex_new is a single string, so write() is the matching call
    # (writelines() would iterate it character by character).
    latex_file.write(latex_new)

# Evaluation: comparison of level configurations (output prefix `level`)

In [None]:
# --- Configuration of the level evaluation -----------------------------------
# Prefix of the .tex file written at the end of this section.
OUTPUT = "level"
# Directory holding the "<area>-<method>-<levels>-<query>-info.json" files.
EVAL_DIR = "/home/felix/todo/algohol/level"
MLP_METHODS = ["merge"]
# Every first-level size 2^9, 2^10, 2^11 is combined with each list of further
# levels: none, [4], [16] and [4, 4] (grouped in that order).
MLP_LEVELS = [
    [2 ** exponent, *further_levels]
    for further_levels in ([], [4], [16], [4, 4])
    for exponent in range(9, 12)
]
FAST_QUERY_METHODS = ["pcrp", "pch", "prp"]
QUERY_METHODS = ["normal", *FAST_QUERY_METHODS]
AREAS = ["baden-wuerttemberg"]
print(MLP_METHODS, "with", MLP_LEVELS)

In [None]:
# Load the "-info.json" file of every (area, MLP method, level configuration,
# query method) combination; each file contributes exactly one row to df_graph.
info_frames = []
for area in AREAS:
    for mlp_method in MLP_METHODS:
        for level in MLP_LEVELS:
            # Level lists are encoded with underscores in the file names,
            # e.g. [512, 4, 4] -> "512_4_4".
            levels = "_".join(map(str, level))
            for query in QUERY_METHODS:
                info_path = f"{EVAL_DIR}/{area}-{mlp_method}-{levels}-{query}-info.json"
                # typ='series' reads the JSON object as one Series, which is
                # then wrapped into a single-row frame.
                df_new = pd.DataFrame([pd.read_json(info_path, typ='series')])
                # Tag the row with the configuration it belongs to so later
                # cells can filter on it.
                df_new["Area"] = area
                df_new["MLP_method"] = mlp_method
                df_new["Levels"] = levels
                df_new["Query"] = query
                info_frames.append(df_new)

# Concatenate once instead of growing df_graph inside the loop: this avoids
# re-copying all previous rows per file and never concatenates with an empty
# seed frame (deprecated in recent pandas).
df_graph = pd.concat(info_frames, ignore_index=True) if info_frames else pd.DataFrame()

In [None]:
# Sanity check: every distinct `amount_edges` value has to show up exactly
# len(QUERY_METHODS) times, i.e. once per query method.
assert (df_graph.groupby("amount_edges").size() == len(QUERY_METHODS)).all()

In [None]:
# Make the column numeric; downcast="float" asks pandas for the smallest
# float dtype that can hold the values.
df_graph["amount_used_edges"] = pd.to_numeric(
    df_graph["amount_used_edges"],
    downcast="float",
)

In [None]:
# Build the result table: one row per level configuration and one column per
# fast query method holding the matching `amount_used_edges`.
table_rows = []
for area in AREAS:
    # Reference value: the first "normal" query row of this area.
    dijkstra = df_graph[(df_graph.Query == "normal") & (df_graph.Area == area)]["amount_used_edges"].iloc[0]
    print("original edge amount:", dijkstra)
    for mlp in MLP_METHODS:
        for partitions in MLP_LEVELS:
            levels = "_".join(map(str, partitions))
            line = {}
            for query in FAST_QUERY_METHODS:
                selected = df_graph[
                    (df_graph.Area == area)
                    & (df_graph.Query == query)
                    & (df_graph.MLP_method == mlp)
                    & (df_graph.Levels == levels)
                ]
                # .values[0] raises IndexError if the combination was not loaded.
                line[query.upper()] = selected["amount_used_edges"].values[0]
            line["partitions"] = levels
            table_rows.append(line)

# Create the frame once from all records instead of concatenating one
# single-row frame per loop iteration.
df_table = pd.DataFrame(table_rows)

In [None]:
def special_hacky_sort(x):
    """Return a numeric sort key for "-"-separated level strings.

    Each entry of the string Series ``x`` (e.g. ``"512-4-4"``) is split on
    "-", the parts are converted to floats, and the key is their sum per
    entry. Entries with fewer parts simply contribute fewer addends.
    """
    level_parts = x.str.split("-", expand=True)
    numeric_parts = level_parts.astype(float)
    return numeric_parts.sum(axis=1)

In [None]:
# Show level configurations as "512-4-4" instead of "512_4_4".
df_table["partitions"] = df_table["partitions"].str.replace("_", "-")
# One row per configuration, ordered by the key from special_hacky_sort.
df_edges = (
    df_table
    .groupby("partitions")
    .first()
    .sort_values(by="partitions", key=special_hacky_sort)
)
# Numbers are rendered through format_tex; escape=False turns off pandas'
# escaping of LaTeX special characters.
latex = df_edges.to_latex(escape=False, float_format=format_tex)
df_edges

In [None]:
# fixup ugly latex code to have single line header
latex_list = latex.splitlines()

# Add a vertical rule after the index column in the tabular column spec.
latex_list[0] = latex_list[0].replace('lr', 'l|r', 1)

# Assumes the to_latex layout: line 2 carries the column labels (query
# methods) and line 3 the index name -- verify when upgrading pandas.
columns = latex_list[2].split("&")
indices = latex_list[3].split("&")

# Caption spanning all query-method columns. Raw strings keep the LaTeX
# backslashes literal ("\m" is not a valid Python escape sequence).
latex_list[2] = r" & \multicolumn{" + str(len(FAST_QUERY_METHODS)) + r"}{c}{Dijkstra-Query} \\"

# Merge index name and column labels into a single header line.
latex_list[3] = "&".join(indices[:1] + columns[1:])

# Insert rules 10 and then 6 lines before the end; with four body rows per
# group followed by two closing lines this separates the row groups.
latex_list.insert(len(latex_list)-10, r'\midrule')
latex_list.insert(len(latex_list)-6, r'\midrule')
latex_new = '\n'.join(latex_list)

with open(OUTPUT + "-edges.tex", "w") as latex_file:
    # latex_new is a single string, so write() is the matching call
    # (writelines() would iterate it character by character).
    latex_file.write(latex_new)