In [None]:
from helper import *

import pandas as pd
import numpy as np
import os
import csv
import sys
import matplotlib
import matplotlib.pyplot as plt
# Default size for every figure created below: [8, 8] (matplotlib interprets
# figure.figsize as width/height in inches).
plt.rcParams['figure.figsize'] = [8, 8]
# Select the PGF backend and render all text through LaTeX (pdflatex) with a
# serif font; 'pgf.rcfonts': False stops matplotlib from injecting its own
# font setup so the fonts of the including LaTeX document are used.
matplotlib.use("pgf")
matplotlib.rcParams.update({
    "pgf.texsystem": "pdflatex",
    'font.family': 'serif',
    'text.usetex': True,
    'pgf.rcfonts': False,
})
# NOTE(review): this magic runs after matplotlib.use("pgf"), so it presumably
# re-activates the inline backend for on-screen display while the .pgf files
# written via fig.savefig() below are still produced by the PGF backend
# (chosen from the file extension) -- confirm before reordering these lines.
%matplotlib inline

In [None]:
# Directory holding the evaluation artefacts (.fmi graphs, generated .mlp/.csv
# files and log.json). The historical location stays the default, but it can be
# overridden through the MLP_EVAL_DIR environment variable so the notebook also
# runs on machines that do not have this exact home directory.
EVAL_DIR = os.environ.get("MLP_EVAL_DIR", "/home/felix/todo/algohol/single")
# Method names; they are used below as part of the generated file names and to
# match commands in the log.
MLP_METHODS = ["kmeans", "gonzalez", "merge"]
# One single-element level list per run: [[512], [1024], [2048], [4096]].
# Built from integer exponents 9..12 so no float rounding or half-open float
# endpoint is involved.
MLP_LEVELS = [[2 ** exponent] for exponent in range(9, 13)]
print(MLP_METHODS, "with", MLP_LEVELS)

# csv

In [None]:
# Build one entry per (graph file, method, level tag) triple. The value records
# the input .fmi path and the name of the matching .mlp file, which is
# "<fmi path without .fmi>-<method>-<level tag>.mlp".
# find_files_ending comes from the star-import of `helper`; presumably it
# yields the paths below EVAL_DIR that end in ".fmi" -- confirm in helper.
combinations = {}
for fmi_file in find_files_ending(".fmi", EVAL_DIR):
    base_name = fmi_file.replace(".fmi", "")
    for method in MLP_METHODS:
        for level in MLP_LEVELS:
            # Level sizes joined with "_" (e.g. [512] -> "512").
            level_tag = "_".join(str(size) for size in level)
            combinations[(fmi_file, method, level_tag)] = {
                "fmi_file": fmi_file,
                "mlp_file": "-".join([base_name, method, level_tag]) + ".mlp",
            }

In [None]:
# Read the CSV that sits next to every .mlp file (same path, ".csv" instead of
# ".mlp"), drop the "lat"/"lon" columns and tag the rows with the method and
# the partition tag they belong to.
# The frames are collected in a list and concatenated once: calling pd.concat
# inside the loop re-copies all previously loaded rows on every iteration.
frames = []
for ((fmi_file, method, partitions), inputs) in combinations.items():
    csv_file = inputs["mlp_file"].replace(".mlp", ".csv")
    frames.append(
        pd.read_csv(csv_file)
        .drop(columns=["lat", "lon"])
        .assign(method=method, partitions=partitions)
    )

# The per-file row index is kept on purpose (no ignore_index), exactly as
# before. pd.concat([]) raises, so fall back to an empty frame when nothing
# was found.
df = pd.concat(frames) if frames else pd.DataFrame()

In [None]:
# Scatter plot: largest partition size (x) against the cut size (y), one colour
# per method and one point per level. A faint dashed outline connects the
# points of all methods that belong to the same level.
fig, ax = plt.subplots()
# fig, ax = plt.subplots(figsize=(4, 3))

# Compute both statistics once per (method, level); the outline loop and the
# scatter loop below need exactly the same numbers.
stats = dict()
for method in MLP_METHODS:
    for level in MLP_LEVELS:
        level_tag = "_".join(map(str, level))
        subset = df[(df.method == method) & (df.partitions == level_tag)]
        # value_counts() is ordered by frequency, so skipping the first entry
        # *by position* (.iloc) leaves out the most frequent 'highest_diff'
        # value and sums the counts of all others. Plain [1:] is avoided
        # because the index consists of data values, not positions.
        # NOTE(review): presumably the most frequent value marks non-boundary
        # nodes -- confirm against the tool that writes the CSV files.
        cut_size = sum(subset['highest_diff'].value_counts().iloc[1:])
        # Row count of the most populated 'partition0' value.
        max_partition_size = max(subset['partition0'].value_counts())
        stats[(method, level_tag)] = (max_partition_size, cut_size)

for level in MLP_LEVELS:
    level_tag = "_".join(map(str, level))
    points = [stats[(method, level_tag)] for method in MLP_METHODS]
    # Repeat the first point to close the outline (a triangle for 3 methods).
    points.append(points[0])
    x = [point[0] for point in points]
    y = [point[1] for point in points]
    ax.plot(x, y, linestyle="dashed", color="black", alpha=0.3, zorder=0)
for method in MLP_METHODS:
    points = [stats[(method, "_".join(map(str, level)))] for level in MLP_LEVELS]
    x = [point[0] for point in points]
    y = [point[1] for point in points]
    # High zorder keeps the markers on top of the dashed outlines.
    ax.scatter(x, y, color=plot_get(method), label=mlp_title(method), zorder=999)

ax.set_xlabel("Maximum partition size")
ax.set_ylabel("Boundary nodes")
ax.legend(loc='upper right')
fig.savefig("mlp-cut_size.pgf", bbox_inches="tight")

In [None]:
# Line + scatter plot: number of distinct 'partition0' values (x) against the
# row count of the least populated one (y), one series per method.
fig, ax = plt.subplots()

for method in MLP_METHODS:
    partition_totals = []
    smallest_sizes = []
    for level in MLP_LEVELS:
        level_tag = "_".join(str(size) for size in level)
        # Rows of this (method, level) run; both statistics are derived from
        # the same 'partition0' column.
        subset = df[(df.method == method) & (df.partitions == level_tag)]
        partition_ids = subset['partition0']
        partition_totals.append(len(partition_ids.unique()))
        smallest_sizes.append(min(partition_ids.value_counts()))
    ax.plot(partition_totals, smallest_sizes, color=plot_get(method))
    ax.scatter(partition_totals, smallest_sizes, color=plot_get(method), label=mlp_title(method))

ax.set_xlabel("Partitions")
ax.set_ylabel("Minimum partition size")
# ax.set_yscale('log')
ax.legend(loc='upper right')
# plt.ticklabel_format(style='sci', axis='y', scilimits=(0,0))
fig.savefig("mlp-min_partition_size.pgf", bbox_inches="tight")

In [None]:
# Line + scatter plot: number of distinct 'partition0' values (x) against the
# row count of the most populated one (y), one series per method.
# fig, ax = plt.subplots(1, 1, figsize=set_pgf_size(TEXT_WIDTH * 0.45))
fig, ax = plt.subplots()

for method in MLP_METHODS:
    partition_totals = []
    largest_sizes = []
    for level in MLP_LEVELS:
        level_tag = "_".join(str(size) for size in level)
        # Rows of this (method, level) run; both statistics are derived from
        # the same 'partition0' column.
        subset = df[(df.method == method) & (df.partitions == level_tag)]
        partition_ids = subset['partition0']
        partition_totals.append(len(partition_ids.unique()))
        largest_sizes.append(max(partition_ids.value_counts()))
    ax.plot(partition_totals, largest_sizes, color=plot_get(method))
    ax.scatter(partition_totals, largest_sizes, color=plot_get(method), label=mlp_title(method))

ax.set_xlabel("Partitions")
ax.set_ylabel("Maximum partition size")
# ax.set_yscale('log')
ax.legend(loc='upper right')
# plt.ticklabel_format(style='sci', axis='y', scilimits=(0,0))
fig.savefig("mlp-max_partition_size.pgf", bbox_inches="tight")

In [None]:
# Line + scatter plot: number of distinct 'partition0' values (x) against the
# cut size (y), one series per method.
# fig, ax = plt.subplots(1, 1, figsize=set_pgf_size(TEXT_WIDTH * 0.45))
fig, ax = plt.subplots()

for method in MLP_METHODS:
    x = list()
    y = list()
    for level in MLP_LEVELS:
        # Filter the rows of this (method, level) run once and reuse them.
        subset = df[(df.method == method) & (df.partitions == "_".join(map(str, level)))]
        # value_counts() is ordered by frequency, so skipping the first entry
        # *by position* (.iloc) leaves out the most frequent 'highest_diff'
        # value and sums the counts of all others. Plain [1:] is avoided
        # because the index consists of data values, not positions.
        # NOTE(review): presumably the most frequent value marks non-boundary
        # nodes -- confirm against the tool that writes the CSV files.
        cut_size = sum(subset['highest_diff'].value_counts().iloc[1:])
        number_of_partitions = len(subset['partition0'].unique())
        x.append(number_of_partitions)
        y.append(cut_size)
    ax.plot(x, y, color=plot_get(method))
    ax.scatter(x, y, color=plot_get(method), label=mlp_title(method))

ax.set_xlabel("Partitions")
ax.set_ylabel("Border nodes")
ax.legend(loc='upper left')
# plt.ticklabel_format(style='sci', axis='y', scilimits=(0,0))
fig.savefig("mlp-partition_size.pgf", bbox_inches="tight")

# log

In [None]:
# Load EVAL_DIR/log.json into a DataFrame; orient='index' turns every
# top-level JSON key into one row.
# The open handle is handed to pandas directly: passing the file content as a
# literal JSON string is deprecated in recent pandas releases, and reading the
# lines into a temporary list first is unnecessary.
with open(EVAL_DIR + "/log.json") as file:
    log = pd.read_json(file, orient='index')

In [None]:
# Flatten every 'command' entry (an iterable of arguments) into a single
# space-separated string so it can be matched with the .str accessor instead
# of comparing lists.
# Series.map is the elementwise operation. Series.agg only worked here through
# its "try per element first" fallback, which is deprecated and could silently
# aggregate the whole column instead.
log['command_string'] = log['command'].map(lambda parts: ' '.join(map(str, parts)))

In [None]:
# Line + scatter plot: number of distinct 'partition0' values (x) against the
# logged runtime in seconds (y, log scale), one series per method.
fig, ax = plt.subplots()
# fig, ax = plt.subplots(figsize=(4, 3))
for method in MLP_METHODS:
    partition_totals = []
    runtimes = []
    for level in MLP_LEVELS:
        # A log row belongs to this run when its command mentions
        # "mlp_<method>" and ends with the space-separated level sizes.
        mentions_method = log['command_string'].str.contains("mlp_" + method)
        ends_with_level = log['command_string'].str.endswith(" " + " ".join(map(str, level)))
        runs = log[mentions_method & ends_with_level]
        # Levels without any matching log row are left out of the plot.
        if len(runs) == 0:
            continue
        # Use the last matching row; its 'time' field looks like
        # "<number> seconds".
        latest = runs.iloc[-1]
        runtime_text = latest['time'].replace(" seconds", "")
        subset = df[(df.method == method) & (df.partitions == "_".join(map(str, level)))]
        partition_totals.append(len(subset['partition0'].unique()))
        runtimes.append(float(runtime_text))
    ax.plot(partition_totals, runtimes, color=plot_get(method))
    ax.scatter(partition_totals, runtimes, color=plot_get(method), label=mlp_title(method))

ax.set_xlabel("Partitions")
ax.set_ylabel("Runtime [sec]")
ax.set_yscale('log')
ax.legend(loc='center right')
fig.savefig("mlp-runtime.pgf", bbox_inches="tight")