In [None]:
import pandas as pd
import numpy as np
import os
import csv
import sys
import matplotlib.pyplot as plt
from helper import *
# Default size (width, height in inches) for every figure created in this notebook.
plt.rcParams['figure.figsize'] = [8, 8]

In [None]:
# Directory that is searched for the .fmi graph files and that contains log.json.
# It can be overridden per machine through the MLP_EVAL_DIR environment variable;
# the fallback is the original hard-coded location, so existing setups keep working.
EVAL_DIR = os.environ.get("MLP_EVAL_DIR") or "/home/felix/todo/osm-tmp"
# Partitioning methods to compare: kmeans, merge, hop (each is part of the .mlp file names).
MLP_METHODS = ["kmeans", "merge", "hop"]
# Layer parameter of each run; it is also the numeric suffix of the .mlp file names.
MLP_LAYERS = [250, 500, 750, 1_000, 1_500, 2_000, 3_000, 4_000, 6_000, 8_000, 12_000]

In [None]:
# Map every (fmi_file, method, layer) triple to the files used for that run.
combinations = {}
for fmi_file in find_files_ending(".fmi", EVAL_DIR):
    # The .mlp name shares the graph's path prefix: <graph>-<method>-<layer>.mlp
    graph_stem = fmi_file.replace(".fmi", "")
    for method in MLP_METHODS:
        for layer in MLP_LAYERS:
            combinations[(fmi_file, method, layer)] = {
                "fmi_file": fmi_file,
                "mlp_file": f"{graph_stem}-{method}-{layer}.mlp",
            }

In [None]:
# Run the CSV export script once per combination; exports that already exist are skipped.
for (fmi_file, method, partitions), inputs in combinations.items():
    output = inputs["mlp_file"].replace(".mlp", ".csv")
    if not not_created_yet(output, EVAL_DIR):
        continue
    # Use the interpreter that runs this notebook so the script sees the same environment.
    export_command = [
        sys.executable,
        "../analysis/fmi+mlp_csv-export.py",
        "-f", inputs["fmi_file"],
        "-m", inputs["mlp_file"],
        "-o", output,
    ]
    shell_execute(export_command, EVAL_DIR)

In [None]:
# Load every exported CSV, drop the coordinate columns and tag the rows with the
# method and layer parameter ("partitions") of the run they belong to.
frames = []
for ((fmi_file, method, partitions), inputs) in combinations.items():
    csv_file = inputs["mlp_file"].replace(".mlp", ".csv")
    frames.append(
        pd.read_csv(csv_file)
        .drop(columns=["lat", "lng"])
        .assign(method=method, partitions=partitions)
    )

# Concatenate once: growing a frame with pd.concat inside the loop copies all
# previously loaded rows on every iteration. The original row indices are kept
# (no ignore_index). pd.concat rejects an empty list, hence the explicit fallback.
df = pd.concat(frames) if frames else pd.DataFrame()

In [None]:
# Scatter plot: number of border nodes against the size of the largest partition,
# one marker style per partitioning method and one point per layer parameter.
fig, ax = plt.subplots()
ax.set_title('MLP-Layers')
markers = ['o', '+', 'x', '*', '.']

for method, marker in zip(MLP_METHODS, markers):
    max_partition_sizes = []
    cut_sizes = []
    for layer in MLP_LAYERS:
        # Rows of the run with this method and layer parameter.
        run_rows = df[(df.method == method) & (df.partitions == layer)]
        # value_counts() is ordered by frequency, so [1:] leaves out the most
        # common highest_diff value (presumably the non-border nodes; TODO confirm).
        cut_size = sum(run_rows['highest_diff'].value_counts()[1:])
        max_partition_size = max(run_rows['partition0'].value_counts())
        max_partition_sizes.append(max_partition_size)
        cut_sizes.append(cut_size)
    ax.scatter(max_partition_sizes, cut_sizes, marker=marker, label=method)

ax.set_xlabel("maximum partition size")
ax.set_ylabel("amount of border nodes")
ax.legend(loc='upper right')
fig.savefig("mlp-max_partition_size-cut_size.png")
plt.show()

In [None]:
# Scatter plot: size of the largest partition against the number of distinct
# partitions, one marker style per partitioning method.
fig, ax = plt.subplots()
ax.set_title('MLP-Layers')
markers = ['o', '+', 'x', '*', '.']

for method, marker in zip(MLP_METHODS, markers):
    partition_counts = []
    max_partition_sizes = []
    for layer in MLP_LAYERS:
        # Partition ids (column partition0) of the run with this method and layer parameter.
        partition_ids = df[(df.method == method) & (df.partitions == layer)]['partition0']
        number_of_partitions = len(partition_ids.unique())
        max_partition_size = max(partition_ids.value_counts())
        partition_counts.append(number_of_partitions)
        max_partition_sizes.append(max_partition_size)
    ax.scatter(partition_counts, max_partition_sizes, marker=marker, label=method)

ax.set_xlabel("number of partitions")
ax.set_ylabel("maximum partition size")
ax.legend(loc='upper right')
fig.savefig("mlp-number_of_partitions-max_partition_size.png")
plt.show()

In [None]:
# Load the run log; with orient='index' every top-level JSON key becomes one row.
# The open file is handed to pandas directly: passing the JSON text itself as a
# string to read_json is deprecated in recent pandas, and re-joining the lines
# by hand does not change the parsed result.
with open(EVAL_DIR + "/log.json") as file:
    log = pd.read_json(file, orient='index')

In [None]:
# Join each logged command (presumably a list of arguments) into a single
# space-separated string, so it can be searched with the .str accessor instead
# of being compared against a list.
# Series.apply is the explicit element-wise call. Series.agg with a callable only
# reached the same result through a deprecated try-element-wise fallback; without
# that fallback the lambda would receive the whole column at once.
log['command_string'] = log['command'].apply(lambda args: ' '.join(map(str, args)))

In [None]:
# Scatter plot: logged runtime (log-scaled y axis) against the number of distinct
# partitions, one marker style per partitioning method.
fig, ax = plt.subplots()
ax.set_title('MLP-Layers')
markers = ['o', '+', 'x', '*', '.']

for method, marker in zip(MLP_METHODS, markers):
    partition_counts = []
    runtimes = []
    for layer in MLP_LAYERS:
        # Log entries whose command mentions "mlp_<method>" and ends with " <layer>".
        mentions_method = log['command_string'].str.contains("mlp_" + method)
        ends_with_layer = log['command_string'].str.endswith(" " + str(layer))
        matches = log[mentions_method & ends_with_layer]
        if matches.shape[0] == 0:
            # Nothing logged for this run: leave the point out of the plot.
            continue
        if matches.shape[0] > 1:
            raise Exception('invalid amounts (unsure which to take)')
        # The 'time' field is text ending in " seconds"; strip the unit before converting.
        runtime = matches.iloc[0]['time'].replace(" seconds", "")
        number_of_partitions = len(df[(df.method == method) & (df.partitions == layer)]['partition0'].unique())
        partition_counts.append(number_of_partitions)
        runtimes.append(float(runtime))
    ax.scatter(partition_counts, runtimes, marker=marker, label=method, alpha=0.8)

ax.set_xlabel("number of partitions")
ax.set_ylabel("runtime in seconds")
ax.set_yscale('log')
ax.legend(loc='upper center')
fig.savefig("mlp-number_of_partitions-runtime.png")
plt.show()