In [None]:
import re

import pandas as pd
from pathlib import Path
from ploting import *
import json

In [None]:
# Directory containing the benchmark results; the loading cell further down
# reads its "with" and "without" subdirectories.
root_path = "frontier"

In [None]:
# Matches result-file names and captures, in order: the scaling mode
# ("weak" or "strong"), the integer after "nodes_", the integer after "tpn_",
# and the device ("gpu" or "cpu").
# Written as a raw string so that `\d` reaches the regex engine verbatim rather
# than being an invalid string escape (which Python warns about).
name_re = re.compile(r"(weak|strong)_scaling-nodes_(\d+)-tpn_(\d+)-(gpu|cpu)")

In [None]:
def flatten(json_data, extra_data):
    """Turn nested benchmark runs into a flat list of per-kernel records.

    For every run in ``json_data`` and every entry of that run's ``"spmv"``
    mapping, one dict is produced. Its keys are filled in this order, with
    later sources overriding earlier ones when a key repeats:

    1. ``"spmv"``: the name of the entry inside ``run["spmv"]``,
    2. all items of ``extra_data``,
    3. all items of that ``"spmv"`` entry,
    4. all top-level items of the run except ``"spmv"`` and ``"optimal"``.

    Parameters
    ----------
    json_data : iterable of dict
        Runs; each must provide a ``"spmv"`` mapping of name -> dict.
    extra_data : dict
        Additional key/value pairs copied into every record.

    Returns
    -------
    list of dict
        One record per (run, spmv entry) pair, in iteration order.
    """
    excluded = ("spmv", "optimal")
    rows = []
    for run in json_data:
        kernels = run["spmv"]
        # Run-level fields are the same for every kernel of this run.
        shared = {key: value for key, value in run.items() if key not in excluded}
        for kernel_name, kernel_fields in kernels.items():
            row = dict(spmv=kernel_name)
            row.update(**extra_data)
            row.update(**kernel_fields)
            row.update(shared)
            rows.append(row)
    return rows

In [None]:
def read_data(path, *, progress_thread):
    """Load all benchmark result files found in ``path`` into one DataFrame.

    Only directory entries whose name matches ``name_re`` are read. Each file
    is parsed as JSON and flattened with ``flatten``; every resulting row is
    tagged with ``tasks`` (the product of the two integers captured from the
    file name) and with the given ``progress_thread`` value.

    Parameters
    ----------
    path : str or os.PathLike
        Directory to scan (not recursive).
    progress_thread
        Value stored in the ``progress_thread`` column of every row.

    Returns
    -------
    pandas.DataFrame
        One row per flattened record, with ``local_size`` added
        (``size // tasks``), ``nnz`` multiplied by ``tasks``, and the columns
        ``size``, ``max_relative_norm2``, ``completed``, ``repetitions`` and
        ``comm_pattern`` removed.

    Raises
    ------
    KeyError
        If the loaded records lack a column used here (for instance when no
        file in ``path`` matched ``name_re``).
    """
    data = []
    # Sort the entries so the row order does not depend on the order in which
    # the directory happens to be listed.
    for file in sorted(Path(path).iterdir()):
        if m := name_re.search(file.name):
            # Context manager so the file handle is closed right after parsing
            # instead of being left open until garbage collection.
            with open(file) as handle:
                runs = json.load(handle)
            data += flatten(runs, {"tasks": int(m.group(2)) * int(m.group(3)),
                                   "progress_thread": progress_thread})
    df = pd.DataFrame(data)
    df["local_size"] = df["size"] // df["tasks"]
    df["nnz"] = df["nnz"] * df["tasks"]  # benchmarks report only local nnz
    return df.drop(columns=["size", "max_relative_norm2", "completed", "repetitions", "comm_pattern"])


In [None]:
# Combine the runs with and without a progress thread into one frame.
# ignore_index=True renumbers the rows; otherwise both inputs would contribute
# their own 0..n-1 labels and the combined index would contain duplicates.
df = pd.concat(
    [
        read_data(f"{root_path}/with", progress_thread=True),
        read_data(f"{root_path}/without", progress_thread=False),
    ],
    ignore_index=True,
)
df

In [None]:
# Index the rows by (tasks, local_size, progress_thread) and sort by that key.
piv = (
    df
    .set_index(["tasks", "local_size", "progress_thread"])
    .sort_index()
)
piv

In [None]:
# Spread the "time" column into one column per progress_thread value.
# NOTE(review): unstack needs every (tasks, local_size, progress_thread) key to
# be unique — confirm the data holds a single row per key.
time = piv["time"].unstack(level="progress_thread")
time

In [None]:
# Time without a progress thread divided by time with one (values above 1 mean
# the progress-thread run took less time), laid out with one column per
# local_size.
speedup = time[False].div(time[True]).unstack(level="local_size")
speedup

In [None]:
# Plot the speedup against the index of `speedup` on a logarithmic x-axis, one
# line per column. Label the y-axis and title the figure so it can be read on
# its own; the trailing `;` keeps the Axes repr out of the cell output.
speedup.plot(
    logx=True,
    ylabel="speedup (time without / time with progress thread)",
    title="Speedup from using a progress thread",
);