In [None]:
import re

import pandas as pd
from pathlib import Path
from ploting import *
import json

In [None]:
# Base directory of the benchmark results; the loading cell below reads the
# JSON files found in `{root_path}/data`.
root_path = "frontier"

In [None]:
# Result-file naming scheme. Capture groups:
#   1: scaling mode ("weak" or "strong")
#   2: number after "nodes_"
#   3: number after "tpn_"
#   4: device ("gpu" or "cpu")
# Raw string so that `\d` reaches the regex engine as-is instead of being an
# invalid string escape (a SyntaxWarning on recent Python versions).
name_re = re.compile(r"(weak|strong)_scaling-nodes_(\d+)-tpn_(\d+)-(gpu|cpu)")

In [None]:
# Peek at a single raw result file to see the JSON layout of the benchmark output.
# The `with` block closes the file handle, and the path is built from
# `root_path` so it follows the same base directory as the bulk loading cell.
with open(f"{root_path}/data/weak_scaling-nodes_1-tpn_1-gpu.json") as sample_file:
    data = json.load(sample_file)
data

In [None]:
def flatten(json_data, extra_data):
    """Turn nested benchmark runs into one flat dict per (run, SpMV variant).

    Args:
        json_data: iterable of run dicts. Each run must have a ``"spmv"`` entry
            mapping a variant name to a dict of that variant's values.
        extra_data: dict of additional key/value pairs copied into every row.

    Returns:
        A list of dicts. Each dict holds, in this insertion order: ``"spmv"``
        (the variant name), the entries of ``extra_data``, the variant's own
        entries, and finally every top-level entry of the run except
        ``"spmv"`` and ``"optimal"``. Later entries overwrite earlier ones
        that share a key.
    """
    skipped_keys = ("spmv", "optimal")
    rows = []
    for run in json_data:
        for variant_name, variant_values in run["spmv"].items():
            row = dict(spmv=variant_name)
            for source in (extra_data, variant_values):
                row.update(**source)
            row.update({key: value for key, value in run.items() if key not in skipped_keys})
            rows.append(row)
    return rows

In [None]:
# Gather every result file in `{root_path}/data` whose name matches `name_re`
# into one flat table with a row per (run, SpMV variant).
data = []
# Sorted so the row order does not depend on the file system's listing order.
for path in sorted(Path(f"{root_path}/data").iterdir()):
    name_match = name_re.search(path.name)
    if name_match is None:
        continue
    # Task count = product of the `nodes_` and `tpn_` numbers in the file name.
    tasks = int(name_match.group(2)) * int(name_match.group(3))
    # Context manager: close each file instead of leaking the handle.
    with path.open() as result_file:
        data += flatten(json.load(result_file), {"tasks": tasks})

if not data:
    # Fail with a clear message instead of a KeyError on the missing columns below.
    raise FileNotFoundError(
        f"no benchmark runs found in {root_path}/data matching {name_re.pattern!r}"
    )

df = (
    pd.DataFrame(data)
    .assign(
        local_size=lambda d: d["size"] // d["tasks"],
        nnz=lambda d: d["nnz"] * d["tasks"],  # benchmarks report only local nnz
    )
    .drop(columns=["size", "max_relative_norm2", "completed", "repetitions", "comm_pattern"])
)
df

In [None]:
# Wide table: one row per (tasks, local_size, stencil); each remaining column
# of `df` is split into one sub-column per SpMV variant. Columns and rows are
# both sorted.
piv = (
    df.pivot(index=["tasks", "local_size", "stencil"], columns=["spmv"])
    .pipe(lambda wide: wide[wide.columns.sort_values()])
    .sort_index()
)
piv

In [None]:
# NOTE(review): scratch estimate with undocumented constants — presumably the
# halo size in KiB of a cubic local domain with 100000 unknowns
# (N^(2/3) points per face * 6 faces * 8 bytes per value / 1024); confirm.
100_000 ** (2 / 3) * 6 * 8 / 1024

In [None]:
# Ratio of the single-task time (rows with tasks == 1) to the time at each
# task count, per SpMV variant; the tasks == 1 rows themselves are dropped.
efficiency = (piv["time"].loc[1] / piv["time"]).query("tasks > 1")

In [None]:
# Plot the efficiency of one SpMV variant against the task count: one panel
# per stencil, one line per local problem size.
spmv = "csr-coo"
tmp_efficiency = efficiency[spmv].unstack(["stencil", "local_size"])
# Only stencils that actually occur in the columns get a panel.
stencils = tmp_efficiency.columns.remove_unused_levels().levels[0]
# Two panels per row; the row count follows the data so that no stencil is
# silently dropped when there are more than four (still 2x2 for four stencils).
n_rows = max(1, (len(stencils) + 1) // 2)
fig, axs = plt.subplots(
    n_rows,
    2,
    figsize=(14 * 0.75, 6 * 0.75 * n_rows),
    dpi=300,
    sharex=True,
    sharey=True,
    squeeze=False,  # always a 2-D array of axes, even for a single row
)
for stencil, ax in zip(stencils, axs.flatten()):
    stencil_efficiency = tmp_efficiency[stencil]
    ax.set_prop_cycle(default_cycler)
    ax.set_title(f"{stencil} with {spmv}")
    stencil_efficiency.plot(ax=ax, legend=True, logx=True)
    ax.set_ylabel("Efficiency w.r.t. 1 GPU")
    ax.set_xlabel("Num GPUs")
# Hide the leftover panel when the number of stencils is odd.
for unused_ax in axs.flatten()[len(stencils):]:
    unused_ax.set_visible(False)

In [None]:
# Throughput per SpMV variant, scaled by 1e12 (plotted as TFLOP/s below):
# two floating-point operations per nonzero divided by the measured time.
# `nnz` was already scaled from local to global in the loading cell.
# NOTE(review): assumes `time` is in seconds for one SpMV application — confirm.
flops = (2 * piv["nnz"]) / piv["time"] / 1e12
# Bare last expression so the notebook renders the frame as a table
# instead of the plain-text dump produced by print().
flops

In [None]:
# Plot the throughput (`flops`) of one SpMV variant against the task count on
# log-log axes: one panel per stencil, one line per local problem size.
spmv = "csr-coo"
tmp_flops = flops[spmv].unstack(["stencil", "local_size"])
# Only stencils that actually occur in the columns get a panel.
stencils = tmp_flops.columns.remove_unused_levels().levels[0]
# Two panels per row; the row count follows the data so that no stencil is
# silently dropped when there are more than four (still 2x2 for four stencils).
n_rows = max(1, (len(stencils) + 1) // 2)
fig, axs = plt.subplots(
    n_rows,
    2,
    figsize=(14 * 0.75, 6 * 0.75 * n_rows),
    dpi=300,
    sharex=True,
    sharey=True,
    squeeze=False,  # always a 2-D array of axes, even for a single row
)
for stencil, ax in zip(stencils, axs.flatten()):
    stencil_flops = tmp_flops[stencil]
    ax.set_prop_cycle(default_cycler)
    stencil_flops.plot(ax=ax, legend=True, logx=True, logy=True)
    ax.set_title(f"{stencil} with {spmv}")
    ax.set_ylabel("TFLOP/s")
    ax.set_xlabel("Num GPUs")
# Hide the leftover panel when the number of stencils is odd.
for unused_ax in axs.flatten()[len(stencils):]:
    unused_ax.set_visible(False)