In [None]:
import re

import pandas as pd
from pathlib import Path
from ploting import *
import json

In [None]:
# Directory prefix (relative to the working directory) for this run:
# result files are read from f"{root_path}/data" and figures are written
# to f"{root_path}/img" by the cells below.
root_path = "perlmutter"

In [None]:
# Matches result files named "timings.<N>.json"; group 1 captures <N>,
# which the loading cell stores as the "tasks" column.
# A raw string is used so that `\.` and `\d` reach the regex engine verbatim
# instead of being treated as (invalid) string escape sequences.
name_re = re.compile(r"timings\.(\d+)\.json")

In [None]:
# Peek at one raw result file to inspect the structure of its "benchmarks" list.
# NOTE(review): this path is hard-coded to "frontier" while `root_path` is
# "perlmutter" — confirm whether this should read from f"{root_path}/data".
# The `with` block closes the file handle as soon as parsing is done.
with open("frontier/data/timings.16.json") as fh:
    data = json.load(fh)
data["benchmarks"]

In [None]:
# Multipliers that convert a value expressed in the given unit into seconds.
_SECONDS_PER_UNIT = {"ns": 1e-9, "us": 1e-6, "ms": 1e-3, "s": 1.0}


def to_seconds(t, unit):
    """Convert a duration to seconds.

    Args:
        t: Duration value; anything accepted by ``float()`` (number or
            numeric string).
        unit: Unit of ``t``; one of ``"ns"``, ``"us"``, ``"ms"`` or ``"s"``.

    Returns:
        The duration in seconds as a ``float``.

    Raises:
        KeyError: If ``unit`` is not one of the supported units.
        ValueError: If ``t`` cannot be converted to ``float``.
    """
    return float(t) * _SECONDS_PER_UNIT[unit]

In [None]:
def flatten(json_data, extra_data=None):
    """Turn a parsed benchmark result file into a list of flat row dicts.

    Each entry of ``json_data["benchmarks"]`` must have a ``"name"`` of the
    form ``"<operation>/<comm>/<msg_size>/<kernels>"`` (any further
    ``/``-separated components are ignored) plus ``"real_time"``,
    ``"cpu_time"`` and ``"time_unit"`` fields.
    NOTE(review): the layout looks like Google Benchmark JSON output, which
    carries ``cpu_time`` next to ``real_time`` — confirm for these files.

    Args:
        json_data: Parsed JSON document containing a ``"benchmarks"`` list.
        extra_data: Optional mapping of additional columns copied into every
            row (e.g. ``{"tasks": 16}``). Keys must not clash with the
            columns produced here.

    Returns:
        A list with one dict per benchmark entry, with keys ``operation``,
        ``comm``, ``msg_size`` (int), ``kernels`` (int), ``real_time`` and
        ``cpu_time`` (both in seconds), plus the keys of ``extra_data``.

    Raises:
        KeyError: If an expected field is missing or the time unit is unknown.
        IndexError: If a benchmark name has fewer than four components.
        ValueError: If ``msg_size`` or ``kernels`` is not an integer.
        TypeError: If ``extra_data`` repeats one of the produced column names.
    """
    extra_data = extra_data or {}
    flat_data = []
    for run in json_data["benchmarks"]:
        full_name = run["name"].split("/")
        operation = full_name[0]
        comm = full_name[1]
        msg_size = full_name[2]
        kernels = full_name[3]
        unit = run["time_unit"]

        flat_run = dict(
            operation=operation,
            comm=comm,
            msg_size=int(msg_size),
            kernels=int(kernels),
            real_time=to_seconds(run["real_time"], unit),
            # CPU time comes from its own field, not from the wall-clock time.
            cpu_time=to_seconds(run["cpu_time"], unit),
            **extra_data,
        )
        flat_data.append(flat_run)
    return flat_data

In [None]:
# Collect the rows of every "timings.<N>.json" file under f"{root_path}/data";
# <N> from the file name is attached to each row as the "tasks" column.
data = []
# sorted() makes the row order independent of the directory listing order.
for file in sorted(Path(f"{root_path}/data").iterdir()):
    if m := name_re.search(file.name):
        # Close each result file right after parsing instead of leaking the handle.
        with open(file) as fh:
            data += flatten(json.load(fh), {"tasks": int(m.group(1))})
df = pd.DataFrame(data)
# Drop rows whose communicator is the literal string 'None'.
df = df[df.comm != 'None']
df

In [None]:
# Move every benchmark parameter into a sorted MultiIndex so the cells below
# can slice with .xs()/.query() and reshape with .unstack().
piv = (
    df
    .set_index(["tasks", "operation", "msg_size", "kernels", "comm"])
    .sort_index()
)
piv

In [None]:
# AllToAll rows with kernels == 5, limited to the MPI and NCCL communicators.
(
    piv
    .xs(("AllToAll", 5), level=("operation", "kernels"))
    .query("comm in ['MPI', 'NCCL']")
)

In [None]:
# Column labels, i.e. (operation, comm) pairs, of the real_time table for
# MPI/NCCL at kernels == 5.
(
    piv
    .query("comm in ['MPI', 'NCCL']")
    .xs(5, level="kernels")
    .unstack(["operation", "comm"])
    .real_time
    .columns
)

In [None]:
# Column labels, i.e. (operation, comm) pairs, of the real_time table for
# MPI/NCCL at kernels == 5.
# NOTE(review): this cell repeats the expression of the cell directly before
# it — candidate for removal.
piv.query("comm in ['MPI', 'NCCL']").xs(5, level="kernels").unstack(["operation", "comm"]).real_time.columns

In [None]:
# Runtime figure: one panel per (operation, comm) column for MPI and NCCL at
# kernels == 5; within a panel there is one line per msg_size.
tmp_df = (
    piv
    .query("comm in ['MPI', 'NCCL']")
    .xs(5, level="kernels")
    .unstack(["operation", "comm"])
    .real_time
)
fig, axs = plt.subplots(
    3, 2,
    figsize=(14 * 0.75, 12 * 0.75),
    dpi=300,
    sharex=True,
    sharey=True,
)
# zip() stops at the shorter sequence, so at most 3 * 2 columns are drawn.
for (operation, comm), ax in zip(tmp_df.columns, axs.flatten()):
    # Rows: remaining index level(s); columns: msg_size.
    _df = tmp_df[operation][comm].dropna().unstack("msg_size")
    ax.set_prop_cycle(default_cycler)
    ax.set_title(f"{operation} with {comm}")
    _df.plot(ax=ax, legend=True, logx=True, logy=True)
    # Axis labels are applied after plotting, as in the original cell order.
    ax.set(ylabel="Runtime in s", xlabel="Num GPUs")
fig.savefig(f"{root_path}/img/runtime.png")

In [None]:
# AllToAll speedup figure: for every communicator other than MPI, plot
# MPI real_time divided by that communicator's real_time (kernels == 5),
# one panel per communicator and one line per msg_size.
operation = "AllToAll"
tmp_df = piv.xs((operation, 5), level=("operation", "kernels")).unstack("comm").real_time
comms = [n for n in tmp_df.columns if n != "MPI"]
# squeeze=False keeps `axs` a 2-D array even when there is only one panel;
# without it a single communicator yields a bare Axes and axs.flatten() fails.
fig, axs = plt.subplots(len(comms), 1, figsize=(14 * 0.7, 12 * 0.7), dpi=300,
                        sharex=True, squeeze=False)
# The MPI baseline is the same for every panel, so compute it once.
_mpi = tmp_df["MPI"].dropna()
for comm, ax in zip(comms, axs.flatten()):
    _other = tmp_df[comm].dropna()
    # Division aligns on the shared index; values > 1 mean `comm` beats MPI.
    speedup = (_mpi / _other).unstack("msg_size")
    ax.set_prop_cycle(default_cycler)
    ax.set_title(f"{operation} Speedup {comm} over MPI")
    speedup.plot(ax=ax, legend=True, logx=True, logy=True)
    # Reference line at speedup == 1 (parity with MPI).
    ax.axhline(1, c="gray", ls="--")
    ax.set_ylabel("Speedup")
    ax.set_xlabel("Num GPUs")
fig.savefig(f"{root_path}/img/all-to-all.png")

In [None]:
# NCCL-over-MPI speedup figure: one panel per operation (kernels == 5),
# one line per msg_size.
tmp_df = piv.real_time.unstack(["operation", "comm"]).xs(5, level="kernels")
operations = tmp_df.columns.levels[0]
# squeeze=False keeps `axs` a 2-D array even when there is only one operation;
# without it a single panel yields a bare Axes and axs.flatten() fails.
fig, axs = plt.subplots(len(operations), 1, figsize=(14 * 0.7, 12 * 0.7), dpi=300,
                        sharex=True, sharey=True, squeeze=False)
for operation, ax in zip(operations, axs.flatten()):
    _mpi = tmp_df[operation]["MPI"].dropna()
    _nccl = tmp_df[operation]["NCCL"].dropna()
    # Division aligns on the shared index; values > 1 mean NCCL beats MPI.
    speedup = (_mpi / _nccl).unstack("msg_size")
    ax.set_prop_cycle(default_cycler)
    ax.set_title(f"{operation} Speedup NCCL over MPI")
    speedup.plot(ax=ax, legend=True, logx=True, logy=True)
    # Reference line at speedup == 1 (parity with MPI).
    ax.axhline(1, c="gray", ls="--")
    ax.set_ylabel("Speedup")
    ax.set_xlabel("Num GPUs")
fig.savefig(f"{root_path}/img/speedup.png")

In [None]:
# Computes 2 * nnz / time, scaled by 1e12, for every row of `piv`.
# NOTE(review): the rows built by `flatten` in this notebook only provide
# `real_time` and `cpu_time` value columns; no visible cell creates `nnz` or
# `time`. This cell presumably stems from a different benchmark notebook and
# would fail on a fresh Restart & Run All — confirm, then fix or remove.
flops = (2 * piv.nnz) / piv.time / 1e12
print(flops)

In [None]:
# Plots `flops` for one `spmv` key, one panel per stencil and one line per
# local_size, on a 2 x 2 grid.
# NOTE(review): relies on `flops` from the previous cell and on index levels
# "stencil" and "local_size", none of which are produced by the visible cells
# of this notebook — presumably left over from another analysis; confirm.
spmv = "csr-coo"
tmp_flops = flops[spmv].unstack(["stencil", "local_size"])
fig, axs = plt.subplots(2, 2, figsize=(14 * 0.75, 12 * 0.75), dpi=300, sharex=True, sharey=True)
# zip() stops at the shorter sequence, so at most four stencils are drawn.
for stencil, ax in zip(tmp_flops.columns.levels[0], axs.flatten()):
    _df = tmp_flops[stencil]
    ax.set_prop_cycle(default_cycler)
    _df.plot(ax=ax, legend=True, logx=True, logy=True)
    ax.set_title(f"{stencil} with {spmv}")
    ax.set_ylabel("TFLOP/s")
    ax.set_xlabel("Num GPUs")
    # NOTE(review): the two lines below are disabled code; the figure is never
    # saved, and `dim` is not defined in any visible cell.
    #ax.legend([f"{n:g}" for n in _df.columns.levels[1]])
    #fig.savefig(f"{root_path}/img/{dim}d.png")