In [None]:
import re

import pandas as pd
from pathlib import Path
from ploting import *
import json
import math

In [None]:
# Root directory of the benchmark results; the strong-scaling loader below
# reads its JSON files from f"{root_path}/data_strong".
root_path = "crusher"

In [None]:
# Parses benchmark result file names. Capture groups, in order:
#   1: scaling mode ("weak" or "strong")
#   2: number of nodes
#   3: processes per node (ppn)
#   4: executor ("gpu" or "cpu")
#   5: problem-size label, digits followed by one word character (e.g. "41M")
# A raw string is required: "\d" and "\w" are not valid *string* escapes and
# trigger a DeprecationWarning (SyntaxWarning on Python >= 3.12) otherwise.
name_re = re.compile(r"(weak|strong)_scaling-nodes_(\d+)-ppn_(\d+)-(gpu|cpu)-size_(\d+\w)")

In [None]:
# Load a single result file to inspect the raw JSON layout.
# The path is built from `root_path` (instead of repeating the directory name)
# and the file is opened in a `with` block so the handle is closed right away.
with open(f"{root_path}/data_strong/strong_scaling-nodes_1-ppn_1-gpu-size_41M.json") as sample_file:
    data = json.load(sample_file)
data

In [None]:
def flatten(json_data, extra_data):
    """Turn nested benchmark records into a flat list of per-solver rows.

    Parameters
    ----------
    json_data : iterable of dict
        Benchmark runs. Each run must provide ``run["solver"]`` (a mapping from
        solver name to its results) and ``run["optimal"]["spmv"]``.
    extra_data : dict
        Additional string-keyed columns copied into every produced row.

    Returns
    -------
    list of dict
        One row per (run, solver) pair with the keys ``solver``, the entries of
        ``extra_data``, ``time``, ``iterations``, ``res_norm``, ``spmv`` and
        every top-level entry of the run whose value is not a dict. On a key
        clash the value written last wins, i.e. run-level entries take
        precedence over everything else.
    """
    rows = []
    for run in json_data:
        solvers = run["solver"]
        # Top-level entries that are plain values (not nested sections) are
        # shared by every solver row generated from this run.
        run_scalars = [(key, value) for key, value in run.items()
                       if not isinstance(value, dict)]
        for solver_name, solver_data in solvers.items():
            row = {"solver": solver_name}
            row.update(**extra_data)
            apply_stats = solver_data["apply"]
            row.update(
                time=apply_stats["time"],
                iterations=apply_stats["iterations"],
                res_norm=solver_data["residual_norm"],
                spmv=run["optimal"]["spmv"],
            )
            row.update(run_scalars)
            rows.append(row)
    return rows

In [None]:
# Quick check of the flattening on the sample file loaded above, without any
# extra columns; the resulting list of row dicts is displayed as the cell output.
flatten(data, {})

In [None]:
# Collect the flattened rows of every strong-scaling result file into one table.
data = []
# iterdir() yields entries in arbitrary order; sorting makes the row order of
# `df` reproducible between runs and machines.
for file in sorted(Path(f"{root_path}/data_strong").iterdir()):
    # Only files that follow the benchmark naming scheme are loaded.
    if m := name_re.search(file.name):
        # Use a context manager so the handle is closed instead of leaked.
        with open(file) as run_file:
            runs = json.load(run_file)
        # tasks = nodes * processes-per-node; common_size is the size label
        # taken from the file name (e.g. "41M").
        data += flatten(runs, {"tasks": int(m.group(2)) * int(m.group(3)),
                               "common_size": m.group(5)})
# "comm_pattern" is not needed for the analysis below.
df = pd.DataFrame(data).drop(columns=["comm_pattern"])
df

In [None]:
# Index the flat table by (solver, problem-size label, task count) and sort it,
# so the cells below can unstack and select along those levels.
dfs = df.set_index(["solver", "common_size", "tasks"])
dfs = dfs.sort_index()
dfs

In [None]:
def format_dofs(dofs):
    """Format a degrees-of-freedom count with a power-of-1000 suffix.

    The count is floor-divided by the largest power of 1000 that does not
    exceed it, e.g. ``41_000_000 -> "41M DOFs"`` and ``1_500 -> "1K DOFs"``.

    The magnitude is found by exact comparison against powers of 1000 rather
    than with a floating-point logarithm, which can land just below an exact
    power and pick the wrong suffix. Zero is formatted as ``"0 DOFs"`` and
    counts beyond the largest suffix stay in tera units (``10**15`` gives
    ``"1000T DOFs"``) instead of failing on a missing suffix.

    Parameters
    ----------
    dofs : int or float
        Non-negative number of degrees of freedom.

    Returns
    -------
    str
        The scaled count followed by its suffix and ``" DOFs"``.

    Raises
    ------
    ValueError
        If ``dofs`` is negative.
    """
    if dofs < 0:
        raise ValueError(f"dofs must be non-negative, got {dofs}")
    suffixes = ("", "K", "M", "G", "T")
    magnitude = 0
    # Climb one power of 1000 at a time, stopping at the largest known suffix.
    while magnitude < len(suffixes) - 1 and dofs >= 1_000 ** (magnitude + 1):
        magnitude += 1
    return f"{dofs // 1_000 ** magnitude}{suffixes[magnitude]} DOFs"


In [None]:
# Strong-scaling plot: time per iteration against the number of tasks, one
# panel per solver and one line per problem size, each with a dashed
# ideal-scaling reference.
t_per_it_dof = dfs.time / dfs.iterations  # time of a single solver iteration
time = t_per_it_dof.unstack(["solver", "common_size"])
solvers = time.columns.levels[0]
# Size the grid from the data instead of assuming exactly two solvers
# (4.5 inches per panel reproduces the former 9x5 figure for two solvers).
n_panels = max(len(solvers), 1)
fig, axs = plt.subplots(1, n_panels, figsize=(4.5 * n_panels, 5), squeeze=False)
axs = axs[0]  # squeeze=False always returns a 2-D grid; keep its single row
for i, solver in enumerate(solvers):
    ax = axs[i]
    for cs in reversed(time[solver].columns):
        cs_time = time[solver][cs].dropna()
        if cs_time.empty:
            # No valid measurement for this combination; nothing to plot.
            continue
        # Ideal scaling: the first measurement divided by the relative
        # increase in task count.
        ideal = pd.Series(cs_time.iloc[0], cs_time.index) / (cs_time.index / cs_time.index[0])
        ideal.plot(ax=ax, c="k", ls="--", marker="")
        cs_time.plot(ax=ax, label=cs, legend=True)
    ax.set_xscale("log")
    ax.set_yscale("log")
    ax.set_title(f"Strong Scaling - {solver.replace('schwarz', 'local').replace('-', ' ').title()}")
    ax.set_ylabel("Time per Iteration")
    ax.set_xlabel("Num GPUs")

In [None]:
# Speedup plot: speedup relative to the smallest task count, one panel per
# solver and one line per problem size, with a dashed ideal-speedup reference.
#
# Both "solver" and "common_size" are unstacked so that each series is indexed
# by the task count alone. Unstacking only "common_size" would leave a
# (solver, tasks) MultiIndex, and `index / index[0]` raises a TypeError on a
# MultiIndex (besides mixing all solvers into one line).
time = dfs.time.unstack(["solver", "common_size"])
solvers = time.columns.levels[0]
n_panels = max(len(solvers), 1)
fig, axs = plt.subplots(1, n_panels, figsize=(4.5 * n_panels, 5), squeeze=False)
axs = axs[0]  # squeeze=False always returns a 2-D grid; keep its single row
for ax, solver in zip(axs, solvers):
    for cs in time[solver].columns:
        cs_time = time[solver][cs].dropna()
        if cs_time.empty:
            # No valid measurement for this combination; nothing to plot.
            continue
        speedup = cs_time.iloc[0] / cs_time
        # Ideal speedup grows linearly with the relative task count.
        ax.plot(cs_time.index, cs_time.index / cs_time.index[0], c="k", ls="--", marker="")
        speedup.plot(ax=ax, label=cs, legend=True)
    ax.set_xscale("log")
    ax.set_yscale("log")
    # Title derived from the solver name, formatted like the strong-scaling titles.
    ax.set_title(f"Speedup - {solver.replace('schwarz', 'local').replace('-', ' ').title()}")
    ax.set_ylabel("Speedup")
    ax.set_xlabel("Num GPUs")

In [None]:
# Summary statistics of the iteration counts, one column per problem-size
# label; each column aggregates over all solvers and task counts.
dfs.iterations.unstack("common_size").describe()

In [None]:
# Problem size per task, tabulated as tasks (rows) x problem-size label (columns).
r = dfs["size"].reset_index()
# Keep a copy of the task count as a regular column, because "tasks" itself
# becomes an index level in the next step.
r["tasks_2"] = r.tasks
r = r.set_index(["tasks", "common_size"])
# The table holds one row per solver for every (tasks, common_size) pair, so
# this index is not unique and a direct unstack() fails with "Index contains
# duplicate entries". Collapse the duplicates before reshaping.
# NOTE(review): assumes "size" is identical for all rows that share
# (tasks, common_size), so the first entry is representative - confirm.
(r["size"] / r.tasks_2).groupby(level=["tasks", "common_size"]).first().unstack("common_size")