In [None]:
import pandas as pd

# Load the benchmark result tables, one CSV file per system.
results_jsc = pd.read_csv(
    filepath_or_buffer="./../data/performance-results-juwels.csv",
)
results_e4 = pd.read_csv(
    filepath_or_buffer="./../data/performance-results-e4.csv",
)

In [None]:
# Display the table read from performance-results-juwels.csv as the cell output.
results_jsc

In [None]:
# Display the table read from performance-results-e4.csv as the cell output.
results_e4

In [None]:
# List the column labels of the JUWELS table; the plotting helpers below look
# columns up by label (e.g. "gpus", "nodes", "avg. epoch time [s]").
results_jsc.columns

In [None]:
import matplotlib.pyplot as plt

# Line styling shared by both panels; the plotting code picks entries by index.
colors = [
    "blue",
    "green",
    "orange",
]
markers = [
    "x",
    "^",
    "s",
]

# One row with two side-by-side panels that share the y-axis.
fig, axs = plt.subplots(nrows=1, ncols=2, sharey=True, figsize=(7, 3))


def get_n_gpus_total_runtime(df: pd.DataFrame):
    """Return the GPU counts and the total runtime in minutes.

    Args:
        df: Benchmark results providing the entries ``"gpus"`` and
            ``"total runtime [s]"``.

    Returns:
        A tuple ``(n_gpus, total_runtime)`` where ``n_gpus`` is the ``"gpus"``
        entry and ``total_runtime`` is ``"total runtime [s]"`` divided by 60.

    Raises:
        KeyError: If one of the required labels is missing from ``df``.
    """
    n_gpus = df["gpus"]
    # Bind the converted value to the name that is actually returned; the
    # previous version returned an undefined name and raised NameError.
    total_runtime = df["total runtime [s]"] / 60
    return n_gpus, total_runtime


def get_n_gpus_total_training_time(df: pd.DataFrame):
    """Return the GPU counts and the total training time in minutes.

    Args:
        df: Benchmark results providing the entries ``"gpus"`` and
            ``"total training time [s]"``.

    Returns:
        A tuple ``(n_gpus, total_training_time)`` where ``n_gpus`` is the
        ``"gpus"`` entry and ``total_training_time`` is
        ``"total training time [s]"`` divided by 60.

    Raises:
        KeyError: If one of the required labels is missing from ``df``.
    """
    n_gpus = df["gpus"]
    # Return the value computed here; the previous version returned the
    # undefined name `total_runtime` and raised NameError on every call.
    total_training_time = df["total training time [s]"] / 60
    return n_gpus, total_training_time


def get_n_gpus_and_epoch_time(df: pd.DataFrame):
    """Pair the GPU counts with the average epoch time in minutes.

    Args:
        df: Benchmark results providing the entries ``"gpus"`` and
            ``"avg. epoch time [s]"``. A single row works as well, in which
            case both returned values are scalars.

    Returns:
        A tuple of the ``"gpus"`` entry and the ``"avg. epoch time [s]"``
        entry divided by 60.
    """
    seconds_per_minute = 60
    return df["gpus"], df["avg. epoch time [s]"] / seconds_per_minute


def get_n_nodes_and_epoch_time(df: pd.DataFrame):
    """Pair the node counts with the average epoch time in minutes.

    Args:
        df: Benchmark results providing the entries ``"nodes"`` and
            ``"avg. epoch time [s]"``.

    Returns:
        A tuple of the ``"nodes"`` entry and the ``"avg. epoch time [s]"``
        entry divided by 60.
    """
    seconds_per_minute = 60
    return df["nodes"], df["avg. epoch time [s]"] / seconds_per_minute


# Scaling with GPU plot: panel (a), average epoch time over the number of GPUs.
# NOTE(review): the rows are picked by hard-coded position, so the selection
# depends on the row order of the CSV files -- verify after regenerating them.
ax = axs[0]

# single node experiment on JUWELS Booster
subset = results_jsc.iloc[[3, 4, 5]]
n_gpus, epoch_time = get_n_gpus_and_epoch_time(subset)
ax.plot(n_gpus, epoch_time, label="NVIDIA A100", c=colors[0], marker=markers[0])

# single node experiment on A2
subset = results_e4.iloc[[0, 1]]
n_gpus, epoch_time = get_n_gpus_and_epoch_time(subset)
ax.plot(n_gpus, epoch_time, label="NVIDIA A2", c=colors[1], marker=markers[1])

# single node experiment on GH200
# Index with a list so `subset` stays a (one-row) DataFrame like every other
# subset here, instead of collapsing to a Series whose entries are scalars.
subset = results_e4.iloc[[4]]
n_gpus, epoch_time = get_n_gpus_and_epoch_time(subset)
ax.plot(
    n_gpus, epoch_time, label="NVIDIA GH200", c=colors[2], marker=markers[2]
)

ax.set_title("(a)")
# Raw string: "\m" is not a valid Python escape sequence, so a plain string
# literal triggers an invalid-escape warning. The label text is unchanged.
ax.set_xlabel(r"$N_{\mathrm{GPUs}}$")
ax.set_ylabel("avg. epoch time [m]")

ax.set_xticks([1, 2, 3])
ax.set_xticklabels(["1", "2", "3"])

ax.legend()

# Scaling with nodes plot: panel (b), average epoch time over the number of nodes.
# NOTE(review): the rows are picked by hard-coded position, so the selection
# depends on the row order of the CSV files -- verify after regenerating them.
ax = axs[1]

# multi node experiment on JUWELS Booster
subset = results_jsc.iloc[[5, 8, 9]]
n_nodes, epoch_time = get_n_nodes_and_epoch_time(subset)
ax.plot(
    n_nodes, epoch_time, label="3x NVIDIA A100", c=colors[0], marker=markers[0]
)

# multi node experiment on A2
subset = results_e4.iloc[[0, 2]]
n_nodes, epoch_time = get_n_nodes_and_epoch_time(subset)
ax.plot(
    n_nodes, epoch_time, label="1x NVIDIA A2", c=colors[1], marker=markers[1]
)

# Same colour and marker as the 1x A2 line; the dashed style tells them apart.
subset = results_e4.iloc[[1, 3]]
n_nodes, epoch_time = get_n_nodes_and_epoch_time(subset)
ax.plot(
    n_nodes,
    epoch_time,
    label="2x NVIDIA A2",
    c=colors[1],
    linestyle="--",
    marker=markers[1],
)

# multi node experiment on GH200
subset = results_e4.iloc[[4, 5]]
n_nodes, epoch_time = get_n_nodes_and_epoch_time(subset)
ax.plot(
    n_nodes, epoch_time, label="1x NVIDIA GH200", c=colors[2], marker=markers[2]
)

ax.set_title("(b)")
# Raw string: "\m" is not a valid Python escape sequence, so a plain string
# literal triggers an invalid-escape warning. The label text is unchanged.
ax.set_xlabel(r"$N_{\mathrm{nodes}}$")
ax.set_xticks([1, 2, 3, 4])
ax.set_xticklabels(["1", "2", "3", "4"])

ax.legend()

fig.tight_layout()
# Save through the figure object so the output does not depend on which
# figure pyplot currently considers active.
fig.savefig("performance-benchmark-results.pdf")