In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle
from rich import print as rprint
import seaborn as sns
import os
from scipy.stats import bootstrap

# The notebook may be started from the directory that holds the pickle or from
# a directory one level below it; pick the matching relative path.
if "bench_metadata.pkl" in os.listdir():
    bench_path = "./bench_metadata.pkl"
else:
    bench_path = "../bench_metadata.pkl"

# NOTE(review): unpickling can execute arbitrary code; only load files that
# were produced by a trusted benchmark run.
# The context manager closes the file handle, which the bare open() leaked.
with open(bench_path, "rb") as bench_file:
    benchmark_data = pickle.load(bench_file)

# The pickle is a mapping with a "metadata" entry and a "dataframe" entry that
# pandas can turn into a DataFrame.
metadata = benchmark_data["metadata"]
df = pd.DataFrame(benchmark_data["dataframe"])

# Quick look at both ends of the table and at the run metadata.
rprint(df.head())
rprint(df.tail())
rprint(metadata)

In [None]:
sns.set_theme()

# Named palette; the insertion order is the order in which plotted series are
# given their colours.
_named_colors = {
    "grey": "#7f7f7f",
    "main purple": "#621dac",
    "orange": "#c5702d",
    "black": "#000000",
    "teal": "#099892",
    "yellow": "#ffd400",
    "pink/purple": "#7e57c4",
}
colors = list(_named_colors.values())
# Separate copy of the palette that the plotting cells fall back on.
colors_backup = list(colors)

In [None]:
def calculate_bootstrap_error(data, random_state=0):
    """Estimate the standard error of the mean of ``data`` by bootstrapping.

    Parameters
    ----------
    data : array-like
        One-dimensional sample of observations.
    random_state : int, numpy.random.Generator or None, default 0
        Seed or generator forwarded to ``scipy.stats.bootstrap``. The fixed
        default makes the estimate identical across re-runs; pass ``None``
        to get a fresh, non-deterministic resampling.

    Returns
    -------
    float
        The ``standard_error`` reported by ``scipy.stats.bootstrap`` for
        ``np.mean`` (1000 resamples). This is a standard error, not a
        confidence-interval half-width. ``nan`` is returned when ``data``
        holds fewer than two observations.
    """
    sample = np.asarray(data)
    # scipy raises ValueError for samples with fewer than two observations;
    # inside a groupby aggregation that would abort the whole table, so such
    # groups get an undefined (NaN) error instead.
    if sample.size < 2:
        return np.nan
    res = bootstrap(
        (sample,),
        np.mean,
        confidence_level=0.95,
        n_resamples=1000,
        method="basic",
        random_state=random_state,
    )
    return res.standard_error

In [None]:
# MPI-IO benchmark: mean run time per read step, one line per node count.
tasks_df = df[df["name"] == "mpi-io"].copy()
tasks_df["time"] = pd.to_numeric(tasks_df["time"])
# Drop every column that holds a NaN in these rows, then any column whose
# label repeats an earlier one.
columns_with_nan = tasks_df.columns[tasks_df.isna().any()].tolist()
tasks_df = tasks_df.drop(columns=columns_with_nan)
tasks_df = tasks_df.loc[:, ~tasks_df.columns.duplicated()]

# Mean and bootstrap standard error of the run time per (read_step, nodes).
grouped = (
    tasks_df.groupby(["read_step", "nodes"])["time"]
    .agg(time_mean="mean", bootstrap_error=calculate_bootstrap_error)
    .reset_index()
)

rprint(grouped)

# Node counts that are left out of this figure.
skipped_nodes = {3, 5, 6, 7}

fig, ax = plt.subplots(figsize=(3.3, 2.4))

# Colours are assigned by series position from the backup palette, so the
# figure does not depend on how much of the shared `colors` list other cells
# have already consumed. The old `colors = colors_backup` refill aliased the
# backup, which let later pop(0) calls empty it as well.
palette = colors_backup
series_index = 0
for name, group in grouped.groupby("nodes"):
    if name in skipped_nodes:
        continue
    ax.errorbar(
        group["read_step"],
        group["time_mean"],
        yerr=group["bootstrap_error"],
        fmt="o-",
        capsize=5,
        label=f"{name} nodes",
        color=palette[series_index % len(palette)],
    )
    series_index += 1

ax.set_xlabel("Read Step (Byte)")
ax.set_ylabel("Mean Time (s)")
ax.set_xscale("log")
ax.legend()
ax.grid(True)

nruns = metadata["runs"]
# presumably the input size is stored in bytes -- TODO confirm
filesize_mb = metadata["input_file_size"] / 1024 / 1024

# savefig does not create missing directories.
os.makedirs("output", exist_ok=True)

# First export: the figure without a title.
fig.tight_layout()
fig.savefig("output/mpi-io-1-8-notitle.pdf")

# Second export: same figure with a descriptive title. The error bars are the
# bootstrap standard error of the mean, so the title says so (it used to claim
# a 95% confidence interval).
ax.set_title(
    f"Mean time per read step | MPI-IO | Errorbar: bootstrap standard error | Runs: {nruns} | Input: {filesize_mb:.2f} MB"
)
fig.tight_layout()
fig.savefig("output/mpi-io-1-8.pdf")
plt.show()

In [None]:
# MPI-IO benchmark: mean run time per read step, one line per node count
# (single-node runs excluded).
tasks_df = df[df["name"] == "mpi-io"].copy()
tasks_df["time"] = pd.to_numeric(tasks_df["time"])
# Drop every column that holds a NaN in these rows, then any column whose
# label repeats an earlier one.
columns_with_nan = tasks_df.columns[tasks_df.isna().any()].tolist()
tasks_df = tasks_df.drop(columns=columns_with_nan)
tasks_df = tasks_df.loc[:, ~tasks_df.columns.duplicated()]

# Mean and bootstrap standard error of the run time per (read_step, nodes).
grouped = (
    tasks_df.groupby(["read_step", "nodes"])["time"]
    .agg(time_mean="mean", bootstrap_error=calculate_bootstrap_error)
    .reset_index()
)

rprint(grouped)

# Node counts that are left out of this figure.
skipped_nodes = {1, 3, 5, 6, 7}

fig, ax = plt.subplots(figsize=(3.3, 2.4))

# Colours are assigned by series position from the backup palette, so the
# figure does not depend on how much of the shared `colors` list other cells
# have already consumed. The old `colors = colors_backup` refill aliased the
# backup, which let later pop(0) calls empty it as well.
palette = colors_backup
series_index = 0
for name, group in grouped.groupby("nodes"):
    if name in skipped_nodes:
        continue
    ax.errorbar(
        group["read_step"],
        group["time_mean"],
        yerr=group["bootstrap_error"],
        fmt="o-",
        capsize=5,
        label=f"{name} nodes",
        color=palette[series_index % len(palette)],
    )
    series_index += 1

ax.set_xlabel("Read Step (Byte)")
ax.set_ylabel("Mean Time (s)")
ax.set_xscale("log")
ax.legend()
ax.grid(True)

nruns = metadata["runs"]
# presumably the input size is stored in bytes -- TODO confirm
filesize_mb = metadata["input_file_size"] / 1024 / 1024

# savefig does not create missing directories.
os.makedirs("output", exist_ok=True)

# First export: the figure without a title.
fig.tight_layout()
fig.savefig("output/mpi-io-2-8-notitle.pdf")

# Second export: same figure with a three-line title. The lines are joined
# explicitly because the backslash-continued literal carried its source
# indentation into the rendered title. The error bars are the bootstrap
# standard error of the mean, so the title says so (it used to claim a 95%
# confidence interval).
title_lines = [
    "Mean time per read step | MPI-IO",
    f"Errorbar: bootstrap standard error | Runs: {nruns}",
    f"Input: {filesize_mb:.2f} MB",
]
ax.set_title("\n".join(title_lines))
fig.tight_layout()
fig.savefig("output/mpi-io-2-8.pdf")
plt.show()

In [None]:
# OpenMP tasks benchmark: mean run time per read step, one line per task count.
tasks_df = df[df["name"] == "omp-tasks"].copy()
tasks_df["time"] = pd.to_numeric(tasks_df["time"])
# Drop every column that holds a NaN in these rows, then any column whose
# label repeats an earlier one.
columns_with_nan = tasks_df.columns[tasks_df.isna().any()].tolist()
tasks_df = tasks_df.drop(columns=columns_with_nan)
tasks_df = tasks_df.loc[:, ~tasks_df.columns.duplicated()]

rprint(tasks_df)

# Mean and bootstrap standard error of the run time per (read_step, tasks).
grouped = (
    tasks_df.groupby(["read_step", "tasks"])["time"]
    .agg(time_mean="mean", bootstrap_error=calculate_bootstrap_error)
    .reset_index()
)

rprint(grouped)

# Task counts that are left out of this figure.
skipped_tasks = {1, 3, 5, 6, 7}

fig, ax = plt.subplots(figsize=(3.3, 2.4))

# Colours are assigned by series position from the backup palette, so the
# figure does not depend on how much of the shared `colors` list other cells
# have already consumed. The old `colors = colors_backup` refill aliased the
# backup, which let later pop(0) calls empty it as well.
palette = colors_backup
series_index = 0
for name, group in grouped.groupby("tasks"):
    if name in skipped_tasks:
        continue
    ax.errorbar(
        group["read_step"],
        group["time_mean"],
        yerr=group["bootstrap_error"],
        fmt="o-",
        capsize=5,
        label=f"{name} tasks",
        color=palette[series_index % len(palette)],
    )
    series_index += 1

ax.set_xlabel("Read Step (Byte)")
ax.set_ylabel("Mean Time (s)")
ax.set_xscale("log")
ax.legend()
ax.grid(True)

nruns = metadata["runs"]
# presumably the input size is stored in bytes -- TODO confirm
filesize_mb = metadata["input_file_size"] / 1024 / 1024

# savefig does not create missing directories.
os.makedirs("output", exist_ok=True)

# First export: the figure without a title.
fig.tight_layout()
fig.savefig("output/omp-tasks-2-64-notitle.pdf")

# Second export: same figure with a descriptive title. The error bars are the
# bootstrap standard error of the mean, so the title says so (it used to claim
# a 95% confidence interval).
ax.set_title(
    f"Mean time per read step | OpenMP Tasks | Errorbar: bootstrap standard error | Runs: {nruns} | Input: {filesize_mb:.2f} MB"
)
fig.tight_layout()
fig.savefig("output/omp-tasks-2-64.pdf")
plt.show()