In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle
from rich import print as rprint
import seaborn as sns
import os
from scipy.stats import bootstrap

# The benchmark pickle is looked up in the current working directory first;
# if it is not listed there, the parent directory is used instead
# (presumably the notebook is then being run from a subfolder of the project root).
if "bench_metadata.pkl" in os.listdir():
    bench_path = "./bench_metadata.pkl"
else:
    bench_path = "../bench_metadata.pkl"

# Use a context manager so the file handle is closed as soon as loading ends.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open(bench_path, "rb") as bench_file:
    benchmark_data = pickle.load(bench_file)

# The pickle holds a mapping with a "metadata" entry and a "dataframe" entry;
# the latter is turned into a pandas DataFrame.
metadata = benchmark_data["metadata"]
df = pd.DataFrame(benchmark_data["dataframe"])

# Quick sanity check: show the first and last rows.
rprint(df.head())
rprint(df.tail())

In [None]:
# Apply seaborn's default theme to every figure drawn after this point.
sns.set_theme()

# Plot palette as (label, hex code) pairs; the labels are documentation only.
_NAMED_COLORS = (
    ("grey", "#7f7f7f"),
    ("main purple", "#621dac"),
    ("orange", "#c5702d"),
    ("black", "#000000"),
    ("teal", "#099892"),
    ("yellow", "#ffd400"),
    ("pink/purple", "#7e57c4"),
)

# Ordered list of hex codes, indexed by position when colors are assigned.
colors = [hex_code for _label, hex_code in _NAMED_COLORS]

In [None]:
def calculate_bootstrap_error(data, random_state=0):
    """Return the bootstrap standard error of the mean of ``data``.

    Parameters
    ----------
    data : array-like
        One-dimensional sample of observations (for example the ``time``
        values of a single group in a ``groupby().agg()`` call).
    random_state : None, int or numpy random generator, optional
        Seed or generator forwarded to ``scipy.stats.bootstrap``. The fixed
        default makes repeated runs produce identical error estimates; pass
        ``None`` to get fresh, unseeded resampling on every call.

    Returns
    -------
    float
        Standard error of the mean estimated from 1000 bootstrap resamples,
        or ``nan`` when ``data`` holds fewer than two observations.
    """
    values = np.asarray(data)

    # scipy.stats.bootstrap raises ValueError when a sample has fewer than two
    # observations; report "no estimate" instead so that a single-measurement
    # group does not abort a whole aggregation.
    if values.size < 2:
        return np.nan

    res = bootstrap(
        (values,),
        np.mean,
        confidence_level=0.95,
        n_resamples=1000,
        method="basic",
        random_state=random_state,
    )
    # Only the standard error is used; the confidence interval is discarded.
    return res.standard_error

In [None]:
# Keep only the rows of the "mpi-io" benchmark and make sure "time" is numeric.
# .assign returns a new frame, so the original `df` is left untouched.
is_mpi_io = df["name"] == "mpi-io"
mpi_io_df = df.loc[is_mpi_io].assign(
    time=lambda frame: pd.to_numeric(frame["time"])
)

# Columns containing at least one NaN within this subset are removed.
has_nan = mpi_io_df.isna().any()
columns_with_nan = mpi_io_df.columns[has_nan].tolist()
mpi_io_df = mpi_io_df.drop(columns=columns_with_nan)

# When a column label occurs more than once, only its first occurrence is kept.
mpi_io_df = mpi_io_df.loc[:, lambda frame: ~frame.columns.duplicated()]


# Summarise "time" for every (read_step, nodes) pair: the mean plus the
# value returned by calculate_bootstrap_error. Named aggregation sets the
# output column names directly, so no separate rename step is needed.
time_by_step_and_nodes = mpi_io_df.groupby(["read_step", "nodes"])["time"]
grouped = time_by_step_and_nodes.agg(
    time_mean="mean",
    bootstrap_error=calculate_bootstrap_error,
).reset_index()

rprint(grouped)


# Draw on an explicit figure/axes pair instead of pyplot's implicit state.
fig, ax = plt.subplots(figsize=(10, 6))

# From here on `grouped` is the per-"nodes" GroupBy of the summary table.
grouped = grouped.groupby("nodes")

# Assign one palette color to each distinct "nodes" value found in the full
# dataframe, wrapping around when there are more values than colors.
unique_nodes = df["nodes"].unique()
color_dict = {}
for position, node in enumerate(unique_nodes):
    color_dict[node] = colors[position % len(colors)]

# One error-bar series per "nodes" value.
for name, node_group in grouped:
    ax.errorbar(
        node_group["read_step"],
        node_group["time_mean"],
        yerr=node_group["bootstrap_error"],
        fmt="o-",
        capsize=5,
        label=f"{name} nodes",
        color=color_dict[name],
    )

ax.set_xlabel("Read Step (Byte)")
ax.set_ylabel("Mean Time (s)")
# Logarithmic x axis.
ax.set_xscale("log")
ax.set_title("Mean time per read step | MPI-IO | Errorbar: bootstrap")
ax.legend()
ax.grid(True)
plt.show()