## Load benchmark results into memory

In [None]:
import re
import os
import pandas as pd
import numpy as np

# Pick exactly one results file; the others are kept as ready-to-use options.
# filename = "./xsbench-setup/benchmark_results.txt"
# filename = "./rsbench-setup/benchmark_results.txt"
# filename = "./minifmm-setup/benchmark_results.txt"
filename = "./simple-offload/benchmark_results.txt"

# Plot metadata keyed by the substring that identifies the benchmark in
# `filename`. Each value is:
#   (exp_kind, instrumented_label, hide_x_ticks, x_axis_label)
_PLOT_METADATA = {
    "minifmm": ("MiniFMM", "Simulation time", True, "plummer"),
    "rsbench": ("RSBench", "Simulation time", True, "Doppler broadening"),
    "xsbench": ("XSBench", "Simulation time", True, "unionize"),
    "simple-offload": (
        "Host to Device offload",
        "target data map",
        False,
        "Payload size (MB)",
    ),
}

# set metadata based on filename
_matching_keys = [key for key in _PLOT_METADATA if key in filename]
if not _matching_keys:
    # Fail here rather than with a NameError in a later cell when one of the
    # metadata variables is first used.
    raise ValueError(
        f"Cannot infer benchmark kind from {filename!r}; "
        f"expected one of {sorted(_PLOT_METADATA)} in the path"
    )

# If several keys match, the last one in table order wins.
exp_kind, instrumented_label, hide_x_ticks, x_axis_label = _PLOT_METADATA[
    _matching_keys[-1]
]

# One dict per parsed benchmark run; turned into a DataFrame afterwards.
benchmarks = []

# Read the whole results file; the context manager closes the handle even if
# reading fails.
with open(filename, "r") as results_file:
    data = results_file.read()

# Figures are written below ./pdf/<filename>. `exist_ok=True` replaces the
# separate existence check, which could race with another process.
output_dir = f"./pdf/{filename}"
os.makedirs(output_dir, exist_ok=True)

# Each record is a comma-separated line with at least six fields:
#   0: wall-clock time        1: instrumented time
#   2: x-axis value or "none" 3: flag, 1 selects the strategy in field 4
#   4: strategy/heuristic name
#   5: read as `topo` by the original code but never used by the plots
for line_number, line in enumerate(data.split('\n'), start=1):
    info = line.split(',')
    if len(info) <= 2:
        # Blank lines and other non-record lines are skipped.
        continue
    if len(info) < 6:
        # These rows used to fail with a bare IndexError further down.
        raise ValueError(
            f"{filename}:{line_number}: expected at least 6 comma-separated "
            f"fields, got {len(info)}: {line!r}"
        )

    wall_time = float(info[0])
    data_time = float(info[1])
    # "none" marks runs without an x-axis value; they are plotted at 0.
    size = 0.0 if info[2] == "none" else float(info[2])
    use_as = int(info[3]) == 1

    # Runs with the flag unset form the reference series.
    exp_name = info[4] if use_as else "Baseline"

    benchmarks.append(
        {
            "experiment_kind": exp_name,
            "x_axis_metric": size,
            "wall_execution_time": wall_time,
            "instrumented_time": data_time,
            "application_name": exp_kind,
        }
    )

if not benchmarks:
    # An empty frame has no columns, so the groupby below would fail with an
    # opaque KeyError.
    raise ValueError(f"No benchmark records were parsed from {filename!r}")

df = pd.DataFrame(benchmarks)

# get number of executions per (experiment_kind, x_axis_metric) pair
runs_per_config = (
    df.groupby(["experiment_kind", "x_axis_metric"])
    .size()
    .reset_index(name='n_executions')
)

# The plot titles report a single repetition count, so every configuration
# must have been run the same number of times. This is an explicit check
# rather than an `assert`, which is stripped under `python -O`.
distinct_run_counts = runs_per_config["n_executions"].unique()
if len(distinct_run_counts) != 1:
    raise ValueError(
        "Configurations have differing numbers of executions: "
        f"{sorted(int(count) for count in distinct_run_counts)}"
    )
n_executions = distinct_run_counts[0]
print(df.head())

### Cosmetic options

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Apply the seaborn whitegrid theme to the figures drawn below.
sns.set_theme(style="whitegrid")

# Named palette entries; `colors` keeps only the hex codes, in this order,
# because the plotting cells pick colours by position.
_named_palette = (
    ("grey", "#7f7f7f"),
    ("main purple", "#621dac"),
    ("orange", "#c5702d"),
    ("black", "#000000"),
    ("teal", "#099892"),
    ("yellow", "#ffd400"),
    ("pink/purple", "#7e57c4"),
)
colors = [hex_code for _label, hex_code in _named_palette]

# Time not covered by the instrumented region: wall time minus instrumented
# time, stored as a new column on the shared DataFrame.
df["remainder_time"] = df["wall_execution_time"].sub(df["instrumented_time"])
print(df.head())

### Plot total and instrumented times with bootstrap error bars

In [None]:
import math
# Set the style for the plot. `sns.set` is an alias of `sns.set_theme`; the
# latter is used here to match the "Cosmetic options" cell.
sns.set_theme(style="whitegrid")

# Number of bootstrap resamples behind each error bar (also shown in titles).
N_BOOT = 1000

# Half-open [low, high) windows on x_axis_metric; one figure is produced per
# window. The commented entries are alternative windows kept for reference.
x_clips = [
    (-math.inf, math.inf),
    # (0, 201),
    # (201, 16000)
]

# Y-axis clipping, in percent: for a value p > 0 the axis starts at
# floor(min - p% of min), never below 0. 0 leaves the axis limits untouched.
y_clips = [
    0,
    10,
]

for measurement in ['wall_execution_time', 'instrumented_time']:
    measurement_name = 'Total time' if measurement == 'wall_execution_time' else instrumented_label

    for application in df["application_name"].unique():
        application_df = df[df["application_name"] == application]

        for y_clip in y_clips:
            for x_low, x_high in x_clips:
                # Build both bounds from the same frame so the boolean mask
                # is aligned with the rows it selects.
                x_values = application_df["x_axis_metric"]
                windowed_df = application_df[(x_values >= x_low) & (x_values < x_high)]
                if windowed_df.empty:
                    continue

                # One fresh figure per plot. A single figure shared across
                # the clip loops gave only the first plot the requested size.
                fig = plt.figure(figsize=(5, 5))

                # Bar height is the mean per group; error bars are bootstrap
                # confidence intervals.
                sns.barplot(
                    data=windowed_df,
                    x="x_axis_metric",
                    y=measurement,
                    hue="experiment_kind",
                    errorbar='ci', n_boot=N_BOOT,
                    alpha=.95,
                )

                if y_clip > 0:
                    # Start the y axis slightly below the smallest sample so
                    # small differences between strategies stay visible.
                    min_value = windowed_df[measurement].min()
                    min_value = min_value - (min_value * y_clip / 100)
                    min_value = max(math.floor(min_value), 0)
                    plt.ylim(bottom=min_value)

                plt.xlabel(x_axis_label)
                plt.ylabel(f"{measurement_name} (s)")
                plt.xticks(rotation=45)
                if hide_x_ticks:
                    plt.xticks([])
                plt.title(f"{application} - {measurement_name}\n {n_executions} executions - Error bar = 95% CI ({N_BOOT} bootstraps)\n")
                plt.legend(title="Strategy")

                pdf_name = f"{application.replace(' ', '_')}_{measurement}_yclip_{y_clip}_{x_low}_{x_high}.pdf"
                plt.savefig(f"{output_dir}/{pdf_name}", bbox_inches='tight')
                plt.show()
                # Release the figure on backends where show() leaves it open.
                plt.close(fig)


## Stacked segmented time

In [None]:
# One stacked bar chart per (application, strategy): the mean instrumented
# time at the bottom, the mean remainder time stacked on top of it.
for application in df["application_name"].unique():
    app_rows = df.loc[df["application_name"] == application]

    for experiment in app_rows["experiment_kind"].unique():
        # Keep only the runs that belong to this strategy.
        strategy_rows = app_rows.loc[app_rows["experiment_kind"] == experiment]

        # Average every numeric column over the runs sharing an x value.
        averages = (
            strategy_rows.groupby("x_axis_metric")
            .mean(numeric_only=True)
            .reset_index()
        )

        # Bars sit at evenly spaced integer positions and are labelled with
        # the x values rendered as text.
        tick_labels = list(map(str, averages["x_axis_metric"]))
        positions = np.arange(len(tick_labels))
        instrumented_means = averages["instrumented_time"]

        plt.figure(figsize=(5, 5))  # Adjust the figure size as needed

        # Bottom segment: instrumented time.
        plt.bar(
            positions,
            instrumented_means,
            label=instrumented_label,
            color=colors[2],
            alpha=0.9,
        )

        # Top segment: remainder time, starting where the instrumented
        # segment ends.
        plt.bar(
            positions,
            averages["remainder_time"],
            bottom=instrumented_means,
            label='Remainder',
            color=colors[4],
            alpha=0.9,
        )

        plt.title(f"{application} - {experiment} experiment")
        plt.ylabel("Time (s)")
        plt.xlabel(x_axis_label)
        plt.xticks(positions, tick_labels, rotation=45)
        plt.legend(title="Time component")
        if hide_x_ticks:
            # Drop the tick marks and labels set just above.
            plt.xticks([])

        pdf_name = f"stacked_{experiment.replace(' ', '_')}.pdf"
        plt.savefig(f"{output_dir}/{pdf_name}", bbox_inches='tight')
        plt.show()