In [None]:
import glob
import os

import matplotlib.pyplot as plt
import pandas as pd

In [None]:
# Directory that holds the benchmark result CSVs; every "*.csv" in it is loaded.
path = r"./result/"

# glob returns matches in filesystem-dependent order; sort them so the
# concatenated frame has the same row order on every run and machine.
all_files = sorted(glob.glob(os.path.join(path, "*.csv")))
if not all_files:
    # Without this guard pd.concat fails with the much less helpful
    # "No objects to concatenate".
    raise FileNotFoundError(f"No benchmark CSV files found in {path!r}")

# Explicit dtypes so the swept parameters stay integral and `compiled` is a
# real boolean (it is used as a truth value when building plot labels).
dtypes = {
    "batch_size": int,
    "context_length": int,
    "prediction_length": int,
    "time": float,
    "throughput": float,
    "compiled": bool,
}
df = pd.concat((pd.read_csv(f, dtype=dtypes) for f in all_files), ignore_index=True)

# Round the measured values for more compact display.
df["time"] = df["time"].round(3)
df["throughput"] = df["throughput"].round(1)
df = df.sort_values(by=["model", "backend", "device", "compiled", "hardware"])

# Optional filter: uncomment the two lines below to drop the uncompiled torch
# runs (rows with backend == "torch" and compiled == False) from all plots.
# select_no_uncompiled = ((df["backend"] == "torch") & (df["compiled"] == True)) | (df["backend"] != "torch")
# df = df[select_no_uncompiled]

df.head(2)

In [None]:
def plot_by_metric(df, col, post_title=""):
    """Plot throughput against one swept parameter, one line per configuration.

    Rows are grouped by every column except ``col``, ``time`` and
    ``throughput``. Each group is drawn as one line labelled
    ``backend/device[-uncompiled] (hardware)``.

    Args:
        df: Benchmark results. Must contain the columns ``col``, ``time``,
            ``throughput``, ``backend``, ``device``, ``compiled`` and
            ``hardware``.
        col: Name of the column shown on the log-scaled x-axis.
        post_title: Optional text appended to the figure title.

    Returns:
        The ``(fig, ax)`` pair of the newly created figure.
    """
    fig, ax = plt.subplots(figsize=(8, 5))

    # Put ticks exactly on the measured values instead of on powers of ten.
    x_ticks = sorted(df[col].unique())
    ax.set_xscale("log")
    ax.set_xticks(x_ticks, labels=[str(tick) for tick in x_ticks])
    ax.xaxis.set_minor_locator(plt.FixedLocator(x_ticks))

    grouping_cols = df.columns.drop([col, "time", "throughput"]).tolist()
    for name, group in df.groupby(grouping_cols):
        # Look the label parts up by column name so the legend does not depend
        # on the column order of the input frame or on which column is `col`.
        key = dict(zip(grouping_cols, name))
        suffix = "" if key["compiled"] else "-uncompiled"
        label = f"{key['backend']}/{key['device']}{suffix} ({key['hardware']})"

        # Order the points along x; otherwise the line doubles back whenever
        # the input rows are not already sorted by `col`.
        ordered = group.sort_values(col, kind="stable")
        ax.plot(ordered[col], ordered["throughput"], label=label, marker="o", linestyle="-")

    ax.grid(True, which="both", ls="--", alpha=0.5)
    ax.legend(title="Configuration Backend/Device, (Hardware)")

    col_name = col.replace("_", " ").title()
    ax.set_xlabel(f"{col_name} (Log Scale)", fontsize=10)
    ax.set_ylabel("Throughput (Tokens/s)", fontsize=10)
    ax.set_title(f"Throughput vs. {col_name} {post_title}", fontsize=12)

    fig.tight_layout()
    return fig, ax

In [None]:
# Batch-size sweep: keep only the rows measured at prediction_length == 32
# and context_length == 2048, so batch_size is the one parameter that varies.
df_select_batch_size = df.loc[
    df["prediction_length"].eq(32) & df["context_length"].eq(2048)
]
df_select_batch_size

In [None]:
# Throughput as a function of batch size. The title names both parameters that
# the selection holds fixed (prediction_length == 32 and context_length == 2048).
plot_by_metric(
    df_select_batch_size,
    "batch_size",
    post_title="for prediction_length=32 and context_length=2048",
)
plt.show()

In [None]:
# Prediction-length sweep: keep only the rows measured at batch_size == 16
# and context_length == 2048, so prediction_length is what varies.
df_select_pred_length = df.loc[
    df["batch_size"].eq(16) & df["context_length"].eq(2048)
]
df_select_pred_length

In [None]:
# Throughput as a function of prediction length. The title names both parameters
# that the selection holds fixed (batch_size == 16 and context_length == 2048).
plot_by_metric(
    df_select_pred_length,
    "prediction_length",
    post_title="for batch_size=16 and context_length=2048",
)
plt.show()

In [None]:
# Context-length sweep: keep only the rows measured at prediction_length == 32
# and batch_size == 16, so context_length is what varies.
df_select_context_length = df.loc[
    df["prediction_length"].eq(32) & df["batch_size"].eq(16)
]
df_select_context_length

In [None]:
# Throughput as a function of context length for the selection above.
plot_by_metric(
    df=df_select_context_length,
    col="context_length",
    post_title="for prediction_length=32 and batch_size=16",
)
plt.show()