In [1]:
import polars as pl
import altair as alt

In [2]:
# Load the heap measurements and reshape them to long form: one row per
# (Benchmark, original column header, value).
df = pl.read_csv("../data/memory/heap_usage.csv")
df_long = df.unpivot(index="Benchmark", variable_name="runtime_and_metric")

# Split each "<runtime> - <metric>" header into its two parts, then normalise
# the metric label and cast the measurement to a 64-bit integer.
df_split = (
    df_long
    .with_columns(
        pl.col("runtime_and_metric").str.split_exact(" - ", 1).alias("split_cols")
    )
    .unnest("split_cols")
    .rename({"field_0": "runtime", "field_1": "metric"})
    .with_columns(
        # Lower-case first, then expand the short labels into descriptive
        # ones ("usage" -> "peak usage", "heap" -> "assigned heap").
        pl.col("metric")
        .str.to_lowercase()
        .str.replace_all("usage", "peak usage")
        .str.replace_all("heap", "assigned heap"),
        pl.col("value").cast(pl.Int64),
    )
)

# Note: a wide view (one column per metric) was previously sketched here via
# df_split.pivot(index=["Benchmark", "runtime"], on="metric", values="value");
# it is not used by the cells below.
df_split

Benchmark,runtime_and_metric,value,runtime,metric
str,str,i64,str,str
"""crc_32""","""Jerryscript - heap""",30712,"""Jerryscript""","""assigned heap"""
"""libud""","""Jerryscript - heap""",7160,"""Jerryscript""","""assigned heap"""
"""md5""","""Jerryscript - heap""",20472,"""Jerryscript""","""assigned heap"""
"""tarfind""","""Jerryscript - heap""",8184,"""Jerryscript""","""assigned heap"""
"""xgboost""","""Jerryscript - heap""",51192,"""Jerryscript""","""assigned heap"""
…,…,…,…,…
"""crc_32""","""Wamr - usage""",0,"""Wamr""","""peak usage"""
"""libud""","""Wamr - usage""",0,"""Wamr""","""peak usage"""
"""md5""","""Wamr - usage""",2080,"""Wamr""","""peak usage"""
"""tarfind""","""Wamr - usage""",8995,"""Wamr""","""peak usage"""


In [3]:
# Benchmarks in order of first appearance. Series.unique() does not guarantee
# any ordering unless maintain_order=True, so without it the charts below could
# be emitted in a different sequence on every run.
benchmarks = (
    df_split.get_column("Benchmark").unique(maintain_order=True).to_list()
)

# Render one faceted bar chart per benchmark: one column per runtime, one bar
# per metric.
for benchmark in benchmarks:
    benchmark_rows = df_split.filter(pl.col("Benchmark").eq(benchmark))

    chart = (
        alt.Chart(benchmark_rows, title=f"Memory Usage for {benchmark}")
        .mark_bar()
        .encode(
            alt.X("metric", title=""),
            alt.Y("value", title="memory in bytes"),
            alt.Column("runtime"),
            alt.Color("metric"),
        )
    )

    # scaleFactor=2 is passed through to display() unchanged.
    chart.display(scaleFactor=2)

In [5]:
# Single overview chart across all benchmarks: one column per runtime, one bar
# per metric, coloured by benchmark.
# NOTE(review): the "accumulated" total presumably relies on the default bar
# stacking of the colour-encoded segments — confirm against the rendered chart.
accumulated_chart = (
    alt.Chart(df_split, title="Memory Usage accumulated")
    .mark_bar()
    .encode(
        x=alt.X("metric", title=""),
        # "~s" is the axis number format string applied to the byte counts.
        y=alt.Y("value", title="memory in bytes", axis=alt.Axis(format="~s")),
        column=alt.Column("runtime"),
        color=alt.Color("Benchmark"),
    )
)
accumulated_chart.display(scaleFactor=2)