In [47]:
import polars as pl
import altair as alt
from ipywidgets import GridspecLayout, Output
from IPython.display import display


In [48]:
# Read the semicolon-separated instruction profile and reshape it to long form
# in a single chain: one row per (Benchmark, Instruction_Class) pair, with the
# measured number in the `value` column.
instruction_profile_file = "../data/benchmark_instruction.csv"
instruction_profile_df = (
    pl.read_csv(instruction_profile_file, separator=";")
    .unpivot(index="Benchmark", variable_name="Instruction_Class")
)
instruction_profile_df


Benchmark,Instruction_Class,value
str,str,i64
"""crc_32""","""Branching""",14
"""libud""","""Branching""",17
"""md5""","""Branching""",14
"""tarfind""","""Branching""",27
"""xgboost""","""Branching""",15
…,…,…
"""crc_32""","""Compute""",72
"""libud""","""Compute""",59
"""md5""","""Compute""",73
"""tarfind""","""Compute""",47


In [49]:
def plot_instruction_profile(instruction_profile_df):
    """Build a faceted bar chart of the instruction mix, one facet per benchmark.

    Args:
        instruction_profile_df: Long-form frame with the columns ``Benchmark``,
            ``Instruction_Class`` and ``value``.

    Returns:
        An Altair chart faceted by ``Benchmark`` (one column per benchmark).
    """
    bars = (
        alt.Chart(instruction_profile_df)
        .mark_bar()
        .encode(
            alt.X("value", title="Share of Instruction Type (%)"),
            # Legend title previously read "Intruction Type".
            alt.Color("Instruction_Class", title="Instruction Type"),
        )
        .properties(width=200)
    )
    return bars.facet(column=alt.Column("Benchmark", title=""))

plot_instruction_profile(instruction_profile_df).display(scaleFactor=2)


In [50]:
board_file = "../data/runtime/bench-feather.csv"
# board_file = "../data/runtime/bench-feather-2025-10-22_0950.csv"
benchmark_df = pl.read_csv(board_file)

# Rows whose `correct` flag is false. Null flags are not treated as failures,
# which matches the behaviour of `pl.col("correct").all()`.
incorrect_runs = benchmark_df.filter(~pl.col("correct"))
all_correct = incorrect_runs.is_empty()

if not all_correct:
    print("ALARM !!!")
    # Show which benchmark/environment combinations produced an incorrect
    # result, so the alarm is actionable instead of a bare message.
    display(
        incorrect_runs
        .select("benchmark", "environment")
        .unique()
        .sort("benchmark", "environment")
    )

benchmark_df

ALARM !!!


benchmark,board,correct,environment,execution_time_us,init_runtime_us,iteration,load_program_us,scale_factor
str,str,bool,str,i64,i64,i64,i64,i64
"""tarfind""","""adafruit-feather-nrf52840-sens…",true,"""jerryscript""",710852,104,0,11571,5
"""tarfind""","""adafruit-feather-nrf52840-sens…",true,"""jerryscript""",710908,103,1,11571,5
"""tarfind""","""adafruit-feather-nrf52840-sens…",true,"""jerryscript""",711411,104,2,11570,5
"""tarfind""","""adafruit-feather-nrf52840-sens…",true,"""jerryscript""",711003,103,3,11571,5
"""tarfind""","""adafruit-feather-nrf52840-sens…",true,"""jerryscript""",710126,103,4,11570,5
…,…,…,…,…,…,…,…,…
"""libud""","""adafruit-feather-nrf52840-sens…",true,"""micro-bpf""",1101885,0,0,376,308
"""libud""","""adafruit-feather-nrf52840-sens…",true,"""micro-bpf""",1101881,0,1,375,308
"""libud""","""adafruit-feather-nrf52840-sens…",true,"""micro-bpf""",1101881,0,2,375,308
"""libud""","""adafruit-feather-nrf52840-sens…",true,"""micro-bpf""",1101881,0,3,375,308


## Runtime charts
### Slowdown compared to native

In [51]:
# Benchmark groups; the charts below are drawn once per group.
HEAP_BENCHMARKS = ["md5", "tarfind"]
HEAP_LESS_BENCHMARKS = ["crc_32", "libud", "xgboost"]

# Upper bound of the y axis for the slowdown chart in this cell.
Y_MAX = 1_400

# Sorted list of the distinct benchmark names present in the measurements.
benchmarks = benchmark_df.get_column("benchmark").unique().sort().to_list()

# One fixed colour per execution environment, shared by every chart.
colors = {
    "jerryscript": "#1b9e77",
    "micropython": "#d95f02",
    "lua": "#7570b3",
    "micro-bpf": "#e7298a",
    "femto-container": "#66a61e",
    "wamr": "#e6ab02",
    "native": "#a6761d",
}
colors_scale = alt.Scale(domain=list(colors), range=list(colors.values()))


# Collapse the individual iterations into one row per (environment, benchmark):
#   - execution_time_us      mean execution time
#   - execution_time_us_std  standard deviation of the execution time
#   - load_time_us           mean runtime-init time plus mean program-load time
#   - load_time_us_std       standard deviation of the per-row init + load sum
benchmark_avg_over_iteration = (
    benchmark_df
    .group_by("environment", "benchmark")
    .agg(
        pl.col("execution_time_us").mean(),
        pl.col("execution_time_us").std().alias("execution_time_us_std"),
        (
            pl.col("init_runtime_us").mean() + pl.col("load_program_us").mean()
        ).alias("load_time_us"),
        (
            pl.col("init_runtime_us") + pl.col("load_program_us")
        ).std().alias("load_time_us_std"),
    )
)

def format_duration(us):
    """Format a duration given in microseconds as a short human-readable string.

    The value is shown with one decimal place in seconds ("s"), milliseconds
    ("ms") or microseconds ("µs"). The unit is chosen from the value as it will
    be displayed (rounded to one decimal), so a value just below a unit boundary
    is promoted to the next unit instead of rendering as "1000.0ms" or
    "1000.0µs".

    Args:
        us: Duration in microseconds (int or float).

    Returns:
        The formatted duration, e.g. "1.1s", "710.9ms" or "375.0µs".
    """
    # Promote to seconds once the millisecond rendering would reach 1000.0.
    if round(us / 1_000, 1) >= 1_000:
        return f"{us / 1_000_000:.1f}s"
    # Promote to milliseconds once the microsecond rendering would reach 1000.0.
    if round(us, 1) >= 1_000:
        return f"{us / 1_000:.1f}ms"
    return f"{us:.1f}µs"

# Add human-readable string versions of the timing columns; the charts use
# them as text labels.
benchmark_avg_over_iteration = benchmark_avg_over_iteration.with_columns(
    [
        pl.col(source_column)
        .map_elements(format_duration, return_dtype=pl.Utf8)
        .alias(formatted_column)
        for source_column, formatted_column in (
            ("execution_time_us", "execution_time_formatted"),
            ("execution_time_us_std", "execution_time_std_formatted"),
            ("load_time_us", "load_time_formatted"),
        )
    ]
)



# Mean execution time of the native build, one row per benchmark.
native_exec_times = (
    benchmark_avg_over_iteration
    .filter(pl.col("environment") == "native")
    .select("benchmark", pl.col("execution_time_us").alias("native_execution_time_us"))
)

# Attach the native baseline to every (environment, benchmark) row and express
# the execution time as a multiple of it, rounded to a whole number.
benchmark_times_native = (
    benchmark_avg_over_iteration
    .join(native_exec_times, on="benchmark")
    .with_columns(
        (pl.col("execution_time_us") / pl.col("native_execution_time_us"))
        .round(0)
        .alias("times_native")
    )
)

def slowdown_compared_to_native(data, ymax=Y_MAX):
    """Plot how many times slower each environment is than native, per benchmark.

    Args:
        data: Frame with the columns ``environment``, ``benchmark`` and
            ``times_native``.
        ymax: Upper bound of the y axis. The default is the value ``Y_MAX`` has
            when this function is defined, so a later reassignment of ``Y_MAX``
            in another cell does not change the axis of this chart.

    Returns:
        An Altair chart with one bar per environment, labelled with the
        slowdown factor, faceted into one column per benchmark.
    """
    base_chart = (
        alt.Chart(data)
        .encode(
            alt.X("environment:N", title="", sort=["jerryscript", "micropython", "lua", "micro-bpf", "femto-container", "wamr"]),
            alt.Y("times_native:Q", title="Slowdown compared to native", scale=alt.Scale(domain=[0, ymax])),
        ).properties(width=200)
    )

    times_native_chart = base_chart.encode(
        alt.Color("environment:N", title="", scale=colors_scale, legend=None),
    )

    # Build the bar label (e.g. "12x") inside the chart spec.
    times_native_text = base_chart.transform_calculate(
        times_native_str='datum.times_native + "x"'
    ).encode(
        alt.Text("times_native_str:N"),
    )

    layered = times_native_chart.mark_bar() + times_native_text.mark_text(dy=-5, color="black", fontSize=8)
    return layered.facet(column=alt.Column("benchmark:N", title=""), title="")


# Drop the native baseline rows (the filter is on `environment`, despite the
# variable name) and split the remainder by benchmark group.
except_native_boards = benchmark_times_native.filter(pl.col("environment") != "native")
heap_less = except_native_boards.filter(pl.col("benchmark").is_in(HEAP_LESS_BENCHMARKS))
heap = except_native_boards.filter(pl.col("benchmark").is_in(HEAP_BENCHMARKS))

# For each group: first the instruction profile, then the slowdown chart.
for benchmark_group, slowdown_df in (
    (HEAP_LESS_BENCHMARKS, heap_less),
    (HEAP_BENCHMARKS, heap),
):
    group_profile_df = instruction_profile_df.filter(
        pl.col("Benchmark").is_in(benchmark_group)
    )
    (
        plot_instruction_profile(group_profile_df)
        .properties(padding={"left": 52})
        .display(scaleFactor=2)
    )
    slowdown_compared_to_native(slowdown_df).display(scaleFactor=2)
    


### Execution time of each benchmark

In [52]:
# Upper bound (in µs) of the y axis for the execution-time charts.
Y_MAX = 7_000_000

def plot_exec_chart(data, ymax=Y_MAX):
    """Plot the mean execution time per environment, faceted by benchmark.

    Args:
        data: Frame with the columns ``environment``, ``benchmark``,
            ``execution_time_us``, ``execution_time_us_std`` and
            ``execution_time_formatted``.
        ymax: Upper bound of the y axis in µs. The default is the value
            ``Y_MAX`` has when this function is defined, so reassigning
            ``Y_MAX`` in a later cell does not affect this chart.

    Returns:
        An Altair chart layering bars, text labels and error bars, faceted into
        one column per benchmark.
    """
    base_chart = (
        alt.Chart(data)
        .encode(
            alt.X("environment:N", title="", sort=["jerryscript", "micropython", "lua", "micro-bpf", "femto-container", "wamr"]),
            alt.Y("execution_time_us:Q", title="Execution Time in µs", scale=alt.Scale(domain=[0, ymax])),
        ).properties(width=200)
    )

    bar_chart = base_chart.encode(
        alt.Color("environment:N", title="", scale=colors_scale, legend=None),
    ).mark_bar()

    text_chart = base_chart.encode(
        alt.Text("execution_time_formatted"),
    ).mark_text(dy=-5, color="black", fontSize=8)

    # Derive the error bars from the uncoloured base chart: a colour encoding
    # would take precedence over the mark-level color="black". The yError
    # channel is only attached here because only this layer uses it.
    error_chart = base_chart.encode(
        alt.YError("execution_time_us_std:Q"),
    ).mark_errorbar(color="black")

    return (bar_chart + text_chart + error_chart) \
            .facet(column=alt.Column("benchmark:N", title=""))

# These frames feed the execution-time charts, so they are named *_exec_time_df.
# The load-time cell further down defines its own heap_load_time_df and
# heap_less_load_time_df with different filters; sharing those names here meant
# the same variable held different data depending on which cell ran last.
heap_exec_time_df = benchmark_avg_over_iteration.filter(pl.col("benchmark").is_in(HEAP_BENCHMARKS))
heap_less_exec_time_df = benchmark_avg_over_iteration.filter(pl.col("benchmark").is_in(HEAP_LESS_BENCHMARKS))

plot_exec_chart(heap_less_exec_time_df).display(scaleFactor=2)
plot_exec_chart(heap_exec_time_df).display(scaleFactor=2)

### Standard deviation of each benchmark

In [53]:
# Text-only "table": one cell per (environment, benchmark) showing the
# formatted standard deviation of the execution time.
table_chart = alt.Chart(
    benchmark_avg_over_iteration,
    title="Execution Time Standard Deviation",
).mark_text().encode(
    alt.X('environment:N'),
    alt.Y('benchmark:N'),
    alt.Text('execution_time_std_formatted:N'),
).properties(
    width=400,
    height=200,
)

table_chart.display(scaleFactor=2)

## Load time charts

In [None]:
# Default upper bound (in µs) of the y axis for the load-time charts.
Y_MAX = 120_000

def plot_load_chart(data, ymax=Y_MAX):
    """Plot the mean load time per environment, faceted by benchmark.

    Args:
        data: Frame with the columns ``environment``, ``benchmark``,
            ``load_time_us``, ``load_time_us_std`` and ``load_time_formatted``.
        ymax: Upper bound of the y axis in µs.

    Returns:
        An Altair chart layering bars, text labels and error bars, faceted into
        one column per benchmark.
    """
    base_chart = (
        alt.Chart(data)
        .encode(
            alt.X("environment:N", title="", sort=["jerryscript", "micropython", "lua", "micro-bpf", "femto-container", "wamr"]),
            alt.Y("load_time_us:Q", title="Load Time in µs", scale=alt.Scale(domain=[0, ymax])),
        ).properties(width=200)
    )

    bar_chart = base_chart.encode(
        alt.Color("environment:N", title="", scale=colors_scale, legend=None),
    ).mark_bar()

    text_chart = base_chart.encode(
        alt.Text("load_time_formatted"),
    ).mark_text(dy=-5, color="black", fontSize=8)

    # Derive the error bars from the uncoloured base chart: a colour encoding
    # would take precedence over the mark-level color="black". The yError
    # channel is only attached here because only this layer uses it.
    error_chart = base_chart.encode(
        alt.YError("load_time_us_std:Q"),
    ).mark_errorbar(color="black")

    return (bar_chart + text_chart + error_chart) \
            .facet(column=alt.Column("benchmark:N", title=""), title="")

# Load-time charts exclude the native rows.
without_native_df = benchmark_avg_over_iteration.filter(
    pl.col("environment") != "native"
)
heap_load_time_df = without_native_df.filter(
    pl.col("benchmark").is_in(HEAP_BENCHMARKS)
)
heap_less_load_time_df = without_native_df.filter(
    pl.col("benchmark").is_in(["libud", "crc_32"])
)
# xgboost is charted on its own with a much larger y-axis bound.
xgboost_load_time_df = without_native_df.filter(
    pl.col("benchmark").is_in(["xgboost"])
)

for load_time_df, chart_options in (
    (heap_less_load_time_df, {}),
    (heap_load_time_df, {}),
    (xgboost_load_time_df, {"ymax": 1_600_000}),
):
    plot_load_chart(load_time_df, **chart_options).display(scaleFactor=2)

In [55]:
# Recompute the slowdown relative to native without rounding. This rebinds
# `native_exec_times` and `benchmark_times_native`, replacing the rounded
# version built in the slowdown-chart cell.
native_exec_times = (
    benchmark_avg_over_iteration
    .filter(pl.col("environment") == "native")
    .select("benchmark", pl.col("execution_time_us").alias("native_execution_time_us"))
)

benchmark_times_native = (
    benchmark_avg_over_iteration
    .join(native_exec_times, on="benchmark")
    .with_columns(
        times_native=pl.col("execution_time_us") / pl.col("native_execution_time_us")
    )
)


In [56]:
# benchmark_avg_over_iteration \
#     .sort("benchmark") \
#     .select("environment", "benchmark", pl.col("execution_time_us").round(1), pl.col("load_time_us").round(1)) \
# .write_csv("../data/cpu_per_benchmark.csv")