In [72]:
import holoviews as hv
import polars as pl
from holoviews import opts

# Activate the Bokeh plotting backend for HoloViews; this must run before any
# plot objects below are built or displayed.
hv.extension("bokeh")

In [2]:
# Read the cached 7-day snapshot from disk. The parquet file holds several
# datasets that were queried independently from ClickHouse and then joined
# together with polars.
# TODO - Replace this with a live query.
# TODO - Locate and record the query that produced this cache.
block_sidecar_joined_df = (
    pl.read_parquet("cached_cl_7day.parquet")
    # Largest propagation_slot_start_diff first.
    .sort("propagation_slot_start_diff", descending=True)
    # Cut off at the latest slot timing for 3.29.24. Required because the
    # canonical blocks table lags behind.
    .filter(pl.col("slot").lt(8743229))
)

In [62]:
# Build the reorged-blocks dataset: one row per (slot, consensus client), with a
# mean propagation time and two string labels used as plot categories:
#   has_blobs -> "blobs" / "no blobs"
#   reorged   -> "reorged" / "finalized"
reorged_blocks_table = (
    # Trim the dataset to the columns of interest. Names that are commented out
    # in the list below are deliberately NOT dropped.
    block_sidecar_joined_df.drop(
        "meta_network_name",
        # "meta_consensus_implementation",
        "meta_consensus_implementation_canonical_block",
        "first_event_time",
        "last_event_time",
        "time_diff_milliseconds",
        # "max_blob_index",
        "block_total_bytes_compressed",
        # "execution_payload_transactions_total_bytes_compressed",
        "blob_size",
        "blob_empty_size",
    )
    # Collapse to one row per slot and consensus client.
    .group_by("slot", "meta_consensus_implementation")
    .agg(
        pl.col("slot_start_date_time").first(),
        # Mean propagation time, scaled by 1/1000 (presumably ms -> seconds;
        # TODO confirm the unit of the source column).
        (pl.col("propagation_slot_start_diff") / 1000).mean(),
        pl.col("block").first(),
        pl.col("epoch").first(),
        pl.col("slot_start_date_time_canonical_block").first(),
        pl.col("block_canonical_block").first(),
        # NOTE(review): this is the maximum of max_blob_index, not a true count.
        # If blob indices are 0-based, a block with exactly one blob ends up with
        # blob_count == 0, and a null value (no sidecar row) falls through to the
        # "blobs" branch below. Confirm against the upstream query before
        # relying on the has_blobs label.
        pl.col("max_blob_index").max().alias("blob_count"),
    )
    .with_columns(
        # Emit the string labels directly. Going through a Boolean column and
        # then Expr.replace({True: ..., False: ...}) breaks on polars versions
        # where `replace` keeps the original (Boolean) dtype. pl.lit is required
        # because bare strings inside then()/otherwise() are parsed as column names.
        pl.when(pl.col("blob_count") == 0)
        .then(pl.lit("no blobs"))
        .otherwise(pl.lit("blobs"))
        .alias("has_blobs"),
        # A slot with no matching canonical block is classified as reorged.
        pl.when(pl.col("slot_start_date_time_canonical_block").is_null())
        .then(pl.lit("reorged"))
        .otherwise(pl.lit("finalized"))
        .alias("reorged"),
    )
    .sort(by="slot", descending=True)
)

In [66]:
# Hand only the two category labels and the propagation time over to pandas,
# which is the frame HoloViews plots from.
df_sample = reorged_blocks_table.select(
    ["has_blobs", "reorged", "propagation_slot_start_diff"]
).to_pandas()

# has_blobs / reorged are already string labels at this point, so no extra cast
# is needed before plotting with Bokeh.

# Violin plot of propagation time, grouped by blob presence and reorg status.
violin = hv.Violin(df_sample, kdims=["has_blobs", "reorged"]).opts(
    opts.Violin(
        title="Block Propagation",
        xlabel="blocks",
        ylabel="propagation time (seconds)",
        width=900,
        height=400,
        fontscale=1.5,
        show_grid=True,
        show_legend=True,
        tools=["hover"],
    )
)

In [67]:
# Box-and-whisker plot of df_sample, using the same two grouping dimensions
# (blob presence, reorg status) as the violin plot.
boxwhisker = hv.BoxWhisker(df_sample, kdims=["has_blobs", "reorged"]).opts(
    title="Block Propagation",
    xlabel="blocks",
    ylabel="block propagation time (seconds)",
    width=900,
    height=400,
    fontscale=1.5,
    show_grid=True,
    show_legend=False,
    box_color="white",
    whisker_color="gray",
)

In [70]:
# Bare last expression: renders the violin plot as this cell's output.
violin

In [71]:
# Bare last expression: renders the box-and-whisker plot as this cell's output.
boxwhisker