In [1]:
from clickhouse_connect.datatypes.format import set_read_format
import clickhouse_connect
import os
import panel as pn
import polars as pl

# Notebook-wide display settings: Panel output uses the material template at
# full width; polars shows up to 200 characters per string and floats in full.
pn.extension("plotly", template="material", sizing_mode="stretch_width")
pl.Config.set_fmt_float("full")
pl.Config.set_fmt_str_lengths(200)

# ClickHouse read formats - https://clickhouse.com/docs/en/integrations/python#read-formats
#   IPv*        -> string  (both IPv6 and IPv4 values come back as strings)
#   FixedString -> string  (binary comes back as a string)
#   Int*        -> float   (avoids large-int overflow errors when converting to a polars dataframe)
for type_pattern, read_as in (
    ("IPv*", "string"),
    ("FixedString", "string"),
    ("Int*", "float"),
):
    set_read_format(type_pattern, read_as)

# ClickHouse client; host and credentials are read from environment variables
# so that nothing secret is stored in the notebook itself.
client = clickhouse_connect.get_client(
    secure=True,
    host=os.environ.get("HOST"),
    username=os.environ.get("USERNAME"),
    password=os.environ.get("PASSWORD"),
)

In [2]:
# Blob sidecar propagation: when blob sidecars were seen on mainnet over the last 7 days.
# The query returns one row per (kzg_commitment, slot, meta_consensus_implementation)
# with the first and last event time and the gap between them in milliseconds.
# NOTE(review): kzg_commitment is part of the GROUP BY, so MAX(blob_index) is taken per
# commitment rather than per slot. Presumably a commitment maps to a single blob index,
# which would make a slot with several blobs produce several rows here - confirm that
# this is what the downstream "max_blob_index" analysis expects.
blob_sidecar_query = """ 
SELECT 
    kzg_commitment,
    slot,
    meta_consensus_implementation,
    MIN(slot_start_date_time) AS min_slot_start_date_time,
    MIN(event_date_time) AS first_event_time,
    MAX(event_date_time) AS last_event_time,
    (toUnixTimestamp64Milli(MAX(event_date_time)) - toUnixTimestamp64Milli(MIN(event_date_time))) AS time_diff_milliseconds, -- time difference from when first and last blob was seen. It's put in the query because it's easier to do here
    MAX(blob_index) AS max_blob_index
FROM beacon_api_eth_v1_events_blob_sidecar 
WHERE event_date_time > NOW() - INTERVAL '7 DAYS'
AND meta_network_name = 'mainnet'
GROUP BY kzg_commitment, slot, meta_consensus_implementation
"""
# Fetch the result as a pandas dataframe first ...
blob_sidecar_df = client.query_df(blob_sidecar_query)

# ... then convert it to polars, which is what the later join cell works with.
blob_sidecar_df_pl = pl.from_pandas(blob_sidecar_df)

In [3]:
# Block propagation: when validated blocks were seen on mainnet over the last 7 days.
# Rows are not aggregated; the query selects the raw block event columns, including
# propagation_slot_start_diff (presumably milliseconds since slot start, given the
# 12500 "slot time + 500ms" cutoff used in the join cell - confirm against the schema).
block_publish_timing_query = """ 
SELECT 
 event_date_time,
 slot,
 slot_start_date_time,
 propagation_slot_start_diff,
 block,
 epoch,
 meta_network_name,
 meta_consensus_implementation
FROM beacon_api_eth_v1_events_block 
WHERE event_date_time > NOW() - INTERVAL '7 DAYS'
AND meta_network_name = 'mainnet'
"""
# Fetch the result as a pandas dataframe first ...
block_publish_timing_df = client.query_df(block_publish_timing_query)

# ... then convert it to polars; this frame is the left side of every later join.
block_publish_timing_df_pl = pl.from_pandas(block_publish_timing_df)

In [74]:
# Canonical beacon block data for mainnet over the last 7 days (block sizes per slot).
# block_root is selected under the name "block"; the table is read with FINAL.
# NOTE(review): the time window filters on event_date_time rather than
# slot_start_date_time - confirm that this is the intended column for this table.
canonical_beacon_block_query = """
SELECT 
    slot,
    slot_start_date_time,
    block_root as block,
    block_total_bytes_compressed,
    execution_payload_transactions_total_bytes_compressed,
    meta_consensus_implementation
FROM canonical_beacon_block FINAL
WHERE event_date_time > NOW() - INTERVAL '7 DAYS'
AND meta_network_name = 'mainnet'
"""

# Fetch the result as a pandas dataframe first ...
canonical_beacon_block_df = client.query_df(canonical_beacon_block_query)

# ... then convert it to polars for the join on "slot" in the join cell.
canonical_beacon_block_df_pl = pl.from_pandas(canonical_beacon_block_df)

In [5]:
# Canonical beacon blob sidecar data for mainnet over the last 7 days (blob sizes
# keyed by kzg_commitment and meta_consensus_implementation).
# NOTE(review): unlike the canonical block query, this table is read without FINAL -
# confirm whether duplicate rows per commitment are possible, since they would
# multiply rows in the later left join.
canonical_beacon_blob_sidecar_query = """
SELECT 
    kzg_commitment,
    meta_consensus_implementation,
    blob_size,
    blob_empty_size
FROM canonical_beacon_blob_sidecar
WHERE event_date_time > NOW() - INTERVAL '7 DAYS'
AND meta_network_name = 'mainnet'
"""

# Fetch the result as a pandas dataframe first ...
canonical_beacon_blob_sidecar_df = client.query_df(
    canonical_beacon_blob_sidecar_query)

# ... then convert it to polars for the join on kzg_commitment in the join cell.
canonical_beacon_blob_sidecar_df_pl = pl.from_pandas(
    canonical_beacon_blob_sidecar_df)

In [75]:
# One wide frame built on top of the block event rows.
# Every join is a left join, so a slot stays in the result even when it has no blob
# sidecars or no canonical block data; those gaps show up as nulls, which the final
# step replaces with -1.
# A row that has canonical blobs but no canonical block info belongs to an orphaned
# block, e.g. https://beaconcha.in/slot/8714624
block_sidecar_joined_df = (
    block_publish_timing_df_pl
    # Outlier removal: keep only events seen within the slot time + 500ms. The column
    # comes from the left frame, so filtering before the left joins gives the same rows.
    .filter(pl.col("propagation_slot_start_diff") < 12500)
    # canonical block data, matched per slot
    .join(canonical_beacon_block_df_pl, on=["slot"], how="left", suffix="_canonical_block")
    # blob sidecar timings, matched per slot and consensus client
    .join(
        blob_sidecar_df_pl,
        on=["slot", "meta_consensus_implementation"],
        how="left",
        suffix="_sidecar",
    )
    .drop("min_slot_start_date_time")
    # canonical sidecar data, matched per commitment and consensus client
    .join(
        canonical_beacon_blob_sidecar_df_pl,
        on=["kzg_commitment", "meta_consensus_implementation"],
        how="left",
        suffix="_canonical_blob",
    )
    .fill_null(-1)
)

In [77]:
# Persist the joined frame next to the notebook so the ClickHouse queries do not
# have to be re-run for every session.
block_sidecar_joined_df.write_parquet("cached_cl_7day.parquet")

# To work from the cached file instead of querying again, uncomment the next line:
# block_sidecar_joined_df = pl.read_parquet("cached_cl_7day.parquet")

In [79]:
# Summary per (max_blob_index, consensus client): number of rows, mean block
# propagation time, and mean compressed block size, slowest groups first.
# Observation: there's a clear correlation between blocks with fewer sidecars and
# faster block propagation times.
# NOTE(review): a max_blob_index of -1 comes from the null fill in the join cell,
# i.e. rows without a matching blob sidecar.
block_blob_table = (
    block_sidecar_joined_df.group_by("max_blob_index", "meta_consensus_implementation")
    .agg(
        # aggregate count
        pl.len().alias("count"),
        # propagation timing
        pl.col("propagation_slot_start_diff").mean().alias("mean_propagation_slot_start_diff"),
        # size: mean of the compressed block size, divided by 1000 for readability
        (pl.col("block_total_bytes_compressed").mean() / 1000).alias("mean_block_size_compressed"),
    )
    .sort(by="mean_propagation_slot_start_diff", descending=True)
)

In [80]:
# Show the summary table; the rendered output elides the middle rows.
block_blob_table

max_blob_index,meta_consensus_implementation,count,mean_propagation_slot_start_diff,mean_block_size_compressed
f64,str,u32,f64,f64
5,"""nimbus""",25968,3557.7064078866297,115.25338108441159
4,"""nimbus""",26260,3554.8174409748667,115.61199466869763
3,"""nimbus""",27930,3536.2558897243107,115.07142201933405
2,"""nimbus""",32118,3473.210598418332,117.85747752039354
1,"""nimbus""",41346,3379.301746239056,120.69229981134814
…,…,…,…,…
0,"""lighthouse""",68727,2690.6343067498947,128.37832656743348
-1,"""lodestar""",180216,2629.438213033249,121.79094360656103
-1,"""nimbus""",156862,2588.044338335607,128.07152976501638
-1,"""lighthouse""",156618,2302.966874816432,128.05060629684965


In [91]:
# Mean block propagation time against max_blob_index, one line per consensus client.
(
    block_blob_table.pivot(
        index="max_blob_index",
        columns="meta_consensus_implementation",
        values="mean_propagation_slot_start_diff",
    )
    # block_blob_table is ordered by propagation time, not by blob index; order the
    # rows by the x value so each line is drawn left to right.
    .sort("max_blob_index")
    .plot.line(
        x="max_blob_index",
        y=["lighthouse", "prysm", "teku", "lodestar", "nimbus"],
        xlabel="blob count",
        # the axis-label option is spelled "ylabel", matching "xlabel" above
        ylabel="block propagation time",
        title="Mean Block Propagation Time per Blob Count (-1 is 0 blobs)",
    )
)



In [164]:
# Block propagation time over the week, smoothed with a 32-row rolling mean and
# converted from milliseconds to seconds, for rows with max_blob_index -1 ("no_blob")
# and max_blob_index 0 ("one_blob").
(
    block_sidecar_joined_df.group_by("slot", "max_blob_index")
    .agg(
        pl.col("slot_start_date_time").first(),
        pl.col("propagation_slot_start_diff").mean(),
    )
    # max_blob_index is a float column, so the pivoted column names are "-1.0", "0.0", ...
    .pivot(
        index="slot_start_date_time",
        columns="max_blob_index",
        values="propagation_slot_start_diff",
    )
    # group_by does not guarantee row order; the rolling window below is only
    # meaningful when the rows are in chronological order.
    .sort("slot_start_date_time")
    .with_columns(
        (pl.col("-1.0") / 1000).rolling_mean(window_size=32, min_periods=1).alias("no_blob"),
        (pl.col("0.0") / 1000).rolling_mean(window_size=32, min_periods=1).alias("one_blob"),
    )
    .plot.line(
        x="slot_start_date_time",
        y=["no_blob", "one_blob"],
        xlabel="Datetime",
        # the axis-label option is spelled "ylabel", matching "xlabel" above
        ylabel="propagation time (seconds)",
        title="Block Propagation Time",
        line_width=2,
        alpha=0.8,
    )
)

