In [1]:
from clickhouse_connect.datatypes.format import set_read_format
import clickhouse_connect
import os
import panel as pn
import polars as pl

# Notebook display setup: Panel extension plus Polars string/float formatting.
pn.extension("plotly", template="material", sizing_mode="stretch_width")
pl.Config.set_fmt_str_lengths(200)
pl.Config.set_fmt_float("full")

# ClickHouse read formats, registered in the order listed.
# Reference: https://clickhouse.com/docs/en/integrations/python#read-formats
CLICKHOUSE_READ_FORMATS = {
    # both IPv4 and IPv6 values are returned as strings
    "IPv*": "string",
    # binary FixedString values are returned as strings
    "FixedString": "string",
    # large ints are returned as floats so that converting to a polars
    # dataframe does not hit large int overflow errors
    "Int*": "float",
}
for type_pattern, read_format in CLICKHOUSE_READ_FORMATS.items():
    set_read_format(type_pattern, read_format)

# ClickHouse client; host and credentials are read from the HOST, USERNAME and
# PASSWORD environment variables (each is None when the variable is unset).
client = clickhouse_connect.get_client(
    host=os.getenv("HOST"),
    username=os.getenv("USERNAME"),
    password=os.getenv("PASSWORD"),
    secure=True,
)

In [2]:
# When blob sidecars are propagated.
# The query reads blob sidecar events from the last day on mainnet and, per
# (kzg_commitment, slot, consensus implementation), returns the earliest slot
# start time, the first and last event times, the millisecond gap between
# them, and the largest blob_index seen.
blob_sidecar_query = """ 
SELECT 
    kzg_commitment,
    slot,
    meta_consensus_implementation,
    MIN(slot_start_date_time) AS min_slot_start_date_time,
    MIN(event_date_time) AS first_event_time,
    MAX(event_date_time) AS last_event_time,
    (toUnixTimestamp64Milli(MAX(event_date_time)) - toUnixTimestamp64Milli(MIN(event_date_time))) AS time_diff_milliseconds, -- time difference from when first and last blob was seen. It's put in the query because it's easier to do here
    MAX(blob_index) AS max_blob_index
FROM beacon_api_eth_v1_events_blob_sidecar 
WHERE event_date_time > NOW() - INTERVAL '1 DAYS'
AND meta_network_name = 'mainnet'
GROUP BY kzg_commitment, slot, meta_consensus_implementation
"""
# Run the query through the ClickHouse client; the result is then handed to
# pl.from_pandas below.
blob_sidecar_df = client.query_df(blob_sidecar_query)

# Polars copy of the result, used by the join cell further down.
blob_sidecar_df_pl = pl.from_pandas(blob_sidecar_df)

In [3]:
# When validated blocks are propagated.
# The query reads one row per block event from the last day on mainnet,
# including propagation_slot_start_diff (filtered against 12500 in the join
# cell and labelled as milliseconds in later plots).
block_publish_timing_query = """ 
SELECT 
 event_date_time,
 slot,
 slot_start_date_time,
 propagation_slot_start_diff,
 block,
 epoch,
 meta_network_name,
 meta_consensus_implementation
FROM beacon_api_eth_v1_events_block 
WHERE event_date_time > NOW() - INTERVAL '1 DAYS'
AND meta_network_name = 'mainnet'
"""
# Run the query through the ClickHouse client; the result is then handed to
# pl.from_pandas below.
block_publish_timing_df = client.query_df(block_publish_timing_query)

# Polars copy of the result; this is the left-hand side of the join cell.
block_publish_timing_df_pl = pl.from_pandas(block_publish_timing_df)

In [4]:
# Canonical beacon block data.
# The query reads canonical blocks from the last day on mainnet; block_root is
# exposed as `block`, alongside the compressed block size and the compressed
# execution payload transactions size.
canonical_beacon_block_query = """
SELECT 
    slot,
    slot_start_date_time,
    block_root as block,
    block_total_bytes_compressed,
    execution_payload_transactions_total_bytes_compressed,
    meta_consensus_implementation
FROM canonical_beacon_block FINAL
WHERE event_date_time > NOW() - INTERVAL '1 DAYS'
AND meta_network_name = 'mainnet'
"""

# Run the query through the ClickHouse client; the result is then handed to
# pl.from_pandas below.
canonical_beacon_block_df = client.query_df(canonical_beacon_block_query)

# Polars copy of the result; joined onto the block events by slot further down.
canonical_beacon_block_df_pl = pl.from_pandas(canonical_beacon_block_df)

In [9]:
# Preview the canonical block frame (the polars repr elides the middle rows).
canonical_beacon_block_df_pl

slot,slot_start_date_time,block,block_total_bytes_compressed,execution_payload_transactions_total_bytes_compressed,meta_consensus_implementation
u32,datetime[ms],str,u32,u32,str
8736032,2024-03-28 20:06:47,"""0x44cd99e710f6a22ed1cca4be4b99989ffd4229166979f8281151dddebbfa2bb3""",440347,213888,"""lighthouse"""
8736036,2024-03-28 20:07:35,"""0xaeef32eac79b89bf314d3bd9f81cfb1caf190e2d166e1983bacf585afea55cec""",140027,59246,"""lighthouse"""
8736037,2024-03-28 20:07:47,"""0xc7622ec6e155ef713dee2fb08de32a32c36c47ae3757198602e22d4608dd82f5""",170600,77133,"""lighthouse"""
8736039,2024-03-28 20:08:11,"""0x4559c57d67994bd6d5f23992504b2a649013c18b23f1b66ae47212bece323536""",112897,45395,"""lighthouse"""
8736041,2024-03-28 20:08:35,"""0x981778c0496d74204a806bb2acba36a359b83a26da18d8f209d555126802e2e3""",71827,21028,"""lighthouse"""
…,…,…,…,…,…
8743212,2024-03-29 20:02:47,"""0xff763df8235cfa7c25d41a9e5a8dee0c79c3e9bf86accde5d4dcd048606310db""",193355,90819,"""lighthouse"""
8743218,2024-03-29 20:03:59,"""0xdfe86ba4e51d62416b19d15accc63d4a52dc68ca5615e14ef27cbc837cc89e1c""",107466,41049,"""lighthouse"""
8743221,2024-03-29 20:04:35,"""0x43fff946e972e15d4d8621c0bbcea58d835884963ab90d973ded3bdaf56c14ba""",82213,34390,"""lighthouse"""
8743222,2024-03-29 20:04:47,"""0x524e964a5abd027af9c8b723b09761f3a3f9ec6c973d724de2f9bfd8402e2c57""",71037,29746,"""lighthouse"""


In [5]:
# Canonical beacon blob sidecar data.
# The query reads canonical blob sidecars from the last day on mainnet,
# returning blob_size and blob_empty_size per kzg_commitment.
# NOTE(review): unlike the canonical_beacon_block query, this one does not use
# FINAL — confirm whether duplicate rows are possible here.
canonical_beacon_blob_sidecar_query = """
SELECT 
    kzg_commitment,
    meta_consensus_implementation,
    blob_size,
    blob_empty_size
FROM canonical_beacon_blob_sidecar
WHERE event_date_time > NOW() - INTERVAL '1 DAYS'
AND meta_network_name = 'mainnet'
"""

# Run the query through the ClickHouse client; the result is then handed to
# pl.from_pandas below.
canonical_beacon_blob_sidecar_df = client.query_df(canonical_beacon_blob_sidecar_query)

# Polars copy of the result; joined on kzg_commitment further down.
canonical_beacon_blob_sidecar_df_pl = pl.from_pandas(canonical_beacon_blob_sidecar_df)

In [6]:
# Join block events with canonical block data, blob sidecar events and
# canonical blob sidecar data into one frame.
#
# null fields indicate that there were either no blob sidecars or no canonical block data for that slot.
# if a row has canonical blobs but no canonical block info, then the block was orphaned https://beaconcha.in/slot/8714624
#
# Only `max_blob_index` receives a sentinel: a missing sidecar becomes -1 and
# is then shifted to 0. Every other column keeps its nulls, so aggregations
# such as the mean of block_total_bytes_compressed skip missing data instead of
# averaging in -1 values.
block_sidecar_joined_df = (
    block_publish_timing_df_pl
    # left join canonical block data
    .join(
        canonical_beacon_block_df_pl,
        on=["slot"],
        how="left",
        suffix="_canonical_block",
    )
    # left join blob sidecar data to block publish timing data so slots without sidecars are not excluded.
    .join(
        blob_sidecar_df_pl,
        on=["slot", "meta_consensus_implementation"],
        how="left",
        suffix="_sidecar",
    )
    .drop("min_slot_start_date_time")
    .filter(
        # filter outlier datapoints that are longer than the slot time + 500ms
        pl.col("propagation_slot_start_diff")
        < 12500
    )
    # left join canonical sidecar data
    .join(
        canonical_beacon_blob_sidecar_df_pl,
        on=["kzg_commitment", "meta_consensus_implementation"],
        how="left",
        suffix="_canonical_blob",
    )
    # no sidecar -> -1, then +1 so rows without a sidecar end up at 0 and
    # zero-based blob indexes become 1-based
    .with_columns(pl.col("max_blob_index").fill_null(-1) + 1)
).unique()

In [7]:
# Write the joined frame to a local parquet file (presumably as a cache so the
# later cells can be re-run without querying ClickHouse again — confirm).
block_sidecar_joined_df.write_parquet("cached_cl_3.29.parquet")

# Uncomment to load a previously written snapshot instead of the fresh join.
# NOTE(review): this file name differs from the one written above — confirm
# which snapshot is intended.
# block_sidecar_joined_df = pl.read_parquet("cached_cl_7day.parquet")

### Consensus Client Block Propagation Time with Blobs

In [3]:
# Summary per (max_blob_index, consensus client): row count, mean propagation
# time and mean compressed block size, slowest groups first.
#
# there's a clear correlation between blocks with less sidecars and faster block propagation times.
#
# NOTE(review): the sidecar query groups by kzg_commitment, so max_blob_index
# looks like a per-blob index (+1) rather than a per-block blob count; a block
# with six blobs would then contribute rows to every bucket 1..6. Confirm
# whether a per-block count (as done with .max() in the reorg table) is
# intended here.
block_blob_table = (
    block_sidecar_joined_df.group_by(
        "max_blob_index", "meta_consensus_implementation"
    )
    .agg(
        # aggregate count
        pl.len().alias("count"),
        # propagation timing
        pl.col("propagation_slot_start_diff")
        .mean()
        .alias("mean_propagation_slot_start_diff"),
        # size, divided by 1000 (bytes -> kB, going by the column name)
        (pl.col("block_total_bytes_compressed").mean() / 1000).alias(
            "mean_block_size_compressed"
        ),
    )
    .sort(by="mean_propagation_slot_start_diff", descending=True)
)

In [4]:
# Wide table: one row per max_blob_index, one column per consensus client,
# holding the mean propagation time divided by 1000 (plotted as seconds).
mean_propagation_by_client = (
    block_blob_table
    .with_columns(pl.col("mean_propagation_slot_start_diff") / 1000)
    .pivot(
        values="mean_propagation_slot_start_diff",
        index="max_blob_index",
        columns="meta_consensus_implementation",
    )
    .sort("max_blob_index")
)

# One line per client; the cell's displayed value is the resulting plot.
mean_propagation_by_client.plot.line(
    x="max_blob_index",
    y=["lighthouse", "prysm", "teku", "lodestar", "nimbus"],
    xlabel="blob count",
    ylabel="block propagation time (seconds)",
    title="Mean Block Propagation Time per Blob Count",
)

### Time Series for Block Propagation with Blobs

In [5]:
# Mean propagation time per (slot, max_blob_index), reshaped so each
# max_blob_index value becomes its own column keyed by slot start time.
slot_propagation_by_blob_index = (
    block_sidecar_joined_df
    .group_by("slot", "max_blob_index")
    .agg(
        pl.col("slot_start_date_time").first(),
        pl.col("propagation_slot_start_diff").mean(),
    )
    .pivot(
        values="propagation_slot_start_diff",
        index="slot_start_date_time",
        columns="max_blob_index",
    )
)

# Keep only slots whose "1.0" column exceeds 8000 (the column name is the
# float max_blob_index value rendered as a string).
slot_propagation_by_blob_index.filter(pl.col("1.0") > 8000)

slot_start_date_time,0.0,1.0,2.0,6.0,5.0,4.0,3.0
datetime[ms],f64,f64,f64,f64,f64,f64,f64
2024-03-27 13:17:23,6514.75,8169.857142857143,8169.857142857143,8169.857142857143,8169.857142857143,8169.857142857143,8169.857142857143
2024-03-27 13:55:47,,9034.4,9034.4,9034.4,9034.4,9034.4,9034.4
2024-03-27 22:54:23,,8419.722222222223,,8419.722222222223,,,
2024-03-28 16:48:47,,9096.555555555555,9096.555555555555,9096.555555555555,9096.555555555555,9096.555555555555,9096.555555555555
2024-03-28 18:44:35,,8189.777777777777,8189.777777777777,8189.777777777777,8189.777777777777,8189.777777777777,8189.777777777777
…,…,…,…,…,…,…,…
2024-03-28 17:20:23,,9711.57142857143,,9711.57142857143,,,
2024-03-28 17:17:59,,8157.705882352941,,,8157.705882352941,8157.705882352941,
2024-03-27 16:13:23,,8137,,,,,
2024-03-28 17:52:23,,9587.125,,,,,


In [6]:
# 32-row rolling mean of propagation time (seconds) for the "0.0" and "1.0"
# max_blob_index columns, plotted against slot start time.
# NOTE(review): "1.0" appears to be populated for every slot that has at least
# one sidecar, so `one_blob` presumably means "at least one blob" — confirm.
(
    block_sidecar_joined_df.group_by("slot", "max_blob_index")
    .agg(
        pl.col("slot_start_date_time").first(),
        pl.col("propagation_slot_start_diff").mean(),
    )
    .pivot(
        index="slot_start_date_time",
        columns="max_blob_index",
        values="propagation_slot_start_diff",
    )
    # group_by/pivot give no chronological row order, and rolling_mean works
    # on row position; sort first so each window covers consecutive slots.
    .sort("slot_start_date_time")
    .with_columns(
        (pl.col("0.0") / 1000)
        .rolling_mean(window_size=32, min_periods=1)
        .alias("no_blob"),
        (pl.col("1.0") / 1000)
        .rolling_mean(window_size=32, min_periods=1)
        .alias("one_blob"),
    )
    .plot.line(
        x="slot_start_date_time",
        y=["no_blob", "one_blob"],
        xlabel="Datetime",
        ylabel="propagation time (seconds)",
        title="32 Slot Avg Propagation Time",
        line_width=2,
        alpha=0.8,
    )
)

### Reorged blocks with blobs in them

In [7]:
# Columns not needed for the reorg analysis. meta_consensus_implementation,
# max_blob_index and the two *_bytes_compressed columns are deliberately not
# dropped here.
reorg_unused_columns = (
    "meta_network_name",
    "meta_consensus_implementation_canonical_block",
    "first_event_time",
    "last_event_time",
    "time_diff_milliseconds",
    "blob_size",
    "blob_empty_size",
)

# One row per (slot, consensus client). Rows are sorted by propagation time,
# slowest first, before grouping, so first() presumably picks the slowest
# observation of each group (relies on group_by keeping within-group order).
# blob_count is the largest max_blob_index seen for the group.
reorged_blocks_table = (
    block_sidecar_joined_df
    .drop(*reorg_unused_columns)
    .sort(by="propagation_slot_start_diff", descending=True)
    # hard-coded upper slot bound (original note: "latest slot timing")
    .filter(pl.col("slot") < 8736287)
    .group_by("slot", "meta_consensus_implementation")
    .agg(
        pl.col("slot_start_date_time").first(),
        pl.col("propagation_slot_start_diff").first(),
        pl.col("block").first(),
        pl.col("epoch").first(),
        pl.col("slot_start_date_time_canonical_block").first(),
        pl.col("block_canonical_block").first(),
        pl.col("max_blob_index").max().alias("blob_count"),
    )
    .sort(by="block_canonical_block", descending=True)
)

In [8]:
# reorged blocks after 3/26/24
(
    reorged_blocks_table.filter(
        pl.col("slot_start_date_time") > pl.datetime(2024, 3, 26)
    )
    .group_by("blob_count")
    .agg(
        pl.col("block_canonical_block").is_null().sum().alias("missed_blocks"),
        pl.col("block_canonical_block").is_not_null(
        ).sum().alias("finalized_blocks"),
    )
    .sort(by="blob_count", descending=False)
)

blob_count,missed_blocks,finalized_blocks
f64,u32,u32
0,271,60569
1,34,4587
2,67,5535
3,88,2493
4,63,2617
5,33,1395
6,653,23480


In [9]:
# reorged blocks after 3/26/24
(
    reorged_blocks_table.filter(
        pl.col("slot_start_date_time") > pl.datetime(2024, 3, 26)
    )
    .group_by("blob_count", "meta_consensus_implementation")
    .agg(
        pl.col("block_canonical_block").is_null().sum().alias("missed_blocks"),
        pl.col("block_canonical_block").is_not_null().sum().alias("finalized_blocks"),
    )
    .sort(by="blob_count", descending=False)
)

blob_count,meta_consensus_implementation,missed_blocks,finalized_blocks
f64,str,u32,u32
0,"""nimbus""",36,11254
0,"""prysm""",36,11255
0,"""lodestar""",130,15552
0,"""lighthouse""",36,11254
0,"""teku""",33,11254
…,…,…,…
6,"""prysm""",143,5340
6,"""nimbus""",146,5343
6,"""lodestar""",71,2108
6,"""lighthouse""",147,5345


### Reorg Time Series

In [10]:
# Split the table on whether the canonical-block join found a match:
# no canonical slot start time -> reorged/missed, otherwise finalized.
has_canonical_block = pl.col('slot_start_date_time_canonical_block').is_not_null()
reorged_blocks = reorged_blocks_table.filter(~has_canonical_block)
finalized_blocks = reorged_blocks_table.filter(has_canonical_block)

In [11]:
# Peek at the first five reorged rows.
reorged_blocks.head(n=5)

slot,meta_consensus_implementation,slot_start_date_time,propagation_slot_start_diff,block,epoch,slot_start_date_time_canonical_block,block_canonical_block,blob_count
i64,str,datetime[ms],i64,str,i64,datetime[ms],str,f64
8725059,"""prysm""",2024-03-27 07:32:11,2377,"""0xb476f7bfc6a68876932a0a2e0bea84d6b48c4c4820f2cf1d85de6f5f70398dbf""",272658,,,0
8727889,"""teku""",2024-03-27 16:58:11,2434,"""0x3fd0a509855a43190609ac9a24a873443de35737ad8d34d7095fb213932d302d""",272746,,,6
8725048,"""teku""",2024-03-27 07:29:59,2648,"""0x4859d5bfcbcef7ae9f78b326cc7cc6d7bc7dd74b17d87ee81886f25e1cea336e""",272657,,,6
8720582,"""prysm""",2024-03-26 16:36:47,2726,"""0x96949c16d74f90cc77fc38a467cd75b69a15be1e96fcb00177879dff8f8d07c6""",272518,,,0
8731523,"""prysm""",2024-03-28 05:04:59,2831,"""0x2c2cb1a04902545579d040f72a02ba204fbfa81ad4fd5d1d399818cb6d125937""",272860,,,4


In [12]:
# Options shared by both propagation-time histograms.
prop_hist_options = dict(
    xlabel='propagation time (ms)',
    ylabel='count',
    bins=100,
    shared_axes=False,
)

# Distribution of propagation times for missed-slot blocks.
reorg_blocks_prop_hist = reorged_blocks.plot.hist(
    'propagation_slot_start_diff',
    title='missed slot block propagation time distribution',
    **prop_hist_options,
)

# Distribution of propagation times for finalized blocks.
finalized_blocks_prop_hist = finalized_blocks.plot.hist(
    'propagation_slot_start_diff',
    title='finalized block propagation time distribution',
    **prop_hist_options,
)

In [13]:
# Display both histograms together (the `+` is presumably plot-layout
# composition from the plotting backend — confirm).
reorg_blocks_prop_hist + finalized_blocks_prop_hist

In [14]:
# Display the reorged/missed rows (the polars repr elides the middle rows).
reorged_blocks

slot,meta_consensus_implementation,slot_start_date_time,propagation_slot_start_diff,block,epoch,slot_start_date_time_canonical_block,block_canonical_block,blob_count
i64,str,datetime[ms],i64,str,i64,datetime[ms],str,f64
8725059,"""prysm""",2024-03-27 07:32:11,2377,"""0xb476f7bfc6a68876932a0a2e0bea84d6b48c4c4820f2cf1d85de6f5f70398dbf""",272658,,,0
8727889,"""teku""",2024-03-27 16:58:11,2434,"""0x3fd0a509855a43190609ac9a24a873443de35737ad8d34d7095fb213932d302d""",272746,,,6
8725048,"""teku""",2024-03-27 07:29:59,2648,"""0x4859d5bfcbcef7ae9f78b326cc7cc6d7bc7dd74b17d87ee81886f25e1cea336e""",272657,,,6
8720582,"""prysm""",2024-03-26 16:36:47,2726,"""0x96949c16d74f90cc77fc38a467cd75b69a15be1e96fcb00177879dff8f8d07c6""",272518,,,0
8731523,"""prysm""",2024-03-28 05:04:59,2831,"""0x2c2cb1a04902545579d040f72a02ba204fbfa81ad4fd5d1d399818cb6d125937""",272860,,,4
…,…,…,…,…,…,…,…,…
8731672,"""lighthouse""",2024-03-28 05:34:47,11816,"""0xf5dffee50aae3bf3cf210aac3f99b5608c66090260db9d8547cd21348b72c940""",272864,,,3
8729729,"""lodestar""",2024-03-27 23:06:11,11883,"""0xd5c9abc79d7846b57a4db580c359c02913f8a7f820403d30ec35732124cd5e44""",272804,,,6
8728956,"""lighthouse""",2024-03-27 20:31:35,12225,"""0xaf7790a10b684dbf586611ce58618129fbd6e6eabb646b43cb1c7cb47387c6f9""",272779,,,6
8706689,"""lodestar""",2024-03-24 18:18:11,12264,"""0x6012e76109a35f95d107c619a6709a26edbe66a8df1186a9b2fc742199d0d2fe""",272084,,,0
