In [1]:
import clickhouse_connect
import os
import panel as pn
import polars as pl

# Initialise Panel with the Plotly extension and the "material" template;
# components stretch to the available width by default.
pn.extension("plotly", template="material", sizing_mode="stretch_width")
# Allow up to 200 characters when polars prints string values.
pl.Config.set_fmt_str_lengths(200)
# Print floats in polars' "full" format. The call returns the Config class,
# which is why the cell echoes `polars.config.Config` as its output.
pl.Config.set_fmt_float("full")

polars.config.Config

In [2]:
# ClickHouse read-format settings - https://clickhouse.com/docs/en/integrations/python#read-formats
from clickhouse_connect.datatypes.format import set_read_format

# Ask the driver to return these column types as strings:
#   "IPv*"        -> both IPv6 and IPv4 values
#   "FixedString" -> binary values
for _clickhouse_type in ("IPv*", "FixedString"):
    set_read_format(_clickhouse_type, "string")

### ClickHouse Queries

In [3]:
# Create ClickHouse client
# Connection details come from the HOST, USERNAME and PASSWORD environment
# variables; a variable that is not set is passed through as None.
client = clickhouse_connect.get_client(
    **{
        option: os.environ.get(option.upper())
        for option in ("host", "username", "password")
    },
    secure=True,
)

In [4]:
# block + blob size
# Canonical beacon blocks from the last 7 days (mainnet and holesky), LEFT JOINed
# to the canonical blob sidecars of the same slot. A block with several blobs
# yields one row per blob; a block with no blobs still yields one row.
#
# Both subqueries contain rows for two networks, so the join must match on the
# network as well as the slot. Joining on slot alone would attach blobs from one
# network to a block of the other network whenever slot numbers coincide.
block_size_query = """
SELECT
    a.event_date_time,
    a.slot,
    a.slot_start_date_time,
    a.epoch,
    a.epoch_start_date_time,
    a.block_total_bytes,
    a.block_total_bytes_compressed,
    a.block_root AS block_root_block,
    a.execution_payload_block_number,
    a.execution_payload_transactions_count,
    a.execution_payload_transactions_total_bytes,
    a.execution_payload_transactions_total_bytes_compressed,
    b.blob_index,
    b.blob_size,
    a.meta_network_name AS meta_network_name_block,
    b.meta_network_name AS meta_network_name_blob
FROM
    (SELECT
        event_date_time,
        slot,
        slot_start_date_time,
        epoch,
        epoch_start_date_time,
        block_total_bytes,
        block_total_bytes_compressed,
        block_root,
        execution_payload_block_number,
        execution_payload_transactions_count,
        execution_payload_transactions_total_bytes,
        execution_payload_transactions_total_bytes_compressed,
        meta_network_name
     FROM canonical_beacon_block
     WHERE slot_start_date_time > NOW() - INTERVAL '7 DAYS'
     AND meta_network_name IN ('mainnet', 'holesky')
    ) AS a
LEFT JOIN
    (SELECT
        event_date_time,
        slot,
        slot_start_date_time,
        epoch,
        block_root,
        blob_index,
        blob_size,
        meta_network_name
     FROM canonical_beacon_blob_sidecar
     WHERE slot_start_date_time > NOW() - INTERVAL '7 DAYS'
     AND meta_network_name IN ('mainnet', 'holesky')
    ) AS b
ON a.slot = b.slot
AND a.meta_network_name = b.meta_network_name
"""

In [5]:
# Blob sidecar propagation query.
# Selects raw (unaggregated) rows from beacon_api_eth_v1_events_blob_sidecar for
# the last 7 days on mainnet and holesky, keeping meta_client_name per row.
# NOTE(review): propagation_slot_start_diff is later charted with a "time (ms)"
# axis label, so it is presumably in milliseconds — confirm against the schema.
blob_propagation_slot: str = """
SELECT
    event_date_time,
    slot_start_date_time,
    propagation_slot_start_diff,
    slot,
    epoch,
    meta_network_name,
    meta_client_name
FROM beacon_api_eth_v1_events_blob_sidecar
WHERE slot_start_date_time > NOW() - INTERVAL '7 DAYS'
AND meta_network_name IN ('mainnet', 'holesky')
"""

In [6]:
# Run the block/blob size query against ClickHouse.
# The pandas result is kept as block_size_df; the polars copy has exact
# duplicate rows removed.
block_size_df = client.query_df(block_size_query)
block_size_df_pl: pl.DataFrame = pl.from_pandas(block_size_df)
block_size_df_pl = block_size_df_pl.unique()

In [7]:
# Run the blob propagation query against ClickHouse.
# Despite its name, blob_propagation_slot_query holds the pandas result of the
# query, not SQL text. The polars copy has exact duplicate rows removed.
blob_propagation_slot_query = client.query_df(blob_propagation_slot)
blob_propagation_df_pl: pl.DataFrame = pl.from_pandas(blob_propagation_slot_query)
blob_propagation_df_pl = blob_propagation_df_pl.unique()

In [8]:
# block blob size table
# Collapses the joined block/blob rows to one row per (network, block root).
# Block-level columns are identical on every row of a block, so `.first()` is
# enough for them; blob columns are aggregated across the block's blob rows.
block_size_df_table = (
    block_size_df_pl.drop("event_date_time")
    # Dropping event_date_time can make previously distinct rows identical.
    .unique()
    .group_by("meta_network_name_block", "block_root_block")
    .agg(
        [
            pl.col("slot_start_date_time").first(),
            pl.col("block_total_bytes").first(),
            # how do beacon blocks get compressed? Does the blob part get compressed as well?
            # https://github.com/ethereum/consensus-specs/blob/45b1026cb65dc7f5f45aa38cb89ed63294048484/specs/phase0/p2p-interface.md#compressionencoding
            pl.col("block_total_bytes_compressed").first(),
            pl.col("execution_payload_transactions_total_bytes").first(),
            # Count distinct blobs rather than taking max(blob_index): the index
            # is zero-based, so its maximum is one less than the count, and it
            # cannot tell "no blobs" apart from "one blob". Rows produced by the
            # LEFT JOIN for blob-less blocks carry no real blob (blob_size is
            # null or the column default 0), so they are filtered out first.
            # NOTE(review): assumes every real blob has blob_size > 0 — confirm.
            pl.col("blob_index")
            .filter(pl.col("blob_size") > 0)
            .n_unique()
            .alias("blob_count"),
            pl.col("blob_size").sum().alias("blob_size_sum"),
            pl.col("slot").first(),
            pl.col("epoch").first(),
        ]
    )
)

In [9]:
# blob propagation table
# One row per (network, slot): the mean propagation_slot_start_diff over all
# observed sidecar events (rounded to 2 decimals) and the number of events.
#
# The source rows cover two networks, so the network is part of the grouping key;
# grouping by slot alone would merge rows from different networks whenever slot
# numbers coincide. The already de-duplicated polars frame is reused instead of
# converting and de-duplicating the pandas result a second time.
blob_propagation_table = blob_propagation_df_pl.group_by(
    "meta_network_name", "slot"
).agg(
    [
        pl.col("slot_start_date_time").first(),
        pl.col("propagation_slot_start_diff")
        .mean()
        .round(2)
        .alias("mean_propagation_slot_start_diff"),
        pl.len().alias("count"),
        pl.col("epoch").first(),
    ]
)

### Panel Charts

In [10]:
def block_size_chart(network: str, group_by_epoch: bool = True):
    """Create a line chart comparing compressed block size and uncompressed blob size.

    Reads the module-level ``block_size_df_table`` and plots, for one network,
    the compressed beacon block size against the summed blob size. Both series
    are divided by 1000 before plotting.

    Args:
        network (str): The network name to filter data by.
        group_by_epoch (bool, optional): If True, average the per-block values
            within each epoch for a cleaner chart; otherwise plot every block.
            Defaults to True.

    Returns:
        The chart object returned by polars' ``DataFrame.plot.line``.
    """
    # Filter the dataframe for the specified network
    network_blocks = block_size_df_table.filter(
        pl.col("meta_network_name_block") == network
    )

    # A plain if/else is used rather than `match` on True/False literals: those
    # patterns compare by identity, so any non-bool flag fell through and the
    # function silently returned None.
    if group_by_epoch:
        chart_data = network_blocks.group_by("epoch").agg(
            [
                # Earliest slot time in the epoch: a deterministic x value
                # (`first()` depends on the arbitrary incoming row order).
                pl.col("slot_start_date_time").min(),
                (pl.col("block_total_bytes_compressed").mean() / 1000).alias(
                    "compressed block"
                ),
                (pl.col("blob_size_sum").mean() / 1000).alias("uncompressed blob"),
            ]
        )
    else:
        chart_data = network_blocks.with_columns(
            [
                (pl.col("block_total_bytes_compressed") / 1000).alias(
                    "compressed block"
                ),
                (pl.col("blob_size_sum") / 1000).alias("uncompressed blob"),
            ]
        )

    # group_by does not guarantee row order and a line chart connects points in
    # row order, so sort chronologically before plotting.
    return chart_data.sort(by="slot_start_date_time").plot.line(
        x="slot_start_date_time",
        y=["compressed block", "uncompressed blob"],
        title=f"Compressed Block vs Blob size ({network})",
        xlabel="Date",
        ylabel="Size (in kb)",
        line_width=2,
        alpha=0.8,
    )

In [18]:
def blob_propagation_chart(network: str, group_by_epoch: bool = True):
    """Create a line chart of the mean blob propagation delay for one network.

    Reads the module-level ``blob_propagation_table`` and plots
    ``mean_propagation_slot_start_diff`` over time.

    Args:
        network (str): The network name to filter data by.
        group_by_epoch (bool, optional): If True, average the per-slot means
            within each epoch for a cleaner chart; otherwise plot every slot.
            Defaults to True.

    Returns:
        The chart object returned by polars' ``DataFrame.plot.line``.
    """
    # Filter the dataframe for the specified network
    chart_data = blob_propagation_table.filter(pl.col("meta_network_name") == network)

    # A plain `if` is used rather than `match` on True/False literals: those
    # patterns compare by identity, so any non-bool flag fell through and the
    # function silently returned None.
    if group_by_epoch:
        chart_data = chart_data.group_by("epoch").agg(
            [
                # Earliest slot time in the epoch: a deterministic x value
                # (`first()` depends on the arbitrary incoming row order).
                pl.col("slot_start_date_time").min(),
                pl.col("mean_propagation_slot_start_diff").mean(),
            ]
        )

    # Sort chronologically: a line chart connects points in row order.
    return chart_data.sort(by="slot_start_date_time").plot.line(
        x="slot_start_date_time",
        y="mean_propagation_slot_start_diff",
        # The original title was missing its opening parenthesis.
        title=f"Propagation Time ({network})",
        xlabel="Date",
        ylabel="time (ms)",
        line_width=2,
        alpha=0.8,
    )

In [22]:
# Preview the epoch-aggregated holesky propagation chart as the cell's output.
blob_propagation_chart("holesky", group_by_epoch=True)

### Panel Dashboard

In [35]:
# Block vs blob size: one epoch-aggregated chart per network, side by side.
block_size = pn.Row(
    *(block_size_chart(net, group_by_epoch=True) for net in ("mainnet", "holesky"))
)

# Blob propagation: each network gets its own single-chart row.
holesky_blob_prop = pn.Row(blob_propagation_chart("holesky", group_by_epoch=True))
mainnet_blob_prop = pn.Row(blob_propagation_chart("mainnet", group_by_epoch=True))

In [36]:
# Dashboard layout that is actually shown: the block/blob size row on top and
# the holesky blob propagation row underneath.
combined_panel = pn.Column(
    block_size,
    # mainnet_blob_prop is intentionally left out of this layout (see the note below).
    holesky_blob_prop,
)

# mainnet doesn't work because there is no data yet
# Variant of the layout with the mainnet propagation row appended; kept as a
# separate object so combined_panel stays free of the mainnet chart.
mainnet_prop = pn.Column(combined_panel, mainnet_blob_prop)

In [38]:
# Serve the dashboard: per the recorded output, this launches a local Panel
# server and opens the page in a browser session.
combined_panel.show()

Launching server at http://localhost:35699


<panel.io.server.Server at 0x7ac5dbc7a510>

Gtk-Message: 18:37:06.731: Failed to load module "canberra-gtk-module"
Gtk-Message: 18:37:06.733: Failed to load module "canberra-gtk-module"
[2 zypak-helper] Wait found events, but sd-event found none


Opening in existing browser session.
