In [1]:
import clickhouse_connect
import os
import polars as pl

pl.Config.set_fmt_str_lengths(200)
pl.Config.set_fmt_float("full")

polars.config.Config

In [2]:
# https://clickhouse.com/docs/en/integrations/python#read-formats
from clickhouse_connect.datatypes.format import set_read_format

# Return both IPv6 and IPv4 values as strings
set_read_format("IPv*", "string")

# Return binary as string
set_read_format("FixedString", "string")

In [3]:
# Create ClickHouse client
client = clickhouse_connect.get_client(
    host=os.environ.get("HOST"),
    username=os.environ.get("USERNAME"),
    password=os.environ.get("PASSWORD"),
    secure=True,
)

In [56]:
# beacon block
beacon_block: str = """
SELECT 
    *
FROM canonical_beacon_block
WHERE 
    (slot_start_date_time BETWEEN '2024-03-09' AND '2024-03-10')
"""

# Query
beacon_block_query = client.query_df(beacon_block)

In [57]:
beacon_block_query.head(5)

Unnamed: 0,updated_date_time,event_date_time,slot,slot_start_date_time,epoch,epoch_start_date_time,block_root,block_version,block_total_bytes,block_total_bytes_compressed,...,meta_client_geo_autonomous_system_number,meta_client_geo_autonomous_system_organization,meta_network_id,meta_network_name,meta_consensus_version,meta_consensus_version_major,meta_consensus_version_minor,meta_consensus_version_patch,meta_consensus_implementation,meta_labels
0,2024-03-01 00:08:12+00:00,2024-03-01 00:08:09.395,4459800,2024-03-01 00:00:00+00:00,139368,2024-02-29 23:55:12+00:00,0x72ea50301436095e2b33d677cd8d19cc324ba6ab5127...,deneb,383046,299366,...,24940,Hetzner Online GmbH,11155111,sepolia,v4.6.0-1be5253,4,6,0,lighthouse,{}
1,2024-03-01 00:16:38+00:00,2024-03-01 00:16:33.145,8535601,2024-03-01 00:00:35+00:00,266737,2024-02-29 23:57:11+00:00,0x170a19b3d301bd4cc605741ea62eef4ab91d17e53bbf...,capella,799175,673184,...,24940,Hetzner Online GmbH,1,mainnet,v4.6.0-1be5253,4,6,0,lighthouse,{}
2,2024-03-01 00:08:12+00:00,2024-03-01 00:08:10.288,4459804,2024-03-01 00:00:48+00:00,139368,2024-02-29 23:55:12+00:00,0x812b9959ac59c09439a55ed3ce14fc93c0305348fbfd...,deneb,992950,752948,...,24940,Hetzner Online GmbH,11155111,sepolia,v4.6.0-1be5253,4,6,0,lighthouse,{}
3,2024-03-01 00:16:39+00:00,2024-03-01 00:16:33.555,8535603,2024-03-01 00:00:59+00:00,266737,2024-02-29 23:57:11+00:00,0xaa216233e67122752b694666c134ad58f1dc33693323...,capella,137468,70493,...,24940,Hetzner Online GmbH,1,mainnet,v4.6.0-1be5253,4,6,0,lighthouse,{}
4,2024-03-01 00:16:38+00:00,2024-03-01 00:16:33.771,8535604,2024-03-01 00:01:11+00:00,266737,2024-02-29 23:57:11+00:00,0x2da8f0506ab664e90c722cf769a803348682f09d06cd...,capella,688446,529916,...,24940,Hetzner Online GmbH,1,mainnet,v4.6.0-1be5253,4,6,0,lighthouse,{}


In [58]:
blob_sidecar: str = """
SELECT
    *
FROM canonical_beacon_blob_sidecar
WHERE 
    (slot_start_date_time BETWEEN '2024-03-09' AND '2024-03-10')
"""

# Query
blob_sidecar_query = client.query_df(blob_sidecar)

In [59]:
blob_sidecar_query.head(5)

Unnamed: 0,unique_key,updated_date_time,event_date_time,slot,slot_start_date_time,epoch,epoch_start_date_time,block_root,block_parent_root,versioned_hash,...,meta_client_geo_autonomous_system_number,meta_client_geo_autonomous_system_organization,meta_network_id,meta_network_name,meta_consensus_version,meta_consensus_version_major,meta_consensus_version_minor,meta_consensus_version_patch,meta_consensus_implementation,meta_labels
0,-7459047438904391558,2024-03-08 00:08:14+00:00,2024-03-08 00:08:10.241,4510200,2024-03-08 00:00:00+00:00,140943,2024-03-07 23:55:12+00:00,0xc3a5cadac1ccb0da3bb44478e4e72e9288311a65203b...,0x6684f95daf1d19958fd07541c9cd90f4c3e15be5083f...,0x019a1654f32f3a854125ad73dbb21c78771073f72a28...,...,24940,Hetzner Online GmbH,11155111,sepolia,v4.6.0-1be5253,4,6,0,lighthouse,{}
1,676216861889648292,2024-03-08 00:08:14+00:00,2024-03-08 00:08:10.241,4510200,2024-03-08 00:00:00+00:00,140943,2024-03-07 23:55:12+00:00,0xc3a5cadac1ccb0da3bb44478e4e72e9288311a65203b...,0x6684f95daf1d19958fd07541c9cd90f4c3e15be5083f...,0x015a49bab45e2bae649cdcd709e96bde8b5fee3fd63f...,...,24940,Hetzner Online GmbH,11155111,sepolia,v4.6.0-1be5253,4,6,0,lighthouse,{}
2,-3093373798735648140,2024-03-08 00:08:14+00:00,2024-03-08 00:08:10.440,4510201,2024-03-08 00:00:12+00:00,140943,2024-03-07 23:55:12+00:00,0x16908a4fa43e34fdfb0162632d1ca1e1e450fb2c4ff8...,0xbb309f209c604dda1c6de34a1625e496a03fc558da8b...,0x011474770f8804b00df98d85f3eec8647d363e9dbe74...,...,24940,Hetzner Online GmbH,11155111,sepolia,v4.6.0-1be5253,4,6,0,lighthouse,{}
3,6355567115066532721,2024-03-08 00:08:14+00:00,2024-03-08 00:08:10.440,4510201,2024-03-08 00:00:12+00:00,140943,2024-03-07 23:55:12+00:00,0x16908a4fa43e34fdfb0162632d1ca1e1e450fb2c4ff8...,0xbb309f209c604dda1c6de34a1625e496a03fc558da8b...,0x016257e2fec350afa0317350e062528a0b4a5906f866...,...,24940,Hetzner Online GmbH,11155111,sepolia,v4.6.0-1be5253,4,6,0,lighthouse,{}
4,8150536267413592607,2024-03-08 00:08:14+00:00,2024-03-08 00:08:10.441,4510201,2024-03-08 00:00:12+00:00,140943,2024-03-07 23:55:12+00:00,0x16908a4fa43e34fdfb0162632d1ca1e1e450fb2c4ff8...,0xbb309f209c604dda1c6de34a1625e496a03fc558da8b...,0x017d9401687400bc03351599da41129907e75b82ce2f...,...,24940,Hetzner Online GmbH,11155111,sepolia,v4.6.0-1be5253,4,6,0,lighthouse,{}


In [68]:
join_query = """
SELECT 
    a.event_date_time,
    a.slot,
    a.slot_start_date_time,
    a.epoch,
    a.epoch_start_date_time,
    a.block_total_bytes,
    a.block_total_bytes_compressed,
    a.block_root AS block_root_block,
    a.execution_payload_block_number,
    a.execution_payload_transactions_count,
    a.execution_payload_transactions_total_bytes,
    a.execution_payload_transactions_total_bytes_compressed,
    b.blob_index,
    b.blob_size,
    a.meta_network_name AS meta_network_name_block,
    b.meta_network_name AS meta_network_name_blob
FROM 
    (SELECT 
        event_date_time,
        slot,
        slot_start_date_time,
        epoch,
        epoch_start_date_time,
        block_total_bytes,
        block_total_bytes_compressed,
        block_root,
        execution_payload_block_number,
        execution_payload_transactions_count,
        execution_payload_transactions_total_bytes,
        execution_payload_transactions_total_bytes_compressed,
        meta_network_name
     FROM canonical_beacon_block
     WHERE slot_start_date_time BETWEEN '2024-03-01' AND '2024-03-10'
    ) AS a
LEFT JOIN 
    (SELECT 
        event_date_time,
        slot,
        slot_start_date_time,
        epoch,
        block_root,
        blob_index,
        blob_size,
        meta_network_name
     FROM canonical_beacon_blob_sidecar
     WHERE slot_start_date_time BETWEEN '2024-03-01' AND '2024-03-10'
    ) AS b
ON a.slot = b.slot
"""

In [69]:
join_query_query = client.query_df(join_query)
join_query_query_df: pl.DataFrame = pl.from_pandas(join_query_query).unique()

In [70]:
join_query_query_df.shape

(485291, 16)

In [74]:
beacon_block_blob_table = (
    join_query_query_df.drop("event_date_time")
    .unique()
    .group_by("meta_network_name_block", "block_root_block")
    .agg(
        [
            pl.col("slot_start_date_time").first().alias("slot_start_date_time"),
            pl.col("block_total_bytes").first().alias("block_total_bytes"),
            pl.col("block_total_bytes_compressed")
            .first()
            .alias("block_total_bytes_compressed"),
            pl.col("execution_payload_transactions_total_bytes")
            .first()
            .alias("execution_payload_transactions_total_bytes"),
            pl.col("blob_index").max().alias("blob_count"),
            pl.col("blob_size").sum().alias("blob_size_sum"),
            pl.col("slot").first().alias("slot"),
            pl.col("epoch").first().alias("epoch"),
        ]
    )
)

In [75]:
beacon_block_blob_table.sort(by="slot_start_date_time", descending=True)

meta_network_name_block,block_root_block,slot_start_date_time,block_total_bytes,block_total_bytes_compressed,execution_payload_transactions_total_bytes,blob_count,blob_size_sum,slot,epoch
str,str,"datetime[ms, UTC]",u32,u32,u32,u64,u32,u32,u32
"""sepolia""","""0xb5e2ae06c704babb6289eaba7818fd097ae74ccd6c2562cba75c9c8c5920c45b""",2024-03-10 00:00:00 UTC,449280,281151,221318,5,786432,4524600,141393
"""holesky""","""0x3a4c02152ec9d9ccdb08feb3e29d033259554784a68372a33a5c9f53cd4995be""",2024-03-10 00:00:00 UTC,127819,50318,12419,5,786432,1177200,36787
"""mainnet""","""0x4340e0217ecf0a0bcf0195406f97f6b486812c2da31d398f303f655e8d625371""",2024-03-09 23:59:59 UTC,378862,222641,142454,0,0,8600398,268762
"""sepolia""","""0xa0a32c8b6ab4843f1afe69cb356c2016b957e91c12c9f71f5e5c873e2b9749e1""",2024-03-09 23:59:48 UTC,304743,183708,149249,4,655360,4524599,141393
"""holesky""","""0xb6f87fe4ec8cbc88d96a33e55d3e3d9a7c17fc597a7dd93703ebf5ba666d19b1""",2024-03-09 23:59:48 UTC,127797,49676,12461,5,786432,1177199,36787
…,…,…,…,…,…,…,…,…,…
"""goerli""","""0xc3be996124ac29f2efe819f5768109bd63556d56e137814bc8ea2f074c6c54ab""",2024-03-01 00:00:12 UTC,351363,200802,132148,0,0,7728601,241518
"""mainnet""","""0x466e97f5b19b3391f865600b79bf4df7cfaf4b964f5fb4b89f1a36ab91db9545""",2024-03-01 00:00:11 UTC,242913,119799,74279,0,0,8535599,266737
"""goerli""","""0xd0c8538800ffdbfa17bbcf2dac6ac0740dbb00cc1bebe69eb06f1297f38c2c42""",2024-03-01 00:00:00 UTC,126577,59226,19572,3,524288,7728600,241518
"""holesky""","""0x8068be607e3dae4832f90b7b01465e10991581fc8ff2d2c8b044d0bcfb5e5346""",2024-03-01 00:00:00 UTC,102855,37494,276,0,131072,1112400,34762


In [81]:
(
    beacon_block_blob_table
    .filter(pl.col('meta_network_name_block') == "holesky")
    .group_by('epoch').agg([
        pl.col("slot_start_date_time").first(),
        (pl.col('block_total_bytes').mean() / 1000),
        (pl.col('blob_size_sum').mean() / 1000),
    ])
).plot.line(
    x="slot_start_date_time",
    y=['block_total_bytes', 'blob_size_sum'],
    title="Block vs Blob size (Holesky)",
    xlabel='date',
    ylabel='size (in kb)',
    line_width=2,
    alpha=.8
)

In [82]:
(
    beacon_block_blob_table
    .filter(pl.col('meta_network_name_block') == "sepolia")
    .group_by('epoch').agg([
        pl.col("slot_start_date_time").first(),
        (pl.col('block_total_bytes').mean() / 1000),
        (pl.col('blob_size_sum').mean() / 1000),
    ])
).plot.line(
    x="slot_start_date_time",
    y=['block_total_bytes', 'blob_size_sum'],
    title="Block vs Blob size (Sepolia)",
    xlabel='date',
    ylabel='size (in kb)',
    line_width=2,
    alpha=.8
)