In [None]:
from clickhouse_connect.datatypes.format import set_read_format
import clickhouse_connect
import os
import panel as pn
import polars as pl

# Polars display options: print floats in full and allow strings up to 200 characters.
pl.Config.set_fmt_float("full")
pl.Config.set_fmt_str_lengths(200)

# Load Panel's plotly extension with the "material" template and stretch-width sizing.
pn.extension("plotly", template="material", sizing_mode="stretch_width")

# ClickHouse read formats, registered in order as (type pattern, read format) pairs.
# Reference: https://clickhouse.com/docs/en/integrations/python#read-formats
# NOTE(review): confirm "float" is an accepted read format for Int* in the
# installed clickhouse_connect version.
for _type_pattern, _read_format in (
    ("IPv*", "string"),  # IPv4 and IPv6 values are returned as strings
    ("FixedString", "string"),  # binary FixedString values are returned as strings
    ("Int*", "float"),  # ints are read as floats to avoid large-int overflow errors in the polars conversion
):
    set_read_format(_type_pattern, _read_format)

# Build the ClickHouse client used by every query in this notebook.
# Connection details come from the HOST, USERNAME and PASSWORD environment
# variables; a variable that is not set is passed through as None.
# NOTE(review): USERNAME is often predefined by the OS or shell - confirm it is
# overridden with the ClickHouse user before running.
client = clickhouse_connect.get_client(
    host=os.getenv("HOST"),
    username=os.getenv("USERNAME"),
    password=os.getenv("PASSWORD"),
    secure=True,
)

In [None]:
# Blob sidecar propagation timing.
#
# The query reads beacon_api_eth_v1_events_blob_sidecar, keeping rows whose
# event_date_time falls within the last day and whose meta_network_name is
# 'mainnet'. Rows are grouped by (kzg_commitment, slot,
# meta_consensus_implementation); each group reports the earliest
# slot_start_date_time, the first and last event_date_time, the gap between
# those two event times in milliseconds, and the highest blob_index.
blob_sidecar_query = """ 
SELECT 
    kzg_commitment,
    slot,
    meta_consensus_implementation,
    MIN(slot_start_date_time) AS min_slot_start_date_time,
    MIN(event_date_time) AS first_event_time,
    MAX(event_date_time) AS last_event_time,
    (toUnixTimestamp64Milli(MAX(event_date_time)) - toUnixTimestamp64Milli(MIN(event_date_time))) AS time_diff_milliseconds, -- time difference from when first and last blob was seen. It's put in the query because it's easier to do here
    MAX(blob_index) AS max_blob_index
FROM beacon_api_eth_v1_events_blob_sidecar 
WHERE event_date_time > NOW() - INTERVAL '1 DAYS'
AND meta_network_name = 'mainnet'
GROUP BY kzg_commitment, slot, meta_consensus_implementation
"""
# Run the query through the shared ClickHouse client; the result is handed to
# pl.from_pandas below, so it is expected to be a pandas DataFrame.
blob_sidecar_df = client.query_df(blob_sidecar_query)

# Polars copy of the same result; the pandas frame is kept under its own name.
blob_sidecar_df_pl = pl.from_pandas(blob_sidecar_df)