In [1]:
from ethpandaops_python.preprocessor import Preprocessor
from eip4844_blob_data.panel_charts import create_slot_inclusion_line_chart, create_priority_fee_chart, get_slot_inclusion_table
from eip4844_blob_data.polars_preprocess import create_slot_inclusion_df, create_slot_gas_bidding_df
from holoviews import opts
import nest_asyncio
import polars as pl
import panel as pn
import time


nest_asyncio.apply()
pn.extension("tabulator", template="material", sizing_mode="stretch_width")
pl.Config.set_fmt_str_lengths(200)
pl.Config.set_fmt_float("full")

polars.config.Config

### Data Query

In [2]:
# labeled blobs - https://dune.com/queries/3521610
sequencers_l2: dict[list[str], list[str]] = {
    "sequencer_addresses": [
        # should be the "from" addresses, this is what hilldobby SQL query does.
        # should also be proper checksum, not lowercase
        "0xC1b634853Cb333D3aD8663715b08f41A3Aec47cc",
        "0x5050F69a9786F081509234F1a7F4684b5E5b76C9",
        "0x6887246668a3b87F54DeB3b94Ba47a6f63F32985",
        "0x000000633b68f5D8D3a86593ebB815b4663BCBe0",
        "0x415c8893D514F9BC5211d36eEDA4183226b84AA7",
        "0xa9268341831eFa4937537bc3e9EB36DbecE83C7e",
        "0xcF2898225ED05Be911D3709d9417e86E0b4Cfc8f",
        "0x0D3250c3D5FAcb74Ac15834096397a3Ef790ec99",
        "0xC70ae19B5FeAA5c19f576e621d2bad9771864fe2",
        "0xC94C243f8fb37223F3EB2f7961F7072602A51B8B"
    ],
    "sequencer_names": [
        "arbitrum",
        "base",
        "optimism",
        "taiko",
        "blast",
        "linea",
        "scroll",
        "zksync",
        "paradex",
        "metal"
    ],
}

num_days: int = 7

In [3]:
def get_data() -> dict[str, pl.DataFrame]:
    return Preprocessor(
        blob_producer=sequencers_l2,
        period=num_days,
        network="mainnet",  # mainnet
    ).cached_data

In [4]:
def filter_data_seq(
    sequencers: list[str], slot_inclusion_joined_df: pl.DataFrame, cached_data: dict[str, pl.DataFrame]
) -> dict[str: pl.DataFrame]:
    """
    This function filters a dataframe and returns updated chart data, based on the input of the dashboard user.
    """

    # slot inclusion
    slot_inclusion_df = (
        slot_inclusion_joined_df.filter(
            pl.col("sequencer_names").is_in(sequencers))
        .filter(pl.col('meta_network_name') == 'mainnet')
        .unique()
        .sort(by="slot")
    )

    # gas bidding scatterplot median
    slot_gas_groupby_df = (
        slot_inclusion_df.group_by("slot_inclusion_rate", "sequencer_names")
        .agg(
            pl.col("priority_fee_bid_percent_premium").median(),
            pl.col("base_fee_per_gas").mean(),
        )
        .sort(by="slot_inclusion_rate")
        .drop_nulls()
    )

    return {
        # time series
        "slot_inclusion_df": slot_inclusion_df,
        "slot_gas_groupby_df": slot_gas_groupby_df,
    }

In [5]:
# start dashboard
def start_interactive_panel(filtered_data_dict, sequencer_names_list):
    multi_select = pn.widgets.MultiSelect(
        name="Sequencers",
        size=10,
        options=sequencer_names_list,
        value=sequencer_names_list,
    )

    # initial chart and table data
    slot_inclusion_line_chart = create_slot_inclusion_line_chart(
        filtered_data_dict["slot_inclusion_df"], sequencer_names_list
    )

    priority_fee_chart = create_priority_fee_chart(
        # add in filter to remove outliers and make chart look better
        filtered_data_dict["slot_inclusion_df"].filter(
            pl.col('slot_inclusion_rate') < 50),
        filtered_data_dict["slot_gas_groupby_df"].filter(
            pl.col('slot_inclusion_rate') < 50),
        sequencer_names_list,
    )

    avg_slot_inclusion_scatterplot = (filtered_data_dict['slot_inclusion_df'].group_by(
        'sequencer_names').agg(
        pl.col('fill_percentage').mean().alias('avg_fill_percentage'),
        pl.col('submission_count').mean().alias(
            'avg_submission_count'),
        pl.col('slot_inclusion_rate').mean().round(
            3).alias('avg_slot_inclusion_rate'),
        pl.col('blob_hashes_length').mean().alias(
            'avg_blob_hashes_length'),
        pl.len().alias('blob_tx_count')
    ).sort(by='avg_slot_inclusion_rate')
        .plot.scatter(x='avg_slot_inclusion_rate', y='avg_blob_hashes_length', by='sequencer_names', width=800,
                      height=375, xlabel='Avg Slot Inclusion Rate', ylabel='Blob Tx Size', title='Avg Slot Inclusion Rate')
    )

    sequencer_macro_blob_table: pl.DataFrame = (
        filtered_data_dict['slot_inclusion_df'].group_by(
            'sequencer_names').agg(
            pl.col('fill_percentage').mean().alias('avg_fill_percentage'),
            pl.col('submission_count').mean().alias(
                'avg_submission_count'),
            pl.col('slot_inclusion_rate').mean().round(
                3).alias('avg_slot_inclusion_rate'),
            pl.col('blob_hashes_length').mean().alias(
                'avg_blob_hashes_length'),
            pl.len().alias('blob_tx_count')
        ).rename({'sequencer_names': 'rollup', 'avg_blob_hashes_length': 'avg_blobs_in_tx'}))

    slot_inclusion_table_tabulator = get_slot_inclusion_table(
        filtered_data_dict["slot_inclusion_df"], sequencer_names_list)

    entire_panel = pn.Column(
        pn.Row(
            pn.pane.Markdown(
                """
            # EIP-4844 Slot Inclusion Dashboard

            ## About
            This dashboard shows detailed analytics for blob inclusion rates as well as the efficiency of using EIP-1559 priority fees
            as a bidding mechanism for faster slot inclusion. This dashboard is made using [Xatu Data](https://github.com/ethpandaops/xatu-data?tab=readme-ov-file) for EL mempool and Beacon chain data and [Hypersync](https://github.com/enviodev/hypersync-client-python) 
            for transaction gas data for the [EIP-4844 data challenge](https://esp.ethereum.foundation/data-challenge-4844).
            """
            ),
            multi_select,
            styles=dict(background="WhiteSmoke"),
        ),
        pn.pane.Markdown(
            """
            ## Historical Slot Inclusion
            When a transaction is resubmitted with updated gas parameters, the transaction hash changes. For example take this blob reference hash - 0x01c738cf37c911334c771f2295c060e5bd7d084f347e4334863336724934c59a. 
            On [etherscan](https://etherscan.io/tx/0x763d823c0f933c4d2eb84406b37aa2649753f2f563fa3ee6d27251c6a52a8d69) we can see that the transaction was replaced by the user. We can see on Ethernow that the transaction contains 
            the same blob reference hash in both the [original tx](https://www.ethernow.xyz/tx/0x763d823c0f933c4d2eb84406b37aa2649753f2f563fa3ee6d27251c6a52a8d69?batchIndex=1) and the [resubmitted tx](https://www.ethernow.xyz/tx/0x5a4094662bd05ff3639a8979927ab527e007a6925387951a9c1b3d2958b13a86?batchIndex=1).
            
            We can measure the total time that a blob hash sat in the mempool by subtracting the original tx was first seen from the slot time, when it eventually is finalized on the beacon chain. 
            In this particular example, the total time that the blob sat in the mempool was not from 18:56:27 to 18:57:11 (4 slots), but really 18:54:29 to 18:57:11 (14 slots)
            """
        ),
        pn.Row(
            slot_inclusion_line_chart.opts(axiswise=True),
            priority_fee_chart.opts(legend_position="left", show_legend=True),
            styles=dict(background="WhiteSmoke"),

        ),
        pn.Row(
            pn.pane.Markdown(
                """
            ## Slot Inclusion Rates
            **Slot Inclusion Rate** - The slot inclusion rate indicates the number of slots required for a blob to be included in the beacon chain, 
            with a higher rate signifying a longer inclusion time. The accompanying time-series chart tracks this metric from initial mempool 
            appearance to final beacon block inclusion. A 50 blob slot inclusion average is taken to smooth out the performance. 
            The target slot inclusion rate is 2. 
                """
            ),
            pn.pane.Markdown(
                """
            ## EIP-1559 Priority Fee Premium Correlation with Slot Rates
            The scatterplot illustrates the relationship between the EIP-1559 priority fee bid premiums and slot inclusion rates. The scatterplot points
            are individual blob bid datapoints and the line is a median bid premium. A higher priority fee bid premium tends to coincide 
            with longer slot inclusion times. This unexpected twist underscores the value of efficient slot utilization. The data indicates a trend 
            where higher bid premiums are associated with longer slot inclusion times, suggesting that as the time for a blob to be included 
            in the beacon chain increases, so does the priority fee bid premium. This behavior comes from the fact that if a blob sits in the 
            mempool for too long, then it is resubmitted with a higher priority fee. 
            """
            ),
            styles=dict(background="WhiteSmoke")
        ),
        pn.Row(
            pn.pane.Markdown(
                """
                ## Blob Data Per Sequencer:
                Features the following values (WIP 6/8/24)
                - scatter plot shows basic pattern for average slot inclusion time for the average number of blob hashes in a transaction.
                """
            ),
            styles=dict(background="WhiteSmoke"),
        ),
        pn.Row(
            pn.widgets.Tabulator(
                sequencer_macro_blob_table.to_pandas(), layout='fit_data'
            ),
            avg_slot_inclusion_scatterplot,
            styles=dict(background="WhiteSmoke")
        ),
        pn.Column(
            pn.pane.Markdown(
                """
                # Slot Inclusion Data Table
                The table shows raw data for blobs that were included in the beacon chain.
                """
            ),
            slot_inclusion_table_tabulator,
            styles=dict(background="WhiteSmoke")
        )
    )

    def update_bar_chart(event):
        """
        Use this to update charts based on sequencer name user selection
        """
        entire_panel[2][0].object = create_slot_inclusion_line_chart(
            filtered_data_dict["slot_inclusion_df"],
            sequencers=multi_select.value,
        )

        entire_panel[2][1].object = create_priority_fee_chart(
            filtered_data_dict["slot_inclusion_df"],
            filtered_data_dict["slot_gas_groupby_df"],
            sequencers=multi_select.value,
        )

    multi_select.param.watch(update_bar_chart, "value")

    return entire_panel

### What charts to add in?

In [6]:
# # retrieve data from database and store in memory
cached_data = get_data()

# prepare dataframes
slot_inclusion_df = create_slot_inclusion_df(
    cached_data).join(
    pl.from_dict(sequencers_l2),
    left_on="from",
    right_on="sequencer_addresses",
    how="left",
    coalesce=True
).select('slot', 'slot_time', 'hash', 'fill_percentage', 'submission_count', 'slot_inclusion_rate', 'sequencer_names', 'meta_network_name')

slot_gas_bidding_df = create_slot_gas_bidding_df(
    cached_data).select('block_number', 'hash', 'base_tx_fee_eth', 'priority_tx_fee_eth', "base_fee_per_gas",
                        "priority_fee_gas", 'total_tx_fee_eth', 'priority_fee_bid_percent_premium')

slot_inclusion_joined_df = slot_inclusion_df.join(
    slot_gas_bidding_df, on="hash", how="left", coalesce=True
)

2024-06-09 is within a day of 2024-06-09


In [7]:
slot_inclusion_joined_df.head(5)

slot,slot_time,hash,fill_percentage,submission_count,slot_inclusion_rate,sequencer_names,meta_network_name,block_number,base_tx_fee_eth,priority_tx_fee_eth,base_fee_per_gas,priority_fee_gas,total_tx_fee_eth,priority_fee_bid_percent_premium
u32,datetime[ms],str,f64,u32,f64,str,str,u64,f64,f64,f64,f64,f64,f64
9210349,2024-06-02 17:10:11,"""0x269efb0423abeba6af52c484ea3ec264f81cb82b0ead3233a9abb81914207734""",98.82,1,3,"""base""","""mainnet""",20005537,0.00028838998608,0.000168,13.73285648,8,0.00045638998608,0.368
9210349,2024-06-02 17:10:11,"""0x269efb0423abeba6af52c484ea3ec264f81cb82b0ead3233a9abb81914207734""",98.82,1,3,"""base""","""mainnet""",20005537,0.00028838998608,0.000168,13.73285648,8,0.00045638998608,0.368
9210349,2024-06-02 17:10:11,"""0x269efb0423abeba6af52c484ea3ec264f81cb82b0ead3233a9abb81914207734""",98.82,1,3,"""base""","""mainnet""",20005537,0.00028838998608,0.000168,13.73285648,8,0.00045638998608,0.368
9210349,2024-06-02 17:10:11,"""0x269efb0423abeba6af52c484ea3ec264f81cb82b0ead3233a9abb81914207734""",98.82,1,3,"""base""","""mainnet""",20005537,0.00028838998608,0.000168,13.73285648,8,0.00045638998608,0.368
9210349,2024-06-02 17:10:11,"""0x269efb0423abeba6af52c484ea3ec264f81cb82b0ead3233a9abb81914207734""",98.82,1,3,"""base""","""mainnet""",20005537,0.00028838998608,0.000168,13.73285648,8,0.00045638998608,0.368


In [8]:
slot_inclusion_joined_df.group_by('sequencer_names').agg(
    pl.col('base_tx_fee_eth').sum().alias('total_base_fees_eth'),
    pl.col('priority_tx_fee_eth').sum().alias('total_priority_fees_eth'),
    pl.col('total_tx_fee_eth').sum().alias('total_eth_fees'),
    pl.col('priority_fee_gas').mean().alias('avg_priority_fee_bid'),
).sort(by='sequencer_names')

sequencer_names,total_base_fees_eth,total_priority_fees_eth,total_eth_fees,avg_priority_fee_bid
str,f64,f64,f64,f64
"""arbitrum""",34.051688650892615,2.70854467500004,36.76023332589261,1.0045618703668504
"""base""",9.016536601574082,3.1377779999999564,12.154314601574082,4.301036269430052
"""blast""",1.7751212398906349,0.7434210000000044,2.5185422398906363,5.488527131782946
"""linea""",19.40068443373698,1.8708725353632727,21.271556969100253,1.4212495981460154
"""metal""",0.148822002924714,0.0326340000000001,0.1814560029247139,2.005161290322581
"""optimism""",3.850094180614857,0.7212240000000053,4.571318180614855,2.365778053316801
"""paradex""",3.375864514535886,0.0280989648,3.4039634793358866,0.1
"""scroll""",13.41284060295179,0.1081366316032017,13.520977234554987,0.09560892932174
"""taiko""",89.46026295149315,36.68788168569988,126.14814463719313,4.997539882451721
"""zksync""",6.047803590475573,0.9060063196875188,6.9538099101630895,2.035475947852132


### run the dashboard

In [10]:
# # retrieve data from database and store in memory
cached_data = get_data()

# prepare dataframes
slot_inclusion_df = create_slot_inclusion_df(
    cached_data).join(
    pl.from_dict(sequencers_l2),
    left_on="from",
    right_on="sequencer_addresses",
    how="left",
    coalesce=True
).select('slot', 'slot_time', 'hash', 'blob_hashes_length', 'fill_percentage', 'submission_count', 'slot_inclusion_rate', 'sequencer_names', 'meta_network_name')

slot_gas_bidding_df = create_slot_gas_bidding_df(
    cached_data).select('block_number', 'hash', 'base_tx_fee_eth', 'priority_tx_fee_eth', "base_fee_per_gas",
                        "priority_fee_gas", 'total_tx_fee_eth', 'priority_fee_bid_percent_premium')

slot_inclusion_joined_df = slot_inclusion_df.join(
    slot_gas_bidding_df, on="hash", how="left", coalesce=True
)

filtered_data_dict = filter_data_seq(
    sequencers_l2["sequencer_names"], slot_inclusion_joined_df, cached_data
)

sequencer_names_list: list[str] = sorted(sequencers_l2["sequencer_names"])

dashboard = start_interactive_panel(filtered_data_dict, sequencer_names_list)
dashboard.show()

2024-06-09 is within a day of 2024-06-09
Launching server at http://localhost:46855


Gtk-Message: 14:38:44.566: Failed to load module "canberra-gtk-module"
Gtk-Message: 14:38:44.570: Failed to load module "canberra-gtk-module"


Opening in existing browser session.


<panel.io.server.Server at 0x737df40f00d0>