In [10]:
from ethpandaops_python.preprocessor import Preprocessor
from eip4844_blob_data.panel_charts import create_slot_inclusion_line_chart, create_priority_fee_chart, get_slot_inclusion_table, fee_breakdown_line
from eip4844_blob_data.polars_preprocess import create_slot_inclusion_df, create_slot_gas_bidding_df
from holoviews import opts
import nest_asyncio
import polars as pl
import panel as pn

nest_asyncio.apply()
pn.extension("tabulator", template="material", sizing_mode="stretch_width")
pl.Config.set_fmt_str_lengths(200)
pl.Config.set_fmt_float("full")

polars.config.Config

### Data Query

In [11]:
# labeled blobs - https://dune.com/queries/3521610
sequencers_l2: dict[list[str], list[str]] = {
    "sequencer_addresses": [
        # should be the "from" addresses, this is what hilldobby SQL query does.
        # should also be proper checksum, not lowercase
        "0xC1b634853Cb333D3aD8663715b08f41A3Aec47cc",
        "0x5050F69a9786F081509234F1a7F4684b5E5b76C9",
        "0x6887246668a3b87F54DeB3b94Ba47a6f63F32985",
        "0x000000633b68f5D8D3a86593ebB815b4663BCBe0",
        "0x415c8893D514F9BC5211d36eEDA4183226b84AA7",
        "0xa9268341831eFa4937537bc3e9EB36DbecE83C7e",
        "0xcF2898225ED05Be911D3709d9417e86E0b4Cfc8f",
        "0x0D3250c3D5FAcb74Ac15834096397a3Ef790ec99",
        "0xC70ae19B5FeAA5c19f576e621d2bad9771864fe2",
        "0xC94C243f8fb37223F3EB2f7961F7072602A51B8B"
    ],
    "sequencer_names": [
        "arbitrum",
        "base",
        "optimism",
        "taiko",
        "blast",
        "linea",
        "scroll",
        "zksync",
        "paradex",
        "metal"
    ],
}

num_days: int = 7

In [12]:
def get_data() -> dict[str, pl.DataFrame]:
    return Preprocessor(
        blob_producer=sequencers_l2,
        period=num_days,
        network="mainnet",  # mainnet
    ).cached_data

In [13]:
def filter_data_seq(
    sequencers: list[str], slot_inclusion_joined_df: pl.DataFrame, cached_data: dict[str, pl.DataFrame]
) -> dict[str: pl.DataFrame]:
    """
    This function filters a dataframe and returns updated chart data, based on the input of the dashboard user.
    """

    # slot inclusion
    slot_inclusion_df = (
        slot_inclusion_joined_df.filter(
            pl.col("sequencer_names").is_in(sequencers))
        .filter(pl.col('meta_network_name') == 'mainnet')
        .unique()
        .sort(by="slot")
    )

    # gas bidding scatterplot median
    slot_gas_groupby_df = (
        slot_inclusion_df.group_by("slot_inclusion_rate", "sequencer_names")
        .agg(
            pl.col("priority_fee_bid_percent_premium").median(),
            pl.col("base_fee_per_gas").mean(),
        )
        .sort(by="slot_inclusion_rate")
        .drop_nulls()
    )

    return {
        # time series
        "slot_inclusion_df": slot_inclusion_df,
        "slot_gas_groupby_df": slot_gas_groupby_df,
    }

In [14]:
# start dashboard
def start_interactive_panel(filtered_data_dict, sequencer_names_list):
    multi_select = pn.widgets.MultiSelect(
        name="Sequencers",
        size=10,
        options=sequencer_names_list,
        value=sequencer_names_list,
    )

    # initial chart and table data
    slot_inclusion_line_chart = create_slot_inclusion_line_chart(
        filtered_data_dict["slot_inclusion_df"], sequencer_names_list
    )

    priority_fee_chart = create_priority_fee_chart(
        # add in filter to remove outliers and make chart look better
        filtered_data_dict["slot_inclusion_df"].filter(
            pl.col('slot_inclusion_rate') < 50),
        filtered_data_dict["slot_gas_groupby_df"].filter(
            pl.col('slot_inclusion_rate') < 50),
        sequencer_names_list,
    )

    fee_breakdown_line_chart = filtered_data_dict["slot_inclusion_df"].sort(by='slot_time').plot.scatter(
        x='slot_time', y=['base_fee_per_gas', 'priority_fee_gas'], groupby='sequencer_names', s=1,
        xlabel='datetime', ylabel='gas (gwei)', title='Base Fee vs Priority Fee (gwei)',
        # need `slot_time` so that it doesn't share the same y-axis.
        shared_axes=False
    )

    sequencer_macro_blob_table: pl.DataFrame = (
        filtered_data_dict['slot_inclusion_df'].drop_nulls().unique().group_by(
            'sequencer_names').agg(
            pl.col('fill_percentage').mean().alias('avg_fill_percentage'),
            pl.col('submission_count').mean().alias(
                'avg_submission_count'),
            pl.col('slot_inclusion_rate').mean().round(
                3).alias('avg_slot_inclusion_rate'),
            pl.col('blob_hashes_length').mean().alias(
                'avg_blob_hashes_length'),
            pl.len().alias('tx_count'),
            pl.col('blob_hashes_length').sum().alias('blob_count'),
            pl.col('base_tx_fee_eth').sum().round(
                3).alias('total_base_fees_eth'),
            pl.col('priority_tx_fee_eth').sum().round(
                3).alias('total_priority_fees_eth'),
            pl.col('total_tx_fee_eth').sum().round(3).alias('total_eth_fees'),
            pl.col('priority_fee_gas').mean().round(
                3).alias('avg_priority_fee_bid'),
        ).rename({'sequencer_names': 'rollup', 'avg_blob_hashes_length': 'avg_blobs_in_tx'}))

    slot_inclusion_table_tabulator = get_slot_inclusion_table(
        filtered_data_dict["slot_inclusion_df"], sequencer_names_list)

    filename, button = slot_inclusion_table_tabulator.download_menu(
        text_kwargs={'name': 'Enter filename', 'value': 'default.csv'},
        button_kwargs={'name': 'Download data'},
    )

    entire_panel = pn.Column(
        pn.Row(
            pn.pane.Markdown(
                """
            # EIP-4844 Slot Inclusion Dashboard

            ## About
            This dashboard shows detailed analytics for blob inclusion rates as well as the efficiency of using EIP-1559 priority fees
            as a bidding mechanism for faster slot inclusion. This dashboard is made using [Xatu Data](https://github.com/ethpandaops/xatu-data?tab=readme-ov-file) for EL mempool and Beacon chain data and [Hypersync](https://github.com/enviodev/hypersync-client-python) 
            for transaction gas data for the [EIP-4844 data challenge](https://esp.ethereum.foundation/data-challenge-4844).
            """
            ),
            multi_select,
            styles=dict(background="WhiteSmoke"),
        ),
        pn.pane.Markdown(
            """
            ## 7 Day Historical Slot Inclusion
            When a transaction is resubmitted with updated gas parameters, the transaction hash changes. For example take this blob reference hash - 0x01c738cf37c911334c771f2295c060e5bd7d084f347e4334863336724934c59a. 
            On [etherscan](https://etherscan.io/tx/0x763d823c0f933c4d2eb84406b37aa2649753f2f563fa3ee6d27251c6a52a8d69) we can see that the transaction was replaced by the user. We can see on Ethernow that the transaction contains 
            the same blob reference hash in both the [original tx](https://www.ethernow.xyz/tx/0x763d823c0f933c4d2eb84406b37aa2649753f2f563fa3ee6d27251c6a52a8d69?batchIndex=1) and the [resubmitted tx](https://www.ethernow.xyz/tx/0x5a4094662bd05ff3639a8979927ab527e007a6925387951a9c1b3d2958b13a86?batchIndex=1).
            
            We can measure the total time that a blob hash sat in the mempool by subtracting the original tx was first seen from the slot time, when it eventually is finalized on the beacon chain. 
            In this particular example, the total time that the blob sat in the mempool was not from 18:56:27 to 18:57:11 (4 slots), but really 18:54:29 to 18:57:11 (14 slots)
            """
        ),
        pn.Row(
            slot_inclusion_line_chart.opts(axiswise=True),
            priority_fee_chart.opts(legend_position="left", show_legend=True),
            styles=dict(background="WhiteSmoke"),

        ),
        pn.Row(
            pn.pane.Markdown(
                """
            ## Slot Inclusion Rates
            **Slot Inclusion Rate** - The slot inclusion rate indicates the number of slots required for a blob to be included in the beacon chain, 
            with a higher rate signifying a longer inclusion time. The accompanying time-series chart tracks this metric from initial mempool 
            appearance to final beacon block inclusion. A 50 blob slot inclusion average is taken to smooth out the performance. 
            The target slot inclusion rate is 2. 
                """
            ),
            pn.pane.Markdown(
                """
            ## EIP-1559 Priority Fee Premium Correlation with Slot Rates
            The scatterplot illustrates the relationship between the EIP-1559 priority fee bid premiums and slot inclusion rates. The scatterplot points
            are individual blob bid datapoints and the line is a median bid premium. A higher priority fee bid premium tends to coincide 
            with longer slot inclusion times. This unexpected twist underscores the value of efficient slot utilization. The data indicates a trend 
            where higher bid premiums are associated with longer slot inclusion times, suggesting that as the time for a blob to be included 
            in the beacon chain increases, so does the priority fee bid premium. This behavior comes from the fact that if a blob sits in the 
            mempool for too long, then it is resubmitted with a higher priority fee. 
            """
            ),
            styles=dict(background="WhiteSmoke")
        ),
        pn.Row(
            pn.pane.Markdown(
                """
                # Blob Transaction Data (Past 7 days):
                
                This table provides detailed information on various metrics related to traditional transaction hashes that carry blob hashes. The metrics include:

                Blob Fill Percentage: Indicates the percentage of the transaction space filled by blobs.
                Transaction Resubmission Count: The number of times a transaction has been resubmitted with the same blobs.
                Number of Blobs in a Transaction: The count of blobs contained within a single transaction.
                ETH Priority Fees: The priority fees associated with each transaction in ETH.
                Additional Metrics: Various other relevant metrics, such as fees and timings.
                """
            ),
            pn.Column(
                pn.pane.Markdown("""
                                 #### **Bid Competitiveness**: the amount of priority fees being paid by the rollup compared to the block base fee.
                                 """
                                 ),
                fee_breakdown_line_chart.opts(axiswise=True),
                styles=dict(background="WhiteSmoke")
            ),
            styles=dict(background="WhiteSmoke"),
        ),
        pn.Row(
            pn.widgets.Tabulator(
                sequencer_macro_blob_table.to_pandas(), layout='fit_data'
            ),
            # avg_slot_inclusion_scatterplot,
            styles=dict(background="WhiteSmoke")
        ),
        pn.Column(
            pn.pane.Markdown(
                """
                # Slot Inclusion Data Table
                The table shows raw data that the dashboard was built on
                """
            ),
            pn.Column(filename, button),
            slot_inclusion_table_tabulator,
            styles=dict(background="WhiteSmoke")
        )
    )

    def update_bar_chart(event):
        """
        Use this to update charts based on sequencer name user selection
        """
        entire_panel[2][0].object = create_slot_inclusion_line_chart(
            filtered_data_dict["slot_inclusion_df"],
            sequencers=multi_select.value,
        )

        entire_panel[2][1].object = create_priority_fee_chart(
            filtered_data_dict["slot_inclusion_df"],
            filtered_data_dict["slot_gas_groupby_df"],
            sequencers=multi_select.value,
        )

        # I don't thnk this currently works right now
        entire_panel[4][1].object = filtered_data_dict["slot_inclusion_df"].sort(by='slot_time').plot.scatter(
            x='slot_time', y=['base_fee_per_gas', 'priority_fee_gas'], groupby='sequencer_names', s=1,
            xlabel='datetime', ylabel='gas (gwei)', title='Base Fee vs Priority Fee (gwei)',
            # need `slot_time` so that it doesn't share the same y-axis.
            shared_axes=False
        )

    multi_select.param.watch(update_bar_chart, "value")

    return entire_panel

In [15]:
# # retrieve data from database and store in memory
cached_data = get_data()

# prepare dataframes
slot_inclusion_df = create_slot_inclusion_df(
    cached_data).join(
    pl.from_dict(sequencers_l2),
    left_on="from",
    right_on="sequencer_addresses",
    how="left",
    coalesce=True
).select('slot', 'slot_time', 'hash', 'fill_percentage', 'submission_count', 'slot_inclusion_rate', 'sequencer_names', 'meta_network_name')

slot_gas_bidding_df = create_slot_gas_bidding_df(
    cached_data).select('block_number', 'hash', 'base_tx_fee_eth', 'priority_tx_fee_eth', "base_fee_per_gas",
                        "priority_fee_gas", 'total_tx_fee_eth', 'priority_fee_bid_percent_premium')

slot_inclusion_joined_df = slot_inclusion_df.join(
    slot_gas_bidding_df, on="hash", how="left", coalesce=True
)

sequencer_names_list: list[str] = sorted(sequencers_l2["sequencer_names"])

2024-06-09 is within a day of 2024-06-09


In [16]:
slot_inclusion_joined_df.drop_nulls().sort(
    by='base_fee_per_gas', descending=True)

slot,slot_time,hash,fill_percentage,submission_count,slot_inclusion_rate,sequencer_names,meta_network_name,block_number,base_tx_fee_eth,priority_tx_fee_eth,base_fee_per_gas,priority_fee_gas,total_tx_fee_eth,priority_fee_bid_percent_premium
u32,datetime[ms],str,f64,u32,f64,str,str,u64,f64,f64,f64,f64,f64,f64
9246684,2024-06-07 18:17:11,"""0xd943e6b2c18c0f660bef9057ddff778034ff2a179c811ec1a059acaa464cc6fb""",99.35000000000001,1,40,"""base""","""mainnet""",20041685,0.0023859579910530003,0.00033600000000000004,113.617047193,16,0.002721957991053,0.123
9246684,2024-06-07 18:17:11,"""0xd943e6b2c18c0f660bef9057ddff778034ff2a179c811ec1a059acaa464cc6fb""",99.35000000000001,1,40,"""base""","""mainnet""",20041685,0.0023859579910530003,0.00033600000000000004,113.617047193,16,0.002721957991053,0.123
9246684,2024-06-07 18:17:11,"""0xd943e6b2c18c0f660bef9057ddff778034ff2a179c811ec1a059acaa464cc6fb""",99.35000000000001,1,40,"""base""","""mainnet""",20041685,0.0023859579910530003,0.00033600000000000004,113.617047193,16,0.002721957991053,0.123
9246684,2024-06-07 18:17:11,"""0xd943e6b2c18c0f660bef9057ddff778034ff2a179c811ec1a059acaa464cc6fb""",99.35000000000001,1,40,"""base""","""mainnet""",20041685,0.0023859579910530003,0.00033600000000000004,113.617047193,16,0.002721957991053,0.123
9246684,2024-06-07 18:17:11,"""0xd943e6b2c18c0f660bef9057ddff778034ff2a179c811ec1a059acaa464cc6fb""",99.35000000000001,1,40,"""base""","""mainnet""",20041685,0.0023859579910530003,0.00033600000000000004,113.617047193,16,0.002721957991053,0.123
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
9258854,2024-06-09 10:51:11,"""0xbb4fb89958bf8255ac554e81df07e8569365c319943c922e4311e661411fc1d9""",39.74,1,11,"""scroll""","""mainnet""",20053785,0.00032062242807414304,0.0000445692,2.8775246410000004,0.4,0.00036519162807414305,0.122
9258854,2024-06-09 10:51:11,"""0x5c240f1ddff84fc4d600d0f3e5308e92d948cb9ad876325396ad8be7cd5e4131""",100,1,9,"""arbitrum""","""mainnet""",20053785,0.000497227625390877,0.000172797,2.8775246410000004,1,0.0006700246253908771,0.258
9258854,2024-06-09 10:51:11,"""0x5c240f1ddff84fc4d600d0f3e5308e92d948cb9ad876325396ad8be7cd5e4131""",100,1,9,"""arbitrum""","""mainnet""",20053785,0.000497227625390877,0.000172797,2.8775246410000004,1,0.0006700246253908771,0.258
9258854,2024-06-09 10:51:11,"""0x5c240f1ddff84fc4d600d0f3e5308e92d948cb9ad876325396ad8be7cd5e4131""",100,1,9,"""arbitrum""","""mainnet""",20053785,0.000497227625390877,0.000172797,2.8775246410000004,1,0.0006700246253908771,0.258


In [17]:
slot_inclusion_joined_df.drop_nulls().unique().group_by('sequencer_names').agg(
    pl.col('base_tx_fee_eth').sum().alias('total_base_fees_eth'),
    pl.col('priority_tx_fee_eth').sum().alias('total_priority_fees_eth'),
    pl.col('total_tx_fee_eth').sum().alias('total_eth_fees'),
    pl.col('priority_fee_gas').mean().alias('avg_priority_gas_bid'),
).sort(by='sequencer_names')

sequencer_names,total_base_fees_eth,total_priority_fees_eth,total_eth_fees,avg_priority_gas_bid
str,f64,f64,f64,f64
"""arbitrum""",11.27932051910018,0.9040326159999994,12.18335313510018,1.0045558086560364
"""base""",1.615559917976109,0.4772880000000022,2.0928479179761093,3.8528564163417527
"""blast""",0.4956986284629751,0.1469790000000001,0.6426776284629749,3.7487948580610606
"""linea""",5.56231571164354,0.6113755293463411,6.173691240989881,1.3913711816075258
"""metal""",0.1528065357273209,0.0336419999999999,0.186448535727321,2.005006257822278
"""optimism""",0.7635791479210651,0.1388100000000004,0.902389147921065,2.288781163434903
"""paradex""",3.3758645145358863,0.0280989648,3.403963479335886,0.0999999999999997
"""scroll""",13.294968835797045,0.109187664257408,13.40415650005445,0.0970455074616444
"""taiko""",88.73702521420964,36.42867701169986,125.16570222590956,4.98133703563465
"""zksync""",3.015771661015174,0.4523702068437593,3.468141867858933,2.034452395335135


### run the dashboard

In [18]:
# # retrieve data from database and store in memory
cached_data = get_data()

# prepare dataframes
slot_inclusion_df = create_slot_inclusion_df(
    cached_data).join(
    pl.from_dict(sequencers_l2),
    left_on="from",
    right_on="sequencer_addresses",
    how="left",
    coalesce=True
).select('slot', 'slot_time', 'hash', 'blob_hashes_length', 'fill_percentage', 'submission_count', 'slot_inclusion_rate', 'sequencer_names', 'meta_network_name')

slot_gas_bidding_df = create_slot_gas_bidding_df(
    cached_data).select('block_number', 'hash', 'base_tx_fee_eth', 'priority_tx_fee_eth', "base_fee_per_gas",
                        "priority_fee_gas", 'total_tx_fee_eth', 'priority_fee_bid_percent_premium')

slot_inclusion_joined_df = slot_inclusion_df.join(
    slot_gas_bidding_df, on="hash", how="left", coalesce=True
)

filtered_data_dict = filter_data_seq(
    sequencers_l2["sequencer_names"], slot_inclusion_joined_df, cached_data
)

sequencer_names_list: list[str] = sorted(sequencers_l2["sequencer_names"])

dashboard = start_interactive_panel(filtered_data_dict, sequencer_names_list)
dashboard.servable()

2024-06-09 is within a day of 2024-06-09
Launching server at http://localhost:38543


<panel.io.server.Server at 0x7ab11bb897d0>

Gtk-Message: 17:22:30.631: Failed to load module "canberra-gtk-module"
Gtk-Message: 17:22:30.640: Failed to load module "canberra-gtk-module"


Opening in existing browser session.
