In [1]:
from ethpandaops_python.preprocessor import Preprocessor
from eip4844_blob_data.panel_charts import create_slot_inclusion_line_chart, create_priority_fee_chart
from eip4844_blob_data.polars_preprocess import create_slot_inclusion_df, create_slot_count_breakdown_df, create_slot_gas_bidding_df, create_bid_premium_df
from holoviews import opts
import nest_asyncio
import polars as pl
import panel as pn
import time


nest_asyncio.apply()
pn.extension("plotly", template="material", sizing_mode="stretch_width")
pl.Config.set_fmt_str_lengths(200)
pl.Config.set_fmt_float("full")

KeyboardInterrupt: 

### Data Query

In [None]:
# labeled blobs - https://dune.com/queries/3521610
sequencers_l2: dict[list[str], list[str]] = {
    "sequencer_addresses": [
        # should be the "from" addresses, this is what hilldobby SQL query does.
        # should also be proper checksum, not lowercase
        "0xC1b634853Cb333D3aD8663715b08f41A3Aec47cc",
        "0x5050F69a9786F081509234F1a7F4684b5E5b76C9",
        "0x6887246668a3b87F54DeB3b94Ba47a6f63F32985",
        "0x000000633b68f5D8D3a86593ebB815b4663BCBe0",
        "0x415c8893D514F9BC5211d36eEDA4183226b84AA7",
        "0xa9268341831eFa4937537bc3e9EB36DbecE83C7e",
        "0xcF2898225ED05Be911D3709d9417e86E0b4Cfc8f",
        "0x0D3250c3D5FAcb74Ac15834096397a3Ef790ec99",
    ],
    "sequencer_names": [
        "arbitrum",
        "base",
        "optimism",
        "taiko",
        "blast",
        "linea",
        "scroll",
        "zksync",
    ],
}

num_days: int = 7

In [None]:
def get_data() -> dict[str, pl.DataFrame]:
    return Preprocessor(
        blob_producer=sequencers_l2,
        period=num_days,
        network="mainnet",  # mainnet
    ).cached_data

In [None]:
def filter_data_seq(
    sequencers: list[str], df: pl.DataFrame, cached_data: dict[str, pl.DataFrame]
) -> dict[str: pl.DataFrame]:
    """
    This function filters a dataframe and returns updated chart data, based on the input of the dashboard user.
    """

    # slot count breakdown (not currently used)
    slot_count_breakdown_df = (
        df.filter(pl.col("sequencer_names").is_in(sequencers))
        .filter(pl.col('meta_network_name') == 'mainnet')
        .select("hash", "slot_inclusion_rate", "sequencer_names")
        .unique()
        .with_columns(
            pl.when(pl.col("slot_inclusion_rate") == 1)
            .then(True)
            .otherwise(False)
            .alias("1_slot"),
            pl.when(pl.col("slot_inclusion_rate") == 2)
            .then(True)
            .otherwise(False)
            .alias("2_slots"),
            pl.when(pl.col("slot_inclusion_rate") >= 3)
            .then(True)
            .otherwise(False)
            .alias("3_plus_slots"),
        )
        .with_columns(
            pl.col("1_slot").sum(),
            pl.col("2_slots").sum(),
            pl.col("3_plus_slots").sum(),
        )
        .select("1_slot", "2_slots", "3_plus_slots", "sequencer_names")[0]
    )
    # gas bidding scatterplot
    slot_gas_bidding_df = (
        df.filter(pl.col("sequencer_names").is_in(sequencers))
        .filter(pl.col('meta_network_name') == 'mainnet')
        .join(cached_data["txs"], on="hash", how="left")
        .with_columns(
            (pl.col("effective_gas_price") / 10**9)
            .round(3)
            .alias(
                "effective_gas_price_gwei"
            ),  # gas price in gwei that was paid, including priority fee
            (pl.col("max_fee_per_gas") / 10**9)
            .round(3)
            .alias(
                "max_fee_per_gas_gwei"
            ),  # max gas price in gwei that rollup is willing to pay
            (pl.col("max_priority_fee_per_gas") / 10**9)
            .round(3)
            # priority gas fee in gwei,
            .alias("max_priority_fee_per_gas_gwei"),
        )
        .with_columns(
            (
                (
                    pl.col("max_priority_fee_per_gas_gwei")
                    / pl.col("effective_gas_price_gwei")
                )
                * 100
            )
            .round(3)
            .alias("priority_fee_bid_percent_premium")
        )
        .select(
            "block_number",
            "max_priority_fee_per_gas_gwei",
            "effective_gas_price_gwei",
            "priority_fee_bid_percent_premium",
            "slot_inclusion_rate",
            "submission_count",
            "sequencer_names",
        )
        .unique()
        .sort(by="block_number")
        .with_columns(
            (
                # estimate min block gas by taking the gwei paid minus the priority fee
                pl.col("effective_gas_price_gwei")
                - pl.col("max_priority_fee_per_gas_gwei")
            ).alias("min_block_gas_gwei")
        )
        .with_columns(
            # calculate per tx gas fluctuations
            pl.col("min_block_gas_gwei").diff().abs().alias(
                "gas_fluctuation_gwei")
        )
        .with_columns(
            (pl.col("gas_fluctuation_gwei") / pl.col("min_block_gas_gwei") * 100).alias(
                "gas_fluctuation_percent"
            )
        )
        .drop_nulls()
    )

    # gas bidding scatterplot median
    slot_gas_groupby_df = (
        slot_gas_bidding_df.group_by("slot_inclusion_rate", "sequencer_names")
        .agg(
            pl.col("priority_fee_bid_percent_premium").median(),
            pl.col("effective_gas_price_gwei").mean(),
        )
        .sort(by="slot_inclusion_rate")
        .drop_nulls()
    )

    return {
        # time series
        "slot_inclusion_joined": df,
        # histogram
        "slot_count_breakdown_df": slot_count_breakdown_df,
        # these two are for scatterplot
        "slot_gas_bidding_df": slot_gas_bidding_df,
        "slot_gas_groupby_df": slot_gas_groupby_df,
    }

In [None]:
# start dashboard
def start_interactive_panel(filtered_data_dict, sequencer_names_list):
    multi_select = pn.widgets.MultiSelect(
        name="Sequencers",
        size=8,
        options=sequencer_names_list,
        value=sequencer_names_list,
    )

    slot_inclusion_line_chart = create_slot_inclusion_line_chart(
        filtered_data_dict["slot_inclusion_joined"], sequencer_names_list
    )

    priority_fee_chart = create_priority_fee_chart(
        # add in filter to remove outliers and make chart look better
        filtered_data_dict["slot_gas_bidding_df"].filter(
            pl.col('slot_inclusion_rate') < 50),
        filtered_data_dict["slot_gas_groupby_df"].filter(
            pl.col('slot_inclusion_rate') < 50),
        sequencer_names_list,
    )

    entire_panel = pn.Column(
        pn.Row(
            pn.pane.Markdown(
                """
            # EIP-4844 Slot Inclusion Dashboard

            ## About
            This dashboard shows detailed analytics for blobs and how fast they are included into the next slot as well as the efficiency of using EIP-1559 priority fees
            as a bidding mechanism for faster slot inclusion. This dashboard is made using [Xatu Data](https://github.com/ethpandaops/xatu-data?tab=readme-ov-file) for EL mempool and Beacon chain data and [Hypersync](https://github.com/enviodev/hypersync-client-python) 
            for transaction gas data for the [EIP-4844 data challenge](https://esp.ethereum.foundation/data-challenge-4844).
            """
            ),
            multi_select,
        ),
        pn.pane.Markdown(
            """
            ## Calculating Blob Inclusion in the Beacon Chain
            When a transaction is resubmitted with updated gas parameters, the transaction hash changes. For example take this blob reference hash - 0x01c738cf37c911334c771f2295c060e5bd7d084f347e4334863336724934c59a. 
            On [etherscan](https://etherscan.io/tx/0x763d823c0f933c4d2eb84406b37aa2649753f2f563fa3ee6d27251c6a52a8d69) we can see that the transaction was replaced by the user. We can see on Ethernow that the transaction contains 
            the same blob reference hash in both the [original tx](https://www.ethernow.xyz/tx/0x763d823c0f933c4d2eb84406b37aa2649753f2f563fa3ee6d27251c6a52a8d69?batchIndex=1) and the [resubmitted tx](https://www.ethernow.xyz/tx/0x5a4094662bd05ff3639a8979927ab527e007a6925387951a9c1b3d2958b13a86?batchIndex=1).
            
            We can measure the total time that a blob hash sat in the mempool by subtracting the original tx was first seen from the slot time, when it eventually is finalized on the beacon chain. 
            In this particular example, the total time that the blob sat in the mempool was not from 18:56:27 to 18:57:11 (4 slots), but really 18:54:29 to 18:57:11 (14 slots)
            """
        ),
        pn.Row(
            slot_inclusion_line_chart.opts(axiswise=True),
            priority_fee_chart.opts(legend_position="left", show_legend=True),
            styles=dict(background="WhiteSmoke"),
        ),
        pn.Row(
            pn.pane.Markdown(
                """
            ## Slot Inclusion Rates
            **Slot Inclusion Rate** - The slot inclusion rate indicates the number of slots required for a blob to be included in the beacon chain, 
            with a higher rate signifying a longer inclusion time. The accompanying time-series chart tracks this metric from initial mempool 
            appearance to final beacon block inclusion. A 50 blob slot inclusion average is taken to smooth out the performance. 
            The target slot inclusion rate is 2. 
                """
            ),
            pn.pane.Markdown(
                """
            ## EIP-1559 Priority Fee Premium Correlation with Slot Rates
            The scatterplot illustrates the relationship between the EIP-1559 priority fee bid premiums and slot inclusion rates. The scatterplot points
            are individual blob bid datapoints and the line is a median bid premium. A higher priority fee bid premium tends to coincide 
            with longer slot inclusion times. This unexpected twist underscores the value of efficient slot utilization. The data indicates a trend 
            where higher bid premiums are associated with longer slot inclusion times, suggesting that as the time for a blob to be included 
            in the beacon chain increases, so does the priority fee bid premium. This behavior comes from the fact that if a blob sits in the 
            mempool for too long, then it is resubmitted with a higher priority fee. 
            """
            ),
            styles=dict(background="WhiteSmoke"),
        ),
    )

    def update_bar_chart(event):
        entire_panel[2][0].object = create_slot_inclusion_line_chart(
            filtered_data_dict["slot_inclusion_joined"],
            sequencers=multi_select.value,
        )

        entire_panel[2][1].object = create_priority_fee_chart(
            filtered_data_dict["slot_gas_bidding_df"],
            filtered_data_dict["slot_gas_groupby_df"],
            sequencers=multi_select.value,
        )

    multi_select.param.watch(update_bar_chart, "value")

    return entire_panel

### Troubleshooting

In [None]:
# retrieve data from database and store in memory
# cached_data = get_data()

# # prepare dataframes
# slot_inclusion_df = create_slot_inclusion_df(cached_data)
# slot_count_breakdown_df = create_slot_count_breakdown_df(cached_data)
# slot_gas_bidding_df = create_slot_gas_bidding_df(cached_data)

# slot_inclusion_joined: pl.DataFrame = slot_inclusion_df.join(
#     pl.from_dict(sequencers_l2),
#     left_on="from",
#     right_on="sequencer_addresses",
#     how="left",
#     coalesce=True
# )

In [None]:
# # groupby sequencer address name to see if they are all showing up with addresses correctly
# slot_inclusion_joined.group_by('sequencer_names').agg(
#     pl.len().alias('count')).sort(by='count', descending=True)

In [None]:
# filtered_data_dict = filter_data_seq(
#     sequencers_l2["sequencer_names"], slot_inclusion_joined, cached_data
# )

# filtered_data_dict['slot_inclusion_joined'].filter(
#     pl.col('sequencer_names') == "taiko")

In [None]:
# filtered_data_dict['slot_inclusion_joined'].filter(
#     pl.col('sequencer_names') == "taiko").plot.line(x='slot_time', y='slot_inclusion_rate')

In [None]:
# filtered_data_dict = filter_data_seq(
#     sequencers_l2["sequencer_names"], slot_inclusion_joined, cached_data
# )

# sequencer_names_list: list[str] = sorted(sequencers_l2["sequencer_names"])

# cmap_list: list[str] = [
#     "red",
#     "green",
#     "blue",
#     "orange",
#     "purple",
#     "cyan",
#     "magenta",
#     "yellow",
#     "brown",
# ]

In [None]:
# retrieve data from database and store in memory
cached_data = get_data()

# prepare dataframes
slot_inclusion_df = create_slot_inclusion_df(
    cached_data)
slot_count_breakdown_df = create_slot_count_breakdown_df(
    cached_data)
slot_gas_bidding_df = create_slot_gas_bidding_df(
    cached_data)

slot_inclusion_joined: pl.DataFrame = slot_inclusion_df.join(
    pl.from_dict(sequencers_l2),
    left_on="from",
    right_on="sequencer_addresses",
    how="left",
    coalesce=True
)

filtered_data_dict = filter_data_seq(
    sequencers_l2["sequencer_names"], slot_inclusion_joined, cached_data
)

sequencer_names_list: list[str] = sorted(sequencers_l2["sequencer_names"])

dashboard = start_interactive_panel(filtered_data_dict, sequencer_names_list)
dashboard.show()

Scanned up to block 20013764
Scanned up to block 20023086
Scanned up to block 20031425


  .join(cached_data["txs"], on="hash", how="left")


Launching server at http://localhost:34467


Gtk-Message: 13:02:33.010: Failed to load module "canberra-gtk-module"
Gtk-Message: 13:02:33.011: Failed to load module "canberra-gtk-module"


Opening in existing browser session.


<panel.io.server.Server at 0x7dcf5d1616d0>

In [None]:
# # panel dashboard in a while loop to update the data every 24 hours
# while True:
#     # retrieve data from database and store in memory
#     cached_data = get_data()

#     # prepare dataframes
#     slot_inclusion_df = create_slot_inclusion_df(cached_data)
#     slot_count_breakdown_df = create_slot_count_breakdown_df(cached_data)
#     slot_gas_bidding_df = create_slot_gas_bidding_df(cached_data)

#     slot_inclusion_joined: pl.DataFrame = slot_inclusion_df.join(
#         pl.from_dict(sequencers_l2),
#         left_on="from",
#         right_on="sequencer_addresses",
#         how="left"
#     )

#     filtered_data_dict = filter_data_seq(
#         sequencers_l2["sequencer_names"], slot_inclusion_joined, cached_data
#     )

#     sequencer_names_list: list[str] = sorted(sequencers_l2["sequencer_names"])

#     start_interactive_panel(filtered_data_dict, sequencer_names_list)
#     time.sleep(86400 / 3)