# rollups_blobs_economics

> This notebook explores the economics of rollups and blobs from cryo transactions and blocks dataset.

In [1]:
from ethereum_block_explorer.polars_transforms import get_txs_blocks_mempool

import polars as pl
import pprint
import jupyter_black

pl.Config.set_fmt_str_lengths(200)
pl.Config.set_fmt_float("full")
jupyter_black.load()

In [2]:
cd ../

/home/evan/Documents/ethereum_block_explorer


### Methodology
We label the sequencer submitter addresses from each L2 and analyze how much gas they consume in blocks. We measure the block utilization behavior of L2s pre EIP-4844 and then extrapolate, from historical demand, how saturated the initial data blob market will be.
* we focus on L1 gas usage instead of L2 TPS so that we can analyze gas costs independent to L2 data compression improvements.

In [4]:
# TAG - PREPROCESSING SCRIPT!
# https://dune.com/queries/3302607
sequencers_l2: dict[str] = {
    "sequencer_addresses": [
        "0xc1b634853cb333d3ad8663715b08f41a3aec47cc",
        "0x6887246668a3b87f54deb3b94ba47a6f63f32985",
        "0x9228624c3185fcbcf24c1c9db76d8bef5f5dad64",
        "0x6667961f5e9c98a76a48767522150889703ed77d",
        "0xcf2898225ed05be911d3709d9417e86e0b4cfc8f",
        "0x148ee7daf16574cd020afa34cc658f8f3fbd2800",
        "0x16d5783a96ab20c9157d7933ac236646b29589a4",
        "0x5050f69a9786f081509234f1a7f4684b5e5b76c9",
    ],
    "sequencer_names": [
        "arbitrum",
        "optimism",
        "linea",
        "mantle",
        "scroll",
        "polygon_zkevm",
        "starknet",
        "base",
    ],
}

sequencer_labels_lf: pl.LazyFrame = pl.from_dict(sequencers_l2).lazy()

# calculate calldata size
txs_calldata = (
    pl.scan_parquet("data/raw/transactions/*.parquet")
    .select(
        "block_number",
        "transaction_hash",
        "input",
    )
    .with_columns([pl.col("input").str.len_bytes().alias("calldata_size")])
    .with_columns(pl.col("calldata_size") - 1)
    .select("transaction_hash", "block_number", "calldata_size")
).with_columns(
    pl.col("calldata_size")
    .sum()
    .over(pl.col("block_number"))
    .alias("total_calldata_block_size")
)

# cryo txs
txs = (
    pl.scan_parquet("data/raw/transactions/*.parquet")
    .select(
        "block_number",
        "transaction_index",
        "transaction_hash",
        "from_address",
        "gas_used",
        "gas_price",
        "max_priority_fee_per_gas",
        "max_fee_per_gas",
    )
    .join(
        txs_calldata,
        on="transaction_hash",
        how="left",
    )
    .join(
        sequencer_labels_lf,
        left_on="from_address",
        right_on="sequencer_addresses",
        how="left",
    )
)

# cryo blocks
blocks = pl.scan_parquet("data/raw/blocks/*.parquet").select(
    "author", "block_number", "timestamp", "gas_used", "base_fee_per_gas"
)

# final df
txs_blocks_lf: pl.LazyFrame = (
    get_txs_blocks_mempool(txs, blocks)
    .filter(pl.col("from_address").is_in(sequencers_l2["sequencer_addresses"]))
    .drop("from_address", "timestamp", "transaction_hash", "author")
    .with_columns(
        # convert gas to gwei
        (pl.col("gas_price") / 10**9),
        (pl.col("max_priority_fee_per_gas") / 10**9),
        (pl.col("max_fee_per_gas") / 10**9),
        (pl.col("base_fee_per_gas") / 10**9),
        # convert bytes to kilobytes
        (pl.col("calldata_size") / 10**3),
        (pl.col("total_calldata_block_size") / 10**3),
    )
)

None Type Expected


In [5]:
explore_df = txs_blocks_lf.collect(streaming=True)

### Data Overview, Schema

- gas is shown in gwei
- calldata is shown in kilobytes

In [None]:
pprint.pprint(explore_df.shape)
pprint.pprint(explore_df["block_number"].unique().len())
pprint.pprint(explore_df.schema)

(220970, 17)
80632
OrderedDict([('block_number', UInt32),
             ('transaction_index', UInt64),
             ('gas_used', UInt64),
             ('gas_price', Float64),
             ('max_priority_fee_per_gas', Float64),
             ('max_fee_per_gas', Float64),
             ('calldata_size', Float64),
             ('total_calldata_block_size', Float64),
             ('sequencer_names', String),
             ('gas_used_block', UInt64),
             ('base_fee_per_gas', Float64),
             ('tx_gas_cost', Float64),
             ('block_datetime', Datetime(time_unit='us', time_zone=None)),
             ('transaction_index_max', UInt64),
             ('block_gas_premium', Float64),
             ('blockspace_percentile', Float64),
             ('rounded_blockspace_percentile', Float64)])


In [9]:
# overall block stats
explore_df.group_by("block_number").agg(
    pl.col("gas_used").sum().alias("sequencer_gas_used"),
    pl.col("tx_gas_cost").sum().alias("tx_gas_cost_sum_eth"),
    pl.col("gas_used_block").first(),
    pl.col("total_calldata_block_size").first(),
    pl.col("calldata_size").sum().alias("sequencer_calldata_size_sum"),
).with_columns(
    [
        (pl.col("sequencer_gas_used") / pl.col("gas_used_block")).alias(
            "sequencer_gas_utilization"
        ),
        (
            pl.col("sequencer_calldata_size_sum") / pl.col("total_calldata_block_size")
        ).alias("sequencer_calldata_utilization"),
    ]
).sort(
    by="sequencer_calldata_utilization", descending=True
).head(
    10
)

block_number,sequencer_gas_used,tx_gas_cost_sum_eth,gas_used_block,total_calldata_block_size,sequencer_calldata_size_sum,sequencer_gas_utilization,sequencer_calldata_utilization
u32,u64,f64,u64,f64,f64,f64,f64
18797056,28961012,1.7000435146608432,29995570,3111.44,3108.7610000000004,0.965509640256878,0.9991389838788473
18796373,28169200,1.649537324620968,29969354,3033.765,3027.344,0.9399335067415868,0.997883488009124
18797060,28223596,1.6839872476479734,29994335,3039.962,3033.4880000000003,0.940964218743306,0.9978703681164436
18796548,23110513,1.3083858321028337,24878482,2491.008,2484.981,0.9289358169039412,0.9975804975335288
18797119,28420491,1.70522946,29992226,3065.353,3056.656,0.9475952535166946,0.9971628063717294
18796395,28026634,1.6741086157787448,29988286,3019.328,3010.576000000001,0.9345860580361278,0.9971013417555168
18796280,17648661,1.222858475142066,19918611,1902.89,1896.922,0.8860387403519251,0.996863717818686
18792722,26488855,3.7494052914641465,29974089,2856.027,2846.663,0.883725106707997,0.996721319511335
18793062,26703504,3.745338335129193,29962775,2882.188,2872.071,0.8912226587824392,0.9964898195398773
18797022,21784820,1.2198081602072193,24311721,2355.07,2346.796,0.8960624383604928,0.9964867286322698


In [8]:
# sequencer stats
explore_df.group_by("block_number", "sequencer_names").agg(
    pl.col("gas_used").sum().alias("sequencer_gas_used"),
    pl.col("tx_gas_cost").sum().alias("tx_gas_cost_sum_eth"),
    pl.col("gas_used_block").first(),
    pl.col("total_calldata_block_size").first(),
    pl.col("calldata_size").sum().alias("sequencer_calldata_size_sum"),
).with_columns(
    [
        (pl.col("sequencer_gas_used") / pl.col("gas_used_block")).alias(
            "sequencer_gas_utilization"
        ),
        (
            pl.col("sequencer_calldata_size_sum") / pl.col("total_calldata_block_size")
        ).alias("sequencer_calldata_utilization"),
    ]
).sort(
    by="sequencer_calldata_utilization", descending=True
).head(
    10
)

block_number,sequencer_names,sequencer_gas_used,tx_gas_cost_sum_eth,gas_used_block,total_calldata_block_size,sequencer_calldata_size_sum,sequencer_gas_utilization,sequencer_calldata_utilization
u32,str,u64,f64,u64,f64,f64,f64,f64
18796373,"""arbitrum""",28169200,1.649537324620968,29969354,3033.765,3027.344,0.9399335067415868,0.997883488009124
18797060,"""arbitrum""",28223596,1.6839872476479734,29994335,3039.962,3033.4880000000003,0.940964218743306,0.9978703681164436
18796548,"""arbitrum""",23110513,1.3083858321028337,24878482,2491.008,2484.981,0.9289358169039412,0.9975804975335288
18797119,"""arbitrum""",28420491,1.70522946,29992226,3065.353,3056.656,0.9475952535166946,0.9971628063717294
18796395,"""arbitrum""",28026634,1.6741086157787448,29988286,3019.328,3010.576000000001,0.9345860580361278,0.9971013417555168
18796280,"""arbitrum""",17648661,1.222858475142066,19918611,1902.89,1896.922,0.8860387403519251,0.996863717818686
18792722,"""arbitrum""",26488855,3.7494052914641465,29974089,2856.027,2846.663,0.883725106707997,0.996721319511335
18793062,"""arbitrum""",26703504,3.745338335129193,29962775,2882.188,2872.071,0.8912226587824392,0.9964898195398773
18797022,"""arbitrum""",21784820,1.2198081602072193,24311721,2355.07,2346.796,0.8960624383604928,0.9964867286322698
18793076,"""arbitrum""",26999957,3.656290825137227,29997674,2917.638,2906.311,0.9000683519662225,0.9961177500430144


### How does EIP-4844 work?
EIP-4844 introduces a new transaction type, called a blob-carrying transaction. A blob-carrying transaction is like a regular transaction, except it also carries an extra piece of data called a blob. Blobs are extremely large (~125 kB), and can be much cheaper than similar amounts of calldata. [source](https://notes.ethereum.org/@vbuterin/proto_danksharding_faq#What-format-is-blob-data-in-and-how-is-it-committed-to)

EIP-4844 replaces CALLDATA with a new “Blob layer” to increase rollup throughput and make posting data cheaper. With EIP-4844, data is posted as a “blob” and a new transaction type is introduced, a blob-carrying transaction, that rollup sequencers will use [source](https://taiko.mirror.xyz/3_5rjTXFT_bLYnRBD2ZxqLQu9-qgTF4JOkwK7rUpwJs). The blob space is a new type of data storage that is not accessible to the Ethereum Virtual Machine (EVM), unlike calldata. Unlike calldata, which is stored in history forever, blobs are persisted in beacon nodes for a short period of time (approximately 2 weeks), which allows them to be priced cheaper

### Blob Gas Math

Pre EIP-4844 block size in gas at 16 gas per calldata byte (most expensive type of data) is
$$\frac{30,000,000}{16} = 1.875MB $$

Each blob size is 0.125 MB. The target is 3 blobs (0.375 MB)and the maximum is 6 blobs (0.75 MB) per block. Each blob is worth 131,072 gas [2**17, source](https://eips.ethereum.org/EIPS/eip-4844)

The blob gas limit is 2**17 value which turns out to be 131,072 gas. This is about 1.04 gas per byte, compared to 16 gas per calldata byte.


After EIP-4844, block size in gas will increase to 42,000,000 gas assuming a 16 gas per byte cost similar to calldata. We can do various model estimates based on how much gas each byte of data will cost. Calldata represents the majority of L2 costs right now [source](https://docs.alchemy.com/docs/understanding-the-transaction-object-on-ethereum) I think that the majority costs is the `input` data, where L2s save state transactions using calldata. If so, I need to redownload the dataset because I excluded `input` data to save on space. 



### Blob Pricing
Regular gas pricing: The price of gas in a block is a simple function of how much gas was used in the previous block compared to the target.
Blob gas pricing: The price of blob gas is a function of the running tally excess blobs there have been in total. [source](https://domothy.com/blobspace/)

### Charts

In [None]:
# TITLE - Gas usage per Sequencer. This chart shows the individual gas usage for each sequencer in a block
(
    explore_df.group_by("block_number", "sequencer_names").agg(
        (pl.col("gas_used").sum() / pl.col("gas_used_block").first()).alias(
            "sequencer_gas_used_pct"
        ),
        (pl.col("gas_used_block").first()).alias("total_gas_in_block"),
    )
).sort(by="sequencer_gas_used_pct", descending=True).plot.scatter(
    x="sequencer_gas_used_pct",
    y="total_gas_in_block",
    by="sequencer_names",
    height=600,
    width=800,
    xlabel="Sequencer Gas % Used of Total Block Size",  # Custom X-axis label
    ylabel="Block Size (in millions of Gas)",  # Custom Y-axis label
    title="Gas Usage Per Sequencer",
)

In [None]:
# TITLE - KDE of Total Sequencer Gas Usage
(
    explore_df.group_by("block_number").agg(
        (pl.col("gas_used").sum()).alias("sequencer_gas_used_sum"),
        (pl.col("gas_used_block").first()).alias("total_gas_in_block"),
    )
).sort(by="sequencer_gas_used_sum", descending=True).select(
    "sequencer_gas_used_sum"
).plot.kde(
    title="Collective Gas Usage Per Sequencer",
)

In [None]:
# TITLE - KDE of Total Sequencer Gas Usage
(
    explore_df.group_by("block_number").agg(
        (pl.col("gas_used").sum() / pl.col("gas_used_block").first()).alias(
            "sequencer_gas_used_pct"
        ),
        (pl.col("gas_used_block").first()).alias("total_gas_in_block"),
    )
).sort(by="sequencer_gas_used_pct", descending=True).select(
    "sequencer_gas_used_pct"
).plot.kde(
    title="Collective Gas Usage Per Sequencer",
)

In [None]:
# TITLE - Total Sequencer Gas Usage Over Time
(
    explore_df.group_by("block_number").agg(
        (pl.col("gas_used").sum() / pl.col("gas_used_block").first()).alias(
            "sequencer_gas_used_pct"
        ),
        (pl.col("gas_used_block").first()).alias("total_gas_in_block"),
    )
).sort(by="block_number", descending=True).plot.line(
    x="block_number",
    y="sequencer_gas_used_pct",
    title="Collective Gas Usage Per Sequencer (%)",
)

In [None]:
# TITLE - Total Sequencer Gas Usage Over Time
(
    explore_df.group_by("block_number").agg(
        (pl.col("gas_used").sum()).alias("sequencer_gas_used_sum"),
        (pl.col("gas_used_block").first()).alias("total_gas_in_block"),
    )
).sort(by="block_number", descending=True).plot.line(
    x="block_number",
    y=["sequencer_gas_used_sum"],
    title="Collective Gas Usage Per Sequencer (Gas)",
)

### Further Analysis

- Based on historical estimates, how fast do we expect the data blob market to be filled up?


* EIP-4844 increases the bandwidth requirements per beacon block by a maximum of ~0.75 MB. This is 40% larger than the theoretical maximum size of a block today (30M gas / 16 gas per calldata byte = 1.875M bytes) (how does this work???)