# rollups_blobs_economics

> This notebook explores the economics of rollups and blobs from cryo transactions and blocks dataset.

In [1]:
# |default_exp rollups_blobs

In [16]:
# | export
from ethereum_block_explorer.cryo_query import cryoTransform

import os
import polars as pl
import re
import pprint
import jupyter_black

pl.Config.set_fmt_str_lengths(200)
pl.Config.set_fmt_float("full")
jupyter_black.load()

In [3]:
cd ../

/home/evan/Documents/ethereum_block_explorer


### Setup & Methodology
We label the sequencer submitter addresses from each L2 and analyze how much gas they consume in blocks. We measure the block utilization behavior of L2s pre EIP-4844 and then extrapolate, from historical demand, how saturated the initial data blob market will be. We focus on L1 gas usage instead of L2 TPS so that we can analyze gas costs independent to L2 data compression improvements.

* Gas is shown in gwei and calldata is shown in KB.

In [4]:
ct = cryoTransform()

In [5]:
directory_a: str = "data/raw/transactions"
directory_b: str = "data/raw/blocks"

synced_files: dict[str] = ct.sync_filenames(directory_a=directory_a, directory_b=directory_b)

In [9]:
# Sequencer Tags
# https://dune.com/queries/3302607
sequencers_l2: dict[str] = {
    "sequencer_addresses": [
        "0xc1b634853cb333d3ad8663715b08f41a3aec47cc",
        "0x6887246668a3b87f54deb3b94ba47a6f63f32985",
        "0x9228624c3185fcbcf24c1c9db76d8bef5f5dad64",
        "0x6667961f5e9c98a76a48767522150889703ed77d",
        "0xcf2898225ed05be911d3709d9417e86e0b4cfc8f",
        "0x148ee7daf16574cd020afa34cc658f8f3fbd2800",
        "0x16d5783a96ab20c9157d7933ac236646b29589a4",
        "0x5050f69a9786f081509234f1a7f4684b5e5b76c9",
    ],
    "sequencer_names": [
        "arbitrum",
        "optimism",
        "linea",
        "mantle",
        "scroll",
        "polygon_zkevm",
        "starknet",
        "base",
    ],
}

sequencer_labels_lf: pl.LazyFrame = pl.from_dict(sequencers_l2).lazy()

In [15]:
# Check if the lists have the same length
if len(synced_files[directory_a]) == len(synced_files[directory_b]):
    # Assuming both lists in synced_files have the same length
    for i in range(len(synced_files[directory_a])):
        # load LazyFrames
        txs_file: str = directory_a + "/" + synced_files[directory_a][i]
        blocks_file: str = directory_b + "/" + synced_files[directory_b][i]

        txs_lf: pl.LazyFrame = pl.scan_parquet(txs_file)
        blocks_lf: pl.LazyFrame = pl.scan_parquet(blocks_file)

        # combine and save
        tx_blocks_lf: pl.LazyFrame = (
            ct.extend_txs_blocks(txs_lf, blocks_lf)
            .join(
                sequencer_labels_lf,
                left_on="from_address",
                right_on="sequencer_addresses",
                how="left",
            )
            .drop("timestamp", "block_number_right")
            .filter(pl.col("sequencer_names").is_in(sequencers_l2["sequencer_names"]))
        )

        # make a new directory to save the files
        new_directory = "data/rollups"
        if not os.path.exists(new_directory):
            os.makedirs(new_directory)

        block_range_match: str = re.search(r"__(\d+_to_\d+)", txs_file)
        block_range = block_range_match.group(1)
        new_filename = f"{block_range}.parquet"

        tx_blocks_lf.collect(streaming=True).write_parquet(
            new_directory + "/" + new_filename
        )

else:
    print("The synced file lists for the two directories are not of the same length.")

KeyboardInterrupt: 

In [21]:
df_test = pl.read_parquet("data/rollups/*.parquet")

In [6]:
STOP

NameError: name 'STOP' is not defined

In [None]:


# TODO - 
# cryo txs
txs_lf = pl.scan_parquet("data/raw/transactions/*.parquet")

# cryo blocks
blocks_lf = pl.scan_parquet("data/raw/blocks/*.parquet").select(
    "author", "block_number", "timestamp", "gas_used", "base_fee_per_gas"
)

# final df
tx_blocks_lf: pl.LazyFrame = (
    ct.extend_txs_blocks(txs_lf, blocks_lf)
    .join(
        sequencer_labels_lf,
        left_on="from_address",
        right_on="sequencer_addresses",
        how="left",
    )
    .drop("timestamp", "block_number_right")
    .filter(pl.col("sequencer_names").is_in(sequencers_l2["sequencer_names"]))
)

In [None]:
batch_tx.collect(streaming=True)

In [None]:
STOP

In [None]:
# tx_blocks_lf.sink_parquet("data/rollup_blob.parquet")

In [None]:
explore_df = pl.read_parquet("data/rollup_blob.parquet")

In [None]:
# cryo txs
dataset_info = (
    (explore_df.select("block_number", "block_datetime"))
    .unique()
    .sort(by="block_number")
)

print(
    f"block range: {dataset_info['block_number'][0]} to {dataset_info['block_number'][-1]} = {dataset_info['block_number'][-1] - dataset_info['block_number'][0]} range"
)
pprint.pprint(dataset_info["block_number"].len())
print(
    f"block range: {dataset_info['block_datetime'][0]} to {dataset_info['block_datetime'][-1]}"
)

block range: 18030002 to 18254999 = 224997 range
127425
block range: 2023-08-30 21:10:23 to 2023-10-01 10:09:47


In [None]:
pprint.pprint(explore_df.shape)
print(
    f"block range: {dataset_info['block_number'][-1]- dataset_info['block_number'][0]}"
)
pprint.pprint(explore_df["block_number"].unique().len())
pprint.pprint(explore_df.schema)

(292045, 30)
block range: 224997
127425
OrderedDict([('block_number', UInt32),
             ('transaction_index', UInt64),
             ('transaction_hash', String),
             ('nonce', UInt64),
             ('from_address', String),
             ('to_address', String),
             ('gas_limit', UInt64),
             ('gas_used', UInt64),
             ('transaction_type', UInt32),
             ('success', Boolean),
             ('n_input_bytes', UInt32),
             ('n_input_zero_bytes', UInt32),
             ('n_input_nonzero_bytes', UInt32),
             ('n_rlp_bytes', UInt32),
             ('chain_id', UInt64),
             ('author', String),
             ('gas_used_block', UInt64),
             ('transaction_index_max', UInt64),
             ('tx_gas_cost', Float64),
             ('block_datetime', Datetime(time_unit='us', time_zone=None)),
             ('block_gas_premium', Float64),
             ('blockspace_percentile', Float64),
             ('rounded_blockspace_percent

### Base gas fee volatility is the asset that is being bought/sold.

Preconfirmations are critical to securing future blockspace at less volatile gas prices. This section uses a rolling daily (7200 blocks) average base gas fee as the "breakeven" point. A preconfirmation is the right, but not obligation, to reserve future blockspace at the breakeven price. A future contract "in the money" would be when the future block base gas > breakeven gas price and being "out of money" would be whent he future block base gas < breakeven gas price and the future expires worthless. 

In [None]:
blocks_df = blocks_lf.collect(streaming=True)

In [None]:
# mean block gas values
mean_block_vals = blocks_df.sort(by="block_datetime").with_columns(
    (pl.col("base_fee_per_gas").rolling_mean(window_size=7200)).alias(
        "avg_base_fee_daily"
    ),
    (pl.col("base_fee_per_gas").rolling_mean(window_size=5)).alias(
        "avg_base_fee_minute"
    ),  # calculates rolling average over 5 blocks (1 minute)
)

mean_block_vals.mean().select("gas_used", "avg_base_fee_daily", "avg_base_fee_minute")

gas_used,avg_base_fee_daily,avg_base_fee_minute
f64,f64,f64
15.120564535669969,12.589467136167862,12.613622453882948


In [None]:
itm_count = (
    blocks_df.sort(by="block_datetime")
    .with_columns(
        (pl.col("base_fee_per_gas") - mean_block_vals["avg_base_fee_daily"]).alias(
            "mean_gas_delta_daily"
        )  # negative is "out of money". positive is "in the money"
    )
    .filter(pl.col("mean_gas_delta_daily") > 0)
)

otm_count = (
    blocks_df.sort(by="block_datetime")
    .with_columns(
        (pl.col("base_fee_per_gas") - mean_block_vals["avg_base_fee_daily"]).alias(
            "mean_gas_delta_daily"
        )  # negative is "out of money". positive is "in the money"
    )
    .filter(pl.col("mean_gas_delta_daily") < 0)
)


print(
    f"in the money: {itm_count.shape[0]}, {itm_count.select('mean_gas_delta_daily').sum().item()} gas (gwei)"
)

print(
    f"out of the money: {otm_count.shape[0]}, {otm_count.select('mean_gas_delta_daily').sum().item()} gas (gwei)"
)

in the money: 92438, 868349.2646429302 gas (gwei)
out of the money: 210562, -860985.9495222494 gas (gwei)


In [None]:
# using the above results, we append the blocks_df gas rolling averages to the sequencer tx dataset
explore_df_gas_vol = mean_block_vals.join(explore_df, on="block_number", how="left")

In [None]:
explore_df_gas_vol.columns

['author',
 'block_number',
 'timestamp',
 'gas_used',
 'base_fee_per_gas',
 'block_datetime',
 'avg_base_fee_daily',
 'avg_base_fee_minute',
 'transaction_index',
 'transaction_hash',
 'nonce',
 'from_address',
 'to_address',
 'gas_limit',
 'gas_used_right',
 'transaction_type',
 'success',
 'n_input_bytes',
 'n_input_zero_bytes',
 'n_input_nonzero_bytes',
 'n_rlp_bytes',
 'chain_id',
 'author_right',
 'gas_used_block',
 'transaction_index_max',
 'tx_gas_cost',
 'block_datetime_right',
 'block_gas_premium',
 'blockspace_percentile',
 'rounded_blockspace_percentile',
 'gas_price_gwei',
 'max_priority_fee_per_gas_gwei',
 'max_fee_per_gas_gwei',
 'base_fee_per_gas_gwei',
 'block_encoded_kbytes',
 'block_calldata_kbytes',
 'sequencer_names']

In [None]:
# prototype construction
(
    explore_df_gas_vol.filter(~pl.col("sequencer_names").is_null())
    .select(
        "block_number",
        "sequencer_names",
        "base_fee_per_gas_gwei",
        "avg_base_fee_daily",
        "avg_base_fee_minute",
        "gas_price_gwei",
        "gas_used_right",
        "tx_gas_cost",
    )
    .with_columns(
        (pl.col("avg_base_fee_daily") * pl.col("gas_used_right") / 10**9).alias(
            "tx_gas_cost_avg_daily"
        ),
        (pl.col("avg_base_fee_minute") * pl.col("gas_used_right") / 10**9).alias(
            "tx_gas_cost_avg_minute"
        ),
    )
    .sort(by="block_number")
).head(5)

block_number,sequencer_names,base_fee_per_gas_gwei,avg_base_fee_daily,avg_base_fee_minute,gas_price_gwei,gas_used_right,tx_gas_cost,tx_gas_cost_avg_daily,tx_gas_cost_avg_minute
u32,str,f64,f64,f64,f64,u64,f64,f64,f64
18030002,"""base""",32.024751684,,,32.074751684,578204,0.0185457497226955,,
18030006,"""arbitrum""",32.722553091,,32.47983449000001,32.772553091,1844309,0.0604427146187091,,0.0599028510684174
18030006,"""optimism""",32.722553091,,32.47983449000001,32.772553091,1836180,0.0601763065346323,,0.0596388224938482
18030007,"""base""",33.275658042,,32.73001576160001,33.325658042,636948,0.0212267112385358,,0.0208473180793196
18030012,"""base""",34.617542597,,33.63625064620001,34.667542597,724572,0.0251191306745934,,0.0243718854032184


In [None]:
# cost adjustment
adj_block_gas: pl.DataFrame = (
    (
        explore_df_gas_vol.filter(~pl.col("sequencer_names").is_null())
        .select(
            "block_number",
            "sequencer_names",
            "base_fee_per_gas_gwei",
            "avg_base_fee_daily",
            "avg_base_fee_minute",
            "gas_price_gwei",
            "gas_used_right",
            "tx_gas_cost",
        )
        .with_columns(
            (pl.col("avg_base_fee_daily") * pl.col("gas_used_right") / 10**9).alias(
                "tx_gas_cost_avg_daily"
            ),
            (pl.col("avg_base_fee_minute") * pl.col("gas_used_right") / 10**9).alias(
                "tx_gas_cost_avg_minute"
            ),
        )
        .with_columns(
            # ITM when positive, OTM when negative
            (pl.col("tx_gas_cost") - pl.col("tx_gas_cost_avg_daily")).alias(
                "tx_gas_cost_delta_daily"
            ),
            (pl.col("tx_gas_cost") - pl.col("tx_gas_cost_avg_minute")).alias(
                "tx_gas_cost_delta_minute"
            ),
        )
    )
    .select(
        "block_number",
        "sequencer_names",
        "tx_gas_cost_delta_daily",
        "tx_gas_cost_delta_minute",
    )
    .group_by("sequencer_names")
    .agg(
        pl.col("tx_gas_cost_delta_daily").sum().alias("tx_gas_cost_delta_daily_sum"),
        pl.col("tx_gas_cost_delta_minute").sum().alias("tx_gas_cost_delta_minute_sum"),
    )
)

adj_block_gas

sequencer_names,tx_gas_cost_delta_daily_sum,tx_gas_cost_delta_minute_sum
str,f64,f64
"""mantle""",-9.144422926214286,1.679253692288393
"""linea""",1.0423141583801456,-0.3072991952510987
"""polygon_zkevm""",-2.397012205483913,0.2365406151627962
"""base""",7.810627296202728,2.766477655946272
"""arbitrum""",2.667095412723976,5.127072386666014
"""starknet""",267.1387349156266,83.20356869636893
"""optimism""",-0.1041679871095602,1.862359719906382


### Aggregate Sequencer Stats

This section is an overview of Sequencer activity on Ethereum such as how many transactions they submit, how much gas they consumed, and how much calldata they are saving.

In [None]:
def agg_sequencer_stats(df: pl.DataFrame):
    """
    Prints aggregate sequencer statistics
    """
    return (
        df.group_by(["block_number", "sequencer_names"])
        .agg(
            pl.count().alias("n_txs"),
            pl.col("gas_used_block").sum().alias("sequencer_gas_used"),
            pl.col("tx_gas_cost").sum().alias("tx_gas_cost_sum_eth"),
            pl.col("n_input_bytes").sum().alias("sequencer_n_input_bytes_sum"),
            pl.col("n_input_zero_bytes")
            .sum()
            .alias("sequencer_n_input_zero_bytes_sum"),
            pl.col("n_input_nonzero_bytes")
            .mean()
            .alias("sequencer_n_input_nonzero_bytes_mean"),
            pl.col("n_rlp_bytes").sum().alias("sequencer_n_rlp_bytes_sum"),
            pl.col("gas_price_gwei").mean().alias("sequencer_gas_price_mean"),
            # ! Currently not used - selecting columns that are the same for all rows in the group
            pl.col("base_fee_per_gas_gwei").first(),
            pl.col("block_datetime").first(),
            pl.col("block_encoded_kbytes").first(),
            pl.col("block_calldata_kbytes").first(),
        )
        .group_by(["sequencer_names"])
        .agg(
            pl.col("block_number").count().alias("n_blocks"),
            pl.col("n_txs").sum(),
            pl.col("tx_gas_cost_sum_eth").sum().round(2),
            pl.col("sequencer_n_input_bytes_sum").sum(),
            pl.col("sequencer_n_input_zero_bytes_sum").sum(),
            pl.col("sequencer_n_input_nonzero_bytes_mean").mean().round(2),
            pl.col("sequencer_n_rlp_bytes_sum").sum(),
            pl.col("sequencer_gas_price_mean").mean().round(2),
        )
        .with_columns(
            (pl.col("sequencer_n_input_bytes_sum") / 10**6)
            .alias("n_input_mbytes_sum")
            .round(2),
            (pl.col("sequencer_n_rlp_bytes_sum") / 10**6)
            .alias("n_rlp_mbytes_sum")
            .round(2),
        )
        .drop("sequencer_n_input_bytes_sum", "sequencer_n_rlp_bytes_sum")
    )

**aggregate sequencer stats pre EIP-4844** - The below table shows aggregate sequencer statistics for block usage.

In [None]:
agg_seq_stats = (
    agg_sequencer_stats(explore_df)
    .with_columns(
        (pl.col("n_rlp_mbytes_sum") / pl.col("tx_gas_cost_sum_eth"))
        .alias("mb_data_per_eth")
        .round(2),
    )
    .sort(by="mb_data_per_eth", descending=True)
).select(
    "sequencer_names",
    "n_blocks",
    "n_txs",
    "tx_gas_cost_sum_eth",
    "mb_data_per_eth",
    "n_input_mbytes_sum",
)

agg_seq_stats.join(adj_block_gas, on="sequencer_names", how="left").rename(
    {
        "sequencer_names": "sequencer",
        "mb_data_per_eth": "mb_per_eth",
        "n_input_mbytes_sum": "total_mbytes",
        "tx_gas_cost_sum_eth": "tota_cost_eth",
    }
).sort(by="n_txs", descending=True)

sequencer,n_blocks,n_txs,tota_cost_eth,mb_per_eth,total_mbytes,tx_gas_cost_delta_daily_sum,tx_gas_cost_delta_minute_sum
str,u32,u32,f64,f64,f64,f64,f64
"""starknet""",89200,176263,1435.82,0.63,885.22,267.1387349156266,83.20356869636893
"""base""",38315,39440,537.96,4.36,2339.36,7.810627296202728,2.766477655946272
"""linea""",21864,31260,435.03,4.46,1938.02,1.0423141583801456,-0.3072991952510987
"""arbitrum""",14961,23686,603.92,3.88,2340.73,2.667095412723976,5.127072386666014
"""optimism""",15344,15354,401.3,4.41,1769.85,-0.1041679871095602,1.862359719906382
"""polygon_zkevm""",4024,4024,70.14,5.47,382.9,-2.397012205483913,0.2365406151627962
"""mantle""",2018,2018,124.9,1.54,191.76,-9.144422926214286,1.679253692288393


### Variable Sequencer Stats

To see the variable sequencer stats, it seems most efficient to create a function, compute numbers for a single sequencer, and then combine with other sequencer calculations. 

In [None]:
def variable_sequencer_stats(df: pl.DataFrame, sequencer_name: str) -> pl.DataFrame:
    return (
        df.filter(pl.col("sequencer_names") == sequencer_name)
        .sort(by="block_number")
        .with_columns(
            (pl.col("block_number").diff(n=1)).alias("block_number_diff"),
            (pl.col("n_input_nonzero_bytes").diff(n=1)).alias(
                "n_input_nonzero_bytes_diff"
            ),
            (pl.col("n_input_zero_bytes").diff(n=1)).alias("n_input_zero_bytes_diff"),
            (pl.col("gas_used").diff(n=1)).alias("gas_used_diff"),
            (pl.col("base_fee_per_gas_gwei").diff(n=1)).alias(
                "base_fee_per_gas_gwei_diff"
            ),
        )
        .select(
            "sequencer_names",
            "block_number",
            "block_number_diff",
            "n_input_nonzero_bytes_diff",
            "n_input_zero_bytes_diff",
            "gas_used_diff",
            "base_fee_per_gas_gwei_diff",
        )
    )

In [None]:
variable_seq_dfs_list: list[pl.DataFrame] = []
for name in sequencers_l2["sequencer_names"]:
    df: pl.DataFrame = variable_sequencer_stats(explore_df, name)
    variable_seq_dfs_list.append(df)

In [None]:
concat_df = pl.concat(variable_seq_dfs_list)

In [None]:
# example of multiple transactions in the same block
explore_df.filter(pl.col("block_number") == 18072641).filter(
    pl.col("sequencer_names") == "arbitrum"
)

block_number,transaction_index,transaction_hash,nonce,from_address,to_address,gas_limit,gas_used,transaction_type,success,n_input_bytes,n_input_zero_bytes,n_input_nonzero_bytes,n_rlp_bytes,chain_id,author,gas_used_block,transaction_index_max,tx_gas_cost,block_datetime,block_gas_premium,blockspace_percentile,rounded_blockspace_percentile,gas_price_gwei,max_priority_fee_per_gas_gwei,max_fee_per_gas_gwei,base_fee_per_gas_gwei,block_encoded_kbytes,block_calldata_kbytes,sequencer_names
u32,u64,str,u64,str,str,u64,u64,u32,bool,u32,u32,u32,u32,u64,str,u64,u64,f64,datetime[μs],f64,f64,f64,f64,f64,f64,f64,f64,f64,str
18072641,131,"""0x24add26cf55e50e0fa15092a03c307b026f060ffc83f1802da8e5b3f8a5da65d""",282983,"""0xc1b634853cb333d3ad8663715b08f41a3aec47cc""","""0x1c479675ad559dc151f6ec7ed3fbf8cee79582b6""",1872780,1824274,2,True,98340,745,97595,98459,1,"""0x1f9090aae28b8a3dceadf281b0f12828e676c326""",18165319,136,0.1080275562844741,2023-09-05 20:25:59,1.000845069397535,96.32352941176472,96,59.216738431,0.05,60,59.166738431,416.252,399.129,"""arbitrum"""
18072641,132,"""0xfeb920ae55beab0a66680a54a244368b137e3d7d914563bab6037ec0fc32fa97""",282984,"""0xc1b634853cb333d3ad8663715b08f41a3aec47cc""","""0x1c479675ad559dc151f6ec7ed3fbf8cee79582b6""",1890692,1844975,2,True,99300,639,98661,99419,1,"""0x1f9090aae28b8a3dceadf281b0f12828e676c326""",18165319,136,0.1092534019867342,2023-09-05 20:25:59,1.000845069397535,97.05882352941175,97,59.216738431,0.05,60,59.166738431,416.252,399.129,"""arbitrum"""
18072641,130,"""0x25a3cac498260b62fa772eca9c50ee9feb1d8ab59b14300cc69d238352457827""",282982,"""0xc1b634853cb333d3ad8663715b08f41a3aec47cc""","""0x1c479675ad559dc151f6ec7ed3fbf8cee79582b6""",1884104,1838433,2,True,99172,766,98406,99291,1,"""0x1f9090aae28b8a3dceadf281b0f12828e676c326""",18165319,136,0.1088751982489186,2023-09-05 20:25:59,1.0009295763372883,95.58823529411764,96,59.221738431,0.055,76,59.166738431,416.252,399.129,"""arbitrum"""


In [None]:
# very high base gas difference
explore_df.filter(pl.col("block_number") == 18183437).filter(
    pl.col("sequencer_names") == "arbitrum"
)

block_number,transaction_index,transaction_hash,nonce,from_address,to_address,gas_limit,gas_used,transaction_type,success,n_input_bytes,n_input_zero_bytes,n_input_nonzero_bytes,n_rlp_bytes,chain_id,author,gas_used_block,transaction_index_max,tx_gas_cost,block_datetime,block_gas_premium,blockspace_percentile,rounded_blockspace_percentile,gas_price_gwei,max_priority_fee_per_gas_gwei,max_fee_per_gas_gwei,base_fee_per_gas_gwei,block_encoded_kbytes,block_calldata_kbytes,sequencer_names
u32,u64,str,u64,str,str,u64,u64,u32,bool,u32,u32,u32,u32,u64,str,u64,u64,f64,datetime[μs],f64,f64,f64,f64,f64,f64,f64,f64,f64,str
18183437,222,"""0xed8384fed8185931cf39f404a152fd637614010accdee56d62f221c4e43d968d""",296261,"""0xc1b634853cb333d3ad8663715b08f41a3aec47cc""","""0x1c479675ad559dc151f6ec7ed3fbf8cee79582b6""",1873083,1828536,2,True,98340,615,97725,98459,1,"""0x4675c7e5baafbffbca748158becba61ef3b0a263""",15525347,338,0.4117363065267246,2023-09-21 09:44:23,1.0004443009740285,65.68047337278107,66,225.172655352,0.1,277.332669832,225.072655352,354.016,314.434,"""arbitrum"""
18183437,223,"""0x3015787e357d9cf2278a0a999e2d8aca644eb4ea3d432ca92dbd237d724d1a3b""",296262,"""0xc1b634853cb333d3ad8663715b08f41a3aec47cc""","""0x1c479675ad559dc151f6ec7ed3fbf8cee79582b6""",1883905,1836550,2,True,98980,638,98342,99099,1,"""0x4675c7e5baafbffbca748158becba61ef3b0a263""",15525347,338,0.4135408401867156,2023-09-21 09:44:23,1.0004443009740285,65.97633136094674,66,225.172655352,0.1,256.0,225.072655352,354.016,314.434,"""arbitrum"""
18183437,3,"""0xd1f0a5032d1f27f7716ca31352b48b283ed8376c19904b16791c2a9481bb4956""",296260,"""0xc1b634853cb333d3ad8663715b08f41a3aec47cc""","""0x1c479675ad559dc151f6ec7ed3fbf8cee79582b6""",1873293,1825945,2,True,98404,690,97714,98523,1,"""0x4675c7e5baafbffbca748158becba61ef3b0a263""",15525347,338,0.4111528841767076,2023-09-21 09:44:23,1.0004443009740285,0.8875739644970414,1,225.172655352,0.1,384.0,225.072655352,354.016,314.434,"""arbitrum"""


In [None]:
# check gas difference betwen a small block range
# very high base gas difference
explore_df.filter(pl.col("block_number") > 18183327).filter(
    pl.col("block_number") < 18183477
).sort(by="block_number").plot.line(
    x="block_number",
    y="base_fee_per_gas_gwei",
    height=500,
    width=600,
    title="150 block range",
)

In [None]:
# NOTE - there's an interesting pattern where sequencers will submit multiple transactions in the same block. We can filter for this with `.filter(pl.col("block_number_diff") == 0)`
concat_df.filter(pl.col("block_number_diff") < 60).sort(
    by="base_fee_per_gas_gwei_diff", descending=True
)

sequencer_names,block_number,block_number_diff,n_input_nonzero_bytes_diff,n_input_zero_bytes_diff,gas_used_diff,base_fee_per_gas_gwei_diff
str,u32,i64,i64,i64,i64,f64
"""polygon_zkevm""",18114960,55,-4459,-9813,-144553,373.04485132800005
"""optimism""",18114948,20,-267,-102,-4680,243.236008871
"""base""",18114948,14,-11473,-92,-183936,220.469747989
"""arbitrum""",18183437,59,-473,25,-5265,199.44184149699998
"""arbitrum""",18183501,10,-678,6,-11713,114.12869625400003
"""optimism""",18183473,16,-52,-60,-1072,111.02371375000001
"""arbitrum""",18115370,47,127,1,-594,109.471645231
"""optimism""",18183413,11,-1177,-21,-18916,92.990335887
"""optimism""",18114958,9,-1828,-56,-29472,88.64950396600005
"""mantle""",18183398,58,13,19,952,88.027535683


In [None]:
concat_df.filter(pl.col("block_number_diff") < 60).sort(by="block_number").group_by(
    "sequencer_names"
).agg(
    pl.col("base_fee_per_gas_gwei_diff")
    .abs()
    .mean()
    .alias("base_fee_per_gas_gwei_diff_mean_abs"),
    pl.col("base_fee_per_gas_gwei_diff")
    .abs()
    .max()
    .alias("base_fee_per_gas_gwei_diff_max"),
    pl.col("n_input_nonzero_bytes_diff")
    .abs()
    .mean()
    .alias("n_input_nonzero_bytes_mean_abs"),
)

sequencer_names,base_fee_per_gas_gwei_diff_mean_abs,base_fee_per_gas_gwei_diff_max,n_input_nonzero_bytes_mean_abs
str,f64,f64,f64
"""base""",0.8412222719914758,220.469747989,11287.56922881786
"""linea""",0.5144804805574005,12.69229184,2938.934922266478
"""polygon_zkevm""",2.023582698155695,373.0448513280001,14620.346080305928
"""arbitrum""",0.7975138688615676,199.441841497,698.6699767589267
"""starknet""",0.2436989020797321,38.571344432,25489.29582874089
"""optimism""",1.3023106947318552,243.236008871,1642.357156830779
"""mantle""",1.0069630139010988,88.027535683,764.6813186813187


In [None]:
explore_df.filter(pl.col("sequencer_names") == "base").sort(
    by="block_number"
).with_columns((pl.col("block_number").diff(n=2)).alias("block_number_diff")).filter(
    pl.col("block_number_diff") < 5
).select(
    "block_number_diff"
).plot.hist(
    height=600,
    width=800,
)

In [None]:
# what should the interval range be for diff? Right now the default is one transaction.
# 300 blocks = 1hr...

tx_count_range = 5  # rolling average over `n` transactions
(
    explore_df.sort(by="block_datetime")
    .group_by("sequencer_names")
    .agg(
        pl.col("block_datetime").diff(tx_count_range).mean().alias("tx_time_diff_mean"),
        pl.col("block_number")
        .diff(tx_count_range)
        .mean()
        .alias("block_number_diff_mean"),
        pl.col("gas_used").diff(tx_count_range).mean().alias("gas_used_diff_mean"),
        pl.col("n_input_bytes")
        .diff(tx_count_range)
        .mean()
        .alias("n_input_bytes_diff_mean"),
        pl.col("block_datetime").diff(tx_count_range).std().alias("tx_time_diff_std"),
        pl.col("block_number")
        .diff(tx_count_range)
        .std()
        .alias("block_number_diff_std"),
        pl.col("gas_used").diff(tx_count_range).std().alias("gas_used_diff_std"),
        pl.col("n_input_bytes")
        .diff(tx_count_range)
        .std()
        .alias("n_input_bytes_diff_std"),
        pl.col("tx_gas_cost").mean().alias("tx_gas_cost_mean"),
    )
)

sequencer_names,tx_time_diff_mean,block_number_diff_mean,gas_used_diff_mean,n_input_bytes_diff_mean,tx_time_diff_std,block_number_diff_std,gas_used_diff_std,n_input_bytes_diff_std,tx_gas_cost_mean
str,duration[μs],f64,f64,f64,duration[μs],f64,f64,f64,f64
"""mantle""",1h 52m 30s 396423µs,557.3308494783904,9345.503725782415,201.0928961748634,2h 35m 10s 390610µs,769.0492560801673,895430.264199491,19281.54196207355,0.0618945752408706
"""linea""",7m 15s 824796µs,35.98278675411934,-4.068852983522636,0.0470964645656694,41m 59s 968061µs,208.15977392557676,127074.30863639008,9748.494122239095,0.01391660664874
"""optimism""",14m 47s 527786µs,73.27676070102287,5.7780962929181054,0.35898104111017,54m 9s 405400µs,268.40465323571846,55504.803210741455,3480.571994783157,0.0261366756231011
"""starknet""",1m 17s 174505µs,6.371750502104869,41.81486797762371,0.8249724835184786,18m 17s 213586µs,90.63139064723312,855655.3169131503,43794.54156132066,0.0081458730506828
"""polygon_zkevm""",56m 27s 42547µs,279.6441900970391,855.0898233391391,79.13610350833541,1h 45m 35s 207727µs,523.3257123365839,541464.9528251381,44105.12869976501,0.0174293228576527
"""base""",5m 45s 495625µs,28.525041207049576,24.40228223659186,1.5301635602890833,33m 43s 230298µs,167.11611035583238,466491.963708828,29270.252989011697,0.0136400709683044
"""arbitrum""",9m 35s 254085µs,47.49465816477345,-4.158354799206115,-0.2337739115746801,43m 40s 398663µs,216.44450180999692,29872.170953157263,1729.4699420581398,0.0254967872945874


In [None]:
# what should the interval range be for diff? Right now the default is one transaction.
# 300 blocks = 1hr...

tx_count_range = 50  # rolling average over `n` transactions
(
    explore_df.sort(by="block_datetime")
    .group_by("sequencer_names")
    .agg(
        pl.col("block_datetime").diff(tx_count_range).mean().alias("tx_time_diff_mean"),
        pl.col("block_number")
        .diff(tx_count_range)
        .mean()
        .alias("block_number_diff_mean"),
        pl.col("gas_used").diff(tx_count_range).mean().alias("gas_used_diff_mean"),
        pl.col("n_input_bytes")
        .diff(tx_count_range)
        .mean()
        .alias("n_input_bytes_diff_mean"),
        pl.col("block_datetime").diff(tx_count_range).std().alias("tx_time_diff_std"),
        pl.col("block_number")
        .diff(tx_count_range)
        .std()
        .alias("block_number_diff_std"),
        pl.col("gas_used").diff(tx_count_range).std().alias("gas_used_diff_std"),
        pl.col("n_input_bytes")
        .diff(tx_count_range)
        .std()
        .alias("n_input_bytes_diff_std"),
        pl.col("tx_gas_cost").mean().alias("tx_gas_cost_mean"),
    )
)

sequencer_names,tx_time_diff_mean,block_number_diff_mean,gas_used_diff_mean,n_input_bytes_diff_mean,tx_time_diff_std,block_number_diff_std,gas_used_diff_std,n_input_bytes_diff_std,tx_gas_cost_mean
str,duration[μs],f64,f64,f64,duration[μs],f64,f64,f64,f64
"""arbitrum""",1h 35m 54s 781350µs,475.13073278050433,-0.2352343882213572,0.0839397529192756,2h 20m 20s 931221µs,695.5343887800959,30256.033455460896,1751.552222278537,0.0254967872945874
"""polygon_zkevm""",9h 25m 32s 349270µs,2801.539003522899,2723.4743331655764,256.14494212380475,6h 11m 28s 126467µs,1839.2413949776571,622966.8190244868,52870.746076809606,0.0174293228576527
"""optimism""",2h 27m 58s 158128µs,733.002221641401,18.97124934657606,1.1970726607422897,2h 54m 18s 484623µs,863.8276684981025,59288.42779303996,3716.5109934314014,0.0261366756231011
"""starknet""",12m 51s 858103µs,63.726773847559485,4.983650468467139,-1.481298201608281,57m 26s 545746µs,284.6855761712296,855714.4388334951,43958.83363999416,0.0081458730506828
"""base""",57m 35s 444325µs,285.2915968519929,387.0612845899975,24.276288398070577,1h 46m 27s 134627µs,527.6183650791231,572877.1117534011,35945.28827703577,0.0136400709683044
"""linea""",1h 12m 41s 416725µs,360.0920217878885,-28.13159243832105,0.6059596283242551,2h 17m 46s 170184µs,682.696021746625,130567.91037081284,9901.374353265535,0.01391660664874
"""mantle""",18h 50m 30s 597560µs,5600.061991869919,39381.757113821135,837.7886178861788,11h 8m 15s 201057µs,3307.009103893865,1028471.9050396977,22133.34033909879,0.0618945752408706


In [None]:
STOP

NameError: name 'STOP' is not defined

### How does EIP-4844 work?
EIP-4844 introduces a new transaction type, called a blob-carrying transaction. A blob-carrying transaction is like a regular transaction, except it also carries an extra piece of data called a blob. Blobs are extremely large (~125 kB), and can be much cheaper than similar amounts of calldata. [source](https://notes.ethereum.org/@vbuterin/proto_danksharding_faq#What-format-is-blob-data-in-and-how-is-it-committed-to). The blob space is a new type of data storage that is not accessible to the Ethereum Virtual Machine (EVM), unlike calldata. Unlike calldata, which is stored in history forever, blobs are persisted in beacon nodes for a short period of time (approximately 2 weeks), which allows them to be priced cheaper.

### Blob Gas Math

Pre EIP-4844 block size in gas at 16 gas per nonzero calldata byte (most expensive type of data) is
$$\frac{30,000,000}{16} = 1.875MB $$

Each blob size is 0.125 MB. The target is 3 blobs (0.375 MB)and the maximum is 6 blobs (0.75 MB) per block. Each blob is worth 131,072 gas [2**17, source](https://eips.ethereum.org/EIPS/eip-4844)

The blob gas limit is 2**17 value which turns out to be 131,072 gas. This is about 1.04 gas per byte, compared to 16 gas per non-zero calldata byte. After EIP-4844, block size in gas will increase to 42,000,000 gas assuming a 16 gas per byte cost similar to calldata. 

### Blob Pricing
Regular gas pricing: The price of gas in a block is a simple function of how much gas was used in the previous block compared to the target.
Blob gas pricing: The price of blob gas is a function of the running tally excess blobs there have been in total. [source](https://domothy.com/blobspace/)

### Blob vs Calldata Tradeoff
The optimal rollup data strategy has two components - cost and delay. First, the base cost of blob data is 16x less expensive than calldata under constant gas conditions, when the gas markets are assumed identical. <span style="color: red;">Second, the blob must be submitted in full whereas calldata can be submitted in arbitrary chunks (is this true?).</span>

## Rollup Calldata Metrics

### Pricing Futures Calldata (Pre EIP-4844)
Since EIP-4844 will lower the gas cost for the calldata, we focus on the gas premium over the base gas fee. It's calculated as `gas_price` / `base_fee_per_gas` - 1 = `base_gas_premium_pct`. 


In [None]:
# aggregate rollup costs
def rollup_costs(df: pl.DataFrame):
    return (
        df.group_by("sequencer_names")
        .agg(
            pl.col("gas_used").sum().alias("sequencer_gas_used"),
            pl.col("tx_gas_cost").sum().alias("tx_gas_cost_sum_eth"),
            pl.col("gas_used_block").first(),
            pl.col("total_calldata_block_size").first(),
            pl.col("calldata_size").sum().alias("sequencer_calldata_size_sum"),
            pl.col("calldata_size").mean().alias("sequencer_calldata_size_mean_kb"),
            pl.col("block_datetime").diff().mean().alias("post_tx_time_mean"),
            pl.col("block_number").diff().mean().alias("posts_block_number_mean"),
            pl.col("gas_price").mean().alias("gas_price_mean"),
            pl.col("base_fee_per_gas").first().alias("base_fee_per_gas"),
            pl.col("base_fee_per_gas").mean().alias("base_fee_per_gas_mean"),
            pl.count().alias("tx_count"),
        )
        .with_columns(
            [
                (pl.col("sequencer_gas_used") / pl.col("gas_used_block")).alias(
                    "sequencer_gas_utilization"
                ),
                (pl.col("gas_price_mean") / pl.col("base_fee_per_gas") - 1).alias(
                    "base_gas_premium_mean_pct"
                ),
                (pl.col("sequencer_calldata_size_sum") / 10**6).alias(
                    "sequencer_calldata_size_sum_gb"
                ),
            ]
        )
        .sort(by="tx_gas_cost_sum_eth", descending=True)
    ).select(
        "sequencer_names",
        "tx_gas_cost_sum_eth",
        "tx_count",
        "sequencer_calldata_size_sum_gb",
        "sequencer_calldata_size_mean_kb",
        "base_gas_premium_mean_pct",
        "base_fee_per_gas_mean",
        "posts_block_number_mean",
        "post_tx_time_mean",
    )

In [None]:
rollup_cost_stats = rollup_costs(explore_df)
rollup_cost_stats

sequencer_names,tx_gas_cost_sum_eth,tx_count,sequencer_calldata_size_sum_gb,sequencer_calldata_size_mean_kb,base_gas_premium_mean_pct,base_fee_per_gas_mean,posts_block_number_mean,post_tx_time_mean
str,f64,u32,f64,f64,f64,f64,f64,duration[μs]
"""linea""",4165.574811471807,120184,17.821789624000033,148.28753930639712,0.2801574723752,32.859871859515486,1.742609187655492,21s 145353µs
"""arbitrum""",3796.9360026585096,52164,10.224386467999947,196.00464818648777,0.876147921898643,39.35876872583072,4.019764967505704,48s 777332µs
"""optimism""",1564.529636044541,25584,5.86886492200002,229.3959084584123,0.5088555307138645,33.03972985870466,8.175311730446,1m 39s 201813µs
"""scroll""",943.8644777635262,21660,4.465137980000031,206.14672114496912,0.3443830452740863,29.914249929707925,9.660602982593842,1m 57s 224802µs
"""starknet""",925.3124872041084,88967,3.2072467190005,36.04984678589252,0.47198696284825,32.160838320304585,2.3566306229346043,28s 596137µs
"""base""",782.5457781998224,42001,3.1439878529999885,74.8550713792526,0.4161816672628986,32.45925765840359,4.992119047619048,1m 576ms
"""mantle""",697.4686867547673,3964,0.9882795799999676,249.3137184661876,0.1962728941127911,27.792595493319343,52.22558667676003,10m 33s 747161µs
"""polygon_zkevm""",135.54231376253432,3820,0.6000028920000006,157.0688198952881,0.4837708722700196,32.88347995462066,54.293794186959936,10m 58s 837391µs


In [None]:
rollup_cost_stats.with_columns(
    (pl.col("sequencer_calldata_size_mean_kb") / pl.col("base_fee_per_gas_mean")).alias(
        "calldata_size_kb_per_gas"
    ),  # ! does this metric make sense?
    (
        pl.col("sequencer_calldata_size_mean_kb") * pl.col("base_gas_premium_mean_pct")
    ).alias("calldata_size_kb_per_gas_premium"),
    (
        pl.col("sequencer_calldata_size_mean_kb") / pl.col("posts_block_number_mean")
    ).alias("avg_calldata_size_kb_per_block"),
).select(
    "sequencer_names",
    # "post_tx_time_mean",
    "sequencer_calldata_size_mean_kb",
    "posts_block_number_mean",
    "base_fee_per_gas_mean",
    "base_gas_premium_mean_pct",
    "avg_calldata_size_kb_per_block",
    # "calldata_size_kb_per_gas",
    # "calldata_size_kb_per_gas_premium",
)

# Data from 12-10-23 to 1-8-24

sequencer_names,sequencer_calldata_size_mean_kb,posts_block_number_mean,base_fee_per_gas_mean,base_gas_premium_mean_pct,avg_calldata_size_kb_per_block
str,f64,f64,f64,f64,f64
"""linea""",148.28753930639712,1.742609187655492,32.859871859515486,0.2801574723752,85.09512078603424
"""arbitrum""",196.00464818648777,4.019764967505704,39.35876872583072,0.876147921898643,48.76022597612473
"""optimism""",229.3959084584123,8.175311730446,33.03972985870466,0.5088555307138645,28.05959161215957
"""scroll""",206.14672114496912,9.660602982593842,29.914249929707925,0.3443830452740863,21.33890829758738
"""starknet""",36.04984678589252,2.3566306229346043,32.160838320304585,0.47198696284825,15.297198650928712
"""base""",74.8550713792526,4.992119047619048,32.45925765840359,0.4161816672628986,14.99464869832264
"""mantle""",249.3137184661876,52.22558667676003,27.792595493319343,0.1962728941127911,4.773784926711608
"""polygon_zkevm""",157.0688198952881,54.293794186959936,32.88347995462066,0.4837708722700196,2.892942411694857


Algorithm: 
*  `posts_block_number_mean` is the average number of blocks a rollup posts data. 
* `avg_calldata_size_kb_per_block` is the average callddata in kb posted per block.

Given preconfirmations as a general future, we can calculate the optimal cost by 

### Charts

In [None]:
# TITLE - Gas usage per Sequencer. This chart shows the individual gas usage for each sequencer in a block
(
    explore_df.group_by("block_number", "sequencer_names").agg(
        (pl.col("gas_used").sum() / pl.col("gas_used_block").first()).alias(
            "sequencer_gas_used_pct"
        ),
        (pl.col("gas_used_block").first()).alias("total_gas_in_block"),
    )
).sort(by="sequencer_gas_used_pct", descending=True).plot.scatter(
    x="sequencer_gas_used_pct",
    y="total_gas_in_block",
    by="sequencer_names",
    height=600,
    width=800,
    xlabel="Sequencer Gas % Used of Total Block Size",  # Custom X-axis label
    ylabel="Block Size (in millions of Gas)",  # Custom Y-axis label
    title="Gas Usage Per Sequencer",
)

In [None]:
# TITLE - KDE of Total Sequencer Gas Usage
(
    explore_df.group_by("block_number").agg(
        (pl.col("gas_used").sum()).alias("sequencer_gas_used_sum"),
        (pl.col("gas_used_block").first()).alias("total_gas_in_block"),
    )
).sort(by="sequencer_gas_used_sum", descending=True).select(
    "sequencer_gas_used_sum"
).plot.kde(
    title="Collective Gas Usage Per Sequencer",
)

In [None]:
# TITLE - KDE of Total Sequencer Gas Usage
(
    explore_df.group_by("block_number").agg(
        (pl.col("gas_used").sum() / pl.col("gas_used_block").first()).alias(
            "sequencer_gas_used_pct"
        ),
        (pl.col("gas_used_block").first()).alias("total_gas_in_block"),
    )
).sort(by="sequencer_gas_used_pct", descending=True).select(
    "sequencer_gas_used_pct"
).plot.kde(
    title="Collective Gas Usage Per Sequencer",
)

In [None]:
# TITLE - Total Sequencer Gas Usage Over Time
(
    explore_df.group_by("block_number").agg(
        (pl.col("gas_used").sum() / pl.col("gas_used_block").first()).alias(
            "sequencer_gas_used_pct"
        ),
        (pl.col("gas_used_block").first()).alias("total_gas_in_block"),
    )
).sort(by="block_number", descending=True).plot.line(
    x="block_number",
    y="sequencer_gas_used_pct",
    title="Collective Gas Usage Per Sequencer (%)",
)

In [None]:
# TITLE - Total Sequencer Gas Usage Over Time
(
    explore_df.group_by("block_number").agg(
        (pl.col("gas_used").sum()).alias("sequencer_gas_used_sum"),
        (pl.col("gas_used_block").first()).alias("total_gas_in_block"),
    )
).sort(by="block_number", descending=True).plot.line(
    x="block_number",
    y=["sequencer_gas_used_sum"],
    title="Collective Gas Usage Per Sequencer (Gas)",
)