In [1]:
import holoviews as hv
import polars as pl
from holoviews import opts

hv.extension("bokeh")
pl.Config.set_fmt_str_lengths(200)
pl.Config.set_fmt_float("full")

polars.config.Config

The goal of this notebook is to do preliminary analysis of blob transactions in the mempool as a starting point. 

Scope out what type of data to combine. Initially it seems like Base isn't going to care so much about consensus layer data, just that it makes it
into the beacon chain in the first place, in a somewhat timely manner.

In [3]:
df = pl.read_parquet("all_blob_analysis_3w.parquet").filter(pl.col('from') == '0x5050F69a9786F081509234F1a7F4684b5E5b76C9')

### Example - blob transaction waiting in the mempool

During periods of high volatility, a blob hash can linger in the mempool longer than desired. This part of the notebook looks at two such blob transactions - the most extreme (popular_str) and a more middle of the pack one (median_str). When a blob transaction needs to be resubmitted with a newer gas price, the transaction hash will be replaced. The way that blob txs are re-entering the mempool as a new transaction is by flattening the `list[str]` of blob hashes into a single large string in order to compare the blob hash similarities.

Manual inspection indicates that the blob transactions that are resubmitted is when `count` >= 9. Unsure why this is a general cutoff. The number of blob txs that meet this cutoff was about .7% (70/8900) (over a 21 day period). Zooming into the week 3/24/24 - 3/31/24, the number increases to 1.3% (40/2900).

In [4]:
(
    df.filter(pl.col("event_date_time") > pl.datetime(2024, 3, 24))
    .filter(pl.col("event_date_time") < pl.datetime(2024, 3, 31))
    .with_columns(
        pl.col("blob_hashes")
        # flatten list[str] into a long string for analysis
        .map_elements(lambda x: ",".join(x))
    )
    .group_by("blob_hashes")
    .agg(pl.len().alias("count"))
    .sort(by="count", descending=True)
).filter(pl.col("count") > 8).shape

(40, 2)

In [157]:
# this table identifies blob txs that have been resubmitted into the mempool with different gas and tx hash values
blob_hashes_df = (
    df.with_columns(
        pl.col("blob_hashes")
        # flatten list[str] into a long string for analysis
        .map_elements(lambda x: ",".join(x))
    )
    .group_by("blob_hashes")
    .agg(
        pl.len().alias("count"),
        pl.col("event_date_time").min(),
        pl.col("nonce").first(),
    )
    .sort(by="count", descending=True)
)

In [122]:
# popular string - worst case scenario
popular_str = blob_hashes_df[0].select("blob_hashes").item()
(
    df.with_columns(
        pl.col("blob_hashes")
        # flatten list[str] into a long string for analysis
        .map_elements(lambda x: ",".join(x))
    )
    # filter columns for easier analysis
    .filter(pl.col("blob_hashes") == popular_str)
    .drop(
        "type",
        "to",
        "from",
        "meta_network_name",
        "meta_client_implementation",
        "meta_client_geo_continent_code",
        "blob_hashes_length",
        "fill_percentage",
        "count",
        "blob_sidecars_size",
        "blob_sidecars_empty_size" "blob_gas",
    )
    .group_by("hash")
    .agg(
        pl.len().alias("count"),
        pl.col("event_date_time").mean().alias("mean_event_date_time"),
        # pl.col("blob_sidecars_size").first(),
        # pl.col("blob_sidecars_empty_size").first(),
        pl.col("nonce").first(),
        # pl.col("blob_gas").mean(),
        (
            pl.col("blob_gas_fee_cap") / 10**9
        ).mean(),  # really need blob base fee per gas...but this isn't captured by default. Need to find source?
        (
            pl.col("gas_price") / 10**9
        ).mean(),  # why is gas_price and gas_fee_cap the same?
        (pl.col("gas_tip_cap") / 10**9).mean(),  # priority fee per gas
        (pl.col("gas_fee_cap") / 10**9).mean(),
    )
    .sort(by="mean_event_date_time", descending=False)
)

hash,count,mean_event_date_time,nonce,blob_gas_fee_cap,gas_price,gas_tip_cap,gas_fee_cap
str,u32,datetime[ms],u64,f64,f64,f64,f64
"""0x1b7966525db20af4fda4ca5ddf228b728190bbdf4f8b272e723fdad9f945402e""",6,2024-03-27 17:00:21.495,377498,1,110.313460342,2,110.313460342
"""0x34ec2c04cdf364d4bd64db9b7fccd8855453f30b255fd78eca9291166948dc76""",6,2024-03-27 17:02:21.719,377498,2,220.626920684,4,220.626920684
"""0x4b8b547398d83441783428a4c216f8ee086bd1b576ee257b36dd5abe9d68d8c3""",6,2024-03-27 17:04:21.821,377498,4,441.253841368,8,441.253841368
"""0x2136ec22762381161ac4a73877957ab668e4f62403403f4bc76900638918decd""",6,2024-03-27 17:06:21.789,377498,8,882.507682736,16,882.507682736


worst case observations:
* Was resubmitted into the mempool 4 times over a 6.5 minute period.
* Looking on Ethernow, it look like there's nothing wrong [source](https://www.ethernow.xyz/tx/0x2136ec22762381161ac4a73877957ab668e4f62403403f4bc76900638918decd). However it is strange to see a blob base fee of 0.075 gwei and a max blob fee of 8 gwei.
* Looking at the [pending transaction](https://www.ethernow.xyz/tx/0x4b8b547398d83441783428a4c216f8ee086bd1b576ee257b36dd5abe9d68d8c3), we don't have what the hypothetical blob gas price. However we can do some rough estimate math. This blob tx lingered in the mempool for about 35 blocks and was confirmed in block 19526996. 
At block 19526961, blob base fee was .001 gwei.
At block 19526971, blob base fee was .004 gwei.
At block 19526981, blob base fee was .012 gwei.

In [127]:
blob_hashes_df.filter(pl.col("count") > 8).sort(by="event_date_time").plot.scatter(
    x="event_date_time", y="count", title="blob tx resubmission frequency"
)

In [128]:
# median string - least worse case scenario
median_str = blob_hashes_df.filter(pl.col("count") == 9)[
    1].select("blob_hashes").item()

(
    df.with_columns(
        pl.col("blob_hashes")
        # flatten list[str] into a long string for analysis
        .map_elements(lambda x: ",".join(x))
    )
    # filter columns for easier analysis
    .filter(pl.col("blob_hashes") == median_str)
    .drop(
        "type",
        "to",
        "from",
        "meta_network_name",
        "meta_client_implementation",
        "meta_client_geo_continent_code",
        "blob_hashes_length",
        "fill_percentage",
    )
    .group_by("hash")
    .agg(
        pl.len().alias("count"),
        pl.col("event_date_time").mean().alias("mean_event_date_time"),
        # pl.col("blob_sidecars_size").first(),
        # pl.col("blob_sidecars_empty_size").first(),
        pl.col("nonce").first(),
        # pl.col("blob_gas").mean(),
        (
            pl.col("blob_gas_fee_cap") / 10**9
        ).mean(),  # really need blob base fee per gas...but this isn't captured by default. Need to find source?
        (
            pl.col("gas_price") / 10**9
        ).mean(),  # why is gas_price and gas_fee_cap the same?
        (pl.col("gas_tip_cap") / 10**9).mean(),  # priority fee per gas
        (pl.col("gas_fee_cap") / 10**9).mean(),
    )
    .sort(by="mean_event_date_time", descending=False)
)

hash,count,mean_event_date_time,nonce,blob_gas_fee_cap,gas_price,gas_tip_cap,gas_fee_cap
str,u32,datetime[ms],u64,f64,f64,f64,f64
"""0xc25ea01ba3bfd3c275288c1db620c1d4fb79d948e1ac9d4c3e7a555dc2235c46""",3,2024-04-02 16:09:17.186,380548,106.172020274,139.059784054,2,139.059784054
"""0xac64a723a0dbb96ad309d6796540b541eb4b327fb8f2cad5791e2a0077208c5c""",3,2024-04-02 16:11:17.394,380548,212.344040548,278.119568108,4,278.119568108
"""0x975098828f8a7bec90eebbdc2f55427c238b71c00bbe5ccb0a4b319c190d877b""",3,2024-04-02 16:13:17.156,380548,424.688081096,556.239136216,8,556.239136216


### When a blob tx is waiting with a certain nonce, does that delay the rest of the blobs that need to be posted?

In [168]:
blob_hashes_df.filter(pl.col("nonce") < 380528).filter(pl.col("nonce") > 380518).sort(
    by="nonce"
)
# event_date_time is earliest (min) date time appearance

blob_hashes,count,event_date_time,nonce
str,u32,datetime[ms],u64
"""0x0134bdac242e7d81a2b686e1ef8f07ff4e1ad99c26033af5f9b02c51cc0d0366,0x01e409fe15526b3325e64dddab2922cd051e5849710cc40fbd693e91cc260a47,0x01f5aee96724e6d92c2f5cd286cddc176885c6b8c2ecd34cd0e1bda944d8b28…",3,2024-04-02 15:01:02.861,380519
"""0x01075cb3d124fdb249d2303cab68153eae8b1dded93fa31696a186cff1cf4a44,0x01c04a89d3a5128b58c588ecce447f78c72f7bb5f020701126d60d8ce916c55f,0x01bbbac5c8aea09d5685f299195bf18bfd76be3f3191113d6dfc17353f4626e…",3,2024-04-02 15:03:31.351,380520
"""0x018685935e6e185d727e9bd01b9f696e31727e75810d67bf8a2dfa01646051f5,0x013ba269a5c7935ec3ce84332a469450a270ece0842039f5996f592d6b68d376,0x01a1d35d1fbc9564a766b0f9bca9e7fa5c2ebbfbc8b063001591a86bc006c4b…",6,2024-04-02 15:06:13.134,380521
"""0x01dfa9c8566c3b6caf4a07622e065280697bb71721ee4b5e1677964688f1a5bc,0x013b0ff4e8d04f336280802555ca9c4b1358ae651ce37004c6d564493f7480d6,0x0193d7596fc57eb586219dca6c152f89547c1df1be4cd153f14b3eefc98e51f…",9,2024-04-02 15:08:35.305,380522
"""0x0124a761e23343458bae923a47cf5a809d1060a44471633f3c0d1cb03e91e283,0x01fcce80daa8b8002a48c4243bebf073a1d8766fa34cc1a7867cd975970996c1,0x01b654bc36ad6eda2df98ec010c9834bf76fdfe1cab87e2b3629ba2285a2c88…",9,2024-04-02 15:10:52.964,380523
"""0x019e2657e86d7a340a2500870e26db6bf8e3ff53c0f2e6c918e7354bec2e5b65,0x0195e17274c19ebc085ddfcc1887798d2f7a8d2e5fd76352e1d650c77f3f14c0,0x01a800c7d0903ffe426605e1c60f86821e549d4e748e77f5022a684c11736a0…",9,2024-04-02 15:13:36.894,380524
"""0x0105f5f31f1dcc7a4dd7cae1f524e34b843b551a9c8dfc966e6813d3e045d647,0x013609a5d2855175d6264662eaf946143bfc4741f4ae88121ec03376a1442c9a,0x01b047818f773fabb3be4433478f5997a042f3c01c4cdaf207a12178080a768…",6,2024-04-02 15:16:16.366,380525
"""0x018157992d70b23a14aabc1315916321c18cf6dff3978cb015a2aa1010731896,0x01a50f274471bed38b0dfdd1fd3cccb94cb3a8ab5b6f6800932578ed06295c18,0x0197698399419612f1df9050fa1440a9b76400beaf74089717510f063934e0b…",3,2024-04-02 15:19:00.901,380526
"""0x01c600f1406a424491e285021af216cb74716bfb889e18931cf8461dcc7534e9,0x01780b6af01ebfcf0b79decf3d1b7d1e54cce1de46971f62bebaa03dff3a9449,0x01b1b00190fc8f07c357441f954a30836635b5f0924dfc47f82bb32f12058e8…",3,2024-04-02 15:21:30.277,380527


In [174]:
followup_str = (
    blob_hashes_df.filter(pl.col("nonce") == 380524).select("blob_hashes").item()
)

In [175]:
df.with_columns(
    pl.col("blob_hashes")
    # flatten list[str] into a long string for analysis
    .map_elements(lambda x: ",".join(x))
).filter(pl.col("blob_hashes") == followup_str).sort(by="event_date_time")

event_date_time,type,blob_sidecars_size,blob_sidecars_empty_size,hash,to,from,blob_hashes,nonce,meta_network_name,meta_client_implementation,meta_client_geo_continent_code,blob_hashes_length,fill_percentage,blob_gas,blob_gas_fee_cap,gas_price,gas_tip_cap,gas_fee_cap
datetime[ms],u8,u32,u32,str,str,str,str,u64,str,str,str,u64,f64,u64,i64,i64,i64,i64
2024-04-02 15:13:36.894,3,786432,6197,"""0xd283e4fbee2a5d48670d00d79f2b38f684879a0bec2883148ef786852d5613ea""","""0xFf00000000000000000000000000000000008453""","""0x5050F69a9786F081509234F1a7F4684b5E5b76C9""","""0x019e2657e86d7a340a2500870e26db6bf8e3ff53c0f2e6c918e7354bec2e5b65,0x0195e17274c19ebc085ddfcc1887798d2f7a8d2e5fd76352e1d650c77f3f14c0,0x01a800c7d0903ffe426605e1c60f86821e549d4e748e77f5022a684c11736a0…",380524,"""mainnet""","""Xatu""","""EU""",6,99.21,786432,119443524432,128038446866,2000000000,128038446866
2024-04-02 15:13:37.011,3,786432,6197,"""0xd283e4fbee2a5d48670d00d79f2b38f684879a0bec2883148ef786852d5613ea""","""0xFf00000000000000000000000000000000008453""","""0x5050F69a9786F081509234F1a7F4684b5E5b76C9""","""0x019e2657e86d7a340a2500870e26db6bf8e3ff53c0f2e6c918e7354bec2e5b65,0x0195e17274c19ebc085ddfcc1887798d2f7a8d2e5fd76352e1d650c77f3f14c0,0x01a800c7d0903ffe426605e1c60f86821e549d4e748e77f5022a684c11736a0…",380524,"""mainnet""","""Xatu""","""OC""",6,99.21,786432,119443524432,128038446866,2000000000,128038446866
2024-04-02 15:13:37.118,3,786432,6197,"""0xd283e4fbee2a5d48670d00d79f2b38f684879a0bec2883148ef786852d5613ea""","""0xFf00000000000000000000000000000000008453""","""0x5050F69a9786F081509234F1a7F4684b5E5b76C9""","""0x019e2657e86d7a340a2500870e26db6bf8e3ff53c0f2e6c918e7354bec2e5b65,0x0195e17274c19ebc085ddfcc1887798d2f7a8d2e5fd76352e1d650c77f3f14c0,0x01a800c7d0903ffe426605e1c60f86821e549d4e748e77f5022a684c11736a0…",380524,"""mainnet""","""Xatu""","""NA""",6,99.21,786432,119443524432,128038446866,2000000000,128038446866
2024-04-02 15:15:37.316,3,786432,6197,"""0x80abba74d0bfa85fb2c46281434e589716641847b2f27cb52f3248d6d4dba99a""","""0xFf00000000000000000000000000000000008453""","""0x5050F69a9786F081509234F1a7F4684b5E5b76C9""","""0x019e2657e86d7a340a2500870e26db6bf8e3ff53c0f2e6c918e7354bec2e5b65,0x0195e17274c19ebc085ddfcc1887798d2f7a8d2e5fd76352e1d650c77f3f14c0,0x01a800c7d0903ffe426605e1c60f86821e549d4e748e77f5022a684c11736a0…",380524,"""mainnet""","""Xatu""","""EU""",6,99.21,786432,238887048864,256076893732,4000000000,256076893732
2024-04-02 15:15:38.050,3,786432,6197,"""0x80abba74d0bfa85fb2c46281434e589716641847b2f27cb52f3248d6d4dba99a""","""0xFf00000000000000000000000000000000008453""","""0x5050F69a9786F081509234F1a7F4684b5E5b76C9""","""0x019e2657e86d7a340a2500870e26db6bf8e3ff53c0f2e6c918e7354bec2e5b65,0x0195e17274c19ebc085ddfcc1887798d2f7a8d2e5fd76352e1d650c77f3f14c0,0x01a800c7d0903ffe426605e1c60f86821e549d4e748e77f5022a684c11736a0…",380524,"""mainnet""","""Xatu""","""OC""",6,99.21,786432,238887048864,256076893732,4000000000,256076893732
2024-04-02 15:15:38.066,3,786432,6197,"""0x80abba74d0bfa85fb2c46281434e589716641847b2f27cb52f3248d6d4dba99a""","""0xFf00000000000000000000000000000000008453""","""0x5050F69a9786F081509234F1a7F4684b5E5b76C9""","""0x019e2657e86d7a340a2500870e26db6bf8e3ff53c0f2e6c918e7354bec2e5b65,0x0195e17274c19ebc085ddfcc1887798d2f7a8d2e5fd76352e1d650c77f3f14c0,0x01a800c7d0903ffe426605e1c60f86821e549d4e748e77f5022a684c11736a0…",380524,"""mainnet""","""Xatu""","""NA""",6,99.21,786432,238887048864,256076893732,4000000000,256076893732
2024-04-02 15:17:37.147,3,786432,6197,"""0x96e58bf8d2a21f5db37d242c135ffff6146758b2be057cc3815744966e018c22""","""0xFf00000000000000000000000000000000008453""","""0x5050F69a9786F081509234F1a7F4684b5E5b76C9""","""0x019e2657e86d7a340a2500870e26db6bf8e3ff53c0f2e6c918e7354bec2e5b65,0x0195e17274c19ebc085ddfcc1887798d2f7a8d2e5fd76352e1d650c77f3f14c0,0x01a800c7d0903ffe426605e1c60f86821e549d4e748e77f5022a684c11736a0…",380524,"""mainnet""","""Xatu""","""EU""",6,99.21,786432,477774097728,512153787464,8000000000,512153787464
2024-04-02 15:17:37.688,3,786432,6197,"""0x96e58bf8d2a21f5db37d242c135ffff6146758b2be057cc3815744966e018c22""","""0xFf00000000000000000000000000000000008453""","""0x5050F69a9786F081509234F1a7F4684b5E5b76C9""","""0x019e2657e86d7a340a2500870e26db6bf8e3ff53c0f2e6c918e7354bec2e5b65,0x0195e17274c19ebc085ddfcc1887798d2f7a8d2e5fd76352e1d650c77f3f14c0,0x01a800c7d0903ffe426605e1c60f86821e549d4e748e77f5022a684c11736a0…",380524,"""mainnet""","""Xatu""","""OC""",6,99.21,786432,477774097728,512153787464,8000000000,512153787464
2024-04-02 15:17:37.758,3,786432,6197,"""0x96e58bf8d2a21f5db37d242c135ffff6146758b2be057cc3815744966e018c22""","""0xFf00000000000000000000000000000000008453""","""0x5050F69a9786F081509234F1a7F4684b5E5b76C9""","""0x019e2657e86d7a340a2500870e26db6bf8e3ff53c0f2e6c918e7354bec2e5b65,0x0195e17274c19ebc085ddfcc1887798d2f7a8d2e5fd76352e1d650c77f3f14c0,0x01a800c7d0903ffe426605e1c60f86821e549d4e748e77f5022a684c11736a0…",380524,"""mainnet""","""Xatu""","""NA""",6,99.21,786432,477774097728,512153787464,8000000000,512153787464


In [169]:
blob_hashes_df.filter(pl.col("nonce") < 380528).filter(pl.col("nonce") > 380518).sort(
    by="nonce"
).plot.line(x="nonce", y="count")