# Efficiency losses for compute repricings

## Using data from gas benchmarks

#### Maria Silva, November 2025

In [None]:
import os
import duckdb
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings("ignore")

In [None]:
# plotting theme
sns.set_theme(
    style="whitegrid", palette="Set2", rc={"figure.dpi": 500, "axes.titlesize": 15}
)

## Load and process data

In [None]:
# Main directories
current_path = os.getcwd()
repo_dir = os.path.abspath(os.path.join(current_path, ".."))
data_dir = os.path.join(repo_dir, "data")

In this analysis, we are using data generated by running the [EEST benchmark suite](https://github.com/ethereum/execution-spec-tests/tree/main/tests/benchmark) with the [Nethermind benchmarking tooling](https://github.com/NethermindEth/gas-benchmarks). We extracted this data in 05-11-2025.

In [5]:
df = pd.read_csv(os.path.join(data_dir, "min_mgas_s_by_test_and_client_2025-11-05.csv"))
df = df.melt(
    id_vars=["test_title"], var_name="client", value_name="mgas/s"
).sort_values(by="mgas/s")

# Parse test title
df["test_file"] = (
    df["test_title"].str.replace("tests_benchmark_", "").str.split(".py").str[0]
)
df["test_name"] = (
    df["test_title"]
    .str.split(".py__")
    .str[1]
    .str.split("[")
    .str[0]
)
df["test_params"] = (
    df["test_title"]
    .str.split("[")
    .str[1]
    .str.split("]")
    .str[0]
    .str.split("engine_x")
    .str[1]
    .str[1:]
)
df["test_fork"] = df["test_title"].str.split("fork_").str[1].str.split("-").str[0]
# Parse opcodes
df["test_opcode"] = df["test_params"].str.extract(r"(?:opcode_|op_)([^-]+)")
df["test_opcode"] = np.where(df["test_name"]=="test_worst_modexp", "MODEXP", df["test_opcode"])
df["test_opcode"] = np.where(df["test_name"].str.contains("selfdestruct"), "SELFDESTRUCT", df["test_opcode"])
df["test_opcode"] = np.where(df["test_name"]=="test_worst_calldatacopy", "CALLDATACOPY", df["test_opcode"])
df["test_opcode"] = np.where(df["test_name"]=="test_worst_mcopy", "MCOPY", df["test_opcode"])
df["test_opcode"] = np.where(df["test_name"]=="test_worst_codecopy", "CODECOPY", df["test_opcode"])
df["test_opcode"] = np.where(df["test_name"]=="test_worst_returndatacopy", "RETURNDATACOPY", df["test_opcode"])
df["test_opcode"] = np.where(df["test_name"].str.contains("returndatasize"), "RETURNDATASIZE", df["test_opcode"])
df["test_opcode"] = np.where(df["test_name"].str.contains("extcodecopy"), "EXTCODECOPY", df["test_opcode"])
df["test_opcode"] = np.where(df["test_name"]=="test_worst_calldataload", "CALLDATALOAD", df["test_opcode"])
df["test_opcode"] = np.where(df["test_name"]=="test_worst_keccak", "KECCAK", df["test_opcode"])
df["test_opcode"] = np.where(df["test_name"]=="test_worst_shifts", df["test_params"].str[-3:], df["test_opcode"])
df["test_opcode"] = np.where(df["test_name"]=="test_worst_selfbalance", "SELFBALANCE", df["test_opcode"])
df["test_opcode"] = np.where(df["test_name"]=="test_worst_msize", "MSIZE", df["test_opcode"])
df["test_opcode"] = np.where(df["test_name"]=="test_worst_jumpdests", "JUMPDEST", df["test_opcode"])
df["test_opcode"] = np.where(df["test_name"].str.contains("jumpi"), "JUMPI", df["test_opcode"])
df["test_opcode"] = np.where(df["test_name"]=="test_worst_jumps", "JUMP", df["test_opcode"])
df["test_opcode"] = np.where(df["test_name"]=="test_worst_tstore", "TSTORE", df["test_opcode"])
df["test_opcode"] = np.where(df["test_name"]=="test_worst_tload", "TLOAD", df["test_opcode"])
df["test_opcode"] = np.where(df["test_params"].str.contains("SSTORE"), "SSTORE", df["test_opcode"])
df["test_opcode"] = np.where(df["test_params"].str.contains("SLOAD"), "SLOAD", df["test_opcode"])
df["test_opcode"] = np.where(df["test_name"]=="test_worst_log_opcodes", "LOG", df["test_opcode"])
df["test_opcode"] = np.where(df["test_name"]=="test_worst_calldatasize", "CALLDATASIZE", df["test_opcode"])
df["test_opcode"] = np.where(df["test_name"]=="test_worst_blobhash", "BLOBHASH", df["test_opcode"])
df["test_opcode"] = np.where(df["test_name"]=="test_worst_blockhash", "BLOCKHASH", df["test_opcode"])
df["test_opcode"] = np.where(df["test_name"]=="test_worst_callvalue", "CALLVALUE", df["test_opcode"])
df["test_opcode"] = np.where(df["test_params"].str.contains("bn128_add"), "ecAdd", df["test_opcode"])
df["test_opcode"] = np.where(df["test_params"].str.contains("bn128_mul"), "ecMul", df["test_opcode"])
df["test_opcode"] = np.where((df["test_params"].str.contains("bn128")) & (df["test_params"].str.contains("pairing")), "ecPairing", df["test_opcode"])
df["test_opcode"] = np.where(df["test_name"]=="test_amortized_bn128_pairings", "ecPairing", df["test_opcode"])
df["test_opcode"] = np.where(df["test_params"].str.contains("point_evaluation"), "point evaluation", df["test_opcode"])
df["test_opcode"] = np.where(df["test_params"].str.contains("blake2f"), "blake2f", df["test_opcode"])
df["test_opcode"] = np.where(df["test_params"].str.contains("ecrecover"), "ecRecover", df["test_opcode"])
df["test_opcode"] = np.where(df["test_params"].str.contains("SHA2-256"), "SHA2-256", df["test_opcode"]) 
df["test_opcode"] = np.where(df["test_params"].str.contains("RIPEMD-160"), "RIPEMD-160", df["test_opcode"])
df["test_opcode"] = np.where(df["test_params"].str.contains("IDENTITY"), "identity", df["test_opcode"])
df["test_opcode"] = np.where(df["test_params"].str.contains("bls12_g1add"), "BLS12_G1ADD", df["test_opcode"])
df["test_opcode"] = np.where(df["test_params"].str.contains("bls12_g1msm"), "BLS12_G1MSM", df["test_opcode"])
df["test_opcode"] = np.where(df["test_params"].str.contains("bls12_g2add"), "BLS12_G2ADD", df["test_opcode"])
df["test_opcode"] = np.where(df["test_params"].str.contains("bls12_g2msm"), "BLS12_G2MSM", df["test_opcode"])
df["test_opcode"] = np.where(df["test_params"].str.contains("bls12_pairing_check"), "BLS12_PAIRING_CHECK", df["test_opcode"])
df["test_opcode"] = np.where(df["test_params"].str.contains("bls12_fp_to_g1"), "BLS12_MAP_FP_TO_G1", df["test_opcode"])
df["test_opcode"] = np.where(df["test_params"].str.contains("bls12_fp_to_g2"), "BLS12_MAP_FP2_TO_G2", df["test_opcode"])

df = df.drop(columns=["test_title"])
df.info()

TypeError: '<' not supported between instances of 'float' and 'str'

In [None]:
client_gas_cols = [c for c in df.columns[:9] if c.endswith("_gas")]
worse_df = df.copy()
# Get slowest value across clients 
worse_df["param_ns"] = df[client_gas_cols].max(axis=1)
# Select columns
worse_df = worse_df[["opcode", "param_ns", "constant_current_gas"]]
# Exclude zeros, logs and ARG2s from the analysis
worse_df = worse_df[worse_df["constant_current_gas"]>0]
worse_df = worse_df[~worse_df["opcode"].str.startswith("LOG")]
worse_df = worse_df[~worse_df["opcode"].str.endswith("ARG2")]
# Fix some opcode names
worse_df["opcode"] = np.where(worse_df["opcode"].str.endswith("ARG1"), worse_df["opcode"].str[:-5], worse_df["opcode"])

worse_df

## Compute efficiency losses for 300M gas limit @4s execution time

In [None]:
limit = 300*1e6
ex_time = 4*1e9
params_df = worse_df.copy()
# current
params_df["op_per_time"] = np.floor(ex_time/params_df["param_ns"]).astype(int)
params_df["op_per_limit_current"] = np.floor(limit/params_df["constant_current_gas"]).astype(int)
params_df["eff_loss_current"] = (params_df["op_per_time"]/params_df["op_per_limit_current"])-1
# new gas rounded
params_df["new_gas_rounded"] = np.ceil((limit*params_df["param_ns"])/ex_time).astype(int)
params_df["op_per_limit_rounded"] = np.floor(limit/params_df["new_gas_rounded"]).astype(int)
params_df["eff_loss_rounded"] = (params_df["op_per_time"]/params_df["op_per_limit_rounded"])-1
# new gas fractional
params_df["new_gas_frac"] = np.ceil(100*(limit*params_df["param_ns"])/ex_time)/100
params_df["op_per_limit_frac"] = np.floor(limit/params_df["new_gas_frac"]).astype(int)
params_df["eff_loss_frac"] = (params_df["op_per_time"]/params_df["op_per_limit_frac"])-1
# new gas minimal
params_df["new_gas_minimal"] = np.ceil((limit*params_df["param_ns"])/ex_time).astype(int)
params_df["new_gas_minimal"] = np.where(
    params_df["new_gas_minimal"]>params_df["constant_current_gas"], 
    params_df["new_gas_minimal"], 
    params_df["constant_current_gas"]
    )
params_df["op_per_limit_minimal"] = np.floor(limit/params_df["new_gas_minimal"]).astype(int)
params_df["eff_loss_minimal"] = (params_df["op_per_time"]/params_df["op_per_limit_minimal"])-1

params_df

In [None]:
params_df[params_df["new_gas_minimal"]>params_df["constant_current_gas"]]

In [None]:
print("Number of repriced opcodes:")
print("Minimal repricing:", sum(params_df["new_gas_minimal"]!=params_df["constant_current_gas"]))
print("Rounded repricing:", sum(params_df["new_gas_rounded"]!=params_df["constant_current_gas"]))
print("Fractional repricing:", sum(params_df["new_gas_frac"]!=params_df["constant_current_gas"]))

## Efficiency losses per opcode

In [None]:
eff_loss_df = params_df[
    [
        "opcode",
        "eff_loss_current",
        "eff_loss_rounded",
        "eff_loss_frac",
    ]
]
eff_cols = ["Current price", "Rounded repricing", "Fractional repricing"]
eff_loss_df.columns = ["opcode"] + eff_cols
eff_loss_df = eff_loss_df.melt(
    id_vars=["opcode"],
    value_vars=eff_cols,
    var_name="repricing_strategy",
    value_name="efficiency_loss",
)

plt.figure(figsize=(10, 2))
sns.boxplot(
    data=eff_loss_df,
    x="efficiency_loss",
    y="repricing_strategy",
    hue="repricing_strategy",
    legend=False
)
plt.title("Distribution of efficiency losses at opcode level\n(300M gas limit @4s execution time)")
plt.ylabel("")
plt.xlabel("Efficiency loss per opcode (theoretical max ops / max ops from gas price - 1)")
plt.show()

In [None]:
plt.figure(figsize=(10, 2))
sns.boxplot(
    data=eff_loss_df,
    x="efficiency_loss",
    y="repricing_strategy",
    hue="repricing_strategy",
    legend=False
)
plt.xlim(-0.5,20)
plt.title("Distribution of efficiency losses at opcode level\n(300M gas limit @4s execution time)")
plt.ylabel("")
plt.xlabel("Efficiency loss per opcode (theoretical max ops / max ops from gas price - 1)")
plt.show()

## Efficiency losses per transaction

First, we need to load the transactions traces

In [None]:
block_start = 22000000
block_end = 22001000
op_files_dir = os.path.join(data_dir, "aggregated_opcodes_v3", "*", "file.parquet")
# Load parquet files with duckdb
query = f"""
SELECT 
    block_height,
    tx_hash,
    op AS opcode,
    op_gas_cost,
    op_gas_pair_count,
    call_address
FROM read_parquet(
    '{op_files_dir}', 
    hive_partitioning=True, 
    union_by_name=True
    )
WHERE block_height BETWEEN {block_start} AND {block_end};
"""
raw_agg_trace_df = duckdb.connect().execute(query).fetchdf()
# Print info
raw_agg_trace_df.info()

Now we need to process the traces to isolate the costs of precompiles. We first define some auxiliary functions and then process the data.

In [None]:
PRECOMPILE_MAP = {
    1: {"name": "ECRECOVER", "fixed_cost": 3000.0},
    2: {"name": "SHA2-256", "fixed_cost": 60.0},
    3: {"name": "RIPEMD-160", "fixed_cost": 600.0},
    4: {"name": "IDENTITY", "fixed_cost": 15.0},
    5: {"name": "MODEXP", "fixed_cost": 200.0},
    6: {"name": "ECADD", "fixed_cost": 150.0},
    7: {"name": "ECMUL", "fixed_cost": 6000.0},
    8: {"name": "ECPAIRING317000", "fixed_cost": 45000.0}, # assuming worse case!
    9: {"name": "BLAKE2F", "fixed_cost": 0.0},
    10: {"name": "point_evaluation", "fixed_cost": 50000.0},
}

def is_precompile(call_address: str) -> bool:
    try:
        call_address_int = int(call_address, 16)
        if call_address_int in PRECOMPILE_MAP.keys():
            return True
        else:
            return False
    except:
        return False


def map_precompile(call_address: str) -> int | None:
    if is_precompile(call_address):
        call_address_int = int(call_address, 16)
        precompile = PRECOMPILE_MAP.get(call_address_int).get("name")
    else:
        precompile = None
    return precompile


def map_precompile_fixed_cost(call_address: str) -> float:
    if is_precompile(call_address):
        call_address_int = int(call_address, 16)
        cost = PRECOMPILE_MAP.get(call_address_int).get("fixed_cost")
    else:
        cost = 0.0
    return cost

In [None]:
agg_trace_df = raw_agg_trace_df.copy()
# Fix create2
agg_trace_df["opcode"] = np.where(agg_trace_df["opcode"]=="CREATE2", "CREATE", agg_trace_df["opcode"])
# Fix precompiles
agg_trace_df["precomp_name"] = agg_trace_df["call_address"].apply(
    lambda x: map_precompile(x)
)
agg_trace_df["precomp_cost"] = (
    agg_trace_df["call_address"].apply(lambda x: map_precompile_fixed_cost(x)).fillna(0)
)
agg_trace_df["op_gas_cost"] = agg_trace_df["op_gas_cost"] - agg_trace_df["precomp_cost"]
precomp_df = agg_trace_df[agg_trace_df["precomp_name"].notna()]
precomp_df["opcode"] = precomp_df["precomp_name"]
precomp_df["op_gas_cost"] = precomp_df["precomp_cost"]
agg_trace_df = pd.concat([agg_trace_df, precomp_df], ignore_index=True).drop(
    columns=["precomp_name", "precomp_cost", "call_address"]
)
agg_trace_df.head()

Now we can join the traces with the opcode repricing data to compute efficiency losses per transaction.

In [None]:
# Let see which opcodes will be ignored
cols = ["param_ns", "new_gas_minimal", "new_gas_rounded", "new_gas_frac"]
tx_params_df = agg_trace_df.merge(params_df[["opcode"] + cols], on="opcode", how="left")
tx_params_df[tx_params_df["param_ns"].isna()]["opcode"].unique()

In [None]:
# Join gas costs with transaction traces
cols = ["param_ns", "new_gas_minimal", "new_gas_rounded", "new_gas_frac"]
tx_params_df = agg_trace_df.merge(params_df[["opcode"] + cols], on="opcode", how="left")
tx_params_df["param_ns"] = tx_params_df["param_ns"].fillna(
    tx_params_df["op_gas_cost"] * ex_time / limit
)
tx_params_df["new_gas_minimal"] = tx_params_df["new_gas_minimal"].fillna(
    tx_params_df["op_gas_cost"]
)
tx_params_df["new_gas_rounded"] = tx_params_df["new_gas_rounded"].fillna(
    tx_params_df["op_gas_cost"]
)
tx_params_df["new_gas_frac"] = tx_params_df["new_gas_frac"].fillna(
    tx_params_df["op_gas_cost"]
)
# Compute total cost per transaction
tx_params_df[cols + ["op_gas_cost"]] = tx_params_df[cols + ["op_gas_cost"]].mul(
    tx_params_df["op_gas_pair_count"], axis=0
)
tx_params_df = (
    tx_params_df.groupby(["tx_hash"])[cols + ["op_gas_cost"]].sum().reset_index()
)
tx_params_df["new_gas_frac"] = np.ceil(tx_params_df["new_gas_frac"])
# cleanup column names
tx_params_df.columns = [
    "tx_hash",
    "total_ex_time_ns",
    "total_gas_minimal",
    "total_gas_rounded",
    "total_gas_frac",
    "total_gas_current",
]
# Compute efficiency metrics
tx_params_df["tx_per_time"] = np.floor(ex_time / tx_params_df["total_ex_time_ns"])
tx_params_df["tx_per_limit_current"] = np.floor(limit/tx_params_df["total_gas_current"])
tx_params_df["tx_per_limit_minimal"] = np.floor(limit/tx_params_df["total_gas_minimal"])
tx_params_df["tx_per_limit_rounded"] = np.floor(limit/tx_params_df["total_gas_rounded"])
tx_params_df["tx_per_limit_frac"] = np.floor(limit/tx_params_df["total_gas_frac"])
tx_params_df["eff_loss_current"] = (tx_params_df["tx_per_time"]/tx_params_df["tx_per_limit_current"])-1
tx_params_df["eff_loss_minimal"] = (tx_params_df["tx_per_time"]/tx_params_df["tx_per_limit_minimal"])-1
tx_params_df["eff_loss_rounded"] = (tx_params_df["tx_per_time"]/tx_params_df["tx_per_limit_rounded"])-1
tx_params_df["eff_loss_frac"] = (tx_params_df["tx_per_time"]/tx_params_df["tx_per_limit_frac"])-1

tx_params_df.info()

In [None]:
tx_eff_loss_df = tx_params_df[
    [
        "tx_hash",
        "eff_loss_current",
        "eff_loss_minimal",
        "eff_loss_rounded",
        "eff_loss_frac",
    ]
]
eff_cols = ["Current price", "Minimal repricing", "Rounded repricing", "Fractional repricing"]
tx_eff_loss_df.columns = ["tx_hash"] + eff_cols
tx_eff_loss_df = tx_eff_loss_df.melt(
    id_vars=["tx_hash"],
    value_vars=eff_cols,
    var_name="repricing_strategy",
    value_name="efficiency_loss",
)

plt.figure(figsize=(10, 2))
sns.boxplot(
    data=tx_eff_loss_df,
    x="efficiency_loss",
    y="repricing_strategy",
    hue="repricing_strategy",
    legend=False
)
plt.title("Distribution of efficiency losses at transaction level\n(300M gas limit @4s execution time)")
plt.ylabel("")
plt.xlabel("Efficiency loss per tx (theoretical max ops / max ops from gas price - 1)")
plt.show()

In [None]:
plt.figure(figsize=(10, 2))
sns.boxplot(
    data=tx_eff_loss_df,
    x="efficiency_loss",
    y="repricing_strategy",
    hue="repricing_strategy",
    legend=False
)
plt.xlim(-0.01,1.25)
plt.title("Distribution of efficiency losses at transaction level\n(300M gas limit @4s execution time)")
plt.ylabel("")
plt.xlabel("Efficiency loss per tx (theoretical max ops / max ops from gas price - 1)")
plt.show()

In [None]:
plt.figure(figsize=(10, 2))
sns.boxplot(
    data=tx_eff_loss_df[tx_eff_loss_df["repricing_strategy"]!="Minimal repricing"],
    x="efficiency_loss",
    y="repricing_strategy",
    hue="repricing_strategy",
    legend=False
)
plt.xlim(-0.01,1.25)
plt.title("Distribution of efficiency losses at transaction level\n(300M gas limit @4s execution time)")
plt.ylabel("")
plt.xlabel("Efficiency loss per tx (theoretical max ops / max ops from gas price - 1)")
plt.show()

In [None]:
tx_eff_loss_df[tx_eff_loss_df["efficiency_loss"] > 1.25].groupby("repricing_strategy").size()

In [None]:
tx_eff_loss_df[tx_eff_loss_df["efficiency_loss"] > 1.25].groupby(
    "repricing_strategy"
).size() / tx_eff_loss_df.groupby("repricing_strategy").size().iloc[0]