In [57]:
import pandas as pd
import plotly.express as px
from mainnet_launch.constants import *
from mainnet_launch.database.schema.ensure_tables_are_current.using_3rd_party.save_local_swap_matrix_all_autopools import (
    get_autopool_possible_assets,
)


def tidy_up_quotes(df: pd.DataFrame, token_address_to_decimals: dict, token_address_to_symbol: dict) -> pd.DataFrame:

    df = df.dropna(subset=["buyAmount", "minBuyAmount", "sellAmount", "buyToken", "sellToken"]).copy()
    df['datetime_received'] = pd.to_datetime(df['datetime_received'], utc=True)

    df["buy_amount_norm"] = df.apply(
        lambda row: int(row["buyAmount"]) / 10 ** token_address_to_decimals[row["buyToken"]], axis=1
    )
    df["min_buy_amount_norm"] = df.apply(
        lambda row: int(row["minBuyAmount"]) / 10 ** token_address_to_decimals[row["buyToken"]], axis=1
    )
    df["sell_amount_norm"] = df.apply(
        lambda row: int(row["sellAmount"]) / 10 ** token_address_to_decimals[row["sellToken"]], axis=1
    )

    df["buy_amount_price"] = df.apply(lambda row: row["buy_amount_norm"] / row["sell_amount_norm"], axis=1)
    df["min_buy_amount_price"] = df.apply(lambda row: row["min_buy_amount_norm"] / row["sell_amount_norm"], axis=1)
    df["buy_symbol"] = df.apply(lambda row: token_address_to_symbol[row["buyToken"]], axis=1)
    df["sell_symbol"] = df.apply(lambda row: token_address_to_symbol[row["sellToken"]], axis=1)
    df["label"] = df["sell_symbol"] + " -> " + df["buy_symbol"]

    df["safe_value_bought"] = df.apply(lambda row: row["buy_token_price"] * row["buy_amount_norm"], axis=1)
    df["safe_value_min_bought"] = df.apply(lambda row: row["buy_token_price"] * row["min_buy_amount_norm"], axis=1)
    df["safe_value_sold"] = df.apply(lambda row: row["sell_token_price"] * row["sell_amount_norm"], axis=1)
    df["safe_value_slippage_bps"] = df.apply(
        lambda row: 1_000 * (row["safe_value_sold"] - row["safe_value_bought"]) / row["safe_value_sold"], axis=1
    )
    df["long_label"] = df["label"] + " " + df["sell_amount_norm"].astype(int).astype(str)

    df["30_min_chunk"] = pd.to_datetime(df["datetime_received"]).dt.floor("30min")
    return df


bad_autopools = [BASE_EUR, SILO_ETH, SONIC_USD, BAL_ETH, DINERO_ETH, ARB_USD, SILO_USD]

autopool_to_slippage = {}
for autopool in ALL_AUTOPOOLS:
    if autopool not in bad_autopools:
        with_spot_prices_save_name = WORKING_DATA_DIR / f"swap_matrix/{autopool.name}_full_swap_matrix_with_prices.csv"
        print(autopool.name)
        df = pd.read_csv(with_spot_prices_save_name)

        assets = get_autopool_possible_assets(autopool)
        token_address_to_symbol = assets.set_index("token_address")["symbol"].to_dict()
        token_address_to_decimals = assets.set_index("token_address")["decimals"].to_dict()
        df = tidy_up_quotes(df, token_address_to_decimals, token_address_to_symbol)

        unique_slippage_values = (
            df.groupby(["30_min_chunk", "long_label"])[["datetime_received", "safe_value_slippage_bps", "buy_symbol"]]
            .first()
            .reset_index()
        )

        slippage_over_time = unique_slippage_values.pivot(
            index="30_min_chunk", columns="long_label", values="safe_value_slippage_bps"
        )
        autopool_to_slippage[autopool.name] = (df, slippage_over_time)
        print(f"Read {len(df)} rows for {with_spot_prices_save_name}")

autoETH
Read 9150 rows for /Users/pb/Documents/Github/Tokemak/v2-rebalance-dashboard/working_data/swap_matrix/autoETH_full_swap_matrix_with_prices.csv
autoLRT
Read 2149 rows for /Users/pb/Documents/Github/Tokemak/v2-rebalance-dashboard/working_data/swap_matrix/autoLRT_full_swap_matrix_with_prices.csv
baseETH
Read 1546 rows for /Users/pb/Documents/Github/Tokemak/v2-rebalance-dashboard/working_data/swap_matrix/baseETH_full_swap_matrix_with_prices.csv
autoUSD
Read 11721 rows for /Users/pb/Documents/Github/Tokemak/v2-rebalance-dashboard/working_data/swap_matrix/autoUSD_full_swap_matrix_with_prices.csv
baseUSD
Read 645 rows for /Users/pb/Documents/Github/Tokemak/v2-rebalance-dashboard/working_data/swap_matrix/baseUSD_full_swap_matrix_with_prices.csv
autoDOLA
Read 3647 rows for /Users/pb/Documents/Github/Tokemak/v2-rebalance-dashboard/working_data/swap_matrix/autoDOLA_full_swap_matrix_with_prices.csv


In [58]:
autopool = AUTO_DOLA
df, slippage_over_time = autopool_to_slippage[autopool.name]

valid_tokens = df['buy_symbol'].unique()

for token in valid_tokens:
    for size in df['sell_amount_norm'].unique():
        sub_df = df[(df['buy_symbol'] == token) & (df['sell_amount_norm'] == size)]

        unique_slippage_values = (
            sub_df.groupby(["30_min_chunk", "long_label"])[["datetime_received", "safe_value_slippage_bps", "buy_symbol"]]
            .first()
            .reset_index()
        )

        slippage_over_time = unique_slippage_values.pivot(
            index="30_min_chunk", columns="long_label", values="safe_value_slippage_bps"
        )

        sorted_slippage = slippage_over_time.median().sort_values()
        px.line(slippage_over_time).show()
    break


In [59]:



# silently missing values
px.box(
    slippage_over_time[sorted_slippage.index],
    title=f"Swap cost bps over time {autopool.name} (sorted by median)",
    orientation="h",
    height=900,
    width=900
).show()

In [60]:
df.pivot(index='datetime_received', columns='long_label', values='safe_value_slippage_bps')

long_label,DOLA -> USR 100000,DOLA -> USR 150000,DOLA -> USR 200000,DOLA -> USR 250000,DOLA -> USR 50000,DOLA -> alUSD 100000,DOLA -> alUSD 150000,DOLA -> alUSD 200000,DOLA -> alUSD 250000,DOLA -> alUSD 50000,...,scrvUSD -> sUSDS 100000,scrvUSD -> sUSDS 150000,scrvUSD -> sUSDS 200000,scrvUSD -> sUSDS 250000,scrvUSD -> sUSDS 50000,scrvUSD -> sUSDe 100000,scrvUSD -> sUSDe 150000,scrvUSD -> sUSDe 200000,scrvUSD -> sUSDe 250000,scrvUSD -> sUSDe 50000
datetime_received,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2025-09-26 02:22:48.392579+00:00,,,,,,,,,,,...,,,,,,,,,,
2025-09-26 02:22:48.393938+00:00,,,,2.186993,,,,,,,...,,,,,,,,,,
2025-09-26 02:22:48.394409+00:00,,,,,,,,,,,...,,,,,,,,,,
2025-09-26 02:22:48.423815+00:00,,,,,,,,,,,...,,,,,,,,,,
2025-09-26 02:22:48.424465+00:00,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-09-26 17:39:15.949936+00:00,,,,,,,,,,,...,,,,,,,,,,
2025-09-26 17:39:17.289530+00:00,,,,,,,,,,,...,,,,,,,,,,
2025-09-26 17:39:18.126121+00:00,,,,,,,,,,,...,,,,,,,,,,
2025-09-26 17:39:18.686059+00:00,,,,,,,,,,,...,,,,,,,,,,


In [61]:
px.scatter(df, title="Swap cost bps over time autoETH").show()

ValueError: Plotly Express cannot process wide-form data with columns of different type.

In [None]:
100 * (slippage_over_time.isna().sum() / len(slippage_over_time)).describe()

In [None]:
pass

In [None]:
columns_without_frx_ETH = [col for col in slippage_over_time.columns if "rETH" not in col]
non_frx_slippage = slippage_over_time[columns_without_frx_ETH]
sorted_slippage = non_frx_slippage.median().sort_values()

px.box(
    non_frx_slippage[sorted_slippage.index],
    title="Swap cost bps over time autoETH (sorted by median)",
    orientation="h",
    height=1000,
).show()

In [None]:
columns_with_frx_ETH = [col for col in slippage_over_time.columns if "rETH"  in col]
frx_slippage = slippage_over_time[columns_with_frx_ETH]

px.box(
    frx_slippage[frx_slippage.median().sort_values().index],
    title="Swap cost bps over time autoETH (sorted by median)",
    orientation="h",
    height=1000,
).show()

In [None]:
# on base eth  -> swapping to rETH is lower slippage and swapping from rETH is higher slippage


In [None]:
# at least in this sample 

In [None]:
percent_na = non_frx_slippage.isna().mean() * 100
percent_na

In [None]:
slippage_over_time

In [None]:
for i, label in enumerate(["30_min_ago", "60_min_ago", "90_min_ago", "120_min_ago"]):
    time_ago = slippage_over_time.shift(i + 1)

    absolute_diff = (slippage_over_time - time_ago).abs()

    px.histogram(absolute_diff, title=f"Absolute difference in swap cost bps from 30 min ago autoETH, {label}").show()

In [None]:
hour_ago = slippage_over_time.shift(3)
absolute_diff = (slippage_over_time - hour_ago).abs()

px.histogram(absolute_diff, title="Absolute difference in swap cost bps from 45 min ago").show()

In [None]:
hour_ago = slippage_over_time.shift(4)
absolute_diff = (slippage_over_time - hour_ago).abs()

px.histogram(absolute_diff, title="Absolute difference in swap cost bps from 1 hour ago").show()

In [None]:
px.box(slippage_over_time)
# slippage values are veyr tight, typically 2-3 bps, a few higher and a few lower

In [None]:
def compute_bps_diff_thresholds(slippage_over_time, column: str, percentile) -> pd.DataFrame:
    one_token_prices = slippage_over_time[[column]].copy()
    one_token_prices["t-15"] = one_token_prices[column].shift(1)
    one_token_prices["t-30"] = one_token_prices[column].shift(2)
    one_token_prices["t-45"] = one_token_prices[column].shift(3)
    one_token_prices["t-60"] = one_token_prices[column].shift(4)
    one_token_prices["t-120"] = one_token_prices[column].shift(8)
    one_token_prices["t-180"] = one_token_prices[column].shift(12)
    one_token_prices["bps_diff_15_min"] = one_token_prices[column] - one_token_prices["t-15"]
    one_token_prices["bps_diff_30_min"] = one_token_prices[column] - one_token_prices["t-30"]
    one_token_prices["bps_diff_45_min"] = one_token_prices[column] - one_token_prices["t-45"]
    one_token_prices["bps_diff_60_min"] = one_token_prices[column] - one_token_prices["t-60"]
    one_token_prices["bps_diff_120_min"] = one_token_prices[column] - one_token_prices["t-120"]
    one_token_prices["bps_diff_180_min"] = one_token_prices[column] - one_token_prices["t-180"]
    thresholds = one_token_prices.filter(like="bps_diff_").abs().quantile(percentile).rename(column)
    return thresholds


percentile = 0.95


def plot_bps_thresholds_by_size():
    for size in df["sell_amount_norm"].unique().astype(int).astype(str):
        all_thresholds = []
        for i, col in enumerate(slippage_over_time.columns):
            if " " + size not in col:
                continue
            thresholds = compute_bps_diff_thresholds(slippage_over_time, col, percentile)
            all_thresholds.append(thresholds)
        threshold_df = pd.concat(all_thresholds, axis=1).T
        px.box(threshold_df, title=f"{percentile}th percentile of bps diff for size " + size).show()


def plot_bps_diffs_by_label():
    for label in df["buy_symbol"].unique():
        all_thresholds = []

        for i, col in enumerate(slippage_over_time.columns):
            if str(label) in col:
                thresholds = compute_bps_diff_thresholds(slippage_over_time, col, percentile)
                all_thresholds.append(thresholds)

        if len(all_thresholds) == 0:
            print("No data for label", label)
            continue

        threshold_df = pd.concat(all_thresholds, axis=1).T
        px.box(
            threshold_df,
            title=f"{percentile}th percentile of bps diff for label " + label,
        ).show()


def plot_all_bps_diffs():
    all_thresholds = []
    for i, col in enumerate(slippage_over_time.columns):
        thresholds = compute_bps_diff_thresholds(slippage_over_time, col, percentile)
        all_thresholds.append(thresholds)
    threshold_df = pd.concat(all_thresholds, axis=1).T
    px.box(threshold_df, title=f"{percentile}th percentile of bps diff for all").show()


plot_all_bps_diffs()
plot_bps_diffs_by_label()

In [None]:
plot_bps_thresholds_by_size()