## Collecting protocol fees across Balancer core pools on all networks
- Spreadsheet as reference: https://docs.google.com/spreadsheets/d/1xwUPpbYq7woVOU9vQ8EB8MY75I-1mauTLyDVwvKUDKo/edit#gid=0
- Colab: https://colab.research.google.com/drive/1vKCvcV5mkL1zwW3565kLSGkBEbt8NsoB?usp=sharing


In [3]:
import os

from dotenv import load_dotenv
from web3 import Web3
from web3.middleware import geth_poa_middleware

from notebooks import get_twap_bpt_price

# Load variables from a local .env file before reading os.environ below.
load_dotenv()

# Arbitrum and Ethereum endpoints come from the environment; a missing
# variable raises KeyError right here.
arb_provider = Web3.HTTPProvider(os.environ["ARBNODEURL"])
arb_web3 = Web3(arb_provider)
eth_provider = Web3.HTTPProvider(os.environ["ETHNODEURL"])
eth_web3 = Web3(eth_provider)

# Polygon uses a fixed public RPC endpoint, with the PoA middleware injected
# at the innermost layer (layer=0).
poly_provider = Web3.HTTPProvider("https://polygon-rpc.com")
poly_web3 = Web3(poly_provider)
poly_web3.middleware_onion.inject(geth_poa_middleware, layer=0)

# Constants for Arbitrum.
# Balancer pool ids of the Arbitrum core pools. They are compared against the
# `pool.id` field of subgraph snapshots when fees are collected.
ARB_CORE_POOLS = [
    "0xbe0f30217be1e981add883848d0773a86d2d2cd4000000000000000000000471",
    "0x36bf227d6bac96e2ab1ebb5492ecec69c691943f000200000000000000000316",
    "0x4a2f6ae7f3e5d715689530873ec35593dc28951b000000000000000000000481",
    "0xc6eee8cb7643ec2f05f46d569e9ec8ef8b41b389000000000000000000000475",
    "0x32df62dc3aed2cd6224193052ce665dc181658410002000000000000000003bd",
    "0x45c4d1376943ab28802b995acffc04903eb5223f000000000000000000000470",
    "0x9cebf13bb702f253abf1579294694a1edad00eaa000000000000000000000486",
]
# Balancer v2 subgraph endpoint for Arbitrum.
ARB_BALANCER_GRAPH_URL = "https://api.thegraph.com/subgraphs/name/balancer-labs/balancer-arbitrum-v2"
# Assumed average Arbitrum block time in seconds. It is only used to convert
# the look-back period into a number of blocks.
# NOTE(review): this is an estimate; if it does not match the real block rate,
# the look-back window will not be exactly two weeks - confirm against the
# fetched block timestamps.
ARB_CHAIN_BLOCK_TIME = 0.27

# Constants for Ethereum mainnet.
# Balancer pool ids of the mainnet core pools. They are compared against the
# `pool.id` field of subgraph snapshots when fees are collected.
MAINNET_CORE_POOLS = [
    "0x41503c9d499ddbd1dcdf818a1b05e9774203bf46000000000000000000000594",
    "0x1e19cf2d73a72ef1332c882f20534b6519be0276000200000000000000000112",
    "0xae8535c23afedda9304b03c68a3563b75fc8f92b0000000000000000000005a0",
    "0x4c81255cc9ed7062180ea99962fe05ac0d57350b0000000000000000000005a3",
    "0x42ed016f826165c2e5976fe5bc3df540c5ad0af700000000000000000000058b",
    "0xf16aee6a71af1a9bc8f56975a4c2705ca7a782bc0002000000000000000004bb",
    "0xb08885e6026bab4333a80024ec25a1a3e1ff2b8a000200000000000000000445",
    "0xc2b021133d1b0cf07dba696fd5dd89338428225b000000000000000000000598",
    "0xdfe6e7e18f6cc65fa13c8d8966013d4fda74b6ba000000000000000000000558",
    "0x5f1f4e50ba51d723f12385a8a9606afc3a0555f5000200000000000000000465",
    "0x1ee442b5326009bb18f2f472d3e0061513d1a0ff000200000000000000000464",
    "0x9f9d900462492d4c21e9523ca95a7cd86142f298000200000000000000000462",
    "0x639883476960a23b38579acfd7d71561a0f408cf000200000000000000000505",
    "0x2e848426aec6dbf2260535a5bea048ed94d9ff3d000000000000000000000536",
    "0x36be1e97ea98ab43b4debf92742517266f5731a3000200000000000000000466",
]
# Balancer v2 subgraph endpoint for mainnet.
MAINNET_BALANCER_GRAPH_URL = "https://api.thegraph.com/subgraphs/name/balancer-labs/balancer-v2"
# Assumed average mainnet block time in seconds. It is only used to convert
# the look-back period into a number of blocks.
MAINNET_CHAIN_BLOCK_TIME = 12

# Constants for Polygon.
# Balancer v2 subgraph endpoint for Polygon.
POLYGON_BALANCER_GRAPH_URL = "https://api.thegraph.com/subgraphs/name/balancer-labs/balancer-polygon-v2"
# Assumed average Polygon block time in seconds. It is only used to convert
# the look-back period into a number of blocks.
POLYGON_CHAIN_BLOCK_TIME = 2
# Balancer pool ids of the Polygon core pools. They are compared against the
# `pool.id` field of subgraph snapshots when fees are collected.
POLYGON_CORE_POOLS = [
    "0x03090a9811181a2afe830a3a0b467698ccf3a8b1000000000000000000000bf5",
    "0x9321e2250767d79bab5aa06daa8606a2b3b7b4c5000000000000000000000bf4",
    "0xac2cae8d2f78a4a8f92f20dbe74042cd0a8d5af3000000000000000000000be2",
    "0x402cfdb7781fa85d52f425352661128250b79e12000000000000000000000be3",
    "0xab269164a10fab22bc87c39946da06c870b172d6000000000000000000000bfc"
]
# For every core pool, store the value returned by get_twap_bpt_price and echo
# it under the chain's header. Each pool is queried exactly once: the stored
# value is what gets printed, so storing and printing cannot disagree and no
# request is issued twice.
print("ARBITRUM")
arb_bpt_twap_prices = {}
for arb_pool in ARB_CORE_POOLS:
    arb_bpt_twap_prices[arb_pool] = get_twap_bpt_price(arb_pool, 'arbitrum', arb_web3)
    print(arb_bpt_twap_prices[arb_pool])

print("MAINNET")
mainnet_bpt_twap_prices = {}
for mainnet_pool in MAINNET_CORE_POOLS:
    mainnet_bpt_twap_prices[mainnet_pool] = get_twap_bpt_price(mainnet_pool, 'mainnet', eth_web3)
    print(mainnet_bpt_twap_prices[mainnet_pool])

print("POLYGON")
polygon_bpt_twap_prices = {}
for polygon_pool in POLYGON_CORE_POOLS:
    polygon_bpt_twap_prices[polygon_pool] = get_twap_bpt_price(polygon_pool, 'polygon', poly_web3)
    print(polygon_bpt_twap_prices[polygon_pool])


ARBITRUM
1664.325854325530001134427742
1678.238862410888894378599026
1670.977798491140066241908011
0.9906605958376910375689404110
1.173851070915343989675147645
1668.730132563883403688566128
1.000025925199399329690246051
MAINNET
1666.438758767065592600247809
1711.871839104478311999119538
1655.734096347396131918271244
1665.661926991577507295464904
1670.362861604763453888576930
24.91420924682509453423001572
1678.115126894037007703250708
0.9849123889501633554769046261
1674.674304490441818435885528
55.35542349787851525628040549
60.22846679963345422730886857
208.2058275694412925067884878
0.7435992240680706906129825548
1678.997252827061916150051147
10.02323332815189803288160323
POLYGON
0.9998366158156138384215492515
0.9916650982404577986436149912
0.5609691957752448541820939103
0.5604728351490517554995250398
1668.358660386120586874633164


In [9]:
# GraphQL query template for Balancer pool snapshots.
# It is rendered with str.format, so the doubled braces are literal GraphQL
# braces and the placeholders are:
#   {first} - page size
#   {skip}  - number of records to skip (pagination offset)
#   {block} - block number at which the subgraph state is read
# Snapshots are returned newest first, and only those with a non-null
# protocolFee are included.
POOLS_SNAPSHOTS_QUERY = """
{{
  poolSnapshots(
    first: {first}
    skip: {skip}
    orderBy: timestamp
    orderDirection: desc
    block: {{ number: {block} }}
    where: {{ protocolFee_not: null }}
  ) {{
    pool {{
      address
      id
      symbol
      totalProtocolFeePaidInBPT
      tokens {{
        symbol
        address
        paidProtocolFees
      }}
    }}
    timestamp
    protocolFee
    swapFees
    swapVolume
    liquidity
  }}
}}
"""

## Populate Mimic Data

In [None]:
# Load mimic data
import pandas as pd
from web3 import Web3

# Build a per-token average sale price table from the mimic sale events.
mimic_df = pd.read_csv('../data/balancer_last_period_updated.csv')
# Normalise token addresses to checksum form BEFORE grouping, so that the same
# token written with different letter casing is averaged as one token and ends
# up as a single row after de-duplication.
mimic_df['token'] = mimic_df['token'].apply(Web3.toChecksumAddress)
# Convert all amounts to float
mimic_df['amount'] = mimic_df['amount'].astype(float)
# Price per token for each sale event.
# NOTE(review): dividing by 1e18 assumes `amount` is expressed in 18-decimal
# base units for every token - confirm for tokens with other decimals.
mimic_df['price_per_token'] = mimic_df['amount_usd'] / (mimic_df['amount'] / 1e18)
# Average the per-event prices of each token across all of its sale events
mimic_df['avg_price_per_token'] = mimic_df.groupby('token')['price_per_token'].transform('mean')
# Keep one row per (network, token) with its average price
mimic_df = mimic_df[['network', 'token', 'avg_price_per_token']].drop_duplicates()

## Fetching data from the Balancer subgraphs

In [25]:
import datetime
from typing import Dict
from typing import List
from typing import Optional

from gql import Client
from gql import gql
from gql.transport.requests import RequestsHTTPTransport

# Length of the look-back period, in seconds (two weeks).
LOOKBACK_SECONDS = 2 * 7 * 24 * 60 * 60


def _lookback_window(web3_client, block_now, block_time):
    """Resolve the reporting window that ends at ``block_now`` on one chain.

    The starting block is estimated by dividing the look-back period by the
    chain's assumed average ``block_time`` (seconds); it is not looked up by
    timestamp, so the real window length depends on how accurate that
    estimate is.

    Returns a tuple ``(timestamp_now, block_then, timestamp_then,
    datetime_now, datetime_then)`` where ``block_then`` is an ``int`` and the
    datetimes are naive values produced by ``datetime.fromtimestamp``.
    """
    timestamp_now = web3_client.eth.get_block(block_now).timestamp
    # Truncate to a whole block number once, here, so callers never see a float.
    block_then = int(block_now - LOOKBACK_SECONDS / block_time)
    timestamp_then = web3_client.eth.get_block(block_then).timestamp
    # NOTE(review): fromtimestamp() without tz yields local time, so the
    # reported window depends on the machine's timezone - confirm intended.
    return (
        timestamp_now,
        block_then,
        timestamp_then,
        datetime.datetime.fromtimestamp(timestamp_now),
        datetime.datetime.fromtimestamp(timestamp_then),
    )


# ARBITRUM
# TODO: derive the end block from the chain head
# (e.g. arb_web3.eth.block_number - 1000) instead of pinning it.
arb_block_now = 122428959  # 18 August 2023
(
    arb_timestamp_now,
    arb_block_2_weeks_ago,
    arb_timestamp_2_weeks_ago,
    arb_datetime_now,
    arb_datetime_2_weeks_ago,
) = _lookback_window(arb_web3, arb_block_now, ARB_CHAIN_BLOCK_TIME)

# MAINNET
# TODO: derive the end block from the chain head
# (e.g. eth_web3.eth.block_number - 1000) instead of pinning it.
mainnet_block_now = 17938293
(
    mainnet_timestamp_now,
    mainnet_block_2_weeks_ago,
    mainnet_timestamp_2_weeks_ago,
    mainnet_datetime_now,
    mainnet_datetime_2_weeks_ago,
) = _lookback_window(eth_web3, mainnet_block_now, MAINNET_CHAIN_BLOCK_TIME)

# POLYGON
# TODO: derive the end block from the chain head
# (e.g. poly_web3.eth.block_number - 1000) instead of pinning it.
poly_block_now = 46439663
(
    poly_timestamp_now,
    poly_block_2_weeks_ago,
    poly_timestamp_2_weeks_ago,
    poly_datetime_now,
    poly_datetime_2_weeks_ago,
) = _lookback_window(poly_web3, poly_block_now, POLYGON_CHAIN_BLOCK_TIME)


# Fetch all the data from the balancer subgraph
def make_gql_client(url: str) -> Optional[Client]:
    """Create a gql ``Client`` for the GraphQL endpoint at ``url``.

    The client uses a requests-based HTTP transport configured with 3 retries,
    fetches the schema from the transport, and applies a 60 second execute
    timeout.
    """
    return Client(
        transport=RequestsHTTPTransport(url=url, retries=3),
        fetch_schema_from_transport=True,
        execute_timeout=60,
    )


def get_balancer_pool_snapshots(block: int, graph_url: str) -> Optional[List[Dict]]:
    """Download pool snapshots from a Balancer subgraph as of ``block``.

    Pages through ``POOLS_SNAPSHOTS_QUERY`` 1000 records at a time and stops
    after the first page that is not full, or once 5000 records have been
    requested, whichever comes first.

    Args:
        block: Block number at which the subgraph state is queried.
        graph_url: URL of the subgraph endpoint.

    Returns:
        The ``poolSnapshots`` records of all fetched pages, in the order the
        subgraph returned them (the query sorts by timestamp, newest first).
        At most 5000 records are returned; anything beyond that is not fetched.
    """
    page_size = 1000
    # Hard cap on the pagination offset.
    # NOTE(review): presumably mirrors the subgraph's `skip` limit - confirm.
    max_skip = 5000

    client = make_gql_client(graph_url)
    snapshots = []
    for skip in range(0, max_skip, page_size):
        query = gql(POOLS_SNAPSHOTS_QUERY.format(first=page_size, skip=skip, block=block))
        page = client.execute(query)['poolSnapshots']
        snapshots.extend(page)
        # Any page shorter than the page size is the last one. Comparing
        # against `page_size` (not `page_size - 1`) avoids a pointless extra
        # request when a page holds exactly page_size - 1 records.
        if len(page) < page_size:
            break
    return snapshots


# Fetch two snapshot sets per chain: one at the pinned "now" block and one at
# the estimated look-back block (truncated to a whole block number).
arbi_pool_snapshots_now = get_balancer_pool_snapshots(
    block=arb_block_now, graph_url=ARB_BALANCER_GRAPH_URL,
)
arbi_pool_snapshots_2_weeks_ago = get_balancer_pool_snapshots(
    block=int(arb_block_2_weeks_ago), graph_url=ARB_BALANCER_GRAPH_URL,
)

mainnet_pool_snapshots_now = get_balancer_pool_snapshots(
    block=mainnet_block_now, graph_url=MAINNET_BALANCER_GRAPH_URL,
)
mainnet_pool_snapshots_2_weeks_ago = get_balancer_pool_snapshots(
    block=int(mainnet_block_2_weeks_ago), graph_url=MAINNET_BALANCER_GRAPH_URL,
)

polygon_pool_snapshots_now = get_balancer_pool_snapshots(
    block=poly_block_now, graph_url=POLYGON_BALANCER_GRAPH_URL,
)
polygon_pool_snapshots_2_weeks_ago = get_balancer_pool_snapshots(
    block=int(poly_block_2_weeks_ago), graph_url=POLYGON_BALANCER_GRAPH_URL,
)

## Extract fee data from CORE pools:


In [17]:
from collections import defaultdict


def _latest_snapshot(snapshots: list[dict], pool_id: str):
    """Return the snapshot of ``pool_id`` with the highest timestamp, or None."""
    return max(
        (snap for snap in snapshots if snap['pool']['id'] == pool_id),
        key=lambda snap: snap['timestamp'],
        default=None,
    )


def _paid_protocol_fees(token: dict) -> float:
    """Return a token's cumulative ``paidProtocolFees`` as float (0.0 when null or absent)."""
    return float(token.get('paidProtocolFees') or 0)


def collect_fee_info(pools: list[str], chain: str, pools_now: list[dict], pools_shifted: list[dict]) -> tuple[
    dict, dict]:
    """Compute the fees each pool accrued between two sets of snapshots.

    For every pool id in ``pools`` the most recent snapshot in ``pools_now``
    and in ``pools_shifted`` is selected, and the cumulative counters of the
    older snapshot are subtracted from those of the newer one. Pools without
    a snapshot in either set are skipped.

    Args:
        pools: Pool ids to report on.
        chain: Chain label stored in every produced record. It also selects
            the reporting window ('arbitrum', 'mainnet' or 'polygon'); any
            other value falls back to the Arbitrum window.
        pools_now: Snapshot records at the end of the period.
        pools_shifted: Snapshot records at the start of the period.

    Returns:
        ``(fees, token_fees)``. ``fees`` maps pool symbol to a dict with
        ``pool_fee``, ``swap_fee``, ``bpt_token_fee``, ``bpt_token_fee_in_usd``,
        ``time_from``, ``time_to`` and ``chain``. ``token_fees`` maps pool
        symbol to a list of per-token records (``token``, ``token_fee``,
        ``token_addr``, ``time_from``, ``time_to``, ``chain``).
    """
    # Each chain has its own look-back window; label records with the window of
    # the chain they belong to rather than always using the Arbitrum one.
    chain_windows = {
        'arbitrum': (arb_datetime_2_weeks_ago, arb_datetime_now),
        'mainnet': (mainnet_datetime_2_weeks_ago, mainnet_datetime_now),
        'polygon': (poly_datetime_2_weeks_ago, poly_datetime_now),
    }
    time_from, time_to = chain_windows.get(chain, chain_windows['arbitrum'])

    fees = {}
    token_fees = defaultdict(list)
    for pool in pools:
        snapshot_now = _latest_snapshot(pools_now, pool)
        snapshot_before = _latest_snapshot(pools_shifted, pool)
        if snapshot_now is None or snapshot_before is None:
            continue
        pool_now = snapshot_now['pool']
        pool_before = snapshot_before['pool']
        symbol = pool_now['symbol']

        # Counters are cumulative, so the period's fees are "now minus before".
        pool_fee = float(snapshot_now['protocolFee']) - float(snapshot_before['protocolFee'])
        pool_swap_fee = float(snapshot_now['swapFees']) - float(snapshot_before['swapFees'])

        # Price of the pool's own token (BPT) from the mimic table; 0 when unknown.
        price_matches = mimic_df[mimic_df['token'] == Web3.toChecksumAddress(pool_now['address'])][
            'avg_price_per_token'].values
        bpt_price_usd = price_matches[0] if len(price_matches) else 0

        bpt_token_fee = 0
        bpt_paid_now = pool_now['totalProtocolFeePaidInBPT']
        bpt_paid_before = pool_before['totalProtocolFeePaidInBPT']
        if bpt_paid_now is not None and bpt_paid_before is not None:
            # Fees were paid in BPT: report a single record for the pool token.
            bpt_token_fee = float(bpt_paid_now) - float(bpt_paid_before)
            token_fees[symbol].append({
                'token': symbol,
                'token_fee': bpt_token_fee,
                'token_addr': pool_now['address'],
                'time_from': time_from,
                'time_to': time_to,
                'chain': chain,
            })
        else:
            # totalProtocolFeePaidInBPT is null: fall back to the fees paid in
            # each of the pool's tokens. Iterating in reverse keeps the FIRST
            # entry when an address appears more than once.
            before_by_address = {t['address']: t for t in reversed(pool_before['tokens'])}
            for token_data in pool_now['tokens']:
                # A token absent from the older snapshot, or with a null
                # counter, is treated as having paid 0 up to that point.
                token_before = before_by_address.get(token_data['address'], {})
                token_fee = _paid_protocol_fees(token_data) - _paid_protocol_fees(token_before)
                token_fees[symbol].append({
                    'token': token_data['symbol'],
                    'token_fee': token_fee,
                    'token_addr': token_data['address'],
                    'time_from': time_from,
                    'time_to': time_to,
                    'chain': chain,
                })

        fees[symbol] = {
            'pool_fee': round(pool_fee, 2),
            'swap_fee': round(pool_swap_fee, 2),
            'bpt_token_fee': round(bpt_token_fee, 2),
            # BPT fee valued at the average BPT price taken from mimic_df
            'bpt_token_fee_in_usd': bpt_token_fee * bpt_price_usd,
            'time_from': time_from,
            'time_to': time_to,
            'chain': chain,
        }
    return fees, token_fees


# Collect pool-level and token-level fee deltas for each chain.
arb_fees, arb_token_fees = collect_fee_info(
    ARB_CORE_POOLS, 'arbitrum', arbi_pool_snapshots_now, arbi_pool_snapshots_2_weeks_ago,
)
mainnet_fees, mainnet_token_fees = collect_fee_info(
    MAINNET_CORE_POOLS, 'mainnet', mainnet_pool_snapshots_now, mainnet_pool_snapshots_2_weeks_ago,
)
polygon_fees, polygon_token_fees = collect_fee_info(
    POLYGON_CORE_POOLS, 'polygon', polygon_pool_snapshots_now, polygon_pool_snapshots_2_weeks_ago,
)

# Merge the per-chain results into one table with one row per pool symbol,
# ordered by chain and then pool fee, both descending.
# NOTE(review): the dicts are keyed by pool symbol, so if two chains ever
# share a symbol the later chain's entry replaces the earlier one.
joint_fees = arb_fees | mainnet_fees | polygon_fees
joint_fees_df = pd.DataFrame.from_dict(joint_fees, orient='index').sort_values(
    by=['chain', 'pool_fee'], ascending=False,
)

In [19]:
# Combine the per-chain token fee records into one mapping of
# pool symbol -> list of records. Lists are concatenated rather than merged
# with {**a, **b}, so a pool symbol that exists on more than one chain keeps
# the records of every chain instead of only the last one.
joint_token_fees = {}
for chain_token_fees in (arb_token_fees, mainnet_token_fees, polygon_token_fees):
    for pool_symbol, records in chain_token_fees.items():
        joint_token_fees.setdefault(pool_symbol, []).extend(records)

# Flatten into a DataFrame with one row per record, indexed by
# (pool symbol, position of the record within that pool's list).
joint_token_fees_df = pd.DataFrame.from_dict(
    {
        (pool_symbol, token_ix): record
        for pool_symbol, records in joint_token_fees.items()
        for token_ix, record in enumerate(records)
    },
    orient='index',
)
# Name the two index levels
joint_token_fees_df.index.names = ['pool', 'token_ix']

## Populate token fees in USD using Mimic data

In [27]:
# Value every token fee in USD using the average prices from mimic_df.

# Build the address -> price lookup once. When an address occurs on several
# rows the first one wins.
price_by_token = {}
for mimic_token, mimic_price in zip(mimic_df['token'], mimic_df['avg_price_per_token']):
    price_by_token.setdefault(mimic_token, mimic_price)

token_prices = []
token_fees_usd = []
for token_addr, token_fee in zip(joint_token_fees_df['token_addr'], joint_token_fees_df['token_fee']):
    token_price = price_by_token.get(Web3.toChecksumAddress(token_addr))
    if token_price is None:
        # No price known for this token: report a 0 USD fee and leave the price empty
        token_prices.append(float('nan'))
        token_fees_usd.append(0.0)
    else:
        token_prices.append(token_price)
        token_fees_usd.append(round(token_fee * token_price, 2))

# Assign whole columns at once, so both columns always exist - even when no
# token at all has a known price - and the column selection below cannot fail.
joint_token_fees_df['token_fee_usd'] = token_fees_usd
joint_token_fees_df['token_price'] = token_prices

# Reorder columns for display:
joint_token_fees_df_copy = joint_token_fees_df[
    ['chain', 'token', 'token_price', 'token_fee_usd', 'token_fee', 'time_from', 'time_to']]

# Print out token fees in USD
joint_token_fees_df_copy

Unnamed: 0_level_0,Unnamed: 1_level_0,chain,token,token_price,token_fee_usd,token_fee,time_from,time_to,token_addr
pool,token_ix,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
rETH-bb-a-WETH-BPT,0,arbitrum,bb-a-WETH,,0.0,0.0,2023-07-21 15:40:36,2023-08-18 03:24:52,0xad28940024117b442a9efb6d0f25c8b59e1c950b
rETH-bb-a-WETH-BPT,1,arbitrum,rETH-bb-a-WETH-BPT,1824.888612,0.0,0.0,2023-07-21 15:40:36,2023-08-18 03:24:52,0xbe0f30217be1e981add883848d0773a86d2d2cd4
rETH-bb-a-WETH-BPT,2,arbitrum,rETH,,0.0,0.0,2023-07-21 15:40:36,2023-08-18 03:24:52,0xec70dcb4a1efa46b8f2d97c310c9c4790ba5ffa8
B-wstETH-WETH-Stable,0,arbitrum,wstETH,2080.513177,8709.65,4.186298,2023-07-21 15:40:36,2023-08-18 03:24:52,0x5979d7b546e38e414f7e9822514be443a4800529
B-wstETH-WETH-Stable,1,arbitrum,WETH,1811.358945,2007.04,1.10803,2023-07-21 15:40:36,2023-08-18 03:24:52,0x82af49447d8a07e3bd95bd0d56f35241523fbab1
wstETH/rETH/cbETH,0,arbitrum,cbETH,,0.0,0.0,2023-07-21 15:40:36,2023-08-18 03:24:52,0x1debd73e752beaf79865fd6446b0c970eae7732f
wstETH/rETH/cbETH,1,arbitrum,wstETH/rETH/cbETH,,0.0,0.0,2023-07-21 15:40:36,2023-08-18 03:24:52,0x4a2f6ae7f3e5d715689530873ec35593dc28951b
wstETH/rETH/cbETH,2,arbitrum,wstETH,2080.513177,0.0,0.0,2023-07-21 15:40:36,2023-08-18 03:24:52,0x5979d7b546e38e414f7e9822514be443a4800529
wstETH/rETH/cbETH,3,arbitrum,rETH,,0.0,0.0,2023-07-21 15:40:36,2023-08-18 03:24:52,0xec70dcb4a1efa46b8f2d97c310c9c4790ba5ffa8
bb-a-USD,0,arbitrum,bb-a-USD,1.001321,4202.97,4197.4265,2023-07-21 15:40:36,2023-08-18 03:24:52,0xc6eee8cb7643ec2f05f46d569e9ec8ef8b41b389
