## Collecting protocol fees across Balancer core pools on all networks
- Spreadsheet as reference: https://docs.google.com/spreadsheets/d/1xwUPpbYq7woVOU9vQ8EB8MY75I-1mauTLyDVwvKUDKo/edit#gid=0
- Collab: https://colab.research.google.com/drive/1vKCvcV5mkL1zwW3565kLSGkBEbt8NsoB?usp=sharing


In [43]:
# Define constants for Arbitrum:
ARB_CORE_POOLS = [
    "0xbe0f30217be1e981add883848d0773a86d2d2cd4000000000000000000000471",
    "0x36bf227d6bac96e2ab1ebb5492ecec69c691943f000200000000000000000316",
    "0x4a2f6ae7f3e5d715689530873ec35593dc28951b000000000000000000000481",
    "0xc6eee8cb7643ec2f05f46d569e9ec8ef8b41b389000000000000000000000475",
    "0x32df62dc3aed2cd6224193052ce665dc181658410002000000000000000003bd",
    "0x45c4d1376943ab28802b995acffc04903eb5223f000000000000000000000470",
    "0x9cebf13bb702f253abf1579294694a1edad00eaa000000000000000000000486",
]
ARB_BALANCER_GRAPH_URL = "https://api.thegraph.com/subgraphs/name/balancer-labs/balancer-arbitrum-v2"
# Arb block time is .3 seconds
ARB_CHAIN_BLOCK_TIME = 0.27

MAINNET_CORE_POOLS = [
    "0x41503c9d499ddbd1dcdf818a1b05e9774203bf46000000000000000000000594",
    "0x1e19cf2d73a72ef1332c882f20534b6519be0276000200000000000000000112",
    "0xae8535c23afedda9304b03c68a3563b75fc8f92b0000000000000000000005a0",
    "0x4c81255cc9ed7062180ea99962fe05ac0d57350b0000000000000000000005a3",
    "0x42ed016f826165c2e5976fe5bc3df540c5ad0af700000000000000000000058b",
    "0xf16aee6a71af1a9bc8f56975a4c2705ca7a782bc0002000000000000000004bb",
    "0xb08885e6026bab4333a80024ec25a1a3e1ff2b8a000200000000000000000445",
    "0xc2b021133d1b0cf07dba696fd5dd89338428225b000000000000000000000598",
    "0xdfe6e7e18f6cc65fa13c8d8966013d4fda74b6ba000000000000000000000558",
    "0x5f1f4e50ba51d723f12385a8a9606afc3a0555f5000200000000000000000465",
    "0x1ee442b5326009bb18f2f472d3e0061513d1a0ff000200000000000000000464",
    "0x9f9d900462492d4c21e9523ca95a7cd86142f298000200000000000000000462",
    "0x639883476960a23b38579acfd7d71561a0f408cf000200000000000000000505",
    "0x2e848426aec6dbf2260535a5bea048ed94d9ff3d000000000000000000000536",
    "0x36be1e97ea98ab43b4debf92742517266f5731a3000200000000000000000466",
]
MAINNET_BALANCER_GRAPH_URL = "https://api.thegraph.com/subgraphs/name/balancer-labs/balancer-v2"
MAINNET_CHAIN_BLOCK_TIME = 12

POLYGON_BALANCER_GRAPH_URL = "https://api.thegraph.com/subgraphs/name/balancer-labs/balancer-polygon-v2"
# Polygon block time is 2 seconds
POLYGON_CHAIN_BLOCK_TIME = 2
POLYGON_CORE_POOLS = [
    "0x03090a9811181a2afe830a3a0b467698ccf3a8b1000000000000000000000bf5",
    "0x9321e2250767d79bab5aa06daa8606a2b3b7b4c5000000000000000000000bf4",
    "0xac2cae8d2f78a4a8f92f20dbe74042cd0a8d5af3000000000000000000000be2",
    "0x402cfdb7781fa85d52f425352661128250b79e12000000000000000000000be3",
    "0xab269164a10fab22bc87c39946da06c870b172d6000000000000000000000bfc"
]

In [44]:
# Query:
POOLS_SNAPSHOTS_QUERY = """
{{
  poolSnapshots(
    first: {first}
    skip: {skip}
    orderBy: timestamp
    orderDirection: desc
    block: {{ number: {block} }}
    where: {{ protocolFee_not: null }}
  ) {{
    pool {{
      address
      id
      symbol
      totalProtocolFeePaidInBPT
      tokens {{
        symbol
        address
        paidProtocolFees
      }}
    }}
    timestamp
    protocolFee
    swapFees
    swapVolume
    liquidity
  }}
}}
"""

## Fetching data from the Balancer subgraphs

In [45]:
import os
import datetime
from typing import Dict
from typing import List
from typing import Optional

from dotenv import load_dotenv
from gql import Client
from gql import gql
from gql.transport.requests import RequestsHTTPTransport
from web3 import Web3
from web3.middleware import geth_poa_middleware

load_dotenv()
arb_web3 = Web3(Web3.HTTPProvider(os.environ["ARBNODEURL"]))
eth_web3 = Web3(Web3.HTTPProvider(os.environ["ETHNODEURL"]))
poly_web3 = Web3(Web3.HTTPProvider("https://polygon-rpc.com"))
poly_web3.middleware_onion.inject(geth_poa_middleware, layer=0)
# ARBITRUM
arb_block_now = arb_web3.eth.block_number - 1000
arb_timestamp_now = arb_web3.eth.get_block(arb_block_now).timestamp
# Given Arb block time, we want to look back 2 weeks:
arb_block_2_weeks_ago = arb_block_now - (2 * 7 * 24 * 60 * 60 / ARB_CHAIN_BLOCK_TIME)
arb_timestamp_2_weeks_ago = arb_web3.eth.get_block(int(arb_block_2_weeks_ago)).timestamp
# Convert to datetime:
arb_datetime_now = datetime.datetime.fromtimestamp(arb_timestamp_now)
arb_datetime_2_weeks_ago = datetime.datetime.fromtimestamp(arb_timestamp_2_weeks_ago)
# MAINNET
mainnet_block_now = eth_web3.eth.block_number - 1000
mainnet_timestamp_now = eth_web3.eth.get_block(mainnet_block_now).timestamp
# Given mainnet block time, we want to look back 2 weeks:
mainnet_block_2_weeks_ago = mainnet_block_now - (2 * 7 * 24 * 60 * 60 / MAINNET_CHAIN_BLOCK_TIME)
mainnet_timestamp_2_weeks_ago = eth_web3.eth.get_block(int(mainnet_block_2_weeks_ago)).timestamp
# Convert to datetime:
mainnet_datetime_now = datetime.datetime.fromtimestamp(mainnet_timestamp_now)
mainnet_datetime_2_weeks_ago = datetime.datetime.fromtimestamp(mainnet_timestamp_2_weeks_ago)

# POLYGON
poly_block_now = poly_web3.eth.block_number - 1000
poly_timestamp_now = poly_web3.eth.get_block(poly_block_now).timestamp
# Given polygon block time, we want to look back 2 weeks:
poly_block_2_weeks_ago = poly_block_now - (2 * 7 * 24 * 60 * 60 / POLYGON_CHAIN_BLOCK_TIME)
poly_timestamp_2_weeks_ago = poly_web3.eth.get_block(int(poly_block_2_weeks_ago)).timestamp
# Convert to datetime:
poly_datetime_now = datetime.datetime.fromtimestamp(poly_timestamp_now)
poly_datetime_2_weeks_ago = datetime.datetime.fromtimestamp(poly_timestamp_2_weeks_ago)

# Fetch all the data from the balancer subgraph
def make_gql_client(url: str) -> Optional[Client]:
    transport = RequestsHTTPTransport(url=url, retries=3)
    return Client(
        transport=transport, fetch_schema_from_transport=True, execute_timeout=60
    )


def get_balancer_pool_snapshots(block: int, graph_url: str) -> Optional[List[Dict]]:
    client = make_gql_client(graph_url)
    all_pools = []
    limit = 1000
    offset = 0
    while True:
        result = client.execute(
            gql(POOLS_SNAPSHOTS_QUERY.format(first=limit, skip=offset, block=block)))
        all_pools.extend(result['poolSnapshots'])
        offset += limit
        if offset >= 5000:
            break
        if len(result['poolSnapshots']) < limit - 1:
            break
    return all_pools


arbi_pool_snapshots_now = get_balancer_pool_snapshots(arb_block_now, ARB_BALANCER_GRAPH_URL)
arbi_pool_snapshots_2_weeks_ago = get_balancer_pool_snapshots(int(arb_block_2_weeks_ago), ARB_BALANCER_GRAPH_URL)

mainnet_pool_snapshots_now = get_balancer_pool_snapshots(mainnet_block_now, MAINNET_BALANCER_GRAPH_URL)
mainnet_pool_snapshots_2_weeks_ago = get_balancer_pool_snapshots(int(mainnet_block_2_weeks_ago), MAINNET_BALANCER_GRAPH_URL)

polygon_pool_snapshots_now = get_balancer_pool_snapshots(poly_block_now, POLYGON_BALANCER_GRAPH_URL)
polygon_pool_snapshots_2_weeks_ago = get_balancer_pool_snapshots(int(poly_block_2_weeks_ago), POLYGON_BALANCER_GRAPH_URL)

## Extract fee data from CORE pools:


In [46]:
from collections import defaultdict
import pandas as pd

def collect_fee_info(pools: list[str], chain: str, pools_now: list[dict], pools_shifted: list[dict]) -> tuple[dict, dict]:
    # Iterate through snapshots now and 2 weeks ago and extract fee data, by subtracting today's fee data from 2 weeks ago
    # and then summing across all pools
    fees = {}
    token_fees = defaultdict(list)
    for pool in pools:
        current_fees_snapshots = [x for x in pools_now if x['pool']['id'] == pool]
        current_fees_snapshots.sort(key=lambda x: x['timestamp'], reverse=True)
        fees_2_weeks_ago = [x for x in pools_shifted if x['pool']['id'] == pool]
        fees_2_weeks_ago.sort(key=lambda x: x['timestamp'], reverse=True)
        # Take first element of list, which is the most recent snapshot
        if not current_fees_snapshots or not fees_2_weeks_ago:
            continue
        pool_snapshot_now = current_fees_snapshots[0]
        pool_snapshot_2_weeks_ago = fees_2_weeks_ago[0]
        # Calculate fees
        pool_fee = float(pool_snapshot_now['protocolFee']) - float(pool_snapshot_2_weeks_ago['protocolFee'])
        pool_swap_fee = float(pool_snapshot_now['swapFees']) - float(pool_snapshot_2_weeks_ago['swapFees'])
        # Now we need to collect token fee info. Let's start with BPT tokens, which is Balancer pool token. Notice,
        # That totalProtocolFeePaidInBPT can be null, so we need to check for that
        bpt_token_fee = 0
        if pool_snapshot_now['pool']['totalProtocolFeePaidInBPT'] is not None and pool_snapshot_2_weeks_ago['pool']['totalProtocolFeePaidInBPT'] is not None:
            bpt_token_fee = float(pool_snapshot_now['pool']['totalProtocolFeePaidInBPT']) - float(pool_snapshot_2_weeks_ago['pool']['totalProtocolFeePaidInBPT'])
        # Now collect fee info about fees paid in pool tokens. Pool tokens fee info is in pool.tokens dictionary. This will be separate dictionary
        else:
            for token_data in pool_snapshot_now['pool']['tokens']:
                token_data_2_weeks_ago = [t for t in pool_snapshot_2_weeks_ago['pool']['tokens'] if t['address'] == token_data['address']][0]
                token_fee = float(token_data.get('paidProtocolFees', None)) - float(token_data_2_weeks_ago.get('paidProtocolFees', None))
                token_fees[pool_snapshot_now['pool']['symbol']].append({
                    'token': token_data['symbol'],
                    'token_fee': token_fee,
                    'time_from': arb_datetime_2_weeks_ago,
                    'time_to': arb_datetime_now,
                    'chain': chain,
                })
        fees[pool_snapshot_now['pool']['symbol']] = {
            'pool_fee': round(pool_fee, 2), 
            'swap_fee': round(pool_swap_fee, 2),
            'bpt_token_fee': round(bpt_token_fee, 2),
            'time_from': arb_datetime_2_weeks_ago,
            'time_to': arb_datetime_now,
            'chain': chain,
        }
    return fees, token_fees

arb_fees, arb_token_fees = collect_fee_info(ARB_CORE_POOLS, 'arbitrum', arbi_pool_snapshots_now, arbi_pool_snapshots_2_weeks_ago)
mainnet_fees, mainnet_token_fees = collect_fee_info(MAINNET_CORE_POOLS, 'mainnet', mainnet_pool_snapshots_now, mainnet_pool_snapshots_2_weeks_ago)
polygon_fees, polygon_token_fees = collect_fee_info(POLYGON_CORE_POOLS, 'polygon', polygon_pool_snapshots_now, polygon_pool_snapshots_2_weeks_ago)
# Convert to dataframe, sort by chain and pool fee
joint_fees = {**arb_fees, **mainnet_fees, **polygon_fees}
joint_fees_df = pd.DataFrame.from_dict(joint_fees, orient='index')
joint_fees_df.sort_values(by=['chain', 'pool_fee'], ascending=False, inplace=True)
joint_fees_df

Unnamed: 0,pool_fee,swap_fee,bpt_token_fee,time_from,time_to,chain
MaticX-bb-a-WMATIC-BPT,3286.74,317.61,4822.53,2023-08-02 21:42:04,2023-08-16 17:46:15,polygon
stMATIC-bb-a-WMATIC-BPT,1727.73,79.73,0.0,2023-08-02 21:42:04,2023-08-16 17:46:15,polygon
wstETH-bb-a-WETH-BPT,0.0,0.02,0.0,2023-08-02 21:42:04,2023-08-16 17:46:15,polygon
bb-am-usd,0.0,0.0,0.0,2023-08-02 21:42:04,2023-08-16 17:46:15,polygon
wstETH-rETH-sfrxETH-BPT,27491.32,4296.08,14.74,2023-08-02 21:42:04,2023-08-16 17:46:15,mainnet
B-rETH-STABLE,25985.83,16422.47,0.0,2023-08-02 21:42:04,2023-08-16 17:46:15,mainnet
50rETH-50BADGER,17810.87,10689.62,261.64,2023-08-02 21:42:04,2023-08-16 17:46:15,mainnet
50STG-50bbaUSD,16308.63,23723.88,20756.67,2023-08-02 21:42:04,2023-08-16 17:46:15,mainnet
swETH-bb-a-WETH-BPT,7651.34,34.14,4.11,2023-08-02 21:42:04,2023-08-16 17:46:15,mainnet
ankrETH/wstETH,6248.11,984.55,3.36,2023-08-02 21:42:04,2023-08-16 17:46:15,mainnet


In [47]:
# Now parse token_fees into dataframe and sort by chain and token fee.
# Notice, that token_fees is a dictionary of lists, so we need to flatten it first
joint_token_fees = {**arb_token_fees, **mainnet_token_fees, **polygon_token_fees}
joint_token_fees_df = pd.DataFrame.from_dict({(i,j): joint_token_fees[i][j] 
                           for i in joint_token_fees.keys() 
                           for j in range(len(joint_token_fees[i]))}, orient='index')
# Remove index name
joint_token_fees_df.index.names = ['pool', 'token_ix']
# Sort by chain and token name
joint_token_fees_df.sort_values(by=['chain'], ascending=False, inplace=True)
joint_token_fees_df

Unnamed: 0_level_0,Unnamed: 1_level_0,token,token_fee,time_from,time_to,chain
pool,token_ix,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
wstETH-bb-a-WETH-BPT,2,wstETH-bb-a-WETH-BPT,0.0,2023-08-02 21:42:04,2023-08-16 17:46:15,polygon
wstETH-bb-a-WETH-BPT,1,bb-a-WETH,0.0,2023-08-02 21:42:04,2023-08-16 17:46:15,polygon
wstETH-bb-a-WETH-BPT,0,wstETH,0.0,2023-08-02 21:42:04,2023-08-16 17:46:15,polygon
stMATIC-bb-a-WMATIC-BPT,2,bb-a-WMATIC,0.0,2023-08-02 21:42:04,2023-08-16 17:46:15,polygon
stMATIC-bb-a-WMATIC-BPT,1,stMATIC-bb-a-WMATIC-BPT,0.0,2023-08-02 21:42:04,2023-08-16 17:46:15,polygon
stMATIC-bb-a-WMATIC-BPT,0,stMATIC,0.0,2023-08-02 21:42:04,2023-08-16 17:46:15,polygon
bb-am-usd,3,bb-a-USDC,0.0,2023-08-02 21:42:04,2023-08-16 17:46:15,polygon
bb-am-usd,2,bb-a-DAI,0.0,2023-08-02 21:42:04,2023-08-16 17:46:15,polygon
bb-am-usd,1,bb-a-USDT,0.0,2023-08-02 21:42:04,2023-08-16 17:46:15,polygon
bb-am-usd,0,bb-am-usd,0.0,2023-08-02 21:42:04,2023-08-16 17:46:15,polygon
