## Collecting protocol fees across Balancer core pools on all networks
Spreadsheet as reference: https://docs.google.com/spreadsheets/d/1xwUPpbYq7woVOU9vQ8EB8MY75I-1mauTLyDVwvKUDKo/edit#gid=0
Colab: https://colab.research.google.com/drive/1vKCvcV5mkL1zwW3565kLSGkBEbt8NsoB?usp=sharing


In [1]:
# Constants for Arbitrum.
# IDs of the Balancer core pools on Arbitrum whose fees are collected below.
# Each entry is compared against the `pool.id` field of a subgraph snapshot.
ARB_CORE_POOLS = [
    "0xbe0f30217be1e981add883848d0773a86d2d2cd4000000000000000000000471",
    "0x36bf227d6bac96e2ab1ebb5492ecec69c691943f000200000000000000000316",
    "0x4a2f6ae7f3e5d715689530873ec35593dc28951b000000000000000000000481",
    "0xc6eee8cb7643ec2f05f46d569e9ec8ef8b41b389000000000000000000000475",
    "0x32df62dc3aed2cd6224193052ce665dc181658410002000000000000000003bd",
    "0x45c4d1376943ab28802b995acffc04903eb5223f000000000000000000000470",
    "0x9cebf13bb702f253abf1579294694a1edad00eaa000000000000000000000486",
]
# GraphQL endpoint of the Balancer v2 subgraph for Arbitrum.
ARB_BALANCER_GRAPH_URL = "https://api.thegraph.com/subgraphs/name/balancer-labs/balancer-arbitrum-v2"
# Estimated Arbitrum block time in seconds (roughly 0.3 s; 0.27 is the value
# actually used to convert a time span into a number of blocks).
ARB_CHAIN_BLOCK_TIME = 0.27

In [2]:
# GraphQL query template for pool snapshots, meant to be filled in with
# `str.format(first=..., skip=..., block=...)`:
#   - `first` / `skip` page through the result set,
#   - `block` is the block number passed to the `block: { number: ... }` argument.
# Doubled braces (`{{` / `}}`) are escapes that become literal braces after
# formatting. Results are ordered by timestamp, newest first, and snapshots
# whose `protocolFee` is null are filtered out.
POOLS_SNAPSHOTS_QUERY = """
{{
  poolSnapshots(
    first: {first}
    skip: {skip}
    orderBy: timestamp
    orderDirection: desc
    block: {{ number: {block} }}
    where: {{ protocolFee_not: null }}
  ) {{
    pool {{
      address
      id
      symbol
    }}
    timestamp
    protocolFee
    swapFees
    swapVolume
    liquidity
  }}
}}
"""

## Fetching data from the Balancer subgraphs

In [3]:
import os
import datetime
from typing import Dict
from typing import List
from typing import Optional

from dotenv import load_dotenv
from gql import Client
from gql import gql
from gql.transport.requests import RequestsHTTPTransport
from web3 import Web3

# Load environment variables (python-dotenv), then connect to the Arbitrum node
# whose URL is read from the ARBNODEURL environment variable.
load_dotenv()
arb_web3 = Web3(Web3.HTTPProvider(os.environ["ARBNODEURL"]))

# "Now" is taken 1000 blocks behind the chain head
# (presumably so the subgraph has already indexed that block -- TODO confirm).
arb_block_now = arb_web3.eth.block_number - 1000
# Start of the window: two weeks of seconds converted to blocks with the
# estimated block time. The division makes this a float; it is truncated with
# int() wherever a block number is required.
block_2_weeks_ago = arb_block_now - (2 * 7 * 24 * 60 * 60 / ARB_CHAIN_BLOCK_TIME)

# On-chain timestamps of both window boundaries.
arb_timestamp_now = arb_web3.eth.get_block(arb_block_now).timestamp
arb_timestamp_2_weeks_ago = arb_web3.eth.get_block(int(block_2_weeks_ago)).timestamp

# Convert both timestamps to datetime objects. NOTE: fromtimestamp() without a
# tz argument yields naive datetimes in the machine's local timezone.
arb_datetime_now, arb_datetime_2_weeks_ago = (
    datetime.datetime.fromtimestamp(ts)
    for ts in (arb_timestamp_now, arb_timestamp_2_weeks_ago)
)


# Fetch all the data from the balancer subgraph
def make_gql_client(url: str) -> Optional[Client]:
    """Build a gql ``Client`` for the GraphQL endpoint at ``url``.

    The client talks over a ``RequestsHTTPTransport`` configured with 3
    retries, fetches the schema from the transport and uses an execute
    timeout of 60.

    :param url: URL of the GraphQL endpoint (e.g. a subgraph URL).
    :return: the configured client. Although the annotation is ``Optional``,
        this function always returns a client instance.
    """
    return Client(
        transport=RequestsHTTPTransport(url=url, retries=3),
        fetch_schema_from_transport=True,
        execute_timeout=60,
    )


def get_balancer_pool_snapshots(block: int, graph_url: str) -> Optional[List[Dict]]:
    """Fetch pool snapshots from a Balancer subgraph as of a given block.

    Pages through ``POOLS_SNAPSHOTS_QUERY`` in chunks of 1000 rows and stops
    either when a page comes back shorter than requested (no more data) or
    once the ``skip`` offset would reach 5000, so at most 5000 snapshots are
    returned.

    :param block: block number the query is evaluated at.
    :param graph_url: URL of the subgraph GraphQL endpoint.
    :return: list of snapshot dicts in the order returned by the subgraph
        (possibly empty). Although the annotation is ``Optional``, ``None`` is
        never returned.
    """
    client = make_gql_client(graph_url)
    page_size = 1000
    # Upper bound (exclusive) for the `skip` argument
    # (presumably mirrors the subgraph's own limit on `skip` -- TODO confirm).
    max_skip = 5000
    all_pools = []
    for offset in range(0, max_skip, page_size):
        query = POOLS_SNAPSHOTS_QUERY.format(first=page_size, skip=offset, block=block)
        page = client.execute(gql(query))['poolSnapshots']
        all_pools.extend(page)
        # A page shorter than requested is the last one. The previous check
        # (`< limit - 1`) issued one extra, empty request when the final page
        # held exactly `limit - 1` rows.
        if len(page) < page_size:
            break
    return all_pools


# Snapshots as seen at the "now" block ...
pool_snapshots_now = get_balancer_pool_snapshots(
    block=arb_block_now,
    graph_url=ARB_BALANCER_GRAPH_URL,
)
# ... and as seen at the block roughly two weeks earlier (float truncated to int).
pool_snapshots_2_weeks_ago = get_balancer_pool_snapshots(
    block=int(block_2_weeks_ago),
    graph_url=ARB_BALANCER_GRAPH_URL,
)

## Extract fee data from CORE pools:


In [6]:
import pandas as pd

# For every core pool, pick its most recent snapshot as of "now" and as of two
# weeks ago, then subtract the 2-weeks-ago value from the current value to get
# the fees accrued over the window (this assumes `protocolFee` and `swapFees`
# are running totals -- TODO confirm against the subgraph schema).
# Results are keyed by pool symbol.
arb_fees = {}
for pool in ARB_CORE_POOLS:
    current_fees_snapshots = [x for x in pool_snapshots_now if x['pool']['id'] == pool]
    fees_2_weeks_ago = [x for x in pool_snapshots_2_weeks_ago if x['pool']['id'] == pool]
    # Fail with a descriptive error instead of a bare IndexError when a core
    # pool is absent from either snapshot set (e.g. outside the fetched pages).
    if not current_fees_snapshots:
        raise ValueError(f"No current snapshot found for core pool {pool}")
    if not fees_2_weeks_ago:
        raise ValueError(f"No snapshot from 2 weeks ago found for core pool {pool}")
    # Most recent snapshot of each set; max() picks the same element that a
    # descending stable sort would place first.
    pool_snapshot_now = max(current_fees_snapshots, key=lambda x: x['timestamp'])
    pool_snapshot_2_weeks_ago = max(fees_2_weeks_ago, key=lambda x: x['timestamp'])
    # Calculate fees accrued during the window
    pool_fee = float(pool_snapshot_now['protocolFee']) - float(pool_snapshot_2_weeks_ago['protocolFee'])
    pool_swap_fee = float(pool_snapshot_now['swapFees']) - float(pool_snapshot_2_weeks_ago['swapFees'])
    arb_fees[pool_snapshot_now['pool']['symbol']] = {
        'pool_fee': pool_fee,
        'swap_fee': pool_swap_fee,
        'time_from': arb_datetime_2_weeks_ago,
        'time_to': arb_datetime_now,
        'chain': 'Arbitrum',
    }
# Convert to dataframe (one row per pool symbol), sort by pool_fee descending
arb_fees_df = pd.DataFrame.from_dict(arb_fees, orient='index')
arb_fees_df = arb_fees_df.sort_values(by='pool_fee', ascending=False)
arb_fees_df

Unnamed: 0,pool_fee,swap_fee,time_from,time_to,chain
RDNT-WETH,11341.350457,18742.401366,2023-07-31 22:32:46,2023-08-14 17:06:23,Arbitrum
B-wstETH-WETH-Stable,5397.714232,64.015104,2023-07-31 22:32:46,2023-08-14 17:06:23,Arbitrum
bb-a-USD,3172.079133,875.765654,2023-07-31 22:32:46,2023-08-14 17:06:23,Arbitrum
wstETH-bb-a-WETH-BPT,2004.249838,446.4265,2023-07-31 22:32:46,2023-08-14 17:06:23,Arbitrum
rETH-bb-a-WETH-BPT,1082.7925,318.178761,2023-07-31 22:32:46,2023-08-14 17:06:23,Arbitrum
bbaUSDC/bbaUSDCe,163.814092,201.953956,2023-07-31 22:32:46,2023-08-14 17:06:23,Arbitrum
wstETH/rETH/cbETH,82.04366,67.858882,2023-07-31 22:32:46,2023-08-14 17:06:23,Arbitrum
