# 1 - Data Collection

In this section, we fetch data from Uniswap V3 subgraph, and store them in json for further processing.

In [1]:
# Standard Library
import datetime as dt
import glob
import json
import os
from pprint import pprint

# Third Party Library
import numpy as np
import pandas as pd
from flatdict import FlatDict
from gql import Client, gql
from gql.transport.aiohttp import AIOHTTPTransport

# Local Folder Library
from pyammanalysis.graphql_helper import run_query
from pyammanalysis.util import read_yaml

In [2]:
# refetch setting - if True, rerun GraphQL queries
refetch = True

# config
config = read_yaml("../config.yaml")
DATA_PATH = config["DATA_PATH"]
DATA_TOKEN_DAY_PATH = os.path.join(DATA_PATH, "token", "day")
DATA_POOL_DAY_PATH = os.path.join(DATA_PATH, "pool", "day")
UNISWAP_V3_SUBGRAPH_URL = config["UNISWAP_V3_SUBGRAPH_URL"]

if refetch:
    transport = AIOHTTPTransport(url=UNISWAP_V3_SUBGRAPH_URL)

# start timestamp for time series
START_TIMESTAMP = 1619170975  # GMT: Friday, April 23, 2021 9:42:55 AM

# create folder if needed
for folder in [DATA_PATH, DATA_TOKEN_DAY_PATH, DATA_POOL_DAY_PATH]:
    if not os.path.exists(folder):
        os.makedirs(folder)

token_dict = config["tokens"]
token_addr_dict = config["token_addr"]  # mapping from symbol to addr
whitelisted_symbols = np.sort(
    np.concatenate([i for i in FlatDict(token_dict).itervalues()])
)

# address-related config
# addresses in `config.yaml` follow EIP-55: Mixed-case checksum address encoding
# enforce lower case by `str.lower()`
sym2addr = lambda symbol: config["token_addr"][
    symbol
].lower()  # mapping from symbol to addr
addr2sym = lambda addr: {v.lower(): k for k, v in config["token_addr"].items()}[
    addr
]  # mapping from addr to symbol
whitelisted_addresses = np.array(
    [i.lower() for i in FlatDict(token_addr_dict).itervalues()]
)

## Uniswap V3 Global Data
For now we only fetch the newest pool count and TVL.

In [3]:
CURRENT_GLOBAL_DATA_QUERY = """
{
    factory(id: "0x1F98431c8aD98523631AE4a59f267346ea31F984" ) {
        poolCount
        totalValueLockedUSD
    }
}
"""

GLOBAL_DATA_PATH = os.path.join(DATA_PATH, "globalData.json")

if refetch:
    global_data = run_query(UNISWAP_V3_SUBGRAPH_URL, CURRENT_GLOBAL_DATA_QUERY)["data"][
        "factory"
    ]
    with open(GLOBAL_DATA_PATH, "w") as f:
        json.dump(global_data, f, indent=4)
else:
    with open(GLOBAL_DATA_PATH, "r") as f:
        global_data = json.load(f)

pprint(global_data)

{'poolCount': '7046',
 'totalValueLockedUSD': '8810152814.546202739316191550666919'}


## Token Data

In [4]:
# get top 1000 tokens by TVL (but only analyze top 30)
TOP_TOKENS_QUERY = """
{
    tokens(first: 1000, orderBy: totalValueLockedUSD, orderDirection: desc) {
        id
        symbol
        name
        totalValueLockedUSD
    }
}
"""

TOP_TOKENS_PATH = os.path.join(DATA_PATH, "topTokenAddr.json")

if refetch:
    top_token_ids = run_query(UNISWAP_V3_SUBGRAPH_URL, TOP_TOKENS_QUERY)["data"]
    addr2sym_dict = {x["id"]: x["symbol"] for x in top_token_ids["tokens"][:30]}
    top_token_addrs = list(map(lambda x: x["id"], top_token_ids["tokens"]))
    with open(TOP_TOKENS_PATH, "w") as f:
        json.dump({"tokenList": top_token_addrs}, f, indent=4)
else:
    with open(TOP_TOKENS_PATH, "r") as f:
        top_token_addrs = json.load(f)["tokenList"]

pprint(top_token_ids)

{'tokens': [{'id': '0x12b32f10a499bf40db334efe04226cca00bf2d9b',
             'name': 'UMIIE COIN',
             'symbol': 'UMIIE',
             'totalValueLockedUSD': '1418175887838.43006967093349508692'},
            {'id': '0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48',
             'name': 'USD Coin',
             'symbol': 'USDC',
             'totalValueLockedUSD': '983971706.051422'},
            {'id': '0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2',
             'name': 'Wrapped Ether',
             'symbol': 'WETH',
             'totalValueLockedUSD': '749522278.3740788113238405846807616'},
            {'id': '0x6b175474e89094c44da98b954eedeac495271d0f',
             'name': 'Dai Stablecoin',
             'symbol': 'DAI',
             'totalValueLockedUSD': '621934705.676179429171431545'},
            {'id': '0xdac17f958d2ee523a2206206994597c13d831ec7',
             'name': 'Tether USD',
             'symbol': 'USDT',
             'totalValueLockedUSD': '209457252.222022'},
      

## Token Day Time Series
Ref: https://github.com/Uniswap/v3-info/blob/770a05dc1a191cf229432ebc43c1f2ceb3666e3b/src/data/tokens/chartData.ts#L14

In [5]:
def get_token_key(symbol: str, addr: str) -> str:
    """
    Generates a key for a token.
    The uniqueness of the key is guaranteed by the address,
    but the symbol is also prefixed for readability.
    """
    return f"{symbol}_{addr}"

In [6]:
TOKEN_DAY_TIME_SERIES = """
    query tokenDayDatas($startTime: Int!, $skip: Int!, $address: String!) {
        tokenDayDatas(
            first: 1000
            skip: $skip
            where: { token: $address, date_gt: $startTime }
            orderBy: date
            orderDirection: asc
            subgraphError: allow
        ) {
            date
            volumeUSD
            totalValueLockedUSD
        }
    }
"""

In [7]:
# TODO: fix error (possibly due to timeout)
fetch_token_error = []


async def fetch_token_chart_data(
    address: str, symbol: str, transport: AIOHTTPTransport = transport
):
    error = False
    skip = 0
    all_found = False
    result = {"tokenDayDatas": []}

    async with Client(
        transport=transport,
        fetch_schema_from_transport=True,
    ) as session:
        params = {"address": address, "startTime": START_TIMESTAMP, "skip": skip}
        try:
            while not all_found:
                temp = await session.execute(
                    gql(TOKEN_DAY_TIME_SERIES), variable_values=params
                )
                skip += 1000
                if len(temp["tokenDayDatas"]) < 1000 or error:
                    all_found = True
                if temp:
                    # concat the lists
                    result["tokenDayDatas"] = (
                        result["tokenDayDatas"] + temp["tokenDayDatas"]
                    )
        except Exception as e:
            print(e)
            error = True
            fetch_token_error.append(address)

    if not error:
        if not os.path.exists(DATA_TOKEN_DAY_PATH):
            os.makedirs(DATA_TOKEN_DAY_PATH)

        with open(
            f"{DATA_TOKEN_DAY_PATH}/{get_token_key(symbol, address)}.json",
            "w",
        ) as f:
            json.dump(result, f, indent=4)

In [8]:
if refetch:
    # remove existing content in the out folder
    for f in glob.glob(DATA_TOKEN_DAY_PATH + "/*"):
        os.remove(f)

    # fetch token day data for each token
    for addr, sym in addr2sym_dict.items():
        await fetch_token_chart_data(
            addr,
            sym,
        )
    print(fetch_token_error)



['0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48', '0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2']


## Pool Data

In [9]:
def get_pool_key(symbol0: str, symbol1: str, fee_tier: int) -> str:
    """
    Generates a key for a pool.
    `token0`, `token1` and `feeTier` together uniquely define a pool.
    But using symbol instead of token address involve a risk.
    """
    return f"{symbol0}_{symbol1}_{fee_tier}"

In [10]:
TWENTY_LARGEST_TVL_POOLS_QUERY = """
{
    pools(first: 20, orderBy: totalValueLockedUSD, orderDirection: desc) {
        id
    } 
}
"""

LARGEST_TVL_POOLS_PATH = os.path.join(DATA_PATH, "largestTVLPoolAddr.json")

if refetch:
    largest_tvl_pool_ids = run_query(
        UNISWAP_V3_SUBGRAPH_URL, TWENTY_LARGEST_TVL_POOLS_QUERY
    )
    largest_tvl_pool_addrs = list(
        map(lambda x: x["id"], largest_tvl_pool_ids["data"]["pools"])
    )
    with open(LARGEST_TVL_POOLS_PATH, "w") as f:
        json.dump({"poolList": largest_tvl_pool_addrs}, f, indent=4)
else:
    with open(LARGEST_TVL_POOLS_PATH, "r") as f:
        largest_tvl_pool_addrs = json.load(f)["poolList"]

print(largest_tvl_pool_addrs)

['0xa850478adaace4c08fc61de44d8cf3b64f359bec', '0x5777d92f208679db4b9778590fa3cab3ac9e2168', '0x6c6bc977e13df9b0de53b251522280bb72383700', '0x8ad599c3a0ff1de082011efddc58f1908eb6e6d8', '0x88e6a0c2ddd26feeb64f039a2c41296fcb3f5640', '0xcbcdf9626bc03e24f779434178a73a0b4bad62ed', '0xc63b0708e2f7e69cb8a1df0e1389a98c35a76d52', '0x3416cf6c708da44db2624d63ea0aaef7113527c6', '0x4e68ccd3e89f51c3074ca5072bbac773960dfa36', '0x4585fe77225b41b697c938b018e2ac67ac5a20c0', '0x8ee3cc8e29e72e03c4ab430d7b7e08549f0c71cc', '0x99ac8ca7087fa4a2a1fb6357269965a2014abc35', '0xc2e9f25be6257c210d7adf0d4cd6e3e881ba25f8', '0x025b887e7f62d8b5f1564ba204187452cf27f634', '0x97e7d56a0408570ba1a7852de36350f7713906ec', '0x5c128d25a21f681e678cb050e551a895c9309945', '0x7bea39867e4169dbe237d55c8242a8f2fcdcc387', '0x00cef0386ed94d738c8f8a74e8bfd0376926d24c', '0x11b815efb8f581194ae79006d24e0d814b7697f6', '0x7858e59e0c01ea06df3af3d20ac7b0003275d4bf']


In [11]:
GET_POOL_BY_ID_QUERY = """
    query getPoolById($pool_addr: ID!) {
        pool(id: $pool_addr) {
            token0 {
                symbol
                id
                decimals
            }
            token1 {
                symbol
                id
                decimals
            }
            feeTier
        }
    }
"""

In [12]:
async def fetch_pools_metadata(addresses: list, verbose: bool = False):
    result = {"topPoolDatas": []}

    transport = AIOHTTPTransport(url=UNISWAP_V3_SUBGRAPH_URL)

    async with Client(
        transport=transport,
        fetch_schema_from_transport=True,
    ) as session:
        for id in addresses:
            params = {"pool_addr": id}
            temp = await session.execute(
                gql(GET_POOL_BY_ID_QUERY), variable_values=params
            )
            result["topPoolDatas"].append(temp["pool"])

    if verbose:
        pprint(result)

    with open(f"{DATA_PATH}/topPoolDatas.json", "w") as f:
        json.dump(result, f, indent=4)

In [13]:
if refetch:
    await fetch_pools_metadata(largest_tvl_pool_addrs)

In [14]:
with open(f"{DATA_PATH}/topPoolDatas.json", "r") as f:
    top_pool_datas = json.load(f)

# replace nested dict with token addr
for pool_dict in top_pool_datas["topPoolDatas"]:
    for token in ["token0", "token1"]:
        pool_dict[token] = pool_dict[token]["id"]

top_pools_df = pd.DataFrame.from_dict(top_pool_datas["topPoolDatas"]).astype(
    {"token0": str, "token1": str, "feeTier": int}
)

# add addr
top_pools_df["pool_addr"] = largest_tvl_pool_addrs

# whitelist a pool if both its token0 and token1 are whitelisted
is_whitelisted_pool = top_pools_df["token0"].isin(whitelisted_addresses) & top_pools_df[
    "token1"
].isin(whitelisted_addresses)
top_pools_df = top_pools_df[is_whitelisted_pool]

# add name
top_pools_df["name"] = top_pools_df.apply(
    lambda x: get_pool_key(addr2sym(x["token0"]), addr2sym(x["token1"]), x["feeTier"]),
    axis=1,
)

top_pools_df

Unnamed: 0,token0,token1,feeTier,pool_addr,name
1,0x6b175474e89094c44da98b954eedeac495271d0f,0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48,100,0x5777d92f208679db4b9778590fa3cab3ac9e2168,DAI_USDC_100
2,0x6b175474e89094c44da98b954eedeac495271d0f,0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48,500,0x6c6bc977e13df9b0de53b251522280bb72383700,DAI_USDC_500
3,0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48,0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2,3000,0x8ad599c3a0ff1de082011efddc58f1908eb6e6d8,USDC_WETH_3000
4,0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48,0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2,500,0x88e6a0c2ddd26feeb64f039a2c41296fcb3f5640,USDC_WETH_500
5,0x2260fac5e5542a773aa44fbcfedf7c193bc2c599,0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2,3000,0xcbcdf9626bc03e24f779434178a73a0b4bad62ed,WBTC_WETH_3000
7,0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48,0xdac17f958d2ee523a2206206994597c13d831ec7,100,0x3416cf6c708da44db2624d63ea0aaef7113527c6,USDC_USDT_100
8,0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2,0xdac17f958d2ee523a2206206994597c13d831ec7,3000,0x4e68ccd3e89f51c3074ca5072bbac773960dfa36,WETH_USDT_3000
9,0x2260fac5e5542a773aa44fbcfedf7c193bc2c599,0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2,500,0x4585fe77225b41b697c938b018e2ac67ac5a20c0,WBTC_WETH_500
11,0x2260fac5e5542a773aa44fbcfedf7c193bc2c599,0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48,3000,0x99ac8ca7087fa4a2a1fb6357269965a2014abc35,WBTC_USDC_3000
12,0x6b175474e89094c44da98b954eedeac495271d0f,0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2,3000,0xc2e9f25be6257c210d7adf0d4cd6e3e881ba25f8,DAI_WETH_3000


In [15]:
top_pools_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 14 entries, 1 to 19
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   token0     14 non-null     object
 1   token1     14 non-null     object
 2   feeTier    14 non-null     int32 
 3   pool_addr  14 non-null     object
 4   name       14 non-null     object
dtypes: int32(1), object(4)
memory usage: 616.0+ bytes


## Pool Day Time Series
Ref: https://github.com/Uniswap/v3-info/blob/770a05dc1a191cf229432ebc43c1f2ceb3666e3b/src/data/pools/chartData.ts#L14

In [16]:
POOL_DAY_TIME_SERIES = """
    query poolDayDatas($startTime: Int!, $skip: Int!, $address: String!) {
        poolDayDatas(
            first: 1000
            skip: $skip
            where: { pool: $address, date_gt: $startTime }
            orderBy: date
            orderDirection: asc
            subgraphError: allow
        ) {
            date
            volumeUSD
            tvlUSD
        }
    }
"""

In [17]:
fetch_pool_error = []


async def fetch_pool_chart_data(
    address: str,
    symbol0: str,
    symbol1: str,
    fee_tier: int,
    transport: AIOHTTPTransport = transport,
):
    error = False
    skip = 0
    all_found = False
    result = {"poolDayDatas": []}

    async with Client(
        transport=transport,
        fetch_schema_from_transport=True,
    ) as session:
        params = {"address": address, "startTime": START_TIMESTAMP, "skip": skip}
        try:
            while not all_found:
                temp = await session.execute(
                    gql(POOL_DAY_TIME_SERIES), variable_values=params
                )
                skip += 1000
                if len(temp["poolDayDatas"]) < 1000 or error:
                    all_found = True
                if temp:
                    # concat the lists
                    result["poolDayDatas"] = (
                        result["poolDayDatas"] + temp["poolDayDatas"]
                    )
        except Exception as e:
            print(e)
            error = True
            fetch_pool_error.append(address)

    if not error:
        if not os.path.exists(DATA_POOL_DAY_PATH):
            os.makedirs(DATA_POOL_DAY_PATH)

        with open(
            f"{DATA_POOL_DAY_PATH}/{get_pool_key(symbol0, symbol1, fee_tier)}.json",
            "w",
        ) as f:
            json.dump(result, f, indent=4)

In [18]:
if refetch:
    # remove existing content in the out folder
    for f in glob.glob(DATA_POOL_DAY_PATH + "/*"):
        os.remove(f)

    # fetch pool data for each pool
    for i, row in top_pools_df.iterrows():
        await fetch_pool_chart_data(
            row["pool_addr"],
            addr2sym(row["token0"]),
            addr2sym(row["token1"]),
            row["feeTier"],
        )
    print(fetch_pool_error)

{'message': 'Failed to get entities from store: canceling statement due to conflict with recovery, query = /* qid: 704c0c9d28d8520a-d31d38fc8e9d4f69 */\nselect \'PoolDayData\' as entity, to_jsonb(c.*) as data from (select  * \n  from "sgd217942"."pool_day_data" c\n where c.block_range @> $1 and ("date" > $2 and "pool" = $3)\n\n order by "date" asc, "id" asc\n limit 1000) c -- binds: [14993604, 1619170975, "0x88e6a0c2ddd26feeb64f039a2c41296fcb3f5640"]'}
['0x88e6a0c2ddd26feeb64f039a2c41296fcb3f5640']


In [19]:
# reads pool day datas from json
df = pd.DataFrame(columns=["date"])
pool_names = []

for f in os.listdir(DATA_POOL_DAY_PATH):
    fullname = os.fsdecode(f)

    # not a rigorous check
    with open(os.path.join(DATA_POOL_DAY_PATH, fullname), "r") as file:
        pool_day_datas = json.load(file)

    # parse dict as df
    temp = pd.DataFrame.from_dict(pool_day_datas["poolDayDatas"]).astype(
        {"volumeUSD": np.float64, "tvlUSD": np.float64}
    )

    # Note: there is no need to analyze fees separately,
    # as it is a fixed proportion of the pool's trade volume

    # prefix columns (except "date") with pool name
    cols = temp.columns[~temp.columns.isin(["date"])]
    pool_name = fullname.split(os.sep)[-1].split(".")[0]
    pool_names.append(pool_name)
    temp.rename(columns=dict(zip(cols, pool_name + "_" + cols)), inplace=True)

    # outer join: union of items on "date"
    df = pd.merge(df, temp, how="outer", on=["date"])

# sort by "date"
df.sort_values(by="date", inplace=True)
df.reset_index(drop="index", inplace=True)

df.head()

Unnamed: 0,date,BUSD_USDC_500_volumeUSD,BUSD_USDC_500_tvlUSD,DAI_USDC_100_volumeUSD,DAI_USDC_100_tvlUSD,DAI_USDC_500_volumeUSD,DAI_USDC_500_tvlUSD,DAI_WETH_3000_volumeUSD,DAI_WETH_3000_tvlUSD,USDC_USDT_100_volumeUSD,...,WBTC_USDC_3000_volumeUSD,WBTC_USDC_3000_tvlUSD,WBTC_WETH_3000_volumeUSD,WBTC_WETH_3000_tvlUSD,WBTC_WETH_500_volumeUSD,WBTC_WETH_500_tvlUSD,WETH_USDT_3000_volumeUSD,WETH_USDT_3000_tvlUSD,WETH_USDT_500_volumeUSD,WETH_USDT_500_tvlUSD
0,1620086400,,,,,0.0,0.0,0.0,0.0,,...,,,0.0,0.0,,,,,,
1,1620172800,,,,,1455737.0,8034837.0,1142416.0,2593409.0,,...,260209.6,16314910.0,492532.4,8760155.0,0.0,0.0,976189.0,2609716.0,9444.342,25979.84
2,1620259200,,,,,15193080.0,17324280.0,20541030.0,15582820.0,,...,2844009.0,15904780.0,7507954.0,34598200.0,1102.031,5146.736,26725440.0,26497790.0,235700.9,58462.05
3,1620345600,,,,,11234530.0,20948980.0,26987360.0,21446390.0,,...,3494689.0,16506610.0,10970520.0,45080250.0,3922264.0,2073400.0,54044380.0,53713750.0,15231770.0,2854816.0
4,1620432000,,,,,16914800.0,29295970.0,17861050.0,20636460.0,,...,4087189.0,17109870.0,23273350.0,53072940.0,7443618.0,1980521.0,93245470.0,51724090.0,29393160.0,2159909.0


In [20]:
# ["date"]: int -> date (in "YYYY-MM-DD")
df["timestamp"] = df["date"]  # keep timestamp in a new col
df["date"] = df["date"].map(dt.date.fromtimestamp)

df.head()

Unnamed: 0,date,BUSD_USDC_500_volumeUSD,BUSD_USDC_500_tvlUSD,DAI_USDC_100_volumeUSD,DAI_USDC_100_tvlUSD,DAI_USDC_500_volumeUSD,DAI_USDC_500_tvlUSD,DAI_WETH_3000_volumeUSD,DAI_WETH_3000_tvlUSD,USDC_USDT_100_volumeUSD,...,WBTC_USDC_3000_tvlUSD,WBTC_WETH_3000_volumeUSD,WBTC_WETH_3000_tvlUSD,WBTC_WETH_500_volumeUSD,WBTC_WETH_500_tvlUSD,WETH_USDT_3000_volumeUSD,WETH_USDT_3000_tvlUSD,WETH_USDT_500_volumeUSD,WETH_USDT_500_tvlUSD,timestamp
0,2021-05-04,,,,,0.0,0.0,0.0,0.0,,...,,0.0,0.0,,,,,,,1620086400
1,2021-05-05,,,,,1455737.0,8034837.0,1142416.0,2593409.0,,...,16314910.0,492532.4,8760155.0,0.0,0.0,976189.0,2609716.0,9444.342,25979.84,1620172800
2,2021-05-06,,,,,15193080.0,17324280.0,20541030.0,15582820.0,,...,15904780.0,7507954.0,34598200.0,1102.031,5146.736,26725440.0,26497790.0,235700.9,58462.05,1620259200
3,2021-05-07,,,,,11234530.0,20948980.0,26987360.0,21446390.0,,...,16506610.0,10970520.0,45080250.0,3922264.0,2073400.0,54044380.0,53713750.0,15231770.0,2854816.0,1620345600
4,2021-05-08,,,,,16914800.0,29295970.0,17861050.0,20636460.0,,...,17109870.0,23273350.0,53072940.0,7443618.0,1980521.0,93245470.0,51724090.0,29393160.0,2159909.0,1620432000


In [21]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 413 entries, 0 to 412
Data columns (total 28 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   date                       413 non-null    object 
 1   BUSD_USDC_500_volumeUSD    380 non-null    float64
 2   BUSD_USDC_500_tvlUSD       380 non-null    float64
 3   DAI_USDC_100_volumeUSD     220 non-null    float64
 4   DAI_USDC_100_tvlUSD        220 non-null    float64
 5   DAI_USDC_500_volumeUSD     412 non-null    float64
 6   DAI_USDC_500_tvlUSD        412 non-null    float64
 7   DAI_WETH_3000_volumeUSD    413 non-null    float64
 8   DAI_WETH_3000_tvlUSD       413 non-null    float64
 9   USDC_USDT_100_volumeUSD    220 non-null    float64
 10  USDC_USDT_100_tvlUSD       220 non-null    float64
 11  USDC_USDT_500_volumeUSD    413 non-null    float64
 12  USDC_USDT_500_tvlUSD       413 non-null    float64
 13  USDC_WETH_10000_volumeUSD  412 non-null    float64

In [22]:
# sanity check for number of days elapsed
print(df["date"][0], "to", dt.date.today(), "has", (dt.date.today() - df["date"][0]))

2021-05-04 to 2022-06-20 has 412 days, 0:00:00


In [23]:
pools_df = pd.DataFrame(
    list(zip(pool_names, largest_tvl_pool_addrs)), columns=["name", "addr"]
)
pools_df.to_csv(os.path.join(DATA_PATH, "pools_df.csv"), index=False)
df.to_csv(os.path.join(DATA_POOL_DAY_PATH, "poolDay.csv"), index=False)