In [1]:
import pandas as pd
import numpy as np
import requests
import json
import time
import os

In [2]:
# Base export URL of the Google Sheet; each `gid` below selects one tab as CSV.
url = 'https://docs.google.com/spreadsheets/d/1ObrsfW-RB2CWmN2QhpKCz4z_FpS6UA-hajFEUZDlEFQ/export'

# Mapping of (Version, Network) -> API value, taken from the sheet's 'API' column.
apis = (
    pd.read_csv(f'{url}?gid=1357965727&format=csv')
    .set_index(['Version', 'Network'])['API']
    .to_dict()
)

# One sheet per version; looked up by version when the queries are built.
tables = {
    'V2': pd.read_csv(f'{url}?gid=1841150650&format=csv'),
    'V3': pd.read_csv(f'{url}?gid=496837599&format=csv'),
}

In [3]:
def run_query(api, query, timeout=120):
    """POST a query to ``api`` and return the decoded JSON response body.

    Parameters
    ----------
    api : str
        URL the request is posted to.
    query : str
        Query text; sent as the ``query`` field of the JSON request body.
    timeout : float or tuple or None, optional
        Forwarded to ``requests.post``. Without a timeout a stalled
        connection blocks forever, which would hang a long crawl. Pass
        ``None`` to wait indefinitely (the previous behaviour).

    Returns
    -------
    The parsed JSON body of the response.

    Raises
    ------
    Exception
        If the response status code is not 200. The message contains the
        status code and the query that was sent.
    """
    response = requests.post(api, json={'query': query}, timeout=timeout)
    if response.status_code != 200:
        raise Exception("Query failed to run by returning code of {}. {}".format(response.status_code, query))
    return response.json()

In [4]:
def get_query_string(table):
    """Build one query fragment per column of ``table``.

    For every column, each cell is cut at its first ':' and only the leading
    part is kept; missing cells are dropped. The remaining names are joined
    with a comma and newline and wrapped in braces after the
    ``<column>(first: 1000 orderBy: id)`` header.

    Returns a dict mapping each column name to its fragment, in column order.
    """
    def _fragment(col):
        # Keep only the text before the first ':' of each non-missing cell.
        fields = table[col].str.split(':').str[0].dropna().tolist()
        header = f'{col}(first: 1000 orderBy: id)'
        return header + '{' + ',\n'.join(fields) + '}'

    return {col: _fragment(col) for col in table.columns}

In [30]:
# Crawl every (version, network) endpoint and cache the collected rows as JSON.
# An endpoint whose output file already exists is skipped, so the file must only
# be written when the crawl really finished; otherwise a partial dump would be
# mistaken for a complete one on the next run.
os.makedirs('raw_data', exist_ok=True)  # the output directory may not exist yet

for (version, network), api in apis.items():
    out_path = f'raw_data/tables_{version}_{network}.json'
    if os.path.isfile(out_path):
        continue

    print("####", version, network, "####")
    table = tables[version]

    q_strings = get_query_string(table)
    page_args = '(first: 1000 orderBy: id)'  # pagination header emitted by get_query_string

    # The first request asks for the first page of every table at once.
    query = '{' + '\n'.join(q_strings.values()) + '}'

    events = dict()
    count = 0          # number of successful requests so far
    complete = False   # set only when every table has been fully paged
    while True:
        result = run_query(api, query)

        # A status-200 response can still report errors (or carry no data);
        # show what was sent and received, then give up on this endpoint.
        if 'errors' in result or result.get('data') is None:
            print(query)
            print(result)
            break

        count += 1
        if count % 100 == 0:
            print(count)
            time.sleep(10)  # pause every 100 requests

        next_parts = []
        for key, rows in result['data'].items():
            # Add query results to dictionary
            events.setdefault(key, []).extend(rows)

            # A full page means there may be more rows: continue after the last id seen.
            if len(rows) >= 1000:
                cursor = '(first: 1000 orderBy: id where: {id_gt: "' + rows[-1]['id'] + '"})'
                next_parts.append(q_strings[key].replace(page_args, cursor))
            else:
                print(f'{key} table is done at {count} with {len(events[key])} entries')

        if not next_parts:
            print(f'Everything is done at {count}!!!')
            complete = True
            break

        # Only tables that still have rows left are requested again.
        query = '{' + ''.join(next_parts) + '}'

    if complete:
        with open(out_path, 'w') as outfile:
            json.dump(events, outfile)
    else:
        print(f'Crawl for {version} {network} did not complete; {out_path} was not written')
    time.sleep(60)

#### V2 Polygon ####
pools table is done at 1 with 1 entries
reserves table is done at 1 with 13 entries
priceOracleAssets table is done at 1 with 14 entries
priceOracles table is done at 1 with 1 entries
100
200
300
400
500
600
700
800
900
1000
1100
users table is done at 1115 with 1114427 entries
1200
1300
1400
1500
1600
1700
userReserves table is done at 1789 with 1788534 entries
Everything is done at 1789!!!
#### V3 Arbitrum ####
emodeCategories table is done at 1 with 2 entries
pools table is done at 1 with 1 entries
reserves table is done at 1 with 15 entries
priceOracleAssets table is done at 1 with 15 entries
priceOracles table is done at 1 with 1 entries
100
200
users table is done at 200 with 199655 entries
300
userReserves table is done at 332 with 331657 entries
Everything is done at 332!!!
#### V3 Avalanche ####
emodeCategories table is done at 1 with 2 entries
pools table is done at 1 with 1 entries
reserves table is done at 1 with 12 entries
priceOracleAssets table is do

True

In [None]:
# Re-issue the most recent query by hand. `api` and `query` are not defined in
# this cell: they are whatever the crawl loop last assigned, so this only works
# after that loop has run in the same kernel.
run_query(api, query)

### Read Tables

In [None]:
# Choose which cached crawl (raw_data/tables_<version>_<network>.json) to read.
version = 'V2'
network = 'Polygon'

In [None]:
# Read the cached JSON dump for the selected version/network into `data`.
with open(f'raw_data/tables_{version}_{network}.json') as json_file:
    data = json.loads(json_file.read())

In [None]:
# For each table in the dump, print its name, its shape and its column names
# after normalizing the records into a DataFrame.
for key, values in data.items():
    df = pd.json_normalize(values)
    print(f'{key} {df.shape}')
    print(df.columns.values)

In [None]:
# Normalize the 'priceOracleAssets' records into a DataFrame; left as the
# cell's last expression so the notebook displays it.
pd.json_normalize(data['priceOracleAssets'])