## Import libraries

In [1]:
import requests
import pandas as pd
from datetime import datetime, timedelta, timezone

## Connect to the Subgraph
1. Define URL: "https://api.studio.thegraph.com/query/75401/aave-v3-arbitrum-project/version/latest"
2. Define run_query function

In [2]:
# GraphQL endpoint of the subgraph; run_query posts every query to this URL
SUBGRAPH_URL = "https://api.studio.thegraph.com/query/75401/aave-v3-arbitrum-project/version/latest"


In [3]:
def run_query(query, timeout=30):
    """Send a GraphQL query to the subgraph endpoint and return the decoded JSON.

    Parameters
    ----------
    query : str
        GraphQL query document.
    timeout : float or tuple, optional
        Seconds to wait for the server before giving up (passed straight to
        ``requests.post``). Without a timeout a stalled endpoint would block
        the notebook indefinitely.

    Returns
    -------
    dict
        The parsed JSON body of the response. GraphQL-level errors are NOT
        checked here: they arrive with HTTP 200, so callers must inspect the
        ``errors`` key themselves.

    Raises
    ------
    Exception
        If the HTTP status code is anything other than 200.
    """
    response = requests.post(SUBGRAPH_URL, json={'query': query}, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f'Query failed. Return code is {response.status_code}. {query}')
    return response.json()


## Locate Block Numbers
1. Define a function that gets the last block at or before each hour mark, from `Start Time` to `End Time`
2. Provide `Start Time` and `End Time` exactly on the hour (minute 00) in UTC


In [4]:
def get_closest_block_of_every_hour(start_time, end_time):
    """Find the last indexed block at or before every hourly mark in a time range.

    Starting at ``start_time`` and stepping one hour at a time up to and
    including ``end_time``, the subgraph is asked for the newest block whose
    timestamp is less than or equal to the mark.

    Parameters
    ----------
    start_time, end_time : datetime.datetime
        Bounds of the range. They are converted with ``.timestamp()``, so pass
        timezone-aware values (e.g. ``tzinfo=timezone.utc``); naive datetimes
        would be interpreted as local time.

    Returns
    -------
    pandas.DataFrame
        Columns ``block_number`` and ``timestamp`` (UTC, formatted as
        ``YYYY-MM-DD HH:MM:SS``), one row per distinct block found. The frame
        is empty, but still has both columns, when no block matched.

    Raises
    ------
    Exception
        If the HTTP request fails (raised by ``run_query``) or the response
        carries GraphQL ``errors``.
    """
    blocks = []
    current_time = start_time

    while current_time <= end_time:
        query = f"""
        {{
          blocks(first: 1, orderBy: timestamp, orderDirection: desc, where: {{timestamp_lte: "{int(current_time.timestamp())}"}}) {{
            number
            timestamp
          }}
        }}
        """
        result = run_query(query)
        if 'errors' in result:
            raise Exception(f"GraphQL query failed with errors: {result['errors']}")

        # 'data' can be missing or null; treat both as "no block found"
        found = (result.get('data') or {}).get('blocks')
        if found:
            block = found[0]
            # Timezone-aware replacement for the deprecated datetime.utcfromtimestamp
            block_time = datetime.fromtimestamp(int(block['timestamp']), tz=timezone.utc)
            block_info = {
                'block_number': block['number'],
                'timestamp': block_time.strftime('%Y-%m-%d %H:%M:%S')
            }
            # Consecutive hour marks can resolve to the same block; keep it once
            if not blocks or blocks[-1]['block_number'] != block_info['block_number']:
                blocks.append(block_info)
        current_time += timedelta(hours=1)

    # Explicit columns keep the schema stable even when nothing was found
    return pd.DataFrame(blocks, columns=['block_number', 'timestamp'])

In [5]:
# Query window in UTC; both bounds sit exactly on an hour boundary (minute 0)
start_time = datetime(year=2022, month=4, day=20, hour=19, tzinfo=timezone.utc)
end_time = datetime(year=2022, month=4, day=21, tzinfo=timezone.utc)

# Look up the closest block for every hourly mark between the two bounds
closest_blocks = get_closest_block_of_every_hour(start_time, end_time)
print(closest_blocks)

  block_number            timestamp
0     10206964  2022-04-20 18:59:29
1     10209340  2022-04-20 19:59:39
2     10211801  2022-04-20 20:59:53
3     10214338  2022-04-20 21:59:07
4     10216491  2022-04-20 22:58:55
5     10219440  2022-04-20 23:58:32


## Fetch the latest UserTokenSnapshots of a specific block
1. Fetch the UserTokenSnapshots (at most 5000 snapshots) recorded in the specific block (given `block_number`)
2. Get the `distinct` UserTokenSnapshots from all latest UserTokenSnapshots
3. Convert the data to a pandas DataFrame
4. Save the result as a CSV file

In [6]:
# Block number whose UserTokenSnapshots will be fetched
block_number = 7_998_790

In [7]:
def get_latest_user_token_snapshots(block_number):
    """Fetch every UserTokenSnapshot recorded in exactly the given block.

    Results are paged 1000 at a time with ``first``/``skip``. Paging stops
    once ``skip + first`` reaches ``max_skip`` (5000), so at most 5000
    snapshots are returned.

    Failures are reported with ``print`` rather than raised, and whatever was
    collected before the failure is returned (best-effort behaviour).

    Parameters
    ----------
    block_number : int
        Block number the snapshots must belong to (``blockNumber`` filter).

    Returns
    -------
    list of dict
        Raw snapshot objects as returned by the subgraph; empty when the block
        has no snapshots or the first request failed.
    """
    all_snapshots = []
    skip = 0
    first = 1000  # Number of results to fetch per request
    max_skip = 5000  # Maximum value for skip

    while True:
        query = f"""
        {{
          userTokenSnapshots(
            where: {{blockNumber: {block_number}}},
            first: {first},
            skip: {skip}
          ) {{
            id
            userToken {{
              id
              user {{
                id
              }}
              token {{
                id
                symbol
                decimals
              }}
            }}
            event
            totalSupplied
            totalBorrowed
            netSupplied
            blockNumber
            blockTimestamp
          }}
        }}
        """
        try:
            result = run_query(query)
            # GraphQL errors come back with HTTP 200; report them as errors
            # instead of mistaking the missing 'data' for "no transaction".
            if result.get('errors'):
                print(f"An error occurred: GraphQL query failed with errors: {result['errors']}")
                break
            snapshots = (result.get('data') or {}).get('userTokenSnapshots') or []
        except Exception as e:
            print(f"An error occurred: {e}")
            break

        if not snapshots:
            # An empty follow-up page only means the previous page was the last
            # one; the block is empty only if nothing was collected at all.
            if not all_snapshots:
                print(f"There is no Aave v3 Arbitrum transaction on the block number: {block_number}")
            break

        all_snapshots.extend(snapshots)
        # A short page is the last page; also stop at the skip ceiling
        if len(snapshots) < first or skip + first >= max_skip:
            break
        skip += first

    return all_snapshots


In [8]:
def snapshots_to_dataframe(snapshots):
    """Flatten raw UserTokenSnapshot objects into a pandas DataFrame.

    Raw amounts (``netSupplied``, ``totalSupplied``, ``totalBorrowed``) are
    divided by ``10 ** decimals`` of the token to obtain token units.

    Parameters
    ----------
    snapshots : list of dict
        Snapshot objects with the keys ``userToken`` (containing ``user.id``
        and ``token.id`` / ``token.symbol`` / ``token.decimals``), ``event``,
        ``totalSupplied``, ``totalBorrowed``, ``netSupplied``, ``blockNumber``
        and ``blockTimestamp``.

    Returns
    -------
    pandas.DataFrame
        One row per snapshot with the columns ``block_number``, ``timestamp``,
        ``owner_address``, ``token_symbol``, ``token_address``,
        ``token_amount``, ``last_event``, ``total_supply``, ``total_borrow``.
        When ``snapshots`` is empty or not a list, a message is printed and an
        empty DataFrame with the same columns is returned.
    """
    columns = [
        "block_number",
        "timestamp",
        "owner_address",
        "token_symbol",
        "token_address",
        "token_amount",
        "last_event",
        "total_supply",
        "total_borrow",
    ]

    if not isinstance(snapshots, list) or not snapshots:
        print("There is no Aave v3 Arbitrum transaction on the given block number")
        # Truly empty frame (zero rows) with the same schema as the normal case
        return pd.DataFrame(columns=columns)

    rows = []
    for snapshot in snapshots:
        user_token = snapshot['userToken']
        token = user_token['token']
        # Raw amounts are integers scaled by the token's decimals
        scale = 10 ** int(token['decimals'])
        rows.append({
            "block_number": snapshot['blockNumber'],
            "timestamp": pd.to_datetime(int(snapshot['blockTimestamp']), unit='s'),
            "owner_address": user_token['user']['id'],
            "token_symbol": token['symbol'],
            "token_address": token['id'],
            "token_amount": int(snapshot['netSupplied']) / scale,
            "last_event": snapshot['event'],
            "total_supply": int(snapshot['totalSupplied']) / scale,
            "total_borrow": int(snapshot['totalBorrowed']) / scale,
        })
    return pd.DataFrame(rows, columns=columns)


In [9]:
# Fetch the UserTokenSnapshots recorded in exactly the given block
# (the query filters on blockNumber equal to block_number)
latest_snapshots = get_latest_user_token_snapshots(block_number)

In [10]:
# Convert the fetched snapshots to a DataFrame and show it as the cell output
df = snapshots_to_dataframe(latest_snapshots)
df

Unnamed: 0,block_number,timestamp,owner_address,token_symbol,token_address,token_amount,last_event,total_supply,total_borrow
0,7998790,2022-03-16 16:11:13,0x17d4e25d083bcc954a4d1690902caf24d93e83f0,WETH,0x82af49447d8a07e3bd95bd0d56f35241523fbab1,0.01,Interest,0.01,0.0
1,7998790,2022-03-16 16:11:13,0x235d19279630c73680a55db20a31210603e785b3,WETH,0x82af49447d8a07e3bd95bd0d56f35241523fbab1,0.95,Supply,0.95,0.0


In [11]:
# Save to CSV (index=False leaves the DataFrame's row index out of the file)
df.to_csv("user_token_snapshots_exact_block.csv", index=False)