In [2]:
import pandas as pd
from datetime import datetime, timedelta, date
import os
import requests
import time
from google.cloud import storage
import re

# Utility Function to Execute BitQuery Queries

In [None]:
def run_query(query, retries=10, backoff_seconds=150):
    """
    Execute a GraphQL query against the BitQuery API, retrying on failure.

    One initial attempt is made, followed by at most `retries` retries.
    An attempt counts as failed when the HTTP call raises, the body cannot be
    decoded as JSON, the status code is not 200, or the decoded payload
    contains an "errors" key. Before retry number k (0-based) the function
    sleeps `backoff_seconds * k` seconds, so the first retry is immediate.

    Args:
        query: GraphQL query text sent as the "query" field of the JSON body.
        retries: Maximum number of retries after the initial attempt.
        backoff_seconds: Base of the linear back-off between retries.

    Returns:
        The decoded JSON payload of the first successful response.

    Raises:
        Exception: If every attempt failed.
    """
    # Prefer a key supplied through the environment so the secret does not
    # have to live in the notebook.
    # FIXME: the literal fallback is kept only so existing runs keep working;
    # rotate that key and remove it from source control.
    api_key = os.environ.get("BITQUERY_API_KEY", "BQYCaXaMZlqZrPCSQVsiJrKtxKRVcSe4")
    headers = {"X-API-KEY": api_key}
    url = "https://graphql.bitquery.io/"

    status_code = None
    failure = None

    for attempt in range(retries + 1):
        if attempt > 0:
            retry_number = attempt - 1
            delay = backoff_seconds * retry_number
            print(datetime.now(), f"Retry number {retry_number}")
            print(datetime.now(), f"Query failed for reason: {failure}. sleeping for {delay} seconds and retrying...")
            time.sleep(delay)

        # Reset so the final error message never reports a stale status code.
        status_code = None
        try:
            response = requests.post(url, json={"query": query}, headers=headers)
            status_code = response.status_code
            result = response.json()
        except (requests.RequestException, ValueError) as exc:
            # Transport errors and undecodable bodies are retried like any
            # other failed attempt instead of escaping (or hitting unbound
            # locals, as a bare `except:` around this code would).
            failure = repr(exc)
            continue

        if isinstance(result, dict) and "errors" in result:
            print(result["errors"])
            failure = "GraphQL errors in response"
        elif status_code != 200:
            failure = response.reason
        else:
            # Return the payload that was just validated rather than
            # re-parsing the response.
            return result

    raise Exception(
        "Query failed after {} retries and return code is {}.{}".format(
            retries, status_code, query
        )
    )

# Dictionary Mapping data.base58 to Instruction Type

In [None]:
# Lookup table keyed by the first five characters of an instruction's
# `data.base58` value; the value is the instruction type name assigned to it.
# The loop cell labels any prefix that is not listed here as 'other'.
instruction_type_dict = {
    '5QCjN': 'CancelAllPerpOrders',
    'BNuyR': 'CachePrices',
    'BcYfW': 'PlacePerpOrder',
    'CruFm': 'CacheRootBanks',
    'HRDyP': 'ConsumeEvents',
    'QioWX': 'CachePerpMarkets',
    'SCnns': 'UpdateFunding',
    'Y8jvF': 'UpdateRootBank',
    # An empty prefix maps to an empty type name rather than 'other'.
    '': '',
}

# Instantiate Google Cloud Storage Client and Resources

In [None]:
# Create the Cloud Storage client. No project or credentials are passed here,
# so the client uses whatever the library resolves from the environment.
client = storage.Client()
# Handle to the destination bucket; the upload loop later in this notebook
# creates its blobs on this object.
bucket = client.get_bucket('entropy-keeper-transactions')

# Read in BitQuery Query

In [4]:
# Read the whole query template into memory. The path is relative, so it is
# resolved against the kernel's current working directory.
# The loop cell formats this text with `%` and a (start, end) timestamp pair,
# so the file presumably contains two %-style placeholders — verify.
with open('../entropy_instructions_bitquery.txt') as query:
    query_string = query.read()

# Initialize Starting Time Interval to Query

In [None]:
# Lower bound of the first query window: midnight on 2022-04-22.
after = datetime(year=2022, month=4, day=22)
# Upper bound of the first window, thirty minutes after the lower bound.
till = after + timedelta(seconds=30 * 60)

# Loop Through Query Results and Write to Google Cloud Storage

In [None]:
# Walk forward through consecutive query windows. For each window: run the
# query, label every instruction by its data prefix, keep only the selected
# instruction types, and upload the reduced table to GCS as a parquet file.
end_time = datetime(2022, 4, 29, 22, 0, 0)   # exclusive upper bound for window starts
window_step = timedelta(minutes=30)          # amount both window bounds advance per iteration
timestamp_format = '%Y-%m-%dT%H:%M:%SZ'

# Instruction types that are written out; all other rows are dropped.
kept_instruction_types = [
    'UpdateRootBank',
    'CacheRootBanks',
    'CachePerpMarkets',
    'CachePrices',
    'UpdateFunding',
    'ConsumeEvents',
]
# Columns of the normalized response that are written out, in output order.
kept_columns = [
    'block.height',
    'block.timestamp.iso8601',
    'transaction.feePayer',
    'instruction_type',
    'transaction.signature',
]

while after < end_time:

    after_param = after.strftime(timestamp_format)
    till_param = till.strftime(timestamp_format)
    print(datetime.now(), 'Running from {} to {}'.format(after_param, till_param))

    result = run_query(query_string % (after_param, till_param))
    print(datetime.now(), 'Query completed!')
    df = pd.json_normalize(result['data']['solana']['instructions'])

    if df.empty:
        print(datetime.now(), 'DataFrame empty!')
    else:
        # The first five characters of the payload identify the instruction type.
        # `.str` slicing leaves missing values as NaN, which then fall through
        # to 'other' below instead of raising inside a per-row lambda.
        df['data.base58_trunc'] = df['data.base58'].str[:5]
        df['instruction_type'] = df['data.base58_trunc'].map(
            lambda prefix: instruction_type_dict.get(prefix, 'other')
        )

        # Single .loc selection plus a non-inplace rename yields an independent
        # frame, avoiding in-place mutation of a chained-indexing slice.
        df_reduced = (
            df.loc[df['instruction_type'].isin(kept_instruction_types), kept_columns]
            .rename(columns={
                'transaction.feePayer': 'entropy_keeper_address',
                'transaction.signature': 'transaction_id',
            })
        )

        # NOTE(review): the name contains ':' characters from the timestamps,
        # which is not a valid local file name on Windows.
        file_name = after_param + '-' + till_param + '.parquet'

        try:
            print(datetime.now(), 'Creating parquet file...')
            df_reduced.to_parquet(file_name)

            print(datetime.now(), 'Uploading file to GCS...')
            blob = bucket.blob('raw/' + after.date().strftime('%Y-%m-%d') + '/' + file_name)
            blob.upload_from_filename(file_name)
        finally:
            # Remove the temporary file even when the write or upload fails,
            # so failed runs do not leave stale parquet files behind.
            if os.path.exists(file_name):
                print(datetime.now(), 'Deleting file from local memory')
                os.remove(file_name)

    # Advance both bounds exactly once per iteration, whichever branch ran.
    after += window_step
    till += window_step