In [39]:
import pandas as pd
from datetime import datetime, timedelta, date
import os
import requests
import time
from google.cloud import storage
import re

# Utility Function to Execute BitQuery Queries

In [40]:
def run_query(query, retries=10):
    """
    Run a GraphQL query against the BitQuery API and return the decoded JSON.

    An attempt counts as successful when the HTTP status is 200, the body
    decodes as JSON, and the decoded payload has no "errors" key. Any other
    outcome (including a connection error or a non-JSON body) is retried.
    Before retry number ``k`` (starting at 0) the function sleeps
    ``150 * k`` seconds, so the first retry is immediate.

    Parameters
    ----------
    query : str
        GraphQL query text, sent as the "query" field of the JSON body.
    retries : int, default 10
        Maximum number of retries after the initial attempt.

    Returns
    -------
    The decoded JSON payload of the first successful response.

    Raises
    ------
    RuntimeError
        If the initial attempt and all ``retries`` retries fail.
    """
    url = "https://graphql.bitquery.io/"
    # FIXME: the fallback key below is committed in the notebook. Rotate it and
    # drop the fallback once BITQUERY_API_KEY is configured everywhere.
    headers = {
        "X-API-KEY": os.environ.get(
            "BITQUERY_API_KEY", "BQYCaXaMZlqZrPCSQVsiJrKtxKRVcSe4"
        )
    }

    retries_counter = 0
    while True:
        # Reset per attempt so a failed attempt never reports stale values.
        status_code, reason, result = None, None, None
        try:
            response = requests.post(url, json={"query": query}, headers=headers)
            status_code, reason = response.status_code, response.reason
            result = response.json()
        except (requests.exceptions.RequestException, ValueError) as exc:
            # Connection failures and undecodable bodies are retried like any
            # other failed attempt instead of escaping the retry logic.
            reason = f"{reason or 'request error'} ({exc!r})"

        has_errors = isinstance(result, dict) and "errors" in result
        if status_code == 200 and result is not None and not has_errors:
            return result

        if retries_counter >= retries:
            raise RuntimeError(
                "Query failed after {} retries and return code is {}.{}".format(
                    retries_counter, status_code, query
                )
            )

        print(datetime.now(), f"Retry number {retries_counter}")
        if has_errors:
            print(result["errors"])
        delay = 150 * retries_counter
        print(datetime.now(), f"Query failed for reason: {reason}. sleeping for {delay} seconds and retrying...")
        time.sleep(delay)
        retries_counter += 1

# Dictionary Mapping data.base58 to Instruction Type

In [41]:
# Lookup table from a five-character prefix of an instruction's base58 data
# to the name of the instruction type. The empty prefix maps to an empty name.
instruction_type_dict = dict(
    [
        ("5QCjN", "CancelAllPerpOrders"),
        ("BNuyR", "CachePrices"),
        ("BcYfW", "PlacePerpOrder"),
        ("CruFm", "CacheRootBanks"),
        ("HRDyP", "ConsumeEvents"),
        ("QioWX", "CachePerpMarkets"),
        ("SCnns", "UpdateFunding"),
        ("Y8jvF", "UpdateRootBank"),
        ("", ""),
    ]
)

# Instantiate Google Cloud Storage Client and Resources

In [42]:
# Create the Cloud Storage client and fetch the bucket that the parquet files
# are uploaded to (under the 'raw/' prefix) by the loop further down.
GCS_BUCKET_NAME = 'entropy-keeper-transactions'
client = storage.Client()
bucket = client.get_bucket(GCS_BUCKET_NAME)

# Read in BitQuery Query

In [43]:
# Load the query text from disk. It is used as a %-format template that is
# filled in with the start and end timestamps of each query window.
with open('../entropy_instructions_bitquery.txt') as query_file:
    query_string = query_file.read()

# Initialize Starting Time Interval to Query

In [49]:
# `after` is the start of the first query window and `till` is its end,
# ten minutes later.
after = datetime(year=2022, month=5, day=4)
till = after + timedelta(minutes=10)

# Loop Through Query Results and Write to Google Cloud Storage

In [None]:
# Walk forward through consecutive time windows. For each window: run the
# BitQuery query, write the (possibly empty) result to a local parquet file,
# append non-empty results to BigQuery, upload the file to Cloud Storage, and
# delete the local copy.

# Step size of the loop. Derived from the initial `after`/`till` pair so the
# loop always advances by exactly one window (no gaps or overlaps).
window = till - after
assert window > timedelta(0), 'till must be later than after'

# Exclusive upper bound of the time range to process.
end_time = datetime(2022, 5, 5, 3, 30, 0)
timestamp_format = '%Y-%m-%dT%H:%M:%SZ'

# Only these instruction types are kept in the reduced output.
kept_instruction_types = [
    'UpdateRootBank',
    'CacheRootBanks',
    'CachePerpMarkets',
    'CachePrices',
    'UpdateFunding',
    'ConsumeEvents',
]
reduced_columns = [
    'block.height',
    'block.timestamp.iso8601',
    'transaction.feePayer',
    'instruction_type',
    'transaction.signature',
]

while after < end_time:

    after_param = after.strftime(timestamp_format)
    till_param = till.strftime(timestamp_format)
    print(datetime.now(), 'Running from {} to {}'.format(after_param, till_param))

    result = run_query(query_string % (after_param, till_param))
    print(datetime.now(), 'Query completed!')
    df = pd.json_normalize(result['data']['solana']['instructions'])

    # The local file name and the GCS object name are the same for empty and
    # non-empty windows; objects are grouped by the window's start date.
    file_name = after_param + '-' + till_param + '.parquet'
    blob_name = 'raw/' + after.date().strftime('%Y-%m-%d') + '/' + file_name

    try:
        if df.empty:
            # An empty file is still uploaded for windows with no results.
            print(datetime.now(), 'Creating empty parquet file...')
            df.to_parquet(file_name)
        else:
            # Classify each instruction by the first five characters of its
            # base58 data; prefixes missing from the lookup become 'other'.
            df['data.base58_trunc'] = df['data.base58'].str[:5]
            df['instruction_type'] = (
                df['data.base58_trunc'].map(instruction_type_dict).fillna('other')
            )

            # Build a new frame (no in-place rename on a filtered slice).
            df_reduced = df.loc[
                df['instruction_type'].isin(kept_instruction_types),
                reduced_columns,
            ].rename(columns={
                'transaction.feePayer': 'entropy_keeper_address',
                'transaction.signature': 'transaction_id',
            })

            print(datetime.now(), 'Creating parquet file...')
            df_reduced.to_parquet(file_name)

            # The BigQuery copy gets underscore column names; the parquet file
            # written above keeps the dotted block.* names.
            print(datetime.now(), 'Writing data to GBQ...')
            df_reduced.rename(columns={
                'block.height': 'block_height',
                'block.timestamp.iso8601': 'block_timestamp_iso8601',
            }).to_gbq('solana.entropy_keeper_transactions', if_exists='append')

        print(datetime.now(), 'Uploading file to GCS...')
        blob = bucket.blob(blob_name)
        blob.upload_from_filename(file_name)
    finally:
        # Remove the local file even when the GBQ write or the upload fails,
        # so failed runs do not leave stray parquet files behind.
        if os.path.exists(file_name):
            print(datetime.now(), 'Deleting file from local memory')
            os.remove(file_name)

    after += window
    till += window

2022-05-01 23:00:00.146917 Running from 2022-04-22T12:30:00Z to 2022-04-22T13:00:00Z
2022-05-01 23:00:00.795833 Query completed!
2022-05-01 23:00:00.796984 Creating empty parquet file...
2022-05-01 23:00:00.799323 Uploading file to GCS...
2022-05-01 23:00:01.058215 Deleting file from local memory
2022-05-01 23:00:01.058845 Running from 2022-04-22T13:00:00Z to 2022-04-22T13:30:00Z
2022-05-01 23:00:07.367894 Query completed!
2022-05-01 23:00:07.424377 Creating parquet file...
2022-05-01 23:00:07.430805 Writing data to GBQ...


100%|██████████| 1/1 [00:00<00:00, 10672.53it/s]


2022-05-01 23:00:15.116095 Uploading file to GCS...
2022-05-01 23:00:15.399688 Deleting file from local memory
2022-05-01 23:00:15.399993 Running from 2022-04-22T13:30:00Z to 2022-04-22T14:00:00Z
2022-05-01 23:00:21.832574 Query completed!
2022-05-01 23:00:21.890430 Creating parquet file...
2022-05-01 23:00:21.896770 Writing data to GBQ...


100%|██████████| 1/1 [00:00<00:00, 11491.24it/s]


2022-05-01 23:00:26.938681 Uploading file to GCS...
2022-05-01 23:00:27.222765 Deleting file from local memory
2022-05-01 23:00:27.223439 Running from 2022-04-22T14:00:00Z to 2022-04-22T14:30:00Z
2022-05-01 23:00:28.848985 Query completed!
2022-05-01 23:00:28.855458 Creating parquet file...
2022-05-01 23:00:28.857864 Writing data to GBQ...


100%|██████████| 1/1 [00:00<00:00, 7913.78it/s]


2022-05-01 23:00:33.042948 Uploading file to GCS...
2022-05-01 23:00:33.319362 Deleting file from local memory
2022-05-01 23:00:33.319981 Running from 2022-04-22T14:30:00Z to 2022-04-22T15:00:00Z
2022-05-01 23:00:34.930985 Query completed!
2022-05-01 23:00:34.931542 Creating empty parquet file...
2022-05-01 23:00:34.933413 Uploading file to GCS...
2022-05-01 23:00:35.095430 Deleting file from local memory
2022-05-01 23:00:35.095996 Running from 2022-04-22T15:00:00Z to 2022-04-22T15:30:00Z
2022-05-01 23:00:36.761233 Query completed!
2022-05-01 23:00:36.768221 Creating parquet file...
2022-05-01 23:00:36.770959 Writing data to GBQ...
