In [1]:
import pandas as pd
from google.cloud import storage
from google.api_core import page_iterator
from datetime import datetime, date, timedelta
import os

# Utility Function to List Out Google Cloud Storage Directories for a Given File Path

In [2]:
def _item_to_value(iterator, item):
    return item

def list_directories(bucket_name, prefix):
    """Return the entries listed under the ``prefixes`` key directly beneath ``prefix``.

    Pages through the ``/b/<bucket_name>/o`` endpoint with ``delimiter='/'``
    and collects every item found under the ``prefixes`` key of the responses.

    Args:
        bucket_name: Name of the bucket to list.
        prefix: Path to look under. A trailing '/' is appended when a
            non-empty prefix does not already end with one.

    Returns:
        list: The raw prefix entries, in the order the iterator yields them.

    NOTE(review): the request goes through the client's private
    ``_connection`` attribute.
    """
    needs_trailing_slash = bool(prefix) and not prefix.endswith('/')
    normalized_prefix = prefix + '/' if needs_trailing_slash else prefix

    gcs = storage.Client()

    directory_iterator = page_iterator.HTTPIterator(
        client=gcs,
        api_request=gcs._connection.api_request,
        path=f"/b/{bucket_name}/o",
        items_key='prefixes',
        item_to_value=_item_to_value,
        extra_params={
            "projection": "noAcl",
            "prefix": normalized_prefix,
            "delimiter": '/',
        },
    )

    return list(directory_iterator)

# Define Query to Execute Against GBQ for Keeper Total Rewards Calculation

In [3]:
# SQL template: sums daily_keeper_reward per entropy_keeper_address over every
# row of entropy.keeper_rewards_daily dated on or before a cutoff date.
# The single '{}' placeholder is the cutoff and is filled in with str.format()
# by the cell that runs the query.
query = '''

select
  krd.entropy_keeper_address,
  sum(krd.daily_keeper_reward) as total_keeper_reward
from entropy.keeper_rewards_daily krd
where
  date(krd.date) <= date('{}')
group by 1

'''

# Initialize Google Cloud Storage Client and Resources

In [4]:
# Storage client, plus a handle to the 'entropy-rewards' bucket that the
# rewards parquet files are uploaded to.
client = storage.Client()
rewards_bucket = client.get_bucket('entropy-rewards')

In [5]:
# Entries from list_directories are expected to look like 'raw/<name>/'; keep
# only <name>. The prefix is removed explicitly because str.strip() treats its
# argument as a *set of characters*: strip("'raw/") would also eat any leading
# or trailing ', r, a, w or / that belongs to the directory name itself.
raw_prefix = 'raw/'
date_list = [
    (entry[len(raw_prefix):] if entry.startswith(raw_prefix) else entry).strip('/')
    for entry in list_directories('entropy-keeper-transactions', raw_prefix)
]
# Preview every entry except the last one (the same slice the processing loop uses).
date_list[:-1]

['2022-04-22',
 '2022-04-23',
 '2022-04-24',
 '2022-04-25',
 '2022-04-26',
 '2022-04-27',
 '2022-04-28',
 '2022-04-29',
 '2022-04-30',
 '2022-05-01']

# Loop Through Txns and Aggregate Rewards Per Keeper Wallet "as of" a Specific Day

In [18]:
# Columns written to the parquet file and to BigQuery, in output order.
output_columns = ['as_of_date', 'entropy_keeper_address', 'total_keeper_reward']

# For every listed day except the last: query cumulative rewards "as of" that
# day, append them to BigQuery, and upload a parquet copy to the rewards bucket
# (once under a per-month path and once as the "current" file).
#
# The loop variable is `as_of_date` rather than `date` so it does not rebind
# the `date` class imported from datetime for the rest of the notebook.
for as_of_date in date_list[:-1]:
    print(datetime.now(), "Starting the process for the {} txn set...".format(as_of_date))

    # The value is interpolated into the SQL text, so insist that it parses as
    # a calendar date first; strptime raises ValueError otherwise.
    datetime.strptime(as_of_date, '%Y-%m-%d')

    rewards_df = pd.read_gbq(query=query.format(as_of_date))
    rewards_df['as_of_date'] = as_of_date
    output_df = rewards_df[output_columns]

    # Build the local file name once; it is reused for both uploads and cleanup.
    parquet_name = 'total-rewards-as-of-' + as_of_date + '.parquet'
    try:
        print(datetime.now(), 'Creating parquet file...')
        output_df.to_parquet(parquet_name)

        print(datetime.now(), 'Writing data to GBQ...')
        # NOTE(review): if_exists='append' means re-running this cell appends
        # the same rows again — confirm the target table tolerates that.
        output_df.to_gbq('entropy.keeper_rewards_total', if_exists='append')

        print(datetime.now(), 'Uploading file to GCS...')
        # as_of_date[:7] is the 'YYYY-MM' part, used as the per-month folder.
        blob = rewards_bucket.blob('cumulative/' + as_of_date[:7] + '/' + parquet_name)
        blob.upload_from_filename(parquet_name)

        print(datetime.now(), 'Uploading "current" file to GCS...')
        # Overwritten on every iteration, so it ends up holding the last day processed.
        blob = rewards_bucket.blob('cumulative/current-total-rewards.parquet')
        blob.upload_from_filename(parquet_name)
    finally:
        # Always remove the temporary local file, even when a step above fails.
        if os.path.exists(parquet_name):
            print(datetime.now(), 'Deleting file from local memory...')
            os.remove(parquet_name)

2022-05-03 00:07:16.969724 Starting the process for the 2022-04-22 txn set...
2022-05-03 00:07:17.966110 Creating parquet file...
2022-05-03 00:07:17.969901 Writing data to GBQ...


100%|██████████| 1/1 [00:00<00:00, 7752.87it/s]


2022-05-03 00:07:21.688508 Uploading file to GCS...
2022-05-03 00:07:22.008852 Uploading "current" file to GCS...
2022-05-03 00:07:22.216772 Deleting file from local memory...
2022-05-03 00:07:22.218172 Starting the process for the 2022-04-23 txn set...
2022-05-03 00:07:23.133433 Creating parquet file...
2022-05-03 00:07:23.136488 Writing data to GBQ...


100%|██████████| 1/1 [00:00<00:00, 7667.83it/s]


2022-05-03 00:07:31.441178 Uploading file to GCS...
2022-05-03 00:07:31.711471 Uploading "current" file to GCS...
2022-05-03 00:07:31.971714 Deleting file from local memory...
2022-05-03 00:07:31.972313 Starting the process for the 2022-04-24 txn set...
2022-05-03 00:07:32.960985 Creating parquet file...
2022-05-03 00:07:32.964302 Writing data to GBQ...


100%|██████████| 1/1 [00:00<00:00, 9489.38it/s]


2022-05-03 00:07:37.158669 Uploading file to GCS...
2022-05-03 00:07:37.387884 Uploading "current" file to GCS...
2022-05-03 00:07:37.657195 Deleting file from local memory...
2022-05-03 00:07:37.657730 Starting the process for the 2022-04-25 txn set...
2022-05-03 00:07:38.571799 Creating parquet file...
2022-05-03 00:07:38.574735 Writing data to GBQ...


100%|██████████| 1/1 [00:00<00:00, 12192.74it/s]


2022-05-03 00:07:44.223123 Uploading file to GCS...
2022-05-03 00:07:44.460305 Uploading "current" file to GCS...
2022-05-03 00:07:44.690126 Deleting file from local memory...
2022-05-03 00:07:44.690672 Starting the process for the 2022-04-26 txn set...
2022-05-03 00:07:45.603005 Creating parquet file...
2022-05-03 00:07:45.606165 Writing data to GBQ...


100%|██████████| 1/1 [00:00<00:00, 11491.24it/s]


2022-05-03 00:07:50.484208 Uploading file to GCS...
2022-05-03 00:07:50.773674 Uploading "current" file to GCS...
2022-05-03 00:07:50.997006 Deleting file from local memory...
2022-05-03 00:07:50.997580 Starting the process for the 2022-04-27 txn set...
2022-05-03 00:07:51.944956 Creating parquet file...
2022-05-03 00:07:51.948060 Writing data to GBQ...


100%|██████████| 1/1 [00:00<00:00, 14665.40it/s]


2022-05-03 00:07:54.974926 Uploading file to GCS...
2022-05-03 00:07:55.217406 Uploading "current" file to GCS...
2022-05-03 00:07:55.476831 Deleting file from local memory...
2022-05-03 00:07:55.477354 Starting the process for the 2022-04-28 txn set...
2022-05-03 00:07:56.385755 Creating parquet file...
2022-05-03 00:07:56.390495 Writing data to GBQ...


100%|██████████| 1/1 [00:00<00:00, 8176.03it/s]


2022-05-03 00:08:17.002619 Uploading file to GCS...
2022-05-03 00:08:17.236798 Uploading "current" file to GCS...
2022-05-03 00:08:17.538663 Deleting file from local memory...
2022-05-03 00:08:17.539254 Starting the process for the 2022-04-29 txn set...
2022-05-03 00:08:18.510886 Creating parquet file...
2022-05-03 00:08:18.514119 Writing data to GBQ...


100%|██████████| 1/1 [00:00<00:00, 13231.24it/s]


2022-05-03 00:08:23.015653 Uploading file to GCS...
2022-05-03 00:08:23.255040 Uploading "current" file to GCS...
2022-05-03 00:08:23.528716 Deleting file from local memory...
2022-05-03 00:08:23.529253 Starting the process for the 2022-04-30 txn set...
2022-05-03 00:08:24.448779 Creating parquet file...
2022-05-03 00:08:24.451854 Writing data to GBQ...


100%|██████████| 1/1 [00:00<00:00, 13530.01it/s]


2022-05-03 00:08:29.396767 Uploading file to GCS...
2022-05-03 00:08:29.666027 Uploading "current" file to GCS...
2022-05-03 00:08:29.942642 Deleting file from local memory...
2022-05-03 00:08:29.943199 Starting the process for the 2022-05-01 txn set...
2022-05-03 00:08:30.936090 Creating parquet file...
2022-05-03 00:08:30.939378 Writing data to GBQ...


100%|██████████| 1/1 [00:00<00:00, 13066.37it/s]


2022-05-03 00:08:35.447638 Uploading file to GCS...
2022-05-03 00:08:35.707299 Uploading "current" file to GCS...
2022-05-03 00:08:35.975789 Deleting file from local memory...


In [11]:
# Display the frame left behind by the most recent run of the loop cell.
# NOTE(review): the saved output appears to predate the loop's final iteration
# (it shows as_of_date 2022-04-22) — re-run this cell after the loop to refresh it.
rewards_df

Unnamed: 0,entropy_keeper_address,total_keeper_reward,as_of_date
0,2dtWbt8X3uQNRNLdwwybDuFLzPrZribVXDdSVaRE5JzR,4650.850933,2022-04-22
1,6i7XMXBUDd2b8Y57xxaq2CaMihHKBomSS1K7mz6T7Nxm,605.574204,2022-04-22
2,432DaYkzyf5EWRjJXHD7baJp3f95djNtSTtWPNg1VQUM,840.187685,2022-04-22
3,HxeoZxH4JTrYGs2RTDF2p7Vsd7pmV3j5o5xTRoXmDg47,269.736585,2022-04-22
4,tCw98YuVqNruoGQbqTDNoh6f8wz8fiecRVNWnTc2rhG,1.464703,2022-04-22
5,5Cky8emeF1q1SHc9gjHLrVYBbCarRZNPtCnVMYAKcoP,859.153139,2022-04-22
6,arbp6SkGwKz7qkUt9StvZKTWFfDHeJfusUSpw6aqR27,327.549622,2022-04-22
7,AxpcUKnhF1pNNw9x5GbcYY16TCmqvFxDyHxEKyrpkcJe,305.579281,2022-04-22
8,DBAMcasPmgSrStwH82DhVSKaWZrsjUR3nz6M3vc13aCs,95.289504,2022-04-22
9,FytZdmfYW6oCP5jhiBCE9xmUhzqVX6i2hw3NrwcGBYMo,1.26267,2022-04-22
