In [1]:
import pandas as pd
from google.cloud import storage
from datetime import datetime, date, timedelta
import os

# Initialize Google Cloud Storage Client and Resources

In [None]:
# Storage handles: one bucket is read for the daily transaction files, the
# other receives the computed rewards files.
client = storage.Client()
txn_bucket = client.get_bucket('entropy-keeper-transactions')
rewards_bucket = client.get_bucket('entropy-rewards')

# Reference table of reward rates, indexed by date. Every value in the 'date'
# column is converted with pd.to_datetime before the column becomes the index,
# which lets later cells look rates up by a date key.
rewards_rate_table = (
    pd.read_parquet('gs://entropy-rewards/program-references/2022-05-01-rewards-rate-table.parquet')
    .assign(date=lambda table: table['date'].map(pd.to_datetime))
    .set_index('date')
)

# Loop Through Txns and Aggregate Rewards Per Keeper Wallet

In [None]:
def proportional_rewards(wallet_seconds, total_seconds, reward_pool):
    """Split ``reward_pool`` between wallets in proportion to their seconds.

    Parameters
    ----------
    wallet_seconds : pd.Series
        Seconds accumulated per wallet, indexed by 'entropy_keeper_address'.
    total_seconds : float
        Seconds accumulated by every transaction of the group.
    reward_pool : float
        Reward amount to distribute for the group.

    Returns
    -------
    pd.DataFrame
        Columns 'entropy_keeper_address' and 'rewards', one row per wallet.
    """
    if total_seconds > 0:
        wallet_rewards = reward_pool * (wallet_seconds / total_seconds)
    else:
        # No elapsed time was recorded (e.g. a single transaction in the
        # group), so there is nothing to apportion. Give every wallet 0
        # instead of dividing by zero and writing NaN rewards.
        wallet_rewards = wallet_seconds * 0.0
    return wallet_rewards.rename('rewards').reset_index()


blobs = txn_bucket.list_blobs(prefix='daily/2022-04/2')

for txn_blob in blobs:
    # The first 10 characters of the file name are used as the date key.
    # Named txn_date so the `date` class imported from datetime is not shadowed.
    txn_date = txn_blob.name.split('/')[-1][:10]
    file_path = 'gs://entropy-keeper-transactions/{}'.format(txn_blob.name)
    print(datetime.now(), "Starting the process for the {} txn set...".format(txn_date))

    df = pd.read_parquet(file_path)

    # Reward amounts for this date; looked up once rather than once per wallet.
    consume_events_reward = rewards_rate_table['consume_events_reward'][txn_date]
    other_events_reward = rewards_rate_table['other_events_reward'][txn_date]

    print(datetime.now(), "Formatting and organizing the dataframe...")
    # create a column which converts the iso timestamp to datetime
    df['date_time'] = df['block.timestamp.iso8601'].apply(lambda x: datetime.strptime(x, '%Y-%m-%dT%H:%M:%SZ'))

    # sort the dataframe by instruction type and datetime
    df_sorted = df.sort_values(by=['instruction_type', 'date_time'], ascending=[True, True])

    # create a column which shows the previous txns datetime relative to the current txn
    df_sorted['prev_date_time'] = df_sorted.groupby('instruction_type')['date_time'].shift()

    # calculate the time since last txn
    df_sorted['time_since_last_txn'] = df_sorted['date_time'] - df_sorted['prev_date_time']

    # Convert the gap to seconds. total_seconds() also counts whole days, which
    # summing only the hours/minutes/seconds components would drop. The first
    # txn of each instruction type has no predecessor (NaT) and contributes 0.
    # The filled result is assigned back explicitly, because an in-place fillna
    # on a column selection is not guaranteed to modify the frame.
    df_sorted['seconds_since_last_txn'] = df_sorted['time_since_last_txn'].dt.total_seconds().fillna(0)

    print(datetime.now(), 'Calculating the total time for each entropy keeper address...')
    # ConsumeEvents txns form one reward group; every other instruction type
    # is pooled into a second group.
    is_consume = df_sorted['instruction_type'] == 'ConsumeEvents'
    consume_txns = df_sorted[is_consume]
    other_txns = df_sorted[~is_consume]

    consumeEvents_time = consume_txns['seconds_since_last_txn'].sum()
    consumeEvents_time_by_wallet = consume_txns.groupby('entropy_keeper_address')['seconds_since_last_txn'].sum()
    otherEvents_time = other_txns['seconds_since_last_txn'].sum()
    otherEvents_time_by_wallet = other_txns.groupby('entropy_keeper_address')['seconds_since_last_txn'].sum()

    print(datetime.now(), 'Calculating the rewards for each entropy keeper address...')
    consumeEvents_rewards_df = proportional_rewards(consumeEvents_time_by_wallet, consumeEvents_time, consume_events_reward)
    otherEvents_rewards_df = proportional_rewards(otherEvents_time_by_wallet, otherEvents_time, other_events_reward)

    # A wallet can appear in both groups; add its two rewards together and
    # rank wallets from highest to lowest reward.
    total_rewards_df = pd.concat([otherEvents_rewards_df, consumeEvents_rewards_df]).groupby('entropy_keeper_address').sum().sort_values(by='rewards', ascending=False)
    total_rewards_df['date'] = txn_date

    local_path = txn_date + '-daily-rewards.parquet'
    total_rewards_df.to_parquet(local_path)

    try:
        print(datetime.now(), 'Uploading file to GCS...')
        rewards_blob = rewards_bucket.blob('daily/2022-04/' + txn_date + '-daily-rewards.parquet')
        rewards_blob.upload_from_filename(local_path)
    finally:
        # Remove the temporary local file even when the upload fails.
        print(datetime.now(), 'Deleting file from local memory...')
        os.remove(local_path)