

This notebook transforms the "Lightblocks Full Raw Data" dataset into the "Lightblocks Full Processed Data" dataset.

In [None]:
# All imports are here
from IPython.display import display, HTML

from datetime import datetime
import time

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd 

import mipasa

import warnings
warnings.filterwarnings('ignore123')


AssertionError: invalid action: 'ignore123'

In [None]:
# Names of the two feeds this notebook works with: it reads from the raw
# feed and writes its results to the processed ("clean") feed.
RAW_FEED_NAME = 'Lightblocks Raw'
CLEAN_FEED_NAME = 'Lightblocks Full Processed Data'

# One client instance is shared by all cells below.
client = mipasa.Client()

# Source feed holding the raw Lightblocks files.
raw_feed = client.get_feed_by_name(RAW_FEED_NAME)

# Destination feed that receives the processed files.
clean_feed = client.get_feed_by_name(CLEAN_FEED_NAME)


In [None]:
def upload_clean_file_mipasa(df, filename):
    """Upload a dataframe to the clean feed as a CSV file.

    The frame is serialized with ``df.to_csv()`` (default options), encoded
    as UTF-8 and attached to a new data version of the module-level
    ``clean_feed``.

    Args:
        df: DataFrame to upload.
        filename: Name the uploaded file gets inside the feed.
    """
    version = mipasa.DataVersion(clean_feed)
    csv_payload = df.to_csv().encode('utf-8')
    csv_file = mipasa.File.from_bytes(version, csv_payload, 'text/csv')
    csv_file.name = filename
    version.upload_file(csv_file)

In [None]:
# Load the relays dataset from the raw feed.
relayers_file = raw_feed.get_file('relayers.csv')
relays_df = relayers_file.get_as_dataframe()
# The copy to the clean feed is currently disabled:
# upload_clean_file_mipasa(relays_df, 'relayers.csv')


In [None]:
# Load the builders dataset from the raw feed.
builders_file = raw_feed.get_file('builders.csv')
builders_df = builders_file.get_as_dataframe()
# The copy to the clean feed is currently disabled:
# upload_clean_file_mipasa(builders_df, 'builders.csv')

In [None]:
# Load the ppds dataset from the raw feed.
ppds_df = raw_feed.get_file('ppds_full.csv').get_as_dataframe()
# The verbatim copy to the clean feed is currently disabled:
# upload_clean_file_mipasa(ppds_df, 'ppds_full.csv')

# Slots listed in this file are excluded from the filtered dataset below.
unpaid_df = raw_feed.get_file('unpaid-blocks.csv').get_as_dataframe()

# Keep only the rows whose slot is NOT listed as unpaid. The explicit copy
# keeps the column assignment below from touching ppds_df.
filtered_ppds_df = ppds_df[~ppds_df['slot'].isin(unpaid_df['slot'].values)].copy()

# Reduce created_at to a calendar date (the time of day is dropped).
filtered_ppds_df['created_at'] = pd.to_datetime(filtered_ppds_df['created_at']).dt.date

# Upload the FILTERED frame; the previous code uploaded the unfiltered
# ppds_df under this name.
# NOTE(review): the filename typo ('fitered') is kept on purpose because
# downstream readers of the clean feed may look the file up by this exact
# name - confirm before renaming.
upload_clean_file_mipasa(filtered_ppds_df, 'fitered_ppds_full.csv')

In [None]:
# Copy this file from the raw feed to the clean feed under the same name
# (the upload helper re-serializes the frame to CSV).
BBR9_FILENAME = '4906799_4935598.csv'
bbr9_df = raw_feed.get_file(BBR9_FILENAME).get_as_dataframe()
upload_clean_file_mipasa(bbr9_df, BBR9_FILENAME)

In [None]:
# Show the unfiltered ppds frame for a quick visual check.
ppds_df

In [None]:
# Load the rewards dataset from the raw feed.
rewards_file = raw_feed.get_file('rewards.csv')
rewards_df = rewards_file.get_as_dataframe()
# The copy to the clean feed is currently disabled:
# upload_clean_file_mipasa(rewards_df, 'rewards.csv')

In [None]:
# Chart 5: validator reward per relay and day.

# TODO: Check which of the rewards were actually paid in blockchain

# Tag every row with its calendar date and keep only the last row seen for
# each slot.
validator_reward_over_time_df = (
    filtered_ppds_df
    .assign(date=lambda ppds: pd.to_datetime(ppds['created_at']).dt.date)
    .drop_duplicates(subset=['slot'], keep='last')
    .reset_index(drop=True)
)

# Per (relay, day): summed value, number of rows, smallest and largest value.
# Despite the variable name, 'value' holds the SUM; a daily average can be
# derived downstream as value / count.
avg_daily_reward_per_relay = (
    validator_reward_over_time_df
    .groupby(['relayer_id', 'date'])
    .agg(
        value=('value', 'sum'),
        count=('slot', 'count'),
        min=('value', 'min'),
        max=('value', 'max'),
    )
    .reset_index()
)

upload_clean_file_mipasa(avg_daily_reward_per_relay, 'avg_daily_reward_per_relay.csv')


In [None]:
# De-duplicate every *_slots.csv file of the raw feed and upload the result
# to the clean feed with a 'clean_' prefix. All files of this cell are
# attached to one shared data version.
ver = mipasa.DataVersion(clean_feed)
for raw_file in raw_feed.files:
    print(raw_file.name)
    if '_slots.csv' not in raw_file.name:
        continue  # only files with '_slots.csv' in their name are processed

    slots_df = raw_feed.get_file(raw_file.name).get_as_dataframe()

    # Duplicates are removed because the Bloxroute Max Profit relay includes
    # information from both the Bloxroute Ethical and Bloxroute Regulated
    # relays, which creates repetitions. The last occurrence is kept.
    deduped_df = slots_df.drop_duplicates(
        subset=['block_hash', 'relayer_id', 'builder_id', 'value'],
        keep='last',
    ).reset_index(drop=True)

    cleaned_file = mipasa.File.from_bytes(ver, deduped_df.to_csv().encode('utf-8'), 'text/csv')
    cleaned_file.name = 'clean_' + raw_file.name
    ver.upload_file(cleaned_file)


In [None]:
# Concatenate all BBR frames into one.
# BBR stands for Builder Block Received and represents block offers that were sent from the Builder to Relay
# It's a large dataset since for each slot multiple builders send several offers to one or more relays
# Dataset presented here holds data over one week from 20.10.2022 to 26.10.2022

# The clean feed handle is fetched again here, as in the original cell.
# NOTE(review): presumably this refreshes the handle after the uploads above - confirm.
clean_feed = client.get_feed_by_name('Lightblocks Full Processed Data')

# Collect the frames first and concatenate once: growing a DataFrame with
# pd.concat inside the loop re-copies all accumulated rows on every iteration.
slot_frames = [
    raw_feed.get_file(f.name).get_as_dataframe()
    for f in raw_feed.files
    if '_slots.csv' in f.name  # only files with '_slots.csv' in their name
]

# pd.concat raises on an empty list, so fall back to an empty frame when the
# feed contains no matching file (same result as the original loop).
week_bbr_df = pd.concat(slot_frames) if slot_frames else pd.DataFrame()

# The helper defined in this notebook is upload_clean_file_mipasa; the
# previously used name upload_clean_file_to_mipasa does not exist.
upload_clean_file_mipasa(week_bbr_df, 'week-bbr.csv')

In [None]:
# This dataset is the basis for the aggregated number of proposals sent by
# each builder, which lets us investigate how builders use various strategies
# to get their bids through.

# Data for chart #13: Growth of amount of proposed blocks being sent per slot
# A 'date' column (calendar date of 'timestamp') is added; the frame is then
# indexed by that date - the column itself is kept - and sorted by it.
bbrs_over_time_df = (
    week_bbr_df
    .assign(date=lambda bbrs: pd.to_datetime(bbrs['timestamp']).dt.date)
    .set_index('date', drop=False)
    .sort_index()
)

In [None]:
# 13 Growth of amount of proposed blocks being sent per slot (Contd.)

# We break the set into separate files, one per calendar day between the
# first and the last date present in the index.
date_list = pd.date_range(start=bbrs_over_time_df.index.min(), end=bbrs_over_time_df.index.max(), normalize=True)

for _day in date_list:
    # The 'date' column holds datetime.date objects while date_range yields
    # pandas Timestamps. Comparing the two directly never matches on
    # pandas >= 2.0 (and warns on 1.x), so compare date with date.
    _filtered = bbrs_over_time_df[bbrs_over_time_df['date'] == _day.date()]

    # Number of distinct slots each builder sent at least one BBR for on that day.
    _agg = _filtered.groupby('builder_id').agg(bbrs=('slot', 'nunique')).reset_index()

    # The filename deliberately keeps the full Timestamp rendering of _day, so
    # existing file names in the clean feed do not change.
    # The helper defined in this notebook is upload_clean_file_mipasa; the
    # previously used name upload_clean_file_to_mipasa does not exist.
    upload_clean_file_mipasa(_agg, f'{_day}_builders_num_bbr.csv')


In [None]:
# 14 : Understanding correlation between number of sent bids and win rate
min_slot = 4949999 # 20/10
max_slot = 5000399 # 26/10

# Aggregate BBRs by builder id: number of distinct slots each builder bid on
result_bbrs = week_bbr_df.groupby('builder_id').agg(bbrs=('slot', 'nunique'))

# Filter ppds to match the bbrs range selection and aggregate by builder id.
# The range is half-open: max_slot itself is excluded.
week_ppd_df = ppds_df[(ppds_df.slot >= min_slot) & (ppds_df.slot < max_slot)]
result_ppds = week_ppd_df.groupby('builder_id').agg(ppds=('slot', 'nunique'), value=('value', 'sum'))

# Prepare builders dataset for merge: align the key column name and sort by it.
# sort_values returns a new frame, so its result has to be kept; the previous
# code discarded it, which made the sort a no-op.
b_df = builders_df.rename(columns={'id': 'builder_id'}).sort_values('builder_id')

# Merge all three datasets on builder_id
merged_df = pd.merge(result_ppds, result_bbrs, how='inner', on='builder_id').reset_index()
merged_df = pd.merge(merged_df, b_df, how='inner', on='builder_id')

# The helper defined in this notebook is upload_clean_file_mipasa; the
# previously used name upload_clean_file_to_mipasa does not exist.
upload_clean_file_mipasa(merged_df, 'builders_ppds_per_bbrs.csv')


In [None]:
# Data for chart 12: Builder growth per relay
# For each (relay, builder) pair we record the lowest slot and the earliest
# created_at value present in the week's BBR data, i.e. when the builder was
# first seen on that relay.
builder_first_slot_per_relay_df = (
    week_bbr_df
    .groupby(['relayer_id', 'builder_id'])
    .agg(slot=('slot', 'min'), created_at=('created_at', 'min'))
    .reset_index()
)

# The helper defined in this notebook is upload_clean_file_mipasa; the
# previously used name upload_clean_file_to_mipasa does not exist.
upload_clean_file_mipasa(builder_first_slot_per_relay_df, 'builder_first_slot_per_relay_df.csv')


In [None]:
# Copy the relays-builder dataset from the raw feed to the clean feed
relays_builder_df = raw_feed.get_file('relays-builder.csv').get_as_dataframe()
# The helper defined in this notebook is upload_clean_file_mipasa; the
# previously used name upload_clean_file_to_mipasa does not exist.
upload_clean_file_mipasa(relays_builder_df, 'relays-builder.csv')

In [None]:
# Copy the unpaid-blocks dataset from the raw feed to the clean feed
unpaid_df = raw_feed.get_file('unpaid-blocks.csv').get_as_dataframe()
# The helper defined in this notebook is upload_clean_file_mipasa; the
# previously used name upload_clean_file_to_mipasa does not exist.
upload_clean_file_mipasa(unpaid_df, 'unpaid-blocks.csv')
