In [1]:
import pandas as pd
import numpy as np
import panel as pn
import hvplot.pandas
from icecream import ic
from bokeh.models.formatters import NumeralTickFormatter
from bokeh.models import HoverTool
ic.configureOutput(prefix='ic|',outputFunction=print)
pn.extension('tabulator')
pd.set_option('display.max_columns', 500)

Utilities.

In [2]:
def shorten_hashes(df):
    """
    Return a copy of ``df`` with hash-like strings shortened for readability.

    In every object-dtype column, each value that is a string starting with
    ``0x`` is cut down to its first 10 characters (``0x`` plus 8 hex digits).
    Everything else is passed through unchanged: other strings, missing
    values, and non-string objects living in a mixed column.

    Note that shortened hashes are no longer guaranteed to be unique.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to process. It is left untouched; use the returned frame.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` in which the hash values have been shortened.
    """
    def _shorten(value):
        # Only real strings can be hashes. Checking the type explicitly keeps
        # NaN/None and other objects intact instead of turning them into NaN.
        if isinstance(value, str) and value.startswith('0x'):
            return value[:10]
        return value

    # Work on a copy so the caller's frame is never modified behind its back.
    shortened = df.copy()
    for column in shortened.select_dtypes('object').columns:
        shortened[column] = shortened[column].map(_shorten)
    return shortened

Load dataset according to ygg-a. Add an additional step that further reduces columns.

In [3]:
# Read the vote-coefficients input dataset; `last_score_timestamp` is parsed as a datetime.
# Every step below logs the resulting frame shape through `ic` so the pipeline can be audited.
ic("...Loading Donations dataset...")
df_donations = pd.read_csv('./input/vote_coefficients_input.csv', parse_dates=['last_score_timestamp'])
ic(df_donations.shape)

# Drop unsuccessful rows: log how many rows have success == False, then keep only success == True.
ic("...Dropping unsuccessful data...")
ic(len(df_donations[df_donations['success']==False]))
df_donations = df_donations[df_donations['success']==True]
ic(df_donations.shape)

# Drop unnecessary columns (`success` is constant after the filter above).
ic("...Dropping Unecessary columns...")
drop_columns=['success', 'status', 'type']
ic(drop_columns)
df_donations = df_donations.drop(drop_columns, axis=1)
ic(df_donations.shape)

# Shorten hash values for easier reading (0x-prefixed strings are cut to 10 characters).
ic("...Shortening hashes...")
df_donations = shorten_hashes(df_donations)
ic(df_donations.shape)

# Further drop columns to isolate the algorithm environment.
# `drop_columns` is deliberately re-bound here; the first list is no longer needed.
ic("...Dropping Unecessary columns...")
drop_columns=['projectId', 'roundId', 'token', 'amount', 'coefficient', 'last_score_timestamp', 'rawScore', 'threshold']
ic(drop_columns)
df_donations = df_donations.drop(drop_columns, axis=1)
ic(df_donations.shape)

# Use applicationId as projectId to make identifying projects easier.
# The original `projectId` column was dropped in the previous step, so the name is free.
ic("...Rename applicationId to projectId...")
df_donations = df_donations.rename({'applicationId':'projectId'},axis=1)
ic(df_donations.shape)

ic|'...Loading Donations dataset...'
ic|df_donations.shape: (257, 16)
ic|'...Dropping unsuccessful data...'
ic|len(df_donations[df_donations['success']==False]): 4
ic|df_donations.shape: (253, 16)
ic|'...Dropping Unecessary columns...'
ic|drop_columns: ['success', 'status', 'type']
ic|df_donations.shape: (253, 13)
ic|'...Shortening hashes...'
ic|df_donations.shape: (253, 13)
ic|'...Dropping Unecessary columns...'
ic|drop_columns: ['projectId',
                  'roundId',
                  'token',
                  'amount',
                  'coefficient',
                  'last_score_timestamp',
                  'rawScore',
                  'threshold']
ic|df_donations.shape: (253, 5)
ic|'...Rename applicationId to projectId...'
ic|df_donations.shape: (253, 5)


(253, 5)

In [4]:
# Preview the cleaned donations frame: one row per donation.
df_donations

Unnamed: 0,id,projectId,voter,grantAddress,amountUSD
0,0x24a5bbf1,19,0x9ba96198,0xA26d6AEB,9.184332
1,0x3dce13bb,6,0x9390fa86,0x9390fA86,4.094567
2,0x4cf20243,7,0x5136cdfc,0x0035cC37,74.446665
3,0x2b032f10,16,0x524cb61b,0x45b79C6b,5.583500
4,0x0842753b,29,0x524cb61b,0x5041A1C1,5.583500
...,...,...,...,...,...
252,0x26e1e300,7,0x4405f427,0x0035cC37,1.847803
253,0xa21ca1aa,16,0xcdfbbe10,0x45b79C6b,1.843793
254,0x634b5156,24,0xcdfbbe10,0x4f8c531d,1.843793
255,0x4efa29aa,10,0x410d86e3,0x7f3eb18E,1.843793


Total Donation Amounts Per Project.

In [5]:
# Per-project donation statistics, all derived from the `amountUSD` column:
# total raised, number of donations, and mean / median donation size.
df_projects = (
    df_donations
    .groupby('projectId')
    .agg(
        amountUSD=pd.NamedAgg(column='amountUSD', aggfunc='sum'),
        donations=pd.NamedAgg(column='amountUSD', aggfunc='size'),
        mean=pd.NamedAgg(column='amountUSD', aggfunc='mean'),
        median=pd.NamedAgg(column='amountUSD', aggfunc='median'),
    )
)
df_projects

Unnamed: 0_level_0,amountUSD,donations,mean,median
projectId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2,49.118253,8,6.139782,5.847482
3,230.329281,24,9.597053,5.873165
6,37.108687,7,5.301241,5.688977
7,274.514986,19,14.448157,4.740046
9,1003.98862,11,91.271693,6.005987
10,14.908828,5,2.981766,1.843793
13,2021.307778,15,134.753852,18.707399
14,263.524009,21,12.548762,5.998856
16,169.2036,17,9.953153,5.688977
17,69.954191,10,6.995419,5.847482


Total Donation Amounts Per Voter

In [6]:
# Per-voter donation statistics, all derived from the `amountUSD` column:
# total donated, number of donations, and mean / median donation size.
df_voters = (
    df_donations
    .groupby('voter')
    .agg(
        amountUSD=pd.NamedAgg(column='amountUSD', aggfunc='sum'),
        donations=pd.NamedAgg(column='amountUSD', aggfunc='size'),
        mean=pd.NamedAgg(column='amountUSD', aggfunc='mean'),
        median=pd.NamedAgg(column='amountUSD', aggfunc='median'),
    )
)
df_voters

Unnamed: 0_level_0,amountUSD,donations,mean,median
voter,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0x01d3a04a,9.564049,1,9.564049,9.564049
0x04e8e5aa,82.661408,3,27.553803,27.553803
0x0f465406,27.887267,1,27.887267,27.887267
0x109991e3,936.545861,1,936.545861,936.545861
0x114b242d,57.159401,1,57.159401,57.159401
...,...,...,...,...
0xef22a550,28.365762,3,9.455254,9.455254
0xf1fd2aa4,9.859106,1,9.859106,9.859106
0xf632ce27,38.247675,3,12.749225,9.561919
0xf754ebcf,9.275321,1,9.275321,9.275321


Load TEC Token Dataset.

In [7]:
# Load the table of valid TEC token holders.
# Extracted from https://dune.com/queries/2457553/4040451
ic("...Loading TEC Token Holders dataset...")
df_tec = pd.read_csv('./input/tec_holders.csv')
ic(df_tec.shape)

# Shorten hash values for easier reading; this puts `address` in the same
# 10-character form as the `voter` column of the donations frame.
ic("...Shortening hashes...")
df_tec = shorten_hashes(df_tec)
ic(df_tec.shape)

# Keep only the address and balance columns.
ic("...Take address and balance...")
df_tec = df_tec[['address', 'balance']]
ic(df_tec.shape)

ic|'...Loading TEC Token Holders dataset...'
ic|df_tec.shape: (409, 3)
ic|'...Shortening hashes...'
ic|df_tec.shape: (409, 3)
ic|'...Take address and balance...'
ic|df_tec.shape: (409, 2)


(409, 2)

In [8]:
# Preview the TEC holders table (shortened address, token balance).
df_tec

Unnamed: 0,address,balance
0,0x38dfd788,150071.717791
1,0x5b757549,106053.271906
2,0x839395e2,73838.661487
3,0xdf290293,69337.513233
4,0x45602bfb,59165.981018
...,...,...
404,0x423d60df,10.873917
405,0xc70c7f14,10.674364
406,0xae7f1137,10.309472
407,0x96bdad64,10.223873


Load TEA Credentials Dataset.

In [9]:
# Load the TE Academy credential holders from two sources: a Dune export and a TEA spreadsheet.
# Dune data extracted from https://dune.com/queries/2457581
ic("...Loading TEA Credentials dataset...")
df_tea_dune = pd.read_csv('./input/tea_holders_dune.csv')
df_tea_tea = pd.read_excel('./input/tea_holders_tea.xlsx')
ic(df_tea_dune.shape)
ic(df_tea_tea.shape)

# Examine intersecting wallets between Dune and TEA: size of the overlap and of the union.
ic("...Examine Overlap...")
ic(len(set(df_tea_tea['wallet']).intersection(set(df_tea_dune['wallet']))))
ic(len(set(df_tea_tea['wallet']).union(set(df_tea_dune['wallet']))))

# Supplement Dune data with TEA data and drop duplicates.
# Despite the log message this is a concat, not a join: Dune rows come first, so for a
# wallet present in both sources the Dune row is the one kept by drop_duplicates.
# NOTE(review): wallets are compared as exact strings; if the two sources use different
# letter case for the same wallet it would be kept twice -- confirm the inputs are normalised.
ic("...Leftjoin to Dune Data...")
df_tea = pd.concat([df_tea_dune, df_tea_tea]).drop_duplicates(subset=['wallet'])
ic(df_tea.shape)

# Shorten hash values for easier reading (0x-prefixed strings are cut to 10 characters).
ic("...Shortening hashes...")
df_tea = shorten_hashes(df_tea)
ic(df_tea.shape)

# Make a contiguous index (the concat above leaves repeated / gapped index labels).
ic("...Resetting index...")
df_tea = df_tea.reset_index(drop=True)
ic(df_tea.shape)

# Fill balance of TEA with 1 for now.
# Note that fillna(1) replaces NaN in every column of the frame, not only in `balance`.
ic("...Fill Nan Balance with 1...")
df_tea = df_tea.fillna(1)
ic(df_tea.shape)

# Rename wallet to address to be consistent with the TEC table.
ic("...Rename Wallet to Address...")
df_tea = df_tea.rename({'wallet':'address'},axis=1)
ic(df_tea.shape)

# Keep only the address and balance columns.
ic("...Take address and balance columns...")
df_tea = df_tea[['address', 'balance']]
ic(df_tea.shape)

ic|'...Loading TEA Credentials dataset...'
ic|df_tea_dune.shape: (192, 3)
ic|df_tea_tea.shape: (214, 3)
ic|'...Examine Overlap...'
ic|len(set(df_tea_tea['wallet']).intersection(set(df_tea_dune['wallet']))): 151
ic|len(set(df_tea_tea['wallet']).union(set(df_tea_dune['wallet']))): 249
ic|'...Leftjoin to Dune Data...'
ic|df_tea.shape: (249, 3)
ic|'...Shortening hashes...'
ic|df_tea.shape: (249, 3)
ic|'...Resetting index...'
ic|df_tea.shape: (249, 3)
ic|'...Fill Nan Balance with 1...'
ic|df_tea.shape: (249, 3)
ic|'...Rename Wallet to Address...'
ic|df_tea.shape: (249, 3)
ic|'...Take address and balance columns...'
ic|df_tea.shape: (249, 2)


(249, 2)

In [10]:
# Preview the combined TEA credentials table (shortened address, credential balance).
df_tea

Unnamed: 0,address,balance
0,0x68f6f2db,1.0
1,0x3e0cf03f,5.0
2,0x1d1874f9,1.0
3,0x4daa278b,3.0
4,0xc710f3da,3.0
...,...,...
244,0xe1954808,1.0
245,0x7f990adf,1.0
246,0xd1595177,1.0
247,0xcc449df4,1.0


Number of Voters who have TEC Tokens

In [11]:
# Count distinct voters whose (shortened) address also appears in the TEC holders table.
# The match is an exact, case-sensitive string comparison.
ic(len(set(df_donations['voter']).intersection(set(df_tec['address']))))

ic|len(set(df_donations['voter']).intersection(set(df_tec['address']))): 19


19

Number of Voters who Have TEA Credentials

In [12]:
# Count distinct voters whose (shortened) address also appears in the TEA credentials table.
# The match is an exact, case-sensitive string comparison.
ic(len(set(df_donations['voter']).intersection(set(df_tea['address']))))

ic|len(set(df_donations['voter']).intersection(set(df_tea['address']))): 8


8

Number of Voters who have Both TEC Tokens and TEA Credentials

In [13]:
# Count distinct voters whose (shortened) address appears in both the TEC and the TEA table.
ic(len(set(df_donations['voter']).intersection(set(df_tec['address'])).intersection(set(df_tea['address']))))

ic|len(set(df_donations['voter']).intersection(set(df_tec['address'])).intersection(set(df_tea['address']))): 4


4

In [14]:
# Show the per-voter aggregates again; this is the left-hand side of the merge that follows.
df_voters

Unnamed: 0_level_0,amountUSD,donations,mean,median
voter,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0x01d3a04a,9.564049,1,9.564049,9.564049
0x04e8e5aa,82.661408,3,27.553803,27.553803
0x0f465406,27.887267,1,27.887267,27.887267
0x109991e3,936.545861,1,936.545861,936.545861
0x114b242d,57.159401,1,57.159401,57.159401
...,...,...,...,...
0xef22a550,28.365762,3,9.455254,9.455254
0xf1fd2aa4,9.859106,1,9.859106,9.859106
0xf632ce27,38.247675,3,12.749225,9.561919
0xf754ebcf,9.275321,1,9.275321,9.275321


Merge the Data Together.

In [20]:
# Left join the three tables
df_voters_merged = df_voters.reset_index().merge(
    df_tec, left_on='voter', right_on='address',how='left').merge(
    df_tea, left_on='voter', right_on='address',how='left', suffixes=('_tec', '_tea')).drop(columns=['address_tec','address_tea'])

# Replace Nan values with 0
df_voters_merged = df_voters_merged.fillna(0)

In [21]:
# Preview the voters table enriched with `balance_tec` and `balance_tea` (0 where the voter holds none).
df_voters_merged

Unnamed: 0,voter,amountUSD,donations,mean,median,balance_tec,balance_tea
0,0x01d3a04a,9.564049,1,9.564049,9.564049,0.000000,0.0
1,0x04e8e5aa,82.661408,3,27.553803,27.553803,0.000000,0.0
2,0x0f465406,27.887267,1,27.887267,27.887267,1526.260043,0.0
3,0x109991e3,936.545861,1,936.545861,936.545861,0.000000,0.0
4,0x114b242d,57.159401,1,57.159401,57.159401,0.000000,0.0
...,...,...,...,...,...,...,...
78,0xef22a550,28.365762,3,9.455254,9.455254,0.000000,0.0
79,0xf1fd2aa4,9.859106,1,9.859106,9.859106,0.000000,0.0
80,0xf632ce27,38.247675,3,12.749225,9.561919,0.000000,0.0
81,0xf754ebcf,9.275321,1,9.275321,9.275321,0.000000,0.0


In [22]:
import param as pm
import numpy as np

In [23]:
class Boost(pm.Parameterized):
    """
    Map a raw per-voter signal (such as a token balance) onto a boost in [0, 1].

    The selected ``transformation`` decides how ``signal`` becomes
    ``distribution``:

    * ``'Threshold'`` -- 1 where the signal is at least ``threshold``, else 0.
    * ``'Linear'``    -- min-max scaling of the signal.
    * ``'Sigmoid'``   -- standardise the signal, pass it through a logistic
      curve (steepness ``sigmoid_frequency``, midpoint ``sigmoid_shift``) and
      min-max scale the result.

    With ``logy`` enabled, the signal and the threshold are both mapped
    through ``log(1 + x)`` first. ``distribution`` is recomputed whenever the
    signal, the transformation or one of the tuning parameters changes.
    """
    # Raw input values. Negative precedence keeps it out of the widget view.
    signal = pm.Series(precedence=-1)
    # Transformed output. Constant, so it is only written by update_distribution.
    distribution = pm.Series(constant=True, precedence=-1)
    # Work on a log(1 + x) scale instead of the raw scale.
    logy = pm.Boolean(False)
    # Which transformation turns the signal into the distribution.
    transformation = pm.ObjectSelector('Sigmoid', objects=['Threshold', 'Linear', 'Sigmoid'])
    # Cut-off for the 'Threshold' transformation, expressed on the raw scale.
    threshold = pm.Number(100, precedence=-1, bounds=(0, 1000), step=1)
    # Steepness (k) and midpoint (b) of the logistic curve for 'Sigmoid'.
    sigmoid_frequency = pm.Number(1, precedence=-1, bounds=(0.1,5))
    sigmoid_shift = pm.Number(0, precedence=-1, bounds=(-5,5))

    def __init__(self, **params):
        super().__init__(**params)
        # Reveal the widgets matching the initial transformation and compute
        # the first distribution.
        self.show_transformation_params()

    @pm.depends('signal', 'logy', 'threshold', 'sigmoid_frequency', 'sigmoid_shift', watch=True)
    def update_distribution(self):
        """
        Recompute ``distribution`` from ``signal`` and the current settings.

        Does nothing while no signal has been provided.

        Raises
        ------
        ValueError
            If ``transformation`` holds a value that is not handled here.
        """
        if self.signal is None:
            # Nothing to transform yet (e.g. Boost() created without a signal).
            return

        if self.logy:
            # Use the same log(1 + x) mapping for signal and threshold so that
            # the 'Threshold' comparison is unaffected by the choice of scale
            # and a threshold of 0 stays finite.
            signal = np.log1p(self.signal)
            threshold = np.log1p(self.threshold)
        else:
            signal = self.signal
            threshold = self.threshold

        # `distribution` is declared constant; lift that temporarily to write it.
        with pm.edit_constant(self):
            if self.transformation == 'Threshold':
                self.distribution = self._threshold(signal, threshold)
            elif self.transformation == 'Sigmoid':
                self.distribution = self._sigmoid_scale(signal, k=self.sigmoid_frequency, b=self.sigmoid_shift)
            elif self.transformation == 'Linear':
                self.distribution = self._min_max_scale(signal)
            else:
                raise ValueError(f"Unkown Transformation: {self.transformation}")

    @pm.depends('transformation', watch=True)
    def show_transformation_params(self):
        """
        Show only the tuning parameters relevant to the current transformation,
        then refresh the distribution.
        """
        with pm.parameterized.batch_call_watchers(self):
            # Hide everything first, then reveal what the transformation uses.
            self.param['threshold'].precedence = -1
            self.param['sigmoid_frequency'].precedence = -1
            self.param['sigmoid_shift'].precedence = -1

            if self.transformation == 'Threshold':
                self.param['threshold'].precedence = 1

            if self.transformation == 'Sigmoid':
                self.param['sigmoid_frequency'].precedence = 1
                self.param['sigmoid_shift'].precedence = 1

        self.update_distribution()

    @staticmethod
    def _sigmoid(x, A=1, k=1, b=0):
        """Logistic curve with amplitude ``A``, steepness ``k`` and midpoint ``b``."""
        return A / (1 + np.exp(-k * (x - b)))

    @staticmethod
    def _min_max_scale(signal):
        """
        Rescale ``signal`` linearly so its minimum maps to 0 and its maximum to 1.

        A constant signal has no spread to scale by; it is mapped to 0
        everywhere instead of producing NaN from a 0/0 division.
        """
        low = signal.min()
        span = signal.max() - low
        if span == 0:
            return pd.Series((signal - low) * 0.0)
        return pd.Series((signal - low) / span)

    @staticmethod
    def _threshold(signal, t):
        """Return 1 where ``signal`` is at least ``t`` and 0 elsewhere."""
        return (signal >= t).astype(int)

    @staticmethod
    def _mean_std_scale(signal):
        """
        Standardise ``signal`` to zero mean and unit standard deviation.

        When the standard deviation is zero or undefined (constant or
        single-value signal) only the mean is removed, avoiding a 0/0 division.
        """
        centered = signal - signal.mean()
        spread = signal.std()
        if not spread > 0:
            return centered
        return centered / spread

    def _sigmoid_scale(self, signal, **params):
        """Standardise ``signal``, apply ``_sigmoid(**params)`` and min-max scale the result."""
        return self._min_max_scale(self._sigmoid(self._mean_std_scale(signal), **params))

    def view_distribution(self):
        """Step plot of the distribution sorted from the largest to the smallest value."""
        return self.distribution.sort_values(ascending=False).reset_index(drop=True).hvplot.step()

    def view(self):
        """Panel layout with the parameter widgets next to the distribution plot."""
        return pn.Row(self, self.view_distribution)

In [27]:
# TEC boost: balances are put on a log scale, then passed through a sigmoid
# with frequency 3 and shift 1.
tec_boost = Boost(
    signal=df_voters_merged['balance_tec'],
    transformation='Sigmoid',
    logy=True,
    sigmoid_frequency=3,
    sigmoid_shift=1,
)
tec_boost.view()

In [28]:
# TEA boost: raw credential counts passed through a sigmoid with frequency 1.
# `threshold` only takes effect if the transformation is switched to 'Threshold'.
tea_boost = Boost(
    signal=df_voters_merged['balance_tea'],
    transformation='Sigmoid',
    logy=False,
    threshold=1,
    sigmoid_frequency=1,
)
tea_boost.view()

Applying the new algorithm.

In [30]:
# Weight of the combined boost: coefficient = 1 + boost_factor * (TEC boost + TEA boost).
# Each boost lies in [0, 1], so coefficients range from 1 up to 1 + 2 * boost_factor.
boost_factor = 1.5
df_voters_merged = df_voters_merged.assign(
    balance_tec_sigmoid=tec_boost.distribution,
    balance_tea_sigmoid=tea_boost.distribution,
    # Evaluated after the two columns above have been added.
    coefficient=lambda voters: 1 + boost_factor * (voters['balance_tec_sigmoid'] + voters['balance_tea_sigmoid']),
)

Inspect the SMEs

In [42]:
# Voters holding any TEC tokens or any TEA credentials, highest coefficient first.
df_sme = (
    df_voters_merged
    .loc[lambda voters: voters['balance_tec'].gt(0) | voters['balance_tea'].gt(0)]
    .sort_values(by='coefficient', ascending=False)
)
df_sme

Unnamed: 0,voter,amountUSD,donations,mean,median,balance_tec,balance_tea,balance_tec_sigmoid,balance_tea_sigmoid,coefficient
54,0xb2d60143,84.895828,4,21.223957,24.969361,5283.368339,3.0,0.967165,0.916059,3.824835
11,0x3e31155a,26.10965,7,3.72995,3.72995,191.350379,5.0,0.56399,1.0,3.345985
15,0x4318cc44,49.387533,4,12.346883,12.346883,60.872508,2.0,0.310637,0.762702,2.610008
33,0x7ee9033a,38.11635,1,38.11635,38.11635,0.0,5.0,0.0,1.0,2.5
34,0x839395e2,444.287724,10,44.428772,12.479992,73838.661487,0.0,1.0,0.0,2.5
82,0xf8d1d349,41.991991,7,5.998856,5.998856,18635.449575,0.0,0.991634,0.0,2.487451
32,0x7d547666,20.037435,8,2.504679,2.003744,18134.425365,0.0,0.991342,0.0,2.487013
60,0xc46c67bb,37.229192,5,7.445838,7.445838,15565.761361,0.0,0.989566,0.0,2.484349
68,0xcf79c7ea,110.012462,11,10.001133,10.001133,14385.066301,0.0,0.98855,0.0,2.482825
56,0xb760fe1b,9.001957,5,1.800391,1.000217,10563.361347,0.0,0.983815,0.0,2.475722


In [43]:
# Total USD donated and total number of donations made by the SME voters.
df_sme.loc[:, ['amountUSD', 'donations']].sum()

amountUSD    3199.53537
donations     121.00000
dtype: float64

In [44]:
# Unweighted average over SME voters of their mean donation size and of their coefficient.
df_sme.loc[:, ['mean', 'coefficient']].mean()

mean           23.073750
coefficient     2.389107
dtype: float64

Combine Voters Dataset with Donations Dataset.

In [45]:
# Left join the three tables
df_merged = df_donations.merge(df_voters_merged, left_on='voter', right_on='voter',how='left')
df_merged.sample(5)

Unnamed: 0,id,projectId,voter,grantAddress,amountUSD_x,amountUSD_y,donations,mean,median,balance_tec,balance_tea,balance_tec_sigmoid,balance_tea_sigmoid,coefficient
33,0x7bdd37e8,2,0xcd0d4cdb,0x9bcF35BD,6.005987,96.09579,16,6.005987,6.005987,0.0,0.0,0.0,0.0,1.0
191,0xd9a49e82,14,0xf8d1d349,0x9b55D80A,5.998856,41.991991,7,5.998856,5.998856,18635.449575,0.0,0.991634,0.0,2.487451
174,0xacdddc5c,14,0xc46c67bb,0x9b55D80A,7.445838,37.229192,5,7.445838,7.445838,15565.761361,0.0,0.989566,0.0,2.484349
245,0xb5ddac72,16,0x751747b1,0x45b79C6b,1.847803,1.847803,1,1.847803,1.847803,0.0,0.0,0.0,0.0,1.0
137,0x6eac8abd,24,0x364f0ad6,0x4f8c531d,92.601977,92.601977,1,92.601977,92.601977,0.0,0.0,0.0,0.0,1.0


In [47]:
# Donations ordered from the most boosted voter to the least boosted one.
df_merged.sort_values(by='coefficient', ascending=False)

Unnamed: 0,id,projectId,voter,grantAddress,amountUSD_x,amountUSD_y,donations,mean,median,balance_tec,balance_tea,balance_tec_sigmoid,balance_tea_sigmoid,coefficient
77,0xb316dadc,3,0xb2d60143,0x808DB7C1,9.987744,84.895828,4,21.223957,24.969361,5283.368339,3.0,0.967165,0.916059,3.824835
78,0x2bb0ecf4,22,0xb2d60143,0x8110d1D0,24.969361,84.895828,4,21.223957,24.969361,5283.368339,3.0,0.967165,0.916059,3.824835
79,0xdbac59a3,29,0xb2d60143,0x5041A1C1,24.969361,84.895828,4,21.223957,24.969361,5283.368339,3.0,0.967165,0.916059,3.824835
80,0x365c3b2e,14,0xb2d60143,0x9b55D80A,24.969361,84.895828,4,21.223957,24.969361,5283.368339,3.0,0.967165,0.916059,3.824835
12,0x9434f150,9,0x3e31155a,0xbbD107D7,3.729950,26.109650,7,3.729950,3.729950,191.350379,5.0,0.563990,1.000000,3.345985
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62,0x08b94c09,24,0x468fd68b,0x4f8c531d,1.001181,16.018894,16,1.001181,1.001181,0.000000,0.0,0.000000,0.000000,1.000000
61,0xd9450f5c,22,0x468fd68b,0x8110d1D0,1.001181,16.018894,16,1.001181,1.001181,0.000000,0.0,0.000000,0.000000,1.000000
60,0x4cfce6d0,7,0x468fd68b,0x0035cC37,1.001181,16.018894,16,1.001181,1.001181,0.000000,0.0,0.000000,0.000000,1.000000
59,0xc596710c,2,0x468fd68b,0x9bcF35BD,1.001181,16.018894,16,1.001181,1.001181,0.000000,0.0,0.000000,0.000000,1.000000
