In [3]:
import pandas as pd
import numpy as np
from datetime import date, timedelta, datetime
import pytz

In [4]:
# Read in Transaction Data
data = pd.read_csv('transaction.csv')
data.head(5)

Unnamed: 0,value,block_timestamp
0,0,1668124739
1,117286251058621419,1668124739
2,0,1668124739
3,0,1668124739
4,0,1668124739


In [66]:
def index(df):
    df['value'] = df['value'].astype(float)
    df['prop'] = df['value']/df['value'].sum() 
    H = -df['prop']*np.log2(df['prop'])
    E = H.sum()
    V = 2**E
    return V

def HHI(df):
    df['value'] = df['value'].astype(float)
    df['prop'] = df['value']/df['value'].sum() 
    SQ = df['prop']**2
    V = SQ.sum()
    return V

def gini(df):
    df.sort_values(by=['value'],ascending=False,inplace=True)
    df.reset_index(inplace=True,drop=True)
    df.set_index(keys=df.index.values+1,inplace=True)
    x = df.shape[0]
    df['tmp'] = df['value']*df.index.values
    if df.value.sum() == 0:
        return 0
    G = 1 - 2*sum(df.tmp+df.value/2)/x/df.value.sum()
    return G

def nakamoto(df):
    df.sort_values(by=['value'],ascending=False,inplace=True)
    df.reset_index(inplace=True,drop=True)
    df.set_index(keys=df.index.values+1,inplace=True)
    df['prop'] = df['value']/df['value'].sum() 
    df['cumprop'] = df['prop'].cumsum(axis=0)
    try:
        V = df[df['cumprop'] > 0.5].index[0]
        return V
    except:
        return 0

"""
def index_calc(data):

            :param data: dataframe containing transaction value and timestamp of the transaction (block_timestamp)
            :type data: pandas.DataFrame object
            :param start: the start date of calculation 00:00:00 UTC
            :type start: datetime.date object
            :param end: the end date of calculation 23:59:59 UTC
            :type end: datetime.date object
            :param index_type: the metrics used for measuring decentralization, default is our index
            :type index_type: index function

    returns pandas dataframe with decentralization index value and date
"""

def index_calc(data,  start, end, index_type = index):
    tz = pytz.timezone('UTC')
    data['datetime'] = data['block_timestamp'].apply(lambda x: datetime.fromtimestamp(x,tz))
    duration= pd.date_range(start=start, end=end)
    days = np.size(duration)
    IndexValues = pd.DataFrame(np.zeros(days), columns=['val'])
    IndexValues['date'] = duration

    for i in range(0, days):
        start_date = start + timedelta(days=i)
        end_date = start_date + timedelta(days=1)
        IndexValues.loc[i,'val'] = index_type(data[(data['datetime'].dt.date >= start_date) & (data['datetime'].dt.date < end_date)].copy())
    
    return IndexValues


In [67]:
#input transaction data and desired date range
#returns index value 1 if specification range exceeds data 
start = date(2022,11,10)
end = date(2022,11,13)
results = index_calc(data,start,end)

  result = getattr(ufunc, method)(*inputs, **kwargs)


In [68]:
#sample data from 2022-11-10 to 2022-11-11
results

Unnamed: 0,val,date
0,13.089093,2022-11-10
1,13.558045,2022-11-11
2,1.0,2022-11-12
3,1.0,2022-11-13


In [69]:
#specify alternate index metric: HHI
#returns index value 0 if specification range exceeds data 
results_HHI = index_calc(data,start,end,HHI)
results_HHI

Unnamed: 0,val,date
0,0.284268,2022-11-10
1,0.152138,2022-11-11
2,0.0,2022-11-12
3,0.0,2022-11-13


In [70]:
#specify alternate index metric: gini
#returns index value 0 if specification range exceeds data 
results_HHI = index_calc(data,start,end,gini)
results_HHI

Unnamed: 0,val,date
0,0.962565,2022-11-10
1,0.985598,2022-11-11
2,0.0,2022-11-12
3,0.0,2022-11-13


In [64]:
#specify alternate index metric: nakamoto coefficient
#returns index value 0 if specification range exceeds data 
results_nakamoto = index_calc(data,start,end,nakamoto)
results_nakamoto

Unnamed: 0,val,date
0,1.0,2022-11-10
1,3.0,2022-11-11
2,0.0,2022-11-12
3,0.0,2022-11-13
