<a href="https://colab.research.google.com/github/SciEcon/DecentralizationIndex/blob/main/Decentralization_Classes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Preliminaries

In [None]:
!pip install stargazer
!pip install pingouin
!pip install statsmodels

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import decimal
from datetime import datetime, date, timedelta, timezone
from dateutil.relativedelta import relativedelta
import math
import plotly.offline as py     
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from scipy import stats
from sklearn.linear_model import LinearRegression
from sklearn import datasets, linear_model
import statsmodels.api as sm
import plotly.colors as pc
import re

from stargazer.stargazer import Stargazer
import pingouin
import IPython.core.display
import requests
import json 
from datetime import datetime
import matplotlib.pyplot as plt
from IPython.core.display import HTML
import statsmodels.api as sm

from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.seasonal import seasonal_decompose
import seaborn as sns
from math import sqrt
from statsmodels.tsa.stattools import grangercausalitytests
from statsmodels.tsa.api import VAR
from sklearn.decomposition import PCA
from statsmodels.tsa.ar_model import AutoReg

In [None]:
# Connect to Google Cloud: authenticate the current user through Google Colab.
# This cell runs before the BigQuery client is created further down, and it
# only works inside a Colab runtime (`google.colab` is not available elsewhere).
from google.colab import auth
auth.authenticate_user()
print('Authenticated')

Authenticated


In [None]:
# Connect to Google BigQuery
# Project the client runs its queries under; replace with your own GCP project
# when re-running this notebook.
PROJECT_ID = 'trans-invention-305714'

from google.cloud import bigquery
client = bigquery.Client(project=PROJECT_ID, location='US')

# `Client.dataset()` is deprecated; build the dataset reference directly.
dataset_ref = bigquery.DatasetReference("bigquery-public-data", "crypto_ethereum")
dataset = client.get_dataset(dataset_ref)
tables = list(client.list_tables(dataset))

# Print names of all tables in the dataset
for table in tables:
  print(table.table_id)

amended_tokens
balances
blocks
contracts
logs
sessions
token_transfers
tokens
traces
transactions


# Class for Querying and Processing the Decentralization Index

In [None]:
class Decentralization:
  """
  Daily, transfer-value based decentralization index of one ERC-20 token.

  For every calendar day in ``[start, end]`` (both inclusive) the token's
  non-zero transfers are read from the public BigQuery table
  ``bigquery-public-data.crypto_ethereum.token_transfers`` and reduced to a
  single number ``2 ** H``, where ``H`` is the Shannon entropy (in bits) of
  each transfer's share of that day's total transferred value.

  :param start: first day to query
  :type start: datetime.date
  :param end: last day to query (inclusive)
  :type end: datetime.date
  :param address: contract address of the ERC-20 token
  :type address: string

  Public Functions:
      - ``program``: returns a pandas dataframe with one row per day and the
        columns ``val`` (index value) and ``date``

  Note: ``program`` uses the module-level BigQuery ``client`` created earlier
  in this notebook, so that cell has to be run first.
  """

  def __init__(self, start, end, address):
    self.start = start
    self.end = end
    self.address = address

  # Query and Calculation Program
  def program(self):
    """
    Query the transfers day by day and compute the index for each day.

    :return: dataframe with columns ``val`` (float index value) and ``date``
    :rtype: pandas.DataFrame
    """
    # Local import keeps the class usable regardless of which notebook cell
    # imported `bigquery`; the package is already required for `client`.
    from google.cloud import bigquery

    # Named parameters instead of string concatenation: the address and the
    # day bounds can no longer alter the SQL text.
    sql = """
    SELECT token_address, from_address, to_address, block_timestamp, CAST(value AS NUMERIC) AS value
    FROM `bigquery-public-data.crypto_ethereum.token_transfers`
    WHERE token_address = @address
      AND CAST(value AS NUMERIC) <> 0
      AND block_timestamp >= @day_start
      AND block_timestamp < @day_end
    """

    duration = pd.date_range(start=self.start, end=self.end)
    Entropy = pd.DataFrame({'val': np.zeros(len(duration)), 'date': duration})

    for i, day in enumerate(duration):
      # Half-open UTC window [00:00 of `day`, 00:00 of the next day).
      day_start = datetime(day.year, day.month, day.day, tzinfo=timezone.utc)
      day_end = day_start + timedelta(days=1)
      job_config = bigquery.QueryJobConfig(query_parameters=[
          bigquery.ScalarQueryParameter("address", "STRING", self.address),
          bigquery.ScalarQueryParameter("day_start", "TIMESTAMP", day_start),
          bigquery.ScalarQueryParameter("day_end", "TIMESTAMP", day_end),
      ])
      df = client.query(sql, job_config=job_config).to_dataframe(progress_bar_type='tqdm_notebook')
      Entropy.loc[i, 'val'] = self.Processing_tranvol(df)

    return Entropy

  # Processing for Entropy (regardless of sender/receiver)
  def Processing_tranvol(self, df):
    """
    Clean one day's transfers and return the index value for that day.

    The input frame is left untouched; a renamed, float-typed copy without
    missing values is what gets passed on to ``index_1``.

    :param df: transfers with a ``value`` (or ``f0_``) column
    :type df: pandas.DataFrame
    :return: index value for the day
    :rtype: float
    """
    transfers = df.rename(columns={'f0_': 'value', 'from_address': 'from', 'to_address': 'to'})
    transfers = transfers.assign(value=transfers['value'].astype(float)).dropna()
    return self.index_1(transfers)

  # Transaction Entropy
  def index_1(self, df):
    """
    Return ``2 ** H`` for the ``value`` column of ``df``.

    ``H`` is the Shannon entropy in bits of ``value / value.sum()``.
    A frame without rows yields ``1.0`` (``2 ** 0``). The input frame is
    not modified.

    :param df: frame with a numeric ``value`` column
    :type df: pandas.DataFrame
    :return: exponentiated entropy
    :rtype: float
    """
    values = df['value']
    shares = values / values.sum()
    entropy = (-shares * np.log2(shares)).sum()
    return 2 ** entropy

## Example Usage

In [None]:
# Date range for the example run; both the start and the end day are queried.
start = date(2021,7,28)
end = date(2021,7,31)

In [None]:
# Token contract address passed as `address` to Decentralization below
# (presumably the Uniswap UNI token -- verify before reuse).
UNI = '0x1f9840a85d5af5bf1d1762f925bdaddc4201f984'

In [None]:
# Compute the index for UNI over [start, end]. program() sends one BigQuery
# query per day through the `client` created in the connection cell above.
Uni = Decentralization(start = start, end = end, address = UNI)
df_uni = Uni.program()

Downloading:   0%|          | 0/2907 [00:00<?, ?rows/s]

Downloading:   0%|          | 0/3032 [00:00<?, ?rows/s]

Downloading:   0%|          | 0/3856 [00:00<?, ?rows/s]

Downloading:   0%|          | 0/3655 [00:00<?, ?rows/s]

In [None]:
# Result of program(): one row per day with the columns `val` and `date`.
df_uni

Unnamed: 0,val,date
0,174.786472,2021-07-28
1,274.551509,2021-07-29
2,579.624903,2021-07-30
3,394.990031,2021-07-31


# Class for Econometrics Analysis

In [None]:
class Decentralization_Analysis:
  """
  Econometric analysis of a token's decentralization index against Bitcoin
  and Ether market variables.

  :param token: dataframe with the columns ``date`` and ``val`` (shannon
      entropy values for daily transactions); ``val`` is turned into the
      index with ``np.exp2``
  :type token: pandas.DataFrame
  :param metrics: all market metrics of token inputed retrieved from CoinMetrics
  :type metrics: pandas.DataFrame
  :param BTC: all market metrics of Bitcoin retrieved from CoinMetrics
  :type BTC: pandas.DataFrame
  :param ETH: all market metrics of Ethereum retrieved from CoinMetrics
  :type ETH: pandas.DataFrame
  :raises ValueError: if no complete row is left after merging the inputs
      and building the lagged variables

  NOTE(review): ``Decentralization.index_1`` in this notebook already returns
  ``2 ** entropy``; feeding that output in here exponentiates a second time.
  Confirm which quantity ``token['val']`` is meant to hold.

  Public Functions:
      - ``autor``: prints and returns the autoregression of our dependent
            variable: decentralization index.
      - ``check_stationarity``: prints Dickey Fuller test results and saves the
            partial autocorrelation figure
      - ``heat_map``: saves the correlation heatmap of the market variables
      - ``regression`` / ``regression_rev``: single OLS fits with
            Newey-West (HAC) standard errors
      - ``stargazer_reverse_eth``: displays regression results with independent variables:
            Ether market variables; dependent variable: decentralization index
      - ``stargazer_reverse_btc``: displays regression results with independent variables:
            Bitcoin market variables; dependent variable: decentralization index
      - ``reset``: clears all stored data
  """

  # Horizons (in days) of the BTC/ETH returns used as regressors.
  _RETURN_HORIZONS = (1, 7, 14, 21, 30)
  # Market columns taken from each CoinMetrics frame (next to `time`).
  _MARKET_COLUMNS = ['PriceUSD', 'VtyDayRet30d', 'TxTfrValAdjUSD', 'TxTfrCnt']

  def __init__(self, token, metrics, BTC, ETH):
    self.reset()
    self.token = token.copy()
    self.metrics = metrics.copy()
    self.btc = BTC.copy()
    self.eth = ETH.copy()
    self.result = None
    # Assign the result back: a chained `inplace=True` replace on a column is
    # silently lost when pandas runs with copy-on-write.
    self.token['date'] = self.token['date'].replace(to_replace=r'\s00:00:00', value='', regex=True)

    ent_exp = pd.DataFrame({
        'val': np.exp2(self.token['val']),
        'date': pd.to_datetime(self.token['date']),
    })
    ent_exp['n'] = range(1, ent_exp.shape[0] + 1)
    ent_exp['val_pct30'] = ent_exp['val'].pct_change(30)

    token_columns = ['time', 'CapMrktCurUSD'] + self._MARKET_COLUMNS
    benchmark_columns = ['time'] + self._MARKET_COLUMNS
    df = ent_exp.merge(self._dated_metrics(self.metrics, token_columns), on='date')
    df = df.merge(self._dated_metrics(self.btc, benchmark_columns, prefix='BTC'), on='date')
    df = df.merge(self._dated_metrics(self.eth, benchmark_columns, prefix='ETH'), on='date')

    # Lag the 30-day volatility by 30 rows.
    for asset in ('BTC', 'ETH'):
      volatility = asset + '_VtyDayRet30d'
      df[volatility] = df[volatility].shift(30)

    # h-day returns, lagged by one row (same column order as before:
    # BTC then ETH for every horizon).
    for horizon in self._RETURN_HORIZONS:
      suffix = '' if horizon == 1 else str(horizon)
      for asset in ('BTC', 'ETH'):
        df[asset + '_Ret' + suffix] = df[asset + '_PriceUSD'].pct_change(horizon).shift(1)

    df = df.replace([np.inf, -np.inf], np.nan).dropna().copy()
    if df.empty:
      raise ValueError(
          "No complete observations left after merging and lagging; "
          "more than 31 overlapping days of data are required.")

    # First principal component of each asset's return/volatility block.
    pca = PCA(n_components=1)
    for asset in ('ETH', 'BTC'):
      factors = df[self._factor_names(asset, include_pc=False)]
      df[asset + '_PC'] = pca.fit_transform(factors)[:, 0]
    self.df = df.copy()

  @classmethod
  def _factor_names(cls, asset, include_pc=True):
    """Column names of one asset's regressors, e.g. ``ETH_Ret``, ``ETH_Ret7``, ..."""
    names = ['%s_Ret%s' % (asset, '' if horizon == 1 else horizon)
             for horizon in cls._RETURN_HORIZONS]
    names.append('%s_VtyDayRet30d' % asset)
    if include_pc:
      names.append('%s_PC' % asset)
    return names

  @staticmethod
  def _dated_metrics(frame, columns, prefix=None):
    """Copy ``columns`` of ``frame``, parse ``time`` into ``date``, optionally prefix the rest, drop incomplete rows."""
    renames = {'time': 'date'}
    if prefix is not None:
      renames.update({name: prefix + '_' + name for name in columns if name != 'time'})
    subset = frame[columns].copy()
    subset['time'] = pd.to_datetime(subset['time'])
    return subset.rename(columns=renames).dropna()

  def autor(self):
    """
    Fit an AR(5) model on the decentralization index and print its summary.

    :return: the fitted autoregression results
    """
    ar_model = AutoReg(self.df.val, lags=5).fit()
    print(ar_model.summary())
    return ar_model

  def check_stationarity(self, lags_plots=48, figsize=(22,8)):
    """
    Run the augmented Dickey-Fuller test on the index and every market
    variable, and save the partial autocorrelation plot of the index.

    Zero entries of a variable are excluded before testing. The variables
    whose p-value is below 0.05 are listed at the end.

    :param lags_plots: number of lags shown in the partial autocorrelation plot
    :param figsize: kept for interface compatibility; currently not applied
        to the figure
    """
    variables = ['val'] + self._factor_names('ETH') + self._factor_names('BTC')
    significant = []
    fig = plot_pacf(self.df['val'].dropna(), lags=lags_plots, zero=False, method='ols',
                    color='teal', title='Partial Autocorrelation')
    fig.savefig('PAC.pdf')

    for name in variables:
      series = self.df[name]
      series = series[series != 0].dropna()  ##Some rates data on DeFi pulse is missing have '0' value
      if series.empty:
        continue

      print('Dickey-Fuller test results:')
      adfinput = adfuller(series)
      # Naming the series makes the printed block say which variable it is for.
      adftest = pd.Series(adfinput[0:4],
                          index=['Statistical Test','P-Value','Used Lags','Observations Number'],
                          name=name)
      if adftest.loc['P-Value'] < 0.05:
        significant.append(name)
      adftest = adftest.round(4)

      for key, value in adfinput[4].items():
        adftest["Critical Values (%s)"%key] = round(value, 4)
      print(adftest)

    # The figure does not change inside the loop, so show it once.
    fig.show()
    print("Stationary Variables:", significant)

  def heat_map(self):
    """Save the lower-triangle correlation heatmap of the ETH and BTC variables to ``Return_Heatmap.pdf``."""
    corr_columns = self._factor_names('ETH') + self._factor_names('BTC')
    correlations = self.df[corr_columns].corr()
    # Builtin `bool`: the `np.bool` alias does not exist in NumPy 1.24-1.26.
    mask = np.triu(np.ones_like(correlations, dtype=bool))

    plt.figure(figsize=(12, 6))
    sns.heatmap(correlations, mask=mask, vmin=-1, vmax=1, annot=True, cmap='YlGnBu')
    plt.savefig('Return_Heatmap.pdf', bbox_inches="tight", pad_inches=1)

  def regression(self, decentralized, economical):
    """
    OLS of ``economical`` on ``decentralized`` and the one-row lag of
    ``economical``, with Newey-West (HAC, maxlags=1) standard errors.

    Rows where ``economical`` is 0 are excluded.

    :return: fitted statsmodels results, or the integer ``1`` when no
        observation is left
    """
    a = self.df[[decentralized, economical]].copy()
    # Lag term for the dependent variable: previous row's value. Rows are in
    # ascending date order, so this is shift(1); shift(-1) would use the
    # *next* day's value.
    a['lag_term'] = a[economical].shift(1)
    a = a.dropna()
    nonzero = a[economical] != 0  ##Some rates data on DeFi pulse is missing have '0' value
    X = a.loc[nonzero, [decentralized, 'lag_term']]
    y = a.loc[nonzero, economical]
    if y.empty:
      return 1
    # add a constant to the independent variables
    X = sm.add_constant(X)
    # conduct regression
    return sm.OLS(y, X).fit(cov_type='HAC', cov_kwds={'maxlags': 1})  # Newey-West estimator

  def regression_rev(self, decentralized, economical):
    """
    OLS of ``decentralized`` on ``economical`` with Newey-West (HAC,
    maxlags=1) standard errors.

    Rows where ``economical`` is 0 are excluded.

    :return: fitted statsmodels results, or the integer ``1`` when no
        observation is left
    """
    a = self.df[[decentralized, economical]].dropna()
    nonzero = a[economical] != 0  ##Some rates data on DeFi pulse is missing have '0' value
    y = a.loc[nonzero, decentralized]
    X = a.loc[nonzero, economical]
    if y.empty:
      return 1
    # add a constant to the independent variable
    X = sm.add_constant(X)
    # conduct regression
    return sm.OLS(y, X).fit(cov_type='HAC', cov_kwds={'maxlags': 1})  # Newey-West estimator

  def _render_regression_table(self, variables, file_stem):
    """Regress ``val`` on each variable, then write ``<file_stem>.tex`` / ``.html`` and display the table."""
    fitted = [(name, self.regression_rev('val', name)) for name in variables]
    # regression_rev returns the integer 1 when a variable has no usable
    # observations; Stargazer cannot render that, so skip such variables.
    usable = [(name, model) for name, model in fitted if not isinstance(model, int)]
    if not usable:
      raise ValueError("None of the variables has usable observations: %s" % variables)

    stargazer_result = Stargazer([model for _, model in usable])
    stargazer_result.significant_digits(3)
    stargazer_result.show_confidence_intervals(False)
    stargazer_result.covariate_order([name for name, _ in usable] + ['const'])

    latex = stargazer_result.render_latex()
    html = stargazer_result.render_html()
    # Context managers close the files even if writing fails.
    with open(file_stem + ".tex", "w") as tex_file:  # This will overwrite an existing file
      tex_file.write(latex)
    with open(file_stem + ".html", "w") as html_file:  # This will overwrite an existing file
      html_file.write(html)
    display(HTML(html))

  def stargazer_reverse_eth(self):
    """Regression table of the index on each Ether market variable; writes ``ETH.tex`` and ``ETH.html``."""
    self._render_regression_table(self._factor_names('ETH'), "ETH")

  def stargazer_reverse_btc(self):
    """Regression table of the index on each Bitcoin market variable; writes ``BTC.tex`` and ``BTC.html``."""
    self._render_regression_table(self._factor_names('BTC'), "BTC")

  def reset(self):
    """Clear every stored input and derived frame."""
    self.token = None
    self.metrics = None
    self.btc = None
    self.eth = None
    self.df = None
    self.tvl = None
    self.result = None
