In [1323]:
import pandas as pd
import numpy as np
import scipy.stats as stats

import os
import glob
from functools import reduce

import matplotlib.pyplot as plt
import pylab 

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [1324]:
# Read the factor table and promote its 'Date' column to the row index.
eth_df = pd.read_csv('eth_all.csv').set_index('Date')

In [1326]:
# Display labels, one per factor. They are assigned positionally as row/column
# labels of the result tables elsewhere in this file, so order matters.
names = [
    # entries 0-12
    'Block utilization (%)',
    'Block time',
    'Throughput',
    'Network growth',
    'Network activeness (%)',
    'Difficulty adjustment',
    'Energy consumption',
    'Non-zero balance address',
    'Mining profitability (%)',
    'Fee to reward (%)',
    'Transaction fee',
    'Development contributor',
    'Development activity',
    # entries 13-28
    'Supply deviation',
    'Coin age',
    'Velocity',
    'Market capitalization',
    'MVRV (%)',
    'Trading volume',
    'Transaction value',
    'Whale transaction',
    'Flow balance',
    'Withdrawal transaction',
    'Supply on exchange',
    'Supply on non-exchange top holder',
    'Weighted sentiment',
    'Social volume',
    'NFT transaction count',
    'NFT trading volume',
    # entries 29-32
    'Log return (%)',
    '\U0001D706(1,t)',
    '\U0001D706(2,t)',
    '\u03C3(t|t-2)',
]


In [2189]:
from statsmodels.tsa.stattools import adfuller

def adfuller_test(series, signif=0.05, name='', verbose=False):
    """Run an Augmented Dickey-Fuller test on ``series`` and return a summary dict.

    Parameters
    ----------
    series : array-like
        Observations handed straight to ``adfuller``; the lag order is chosen
        with ``autolag='AIC'``.
    signif, name, verbose
        Accepted so that existing calls keep working. This function does not
        read them and prints nothing.

    Returns
    -------
    dict
        ``test_statistic`` and ``pvalue`` (both rounded to 4 decimals) plus
        ``n_lags`` and ``n_obs``, i.e. the first four items of the tuple
        returned by ``adfuller``.
    """
    r = adfuller(series, autolag='AIC')
    return {
        'test_statistic': round(r[0], 4),
        'pvalue': round(r[1], 4),
        # number of lags used is an integer count, so it needs no rounding
        'n_lags': r[2],
        'n_obs': r[3],
    }

In [2190]:
# ADF test on every column in levels; only the statistic and p-value are kept.
adf_df1 = pd.DataFrame(
    [adfuller_test(series, name=series.name) for _, series in eth_df.items()]
).drop(columns=['n_lags', 'n_obs'])

# Same test on first differences. The first row is skipped because diff()
# has no previous observation to subtract there.
adf_df2 = pd.DataFrame(
    [adfuller_test(series, name=series.name) for _, series in eth_df.diff()[1:].items()]
).drop(columns=['n_lags', 'n_obs'])


In [2191]:
# Show floats with thousands separators and three decimals from here on.
pd.set_option('display.float_format', "{:,.3f}".format)

# Level results on the left, first-difference results on the right,
# one row per factor labelled with its display name.
merged_adf = pd.concat([adf_df1, adf_df2], axis=1)
merged_adf = merged_adf.set_axis(names, axis=0)

In [None]:
# Columns treated as non-stationary: they enter the model as first differences,
# every other column is used in levels.
diff_cols = [
    'block_utilization', 'block_time', 'throughput', 'network_growth',
    'network_activeness', 'mining_difficulty', 'energy_twh',
    'nonzero_addresses', 'mining_profit', 'fee_in_reward', 'avg_fee',
    'supply_deviation', 'coin_age', 'market_cap', 'MVRV', 'trading_vol',
    'avg_transaction_value', 'large_transaction',
    'withdrawal_count', 'exchange_supply',
]
eth_nonstationary = eth_df[diff_cols].diff()
eth_stationary = eth_df.drop(columns=diff_cols)

# Put both parts back together, skip the first row (diff() leaves it NaN in the
# differenced columns) and restore the original column order.
eth_new_df = pd.concat([eth_stationary, eth_nonstationary], axis=1)[1:]
eth_new_df = eth_new_df[eth_df.columns]

In [None]:
from itertools import combinations
from statsmodels.tsa.api import VAR
# For every unordered pair of columns, fit a bivariate VAR(p) for p = 1..30
# and remember the lag order with the smallest BIC.
lag_res = {}
pairs = list(combinations(eth_new_df.columns, 2))
for pair in pairs:
    model = VAR(eth_new_df[list(pair)])
    bics = [model.fit(order).bic for order in range(1, 31)]
    # list position 0 belongs to p = 1, hence the + 1
    lag_res[pair] = bics.index(min(bics)) + 1
    

In [1406]:
from statsmodels.tsa.stattools import grangercausalitytests

def granger_causation_matrix(data, variables, p, test = 'ssr_chi2test', verbose=False):
    """Return the smallest Granger-causality p-value for every ordered pair of variables.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain one column for each entry of ``variables``.
    variables : sequence of str
        Column names to test against each other.
    p : int
        Maximum lag. Lags 1..p are tested and the minimum p-value is kept.
    test : str
        Key of the statistic read from each lag's result dictionary
        (default ``'ssr_chi2test'``).
    verbose : bool
        When true, print the per-lag p-values of every pair.

    Returns
    -------
    pandas.DataFrame
        Square frame whose columns carry the suffix ``_x`` and whose rows carry
        the suffix ``_y``. Cell (``r_y``, ``c_x``) is the minimum p-value,
        rounded to 4 decimals, of the test run on ``data[[r, c]]``.
        Diagonal cells are fixed at 1.0.
    """
    variables = list(variables)
    size = len(variables)
    # Start from 1.0 ("no evidence") so that the skipped diagonal needs no extra step.
    df = pd.DataFrame(np.ones((size, size)), columns=variables, index=variables)
    for c in variables:
        for r in variables:
            if r == c:
                # data[[r, r]] would feed two identical columns into the test;
                # that regression is degenerate and its p-value is meaningless.
                continue
            test_result = grangercausalitytests(data[[r, c]], p, verbose=False)
            # test_result is keyed by lag (1..p); index [1] of the statistic tuple is the p-value
            p_values = [round(test_result[lag][0][test][1], 4) for lag in range(1, p + 1)]
            if verbose: print(f'Y = {r}, X = {c}, P Values = {p_values}')
            df.loc[r, c] = np.min(p_values)
    df.columns = [var + '_x' for var in variables]
    df.index = [var + '_y' for var in variables]
    return df

In [1467]:
# One 2x2 p-value matrix per pair, tested up to the lag chosen for that pair.
granger_list = [
    granger_causation_matrix(eth_new_df[list(pair)], eth_new_df[list(pair)].columns, lag)
    for pair, lag in lag_res.items()
]

In [1625]:
# For each factor, stack its "<factor>_x" column from every pair matrix that
# has one, giving a single Series of p-values indexed by "<other>_y".
all_lst = []
for factor in eth_new_df.columns:
    cause_col = '{}_x'.format(factor)
    # Pair matrices that do not involve this factor have no such column; they
    # are skipped explicitly so that any other error still surfaces.
    pieces = [matrix[cause_col] for matrix in granger_list if cause_col in matrix.columns]
    factor_df = pd.concat(pieces, join='inner')
    # The factor's own "_y" row appears once per pair matrix; keep the first.
    factor_df = factor_df[~factor_df.index.duplicated(keep='first')]
    all_lst.append(factor_df)

# Bring every Series into the row order of the first one so that they line up
# when joined column-wise.
correct_index = all_lst[0].index
all_lst = [series.reindex(list(correct_index)) for series in all_lst]

In [2037]:
# Join the per-factor p-value Series side by side: one "<factor>_x" column per
# entry of all_lst, rows labelled "<factor>_y".
causal_matrix = pd.concat(all_lst, axis=1)

In [2038]:
# Binary adjacency matrix: 1 where the p-value is significant at the 5% level,
# 0 otherwise (NaN compares as False and therefore becomes 0).
# The threshold is applied in one step on purpose: the p-values were rounded
# to 4 decimals when they were computed, so the strongest results are exactly
# 0.0 and must not be confused with the 0 that marks "not significant".
adj_matrix_sig = (causal_matrix <= 0.05).astype(float)
adj_matrix_sig.columns = names
adj_matrix_sig.index = names

In [None]:
### if lag = 0 => no
# Turn the lag table into a 0/1 mask (0 stays 0, every other entry becomes 1)
# and multiply it into the adjacency matrix to switch those edges off.
# NOTE(review): corrected_group_lag_df is assigned further down in this file,
# so that cell has to be run before this one.
lag_correction = corrected_group_lag_df.mask(corrected_group_lag_df != 0, 1)
adj_matrix_sig = adj_matrix_sig * lag_correction

### network

In [2278]:
# Set every entry of the first 29 rows to 0 and keep rows 29 onward unchanged.
head_rows = adj_matrix_sig.iloc[:29]
front_matrix = head_rows.where(head_rows == 0, 0)
back_matrix = adj_matrix_sig.iloc[29:]
all_matrix = pd.concat([front_matrix, back_matrix])

In [2291]:
# Same as the untransposed variant, applied to the transposed matrix:
# the first 29 rows are zeroed, rows 29 onward are kept.
adj_matrix_sig_T = adj_matrix_sig.transpose()
head_rows = adj_matrix_sig_T.iloc[:29]
front_matrix = head_rows.where(head_rows == 0, 0)
back_matrix = adj_matrix_sig_T.iloc[29:]
all_matrix = pd.concat([front_matrix, back_matrix])

In [2351]:
# x -> y
# Keep row i as it is and set every other row among the first 33 to 0.
i = 17  # 29 return, 32 sigma
rows_before = adj_matrix_sig.iloc[:i]
rows_after = adj_matrix_sig.iloc[i + 1:33]
front_matrix = rows_before.where(rows_before == 0, 0)
mid_matrix = adj_matrix_sig.iloc[i:i + 1]
back_matrix = rows_after.where(rows_after == 0, 0)
all_matrix = pd.concat([front_matrix, mid_matrix, back_matrix])

In [2352]:
# y -> x transpose
# Keep row i of the transposed matrix and set every other row among the first 33 to 0.
i = 17  # 29 return, 32 sigma
adj_matrix_sig_T = adj_matrix_sig.transpose()
rows_before = adj_matrix_sig_T.iloc[:i]
rows_after = adj_matrix_sig_T.iloc[i + 1:33]
front_matrix = rows_before.where(rows_before == 0, 0)
mid_matrix = adj_matrix_sig_T.iloc[i:i + 1]
back_matrix = rows_after.where(rows_after == 0, 0)
all_matrix = pd.concat([front_matrix, mid_matrix, back_matrix])

In [2308]:
# Square sub-matrix restricted to the first 13 factors (rows and columns).
fun_matrix = adj_matrix_sig.iloc[0:13, 0:13]
# Alternative block, rows/columns 13-28:
# mkt_matrix = adj_matrix_sig.iloc[13:29, 13:29]
# G = nx.from_pandas_adjacency(mkt_matrix)


In [2353]:
import networkx as nx
import nxviz as nv

import plotly.graph_objs as go
import networkx as nx

# Build the graph from the 0/1 adjacency matrix.
# NOTE(review): without create_using, networkx builds an undirected nx.Graph,
# so the direction of each link is not kept in G - confirm that is intended.
G = nx.from_pandas_adjacency(adj_matrix_sig)
# G = nx.from_pandas_adjacency(all_matrix)
# G = nx.from_pandas_adjacency(fun_matrix)

# Marker size grows linearly with the degree of the node.
node_sizes = [3 * degree + 30 for _, degree in nx.degree(G)]

# Place the nodes on a circle and store the coordinates as a node attribute.
pos = nx.circular_layout(G)
for node in G.nodes:
    G.nodes[node]['pos'] = list(pos[node])
pos = nx.get_node_attributes(G, 'pos')

# Edges: a single line trace; the None after each pair of end points breaks
# the line so that consecutive edges are not joined.
edge_x, edge_y = [], []
for src, dst in G.edges():
    x0, y0 = G.nodes[src]['pos']
    x1, y1 = G.nodes[dst]['pos']
    edge_x.extend([x0, x1, None])
    edge_y.extend([y0, y1, None])

edge_trace = go.Scatter(
    x=edge_x,
    y=edge_y,
    line=dict(width=1, color='blue'),
    hoverinfo='none',
    mode='lines')

# Nodes: fixed colour per block of factors (13 + 16 + 4 entries, by position).
# The list is used as-is; connection counts only go into the hover text.
colors = ['rgba(0, 166, 166, 1)']*13 + ['rgba(202, 144, 126, 1)']*16 + ['rgba(239, 202, 8, 1)']*4

connection_list = []
node_text = []
for node, neighbours in G.adjacency():
    connection_list.append(len(neighbours))
    node_text.append('Name: ' + str(node) + '<br># of connections: ' + str(len(neighbours)))

node_trace = go.Scatter(
    x=[G.nodes[node]['pos'][0] for node in G.nodes()],
    y=[G.nodes[node]['pos'][1] for node in G.nodes()],
    text=node_text,
    mode='markers',
    hoverinfo='text',
    marker=dict(opacity = 1,
        showscale=False,
        color=colors,
        size=node_sizes,
        line=dict(width=0)))

# Figure: 1200 x 1200, transparent background, no axes, no legend.
# The title font is set through title.font (titlefont is deprecated in plotly).
fig = go.Figure(data=[edge_trace, node_trace],
                layout=go.Layout(
                title=dict(text='', font=dict(size=16)),
                showlegend=False,
                hovermode='closest',
                width=1200,
                height=1200,
                margin=dict(b=20,l=5,r=5,t=40),
                font=dict(family='Times New Roman', size=14, color='black'),
                plot_bgcolor='rgba(0,0,0,0)',
                paper_bgcolor='rgba(0,0,0,0)',
                annotations=[ dict(
                    text='',
                    showarrow=False,
                    xref="paper", yref="paper",
                    x=0.005, y=-0.002 ) ],
                xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
              )
             )

# Labels drawn on the nodes, matched to the nodes by position.
node_names =  ['\u0394 Block <br> utilization (%)',
               '\u0394 Block <br> time',
               '\u0394 Throughput',
               '\u0394 Network <br> growth',
               '\u0394 Network <br> activeness (%)',
               '\u0394 Difficulty <br> adjustment',
               '\u0394 Energy <br> consumption',
               '\u0394 Non-zero balance <br> address',
               '\u0394 Mining <br> profitability (%)',
               '\u0394 Fee to <br> reward (%)',
               '\u0394 Transaction <br> fee',
               'Development <br> contributor',
               'Development <br> activity',
               '\u0394 Supply <br> deviation',
               '\u0394 Coin <br> age',
               'Velocity',
               '\u0394 Market <br> capitalization',
               '\u0394 MVRV (%)',
               '\u0394 Trading <br> volume',
               '\u0394 Transaction <br> value',
               '\u0394 Whale <br> transaction',
               'Flow <br> balance',
               '\u0394 Withdrawal <br> transaction',
               '\u0394 Supply on <br> exchange',
               'Supply on <br> non-exchange <br> top holder',
               'Weighted <br> sentiment',
               'Social <br> volume',
               'NFT transaction <br> count',
               'NFT trading <br> volume',
               'Log <br> return (%)',
               u'\U0001D706(1,t)',
               u'\U0001D706(2,t)',
               u'\u03C3(t|t-2)'
               ]

# One centred text annotation per node, at the node's coordinates.
for i, node in enumerate(pos):
    node_x, node_y = pos[node]
    fig.add_annotation(x=node_x,
                       y=node_y,
                       text=node_names[i],
                       showarrow=False,
                       align='center')


fig.show()


### lag matrix

In [None]:
# Lag table read by the lag-correction mask. This is a plain alias, not a copy:
# both names refer to the same DataFrame object.
# NOTE(review): group_lag_df is assigned in a cell further down in this file,
# so that cell has to be run before this one.
corrected_group_lag_df = group_lag_df
# *adj_matrix_sig.values

In [1955]:
# Flatten lag_res ({(first, second): lag}) into a long table, one row per pair.
from_lst = [pair[0] for pair in lag_res]
to_lst = [pair[1] for pair in lag_res]
val = list(lag_res.values())
lag_df = pd.DataFrame({'from': from_lst, 'to': to_lst, 'lags': val})

In [1997]:
# Square lag table in the column order of eth_new_df: the cell for two factors
# holds the lag order selected for that pair, the diagonal is 0.
# A lag belongs to the unordered pair, so it is written on both sides of the
# diagonal. This matters because the 0/1 lag mask derived from this table
# treats a 0 as "no relationship": a one-sided fill would wipe out every edge
# in the other triangle.
factor_order = list(eth_new_df.columns)
group_lag_df = pd.DataFrame(0, index=factor_order, columns=factor_order)
for first, second, lag in zip(lag_df['from'], lag_df['to'], lag_df['lags']):
    group_lag_df.loc[first, second] = lag
    group_lag_df.loc[second, first] = lag

# Switch to the display labels (assigned by position).
group_lag_df.columns = names
group_lag_df.index = names
