<a href="https://colab.research.google.com/github/SciEcon/EIP1559/blob/main/Empirical_Analysis_of_EIP_1559_Transaction_Fees%2C_User_Experience%2C_and_Blockchain_Security.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Supplementary material (Main)
**Paper Title**: 

["Empirical Analysis of EIP-1559: Transaction Fees, User Experience, and Blockchain Security"](https://arxiv.org/abs/2201.05574), Accepted at [ACM CCS'22](https://www.sigsac.org/ccs/CCS2022/call-for-papers.html)

**Author List**: 

Yulin Liu, Yuxuan Lu, Kartik Nayak, Fan Zhang\*, Luyao Zhang\*, and Yinhong Zhao 

(*names by alphabetic order, * corresponding authors*)


# Import Packages

In [None]:
# import pandas and numpy
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Import Plotly Packages
import plotly.express as px
import plotly.offline as py     
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio

In [None]:
# Import Regression Packages
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm
import statsmodels.formula.api as smf
pio.templates.default = "simple_white"
# Install package for model comparison
!pip install stargazer
!pip install linearmodels
from stargazer.stargazer import Stargazer
from IPython.core.display import HTML
from linearmodels import PooledOLS
import statsmodels.api as sm
from linearmodels import PanelOLS
!pip install scipy

In [None]:
# Import Kaleido
!pip install -U kaleido
!pip install plotly>=4.7.1
!wget https://github.com/plotly/orca/releases/download/v1.2.1/orca-1.2.1-x86_64.AppImage -O /usr/local/bin/orca
!chmod +x /usr/local/bin/orca
!apt-get install xvfb libgtk2.0-0 libgconf-2-4

In [None]:
#Mount drive and copy path of the input data
from google.colab import drive
drive.mount('/content/drive')
print('Authenticated')

# Import and Merge Data

In [None]:
block = pd.read_csv("/content/drive/MyDrive/EIP-1559/EIP-1559 Data/block.csv")
MEV = pd.read_csv("/content/drive/MyDrive/EIP-1559/EIP-1559 Data/FULL_MEVdata.csv")
waiting_time = pd.read_csv("/content/drive/MyDrive/EIP-1559/EIP-1559 Data/FULL_waiting_time.csv")
gas_price = pd.read_csv("/content/drive/MyDrive/EIP-1559/EIP-1559 Data/FULL_gasprices.csv")
sibling_cnt = pd.read_csv("/content/drive/MyDrive/EIP-1559/EIP-1559 Data/sibling_csv.csv")
supply = pd.read_csv("/content/drive/MyDrive/EIP-1559/EIP-1559 Data/supply_csv(detailed).csv")
avggas = pd.read_csv("/content/drive/MyDrive/EIP-1559/EIP-1559 Data/avggas_per.csv")

In [None]:
price = pd.read_csv('/content/drive/My Drive/EIP-1559/EIP-1559 Data/ETHprice21Aug.csv').reset_index(drop=True)
price['minute'] = pd.to_datetime(price['Date']).dt.tz_localize('US/Eastern').dt.tz_convert('UTC')
price = price.drop(['Date'], axis=1).sort_values('minute').reset_index(drop = True)
price['HML'] = np.log(price['High']) - np.log(price['Low'])
price['OMC'] = abs(np.log(price['Close']) - np.log(price['Open']))
price['volatility30'] = np.sqrt(30) * price['OMC'].rolling(window=30).mean()
price['volatility90'] = np.sqrt(60) * price['OMC'].rolling(window=90).mean()
price['volatility180'] = np.sqrt(180) * price['OMC'].rolling(window=180).mean()

In [None]:
gas_price = gas_price/10**9
gas_price[["block_number", "all_gpcount", "all_mfcount", "all_prcount", "legacy_gpcount", "legacy_mfcount", "legacy_prcount", "eip_gpcount", "eip_mfcount", "eip_prcount"]] = gas_price[["block_number", "all_gpcount", "all_mfcount", "all_prcount", "legacy_gpcount", "legacy_mfcount", "legacy_prcount", "eip_gpcount", "eip_mfcount", "eip_prcount"]] * 10**9
supply = supply[["block_number", "total_supply"]]

In [None]:
supply = supply[["block_number", "total_supply"]]
df = pd.merge(waiting_time, block, on = "block_number", how = "left")
df = pd.merge(df, gas_price, on = "block_number", how = "left")
df = pd.merge(df, supply, on = "block_number", how = "left")

df['BQ_timestamp'] = pd.to_datetime(df['BQ_timestamp'])
df['minute'] = df.BQ_timestamp.dt.floor(freq="T")
df['hour'] = df.BQ_timestamp.dt.hour
df = pd.merge(df, price, on = "minute", how = "left")
df["base_fee"] = df["base_fee"]/10**9
df = df[df['block_number'] < 13105000]
df = df[df['block_number'] > 12894999]
df['period'] = (df['block_number'] > 12894999).astype(int) + (df['block_number'] > 12964999).astype(int) + (df['block_number'] > 13034999).astype(int)
df['EIP'] = (df['block_number'] > 12964999) + 0
df = df.drop_duplicates("block_number").reset_index(drop = True)
df["adoption"] =  df["all_mfcount"]/df["all_gpcount"]
df["iqdiff"] = df["all_gpq75"]-df["all_gpq25"]
df["normalized_iqdiff"] = (df["all_gpq75"]-df["all_gpq25"])/df["all_gpq50"]
df['iqd_legacy_gpq']=df['legacy_gpq75']-df['legacy_gpq25']
df['iqd_eip_gpq']=df['eip_gpq75']-df['eip_gpq25']
df["total_supply"] = df["total_supply"].astype(float)/10**18
df['ROI']=df['Close'].pct_change()
for i in range(0,24):
  varname = "hour" + str(i)
  df[varname] = (df['hour'] == i) + 0

In [None]:
df_13 = df[df["period"] != 2]
df_13['nblock']=df_13['block_number']
df_13['nblock']=df_13['block_number'].apply(lambda x: x-12965000 if x<13035000 else x-13035000)
df1 = df_13[df_13["period"] == 1]
df3 = df_13[df_13["period"] == 3]

# Visualization

## Block Characteristics

In [None]:
#Time Series for Block Gas Limit and Block Gas Used
trace0 = go.Scattergl(x = df["block_number"], y = df["gas_used"], hoverinfo='x+y', mode='markers',  name = 'gas used', marker=dict(size=0.2, opacity=0.3))
trace1 = go.Scattergl(x = df["block_number"], y = df["gas_limit"], hoverinfo='x+y', mode='lines',  name = 'gas limit', marker=dict(size=2))

layout = go.Layout(xaxis = dict(title="block number"), yaxis = dict(title ="gas") ) 
data = [trace1, trace0]
fig_used = go.Figure(layout = layout, data = data) 
fig_used = fig_used.add_trace(go.Scatter(x=[12965000, 12965000], y=[0, 30000000], mode="lines", name="London Hardfork"))
fig_used.update_layout(
    legend=dict(
      yanchor="top",
      y=0.99,
      xanchor="left",
      x=0.01,
      itemsizing = "constant"),
    font = dict(size = 24)
    )
fig_used.show()
fig_used.write_image("/content/drive/MyDrive/EIP-1559/Code & Output/colab output/pdf/gas used.pdf", width = 1200, height = 600)
fig_used.write_image("/content/drive/MyDrive/EIP-1559/Code & Output/colab output/png/gas used.png", width = 1200, height = 600)

In [None]:
df_after = df[df["block_number"] > 12965000]
df_after = df_after[df_after["gas_used"] < 30000000]
fig_used2 = px.histogram(df_after, x="gas_used", nbins=50, histnorm="percent")
fig_used2.update_layout(yaxis_title = "percent", xaxis_title = "gas used", font = dict(size = 24))
fig_used2.show()
fig_used2.write_image("/content/drive/MyDrive/EIP-1559/Code & Output/colab output/pdf/gas used distribution.pdf", width = 1200, height = 600)
fig_used2.write_image("/content/drive/MyDrive/EIP-1559/Code & Output/colab output/png/gas used distribution.png", width = 1200, height = 600)

In [None]:
df["usage"] = df["all_mfcount"]/df["all_gpcount"]

df_after = df[df["block_number"] > 12965000]
df_after["usage"] =  df_after["all_mfcount"]/df_after["all_gpcount"]
olsmod = smf.ols('usage ~ 1+ block_number', data=df_after).fit()
df_after["usage_fitted"] = olsmod.predict(df_after["block_number"])

trace1 = go.Scattergl(x = df["block_number"], y = df["usage"], hoverinfo='x+y', mode='markers', name = 'adoption rate', marker=dict(size=1, opacity = 0.6))
trace2 = go.Scattergl(x = df_after["block_number"], y = df_after["usage_fitted"], hoverinfo='x+y', mode='lines', name = 'fitted line')
layout = go.Layout(xaxis = dict(title="block number"), yaxis = dict(title ="adoption rate")) 
data = [trace1, trace2]

fig_adoption = go.Figure(layout = layout, data = data) 
fig_adoption.add_trace(go.Scatter(x=[12965000, 12965000], y=[0,1], mode="lines", name="London Hardfork"))
fig_adoption.update_layout(legend=dict(
    yanchor="top",
    y=0.99,
    xanchor="left",
    x=0.01,
    itemsizing = "constant"),
    font = dict(size = 24))
fig_adoption.show()
fig_adoption.write_image("/content/drive/MyDrive/EIP-1559/Code & Output/colab output/pdf/adoption rate.pdf", width = 1200, height = 600)
fig_adoption.write_image("/content/drive/MyDrive/EIP-1559/Code & Output/colab output/png/adoption rate.png", width = 1200, height = 600)

## Block Transaction Fees

In [None]:
df_basefee = df[(df["base_fee"] < 200) | (df["base_fee"].isna())]
fig_basefee = go.Figure()
fig_basefee.add_trace(
   go.Scattergl(x = df_basefee["block_number"], y = df_basefee["base_fee"], hoverinfo='x+y', mode='markers', name = 'base fee', marker=dict(size=1)),
)

fig_basefee.add_trace(go.Scatter(x=[12965000, 12965000], y=[0,200], mode="lines", name="London Hardfork"))
fig_basefee.update_layout(
   xaxis = dict(title="block number"), 
   yaxis = dict(title ="base fee (Gwei)"),
   font = dict(size = 24)
) 
fig_basefee.update_layout(legend=dict(
    orientation = "h",
    yanchor="top",
    y=0.99,
    xanchor="left",
    x=0.01,
    itemsizing = "constant"
))
fig_basefee.show()
fig_basefee.write_image("/content/drive/MyDrive/EIP-1559/Code & Output/colab output/pdf/base fee.pdf", width = 1200, height = 600)
fig_basefee.write_image("/content/drive/MyDrive/EIP-1559/Code & Output/colab output/png/base fee.png", width = 1200, height = 600)

In [None]:
df["ma_total_supply"] = df["total_supply"].rolling(500, min_periods=100).mean()
df_supply = df[df["total_supply"]> -5]
fig_supply = go.Figure()
fig_supply.add_trace(
   go.Scattergl(x = df_supply["block_number"], y = df_supply["total_supply"], hoverinfo='x+y', mode='markers', name = 'net supply', marker=dict(size=1, opacity = 0.3)),
)
fig_supply.add_trace(
    go.Scattergl(x = df_supply["block_number"], y = df_supply["ma_total_supply"], hoverinfo='x+y', mode='lines',  
               name = 'net supply moving average', marker=dict(size=1, color = "black", opacity = 0.8)),
)
fig_supply.update_layout(
   xaxis = dict(title="block number"), 
   yaxis = dict(title ="net supply"),
   font = dict(size = 24)
) 
fig_supply.add_trace(go.Scatter(x=[12965000, 12965000], y=[-4,6], mode="lines", name="London Hard Fork"))
fig_supply.update_layout(legend=dict(
    orientation = "h",
    yanchor="top",
    y=0.99,
    xanchor="left",
    x=0.01,
    itemsizing = "constant"
))
fig_supply.show()
fig_supply.write_image("/content/drive/MyDrive/EIP-1559/Code & Output/colab output/pdf/net supply.pdf", width = 1200, height = 600)
fig_supply.write_image("/content/drive/MyDrive/EIP-1559/Code & Output/colab output/png/net supply.png", width = 1200, height = 600)

In [None]:
df_gp = df[df['all_gpq75'] < 200]
trace1 = go.Scattergl(x = df_gp["block_number"], y = df_gp["all_gpq25"], hoverinfo='x+y', mode='markers',  name = '25% quartile actual gas price paid', marker=dict(size=0.5))
trace2 = go.Scattergl(x = df_gp["block_number"], y = df_gp["all_gpq50"], hoverinfo='x+y', mode='markers',  name = '50% quartile actual gas price paid', marker=dict(size=0.5))
trace3 = go.Scattergl(x = df_gp["block_number"], y = df_gp["all_gpq75"], hoverinfo='x+y', mode='markers',  name = '75% quartile actual gas price paid', marker=dict(size=0.5))

layout = go.Layout(xaxis = dict(title="block number"), yaxis = dict(title ="gas price paid (Gwei)") ) 
data = [trace1, trace2, trace3]

fig_gp = go.Figure(layout = layout, data = data) 
fig_gp.add_trace(go.Scatter(x=[12965000, 12965000], y=[0,200], mode="lines", name="London Hardfork"))
fig_gp.update_layout(legend=dict(
    yanchor="top",
    y=0.99,
    xanchor="left",
    x=0.01,
    itemsizing = "constant"),
    font = dict(size = 24))
fig_gp.write_image("/content/drive/MyDrive/EIP-1559/Code & Output/colab output/pdf/gas price.pdf", width = 1200, height = 600)
fig_gp.write_image("/content/drive/MyDrive/EIP-1559/Code & Output/colab output/png/gas price.png", width = 1200, height = 600)

In [None]:
#Time Series for Inter-quartile Difference of Gas Price
df["iqdiff"] = df["all_gpq75"]-df["all_gpq25"]
df["normalized_iqdiff"] = (df["all_gpq75"]-df["all_gpq25"])/df["all_gpq50"]
df_iq = df[df["iqdiff"]<50]
df_iq = df_iq[df_iq["normalized_iqdiff"]<1]
df_iq["ma_iqdiff"] = df_iq["iqdiff"].rolling(500, min_periods=100).mean()
df_iq["ma_normalized_iqdiff"] = df_iq["normalized_iqdiff"].rolling(500, min_periods=100).mean()

fig_iqdiff = make_subplots(rows=2, cols=1)
fig_iqdiff.add_trace(
    go.Scattergl(x = df_iq["block_number"], y = df_iq["iqdiff"], hoverinfo='x+y', mode='markers',  
               name = 'within-block inter-quartile difference', marker=dict(size=1, opacity = 0.8)),
               row = 1, col = 1
)
fig_iqdiff.add_trace(
    go.Scattergl(x = df_iq["block_number"], y = df_iq["normalized_iqdiff"], hoverinfo='x+y', mode='markers',  
               name = 'within-block normalized inter-quartile difference', marker=dict(size=1, opacity = 0.8)),
               row = 2, col = 1
)
fig_iqdiff.add_trace(
    go.Scattergl(x = df_iq["block_number"], y = df_iq["ma_iqdiff"], hoverinfo='x+y', mode='lines',  
               name = 'within-block inter-quartile difference', marker=dict(size=1, color = "black", opacity = 0.8)),
               row = 1, col = 1
)
fig_iqdiff.add_trace(
    go.Scattergl(x = df_iq["block_number"], y = df_iq["ma_normalized_iqdiff"], hoverinfo='x+y', mode='lines',  
               name = 'within-block normalized inter-quartile difference', marker=dict(size=1, color = "black", opacity = 0.8)),
               row = 2, col = 1
)
fig_iqdiff.add_trace(go.Scatter(x=[12965000, 12965000], y=[0,50], mode="lines", name="London Hardfork"), row = 1, col =1)
fig_iqdiff.add_trace(go.Scatter(x=[12965000, 12965000], y=[0,1], mode="lines", name="London Hardfork"), row = 2, col =1)
fig_iqdiff.update_layout(
    showlegend = False,
    font = dict(size = 24)
)

fig_iqdiff.update_xaxes(title_text="block_number", row=2, col=1)
fig_iqdiff.update_yaxes(title_text="IQR", row=1, col=1)
fig_iqdiff.update_yaxes(title_text="std IQR", row=2, col=1)
fig_iqdiff.show()
fig_iqdiff.write_image("/content/drive/MyDrive/EIP-1559/Code & Output/colab output/pdf/inter-quartile difference.pdf", width = 1200, height = 800)
fig_iqdiff.write_image("/content/drive/MyDrive/EIP-1559/Code & Output/colab output/png/inter-quartile difference.png", width = 1200, height = 800)

In [None]:
#Priority Fee Bid Distribution
df_after = df[df["block_number"]>12965000]
df_pr = df_after[df["all_prq75"] < 20]

trace1 = go.Scattergl(x = df_pr["block_number"], y = df_pr["all_prq25"], hoverinfo='x+y', mode='markers',  name = '25% quartile pr. fee bid', marker=dict(size=1.5))
trace2 = go.Scattergl(x = df_pr["block_number"], y = df_pr["all_prq50"], hoverinfo='x+y', mode='markers',  name = '50% quartile pr. fee bid', marker=dict(size=1.5))
trace3 = go.Scattergl(x = df_pr["block_number"], y = df_pr["all_prq75"], hoverinfo='x+y', mode='markers',  name = '75% quartile pr. fee bid', marker=dict(size=1.5))

layout = go.Layout(xaxis = dict(title="block number"), yaxis = dict(title ="max priority fee bid(Gwei)") ) 
data = [trace1, trace2, trace3]

fig_prfee = go.Figure(layout = layout, data = data) 
fig_prfee.update_layout(legend=dict(
    orientation = "h",
    yanchor="top",
    y=0.99,
    xanchor="left",
    x=0.01,
    itemsizing = "constant"),
    font = dict(size = 24))
fig_prfee.show()
fig_prfee.write_image("/content/drive/MyDrive/EIP-1559/Code & Output/colab output/pdf/timeseries prfee.pdf", width = 1200, height = 600)
fig_prfee.write_image("/content/drive/MyDrive/EIP-1559/Code & Output/colab output/png/timeseries prfee.png", width = 1200, height = 600)

In [None]:
df_after = df[df["block_number"] > 12965000]
df_after = df_after[df_after["all_prq50"] < 20]
fig_prdst = px.histogram(df_after, x="all_prq50", nbins=20, histnorm="percent")
fig_prdst.update_layout(yaxis_title = "percent", 
                        xaxis_title = "block median priority fee", 
                        font = dict(size = 24), 
                        legend = dict(itemsizing = "constant"))
fig_prdst.show()

fig_prdst.write_image("/content/drive/MyDrive/EIP-1559/Code & Output/colab output/pdf/priority fee.pdf", width = 1200, height = 600)
fig_prdst.write_image("/content/drive/MyDrive/EIP-1559/Code & Output/colab output/png/priority fee.png", width = 1200, height = 600)

In [None]:
#Max Fee Bid Distribution
df_after = df[df["block_number"]>12965000]
df_mf = df_after[df["all_mfq75"] < 200]

trace1 = go.Scattergl(x = df_mf["block_number"], y = df_mf["all_mfq25"], hoverinfo='x+y', mode='markers',  name = '25% quartile max fee bid', marker=dict(size=1))
trace2 = go.Scattergl(x = df_mf["block_number"], y = df_mf["all_mfq50"], hoverinfo='x+y', mode='markers',  name = '50% quartile max fee bid', marker=dict(size=1))
trace3 = go.Scattergl(x = df_mf["block_number"], y = df_mf["all_mfq75"], hoverinfo='x+y', mode='markers',  name = '75% quartile max fee bid', marker=dict(size=1))

layout = go.Layout(xaxis = dict(title="block number"), yaxis = dict(title ="max fee bid (Gwei)") ) 
data = [trace1, trace2, trace3]

fig_mf = go.Figure(layout = layout, data = data) 
fig_mf.update_layout(legend=dict(
    orientation = "h",
    yanchor="top",
    y=0.99,
    xanchor="left",
    x=0.01,
    itemsizing = "constant"),
    font = dict(size = 24)
  )
fig_mf.show()
fig_mf.write_image("/content/drive/MyDrive/EIP-1559/Code & Output/colab output/pdf/max fee.pdf", width = 1200, height = 600)
fig_mf.write_image("/content/drive/MyDrive/EIP-1559/Code & Output/colab output/png/max fee.png", width = 1200, height = 600)

In [None]:
#Overview of Gas Price Bid and Paid Distribution
df_after = df[df["block_number"]>12965000]
df_all = df_after[df_after["all_gpq50"] < 250]
df_all = df_all[df_all["all_mfq50"] < 250]
df_all["all_aprq50"] = df_all["all_gpq50"] - df_all["base_fee"]
df_all = df_all[df_all["all_prq50"] < 250]
df_all = df_all[df_all["base_fee"] < 250]

trace1 = go.Scattergl(x = df_all["block_number"], y = df_all["all_gpq50"], hoverinfo='x+y', mode='markers',  name = 'median gas price paid', marker=dict(size=0.5))
trace2 = go.Scattergl(x = df_all["block_number"], y = df_all["all_mfq50"], hoverinfo='x+y', mode='markers',  name = 'median max fee bid', marker=dict(size=0.5))

layout = go.Layout(xaxis = dict(title="block number"), yaxis = dict(title ="gas price (Gwei)") ) 
data = [trace1, trace2]

fig_all = go.Figure(layout = layout, data = data) 
fig_all.update_layout(legend=dict(
    orientation = "h",
    yanchor="top",
    y=0.99,
    xanchor="left",
    x=0.01,
    itemsizing = "constant"),
    font = dict(size = 24))
fig_all.show()
fig_all.write_image("/content/drive/MyDrive/EIP-1559/Code & Output/colab output/pdf/max fee & gp.pdf", width = 1200, height = 600)
fig_all.write_image("/content/drive/MyDrive/EIP-1559/Code & Output/colab output/png/max fee & gp.png", width = 1200, height = 600)

## Waiting Time

In [None]:
print(df1["all_wtq50"].describe())
print(df3["all_wtq50"].describe())
print(df3["legacy_wtq50"].describe())
print(df3["eip_wtq50"].describe())

In [None]:
df_wt = df_13[df_13["all_wtq50"] < 60]
df_wt = df_wt[df_wt["all_wtq50"] > 0]
df_wt["London Hardfork"] = df_wt["EIP"]
df_wt.loc[df_wt["London Hardfork"] == 0, "London Hardfork"] = " before London Hardfork"
df_wt.loc[df_wt["London Hardfork"] == 1, "London Hardfork"] = " after London Hardfork"

fig_wtdis = px.histogram(df_wt, x="all_wtq50", color="London Hardfork", histnorm = "percent", marginal = "box", barmode='overlay', labels={"London Hardfork": ""})
fig_wtdis.update_layout(
    xaxis = dict(title="median waiting time"),
    yaxis = dict(title = "percent"),
    font = dict(size = 24)
)
fig_wtdis.update_layout(legend=dict(
    yanchor="top",
    y=0.6,
    xanchor="left",
    x=0.7),
    font = dict(size = 24))
fig_wtdis.show()
fig_wtdis.write_image("/content/drive/MyDrive/EIP-1559/Code & Output/colab output/pdf/median waiting time distribution.pdf", width = 1200, height = 600)
fig_wtdis.write_image("/content/drive/MyDrive/EIP-1559/Code & Output/colab output/png/median waiting time distribution.png", width = 1200, height = 600)

In [None]:
df_wt1 = df3[df3["all_wtq50"] < 60]
df_wt1 = df_wt1[df_wt1["all_wtq50"] > 0]
df_wt1 = df_wt1.rename(columns = {'all_wtq50': 'waiting time'})
df_wt1 = df_wt1[["block_number", "waiting time"]]
df_wt1["category"] = "All"

df_wt2 = df3[df3["legacy_wtq50"] < 60]
df_wt2 = df_wt2[df_wt2["legacy_wtq50"] > 0]
df_wt2 = df_wt2.rename(columns = {'legacy_wtq50': 'waiting time'})
df_wt2 = df_wt2[["block_number", "waiting time"]]
df_wt2["category"] = "Legacy"

df_wt3 = df3[df3["eip_wtq50"] < 60]
df_wt3 = df_wt3[df_wt3["eip_wtq50"] > 0]
df_wt3 = df_wt3.rename(columns = {'eip_wtq50': 'waiting time'})
df_wt3 = df_wt3[["block_number", "waiting time"]]
df_wt3["category"] = "EIP"

df_wt = pd.concat([df_wt1, df_wt2, df_wt3])

fig_wtdis2 = px.histogram(df_wt, x="waiting time", color="category", histnorm = "percent", marginal = "box", barmode='overlay')
fig_wtdis2.update_layout(
    xaxis = dict(title="median waiting time"),
    yaxis = dict(title = "percent"),
    font = dict(size = 24)
)
fig_wtdis2.update_layout(legend=dict(
    yanchor="top",
    y=0.6,
    xanchor="left",
    x=0.7),
    font = dict(size = 24))
fig_wtdis2.show()
fig_wtdis2.write_image("/content/drive/MyDrive/EIP-1559/Code & Output/colab output/pdf/wtdistr by type.pdf", width = 1200, height = 600)
fig_wtdis2.write_image("/content/drive/MyDrive/EIP-1559/Code & Output/colab output/png/wtdistr by type.png", width = 1200, height = 600)

# Regressions

In [None]:
hourfe = "+ hour1 + hour2 + hour3 + hour4 + hour5 + hour6 + hour7 + hour8 + hour9 + hour10 + hour11 + hour12 + hour13 + hour14 + hour15 + hour16 + hour17 + hour18 + hour19 + hour20 + hour21 + hour22 + hour23"

## Gas Price

In [None]:
results1 = smf.ols('all_gpq50 ~ 1+ EIP', data=df_13).fit()
results2 = smf.ols('all_gpq50 ~ 1+ EIP + adoption + nblock', data=df_13).fit()
results3 = smf.ols('all_gpq50 ~ 1+ EIP + nblock +adoption+size+ROI+volatility90', data=df_13).fit()
stargazer = Stargazer([results1,results2,results3])
stargazer.title('Gas Fee Median in ETH')
stargazer.significant_digits(4)
stargazer.covariate_order(['EIP','adoption','nblock','size','ROI','volatility90','Intercept'])
HTML(stargazer.render_html())
print(stargazer.render_latex())

\begin{table}[!htbp] \centering
  \caption{Gas Fee Median in ETH}
\begin{tabular}{@{\extracolsep{5pt}}lccc}
\\[-1.8ex]\hline
\hline \\[-1.8ex]
& \multicolumn{3}{c}{\textit{Dependent variable:}} \
\cr \cline{3-4}
\\[-1.8ex] & (1) & (2) & (3) \\
\hline \\[-1.8ex]
 EIP & 20.2246$^{***}$ & -11.8181$^{***}$ & -13.6894$^{***}$ \\
  & (0.3528) & (0.7543) & (0.7509) \\
 adoption & & -0.5387$^{}$ & 6.6704$^{***}$ \\
  & & (1.0776) & (1.0821) \\
 nblock & & 0.0005$^{***}$ & 0.0005$^{***}$ \\
  & & (0.0000) & (0.0000) \\
 size & & & -0.0001$^{***}$ \\
  & & & (0.0000) \\
 ROI & & & -63.4699$^{}$ \\
  & & & (343.9860) \\
 volatility90 & & & 3094.3442$^{***}$ \\
  & & & (89.8447) \\
 Intercept & 39.4370$^{***}$ & 55.5752$^{***}$ & 47.2638$^{***}$ \\
  & (0.2492) & (0.3986) & (0.7177) \\
\hline \\[-1.8ex]
 Observations & 138,043 & 138,043 & 137,769 \\
 $R^2$ & 0.0233 & 0.0429 & 0.0581 \\
 Adjusted $R^2$ & 0.0232 & 0.0429 & 0.0580 \\
 Residual Std. Error & 65.5373(df = 138041) & 64.8760(df = 138039) 

In [None]:
hours = [0]*24
for i in range(24):
  a = "hour" + str(i)
  hours[i] = a

In [None]:
df3_gpq50_melted = pd.melt(df3, id_vars=['adoption', 'nblock','size','ROI','volatility30','volatility90','volatility180']+hours, value_vars=["legacy_gpq50", "eip_gpq50"], var_name="Type", value_name="gpq50")
df3_gpq50_melted['eip']=df3_gpq50_melted["Type"].apply(lambda x: 1 if x=="eip_gpq50" else 0)
gpq50mean = df3_gpq50_melted.groupby(["nblock"])["gpq50"].mean().reset_index().rename(columns = {"gpq50": "gpq50mean"})
df3_gpq50_melted = pd.merge(df3_gpq50_melted, gpq50mean, how = "left", on = "nblock")
df3_gpq50_melted["gpq50_normalized"] = df3_gpq50_melted["gpq50"] - df3_gpq50_melted["gpq50mean"]

results1_did_q50 = smf.ols('gpq50 ~ 1+ eip + nblock + adoption'+ hourfe, data=df3_gpq50_melted).fit()
results2_did_q50 = smf.ols('gpq50 ~ 1+ eip + nblock + adoption + size + ROI+ volatility90'+ hourfe, data=df3_gpq50_melted).fit()
results3_did_q50 = smf.ols('gpq50_normalized ~ 1+ eip', data=df3_gpq50_melted).fit()
stargazer = Stargazer([results1_did_q50 ,results2_did_q50, results3_did_q50])
stargazer.title('Intrablock Gas Price Difference & Adoption Rate')
stargazer.covariate_order(['eip',  'nblock', 'adoption',  'volatility90', 'size', 'ROI', 'Intercept'])
stargazer.significant_digits(5)
HTML(stargazer.render_html())
print(stargazer.render_latex())

\begin{table}[!htbp] \centering
  \caption{Intrablock Gas Price Difference & Adoption Rate}
\begin{tabular}{@{\extracolsep{5pt}}lccc}
\\[-1.8ex]\hline
\hline \\[-1.8ex]
\\[-1.8ex] & (1) & (2) & (3) \\
\hline \\[-1.8ex]
 eip & -11.00459$^{***}$ & -11.01356$^{***}$ & -10.83807$^{***}$ \\
  & (0.29886) & (0.29694) & (0.15988) \\
 nblock & 0.00087$^{***}$ & 0.00093$^{***}$ & \\
  & (0.00001) & (0.00001) & \\
 adoption & -4.40374$^{***}$ & 0.12530$^{}$ & \\
  & (0.71369) & (0.71746) & \\
 volatility90 & & 3933.12687$^{***}$ & \\
  & & (99.88466) & \\
 size & & -0.00004$^{***}$ & \\
  & & (0.00000) & \\
 ROI & & -239.97575$^{}$ & \\
  & & (330.82715) & \\
 Intercept & 30.51834$^{***}$ & 5.38495$^{***}$ & 5.24094$^{***}$ \\
  & (0.81693) & (1.08557) & (0.11118) \\
\hline \\[-1.8ex]
 Observations & 133,394 & 133,394 & 133,394 \\
 $R^2$ & 0.18269 & 0.19348 & 0.03330 \\
 Adjusted $R^2$ & 0.18254 & 0.19331 & 0.03330 \\
 Residual Std. Error & 54.42487(df = 133367) & 54.06513(df = 133364) & 29.1805

## Intra-Block Difference

In [None]:
# Original Table in Paper
df_iqdiff = df_13[df_13["all_gpq50"] > 1]
df_iqdiff[df_iqdiff["normalized_iqdiff"] < 3] = 3
#df_iqdiff = df_iqdiff[df_iqdiff["normalized_iqdiff"] < 3]
results1 = smf.ols('normalized_iqdiff ~ 1 + EIP' + hourfe, data = df_iqdiff).fit()
results2 = smf.ols('normalized_iqdiff ~ 1 + EIP + adoption + nblock' + hourfe, data = df_iqdiff).fit()
results3 = smf.ols('normalized_iqdiff ~ 1 + EIP + adoption + nblock + all_gpq50 + volatility90' + hourfe, data = df_iqdiff).fit()
results4 = smf.ols('normalized_iqdiff ~ 1 + EIP + adoption + nblock + all_gpq50 + volatility90 + size + ROI' + hourfe, data = df_iqdiff).fit()
stargazer = Stargazer([results1 ,results2, results3, results4])
stargazer.title('Intrablock Gas Price Difference & Adoption Rate')
stargazer.covariate_order(['EIP', 'adoption', 'nblock', 'all_gpq50', 'volatility90', 'size', 'ROI', 'Intercept'])
stargazer.significant_digits(5)
HTML(stargazer.render_html())
#print(stargazer.render_latex())



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



0,1,2,3,4
,,,,
,Dependent variable:normalized_iqdiff,Dependent variable:normalized_iqdiff,Dependent variable:normalized_iqdiff,Dependent variable:normalized_iqdiff
,,,,
,(1),(2),(3),(4)
,,,,
EIP,-4.31553***,3.64479***,-6.86251***,-7.98973***
,(0.07821),(0.15462),(0.25571),(0.24686)
adoption,,-4.75735***,-4.52511***,-8.49838***
,,(0.25768),(0.25153),(0.24374)
nblock,,-0.00009***,0.00008***,0.00008***


In [None]:
# Robustness check with unstandardized inter-quartile difference
df_iqdiff = df_13[df_13["iqdiff"] < 100]
results1 = smf.ols('iqdiff ~ 1 + EIP' + hourfe, data = df_iqdiff).fit()
results2 = smf.ols('iqdiff ~ 1 + EIP + adoption + nblock' + hourfe, data = df_iqdiff).fit()
results3 = smf.ols('iqdiff ~ 1 + EIP + adoption + nblock + all_gpq50 + volatility90' + hourfe, data = df_iqdiff).fit()
results4 = smf.ols('iqdiff ~ 1 + EIP + adoption + nblock + all_gpq50 + volatility90 + size + ROI' + hourfe, data = df_iqdiff).fit()
stargazer = Stargazer([results1 ,results2, results3, results4])
stargazer.title('Intrablock Gas Price Difference & Adoption Rate')
stargazer.covariate_order(['EIP', 'adoption', 'nblock', 'all_gpq50', 'volatility90', 'size', 'ROI', 'Intercept'])
stargazer.significant_digits(5)
HTML(stargazer.render_html())
#print(stargazer.render_latex())

0,1,2,3,4
,,,,
,Dependent variable:iqdiff,Dependent variable:iqdiff,Dependent variable:iqdiff,Dependent variable:iqdiff
,,,,
,(1),(2),(3),(4)
,,,,
EIP,3.18875***,2.25948***,3.09493***,3.22379***
,(0.05992),(0.12306),(0.11739),(0.11789)
adoption,,-17.34851***,-16.53413***,-16.82489***
,,(0.17389),(0.16608),(0.16800)
nblock,,0.00012***,0.00008***,0.00008***


In [None]:
# Robustness check with different volatility estimates
df_iqdiff = df_13[df_13["all_gpq50"] > 1]
df_iqdiff = df_iqdiff[df_iqdiff["normalized_iqdiff"] < 3]
results1 = smf.ols('normalized_iqdiff ~ 1 + EIP + adoption + nblock + all_gpq50 + volatility30 + size + ROI' + hourfe, data = df_iqdiff).fit()
results2 = smf.ols('normalized_iqdiff ~ 1 + EIP + adoption + nblock + all_gpq50 + volatility90 + size + ROI' + hourfe, data = df_iqdiff).fit()
results3 = smf.ols('normalized_iqdiff ~ 1 + EIP + adoption + nblock + all_gpq50 + volatility180 + size + ROI' + hourfe, data = df_iqdiff).fit()
stargazer = Stargazer([results1 ,results2, results3])
stargazer.title('Intrablock Gas Price Difference & Adoption Rate')
stargazer.covariate_order(['EIP', 'adoption', 'nblock', 'all_gpq50', 'volatility30', 'volatility90', 'volatility180', 'size', 'ROI', 'Intercept'])
stargazer.significant_digits(5)
HTML(stargazer.render_html())
print(stargazer.render_latex())

\begin{table}[!htbp] \centering
  \caption{Intrablock Gas Price Difference & Adoption Rate}
\begin{tabular}{@{\extracolsep{5pt}}lccc}
\\[-1.8ex]\hline
\hline \\[-1.8ex]
& \multicolumn{3}{c}{\textit{Dependent variable:}} \
\cr \cline{3-4}
\\[-1.8ex] & (1) & (2) & (3) \\
\hline \\[-1.8ex]
 EIP & 0.08622$^{***}$ & 0.08603$^{***}$ & 0.08590$^{***}$ \\
  & (0.00260) & (0.00260) & (0.00260) \\
 adoption & -0.24707$^{***}$ & -0.24573$^{***}$ & -0.24471$^{***}$ \\
  & (0.00374) & (0.00374) & (0.00375) \\
 nblock & -0.00000$^{***}$ & -0.00000$^{***}$ & -0.00000$^{***}$ \\
  & (0.00000) & (0.00000) & (0.00000) \\
 all_gpq50 & -0.00001$^{}$ & -0.00001$^{}$ & -0.00001$^{}$ \\
  & (0.00001) & (0.00001) & (0.00001) \\
 volatility30 & -4.12185$^{***}$ & & \\
  & (0.38035) & & \\
 volatility90 & & -1.75653$^{***}$ & \\
  & & (0.33043) & \\
 volatility180 & & & -0.48006$^{**}$ \\
  & & & (0.21847) \\
 size & 0.00000$^{***}$ & 0.00000$^{***}$ & 0.00000$^{***}$ \\
  & (0.00000) & (0.00000) & (0.00000) \\

## Waiting Time

In [None]:
# Original Table
df_wt = df_13.reset_index(drop = True)
df_wt[df_wt["all_wtq50"] < 0] = 0
df_wt[df_wt["legacy_wtq50"] < 0] = 0
df_wt[df_wt["all_wtq50"] > 300] = 300
df_wt[df_wt["legacy_wtq50"] > 300] = 300
results1 = smf.ols('all_wtq50 ~ 1 + EIP + adoption + nblock' + hourfe, data = df_wt).fit()
results2 = smf.ols('all_wtq50 ~ 1 + EIP + adoption + nblock + all_gpq50 + volatility90' + hourfe, data = df_wt).fit()
results3 = smf.ols('all_wtq50 ~ 1 + EIP + adoption + nblock + all_gpq50 + volatility90 + size + ROI' + hourfe, data = df_wt).fit()
results4 = smf.ols('legacy_wtq50 ~ 1 + EIP + adoption + nblock' + hourfe, data = df_wt).fit()
results5 = smf.ols('legacy_wtq50 ~ 1 + EIP + adoption + nblock + all_gpq50 + volatility90 + size + ROI' + hourfe, data = df_wt).fit()
stargazer = Stargazer([results1 ,results2, results3, results4, results5])
stargazer.title('Ethereum Median Waiting Time and EIP-1559 Adoption')
stargazer.covariate_order(['EIP', 'adoption', 'nblock', 'all_gpq50', 'volatility90', 'size', 'ROI', 'Intercept'])
stargazer.significant_digits(3)
HTML(stargazer.render_html())
print(stargazer.render_latex())

In [None]:
df3_wtq50_melted = pd.melt(df3, id_vars=['adoption', 'nblock','size','ROI','volatility30','volatility90','volatility180']+hours, value_vars=["legacy_wtq50", "eip_wtq50"], var_name="Type", value_name="wtq50")
df3_wtq50_melted['eip']=df3_wtq50_melted["Type"].apply(lambda x: 1 if x=="eip_wtq50" else 0)
df3_wtq50_melted[df3_wtq50_melted["wtq50"] < 0] = 0
df3_wtq50_melted[df3_wtq50_melted["wtq50"] > 300] = 300
wtq50mean = df3_wtq50_melted.groupby(["nblock"])["wtq50"].mean().reset_index().rename(columns = {"wtq50": "wtq50mean"})
df3_wtq50_melted = pd.merge(df3_wtq50_melted, wtq50mean, how = "left", on = "nblock")
df3_wtq50_melted["wtq50_normalized"] = df3_wtq50_melted["wtq50"] - df3_wtq50_melted["wtq50mean"]

results1_did_q50 = smf.ols('wtq50 ~ 1+ eip + nblock + adoption'+ hourfe, data=df3_wtq50_melted).fit()
results2_did_q50 = smf.ols('wtq50 ~ 1+ eip + nblock + adoption + size + ROI+ volatility90'+ hourfe, data=df3_wtq50_melted).fit()
results3_did_q50 = smf.ols('wtq50_normalized ~ 1+ eip', data=df3_wtq50_melted).fit()
stargazer = Stargazer([results1_did_q50 ,results2_did_q50, results3_did_q50])
stargazer.title('Intrablock Gas Price Difference & Adoption Rate')
stargazer.covariate_order(['eip',  'nblock', 'adoption',  'volatility90', 'size', 'ROI', 'Intercept'])
stargazer.significant_digits(5)
HTML(stargazer.render_html())
print(stargazer.render_latex())

\begin{table}[!htbp] \centering
  \caption{Intrablock Gas Price Difference & Adoption Rate}
\begin{tabular}{@{\extracolsep{5pt}}lccc}
\\[-1.8ex]\hline
\hline \\[-1.8ex]
\\[-1.8ex] & (1) & (2) & (3) \\
\hline \\[-1.8ex]
 eip & -0.49933$^{***}$ & -1.14876$^{***}$ & 0.00059$^{}$ \\
  & (0.13514) & (0.12950) & (0.00088) \\
 nblock & 0.00005$^{***}$ & 0.00001$^{**}$ & \\
  & (0.00000) & (0.00000) & \\
 adoption & -3.83371$^{***}$ & -9.74894$^{***}$ & \\
  & (0.30980) & (0.30025) & \\
 volatility90 & & 59.16974$^{}$ & \\
  & & (37.05756) & \\
 size & & 0.00013$^{***}$ & \\
  & & (0.00000) & \\
 ROI & & -125.41390$^{***}$ & \\
  & & (36.26787) & \\
 Intercept & 14.76364$^{***}$ & 5.37660$^{***}$ & -0.00345$^{}$ \\
  & (0.15423) & (0.30356) & (0.03561) \\
\hline \\[-1.8ex]
 Observations & 137,780 & 137,780 & 140,000 \\
 $R^2$ & 0.70541 & 0.73170 & 0.00000 \\
 Adjusted $R^2$ & 0.70535 & 0.73165 & -0.00000 \\
 Residual Std. Error & 24.75174(df = 137753) & 23.62149(df = 137750) & 13.18300(df = 13

In [None]:
# Robustness check with LAD regressions
df_wt = df_13.reset_index(drop = True)
df_wt[df_wt["all_wtq50"] < 0] = 0
results1 = smf.quantreg('all_wtq50 ~ 1 + EIP' + hourfe, data = df_iqdiff).fit(q = 0.5)
results2 = smf.quantreg('all_wtq50 ~ 1 + EIP + adoption + nblock' + hourfe, data = df_iqdiff).fit(q = 0.5)
results3 = smf.quantreg('all_wtq50 ~ 1 + EIP + adoption + nblock + all_gpq50 + volatility90' + hourfe, data = df_iqdiff).fit(q = 0.5)
results4 = smf.quantreg('all_wtq50 ~ 1 + EIP + adoption + nblock + all_gpq50 + volatility90 + size + ROI' + hourfe, data = df_iqdiff).fit(q = 0.5)
stargazer = Stargazer([results1 ,results2, results3, results4])
stargazer.title('Waiting time & EIP-1559 adoption')
stargazer.covariate_order(['EIP', 'adoption', 'nblock', 'all_gpq50', 'volatility90', 'size', 'ROI', 'Intercept'])
stargazer.significant_digits(5)
HTML(stargazer.render_html())
print(stargazer.render_latex())

In [None]:
# All transactions waiting time
results1 = smf.quantreg('all_wtq50 ~ 1 + EIP + adoption + all_gpq50 + volatility', data = df_13).fit(q=0.5)
# EIP transactions waiting time
results2 = smf.quantreg('all_wtq50 ~ 1 + EIP + all_gpq50 + volatility', data = df_13).fit(q=0.5)
# Legacy transactions waiting time
results3 = smf.quantreg('legacy_wtq50 ~ 1 + adoption + all_gpq50 + legacy_gpq50 + volatility', data = df3).fit(q=0.5)
# EIP transactions waiting time
results4 = smf.quantreg('legacy_wtq50 ~ 1 + adoption + all_gpq50 + eip_gpq50 + volatility', data = df3).fit(q=0.5)
stargazer = Stargazer([results1 ,results2, results3, results4])
stargazer.title('Waiting Time & Adoption Rate')
stargazer.covariate_order(['EIP', 'adoption', 'volatility', 'all_gpq50', 'legacy_gpq50', 'eip_gpq50', 'Intercept'])
stargazer.significant_digits(3)
HTML(stargazer.render_html())
print(stargazer.render_latex())

# Statistic Tests 

## Correlation Heatmap

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(font_scale=1.5)
columns = ['EIP', 'adoption', 'nblock', 'size', 'gas_used', 'volatility30','volatility90','volatility180','ROI']
df_Heatmap =df_13[columns]
plt.figure(figsize=(18, 12))
#triangle
mask = np.triu(np.ones_like(df_Heatmap.corr(), dtype=np.bool))
heatmap = sns.heatmap(df_Heatmap.corr(), mask=mask, vmin=-1, vmax=1, annot=True, cmap='YlGnBu')
plt.savefig('/content/drive/MyDrive/EIP-1559/Code & Output/colab output/pdf/correlation heatmap.pdf')

## The Dickey-Fuller test for stationary
Null hypothesis: not stationary

https://machinelearningmastery.com/time-series-data-stationary-python/

In [None]:
s=df['gas_used'].dropna()
from statsmodels.tsa.stattools import adfuller
result = adfuller(s)
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('Critical Values:')
for key, value in result[4].items():
	print('\t%s: %.3f' % (key, value))

ADF Statistic: -46.850743
p-value: 0.000000
Critical Values:
	1%: -3.430
	5%: -2.862
	10%: -2.567


In [None]:
s=df3['base_fee'].dropna()
from statsmodels.tsa.stattools import adfuller
result = adfuller(s)
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('Critical Values:')
for key, value in result[4].items():
	print('\t%s: %.3f' % (key, value))

ADF Statistic: -13.514776
p-value: 0.000000
Critical Values:
	1%: -3.430
	5%: -2.862
	10%: -2.567


In [None]:
s=df3['adoption'].dropna()
from statsmodels.tsa.stattools import adfuller
result = adfuller(s)
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('Critical Values:')
for key, value in result[4].items():
	print('\t%s: %.3f' % (key, value))

ADF Statistic: -7.578229
p-value: 0.000000
Critical Values:
	1%: -3.430
	5%: -2.862
	10%: -2.567


In [None]:
s=df['iqdiff'].fillna(method = "ffill")
s=s[s != float("inf")]
from statsmodels.tsa.stattools import adfuller
result = adfuller(s)
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('Critical Values:')
for key, value in result[4].items():
	print('\t%s: %.3f' % (key, value))

ADF Statistic: -29.240991
p-value: 0.000000
Critical Values:
	1%: -3.430
	5%: -2.862
	10%: -2.567


In [None]:
s=df['normalized_iqdiff'].fillna(method = "ffill")
s=s[s != float("inf")]
from statsmodels.tsa.stattools import adfuller
result = adfuller(s)
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('Critical Values:')
for key, value in result[4].items():
	print('\t%s: %.3f' % (key, value))

ADF Statistic: -34.837507
p-value: 0.000000
Critical Values:
	1%: -3.430
	5%: -2.862
	10%: -2.567


In [None]:
s=df['volatility90'].dropna()
from statsmodels.tsa.stattools import adfuller
result = adfuller(s)
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('Critical Values:')
for key, value in result[4].items():
	print('\t%s: %.3f' % (key, value))

ADF Statistic: -10.993182
p-value: 0.000000
Critical Values:
	1%: -3.430
	5%: -2.862
	10%: -2.567


In [None]:
s=df['size'].dropna()
from statsmodels.tsa.stattools import adfuller
result = adfuller(s)
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('Critical Values:')
for key, value in result[4].items():
	print('\t%s: %.3f' % (key, value))

ADF Statistic: -24.497425
p-value: 0.000000
Critical Values:
	1%: -3.430
	5%: -2.862
	10%: -2.567


In [None]:
s=df['gas_used'].dropna()
from statsmodels.tsa.stattools import adfuller
result = adfuller(s)
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('Critical Values:')
for key, value in result[4].items():
	print('\t%s: %.3f' % (key, value))

ADF Statistic: -46.850743
p-value: 0.000000
Critical Values:
	1%: -3.430
	5%: -2.862
	10%: -2.567


In [None]:
s=df['all_gpq50'].dropna()
from statsmodels.tsa.stattools import adfuller
result = adfuller(s)
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('Critical Values:')
for key, value in result[4].items():
	print('\t%s: %.3f' % (key, value))

ADF Statistic: -32.920746
p-value: 0.000000
Critical Values:
	1%: -3.430
	5%: -2.862
	10%: -2.567


In [None]:
s=df['all_wtq50'].dropna()
from statsmodels.tsa.stattools import adfuller
result = adfuller(s)
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('Critical Values:')
for key, value in result[4].items():
	print('\t%s: %.3f' % (key, value))

ADF Statistic: -60.834474
p-value: 0.000000
Critical Values:
	1%: -3.430
	5%: -2.862
	10%: -2.567


## Autocorrelation

In [None]:
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
plt.rcParams['font.size'] = '16'
#creating a function to plot the graph and show the test result:
def auto_correlation(y, column, lags_plots=48, figsize=(22,8)):
    "Use Series as parameter"
    y = pd.Series(y)
    fig = plt.figure()

    ax1 = plt.subplot2grid((3, 3), (0, 0), colspan=2)
    ax2 = plt.subplot2grid((3, 3), (1, 0))
    ax3 = plt.subplot2grid((3, 3), (1, 1))

    y.plot(ax=ax1, figsize=figsize, color='teal')
    ax1.set_title(column)
    plot_acf(y, lags=lags_plots, zero=False, ax=ax2, color='teal');
    plot_pacf(y, lags=lags_plots, zero=False, ax=ax3, method='ols', color='teal');
    plt.tight_layout()
        

In [None]:
## autocorrelation for iqdiff
df_iqdiff = df[df["all_gpq50"] > 0]
auto_correlation(df_iqdiff['normalized_iqdiff'].dropna(), 'standardized IQR', lags_plots=60)

In [None]:
df_wt = df_13.reset_index(drop = True)
df_wt[df_wt["all_wtq50"] < 0] = 0
auto_correlation(df_wt['all_wtq50'].dropna(), 'Median Waiting Time', lags_plots=60)

In [None]:
auto_correlation(df['all_gpq50'].dropna(), 'Median Gas Price' ,lags_plots=60)

# Sibling & Security

In [None]:
# Merge Data
gas_price = gas_price
df_sibling = pd.merge(block, sibling_cnt, how = "inner", on = "block_number")
df_sibling = pd.merge(df_sibling, gas_price, how = "inner", on = "block_number")
df_sibling["adoption"] = df_sibling["all_prcount"]/df_sibling["all_gpcount"]
df_sibling["EIP"] = (df_sibling["block_number"] > 12965000) + 0
df_sibling['BQ_timestamp'] = pd.to_datetime(df_sibling['BQ_timestamp'])
df_sibling['Hour'] = df_sibling.BQ_timestamp.dt.floor(freq="H")

df_sibling = df_sibling[df_sibling['block_number'] < 13105000]
df_sibling = df_sibling[df_sibling['block_number'] > 12894999]
df_sibling['period'] = (df_sibling['block_number'] > 12894999).astype(int) + (df_sibling['block_number'] > 12964999).astype(int) + (df_sibling['block_number'] > 13034999).astype(int)
df_sibling = df_sibling[df_sibling["period"] != 2]

df_sibling["gas_used_million"] = df_sibling["gas_used"]/10**6
df_sibling["size_kbyte"] = df_sibling["size"]/1024
df_sibling["sibling_x"] = (df_sibling["sibling_cnt"] > 0) + 0
#df_sibling["EIP*gas_used"] = df_sibling["EIP"] * df_sibling["gas_used"]

In [None]:
df_sibling_hour = df_sibling.groupby(["Hour"])["sibling_cnt"].sum().reset_index()

trace = go.Scatter(x = df_sibling_hour["Hour"], y = df_sibling_hour["sibling_cnt"], hoverinfo='x+y', mode='lines', name = 'Hourly Sibling Count', marker=dict(size=2))
layout = go.Layout(xaxis = dict(title="time"), yaxis = dict(title ="hourly sibling count"), font = dict(size = 24)) 
fig_sibling = go.Figure(layout = layout, data = trace) 

fig_sibling.show()
fig_sibling.write_image("/content/drive/MyDrive/EIP-1559/Code & Output/colab output/pdf/sibling.pdf", width = 1200, height = 600)
fig_sibling.write_image("/content/drive/MyDrive/EIP-1559/Code & Output/colab output/png/sibling.png", width = 1200, height = 600)

## Research Question 1 & 2

In [None]:
df_sibling[df_sibling["block_number"] < 12965000]["size"].describe()

count     69166.000000
mean      64051.066391
std       19887.066141
min         631.000000
25%       53152.000000
50%       62870.000000
75%       72734.750000
max      420284.000000
Name: size, dtype: float64

In [None]:
df_sibling[df_sibling["block_number"] > 12965000]["size"].describe()

count    137917.000000
mean      76596.501497
std       55407.170224
min         647.000000
25%       28516.000000
50%       66400.000000
75%      119270.000000
max      504962.000000
Name: size, dtype: float64

In [None]:
results1 = smf.ols('size ~ 1 + EIP', data=df_sibling).fit()
results2 = smf.ols('size ~ 1 + EIP + adoption + gas_used + EIP:gas_used', data=df_sibling).fit()
stargazer = Stargazer([results1 ,results2])
stargazer.title('Block Size & Adoption Rate')
stargazer.covariate_order(['EIP','adoption','gas_used', 'EIP:gas_used','Intercept'])
HTML(stargazer.render_html())
print(stargazer.render_latex())

\begin{table}[!htbp] \centering
  \caption{Block Size & Adoption Rate}
\begin{tabular}{@{\extracolsep{5pt}}lcc}
\\[-1.8ex]\hline
\hline \\[-1.8ex]
& \multicolumn{2}{c}{\textit{Dependent variable:}} \
\cr \cline{2-3}
\\[-1.8ex] & (1) & (2) \\
\hline \\[-1.8ex]
 EIP & 13960.686$^{***}$ & 25126.760$^{***}$ \\
  & (225.731) & (7494.917) \\
 adoption & & 25803.954$^{***}$ \\
  & & (357.959) \\
 gas_used & & 0.006$^{***}$ \\
  & & (0.001) \\
 EIP:gas_used & & -0.002$^{***}$ \\
  & & (0.001) \\
 Intercept & 64051.066$^{***}$ & -33113.125$^{***}$ \\
  & (159.455) & (7492.033) \\
\hline \\[-1.8ex]
 Observations & 138,055 & 138,055 \\
 $R^2$ & 0.027 & 0.726 \\
 Adjusted $R^2$ & 0.027 & 0.726 \\
 Residual Std. Error & 41935.882(df = 138053) & 22251.126(df = 138050)  \\
 F Statistic & 3825.008$^{***}$ (df = 1.0; 138053.0) & 91473.617$^{***}$ (df = 4.0; 138050.0) \\
\hline
\hline \\[-1.8ex]
\textit{Note:} & \multicolumn{2}{r}{$^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01} \\
\end{tabular}
\end{t

In [None]:
# Generate PCV to Avoid MultiCollinearity between gas used and block size
results1 = smf.ols('size ~ 1 + gas_used', data=df_sibling).fit()
df_sibling['size_PCV'] = df_sibling["size"] - results1.predict(df_sibling["gas_used"])

results2 = smf.ols('gas_used ~ 1 + size', data=df_sibling).fit()
df_sibling['gas_PCV'] = df_sibling["gas_used"] - results2.predict(df_sibling["size"])

from statsmodels.stats.outliers_influence import variance_inflation_factor
X = df_sibling[["gas_used_million", "size_PCV"]]

# VIF dataframe
vif_data = pd.DataFrame()
vif_data["feature"] = X.columns

# calculating VIF for each feature
vif_data["VIF"] = [variance_inflation_factor(X.values, i)
						for i in range(len(X.columns))]

print(vif_data)

In [None]:
#Logit
results1 = smf.logit('sibling_x ~ 1 + size_kbyte', data=df_sibling).fit()
results2 = smf.logit('sibling_x ~ 1 + size_kbyte + EIP + adoption', data=df_sibling).fit()
stargazer = Stargazer([results1 ,results2])
stargazer.title('Sibling Count & Block Size')
stargazer.significant_digits(4)
stargazer.covariate_order(['size_kbyte', 'EIP',  'adoption', 'Intercept'])
HTML(stargazer.render_html())
print(stargazer.render_latex())

Optimization terminated successfully.
         Current function value: 0.188710
         Iterations 7
Optimization terminated successfully.
         Current function value: 0.188677
         Iterations 7
\begin{table}[!htbp] \centering
  \caption{Sibling Count & Block Size}
\begin{tabular}{@{\extracolsep{5pt}}lcc}
\\[-1.8ex]\hline
\hline \\[-1.8ex]
& \multicolumn{2}{c}{\textit{Dependent variable:}} \
\cr \cline{2-3}
\\[-1.8ex] & (1) & (2) \\
\hline \\[-1.8ex]
 size_kbyte & 0.0034$^{***}$ & 0.0036$^{***}$ \\
  & (0.0003) & (0.0003) \\
 EIP & & 0.0704$^{*}$ \\
  & & (0.0400) \\
 adoption & & -0.2257$^{***}$ \\
  & & (0.0764) \\
 Intercept & -3.2573$^{***}$ & -3.2608$^{***}$ \\
  & (0.0249) & (0.0263) \\
\hline \\[-1.8ex]
 Observations & 138,055 & 138,055 \\
 $R^2$ &  &  \\
 Adjusted $R^2$ &  &  \\
 Residual Std. Error & 1.0000(df = 138053) & 1.0000(df = 138051)  \\
 F Statistic & $^{}$ (df = 1.0; 138053.0) & $^{}$ (df = 3.0; 138051.0) \\
\hline
\hline \\[-1.8ex]
\textit{Note:} & \multico

In [None]:
df_sibling_3["size_kbyte"].describe()

In [None]:
# Logit with PCV
df_sibling["gas_used_million"] = df_sibling["gas_used"]/10**6
df_sibling["size_kbyte"] = df_sibling["size"]/1024
results1 = smf.logit('sibling_x ~ 1 + size_PCV + gas_used_million', data=df_sibling).fit()
results2 = smf.logit('sibling_x ~ 1 + gas_used_million', data=df_sibling).fit()
results3 = smf.logit('sibling_x ~ 1 + size_PCV', data=df_sibling).fit()
results4 = smf.logit('sibling_x ~ 1 + size_PCV + gas_used_million + EIP + EIP*size_PCV + EIP*gas_used_million', data=df_sibling).fit()
stargazer2 = Stargazer([results1 ,results2, results3, results4])
stargazer2.title('Sibling Count & Block Size')
stargazer2.significant_digits(7)
stargazer2.covariate_order(['size_PCV', 'gas_used_million', 'EIP', 'EIP:size_PCV', 'EIP:gas_used_million', 'Intercept'])
stargazer2.show_confidence_intervals(True)
HTML(stargazer2.render_html())

In [None]:
df_sibling_1 = df_sibling[df_sibling["block_number"] < 12965000]
df_sibling_1 = df_sibling_1[df_sibling_1["block_number"] > 12894999]
df_sibling_3 = df_sibling[df_sibling["block_number"] > 13034999]
df_sibling_3 = df_sibling_3[df_sibling_3["block_number"] < 13105000]

In [None]:
df_sibling_1["sibling_cnt"].mean()

0.047263106150420724

In [None]:
(df_sibling_3["sibling_cnt"].mean() - df_sibling_1["sibling_cnt"].mean())/df_sibling_1["sibling_cnt"].mean()

0.03749856443086714

In [None]:
df_sibling_before[df_sibling_before["sibling_cnt"] == 3]

In [None]:
df_sibling_after[df_sibling_after["sibling_cnt"] == 3]

In [None]:
df_sibling["sibling_cnt_ma5"] = df_sibling["sibling_cnt"].rolling(window=5).sum()
df_sibling["sibling_cnt_ma20"] = df_sibling["sibling_cnt"].rolling(window=20).sum()
df_sibling["sibling_cnt_ma50"] = df_sibling["sibling_cnt"].rolling(window=50).sum()
df_sibling["sibling_cnt_ma200"] = df_sibling["sibling_cnt"].rolling(window=200).sum()
df_sibling["sibling_cnt_ma500"] = df_sibling["sibling_cnt"].rolling(window=500).sum()
df_sibling_before = df_sibling[df_sibling["block_number"] < 12965000]
df_sibling_before = df_sibling_before[df_sibling_before["block_number"] > 12895000]
df_sibling_after = df_sibling[df_sibling["block_number"] > 12965000]
df_sibling_after = df_sibling_after[df_sibling_after["block_number"] < 13105000]

In [None]:
df_sibling_before["sibling_cnt_20"].describe(percentiles = [.75, .95, .99, .999])

In [None]:
df_sibling_after["sibling_cnt_20"].describe([.75, .95, .99, .999])

In [None]:
df_sibling_after.loc[267130:267178, ["sibling_cnt", "block_number"]]

## Part II

In [None]:
df_sibling["gas_used_ma10"] = df_sibling["gas_used"].rolling(10).sum()
df_sibling["spike0"] = (df_sibling["gas_used_ma10"] > 200000000)
df_sibling["spike1"] = df_sibling["spike0"].shift(-1)
df_sibling["spike2"] = df_sibling["spike0"].shift(-2)
df_sibling["spike3"] = df_sibling["spike0"].shift(-3)
df_sibling["spike4"] = df_sibling["spike0"].shift(-4)
df_sibling["spike5"] = df_sibling["spike0"].shift(-5)
df_sibling["spike6"] = df_sibling["spike0"].shift(-6)
df_sibling["spike7"] = df_sibling["spike0"].shift(-7)
df_sibling["spike8"] = df_sibling["spike0"].shift(-8)
df_sibling["spike9"] = df_sibling["spike0"].shift(-9)
df_sibling["spike"] = (df_sibling["spike0"] | df_sibling["spike1"] | df_sibling["spike2"] | df_sibling["spike3"] | df_sibling["spike4"] | df_sibling["spike5"] | df_sibling["spike6"] | df_sibling["spike7"] | df_sibling["spike8"] | df_sibling["spike9"]) + 0

In [None]:
df_sibling = df_sibling[df_sibling["block_number"] < 13105000]
df_sibling = df_sibling[df_sibling["block_number"] > 12894999]
df_sibling["sibling_cnt_ma5"] = df_sibling["sibling_cnt"].rolling(window=5).sum()
df_sibling["sibling_cnt_ma20"] = df_sibling["sibling_cnt"].rolling(window=20).sum()
df_sibling["sibling_cnt_ma50"] = df_sibling["sibling_cnt"].rolling(window=50).sum()
df_sibling["sibling_cnt_ma200"] = df_sibling["sibling_cnt"].rolling(window=200).sum()
df_sibling["sibling_cnt_ma500"] = df_sibling["sibling_cnt"].rolling(window=500).sum()
df_sibling["spike"].describe()

In [None]:
df_sibling_spike = df_sibling[df_sibling["spike"] == 1]
df_sibling_normal = df_sibling[df_sibling["spike"] == 0]

In [None]:
df_sibling["gas_used_million"] = df_sibling["gas_used"]/10**6
df_sibling["size_kbyte"] = df_sibling["size"]/1024
df_sibling["sibling_x"] = (df_sibling["sibling_cnt"] > 0) + 0
results1 = smf.ols('sibling_cnt ~ 1 + spike + EIP + size_kbyte + gas_used_million', data=df_sibling).fit()
results2 = smf.ols('sibling_cnt ~ 1 + spike + EIP', data=df_sibling).fit()
results3 = smf.ols('sibling_cnt ~ 1 + EIP + size_kbyte + gas_used_million', data=df_sibling).fit()
stargazer3 = Stargazer([results1 ,results2, results3])
stargazer3.title('Sibling Count & Spikes')
stargazer3.significant_digits(5)
stargazer3.covariate_order(['spike', 'EIP','size_kbyte', 'gas_used_million', 'Intercept'])
stargazer3.show_confidence_intervals(True)
HTML(stargazer3.render_html())

In [None]:
df_sibling["gas_used_million"] = df_sibling["gas_used"]/10**6
df_sibling["size_kbyte"] = df_sibling["size"]/1024
df_sibling["sibling_x"] = (df_sibling["sibling_cnt"] > 0) + 0
results1 = smf.logit('sibling_x ~ 1 + spike + EIP + size_kbyte + gas_used_million', data=df_sibling).fit()
results2 = smf.logit('sibling_x ~ 1 + spike + EIP', data=df_sibling).fit()
results3 = smf.logit('sibling_x ~ 1 + EIP + size_kbyte + gas_used_million', data=df_sibling).fit()
stargazer3 = Stargazer([results1 ,results2, results3])
stargazer3.title('Sibling Count & Spikes')
stargazer3.significant_digits(5)
stargazer3.covariate_order(['spike', 'EIP','size_kbyte', 'gas_used_million', 'Intercept'])
stargazer3.show_confidence_intervals(True)
HTML(stargazer3.render_html())

In [None]:
df_sibling["gas_used_million"] = df_sibling["gas_used"]/10**6
df_sibling["size_kbyte"] = df_sibling["size"]/1024
results1 = smf.mnlogit('sibling_cnt ~ 1 + spike + EIP + size_kbyte + gas_used_million', data=df_sibling).fit()
results2 = smf.mnlogit('sibling_cnt ~ 1 + spike + EIP', data=df_sibling).fit()
results3 = smf.mnlogit('sibling_cnt ~ 1 + EIP + size_kbyte + gas_used_million', data=df_sibling).fit()
stargazer3 = Stargazer([results1 ,results2, results3])
stargazer3.title('Sibling Count & Spikes')
stargazer3.significant_digits(5)
stargazer3.covariate_order(['spike', 'EIP','size_kbyte', 'gas_used_million', 'Intercept'])
stargazer3.show_confidence_intervals(True)
HTML(stargazer3.render_html())

In [None]:
df_sibling_normal["sibling_cnt_ma20"].describe(percentiles = [.75, .95, .99, .999])

In [None]:
df_sibling_spike["sibling_cnt_ma20"].describe(percentiles = [.75, .95, .99, .999])

## Research Question 3

In [None]:
results1 = smf.ols('gas_used ~ 1 + EIP + adoption', data=df_13).fit()
results2 = smf.ols('gas_used ~ 1 + EIP', data=df_13).fit()
results3 = smf.ols('gas_used ~ 1 + adoption', data=df_13).fit()
stargazer1 = Stargazer([results1 ,results2, results3])
stargazer1.title('Gas Used & EIP')
stargazer1.covariate_order(['adoption','EIP'])
stargazer1.show_confidence_intervals(True)
HTML(stargazer1.render_html())

## Research Question 4

In [None]:
timestamp = block[["BQ_timestamp", "block_number"]]
timestamp = timestamp[timestamp["block_number"] > 12894999]
timestamp = timestamp[timestamp["block_number"] < 13105000]
timestamp = timestamp.reset_index(drop = True)
timestamp["timestamp"] = pd.DatetimeIndex(timestamp.BQ_timestamp).astype(np.int64)/1000000000
timestamp = timestamp[["block_number", "timestamp"]]
timestamp.to_csv("/content/drive/MyDrive/EIP-1559/EIP-1559 Data/timestamp.csv")

In [None]:
avggas.loc[avggas["block_number"] < 12965000, "EIP"] = "pre-EIP"
avggas.loc[avggas["block_number"] > 12964999, "EIP"] = "post-EIP"

fig = make_subplots(
    rows=2, cols=3,
    subplot_titles=("20 sec", "30 sec", "40 sec", "60 sec", "90 sec", "120 sec"))
fig.add_trace(go.Violin(x=avggas['EIP'], y=avggas['20 sec'], name = "20 sec"), row=1, col=1)
fig.add_trace(go.Violin(x=avggas['EIP'], y=avggas['30 sec'], name = "30 sec"), row=1, col=2)
fig.add_trace(go.Violin(x=avggas['EIP'], y=avggas['40 sec'], name = "40 sec"), row=1, col=3)
fig.add_trace(go.Violin(x=avggas['EIP'], y=avggas['60 sec'], name = "60 sec"), row=2, col=1)
fig.add_trace(go.Violin(x=avggas['EIP'], y=avggas['90 sec'], name = "90 sec"), row=2, col=2)
fig.add_trace(go.Violin(x=avggas['EIP'], y=avggas['120 sec'], name = "120 sec"), row=2, col=3)
fig.update_layout(showlegend=False,
                  font = dict(size = 30))
fig.update_yaxes(range=[0, 6000000], row=1, col=1)
fig.update_yaxes(range=[0, 6000000], row=1, col=2)
fig.update_yaxes(range=[0, 6000000], row=1, col=3)
fig.update_yaxes(range=[0, 6000000], row=2, col=1)
fig.update_yaxes(range=[0, 6000000], row=2, col=2)
fig.update_yaxes(range=[0, 6000000], row=2, col=3)

for i in fig['layout']['annotations']:
    i['font'] = dict(size=24)
    
#fig.show()
fig.write_image("/content/drive/MyDrive/EIP-1559/Code & Output/colab output/pdf/avggas.pdf", width = 1200, height = 800)
fig.write_image("/content/drive/MyDrive/EIP-1559/Code & Output/colab output/png/avggas.png", width = 1200, height = 800)