In [1]:
# First day of the analysis window, written as 'YYYY-MM-DD' (it is parsed with the
# "%Y-%m-%d" format further down). It is converted to a Unix timestamp and used to look
# up the block at which the chunked transaction scans start.
start_date = '2021-08-31'

In [2]:
import pandas as pd
import requests
import json
from datetime import datetime
from datetime import date
import time
#config info
from configparser import ConfigParser
config = ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list of files
# it did parse, so check that list to fail here with a clear message rather than with a
# NoSectionError on the next line.
if not config.read('../config.ini'):
    raise FileNotFoundError(
        "Could not read '../config.ini' (expected an [ETHERSCAN] section with an 'etherscan_api' key)"
    )
etherscan_api = config.get('ETHERSCAN','etherscan_api')

#header for parsing
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}

# Unix timestamp (seconds) of midnight UTC on start_date.
# A naive datetime's .timestamp() is interpreted in the machine's local timezone, which
# would shift the window start by the local UTC offset; the transaction timestamps are
# bucketed into UTC days later on, so the start of the window is pinned to UTC as well.
start_time = int(pd.to_datetime(start_date, format="%Y-%m-%d", utc=True).timestamp())

# Creator addresses per project; later cells read the 'project' and 'address' columns.
# pd.read_csv already returns a DataFrame, so no extra wrapping is needed.
contracts = pd.read_csv("optimism_creator_addresses.csv")

# Data from https://optimistic.etherscan.io/

In [3]:
# Contract creator ids
# Only the creator addresses registered for the 'Chainlink' project are scanned.
creator_ids = contracts.loc[contracts["project"]=='Chainlink']

# Request URLs: current chain head (hex result) and the closest block at or before start_time.
latest_block_str = 'https://api-optimistic.etherscan.io/api?module=proxy&action=eth_blockNumber&apikey=' + etherscan_api
starting_block_str = 'https://api-optimistic.etherscan.io/api?module=block&action=getblocknobytime&timestamp='\
                + str(start_time) + '&closest=before&apikey=' + etherscan_api


def _lookup_block_number(url, base):
    """Call an Etherscan endpoint whose 'result' is a block number and return it as int.

    url  -- fully built request URL (includes the API key, so it is never echoed in errors)
    base -- numeric base of the 'result' string (10 for getblocknobytime, 16 for eth_blockNumber)

    Raises requests.HTTPError on a non-2xx response and RuntimeError when the payload
    carries no parseable block number (for example an API error message).
    """
    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()
    payload = response.json()
    try:
        return int(payload['result'], base=base)
    except (KeyError, TypeError, ValueError) as err:
        raise RuntimeError('Etherscan block lookup failed: ' + repr(payload)) from err


start_block = _lookup_block_number(starting_block_str, 10)
latest_block = _lookup_block_number(latest_block_str, 16)

chunk_size = 5000 # width, in blocks, of each window used by the chunked contract scan


In [4]:
# Get contracts
#
# For every creator address, download its transaction list and keep the contract-creation
# transactions (empty `to`, non-empty `contractAddress`). The result is `contract_hashes`,
# one row per (contractAddress, project).

# A txlist response holding this many rows is treated as truncated by the API.
TXLIST_ROW_CAP = 10000


def _creator_txlist(address, first_block, last_block, sort):
    """Fetch one `txlist` page for `address` between two blocks and return it as a DataFrame.

    Returns an empty DataFrame when the API reports no transactions. Raises
    requests.HTTPError on a non-2xx response and RuntimeError when 'result' is not a list
    (the API puts a plain error string there, e.g. when a request is rejected).
    """
    api_str = 'https://api-optimistic.etherscan.io/api?module=account&action=txlist&address='\
                + address + '&startblock=' + str(first_block) + '&endblock=' + str(last_block)\
                + '&sort=' + sort + '&apikey=' + etherscan_api
    r = requests.get(api_str, headers=headers, timeout=30)  # timeout: never hang on a stalled call
    r.raise_for_status()
    result = r.json().get('result')
    if not isinstance(result, list):
        raise RuntimeError('txlist request for ' + address + ' failed: ' + repr(result))
    return pd.json_normalize(result)


def _contract_creations(txs, project):
    """Return the contract-creation rows of a non-empty `txs` page, tagged with `project`."""
    # Transactions sent to NULL that produced a contract address are contract creations.
    created = txs.loc[(txs['to'] == '') & (txs['contractAddress'] != '')].copy()
    created.insert(0, "project", project, True) #add project name
    return created


df = pd.DataFrame()
apid = []
for index, row in creator_ids.iterrows():
    addr = row['address']
    proj = row['project']
    x = _creator_txlist(addr, 1, latest_block, 'desc')
    if x.empty: # creator has no transactions at all
        continue
    if len(x.index) < TXLIST_ROW_CAP: #if all txs captured append
        apid.append(_contract_creations(x, proj))
        continue

    # Too many transactions for one call: walk the chain in windows of chunk_size blocks.
    # NOTE(review): this path starts at start_block while the single-call path above
    # covers everything from block 1 -- confirm the asymmetry is intended.
    max_block_value = int(pd.to_numeric(x["blockNumber"]).max()) #last block with a tx (sort desc above)
    i = start_block
    n = 1
    while i <= max_block_value: #where last block hasn't yet been reached
        if n % 5 == 0:
            time.sleep(1)
        window_end = i + chunk_size - 1
        y = _creator_txlist(addr, i, window_end, 'asc')
        if not y.empty:
            apid.append(_contract_creations(y, proj))
        if len(y.index) < TXLIST_ROW_CAP:
            # Whole window captured (possibly empty): move on to the next window.
            i = window_end + 1
        else:
            # Truncated page: resume from the last block seen, since it may be only
            # partly captured (the overlap is de-duplicated below). Always advance by
            # at least one block so the loop cannot spin on the same start block.
            max_block_iter = int(pd.to_numeric(y["blockNumber"]).max())
            i = max(max_block_iter, i + 1)
        n=n+1

if not apid:
    raise ValueError('No transactions were returned for any creator address')
# De-duplicate before resetting the index, and drop the old index instead of keeping it
# as a column: a kept index column would make overlapping rows look distinct.
df = pd.concat(apid).drop_duplicates().reset_index(drop=True)
# One row per contract so the same contract is not queried twice downstream.
contract_hashes = df.loc[:,['contractAddress','project']].drop_duplicates()

In [5]:
#Get txs
#
# For every discovered contract, download the transactions sent to it. The combined,
# de-duplicated result is left in `df` for aggregation.

# etherscan api limits to 10k results; a page this large is treated as truncated.
TXLIST_ROW_CAP = 10000


def _contract_txlist(address, first_block, last_block, sort):
    """Fetch one `txlist` page for `address` between two blocks and return it as a DataFrame.

    Returns an empty DataFrame when the API reports no transactions. Raises
    requests.HTTPError on a non-2xx response and RuntimeError when 'result' is not a list
    (the API puts a plain error string there, e.g. when a request is rejected).
    """
    api_str = 'https://api-optimistic.etherscan.io/api?module=account&action=txlist&address='\
            + address + '&startblock=' + str(first_block) + '&endblock=' + str(last_block)\
            + '&sort=' + sort + '&apikey=' + etherscan_api
    r = requests.get(api_str, headers=headers, timeout=30)  # timeout: never hang on a stalled call
    r.raise_for_status()
    result = r.json().get('result')
    if not isinstance(result, list):
        raise RuntimeError('txlist request for ' + address + ' failed: ' + repr(result))
    return pd.json_normalize(result)


def _txs_sent_to(txs, address, project):
    """Return the rows of a non-empty `txs` page addressed to `address`, tagged with `project`."""
    sent = txs.loc[txs['to'] == str.lower(address)].copy() #Where transactions are being sent to the addresses
    sent.insert(0, "project", project, True) #add project name
    return sent


df = pd.DataFrame()
apid = []
for index, row in contract_hashes.iterrows():
    addr = row['contractAddress']
    proj = row['project']
    x = _contract_txlist(addr, 1, latest_block, 'desc')
    if x.empty: # contract has no listed transactions
        continue
    if len(x.index) < TXLIST_ROW_CAP: #if all txs captured append
        apid.append(_txs_sent_to(x, addr, proj))
        continue

    # Too many transactions for one call: page forward in ascending block order.
    # NOTE(review): this path starts at start_block while the single-call path above
    # covers everything from block 1 -- confirm the asymmetry is intended.
    max_block_value = int(pd.to_numeric(x["blockNumber"]).max()) #last block with a tx (sort desc above)
    i = start_block
    n = 1
    while i <= max_block_value: #where last block hasn't yet been reached
        if n % 5 == 0: # 5 call per second api limit
            time.sleep(1)
        y = _contract_txlist(addr, i, max_block_value, 'asc')
        if not y.empty:
            apid.append(_txs_sent_to(y, addr, proj))
        if len(y.index) < TXLIST_ROW_CAP:
            break # page not truncated: everything up to max_block_value is captured
        # Truncated page: resume from the last block seen, since it may be only partly
        # captured (the overlap is de-duplicated below). Always advance by at least one
        # block so the loop cannot spin on the same start block.
        max_block_iter = int(pd.to_numeric(y["blockNumber"]).max())
        i = max(max_block_iter, i + 1)
        n = n+1 #iterate counter for rate limit

if not apid:
    raise ValueError('No transactions were returned for any contract address')
df = pd.concat(apid)
df = df.drop_duplicates() # where block numbers may overlap
#print(df)

In [6]:
# Aggregate values
# Roll the raw transactions up to one row per (day, project): transaction count,
# summed gas, mean gas price and summed gas used.
rollup_inputs = ['dt', 'project', 'gas', 'gasPrice', 'gasUsed']
rollup_funcs = {'dt': 'count', 'gas': 'sum', 'gasPrice': 'mean', 'gasUsed': 'sum'}
rollup_names = {
    "dt": "num_txs",
    "gas": "total_gas",
    "gasPrice": "avg_gas_price",
    "gasUsed": "total_gas_used",
}

df_trunc = (
    df.assign(
        dt=pd.to_datetime(df['timeStamp'], unit='s').dt.floor('d'),  # seconds -> calendar day
        gas=pd.to_numeric(df['gas']),
        gasPrice=pd.to_numeric(df['gasPrice']),
        gasUsed=pd.to_numeric(df['gasUsed']),
    )[rollup_inputs]
    .groupby(['dt', 'project'])
    .agg(rollup_funcs)
    .rename(columns=rollup_names)
)

In [7]:
# Export the daily per-project aggregates to a CSV named after today's date.
#
# No drop_duplicates() here: df_trunc is indexed by (dt, project) and
# DataFrame.drop_duplicates() compares column values only, ignoring the index. Two
# different days with identical metrics would therefore be collapsed into one row and a
# day would silently vanish from the export. The groupby already yields one row per key.
df_export = df_trunc
export_str = 'optimism_oracle_' + date.today().strftime("%Y_%m_%d") + '.csv'

# index=True writes the (dt, project) index levels as the leading CSV columns.
df_export.to_csv(export_str,index=True)