In [None]:
import csv
import os
import glob
import pandas as pd
import tqdm as tqdm
import requests
from more_itertools import unique_everseen

In [None]:
# Merge every etherscan CSV export found in the working directory into a single file.
# Note: the header row of your etherscan CSV files may need to be adjusted (or removed) first.
extension = 'csv'
all_filenames = glob.glob('*.{}'.format(extension))
# Skip the merge when a combined file already exists from an earlier run
if not os.path.exists("combined_csv.csv"):
    combined_csv = pd.concat(list(map(pd.read_csv, all_filenames)))
    combined_csv.to_csv("combined_csv.csv", encoding='utf-8-sig', index=False)

In [None]:
# Remove duplicate lines from the combined csv file, keeping the first occurrence of each.
# combined_csv.csv is written with encoding='utf-8-sig', so open both files with that
# encoding explicitly instead of relying on the platform's default locale encoding
# (which can mis-decode the BOM or fail on non-UTF-8 systems).
with open("combined_csv.csv", 'r', encoding='utf-8-sig') as f, \
        open("combined_unique.csv", 'w', encoding='utf-8-sig') as out_file:
    out_file.writelines(unique_everseen(f))

In [None]:
# Load the de-duplicated contract list; api_call reads its
# 'ContractAddress' and 'ContractName' columns.
data = pd.read_csv("combined_unique.csv")

In [None]:
# Configure your own api key: set the ETHERSCAN_API_KEY environment variable before
# starting the notebook so the key is never saved inside the notebook itself.
# The placeholder is only used when that variable is not set.
apikey = os.environ.get('ETHERSCAN_API_KEY', 'Your_own_api_key')

In [None]:
# Data we need from our api call:
# - Source code
# - Solidity version
# - Date of contract creation
# - Lines of code (LOC)

def api_call(row, data=data, timeout=30):
    """Fetch the verified source code and compiler version of one contract.

    Parameters
    ----------
    row : index label used to look up the contract in ``data``.
    data : table with 'ContractAddress' and 'ContractName' columns
        (defaults to the module-level ``data`` at definition time).
    timeout : seconds to wait for the etherscan server before giving up.

    Returns
    -------
    tuple ``(name, contract_addr, source, compiler_version)``. ``source`` and
    ``compiler_version`` are empty strings for an HTTP 204 response, ``None``
    when the reply lacks the field, and the sentinel ``"Value Error"`` when the
    reply body could not be interpreted.

    Raises
    ------
    requests.exceptions.RequestException
        On connection problems, timeouts or HTTP error statuses.
    """
    contract_addr = data['ContractAddress'][row]
    name = data['ContractName'][row]
    val = ""
    version = ""

    # Let requests build and escape the query string; the timeout keeps a stalled
    # connection from blocking a worker forever.
    response = requests.get(
        'https://api.etherscan.io/api',
        params={
            'module': 'contract',
            'action': 'getsourcecode',
            'address': contract_addr,
            'apikey': apikey,
        },
        timeout=timeout,
    )
    response.raise_for_status()

    if response.status_code != 204:
        try:
            entry = response.json()['result'][0]
            val = entry.get('SourceCode')
            version = entry.get('CompilerVersion')
        except (ValueError, KeyError, IndexError, TypeError, AttributeError):
            # ValueError: body is not JSON. The other exceptions cover replies whose
            # 'result' is missing, empty, or an error string instead of a list of
            # dicts; record the same sentinel rather than aborting the whole batch.
            val = "Value Error"
            version = "Value Error"

    return name, contract_addr, val, version

In [None]:
# Function to write data to a csv file
def write_to_csv(data, filename):
    """Write batches of contract rows to ``filename`` as CSV.

    Parameters
    ----------
    data : iterable of batches, where each batch is an iterable of
        ``(ContractName, ContractAddress, Source, CompilerVersion)`` rows.
    filename : path of the CSV file to create (an existing file is overwritten).
    """
    # newline='' lets the csv module control line endings itself, so fields with
    # embedded newlines (source code) round-trip and no blank rows appear on Windows.
    # UTF-8 is set explicitly so the file does not depend on the platform's locale.
    with open(filename, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f, delimiter=",")
        writer.writerow(['ContractName','ContractAddress','Source','CompilerVersion'])
        for batch in data:
            writer.writerows(batch)

In [None]:
# Multi processing to speed up the api calls
from multiprocessing import Pool, cpu_count
from p_tqdm import p_map
def multi_call(func, args):
    """Run ``func`` over every item of ``args`` in parallel with a progress bar.

    Parameters
    ----------
    func : callable taking a single argument.
    args : iterable of arguments, one call per item.

    Returns
    -------
    list containing a single batch: the list of ``func`` results produced by
    ``p_map``. The extra nesting is the shape ``write_to_csv`` iterates over
    (a sequence of batches of rows).
    """
    # p_map manages its own worker processes, so no separate multiprocessing.Pool is needed.
    completed = p_map(func, args, num_cpus=cpu_count())
    return [completed]


In [None]:
# Splitting up the data just so that I don't have to run everything over when an error occurs
# Random errors can occur (this is due to some api error or internet connection failure)
argument_list = list(range(len(data)))
# Batches 1-25 hold 1000 consecutive row indices each; batch 26 takes everything from 25000 on.
(
    arg_list_1, arg_list_2, arg_list_3, arg_list_4, arg_list_5,
    arg_list_6, arg_list_7, arg_list_8, arg_list_9, arg_list_10,
    arg_list_11, arg_list_12, arg_list_13, arg_list_14, arg_list_15,
    arg_list_16, arg_list_17, arg_list_18, arg_list_19, arg_list_20,
    arg_list_21, arg_list_22, arg_list_23, arg_list_24, arg_list_25,
    arg_list_26,
) = [argument_list[start:start + 1000] for start in range(0, 25000, 1000)] + [argument_list[25000:]]

In [None]:
# Repeat this for every arg_list and then combine them at the end.
# Each batch is fetched with multi_call and saved to its own contracts_<n>.csv file.
result1 = multi_call(api_call, arg_list_1)
write_to_csv(result1, 'contracts_1.csv')

...

In [None]:
# Result 26: fetch the last batch (arg_list_26) and save it to contracts_26.csv
result26 = multi_call(api_call, arg_list_26)
write_to_csv(result26, 'contracts_26.csv')

In [None]:
# Collect the per-batch result files whose names start with 'contracts'
all_contracts = glob.glob('contracts*.{}'.format(extension))

In [None]:
# Merge the per-batch files once; skip when the merged file already exists
if not os.path.exists("all_contracts.csv"):
    contract_combo = pd.concat(list(map(pd.read_csv, all_contracts)))
    contract_combo.to_csv("all_contracts.csv", encoding='utf-8-sig', index=False)