In [10]:
import os
import csv
import json
import math
import requests
import threading
import pandas as pd
from dotenv import load_dotenv
from concurrent.futures import ThreadPoolExecutor

load_dotenv()

def get_api_key():
    return os.getenv('RONIN_API_KEY')

CONNECTION_URL = "https://api-gateway.skymavis.com/rpc/archive"
OPTIONS = {
    'headers': {
        'x-api-key': get_api_key()
    }
}

def get_logs(from_block, to_block, topic):
    payload = {
        "jsonrpc": "2.0",
        "method": "eth_getLogs",
        "params": [{
            "fromBlock": hex(from_block),
            "toBlock": hex(to_block),
            "address": "0xe35d62ebe18413d96ca2a2f7cf215bb21a406b4b",
            "topics": topic
        }],
        "id": 1
    }

    response = requests.post(CONNECTION_URL, headers=OPTIONS['headers'], json=payload)
    response_json = response.json()
    if 'result' in response_json:
        return response_json['result']
    else:
        print(f"Error fetching logs for blocks {from_block} to {to_block}: {response_json}")
        return []

def save_logs(logs, filename):
    with open(filename, 'w') as f:
        json.dump(logs, f)

def extract_transaction_hashes(logs):
    return set(log['transactionHash'] for log in logs)

def save_hashes_to_csv(hashes, filename):
    with open(filename, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['hash'])
        for tx_hash in hashes:
            writer.writerow([tx_hash])

def main(from_block, to_block, folder_name, topic):
    block_range = 500
    futures = []
    total_logs_count = 0
    merged_logs = []

    try:

        if not os.path.exists(f'./data/{folder_name}/'):
            os.makedirs(f'./data/{folder_name}/')

        with ThreadPoolExecutor(max_workers=10) as executor:
            for start in range(from_block, to_block, block_range):
                end = min(start + block_range - 1, to_block)
                future = executor.submit(get_logs, start, end, topic)
                futures.append((future, start, end))

            for future, start, end in futures:
                logs = future.result()
                total_logs_count += len(logs)
                merged_logs.extend(logs)

        merged_logs_count = len(merged_logs)
        with open(f'./data/{folder_name}/merged_logs.json', 'w') as f:
            json.dump(merged_logs, f)
        
        print(f"Merged all log files into ./data/{folder_name}/merged_logs.json")
        print(f"Total logs fetched: {total_logs_count}")
        print(f"Total logs in merged file: {merged_logs_count}")

        # Extract transaction hashes and save to CSV
        transaction_hashes = extract_transaction_hashes(merged_logs)
        save_hashes_to_csv(transaction_hashes, f'./data/{folder_name}/transaction_hashes.csv')

        print(f"Transaction hashes saved to ./data/{folder_name}/transaction_hashes.csv")
        print(f"Total transaction hashes: {len(transaction_hashes)}")
    except Exception as e:
        with open(f'./data/{folder_name}/errors.txt', 'w') as f:
            f.write(str(e))
        print(f"Error!! Saved to ./data/{folder_name}/errors.json")

**Relevant Block Numbers**

2751809 # 1 may 2021

6639676 # 13 sep 2021

9803808 # 1 jan 2022

11867416 # 13 Mar 2022

13191590 # 28 Apr 2022

**Relevant Topics**
TokenWithdrew(uint256,address,address,address,uint32,uint256): 0xd56c021eb1befc5273569485864a514b5d80a6192ce1181668ac7c553212558e

TokenDeposited(uint256,address,address,uint256): 0x5187d31a2b0e5829ff24ba2d281e6506286752e3d938cbaa86d0202f509ffeb0

In [3]:
from_block = 9803808 # 1 jan
to_block = 13191590 # 28 Apr
topics = ["0xd56c021eb1befc5273569485864a514b5d80a6192ce1181668ac7c553212558e"]
main(from_block, to_block, "logs-01Jan-28Apr-withds", topics)

Merged all log files into ./data/logs-01Jan-28Apr-withds/merged_logs.json
Total logs fetched: 35413
Total logs in merged file: 35413
Transaction hashes saved to ./data/logs-01Jan-28Apr-withds/transaction_hashes.csv
Total transaction hashes: 35411


In [2]:
from_block = 9803808 # 1 jan
to_block = 13191590 # 28 Apr
topics = ["0x5187d31a2b0e5829ff24ba2d281e6506286752e3d938cbaa86d0202f509ffeb0"]
main(from_block, to_block, "logs-01Jan-28Apr-deps", topics)

Merged all log files into ./data/logs-01Jan-28Apr-deps/merged_logs.json
Total logs fetched: 43990
Total logs in merged file: 43990
Transaction hashes saved to ./data/logs-01Jan-28Apr-deps/transaction_hashes.csv
Total transaction hashes: 39691


In [3]:
topics = ["0xd56c021eb1befc5273569485864a514b5d80a6192ce1181668ac7c553212558e"]
main(6639676, 9803807, "logs-13Sep-31Dec", topics)

Merged all log files into ./data/logs-13Sep-31Dec-new/merged_logs.json
Total logs fetched: 151323
Total logs in merged file: 151323
Transaction hashes saved to ./data/logs-13Sep-31Dec-new/transaction_hashes.csv
Total transaction hashes: 151323


In [3]:
topics = ["0x5187d31a2b0e5829ff24ba2d281e6506286752e3d938cbaa86d0202f509ffeb0"]
main(2751809, 6639675, "logs-1May-13Sep-deps", topics)

Merged all log files into ./data/logs-1May-13Sep-deps/merged_logs.json
Total logs fetched: 1483949
Total logs in merged file: 1483949
Transaction hashes saved to ./data/logs-1May-13Sep-deps/transaction_hashes.csv
Total transaction hashes: 327534


In [4]:
topics = ["0xd56c021eb1befc5273569485864a514b5d80a6192ce1181668ac7c553212558e"]
main(2751809, 6639675, "logs-1May-13Sep-withds", topics)

Merged all log files into ./data/logs-1May-13Sep-withds/merged_logs.json
Total logs fetched: 875884
Total logs in merged file: 875884
Transaction hashes saved to ./data/logs-1May-13Sep-withds/transaction_hashes.csv
Total transaction hashes: 875884


## Retrieve Transaction Receipts

In [17]:
from concurrent.futures import ThreadPoolExecutor, as_completed
import time

RATE_LIMIT = 20  # requests per second
REQUEST_INTERVAL = 1 / RATE_LIMIT

def fetch_receipt(tx_hash, folder_name):
    data = {
        "id": 1,
        "jsonrpc": "2.0",
        "method": "eth_getTransactionReceipt",
        "params": [tx_hash]
    }

    try:
        response = requests.post(CONNECTION_URL, headers=OPTIONS['headers'], json=data)
        response_json = response.json()
        if 'result' in response_json:
            return response_json['result']
        else:
            print(f"Error fetching transaction receipt for tx {tx_hash} : {response_json}")
            return []
    except Exception as e:
        with open(f"./data/{folder_name}/errors.txt", "a") as error_file:
            error_file.write(f"Error retrieving transaction: {tx_hash}, {e}\n")

def process_hashes(hashes, folder_name):
    receipts = []
    total_hashes = len(hashes)
    progress_interval = max(1, total_hashes // 100)  # Update progress every 1%

    with ThreadPoolExecutor(max_workers=RATE_LIMIT) as executor:
        futures = {executor.submit(fetch_receipt, tx_hash, folder_name): tx_hash for tx_hash in hashes}
        
        for idx, future in enumerate(as_completed(futures), 1):
            try:
                result = future.result()
                receipts.append(result)

                if idx % progress_interval == 0 or idx == total_hashes:
                    progress_percentage = (idx / total_hashes) * 100
                    print(f"Progress: {progress_percentage:.2f}% ({idx}/{total_hashes})")
        
                time.sleep(REQUEST_INTERVAL)
            except Exception as e:
                with open(f"./data/{folder_name}/errors.txt", "a") as error_file:
                    error_file.write(f"Error retrieving transaction: {futures[future]}, {e}\n")
    return receipts

def retrieve_receipts(folder_name):
    # Read hashes from CSV
    hashes_df = pd.read_csv(f'./data/{folder_name}/transaction_hashes.csv')
    hashes = hashes_df['hash'].drop_duplicates().tolist()

    # Fetch receipts
    receipts = process_hashes(hashes, folder_name)

    # Save receipts to JSON
    with open(f'./data/{folder_name}/tx_receipts.json', 'w') as f:
        json.dump(receipts, f)

    # Print the count of processed receipts
    print(f"Total receipts fetched: {len(receipts)}")


In [7]:
retrieve_receipts("logs-01Jan-28Apr-deps")

Progress: 1.00% (396/39691)
Progress: 2.00% (792/39691)
Progress: 2.99% (1188/39691)
Progress: 3.99% (1584/39691)
Progress: 4.99% (1980/39691)
Progress: 5.99% (2376/39691)
Progress: 6.98% (2772/39691)
Progress: 7.98% (3168/39691)
Progress: 8.98% (3564/39691)
Progress: 9.98% (3960/39691)
Progress: 10.97% (4356/39691)
Progress: 11.97% (4752/39691)
Progress: 12.97% (5148/39691)
Progress: 13.97% (5544/39691)
Progress: 14.97% (5940/39691)
Progress: 15.96% (6336/39691)
Progress: 16.96% (6732/39691)
Progress: 17.96% (7128/39691)
Progress: 18.96% (7524/39691)
Progress: 19.95% (7920/39691)
Progress: 20.95% (8316/39691)
Progress: 21.95% (8712/39691)
Progress: 22.95% (9108/39691)
Progress: 23.94% (9504/39691)
Progress: 24.94% (9900/39691)
Progress: 25.94% (10296/39691)
Progress: 26.94% (10692/39691)
Progress: 27.94% (11088/39691)
Progress: 28.93% (11484/39691)
Progress: 29.93% (11880/39691)
Progress: 30.93% (12276/39691)
Progress: 31.93% (12672/39691)
Progress: 32.92% (13068/39691)
Progress: 33.9

In [6]:
retrieve_receipts("logs-01Jan-28Apr-withds")

Total receipts fetched: 0


In [4]:
retrieve_receipts('logs-13Sep-31Dec')

Progress: 1.00% (10289/1028951)
Progress: 2.00% (20578/1028951)
Progress: 3.00% (30867/1028951)
Progress: 4.00% (41156/1028951)
Progress: 5.00% (51445/1028951)
Progress: 6.00% (61734/1028951)
Progress: 7.00% (72023/1028951)
Progress: 8.00% (82312/1028951)
Progress: 9.00% (92601/1028951)
Progress: 10.00% (102890/1028951)
Progress: 11.00% (113179/1028951)
Progress: 12.00% (123468/1028951)
Progress: 13.00% (133757/1028951)
Progress: 14.00% (144046/1028951)
Progress: 15.00% (154335/1028951)
Progress: 16.00% (164624/1028951)
Progress: 17.00% (174913/1028951)
Progress: 18.00% (185202/1028951)
Progress: 19.00% (195491/1028951)
Progress: 20.00% (205780/1028951)
Progress: 21.00% (216069/1028951)
Progress: 22.00% (226358/1028951)
Progress: 23.00% (236647/1028951)
Progress: 24.00% (246936/1028951)
Progress: 25.00% (257225/1028951)
Progress: 26.00% (267514/1028951)
Progress: 27.00% (277803/1028951)
Progress: 28.00% (288092/1028951)
Progress: 29.00% (298381/1028951)
Progress: 30.00% (308670/1028951

In [7]:
retrieve_receipts('logs-1May-13Sep-deps')

Progress: 1.00% (3275/327534)
Progress: 2.00% (6550/327534)
Progress: 3.00% (9825/327534)
Progress: 4.00% (13100/327534)
Progress: 5.00% (16375/327534)


In [5]:
retrieve_receipts('logs-1May-13Sep-withds')

Progress: 1.00% (8758/875884)
Progress: 2.00% (17516/875884)
Progress: 3.00% (26274/875884)
Progress: 4.00% (35032/875884)
Progress: 5.00% (43790/875884)
Progress: 6.00% (52548/875884)
Progress: 7.00% (61306/875884)
Progress: 8.00% (70064/875884)
Progress: 9.00% (78822/875884)
Progress: 10.00% (87580/875884)
Progress: 11.00% (96338/875884)
Progress: 12.00% (105096/875884)
Progress: 13.00% (113854/875884)
Progress: 14.00% (122612/875884)
Progress: 15.00% (131370/875884)
Progress: 16.00% (140128/875884)
Progress: 17.00% (148886/875884)
Progress: 18.00% (157644/875884)
Progress: 19.00% (166402/875884)
Progress: 20.00% (175160/875884)
Progress: 21.00% (183918/875884)
Progress: 22.00% (192676/875884)
Progress: 23.00% (201434/875884)
Progress: 24.00% (210192/875884)
Progress: 25.00% (218950/875884)
Progress: 26.00% (227708/875884)
Progress: 27.00% (236466/875884)
Progress: 28.00% (245224/875884)
Progress: 29.00% (253982/875884)
Progress: 30.00% (262740/875884)
Progress: 31.00% (271498/875884

In [19]:
import os
import re

def load_failed_hashes(filename):
    with open(filename, 'r') as f:
        hashes = [re.findall(pattern='0x[a-fA-F0-9]{64}', string=line)[0] for line in f]
    return hashes

def retrieve_failed_receipts(folder_name):
    errors_file = f'./data/{folder_name}/errors.txt'
    if not os.path.exists(errors_file):
        print(f"Error file {errors_file} does not exist.")
        return

    # Load failed transaction hashes from errors.txt
    failed_hashes = load_failed_hashes(f'./data/{folder_name}/errors.txt')

    # Fetch receipts for failed hashes
    failed_receipts = process_hashes(failed_hashes, folder_name)

    # Save failed receipts to JSON
    with open(f'./data/{folder_name}/tx_receipts_2.json', 'w') as f:
        json.dump(failed_receipts, f)

    # Print the count of processed failed receipts
    print(f"Total failed receipts fetched: {len(failed_receipts)}")

In [13]:
retrieve_failed_receipts('logs-01Jan-28Apr-deps')

Progress: 33.33% (1/3)
Progress: 66.67% (2/3)
Progress: 100.00% (3/3)
Total failed receipts fetched: 3


In [None]:
retrieve_failed_receipts('logs-01Jan-28Apr-withds')

In [6]:
retrieve_failed_receipts('logs-13Sep-31Dec')

Progress: 0.60% (1/166)
Progress: 1.20% (2/166)
Progress: 1.81% (3/166)
Progress: 2.41% (4/166)
Progress: 3.01% (5/166)
Progress: 3.61% (6/166)
Progress: 4.22% (7/166)
Progress: 4.82% (8/166)
Progress: 5.42% (9/166)
Progress: 6.02% (10/166)
Progress: 6.63% (11/166)
Progress: 7.23% (12/166)
Progress: 7.83% (13/166)
Progress: 8.43% (14/166)
Progress: 9.04% (15/166)
Progress: 9.64% (16/166)
Progress: 10.24% (17/166)
Progress: 10.84% (18/166)
Progress: 11.45% (19/166)
Progress: 12.05% (20/166)
Progress: 12.65% (21/166)
Progress: 13.25% (22/166)
Progress: 13.86% (23/166)
Progress: 14.46% (24/166)
Progress: 15.06% (25/166)
Progress: 15.66% (26/166)
Progress: 16.27% (27/166)
Progress: 16.87% (28/166)
Progress: 17.47% (29/166)
Progress: 18.07% (30/166)
Progress: 18.67% (31/166)
Progress: 19.28% (32/166)
Progress: 19.88% (33/166)
Progress: 20.48% (34/166)
Progress: 21.08% (35/166)
Progress: 21.69% (36/166)
Progress: 22.29% (37/166)
Progress: 22.89% (38/166)
Progress: 23.49% (39/166)
Progress: 2

In [20]:
retrieve_failed_receipts('logs-1May-13Sep-withds')

Progress: 3.33% (1/30)
Progress: 6.67% (2/30)
Progress: 10.00% (3/30)
Progress: 13.33% (4/30)
Progress: 16.67% (5/30)
Progress: 20.00% (6/30)
Progress: 23.33% (7/30)
Progress: 26.67% (8/30)
Progress: 30.00% (9/30)
Progress: 33.33% (10/30)
Progress: 36.67% (11/30)
Progress: 40.00% (12/30)
Progress: 43.33% (13/30)
Progress: 46.67% (14/30)
Progress: 50.00% (15/30)
Progress: 53.33% (16/30)
Progress: 56.67% (17/30)
Progress: 60.00% (18/30)
Progress: 63.33% (19/30)
Progress: 66.67% (20/30)
Progress: 70.00% (21/30)
Progress: 73.33% (22/30)
Progress: 76.67% (23/30)
Progress: 80.00% (24/30)
Progress: 83.33% (25/30)
Progress: 86.67% (26/30)
Progress: 90.00% (27/30)
Progress: 93.33% (28/30)
Progress: 96.67% (29/30)
Progress: 100.00% (30/30)
Total failed receipts fetched: 30


For the second interval, we need to merge the json files with transaction receipts

In [21]:
import json

def merge_json_files(folder_name):
    file1 = f'./data/{folder_name}/tx_receipts.json'
    file2 = f'./data/{folder_name}/tx_receipts_2.json'
    output_file = f'./data/{folder_name}/tx_receipts.json'
    
    errors_file = f'./data/{folder_name}/errors.txt'
    if not os.path.exists(errors_file):
        print(f"Error file {errors_file} does not exist.")
        return
    
    with open(file1, 'r') as f1, open(file2, 'r') as f2:
        json1 = json.load(f1)
        json2 = json.load(f2)

    merged_json = json1 + json2

    with open(output_file, 'w') as f:
        json.dump(merged_json, f)

    os.remove(file2)

    count1 = len(json1)
    count2 = len(json2)
    count_merged = len(merged_json)

    print(f"Number of transaction receipts in tx_receipts: {count1}")
    print(f"Number of transaction receipts in tx_receipts_2: {count2}")
    print(f"Number of transaction receipts in the merged file: {count_merged}")

In [17]:
merge_json_files("logs-01Jan-28Apr-deps")

Number of transaction receipts in tx_receipts: 39688
Number of transaction receipts in tx_receipts_2: 3
Number of transaction receipts in the merged file: 39691


In [27]:
merge_json_files("logs-01Jan-28Apr-withds")

Error file ./data/logs-01Jan-28Apr-withds/errors.txt does not exist.


In [8]:
merge_json_files("logs-13Sep-31Dec")

Number of transaction receipts in ./data/logs-13Sep-31Dec/tx_receipts.json: 1028867
Number of transaction receipts in ./data/logs-13Sep-31Dec/tx_receipts_2.json: 166
Number of transaction receipts in the merged file: 1029033


In [22]:
merge_json_files("logs-1May-13Sep-withds")

Number of transaction receipts in tx_receipts: 875854
Number of transaction receipts in tx_receipts_2: 30
Number of transaction receipts in the merged file: 875884


In [23]:
# remove duplicates from json file with result field of receipts
def remove_duplicates(folder_name):
    file = f'./data/{folder_name}/tx_receipts.json'

    with open(file, 'r') as f:
        receipts = json.load(f)

    unique_receipts = []
    unique_hashes = set()

    for receipt in receipts:
        tx_hash = receipt.get('transactionHash')
        if tx_hash not in unique_hashes:
            unique_hashes.add(tx_hash)
            unique_receipts.append(receipt)

    # Save unique receipts to a new file
    unique_output_file = f'./data/{folder_name}/unique_tx_receipts.json'
    with open(unique_output_file, 'w') as f:
        json.dump(unique_receipts, f)

    print(f"Unique receipts saved to {unique_output_file}")

    print(f"Removed {len(receipts) - len(unique_receipts)} duplicate receipts")
    print(f"Total receipts: {len(receipts)}")
    print(f"Total unique receipts: {len(unique_receipts)}")


In [25]:
remove_duplicates("logs-01Jan-28Apr-deps")

Unique receipts saved to ./data/logs-01Jan-28Apr-deps/tx_receipts.json
Removed 0 duplicate receipts
Total receipts: 39691
Total unique receipts: 39691


In [26]:
remove_duplicates( "logs-01Jan-28Apr-withds")

Unique receipts saved to ./data/logs-01Jan-28Apr-withds/tx_receipts.json
Removed 0 duplicate receipts
Total receipts: 35411
Total unique receipts: 35411


In [5]:
remove_duplicates("logs-13Sep-31Dec")

Unique receipts saved to ./data/logs-13Sep-31Dec/unique_tx_receipts.json
Removed 82 duplicate receipts
Total receipts: 1029032
Total unique receipts: 1028950


In [24]:
remove_duplicates("logs-1May-13Sep-withds")

Unique receipts saved to ./data/logs-1May-13Sep-withds/unique_tx_receipts.json
Removed 0 duplicate receipts
Total receipts: 875884
Total unique receipts: 875884


In [25]:
import concurrent.futures
import requests

def get_block_data(block_number, errors_file):
    payload = {
        "id": 1,
        "jsonrpc": "2.0",
        "params": [],
        "method": "eth_getBlockByNumber"
    }

    try:
        payload['params'] = [block_number]
        response = requests.post(CONNECTION_URL, headers=OPTIONS['headers'], json=payload)

        if response.status_code == 200:
            block = response.json()["result"]
            number = int(block["number"], 16)
            timestamp = int(block["timestamp"], 16)
            transactions = len(block["transactions"])
            return f"{number},{transactions},{timestamp}\n"
        else:
            with open(errors_file, "a") as error_file:
                error_file.write(f"Error code: {block_number}\n")
    except Exception as e:
        with open(errors_file, "a") as error_file:
            error_file.write(f"Error retrieving block: {block_number}; {e}\n")

def get_blocks_data(folder_name):
    input_file = f'./data/{folder_name}/tx_receipts.json'
    output_file = f'./data/{folder_name}/blocks.csv'
    errors_file = f'./data/{folder_name}/errors.txt'

    with open(input_file, 'r') as file:
        tx_receipts = json.load(file)

    print(f"Extracting block number and Unix timestamp from {len(tx_receipts)} transaction receipts...")

    block_numbers = [tx["blockNumber"] for tx in tx_receipts]

    print(f"Extracted {len(block_numbers)} block numbers...")

    with open(output_file, "a") as blocks_file:
        blocks_file.write("block_number,transactions,timestamp\n")
        
        with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
            # Submit tasks for each block in the range
            futures = {executor.submit(get_block_data, block_number, errors_file): block_number for block_number in block_numbers}

            # Process the completed tasks and write to the file
            for future in concurrent.futures.as_completed(futures):
                block_number = futures[future]
                try:
                    blocks_data = future.result()
                    blocks_file.write(blocks_data)
                except Exception as e:
                    print(f"Error processing block {block_number}: {e}")

    print(f'Extracted block number and Unix timestamp to {output_file}')

In [3]:
get_blocks_data('logs-01Jan-28Apr-deps')

Extracting block number and Unix timestamp from 39691 transaction receipts...
Extracted 39691 block numbers...
Extracted block number and Unix timestamp to ./data/logs-01Jan-28Apr-deps/blocks.csv


In [28]:
get_blocks_data('logs-01Jan-28Apr-withds')

Extracting block number and Unix timestamp from 5022 transaction receipts...
Extracted 5022 block numbers...
Extracted block number and Unix timestamp to ./data/logs-13Mar-28Apr/blocks.csv


In [5]:
get_blocks_data('logs-13Sep-31Dec')

Extracting block number and Unix timestamp from 1028950 transaction receipts...
Extracted 1028950 block numbers...
Error processing block 0x865a69: write() argument must be str, not None
Error processing block 0x4b70e0: write() argument must be str, not None
Error processing block 0x3d8f6c: write() argument must be str, not None
Error processing block 0x583132: write() argument must be str, not None
Error processing block 0x543c2a: write() argument must be str, not None
Error processing block 0x61fc78: write() argument must be str, not None
Error processing block 0x553c31: write() argument must be str, not None
Error processing block 0x55c190: write() argument must be str, not None
Error processing block 0x4cf8aa: write() argument must be str, not None
Error processing block 0x6504c8: write() argument must be str, not None
Error processing block 0x582272: write() argument must be str, not None
Error processing block 0x53e1c1: write() argument must be str, not None
Error processing bloc

In [26]:
get_blocks_data('logs-1May-13Sep-withds')

Extracting block number and Unix timestamp from 875884 transaction receipts...
Extracted 875884 block numbers...
Error processing block 0x5d297a: write() argument must be str, not None
Error processing block 0x5d4b09: write() argument must be str, not None
Error processing block 0x60d950: write() argument must be str, not None
Error processing block 0x3dd37c: write() argument must be str, not None
Error processing block 0x5d32e8: write() argument must be str, not None
Error processing block 0x5ed210: write() argument must be str, not None
Error processing block 0x4b3b42: write() argument must be str, not None
Error processing block 0x54ab02: write() argument must be str, not None
Error processing block 0x53979a: write() argument must be str, not None
Error processing block 0x3d32b4: write() argument must be str, not None
Error processing block 0x5619d9: write() argument must be str, not None
Error processing block 0x4c8bc8: write() argument must be str, not None
Error processing block 

KeyboardInterrupt: 

In [5]:
def retrieve_missing_blocks(folder_name):
    blocks_file = f'./data/{folder_name}/blocks.csv'
    receipts_file = f'./data/{folder_name}/tx_receipts.json'
    errors_file = f'./data/{folder_name}/errors_2.txt'

    blocks_data = pd.read_csv(blocks_file)

    with open(receipts_file, 'r') as file:
        tx_receipts = json.load(file)

    block_numbers = [tx["blockNumber"] for tx in tx_receipts]
    
    print("Loaded block numbers and block data...")

    print(len(block_numbers))
    print(len(blocks_data['block_number']))
    
    missing_blocks = set(block_numbers) - set(hex(block) for block in blocks_data['block_number'])

    print(f"Total missing blocks: {len(missing_blocks)}")

    for missing_block in missing_blocks:
        line = get_block_data(missing_block, errors_file)
        with open(blocks_file, 'a') as blocks_file:
            blocks_file.write(line)

In [6]:
retrieve_missing_blocks('logs-01Jan-28Apr-deps')

Loaded block numbers and block data...
39691
39691
Total missing blocks: 0


In [30]:
retrieve_missing_blocks('logs-01Jan-28Apr-withds')

Loaded block numbers and block data...
5022
5022
Total missing blocks: 0


In [7]:
retrieve_missing_blocks('logs-13Sep-31Dec')

Loaded block numbers and block data...
1028950
1028786
Total missing blocks: 64
