In [1]:
import os
import csv
import json
import math
import requests
import threading
import pandas as pd
from dotenv import load_dotenv
from concurrent.futures import ThreadPoolExecutor

load_dotenv()

def get_api_key():
    return os.getenv('MOONBEAM_API_KEY')

CONNECTION_URL = "https://moonbeam.api.onfinality.io/rpc"
OPTIONS = {
    'headers': {
        'apikey': get_api_key()
    }
}

def get_logs(from_block, to_block, address, topic, folder_name):
    payload = {
        "jsonrpc": "2.0",
        "method": "eth_getLogs",
        "params": [{
            "fromBlock": hex(from_block),
            "toBlock": hex(to_block),
            "address": address,
            "topics": topic
        }],
        "id": 1
    }

    try:
        response = requests.post(CONNECTION_URL, headers=OPTIONS['headers'], json=payload)
        response_json = response.json()
        if 'result' in response_json:
            return response_json['result']
        else:
            with open(f'./data/{folder_name}/errors.txt', 'w') as f:
                f.write(f"Error fetching logs for blocks {from_block} to {to_block}: {response_json}")
            return None
    except Exception as e:
        with open(f'./data/{folder_name}/errors.txt', 'w') as f:
            f.write(f"Error fetching logs for blocks {from_block} to {to_block}: {e}")

def save_logs(logs, filename):
    with open(filename, 'w') as f:
        json.dump(logs, f)

def extract_transaction_hashes(logs):
    return set(log['transactionHash'] for log in logs)

def save_hashes_to_csv(hashes, filename):
    with open(filename, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['hash'])
        for tx_hash in hashes:
            writer.writerow([tx_hash])

def main(from_block, to_block, folder_name, address, topic):
    block_range = 500
    futures = []
    total_logs_count = 0
    merged_logs = []

    try:
        if not os.path.exists(f'./data/{folder_name}/'):
            os.makedirs(f'./data/{folder_name}/')

        with ThreadPoolExecutor(max_workers=1) as executor:
            for start in range(from_block, to_block, block_range):
                end = min(start + block_range - 1, to_block)
                future = executor.submit(get_logs, start, end, address, topic, folder_name)
                futures.append((future, start, end))

            for future, start, end in futures:
                logs = future.result()
                if logs is None:
                    with open(f'./data/{folder_name}/errors.txt', 'w') as f:
                        f.write("Failed: ", start, end)
                total_logs_count += len(logs)
                print(f"Fetched logs for blocks {start} to {end}: {len(logs)}")
                merged_logs.extend(logs)

        merged_logs_count = len(merged_logs)
        with open(f'./data/{folder_name}/merged_logs.json', 'w') as f:
            json.dump(merged_logs, f)
        
        print(f"Merged all log files into ./data/{folder_name}/merged_logs.json")
        print(f"Total logs fetched: {total_logs_count}")
        print(f"Total logs in merged file: {merged_logs_count}")

        # Extract transaction hashes and save to CSV
        transaction_hashes = extract_transaction_hashes(merged_logs)
        save_hashes_to_csv(transaction_hashes, f'./data/{folder_name}/transaction_hashes.csv')

        print(f"Transaction hashes saved to ./data/{folder_name}/transaction_hashes.csv")
        print(f"Total transaction hashes: {len(transaction_hashes)}")
    except Exception as e:
        with open(f'./data/{folder_name}/errors.txt', 'w') as f:
            f.write(str(e))
        print(f"Error!! Saved to ./data/{folder_name}/errors.txt")

ModuleNotFoundError: No module named 'pandas'

**Relevant Block Numbers**

171381 # 11 jan 2022 11:11:12 (deployment of contract 0xd3df in Moonbeam)

2511472 # 15 Dec 2022

6823084 # 31 Jul 2024

**Relevant Topics**
Withdrawals: Send(address,address,uint32,bytes32,uint256,bool): 0xa3d219cf126a12be40d7ad1ceef46231c987988dd4e686457b610e1b6b80a4bf

Deposits: Receive(uint64,address,address,address,uint256): 0x9f9a97db84f39202ca3b409b63f7ccf7d3fd810e176573c7483088b6f181bbbb

In [None]:
from_block = 171381 # 11 jan
to_block = 2511472 # 15 dec
address_withdrawals = "0xd3dfd3ede74e0dcebc1aa685e151332857efce2d"
topic = ["0xa3d219cf126a12be40d7ad1ceef46231c987988dd4e686457b610e1b6b80a4bf"]
main(from_block, to_block, "logs-11Jan-14Dec-withds", address_withdrawals, topic)

Fetched logs for blocks 171381 to 171880: 0
Fetched logs for blocks 171881 to 172380: 2
Fetched logs for blocks 172381 to 172880: 0
Fetched logs for blocks 172881 to 173380: 0
Fetched logs for blocks 173381 to 173880: 0
Fetched logs for blocks 173881 to 174380: 0
Fetched logs for blocks 174381 to 174880: 0
Fetched logs for blocks 174881 to 175380: 0
Fetched logs for blocks 175381 to 175880: 0
Fetched logs for blocks 175881 to 176380: 0
Fetched logs for blocks 176381 to 176880: 0
Fetched logs for blocks 176881 to 177380: 0
Fetched logs for blocks 177381 to 177880: 1
Fetched logs for blocks 177881 to 178380: 0
Fetched logs for blocks 178381 to 178880: 0
Fetched logs for blocks 178881 to 179380: 0
Error fetching logs for blocks 179381 to 179880: {'jsonrpc': '2.0', 'error': {'code': -32603, 'message': 'query timeout of 10 seconds exceeded'}, 'id': 1}
Fetched logs for blocks 179381 to 179880: 0
Fetched logs for blocks 179881 to 180380: 0
Fetched logs for blocks 180381 to 180880: 0
Fetched l

In [None]:
from_block = 171381 # 11 jan
to_block = 2511472 # 14 dec
address_deposits = "0xD3dfD3eDe74E0DCEBC1AA685e151332857efCe2d"
topic = ["0x9f9a97db84f39202ca3b409b63f7ccf7d3fd810e176573c7483088b6f181bbbb"]
main(from_block, to_block, "logs-11Jan-14Dec-deps", address_deposits, topic)

Fetched logs for blocks 171381 to 171880: 0
Fetched logs for blocks 171881 to 172380: 7
Fetched logs for blocks 172381 to 172880: 5
Fetched logs for blocks 172881 to 173380: 5
Fetched logs for blocks 173381 to 173880: 16
Fetched logs for blocks 173881 to 174380: 7
Fetched logs for blocks 174381 to 174880: 3
Fetched logs for blocks 174881 to 175380: 3
Fetched logs for blocks 175381 to 175880: 1
Fetched logs for blocks 175881 to 176380: 3
Fetched logs for blocks 176381 to 176880: 3
Fetched logs for blocks 176881 to 177380: 1
Fetched logs for blocks 177381 to 177880: 0
Fetched logs for blocks 177881 to 178380: 1
Fetched logs for blocks 178381 to 178880: 4
Fetched logs for blocks 178881 to 179380: 2
Fetched logs for blocks 179381 to 179880: 2
Fetched logs for blocks 179881 to 180380: 1
Fetched logs for blocks 180381 to 180880: 2
Fetched logs for blocks 180881 to 181380: 0
Fetched logs for blocks 181381 to 181880: 2
Fetched logs for blocks 181881 to 182380: 3
Fetched logs for blocks 182381 

In [3]:
from_block = 171381 # 11 jan
to_block = 2511472 # 14 dec
address_deposits = "0xD3dfD3eDe74E0DCEBC1AA685e151332857efCe2d"
topic = ["0x9f9a97db84f39202ca3b409b63f7ccf7d3fd810e176573c7483088b6f181bbbb"]
main(from_block, to_block, "logs-11Jan-14Dec-deps-2", address_deposits, topic)

Fetched logs for blocks 171381 to 171880: 0
Fetched logs for blocks 171881 to 172380: 7
Fetched logs for blocks 172381 to 172880: 5
Fetched logs for blocks 172881 to 173380: 5
Fetched logs for blocks 173381 to 173880: 16
Fetched logs for blocks 173881 to 174380: 7
Fetched logs for blocks 174381 to 174880: 3
Fetched logs for blocks 174881 to 175380: 3
Fetched logs for blocks 175381 to 175880: 1
Fetched logs for blocks 175881 to 176380: 3
Fetched logs for blocks 176381 to 176880: 3
Fetched logs for blocks 176881 to 177380: 1
Fetched logs for blocks 177381 to 177880: 0
Fetched logs for blocks 177881 to 178380: 1
Fetched logs for blocks 178381 to 178880: 4
Fetched logs for blocks 178881 to 179380: 2
Fetched logs for blocks 179381 to 179880: 2
Fetched logs for blocks 179881 to 180380: 1
Fetched logs for blocks 180381 to 180880: 2
Fetched logs for blocks 180881 to 181380: 0
Fetched logs for blocks 181381 to 181880: 2
Fetched logs for blocks 181881 to 182380: 3
Fetched logs for blocks 182381 

There were some failed log retrievals...

In [None]:
# get failed logs

def retrieve_failed_logs(folder_name, address_withdrawals, topic):
    new_logs = []
    total_logs_count = 0

    with open(f'./data/{folder_name}/errors.txt', 'r') as f:
        reader = csv.reader(f)
        for line in reader:
            from_block = int(line[0])
            to_block = int(line[1])
            logs = get_logs(from_block, to_block, address_withdrawals, topic, folder_name)
            total_logs_count += len(logs)
            print(f"Fetched logs for blocks {from_block} to {to_block}: {len(logs)}")
            new_logs.extend(logs)

        # Load merged logs from JSON file
        with open(f'./data/{folder_name}/merged_logs.json', 'r') as f:
            merged_logs_json = json.load(f)

        # Merge the loaded logs with the existing merged_logs
        merged_logs_json.extend(new_logs)

        # Print the number of fetched logs
        print(f"Total logs fetched (before + now): {len(merged_logs_json)}")

        # Store merged logs in a new file
        with open(f'./data/{folder_name}/merged_logs_updated.json', 'w') as f:
            json.dump(merged_logs_json, f)

        # Extract transaction hashes and save to CSV
        transaction_hashes = extract_transaction_hashes(new_logs)
        save_hashes_to_csv(transaction_hashes, f'./data/{folder_name}/new_transaction_hashes.csv')

        print(f"Transaction hashes saved to ./data/{folder_name}/transaction_hashes.csv")
        print(f"Total transaction hashes: {len(transaction_hashes)}")

        print(f"Merged logs stored in ./data/{folder_name}/merged_logs_updated.json")

In [None]:
address_withdrawals = "0xd3dfd3ede74e0dcebc1aa685e151332857efce2d"
topic = ["0xa3d219cf126a12be40d7ad1ceef46231c987988dd4e686457b610e1b6b80a4bf"]

retrieve_failed_logs("logs-11Jan-14Dec-withds", address_withdrawals, topic)

Fetched logs for blocks 179381 to 179880: 0
Fetched logs for blocks 214881 to 215380: 0
Fetched logs for blocks 215881 to 216380: 23
Fetched logs for blocks 246381 to 246880: 3
Fetched logs for blocks 248881 to 249380: 5
Fetched logs for blocks 248381 to 248880: 2
Fetched logs for blocks 249881 to 250380: 2
Fetched logs for blocks 250881 to 251380: 0
Fetched logs for blocks 251881 to 252380: 0
Fetched logs for blocks 254381 to 254880: 2
Fetched logs for blocks 255381 to 255880: 1
Fetched logs for blocks 256381 to 256880: 1
Fetched logs for blocks 270881 to 271380: 0
Fetched logs for blocks 271881 to 272380: 3
Fetched logs for blocks 276381 to 276880: 2
Fetched logs for blocks 277381 to 277880: 1
Fetched logs for blocks 277881 to 278380: 0
Fetched logs for blocks 278881 to 279380: 1
Fetched logs for blocks 279881 to 280380: 0
Fetched logs for blocks 279381 to 279880: 0
Fetched logs for blocks 280381 to 280880: 0
Fetched logs for blocks 280881 to 281380: 0
Fetched logs for blocks 281381 

In [None]:
address_deposits = "0xd3dfd3ede74e0dcebc1aa685e151332857efce2d"
topic = ["0x9f9a97db84f39202ca3b409b63f7ccf7d3fd810e176573c7483088b6f181bbbb"]

retrieve_failed_logs("logs-11Jan-14Dec-deps", address_deposits, topic)

Fetched logs for blocks 2507381 to 2507880: 0
Total logs fetched (before + now): 8208
Transaction hashes saved to ./data/logs-11Jan-14Dec-deps/transaction_hashes.csv
Total transaction hashes: 0
Merged logs stored in ./data/logs-11Jan-14Dec-deps/merged_logs_updated.json


## Retrieve Transaction Receipts

In [6]:
from concurrent.futures import ThreadPoolExecutor, as_completed
import time

RATE_LIMIT = 1  # requests per second
REQUEST_INTERVAL = 1 / RATE_LIMIT

def fetch_receipt(tx_hash, folder_name):
    data = {
        "id": 1,
        "jsonrpc": "2.0",
        "method": "eth_getTransactionReceipt",
        "params": [tx_hash]
    }

    try:
        response = requests.post(CONNECTION_URL, headers=OPTIONS['headers'], json=data)
        response_json = response.json()
        if 'result' in response_json:
            return response_json['result']
        else:
            print(f"Error fetching logs for blocks {from_block} to {to_block}: {response_json}")
            return []
    except Exception as e:
        with open(f"./data/{folder_name}/errors.txt", "a") as error_file:
            error_file.write(f"Error retrieving transaction: {tx_hash}\n")

def process_hashes(hashes, folder_name):
    receipts = []
    total_hashes = len(hashes)
    progress_interval = max(1, total_hashes // 100)  # Update progress every 1%

    with ThreadPoolExecutor(max_workers=RATE_LIMIT) as executor:
        futures = {executor.submit(fetch_receipt, tx_hash, folder_name): tx_hash for tx_hash in hashes}
        
        for idx, future in enumerate(as_completed(futures), 1):
            try:
                result = future.result()
                receipts.append(result)

                if idx % progress_interval == 0 or idx == total_hashes:
                    progress_percentage = (idx / total_hashes) * 100
                    print(f"Progress: {progress_percentage:.2f}% ({idx}/{total_hashes})")
        
                time.sleep(REQUEST_INTERVAL)
            except Exception as e:
                with open(f"./data/{folder_name}/errors.txt", "a") as error_file:
                    error_file.write(f"Error retrieving transaction: {futures[future]}\n")
    return receipts

def retrieve_receipts(folder_name):
    # Read hashes from CSV
    hashes_df = pd.read_csv(f'./data/{folder_name}/transaction_hashes.csv')
    hashes = hashes_df['hash'].drop_duplicates().tolist()

    # Fetch receipts
    receipts = process_hashes(hashes, folder_name)

    # Save receipts to JSON
    with open(f'./data/{folder_name}/tx_receipts.json', 'w') as f:
        json.dump(receipts, f)

    # Print the count of processed receipts
    print(f"Total receipts fetched: {len(receipts)}")


In [None]:
retrieve_receipts("logs-11Jan-14Dec-withds")

Progress: 0.99% (53/5337)
Progress: 1.99% (106/5337)
Progress: 2.98% (159/5337)
Progress: 3.97% (212/5337)
Progress: 4.97% (265/5337)
Progress: 5.96% (318/5337)
Progress: 6.95% (371/5337)
Progress: 7.94% (424/5337)
Progress: 8.94% (477/5337)
Progress: 9.93% (530/5337)
Progress: 10.92% (583/5337)
Progress: 11.92% (636/5337)
Progress: 12.91% (689/5337)
Progress: 13.90% (742/5337)
Progress: 14.90% (795/5337)
Progress: 15.89% (848/5337)
Progress: 16.88% (901/5337)
Progress: 17.88% (954/5337)
Progress: 18.87% (1007/5337)
Progress: 19.86% (1060/5337)
Progress: 20.85% (1113/5337)
Progress: 21.85% (1166/5337)
Progress: 22.84% (1219/5337)
Progress: 23.83% (1272/5337)
Progress: 24.83% (1325/5337)
Progress: 25.82% (1378/5337)
Progress: 26.81% (1431/5337)
Progress: 27.81% (1484/5337)
Progress: 28.80% (1537/5337)
Progress: 29.79% (1590/5337)
Progress: 30.79% (1643/5337)
Progress: 31.78% (1696/5337)
Progress: 32.77% (1749/5337)
Progress: 33.76% (1802/5337)
Progress: 34.76% (1855/5337)
Progress: 35.7

In [None]:
retrieve_receipts('logs-11Jan-14Dec-deps')

Progress: 1.00% (82/8208)
Progress: 2.00% (164/8208)
Progress: 3.00% (246/8208)
Progress: 4.00% (328/8208)
Progress: 5.00% (410/8208)
Progress: 5.99% (492/8208)
Progress: 6.99% (574/8208)
Progress: 7.99% (656/8208)
Progress: 8.99% (738/8208)
Progress: 9.99% (820/8208)
Progress: 10.99% (902/8208)
Progress: 11.99% (984/8208)
Progress: 12.99% (1066/8208)
Progress: 13.99% (1148/8208)
Progress: 14.99% (1230/8208)
Progress: 15.98% (1312/8208)
Progress: 16.98% (1394/8208)
Progress: 17.98% (1476/8208)
Progress: 18.98% (1558/8208)
Progress: 19.98% (1640/8208)
Progress: 20.98% (1722/8208)
Progress: 21.98% (1804/8208)
Progress: 22.98% (1886/8208)
Progress: 23.98% (1968/8208)
Progress: 24.98% (2050/8208)
Progress: 25.97% (2132/8208)
Progress: 26.97% (2214/8208)
Progress: 27.97% (2296/8208)
Progress: 28.97% (2378/8208)
Progress: 29.97% (2460/8208)
Progress: 30.97% (2542/8208)
Progress: 31.97% (2624/8208)
Progress: 32.97% (2706/8208)
Progress: 33.97% (2788/8208)
Progress: 34.97% (2870/8208)
Progress

In [None]:
retrieve_receipts("logs-11Jan-14Dec-withds")

Progress: 0.99% (53/5337)
Progress: 1.99% (106/5337)
Progress: 2.98% (159/5337)
Progress: 3.97% (212/5337)
Progress: 4.97% (265/5337)
Progress: 5.96% (318/5337)
Progress: 6.95% (371/5337)
Progress: 7.94% (424/5337)
Progress: 8.94% (477/5337)
Progress: 9.93% (530/5337)
Progress: 10.92% (583/5337)
Progress: 11.92% (636/5337)
Progress: 12.91% (689/5337)
Progress: 13.90% (742/5337)
Progress: 14.90% (795/5337)
Progress: 15.89% (848/5337)
Progress: 16.88% (901/5337)
Progress: 17.88% (954/5337)
Progress: 18.87% (1007/5337)
Progress: 19.86% (1060/5337)
Progress: 20.85% (1113/5337)
Progress: 21.85% (1166/5337)
Progress: 22.84% (1219/5337)
Progress: 23.83% (1272/5337)
Progress: 24.83% (1325/5337)
Progress: 25.82% (1378/5337)
Progress: 26.81% (1431/5337)
Progress: 27.81% (1484/5337)
Progress: 28.80% (1537/5337)
Progress: 29.79% (1590/5337)
Progress: 30.79% (1643/5337)
Progress: 31.78% (1696/5337)
Progress: 32.77% (1749/5337)
Progress: 33.76% (1802/5337)
Progress: 34.76% (1855/5337)
Progress: 35.7

In [5]:
retrieve_receipts('logs-11Jan-14Dec-deps-2')

Progress: 1.00% (114/11443)
Progress: 1.99% (228/11443)
Progress: 2.99% (342/11443)
Progress: 3.98% (456/11443)
Progress: 4.98% (570/11443)
Error fetching logs for blocks 171381 to 2511472: {'jsonrpc': '2.0', 'error': {'code': -32029, 'message': 'Too Many Requests, Please apply an OnFinality API key or contact us to receive a higher rate limit'}, 'id': 1}
Progress: 5.98% (684/11443)
Error fetching logs for blocks 171381 to 2511472: {'jsonrpc': '2.0', 'error': {'code': -32029, 'message': 'Too Many Requests, Please apply an OnFinality API key or contact us to receive a higher rate limit'}, 'id': 1}
Error fetching logs for blocks 171381 to 2511472: {'jsonrpc': '2.0', 'error': {'code': -32029, 'message': 'Too Many Requests, Please apply an OnFinality API key or contact us to receive a higher rate limit'}, 'id': 1}
Progress: 6.97% (798/11443)
Progress: 7.97% (912/11443)
Progress: 8.97% (1026/11443)
Progress: 9.96% (1140/11443)
Progress: 10.96% (1254/11443)
Progress: 11.95% (1368/11443)
Prog

In [7]:
import os
import re

def load_failed_hashes(filename):
    with open(filename, 'r') as f:
        hashes = [re.findall(pattern='0x[a-fA-F0-9]{64}$', string=line)[0] for line in f]
    return hashes

def retrieve_failed_receipts(folder_name):
    errors_file = f'./data/{folder_name}/errors.txt'
    if not os.path.exists(errors_file):
        print(f"Error file {errors_file} does not exist.")
        return

    # Load failed transaction hashes from errors.txt
    failed_hashes = load_failed_hashes(f'./data/{folder_name}/errors.txt')

    # Fetch receipts for failed hashes
    failed_receipts = process_hashes(failed_hashes, folder_name)

    # Save failed receipts to JSON
    with open(f'./data/{folder_name}/tx_receipts_2.json', 'w') as f:
        json.dump(failed_receipts, f)

    # Print the count of processed failed receipts
    print(f"Total failed receipts fetched: {len(failed_receipts)}")

In [None]:
retrieve_failed_receipts("logs-11Jan-14Dec-deps")

Progress: 5.88% (1/17)
Progress: 11.76% (2/17)
Progress: 17.65% (3/17)
Progress: 23.53% (4/17)
Progress: 29.41% (5/17)
Progress: 35.29% (6/17)
Progress: 41.18% (7/17)
Progress: 47.06% (8/17)
Progress: 52.94% (9/17)
Progress: 58.82% (10/17)
Progress: 64.71% (11/17)
Progress: 70.59% (12/17)
Progress: 76.47% (13/17)
Progress: 82.35% (14/17)
Progress: 88.24% (15/17)
Progress: 94.12% (16/17)
Progress: 100.00% (17/17)
Total failed receipts fetched: 17


In [10]:
retrieve_failed_receipts("logs-11Jan-14Dec-deps-2")

Progress: 8.33% (1/12)
Progress: 16.67% (2/12)
Progress: 25.00% (3/12)
Progress: 33.33% (4/12)
Progress: 41.67% (5/12)
Progress: 50.00% (6/12)
Progress: 58.33% (7/12)
Progress: 66.67% (8/12)
Progress: 75.00% (9/12)
Progress: 83.33% (10/12)
Progress: 91.67% (11/12)
Progress: 100.00% (12/12)
Total failed receipts fetched: 12


In [15]:
retrieve_failed_receipts("logs-11Jan-14Dec-withds")

Progress: 6.67% (1/15)
Progress: 13.33% (2/15)
Progress: 20.00% (3/15)
Progress: 26.67% (4/15)
Progress: 33.33% (5/15)
Progress: 40.00% (6/15)
Progress: 46.67% (7/15)
Progress: 53.33% (8/15)
Progress: 60.00% (9/15)
Progress: 66.67% (10/15)
Progress: 73.33% (11/15)
Progress: 80.00% (12/15)
Progress: 86.67% (13/15)
Progress: 93.33% (14/15)
Progress: 100.00% (15/15)
Total failed receipts fetched: 15


For the second interval, we need to merge the json files with transaction receipts

In [11]:
import json

def merge_json_files(folder_name):
    file1 = f'./data/{folder_name}/tx_receipts.json'
    file2 = f'./data/{folder_name}/tx_receipts_2.json'
    output_file = f'./data/{folder_name}/tx_receipts.json'
    
    errors_file = f'./data/{folder_name}/errors.txt'
    if not os.path.exists(errors_file):
        print(f"Error file {errors_file} does not exist.")
        return
    
    with open(file1, 'r') as f1, open(file2, 'r') as f2:
        json1 = json.load(f1)
        json2 = json.load(f2)

    merged_json = json1 + json2

    with open(output_file, 'w') as f:
        json.dump(merged_json, f)

    os.remove(file2)

    count1 = len(json1)
    count2 = len(json2)
    count_merged = len(merged_json)

    print(f"Number of transaction receipts in tx_receipts: {count1}")
    print(f"Number of transaction receipts in tx_receipts_2: {count2}")
    print(f"Number of transaction receipts in the merged file: {count_merged}")

In [None]:
merge_json_files("logs-11Jan-14Dec-withds")

Number of transaction receipts in tx_receipts: 5337
Number of transaction receipts in tx_receipts_2: 15
Number of transaction receipts in the merged file: 5352


In [None]:
merge_json_files("logs-11Jan-14Dec-deps")

Number of transaction receipts in tx_receipts: 8208
Number of transaction receipts in tx_receipts_2: 17
Number of transaction receipts in the merged file: 8225


In [12]:
merge_json_files("logs-11Jan-14Dec-deps-2")

Number of transaction receipts in tx_receipts: 11431
Number of transaction receipts in tx_receipts_2: 12
Number of transaction receipts in the merged file: 11443


In [13]:
import json

# remove duplicates from json file with result field of receipts
def remove_duplicates(folder_name):
    file = f'./data/{folder_name}/tx_receipts.json'

    with open(file, 'r') as f:
        receipts = json.load(f)

    unique_receipts = []
    unique_hashes = set()

    for receipt in receipts:
        try:
            tx_hash = receipt.get('transactionHash')
            if tx_hash not in unique_hashes:
                unique_hashes.add(tx_hash)
                unique_receipts.append(receipt)
        except Exception as e:
            print(f"Error processing receipt: {receipt}, {e}")
    # Save unique receipts to a new file
    unique_output_file = f'./data/{folder_name}/unique_tx_receipts.json'
    with open(unique_output_file, 'w') as f:
        json.dump(unique_receipts, f)

    print(f"Unique receipts saved to {unique_output_file}")

    print(f"Removed {len(receipts) - len(unique_receipts)} duplicate receipts")
    print(f"Total receipts: {len(receipts)}")
    print(f"Total unique receipts: {len(unique_receipts)}")


In [None]:
remove_duplicates("logs-11Jan-14Dec-withds")

Error processing receipt: None, 'NoneType' object has no attribute 'get'
Error processing receipt: None, 'NoneType' object has no attribute 'get'
Error processing receipt: None, 'NoneType' object has no attribute 'get'
Error processing receipt: None, 'NoneType' object has no attribute 'get'
Error processing receipt: None, 'NoneType' object has no attribute 'get'
Error processing receipt: None, 'NoneType' object has no attribute 'get'
Unique receipts saved to ./data/logs-11Jan-14Dec-withds/unique_tx_receipts.json
Removed 15 duplicate receipts
Total receipts: 5352
Total unique receipts: 5337


In [None]:
remove_duplicates( "logs-11Jan-14Dec-deps")

Error processing receipt: None, 'NoneType' object has no attribute 'get'
Error processing receipt: None, 'NoneType' object has no attribute 'get'
Error processing receipt: None, 'NoneType' object has no attribute 'get'
Error processing receipt: None, 'NoneType' object has no attribute 'get'
Error processing receipt: None, 'NoneType' object has no attribute 'get'
Error processing receipt: None, 'NoneType' object has no attribute 'get'
Error processing receipt: None, 'NoneType' object has no attribute 'get'
Error processing receipt: None, 'NoneType' object has no attribute 'get'
Error processing receipt: None, 'NoneType' object has no attribute 'get'
Unique receipts saved to ./data/logs-11Jan-14Dec-deps/unique_tx_receipts.json
Removed 17 duplicate receipts
Total receipts: 8225
Total unique receipts: 8208


In [14]:
remove_duplicates( "logs-11Jan-14Dec-deps-2")

Unique receipts saved to ./data/logs-11Jan-14Dec-deps-2/unique_tx_receipts.json
Removed 0 duplicate receipts
Total receipts: 11443
Total unique receipts: 11443


In [5]:
remove_duplicates("logs-13Sep-31Dec")

Unique receipts saved to ./data/logs-13Sep-31Dec/unique_tx_receipts.json
Removed 82 duplicate receipts
Total receipts: 1029032
Total unique receipts: 1028950


In [16]:
from dotenv import load_dotenv
import concurrent.futures
import requests
import json
import os

load_dotenv()

def get_api_key():
    return os.getenv('MOONBEAM_API_KEY')

CONNECTION_URL = "https://moonbeam.api.onfinality.io/rpc"
OPTIONS = {
    'headers': {
        'apikey': get_api_key()
    }
}

def get_block_data(block_number, errors_file):
    payload = {
        "id": 1,
        "jsonrpc": "2.0",
        "params": [],
        "method": "eth_getBlockByNumber"
    }

    try:
        payload['params'] = [block_number, False]
        response = requests.post(CONNECTION_URL, headers=OPTIONS['headers'], json=payload)

        if response.status_code == 200:
            block = response.json()["result"]
            number = int(block["number"], 16)
            timestamp = int(block["timestamp"], 16)
            transactions = len(block["transactions"])
            return f"{number},{transactions},{timestamp}\n"
        else:
            with open(errors_file, "a") as error_file:
                error_file.write(f"Error code: {block_number}\n")
            return "null,null,null\n"

    except Exception as e:
        with open(errors_file, "a") as error_file:
            error_file.write(f"Error retrieving block: {block_number}; {e}\n")

def get_blocks_data(folder_name):
    input_file = f'./data/{folder_name}/unique_tx_receipts.json'
    output_file = f'./data/{folder_name}/blocks.csv'
    errors_file = f'./data/{folder_name}/errors.txt'

    with open(input_file, 'r') as file:
        tx_receipts = json.load(file)

    print(f"Extracting block number and Unix timestamp from {len(tx_receipts)} transaction receipts...")

    block_numbers = [tx["blockNumber"] for tx in tx_receipts]

    print(f"Extracted {len(block_numbers)} block numbers...")

    with open(output_file, "a") as blocks_file:
        blocks_file.write("block_number,transactions,timestamp\n")
        
        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
            # Submit tasks for each block in the range
            futures = {executor.submit(get_block_data, block_number, errors_file): block_number for block_number in block_numbers}

            # Process the completed tasks and write to the file
            for future in concurrent.futures.as_completed(futures):
                block_number = futures[future]
                try:
                    blocks_data = future.result()
                    blocks_file.write(blocks_data)
                except Exception as e:
                    print(f"Error processing block {block_number}: {e}")

    print(f'Extracted block number and Unix timestamp to {output_file}')

In [None]:
get_blocks_data('logs-11Jan-14Dec-withds')

Extracting block number and Unix timestamp from 5337 transaction receipts...
Extracted 5337 block numbers...
Error processing block 0x1719d1: write() argument must be str, not None
Error processing block 0x15b6b2: write() argument must be str, not None
Error processing block 0x147849: write() argument must be str, not None
Error processing block 0x17b5cd: write() argument must be str, not None
Error processing block 0x30c93: write() argument must be str, not None
Error processing block 0xafb6e: write() argument must be str, not None
Error processing block 0x17b370: write() argument must be str, not None
Error processing block 0xce56d: write() argument must be str, not None
Extracted block number and Unix timestamp to ./data/logs-11Jan-14Dec-withds/blocks.csv


In [None]:
get_blocks_data('logs-11Jan-14Dec-deps')

Extracting block number and Unix timestamp from 8208 transaction receipts...
Extracted 8208 block numbers...
Extracted block number and Unix timestamp to ./data/logs-11Jan-14Dec-deps/blocks.csv


In [17]:
get_blocks_data('logs-11Jan-14Dec-deps-2')

Extracting block number and Unix timestamp from 11443 transaction receipts...
Extracted 11443 block numbers...
Extracted block number and Unix timestamp to ./data/logs-11Jan-14Dec-deps-2/blocks.csv


In [19]:
import pandas as pd

def retrieve_missing_blocks(folder_name):
    blocks_file = f'./data/{folder_name}/blocks.csv'
    receipts_file = f'./data/{folder_name}/unique_tx_receipts.json'
    errors_file = f'./data/{folder_name}/errors_2.txt'

    blocks_data = pd.read_csv(blocks_file)

    with open(receipts_file, 'r') as file:
        tx_receipts = json.load(file)

    block_numbers = [tx["blockNumber"] for tx in tx_receipts]
    
    print("Loaded block numbers and block data...")

    print(len(block_numbers))
    print(len(blocks_data['block_number']))
    
    missing_blocks = set(block_numbers) - set(hex(block) for block in blocks_data['block_number'])

    print(f"Total missing blocks: {len(missing_blocks)}")

    with open(blocks_file, 'a') as blocks_file:
        for missing_block in missing_blocks:
            line = get_block_data(missing_block, errors_file)
            blocks_file.write(line)

In [None]:
retrieve_missing_blocks('logs-11Jan-14Dec-withds')

Loaded block numbers and block data...
5337
5330
Total missing blocks: 5


In [None]:
retrieve_missing_blocks('logs-11Jan-14Dec-deps')

Loaded block numbers and block data...
8208
8205
Total missing blocks: 3


In [20]:
retrieve_missing_blocks('logs-11Jan-14Dec-deps-2')

Loaded block numbers and block data...
11443
11442
Total missing blocks: 1
