In [87]:
import os
import csv
import json
import math
import requests
import threading
import pandas as pd
from dotenv import load_dotenv
from concurrent.futures import ThreadPoolExecutor

load_dotenv()

def get_api_key():
    """Return the value of the ``ETHEREUM_API_KEY`` environment variable.

    Returns:
        The key as a string, or ``None`` when the variable is not set.
    """
    api_key = os.environ.get('ETHEREUM_API_KEY')
    return api_key

# JSON-RPC endpoint that the request helpers in this notebook POST to.
CONNECTION_URL = "https://svc.blockdaemon.com/ethereum/mainnet/native"
# Shared request options; the API key is read once, when this cell runs.
OPTIONS = {
    'headers': {
        "accept": "application/json",
        'x-api-key': get_api_key()
    }
}

def get_logs(from_block, to_block, topic, contract, timeout=30):
    """Fetch the event logs of one block range with the ``eth_getLogs`` JSON-RPC method.

    Args:
        from_block: First block of the range, as an int (sent hex-encoded).
        to_block: Last block of the range, as an int (sent hex-encoded).
        topic: Value sent as the ``topics`` filter (callers pass a list of topic hashes).
        contract: Contract address sent as the ``address`` filter.
        timeout: Seconds to wait for the HTTP response. Without a timeout a
            stalled connection would block its worker thread forever.

    Returns:
        The ``result`` value of the response, or ``[]`` (after printing the
        response) when the response carries no ``result`` key.

    Raises:
        requests.exceptions.RequestException: On connection failure or timeout.
        ValueError: If the response body cannot be decoded as JSON.
    """
    payload = {
        "jsonrpc": "2.0",
        "method": "eth_getLogs",
        "params": [{
            "fromBlock": hex(from_block),
            "toBlock": hex(to_block),
            "address": contract,
            "topics": topic
        }],
        "id": 1
    }

    response = requests.post(
        CONNECTION_URL,
        headers=OPTIONS['headers'],
        json=payload,
        timeout=timeout,
    )
    response_json = response.json()
    if 'result' in response_json:
        return response_json['result']

    # Best effort: report the failed range and let the caller continue.
    print(f"Error fetching logs for blocks {from_block} to {to_block}: {response_json}")
    return []

def save_logs(logs, filename):
    """Serialize ``logs`` as JSON into ``filename``, replacing any existing content."""
    with open(filename, 'w') as out:
        out.write(json.dumps(logs))

def extract_transaction_hashes(logs):
    """Collect the distinct ``transactionHash`` values of ``logs`` into a set."""
    return {entry['transactionHash'] for entry in logs}

def save_hashes_to_csv(hashes, filename):
    """Write ``hashes`` to ``filename`` as a one-column CSV under a ``hash`` header row."""
    with open(filename, 'w', newline='') as out:
        csv_out = csv.writer(out)
        csv_out.writerow(['hash'])
        csv_out.writerows([tx_hash] for tx_hash in hashes)

def main(from_block, to_block, folder_name, topic, contract):
    """Download the logs of a block interval and save them with their transaction hashes.

    The interval ``[from_block, to_block]`` is cut into chunks of ``block_range``
    blocks; each chunk is fetched with ``get_logs`` on a thread pool and the
    results are concatenated in chunk order.

    Files written under ``./data-get-logs/{folder_name}/``:
        merged_logs.json        all fetched logs
        transaction_hashes.csv  distinct transaction hashes of those logs
        errors.txt              the exception text, only when something failed
        partial_logs.json       logs collected before the failure, only on failure

    Args:
        from_block: First block of the interval (int).
        to_block: Last block of the interval (int).
        folder_name: Sub-folder of ``./data-get-logs`` that receives the output.
        topic: Topics filter forwarded to ``get_logs``.
        contract: Contract address forwarded to ``get_logs``.
    """
    block_range = 500
    output_dir = f'./data-get-logs/{folder_name}'
    merged_logs = []

    try:
        os.makedirs(output_dir, exist_ok=True)

        with ThreadPoolExecutor(max_workers=10) as executor:
            futures = []
            # `to_block + 1`: range() excludes its stop value, so without it the
            # final block is never requested when it falls exactly on a chunk start.
            for start in range(from_block, to_block + 1, block_range):
                end = min(start + block_range - 1, to_block)
                futures.append(executor.submit(get_logs, start, end, topic, contract))

            # Consume in submission order so the merged logs stay in block order.
            for future in futures:
                merged_logs.extend(future.result())

        print(f"Total logs fetched: {len(merged_logs)}")

        merged_path = f'{output_dir}/merged_logs.json'
        with open(merged_path, 'w') as f:
            json.dump(merged_logs, f)

        print(f"Merged all log files into {merged_path}")
        print(f"Total logs in merged file: {len(merged_logs)}")

        # Extract transaction hashes and save to CSV
        transaction_hashes = extract_transaction_hashes(merged_logs)
        hashes_path = f'{output_dir}/transaction_hashes.csv'
        save_hashes_to_csv(transaction_hashes, hashes_path)

        print(f"Transaction hashes saved to {hashes_path}")
        print(f"Total transaction hashes: {len(transaction_hashes)}")
    except Exception as e:
        # The directory may not exist yet if the failure happened early.
        os.makedirs(output_dir, exist_ok=True)
        with open(f'{output_dir}/errors.txt', 'w') as f:
            f.write(f"{e}\n")
        # Partial logs go to their own JSON file: a list cannot be passed to
        # f.write(), and errors.txt is later parsed line by line for tx hashes.
        with open(f'{output_dir}/partial_logs.json', 'w') as f:
            json.dump(merged_logs, f)
        print(f"Error!! Saved to {output_dir}/errors.txt")

**Relevant Block Numbers**

 # September 13, 2021 00:00:00

13916166 # January 1, 2022 00:00:00

14673143 # April 28, 2022 13:32:41

20429463 # July 31, 2024 10:17:35

**Relevant Topics**
TokenWithdrew(uint256,address,address,address,uint32,uint256): 0x86174ea401f083b9bb1bdebca3068f27fb023c7091365ed2a8a02b8d75cf0e52

TokenDeposited(uint256,address,address,uint256): 0x72848855a2461abf0dd243723dfcc9163eec2ea5215469d101c0d9c9ef58940d

In [2]:
# First interval, withdrawals: the topic below is the TokenWithdrew hash
# listed under "Relevant Topics".
from_block = 13916166 # 1 jan
to_block = 14673143 # 28 Apr
topics = ["0x86174ea401f083b9bb1bdebca3068f27fb023c7091365ed2a8a02b8d75cf0e52"]
contract = "0x1A2a1c938CE3eC39b6D47113c7955bAa9DD454F2"
main(from_block, to_block, "logs-01Jan-28Apr-withds", topics, contract)

Merged all log files into ./data/logs-01Jan-28Apr-withds/merged_logs.json
Total logs fetched: 25470
Total logs in merged file: 25470
Transaction hashes saved to ./data/logs-01Jan-28Apr-withds/transaction_hashes.csv
Total transaction hashes: 25470


In [3]:
# First interval, deposits: same contract and block range as the withdrawals
# cell, with the TokenDeposited hash listed under "Relevant Topics".
from_block = 13916166 # 1 jan
to_block = 14673143 # 28 Apr
topics = ["0x72848855a2461abf0dd243723dfcc9163eec2ea5215469d101c0d9c9ef58940d"]
contract = "0x1A2a1c938CE3eC39b6D47113c7955bAa9DD454F2"
main(from_block, to_block, "logs-01Jan-28Apr-deps", topics, contract)

Merged all log files into ./data/logs-01Jan-28Apr-deps/merged_logs.json
Total logs fetched: 43989
Total logs in merged file: 43989
Transaction hashes saved to ./data/logs-01Jan-28Apr-deps/transaction_hashes.csv
Total transaction hashes: 43989


In [65]:
# Second interval: starts one block after the first interval ends and uses a
# different contract address.
# NOTE(review): this topic hash is not one of the two listed under
# "Relevant Topics" -- confirm which event signature it corresponds to.
from_block = 14673144 # 28 Apr
to_block = 20429463 # 31 Jul
contract = "0x64192819Ac13Ef72bF6b5AE239AC672B43a9AF08"
topics = ["0x21e88e956aa3e086f6388e899965cef814688f99ad8bb29b08d396571016372d"] #only withdrawals
main(from_block, to_block, "logs-28Apr-today", topics, contract)

Total logs fetched: 58037
Merged all log files into ./data/logs-28Apr-today/merged_logs.json
Total logs in merged file: 58037
Transaction hashes saved to ./data/logs-28Apr-today/transaction_hashes.csv
Total transaction hashes: 58037


## Retrieve Transaction Receipts

In [100]:
from concurrent.futures import ThreadPoolExecutor, as_completed
import time

# Target request rate; process_hashes also uses it as its thread-pool size.
RATE_LIMIT = 40  # requests per second
# Length of one request slot at RATE_LIMIT, in seconds; process_hashes sleeps for this long.
REQUEST_INTERVAL = 1 / RATE_LIMIT

def fetch_receipt(tx_hash, folder_name, timeout=30):
    """Fetch one transaction receipt with the ``eth_getTransactionReceipt`` JSON-RPC method.

    Every failure is appended to ``./data-get-logs/{folder_name}/errors.txt`` as a
    line that starts with the transaction hash, so the hash can be picked up
    again by whatever re-reads that file (see ``retrieve_failed_receipts``).

    Args:
        tx_hash: Transaction hash to look up.
        folder_name: Sub-folder of ``./data-get-logs`` holding ``errors.txt``.
        timeout: Seconds to wait for the HTTP response, so a stalled
            connection cannot block a worker thread forever.

    Returns:
        The ``result`` value of the response on success; ``[]`` when the
        response has no ``result`` key; ``None`` when the request raised.
    """
    data = {
        "id": 1,
        "jsonrpc": "2.0",
        "method": "eth_getTransactionReceipt",
        "params": [tx_hash]
    }
    errors_path = f"./data-get-logs/{folder_name}/errors.txt"

    try:
        response = requests.post(
            CONNECTION_URL,
            headers=OPTIONS['headers'],
            json=data,
            timeout=timeout,
        )
        response_json = response.json()
        if 'result' in response_json:
            return response_json['result']

        print(f"Error fetching tx receipt for tx {tx_hash}, {response_json}")
        # Record RPC-level errors too; previously they were only printed and
        # therefore never retried.
        with open(errors_path, "a") as error_file:
            error_file.write(f"Error retrieving transaction: {tx_hash}, {response_json}\n")
        return []
    except Exception as e:
        with open(errors_path, "a") as error_file:
            error_file.write(f"Error retrieving transaction: {tx_hash}, {e}\n")
        return None

def process_hashes(hashes, folder_name):
    """Fetch the receipts of ``hashes`` concurrently and return the ones that succeeded.

    ``fetch_receipt`` yields ``None`` or ``[]`` when a lookup fails. Those
    placeholders are dropped here instead of being stored, because the later
    steps of the notebook call ``receipt.get(...)`` and ``tx["blockNumber"]`` on
    every stored entry and would crash on them.

    Args:
        hashes: Sized collection of transaction hashes.
        folder_name: Sub-folder of ``./data-get-logs`` holding ``errors.txt``.

    Returns:
        List of receipts in completion order (not in the order of ``hashes``).
    """
    receipts = []
    total_hashes = len(hashes)
    progress_interval = max(1, total_hashes // 100)  # Update progress every 1%

    with ThreadPoolExecutor(max_workers=RATE_LIMIT) as executor:
        futures = {executor.submit(fetch_receipt, tx_hash, folder_name): tx_hash for tx_hash in hashes}

        for idx, future in enumerate(as_completed(futures), 1):
            try:
                result = future.result()
                # Keep real receipts only; failures are already in errors.txt.
                if result:
                    receipts.append(result)

                if idx % progress_interval == 0 or idx == total_hashes:
                    progress_percentage = (idx / total_hashes) * 100
                    print(f"Progress: {progress_percentage:.2f}% ({idx}/{total_hashes})")

                # NOTE: every request was submitted to the pool above, so this
                # pause only paces result collection, not the outgoing requests.
                time.sleep(REQUEST_INTERVAL)
            except Exception as e:
                with open(f"./data-get-logs/{folder_name}/errors.txt", "a") as error_file:
                    error_file.write(f"Error retrieving transaction: {futures[future]}, {e}\n")
    return receipts

def retrieve_receipts(folder_name):
    """Fetch and store the receipts of every hash listed in a folder's CSV.

    Reads the ``hash`` column of ``transaction_hashes.csv``, drops repeated
    hashes, fetches the receipts with ``process_hashes`` and writes them to
    ``tx_receipts.json`` in the same folder.

    Args:
        folder_name: Sub-folder of ``./data-get-logs`` to read from and write to.
    """
    hash_column = pd.read_csv(f'./data-get-logs/{folder_name}/transaction_hashes.csv')['hash']
    unique_hashes = hash_column.drop_duplicates().tolist()

    fetched = process_hashes(unique_hashes, folder_name)

    with open(f'./data-get-logs/{folder_name}/tx_receipts.json', 'w') as out:
        json.dump(fetched, out)

    print(f"Total receipts fetched: {len(fetched)}")


In [6]:
retrieve_receipts("logs-01Jan-28Apr-withds")

Progress: 1.00% (254/25470)
Progress: 1.99% (508/25470)
Progress: 2.99% (762/25470)
Progress: 3.99% (1016/25470)
Progress: 4.99% (1270/25470)
Progress: 5.98% (1524/25470)
Progress: 6.98% (1778/25470)
Progress: 7.98% (2032/25470)
Progress: 8.98% (2286/25470)
Progress: 9.97% (2540/25470)
Progress: 10.97% (2794/25470)
Progress: 11.97% (3048/25470)
Progress: 12.96% (3302/25470)
Progress: 13.96% (3556/25470)
Progress: 14.96% (3810/25470)
Progress: 15.96% (4064/25470)
Progress: 16.95% (4318/25470)
Progress: 17.95% (4572/25470)
Progress: 18.95% (4826/25470)
Progress: 19.95% (5080/25470)
Progress: 20.94% (5334/25470)
Progress: 21.94% (5588/25470)
Progress: 22.94% (5842/25470)
Progress: 23.93% (6096/25470)
Progress: 24.93% (6350/25470)
Progress: 25.93% (6604/25470)
Progress: 26.93% (6858/25470)
Progress: 27.92% (7112/25470)
Progress: 28.92% (7366/25470)
Progress: 29.92% (7620/25470)
Progress: 30.91% (7874/25470)
Progress: 31.91% (8128/25470)
Progress: 32.91% (8382/25470)
Progress: 33.91% (8636/

In [7]:
retrieve_receipts("logs-01Jan-28Apr-deps")

Progress: 1.00% (439/43989)
Progress: 2.00% (878/43989)
Progress: 2.99% (1317/43989)
Progress: 3.99% (1756/43989)
Progress: 4.99% (2195/43989)
Progress: 5.99% (2634/43989)
Progress: 6.99% (3073/43989)
Progress: 7.98% (3512/43989)
Progress: 8.98% (3951/43989)
Progress: 9.98% (4390/43989)
Progress: 10.98% (4829/43989)
Progress: 11.98% (5268/43989)
Progress: 12.97% (5707/43989)
Progress: 13.97% (6146/43989)
Progress: 14.97% (6585/43989)
Progress: 15.97% (7024/43989)
Progress: 16.97% (7463/43989)
Progress: 17.96% (7902/43989)
Progress: 18.96% (8341/43989)
Progress: 19.96% (8780/43989)
Progress: 20.96% (9219/43989)
Progress: 21.96% (9658/43989)
Progress: 22.95% (10097/43989)
Progress: 23.95% (10536/43989)
Progress: 24.95% (10975/43989)
Progress: 25.95% (11414/43989)
Progress: 26.95% (11853/43989)
Progress: 27.94% (12292/43989)
Progress: 28.94% (12731/43989)
Progress: 29.94% (13170/43989)
Progress: 30.94% (13609/43989)
Progress: 31.94% (14048/43989)
Progress: 32.93% (14487/43989)
Progress: 3

In [67]:
retrieve_receipts('logs-28Apr-today')

Progress: 1.00% (580/58037)
Progress: 2.00% (1160/58037)
Progress: 3.00% (1740/58037)
Progress: 4.00% (2320/58037)
Progress: 5.00% (2900/58037)
Progress: 6.00% (3480/58037)
Progress: 7.00% (4060/58037)
Progress: 7.99% (4640/58037)
Progress: 8.99% (5220/58037)
Progress: 9.99% (5800/58037)
Progress: 10.99% (6380/58037)
Progress: 11.99% (6960/58037)
Progress: 12.99% (7540/58037)
Progress: 13.99% (8120/58037)
Progress: 14.99% (8700/58037)
Progress: 15.99% (9280/58037)
Progress: 16.99% (9860/58037)
Progress: 17.99% (10440/58037)
Progress: 18.99% (11020/58037)
Progress: 19.99% (11600/58037)
Progress: 20.99% (12180/58037)
Progress: 21.99% (12760/58037)
Progress: 22.99% (13340/58037)
Progress: 23.98% (13920/58037)
Progress: 24.98% (14500/58037)
Progress: 25.98% (15080/58037)
Progress: 26.98% (15660/58037)
Progress: 27.98% (16240/58037)
Progress: 28.98% (16820/58037)
Progress: 29.98% (17400/58037)
Progress: 30.98% (17980/58037)
Progress: 31.98% (18560/58037)
Progress: 32.98% (19140/58037)
Progr

In [99]:
import os
import re

def load_failed_hashes(filename):
    """Return the first transaction hash found on each line of ``filename``.

    Lines without a ``0x`` + 64-hex-digit token are skipped. The errors file is
    shared with other steps of the notebook (for example the block-retrieval
    errors, which mention block numbers rather than transaction hashes), so
    such lines are expected and must not abort the scan.

    Args:
        filename: Path of the errors file to scan.

    Returns:
        List of hashes in file order; duplicates are kept.
    """
    hash_pattern = re.compile('0x[a-fA-F0-9]{64}')
    hashes = []
    with open(filename, 'r') as f:
        for line in f:
            match = hash_pattern.search(line)
            if match:
                hashes.append(match.group(0))
    return hashes

def retrieve_failed_receipts(folder_name):
    """Retry the transactions recorded in a folder's ``errors.txt``.

    The hashes are read with ``load_failed_hashes``, fetched again with
    ``process_hashes`` and the resulting receipts written to
    ``tx_receipts_2.json`` in the same folder. When ``errors.txt`` does not
    exist a message is printed and nothing else happens.

    Args:
        folder_name: Sub-folder of ``./data-get-logs`` to work in.
    """
    errors_file = f'./data-get-logs/{folder_name}/errors.txt'

    if os.path.exists(errors_file):
        # Hashes whose first attempt was logged as an error.
        retry_hashes = load_failed_hashes(errors_file)
        retried = process_hashes(retry_hashes, folder_name)

        with open(f'./data-get-logs/{folder_name}/tx_receipts_2.json', 'w') as out:
            json.dump(retried, out)

        print(f"Total failed receipts fetched: {len(retried)}")
    else:
        print(f"Error file {errors_file} does not exist.")

In [19]:
retrieve_failed_receipts('logs-01Jan-28Apr-deps')

['0x48bfbc5d66396e2eab1bdbac59c60e2a817e2f919ffce13d7cefc86aac78f7b3']
Progress: 100.00% (1/1)
Total failed receipts fetched: 1


In [21]:
retrieve_failed_receipts('logs-01Jan-28Apr-withds')

Progress: 25.00% (1/4)
Progress: 50.00% (2/4)
Progress: 75.00% (3/4)
Progress: 100.00% (4/4)
Total failed receipts fetched: 4


In [101]:
retrieve_failed_receipts('logs-28Apr-today')

Progress: 14.29% (1/7)
Progress: 28.57% (2/7)
Progress: 42.86% (3/7)
Progress: 57.14% (4/7)
Progress: 71.43% (5/7)
Progress: 85.71% (6/7)
Progress: 100.00% (7/7)
Total failed receipts fetched: 7


For the second interval, we need to merge the JSON files that contain the transaction receipts.

In [79]:
import json

def merge_json_files(file1, file2, output_file):
    """Concatenate the JSON contents of two files and write the result to a third.

    Both inputs are loaded completely before the output is opened, so
    ``output_file`` may be the same path as one of the inputs.

    Args:
        file1: Path of the first JSON file; its items come first in the output.
        file2: Path of the second JSON file.
        output_file: Path that receives ``file1`` contents + ``file2`` contents.
    """
    with open(file1, 'r') as first_handle, open(file2, 'r') as second_handle:
        first_items = json.load(first_handle)
        second_items = json.load(second_handle)

    combined = first_items + second_items

    with open(output_file, 'w') as out:
        json.dump(combined, out)

    # One summary line per file, in the order: file1, file2, output_file.
    for path, items in ((file1, first_items), (file2, second_items), (output_file, combined)):
        print(f"Number of transaction receipts in {path}: {len(items)}")

In [25]:
# Fold the retried receipts (tx_receipts_2.json) into tx_receipts.json.
# NOTE: output_file is the same path as file1, so tx_receipts.json is
# overwritten in place; running this cell again appends the retries again.
file1 = f'./data-get-logs/logs-01Jan-28Apr-deps/tx_receipts.json'
file2 = f'./data-get-logs/logs-01Jan-28Apr-deps/tx_receipts_2.json'
output_file = f'./data-get-logs/logs-01Jan-28Apr-deps/tx_receipts.json'
merge_json_files(file1, file2, output_file)

Number of transaction receipts in tx_receipts: 43989
Number of transaction receipts in tx_receipts_2: 1
Number of transaction receipts in the merged file: 43990


In [26]:
# Fold the retried receipts (tx_receipts_2.json) into tx_receipts.json.
# NOTE: output_file is the same path as file1, so tx_receipts.json is
# overwritten in place; running this cell again appends the retries again.
file1 = f'./data-get-logs/logs-01Jan-28Apr-withds/tx_receipts.json'
file2 = f'./data-get-logs/logs-01Jan-28Apr-withds/tx_receipts_2.json'
output_file = f'./data-get-logs/logs-01Jan-28Apr-withds/tx_receipts.json'
merge_json_files(file1, file2, output_file)

Number of transaction receipts in tx_receipts: 25470
Number of transaction receipts in tx_receipts_2: 4
Number of transaction receipts in the merged file: 25474


In [102]:
# Fold the retried receipts (tx_receipts_2.json) into tx_receipts.json.
# NOTE: output_file is the same path as file1, so tx_receipts.json is
# overwritten in place; running this cell again appends the retries again.
file1 = f'./data-get-logs/logs-28Apr-today/tx_receipts.json'
file2 = f'./data-get-logs/logs-28Apr-today/tx_receipts_2.json'
output_file = f'./data-get-logs/logs-28Apr-today/tx_receipts.json'
merge_json_files(file1, file2, output_file)

Number of transaction receipts in ./data-get-logs/logs-28Apr-today/tx_receipts.json: 58030
Number of transaction receipts in ./data-get-logs/logs-28Apr-today/tx_receipts_2.json: 7
Number of transaction receipts in ./data-get-logs/logs-28Apr-today/tx_receipts.json: 58037


In [103]:
# remove duplicates from json file with result field of receipts
def remove_duplicates(folder_name, filename):
    """Write a copy of a receipts file that keeps one receipt per transaction hash.

    The first receipt seen for each ``transactionHash`` is kept, preserving file
    order. Entries that are not JSON objects (for example the ``null`` / ``[]``
    placeholders that a failed receipt lookup leaves behind) are skipped instead
    of raising ``AttributeError`` on ``.get``.

    Args:
        folder_name: Directory containing ``filename``; the result is written
            to ``unique_tx_receipts.json`` in the same directory.
        filename: Name of the JSON file holding a list of receipts.
    """
    file = f'{folder_name}/{filename}'

    with open(file, 'r') as f:
        receipts = json.load(f)

    unique_receipts = []
    unique_hashes = set()
    skipped = 0

    for receipt in receipts:
        if not isinstance(receipt, dict):
            skipped += 1
            continue
        tx_hash = receipt.get('transactionHash')
        if tx_hash not in unique_hashes:
            unique_hashes.add(tx_hash)
            unique_receipts.append(receipt)

    # Save unique receipts to a new file
    unique_output_file = f'{folder_name}/unique_tx_receipts.json'
    with open(unique_output_file, 'w') as f:
        json.dump(unique_receipts, f)

    print(f"Unique receipts saved to {unique_output_file}")

    if skipped:
        print(f"Skipped {skipped} entries that are not receipts")
    # Skipped placeholders are not duplicates, so they are excluded from this count.
    print(f"Removed {len(receipts) - skipped - len(unique_receipts)} duplicate receipts")
    print(f"Total receipts: {len(receipts)}")
    print(f"Total unique receipts: {len(unique_receipts)}")


In [51]:
remove_duplicates("./data-get-logs/logs-01Jan-28Apr-deps", "tx_receipts.json")

Unique receipts saved to ./data-get-logs/logs-01Jan-28Apr-deps/unique_tx_receipts.json
Removed 0 duplicate receipts
Total receipts: 43989
Total unique receipts: 43989


In [32]:
remove_duplicates("./data-get-logs/logs-01Jan-28Apr-withds", "tx_receipts.json")

Unique receipts saved to ./data-get-logs/logs-01Jan-28Apr-withds/unique_tx_receipts.json
Removed 0 duplicate receipts
Total receipts: 25470
Total unique receipts: 25470


In [104]:
remove_duplicates("./data-get-logs/logs-28Apr-today", "tx_receipts.json")

Unique receipts saved to ./data-get-logs/logs-28Apr-today/unique_tx_receipts.json
Removed 0 duplicate receipts
Total receipts: 58037
Total unique receipts: 58037


In [105]:
from dotenv import load_dotenv
import pandas as pd
import concurrent.futures
import json
import requests
import os

load_dotenv()

def get_api_key():
    """Return the value of the ``ETHEREUM_API_KEY`` environment variable.

    Returns:
        The key as a string, or ``None`` when the variable is not set.
    """
    api_key = os.environ.get('ETHEREUM_API_KEY')
    return api_key

# Same endpoint URL as in the notebook's first cell, re-declared in this cell.
CONNECTION_URL = "https://svc.blockdaemon.com/ethereum/mainnet/native"


# Request headers; the API key is read when this cell runs.
OPTIONS = {
    "headers": {
        "accept": "application/json",
        "X-API-Key": get_api_key()
    }
}

def get_block_data(block_number, errors_file, timeout=30):
    """Fetch one block with ``eth_getBlockByNumber`` and format it as a CSV row.

    Args:
        block_number: Block identifier sent as the first RPC parameter
            (callers pass the hex string taken from a receipt).
        errors_file: Path of the file that failures are appended to.
        timeout: Seconds to wait for the HTTP response, so a stalled
            connection cannot block a worker thread forever.

    Returns:
        ``"<block number>,<transaction count>,<timestamp>\\n"`` with the block
        number and timestamp converted from hex to decimal, or
        ``"null,null,null\\n"`` when the request fails in any way (the failure
        is appended to ``errors_file``).
    """
    payload = {
        "id": 1,
        "jsonrpc": "2.0",
        # Second parameter False: presumably asks for transaction hashes only
        # rather than full transaction objects -- only their count is used here.
        "params": [block_number, False],
        "method": "eth_getBlockByNumber"
    }

    try:
        response = requests.post(
            CONNECTION_URL,
            headers=OPTIONS['headers'],
            json=payload,
            timeout=timeout,
        )

        if response.status_code == 200:
            block = response.json()["result"]
            block_no = int(block["number"], 16)
            timestamp = int(block["timestamp"], 16)
            transactions = len(block["transactions"])
            return f"{block_no},{transactions},{timestamp}\n"

        # Log the actual HTTP status together with the block it belongs to.
        with open(errors_file, "a") as error_file:
            error_file.write(f"Error code: {response.status_code}, block: {block_number}\n")
        return "null,null,null\n"
    except Exception as e:
        # Also reached when a 200 response has no usable "result".
        with open(errors_file, "a") as error_file:
            error_file.write(f"Error retrieving block: {block_number}, {e}\n")
        return "null,null,null\n"

def get_blocks_data(folder_name):
    """Write ``blocks.csv`` with one row per receipt in a folder's ``tx_receipts.json``.

    For every receipt that carries a ``blockNumber`` the block is fetched with
    ``get_block_data`` on a thread pool; rows are written in completion order.

    The output file is recreated on every call. The previous append mode wrote
    the header again on each run, which left header lines in the middle of the
    data and duplicated every row.

    Args:
        folder_name: Sub-folder of ``./data-get-logs`` to read from and write to.
    """
    input_file = f'./data-get-logs/{folder_name}/tx_receipts.json'
    output_file = f'./data-get-logs/{folder_name}/blocks.csv'
    errors_file = f'./data-get-logs/{folder_name}/errors.txt'

    with open(input_file, 'r') as file:
        tx_receipts = json.load(file)

    print(f"Extracting block number and Unix timestamp from {len(tx_receipts)} transaction receipts...")

    # Skip placeholder entries (null / []) left by failed receipt lookups and
    # receipts without a block number instead of crashing on them.
    block_numbers = [
        tx["blockNumber"]
        for tx in tx_receipts
        if isinstance(tx, dict) and tx.get("blockNumber") is not None
    ]

    print(f"Extracted {len(block_numbers)} block numbers...")

    with open(output_file, "w") as blocks_file:
        blocks_file.write("block_number,transactions,timestamp\n")

        with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
            # Submit one task per receipt's block number
            futures = {
                executor.submit(get_block_data, block_number, errors_file): block_number
                for block_number in block_numbers
            }

            # Process the completed tasks and write to the file
            for future in concurrent.futures.as_completed(futures):
                block_number = futures[future]
                try:
                    blocks_file.write(future.result())
                except Exception as e:
                    print(f"Error processing block {block_number}: {e}")

    print(f'Extracted block number and Unix timestamp to {output_file}')

In [37]:
get_blocks_data('logs-01Jan-28Apr-deps')

Extracting block number and Unix timestamp from 43989 transaction receipts...
Extracted 43989 block numbers...
Extracted block number and Unix timestamp to ./data-get-logs/logs-01Jan-28Apr-deps/blocks.csv


In [38]:
get_blocks_data('logs-01Jan-28Apr-withds')

Extracting block number and Unix timestamp from 25470 transaction receipts...
Extracted 25470 block numbers...
Extracted block number and Unix timestamp to ./data-get-logs/logs-01Jan-28Apr-withds/blocks.csv


In [106]:
get_blocks_data('logs-28Apr-today')

Extracting block number and Unix timestamp from 58037 transaction receipts...
Extracted 58037 block numbers...
Extracted block number and Unix timestamp to ./data-get-logs/logs-28Apr-today/blocks.csv


In [39]:
def retrieve_missing_blocks(folder_name):
    """Fetch the blocks that receipts refer to but ``blocks.csv`` does not contain.

    Block numbers from ``tx_receipts.json`` (hex strings) are compared with the
    ``block_number`` column of ``blocks.csv`` (decimal). Each block found only
    in the receipts is fetched with ``get_block_data`` and appended to
    ``blocks.csv``; failures are logged to ``errors_2.txt``.

    Args:
        folder_name: Sub-folder of ``./data-get-logs`` to work in.
    """
    blocks_file = f'./data-get-logs/{folder_name}/blocks.csv'
    receipts_file = f'./data-get-logs/{folder_name}/tx_receipts.json'
    errors_file = f'./data-get-logs/{folder_name}/errors_2.txt'

    blocks_data = pd.read_csv(blocks_file)

    with open(receipts_file, 'r') as file:
        tx_receipts = json.load(file)

    # Skip placeholder entries and receipts without a block number.
    block_numbers = [
        tx["blockNumber"]
        for tx in tx_receipts
        if isinstance(tx, dict) and tx.get("blockNumber") is not None
    ]

    print("Loaded block numbers and block data...")

    print(len(block_numbers))
    print(len(blocks_data['block_number']))

    # "null,null,null" rows from failed fetches are read as NaN and turn the
    # column into floats, which hex() rejects. Drop them and go back to ints.
    known_blocks = (
        pd.to_numeric(blocks_data['block_number'], errors='coerce')
        .dropna()
        .astype('int64')
    )
    missing_blocks = set(block_numbers) - {hex(int(block)) for block in known_blocks}

    print(f"Total missing blocks: {len(missing_blocks)}")

    if not missing_blocks:
        return

    # Separate name for the handle: rebinding `blocks_file` to the file object
    # made the second open() call fail.
    with open(blocks_file, 'a') as blocks_out:
        for missing_block in missing_blocks:
            blocks_out.write(get_block_data(missing_block, errors_file))

In [40]:
retrieve_missing_blocks('logs-01Jan-28Apr-deps')

Loaded block numbers and block data...
43989
43989
Total missing blocks: 0


In [64]:
retrieve_missing_blocks('logs-01Jan-28Apr-withds')

Loaded block numbers and block data...
25470
25468
Total missing blocks: 1


In [107]:
retrieve_missing_blocks('logs-28Apr-today')

Loaded block numbers and block data...
58037
58036
Total missing blocks: 1


Merge with other data from Blockdaemon:

### logs-01Jan-28Apr-withds logs-01Jan-28Apr-deps

In [59]:
# firstly merge the ones in this script
# Concatenate the deduplicated withdrawal and deposit receipts of the first
# interval into one intermediate file (consumed and deleted by the next cell).
file1 = './data-get-logs/logs-01Jan-28Apr-withds/unique_tx_receipts.json'
file2 = './data-get-logs/logs-01Jan-28Apr-deps/unique_tx_receipts.json'
output_file = './merged-data/logs-01Jan-28Apr/unique_tx_receipts-rpc.json'
merge_json_files(file1, file2, output_file)

Number of transaction receipts in ./data-get-logs/logs-01Jan-28Apr-withds/unique_tx_receipts.json: 25470
Number of transaction receipts in ./data-get-logs/logs-01Jan-28Apr-deps/unique_tx_receipts.json: 43989
Number of transaction receipts in ./merged-data/logs-01Jan-28Apr/unique_tx_receipts-rpc.json: 69459


In [60]:
# Combine the RPC receipts with the ones under ./data-bd, then deduplicate.
# remove_duplicates writes its result to unique_tx_receipts.json in the same folder.
file1 = './merged-data/logs-01Jan-28Apr/unique_tx_receipts-rpc.json'
file2 = './data-bd/logs-01Jan-28Apr/unique_tx_receipts.json'
output_file = './merged-data/logs-01Jan-28Apr/unique_tx_receipts-merged.json'
merge_json_files(file1, file2, output_file)
# delete file 1
# NOTE: file1 is the output of the previous cell, so this cell cannot be
# re-run on its own once the file has been removed.
os.remove(file1)

remove_duplicates('./merged-data/logs-01Jan-28Apr', "unique_tx_receipts-merged.json")

Number of transaction receipts in ./merged-data/logs-01Jan-28Apr/unique_tx_receipts-rpc.json: 69459
Number of transaction receipts in ./data-bd/logs-01Jan-28Apr/unique_tx_receipts.json: 53601
Number of transaction receipts in ./merged-data/logs-01Jan-28Apr/unique_tx_receipts-merged.json: 123060
Unique receipts saved to ./merged-data/logs-01Jan-28Apr/unique_tx_receipts.json
Removed 50240 duplicate receipts
Total receipts: 123060
Total unique receipts: 72820


In [62]:
file1 = './data-bd/logs-01Jan-28Apr/unique_tx_receipts.json'
file2 = './merged-data/logs-01Jan-28Apr/unique_tx_receipts.json'
output_file = './merged-data/logs-01Jan-28Apr/left_txs.json'

# Keep the receipts of file2 whose transaction hash does not occur in file1
# and save them as JSON under merged-data.
with open(file1, 'r') as f:
    receipts1 = json.load(f)
already_loaded_hashes = {r.get('transactionHash') for r in receipts1}

with open(file2, 'r') as f:
    receipts2 = json.load(f)

left_txs = [
    receipt
    for receipt in receipts2
    if receipt.get('transactionHash') not in already_loaded_hashes
]

print(f"Total transactions left: {len(left_txs)}")
with open(output_file, 'w') as f:
    json.dump(left_txs, f)

Total transactions left: 19219


### logs-28Apr-today

In [110]:
# Merge withdrawal receipts from data-bd/logs-28Apr-today with the unique_tx_receipts.json file in merged-data/logs-28Apr-today
# Concatenate the withdrawal receipts under ./data-bd with the deduplicated
# RPC receipts of the second interval; duplicates are removed in the next cell.
file1 = './data-bd/logs-28Apr-today/withdrawal_receipts.json'
file2 = './data-get-logs/logs-28Apr-today/unique_tx_receipts.json'
output_file = './merged-data/logs-28Apr-today/tx_receipts-merged.json'
merge_json_files(file1, file2, output_file)

Number of transaction receipts in ./data-bd/logs-28Apr-today/withdrawal_receipts.json: 19482
Number of transaction receipts in ./data-get-logs/logs-28Apr-today/unique_tx_receipts.json: 58037
Number of transaction receipts in ./merged-data/logs-28Apr-today/tx_receipts-merged.json: 77519


In [111]:
remove_duplicates('./merged-data/logs-28Apr-today', 'tx_receipts-merged.json')

Unique receipts saved to ./merged-data/logs-28Apr-today/unique_tx_receipts.json
Removed 19482 duplicate receipts
Total receipts: 77519
Total unique receipts: 58037


In [112]:
file1 = './data-bd/logs-28Apr-today/withdrawal_receipts.json'
file2 = './merged-data/logs-28Apr-today/unique_tx_receipts.json'
output_file = './merged-data/logs-28Apr-today/left_txs.json'

# Keep the receipts of file2 whose transaction hash does not occur in file1.
with open(file1, 'r') as f:
    receipts1 = json.load(f)
already_loaded_hashes = {r.get('transactionHash') for r in receipts1}

with open(file2, 'r') as f:
    receipts2 = json.load(f)

left_txs = [
    receipt
    for receipt in receipts2
    if receipt.get('transactionHash') not in already_loaded_hashes
]

print(f"Total transactions left: {len(left_txs)}")
with open(output_file, 'w') as f:
    json.dump(left_txs, f)

Total transactions left: 38555
