In [None]:
import json
import time
import os
import random
from pathlib import Path
import requests
from requests.exceptions import HTTPError
from web3 import Web3
from concurrent.futures import ThreadPoolExecutor, as_completed
import concurrent.futures
import sys
import threading
from functools import wraps
from web3.exceptions import TransactionNotFound, TimeExhausted
from requests.exceptions import ReadTimeout, ConnectionError
import json
from hexbytes import HexBytes
from web3.datastructures import AttributeDict

# === CONFIGURATION ===
# Credentials are taken from the environment when set, so they can be rotated
# without editing the notebook. The literal fallbacks keep the previous
# behaviour on machines where the variables are not defined.
# NOTE(review): the fallback values are committed in plain text -- rotate them
# and drop the literals once every environment exports the variables.
INFURA_URL = os.environ.get(
    "INFURA_URL",
    "https://mainnet.infura.io/v3/3921fc62a7ce4cda98926f47409b3d19",
)
ETHERSCAN_API_KEY = os.environ.get(
    "ETHERSCAN_API_KEY",
    "F7K9BTHSSB9EQT9WEGHMG3VFJ54KA8RM1K",
)

CONTRACT_ADDRESS = POOL_ADDRESS = "0xCBCdF9626bC03E24f779434178A73a0B4bad62eD"
ABI_FILE = "WETH_WBTC_pool.json"  # Load your contract ABI file
BLOCKS_FILE = "blocks_data.json"
TRANSACTIONS_FILE = "transactions.json"
METADATA_FILE = "processed_blocks.json"
BATCH_SIZE = 1000  # Number of transactions to process before writing to disk

# === CONNECT TO ETHEREUM NODE ===
# The notebook talks to a local JSON-RPC node with a 60 second request timeout.
# To use Infura instead, pass INFURA_URL to Web3.HTTPProvider.
w3 = Web3(
    Web3.HTTPProvider("http://127.0.0.1:8545", request_kwargs={"timeout": 60})
)
assert w3.is_connected(), "Web3 provider connection failed"

In [None]:
# --------------------
# Helper Function: Get ABI from Etherscan or Disk
# --------------------
def get_abi(contract_address: str, api_key: str) -> list:
    """
    Retrieves the ABI for a given contract address.

    The ABI is looked up in the local 'ABI' folder first (one JSON file per
    contract address). On a cache miss it is fetched from the Etherscan
    ``getabi`` endpoint and written to that folder for future calls.

    Parameters:
        contract_address (str): The contract address (checksum not required here).
        api_key (str): Your Etherscan API key.

    Returns:
        list: The ABI loaded as a Python list.

    Raises:
        Exception: If Etherscan answers with a non-200 status code, or if the
            response payload does not report ``status == "1"``.
        requests.exceptions.RequestException: If the HTTP request itself fails
            or does not complete within the timeout.
    """
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    abi_folder = "ABI"
    os.makedirs(abi_folder, exist_ok=True)

    # The cache file is named after the contract address.
    filename = os.path.join(abi_folder, f"{contract_address}.json")

    # Cache hit: no network access needed.
    if os.path.exists(filename):
        with open(filename, "r") as file:
            return json.load(file)

    # Cache miss: query Etherscan. The timeout keeps a stalled connection from
    # blocking the notebook indefinitely.
    url = f"https://api.etherscan.io/api?module=contract&action=getabi&address={contract_address}&apikey={api_key}"
    response = requests.get(url, timeout=30)
    if response.status_code != 200:
        raise Exception(
            "Error connecting to the Etherscan API. Status code: "
            + str(response.status_code)
        )

    data = response.json()
    # .get() turns a malformed payload into the descriptive error below instead
    # of a bare KeyError.
    if data.get("status") != "1":
        raise Exception(
            f"Error fetching ABI for contract {contract_address}: {data.get('result')}"
        )

    # The ABI arrives as a JSON string nested inside the JSON response.
    abi = json.loads(data["result"])
    with open(filename, "w") as file:
        json.dump(abi, file)
    return abi


def to_dict(obj):
    """Recursively converts AttributeDict and HexBytes to standard Python types.

    Mappings (plain dicts and web3 ``AttributeDict`` objects) become plain
    dicts, lists are converted element by element, and ``HexBytes`` values are
    replaced by the string returned by their ``hex()`` method. Any other value
    is returned unchanged.
    """
    # AttributeDict implements the Mapping protocol but does not subclass dict,
    # so it has to be named explicitly or it would be returned unconverted.
    if isinstance(obj, (dict, AttributeDict)):
        return {k: to_dict(v) for k, v in obj.items()}
    elif isinstance(obj, list):
        return [to_dict(i) for i in obj]
    elif isinstance(obj, HexBytes):
        return obj.hex()  # Convert HexBytes to hex string
    return obj  # Return the value unchanged if it's not a special type



class Web3JSONEncoder(json.JSONEncoder):
    """JSON encoder aware of the web3 value types used in this notebook.

    ``HexBytes`` values are emitted as the string returned by ``hex()`` and
    ``AttributeDict`` values as plain dicts. Every other unsupported type is
    deferred to ``json.JSONEncoder.default``.
    """

    def default(self, obj):
        # Anything that is not a known web3 type goes to the base class.
        if not isinstance(obj, (HexBytes, AttributeDict)):
            return super().default(obj)
        # HexBytes -> hex string; AttributeDict -> plain dict, whose contents
        # the encoder then serialises in turn.
        return obj.hex() if isinstance(obj, HexBytes) else dict(obj)


# Usage example:
#   json.dumps(your_web3_object, cls=Web3JSONEncoder, indent=2)

In [None]:
# Quick calls against the connected node. The balance returned by the first
# call is not assigned to a name, and because it is not the last expression of
# the cell it is not displayed either.
w3.eth.get_balance("0x95540EDD3edA578295e65DE021e4f42f5363b40D")
# Block 0 with full transaction objects, followed by its transaction count.
block = w3.eth.get_block(0, full_transactions=True)
tx_count = w3.eth.get_block_transaction_count(0)

In [None]:
def find_transaction(start_block, end_block, max_workers=8):
    """Return the numbers of the blocks in a range that contain transactions.

    Each block in ``[start_block, end_block]`` (both ends inclusive) is queried
    for its transaction count using a thread pool.

    Parameters:
        start_block (int): First block number of the range.
        end_block (int): Last block number of the range (inclusive).
        max_workers (int): Size of the thread pool used for the queries.

    Returns:
        list: Block numbers with at least one transaction, in ascending order.
            Empty when ``start_block > end_block``.
    """

    def has_transactions(number):
        # True when the node reports a non-zero transaction count.
        return w3.eth.get_block_transaction_count(number) > 0

    block_numbers = range(start_block, end_block + 1)

    # executor.map yields results in input order, so the output is
    # deterministic regardless of which request finishes first.
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        flags = list(executor.map(has_transactions, block_numbers))

    # Filter on the explicit flag rather than on the truthiness of the block
    # number, so that block 0 is not silently dropped.
    return [number for number, flag in zip(block_numbers, flags) if flag]

In [None]:
def get_transaction_data(block_list, max_workers=8, max_retries=5, backoff_factor=1.5):
    """Fetch the full transactions of every block in ``block_list``.

    Blocks are requested concurrently. A failed request is retried with an
    exponentially growing delay plus a small random jitter.

    Parameters:
        block_list (iterable): Block numbers to fetch.
        max_workers (int): Size of the thread pool.
        max_retries (int): Maximum number of attempts per block. Values below 1
            are treated as 1 so that every block is tried at least once.
        backoff_factor (float): Multiplier applied to the delay after each
            failed attempt (the first delay is 1 second).

    Returns:
        list: The transactions of all blocks, concatenated in the order of
            ``block_list``.

    Raises:
        Exception: The last error raised for a block once all of its attempts
            have failed.
    """
    # Guarantee at least one attempt; otherwise the retry loop would be skipped
    # and there would be no error to re-raise.
    attempts = max(1, max_retries)

    def fetch_block_transactions(block_number):
        delay = 1  # initial backoff in seconds

        for attempt in range(1, attempts + 1):
            try:
                return w3.eth.get_block(block_number, full_transactions=True).transactions

            except (ReadTimeout, ConnectionError) as network_err:
                # HTTP/network-level timeout or disconnection
                last_error = network_err
                msg = f"Network timeout/connection error on attempt {attempt}/{attempts}"

            except TimeExhausted as te:
                # Web3-specific timeout
                last_error = te
                msg = f"Web3 TimeExhausted on attempt {attempt}/{attempts}"

            except TransactionNotFound as nf:
                last_error = nf
                msg = f"TransactionNotFound on attempt {attempt}/{attempts}"

            except Exception as other:
                # Catch-all: any other error is treated as transient as well
                last_error = other
                msg = f"Unexpected error on attempt {attempt}/{attempts}: {other}"

            if attempt == attempts:
                # Out of attempts: surface the error immediately instead of
                # sleeping once more for a retry that will never happen.
                print(f"{msg}. Giving up on block {block_number}.")
                raise last_error

            # Log and back off before retrying
            print(f"{msg}. Retrying in {delay:.1f}s...")
            time.sleep(delay + random.uniform(0, 0.5))
            delay *= backoff_factor

    transaction_data = []
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # map() preserves the order of block_list, keeping the output stable
        # between runs.
        for transactions in executor.map(fetch_block_transactions, block_list):
            transaction_data.extend(transactions)

    return transaction_data

In [None]:
def get_only_smartcontract(transaction_data_list, max_workers=8):
    """Extract contract-creation records from a list of transactions.

    A transaction whose ``to`` field is ``None`` is treated as a contract
    creation: its receipt is fetched and the ``contractAddress`` found there is
    recorded together with the transaction's block number. Transactions with a
    recipient are ignored.

    Parameters:
        transaction_data_list (iterable): Transaction objects exposing ``to``,
            ``hash`` and ``blockNumber`` attributes.
        max_workers (int): Size of the thread pool used for receipt lookups.

    Returns:
        tuple: ``(data, missing_transaction)`` where ``data`` is a list of
            ``{'block_number': ..., 'contract_address': ...}`` dicts and
            ``missing_transaction`` is the list of hashes whose lookup failed.
            Both lists follow the order of ``transaction_data_list``.
    """

    def inspect_transaction(tx):
        # Returns (record, failed_hash); at most one of the two is not None.
        if tx.to is not None:
            return None, None
        try:
            receipt = w3.eth.get_transaction_receipt(tx.hash)
            record = {
                'block_number': tx.blockNumber,
                'contract_address': receipt.contractAddress
            }
        except Exception:
            # Best effort: remember the hash so the caller can retry later.
            # Unlike a bare except, this lets KeyboardInterrupt/SystemExit
            # propagate.
            return None, tx.hash
        return record, None

    data = []
    missing_transaction = []

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Results are gathered in the main thread and in input order, so no
        # list is mutated from worker threads.
        for record, failed_hash in executor.map(inspect_transaction, transaction_data_list):
            if record is not None:
                data.append(record)
            elif failed_hash is not None:
                missing_transaction.append(failed_hash)

    return data, missing_transaction

In [None]:
# Block range scanned by this notebook; find_transaction treats both ends as
# inclusive.
start_block = 6600000  # Replace with your desired start block
end_block = start_block + 5000 # Replace with your desired end block


# Block numbers in the range for which the node reports a transaction count > 0.
block_with_transaction_list = find_transaction(start_block, end_block)

In [None]:
# Fetch the full transactions of the selected blocks, then write them to disk
# as a single JSON document (Web3JSONEncoder serialises the HexBytes and
# AttributeDict values). Note that the file has a .txt extension but holds JSON.
transaction_data_list = get_transaction_data(block_with_transaction_list)
with open("transaction_data_list.txt", "w") as f:
    f.write(json.dumps(transaction_data_list, cls=Web3JSONEncoder))

In [None]:
# `data`: block number and contract address for each contract-creation
# transaction; `missing`: hashes of the transactions whose receipt lookup failed.
data, missing = get_only_smartcontract(transaction_data_list)

In [None]:
def retrieve_missing_tx_receipt(missing_tx_list, delay=2):
    """Retry fetching the receipts of transactions that failed earlier.

    Parameters:
        missing_tx_list (iterable): Transaction hashes, or transaction objects
            exposing a ``hash`` attribute. The input is not modified.
        delay (float): Seconds to wait before each request (throttling).

    Returns:
        list: The receipts that could be retrieved, in input order.
    """
    missing_tx_data = []
    for tx in missing_tx_list:
        # Entries are normally bare hashes; also accept objects with `.hash`.
        tx_hash = getattr(tx, "hash", tx)
        time.sleep(delay)
        try:
            receipt = w3.eth.get_transaction_receipt(tx_hash)
        except Exception:
            # Best effort: report the hash and move on to the next one.
            label = tx_hash.hex() if hasattr(tx_hash, "hex") else tx_hash
            print(f"doesn't exist really {label}")
        else:
            # Collect into the result list -- appending to the input list would
            # extend the sequence being iterated and leave the result empty.
            missing_tx_data.append(receipt)
    return missing_tx_data


def retrieve_missing_tx(missing_tx_list, delay=2):
    """Retry fetching the full transactions for hashes that failed earlier.

    Parameters:
        missing_tx_list (iterable): Transaction hashes, or transaction objects
            exposing a ``hash`` attribute. The input is not modified.
        delay (float): Seconds to wait before each request (throttling).

    Returns:
        list: The transactions that could be retrieved, in input order.
    """
    missing_tx_data = []
    for tx in missing_tx_list:
        # Entries are normally bare hashes; also accept objects with `.hash`.
        tx_hash = getattr(tx, "hash", tx)
        time.sleep(delay)
        try:
            transaction = w3.eth.get_transaction(tx_hash)
        except Exception:
            # Best effort: report the hash and move on to the next one.
            label = tx_hash.hex() if hasattr(tx_hash, "hex") else tx_hash
            print(f"doesn't exist really {label}")
        else:
            # Collect into the result list -- appending to the input list would
            # extend the sequence being iterated and leave the result empty.
            missing_tx_data.append(transaction)
    return missing_tx_data

# Retry the entries of `missing`, once as full transactions and once as
# receipts, then report how many results each helper returned.
lol_full = retrieve_missing_tx(missing)
lol_receipt = retrieve_missing_tx_receipt(missing)

print(f"full: {len(lol_full)}")
print(f"receipt: {len(lol_receipt)}")

In [None]:
# Liquidity Uniswap v1 Analysis
UNISWAP_MAINNET_SMARTCONTRACT_FACTORY = "0xc0a47dFe034B400B47bDaD5FecDa2621de6c4d95"
# Prefer the key exported in the environment; the literal fallback keeps the
# previous behaviour.
# NOTE(review): the fallback key is committed in plain text -- rotate it and
# remove the literal once the environment variable is available everywhere.
ETHERSCAN_API_KEY = os.environ.get(
    "ETHERSCAN_API_KEY",
    "F7K9BTHSSB9EQT9WEGHMG3VFJ54KA8RM1K",
)


# Normalise the address once and use the checksummed form both for the ABI
# lookup and for the contract binding.
factory_address = w3.to_checksum_address(UNISWAP_MAINNET_SMARTCONTRACT_FACTORY)
factory_abi = get_abi(factory_address, ETHERSCAN_API_KEY)

factory = w3.eth.contract(
    address=factory_address,
    abi=factory_abi,  # includes NewExchange
)


In [None]:
# Display the address the factory contract object is bound to.
factory.address

In [None]:
# Query the factory's NewExchange events between start_block and end_block
# (both set in an earlier cell) and print one tuple per event.
logs = factory.events.NewExchange.get_logs(from_block=start_block, to_block=end_block)
for ev in logs:
    # Event arguments as named on the event, plus the block the log came from.
    token_addr = ev.args.token
    exchange_addr = ev.args.exchange
    block_num = ev.blockNumber
    print((token_addr, exchange_addr, block_num))
    # record (block_num, exchange_addr, token_addr)