In [None]:
import traceback
import json
import time
import os
import random
import gc
import logging
import tempfile
from concurrent.futures import ThreadPoolExecutor, as_completed
from pprint import pprint
from web3.logs import STRICT, IGNORE, DISCARD, WARN
import pandas as pd
import numpy as np
from numba import njit
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from decimal import Decimal
from hexbytes import HexBytes
import requests
from requests.exceptions import HTTPError
from datetime import datetime, timezone
from web3 import Web3
from web3.exceptions import Web3RPCError,TransactionNotFound, BlockNotFound
from web3.providers.rpc.utils import (
    ExceptionRetryConfiguration,
    REQUEST_RETRY_ALLOWLIST,
)
from collections import defaultdict, OrderedDict
pd.options.display.float_format = "{:20,.4f}".format
# === CONFIGURATION ===
# Per-profile credentials. Everything is read from environment variables so no
# secret lives in the notebook; the profiles differ only in their Infura URL.
ETHERSCAN_API_KEY_DICT = {
    "hearthquake": {
        "INFURA_URL": os.getenv("INFURA_URL_HEARTHQUAKE"),
        "ETHERSCAN_API_KEY": os.getenv("ETHERSCAN_API_KEY"),
    },
    "opensee": {
        "INFURA_URL": os.getenv("INFURA_URL_OPENSEE"),
        "ETHERSCAN_API_KEY": os.getenv("ETHERSCAN_API_KEY"),
    },
    "eco": {
        "INFURA_URL": os.getenv("INFURA_URL_ECO"),
        "ETHERSCAN_API_KEY": os.getenv("ETHERSCAN_API_KEY"),
    },
}

# Active profile.
INFURA_URL = ETHERSCAN_API_KEY_DICT["hearthquake"]["INFURA_URL"]
ETHERSCAN_API_KEY = ETHERSCAN_API_KEY_DICT["hearthquake"]["ETHERSCAN_API_KEY"]
# Contract on which `getToken(...)` is called to resolve a token address
# (presumably the Uniswap V1 factory; verify). An earlier assignment of another
# address to this same name was dead code (immediately overwritten) and has
# been removed.
UNISWAP_V1_CONTRACT = "0xc0a47dFe034B400B47bDaD5FecDa2621de6c4d95"
OUTPUT_FILE = "out/final_tx.jsonl"
STATE_FILE = "out/final_scan_state.json"
TOKEN_NAME_FILE = "out/token_name.json"
# Token name/symbol cache. On a first run the file does not exist yet, so start
# from an empty cache instead of crashing.
try:
    with open(TOKEN_NAME_FILE, "r", encoding="utf-8") as f:
        GLOBAL_DICT_TOKEN_SYMBOL = json.load(f)
except FileNotFoundError:
    GLOBAL_DICT_TOKEN_SYMBOL = {}

# EVENTS = {
#     "0xc0a47dFe034B400B47bDaD5FecDa2621de6c4d95":{

#     },
#     "0x255e60c9d597dCAA66006A904eD36424F7B26286": {
#         "AddLiquidity": {},
#         "RemoveLiquidity": {},
#         "Transfer": {},
#         "EthPurchase": {},
#         "TokenPurchase": {},
#         "Approval": {},
#     },
#     "0xF173214C720f58E03e194085B1DB28B50aCDeeaD": {
#         "AddLiquidity": {},
#         "RemoveLiquidity": {},
#         "Transfer": {},
#         "EthPurchase": {},
#         "TokenPurchase": {},
#         "Approval": {},
#     },
# }

# Root logger at INFO with a single stream handler.
logger = logging.getLogger()
logger.setLevel(logging.INFO)

if not logger.handlers:  # avoid duplicate logs when the cell is re-run
    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s"))
    logger.addHandler(handler)

# HTTP provider with automatic retries for the allow-listed RPC methods.
w3 = Web3(
    Web3.HTTPProvider(
        endpoint_uri=INFURA_URL,
        request_kwargs={"timeout": 30},  # adjust as needed
        exception_retry_configuration=ExceptionRetryConfiguration(
            errors=(
                ConnectionError,
                HTTPError,
                TimeoutError,
                # requests' own transport errors do not derive from the
                # builtin ConnectionError / TimeoutError, so list them
                # explicitly or dropped connections and request timeouts
                # are never retried.
                requests.exceptions.ConnectionError,
                requests.exceptions.Timeout,
            ),
            retries=5,
            backoff_factor=1,
            method_allowlist=REQUEST_RETRY_ALLOWLIST,
        ),
    )
    # Web3.HTTPProvider("http://127.0.0.1:8545")
)

# Explicit check instead of `assert`, which is stripped under `python -O`.
if not w3.is_connected():
    raise ConnectionError("Web3 provider connection failed")
w3.eth.get_block('latest').number

In [None]:
# --------------------
# Helper Function: Get ABI from Etherscan or Disk
# --------------------
def get_abi(contract_address: str, api_key: str) -> list:
    """
    Retrieves the ABI for a given contract address.

    The local 'ABI' folder is checked first (one JSON file per contract
    address). On a miss the ABI is fetched from the Etherscan v2 API using the
    provided API key, then saved to disk for future use.

    Parameters:
        contract_address (str): The contract address. It is used verbatim both
            as the cache file name and as the `address` query parameter.
        api_key (str): Your Etherscan API key.

    Returns:
        list: The ABI loaded as a Python list.

    Raises:
        RuntimeError: If the HTTP request fails, the response is not JSON, or
            Etherscan does not report status "1".
    """
    # Ensure the ABI folder exists.
    abi_folder = "ABI"
    os.makedirs(abi_folder, exist_ok=True)

    # The cache file name is based on the contract address.
    filename = os.path.join(abi_folder, f"{contract_address}.json")

    # Cache hit: load and return the ABI.
    if os.path.exists(filename):
        with open(filename, "r") as file:
            return json.load(file)

    url = f"https://api.etherscan.io/v2/api?chainid=1&module=contract&action=getabi&address={contract_address}&apikey={api_key}"
    try:
        response = requests.get(url, timeout=30)
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        raise RuntimeError(
            f"Error fetching ABI for contract {contract_address}: {e}"
        ) from e

    # Previously a failed lookup built an Exception without raising it and then
    # hit an unbound `abi`; fail loudly with the Etherscan message instead.
    if data.get("status") != "1":
        raise RuntimeError(
            f"Error fetching ABI for contract {contract_address}: {data.get('result')}"
        )

    # Parse the ABI and save it for later use.
    abi = json.loads(data["result"])
    with open(filename, "w") as file:
        json.dump(abi, file)
    return abi

# -----------------------
# Helper: Convert event to dict
# -----------------------
def event_to_dict(event):
    """
    Return a plain-dict copy of `event`.

    The nested "args" mapping (when present) is converted to a dict as well,
    and the "transactionHash" / "blockHash" values (when present) are replaced
    by the result of their `.hex()` method.
    """
    result = dict(event)
    if "args" in result:
        result["args"] = dict(result["args"])
    for hash_key in ("transactionHash", "blockHash"):
        if hash_key in result:
            result[hash_key] = result[hash_key].hex()
    return result


class Web3JSONEncoder(json.JSONEncoder):
    """JSON encoder that serializes HexBytes values as their hex string."""

    def default(self, obj):
        # Anything that is not HexBytes is delegated to the base encoder,
        # which raises TypeError for unsupported types.
        if not isinstance(obj, HexBytes):
            return super().default(obj)
        return obj.hex()


# -----------------------
# ETHERSCAN VERSION
# Used to find at which block 1 contract has been deployed
# Might be useful later, put it in JSON in the end
# -----------------------
def get_contract_creation_block_etherscan(contract_address: str, etherscan_api_key: str) -> int:
    """
    Retrieves the contract creation block from Etherscan.

    Uses the same Etherscan v2 endpoint (chainid=1) as `get_abi`, instead of
    the legacy `/api` endpoint.

    Parameters:
        contract_address (str): Address of the contract to look up.
        etherscan_api_key (str): Your Etherscan API key.

    Returns:
        int: The block number of the first creation record returned.

    Raises:
        Exception: If Etherscan reports an error or returns no creation data.
    """
    url = (f"https://api.etherscan.io/v2/api?chainid=1&module=contract&action=getcontractcreation"
           f"&contractaddresses={contract_address}&apikey={etherscan_api_key}")
    response = requests.get(url, timeout=30)
    data = response.json()

    if data.get("status") != "1":
        # "result" is not guaranteed to be a string; format it rather than
        # concatenating, so the real error is not hidden by a TypeError.
        raise Exception(f"Error fetching creation block: {data.get('result', 'Unknown error')}")

    results = data.get("result") or []
    if not results:
        raise Exception("No contract creation data found.")
    return int(results[0]["blockNumber"])

# -----------------------
# Used to find at which block 1 contract has been deployed
# Might be useful later, put it in JSON in the end
# -----------------------
def get_contract_creation_block_custom(start_block=0, end_block=100000):
    """
    Scan blocks [start_block, end_block] for contract deployments and write
    them to "contract_deployments.json".

    A transaction counts as a deployment when its `to` field is None and its
    receipt carries a `contractAddress`. Blocks are fetched in parallel through
    the module-level `w3` instance.

    Parameters:
        start_block (int): First block to scan (inclusive).
        end_block (int): Last block to scan (inclusive).

    Returns:
        None. The result is a JSON list of
        {"block_number": ..., "contract_address": ...} objects on disk.
    """

    def get_contract_deployments(start_block, end_block, max_workers=8):
        deployments = []

        def process_block(block_number):
            block = w3.eth.get_block(block_number, full_transactions=True)
            block_deployments = []
            for tx in block.transactions:
                if tx.to is not None:
                    continue
                try:
                    receipt = w3.eth.get_transaction_receipt(tx.hash)
                except Exception as e:
                    # Best effort: report the failing transaction and carry on.
                    # (A bare `except:` would also swallow KeyboardInterrupt.)
                    logging.warning(f"Could not fetch receipt for {tx.hash.hex()}: {e}")
                    continue
                contract_address = receipt.contractAddress
                if contract_address:
                    block_deployments.append(
                        {
                            "block_number": block_number,
                            "contract_address": contract_address,
                        }
                    )
            return block_deployments

        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            futures = [
                executor.submit(process_block, bn)
                for bn in range(start_block, end_block + 1)
            ]
            for future in as_completed(futures):
                deployments.extend(future.result())

        # Futures complete in arbitrary order; sort so the output file is
        # deterministic (stable sort keeps the in-block order).
        deployments.sort(key=lambda d: d["block_number"])
        return deployments

    deployments = get_contract_deployments(start_block, end_block)

    # Save the results to a JSON file
    with open("contract_deployments.json", "w") as f:
        json.dump(deployments, f, indent=4)

# -- Step 2: Reconstruct an Event’s Signature --
def get_event_signature(event_name: str, abi: list) -> str:
    """
    Return the hex-encoded keccak hash of an event's signature.

    The first ABI entry of type "event" named `event_name` is used; its input
    types are joined into a string such as "Transfer(address,address,uint256)"
    which is then hashed.

    Raises:
        ValueError: If the ABI contains no event with that name.
    """
    from eth_utils import keccak, encode_hex

    candidates = (
        entry
        for entry in abi
        if entry.get("type") == "event" and entry.get("name") == event_name
    )
    event_def = next(candidates, None)
    if event_def is None:
        raise ValueError(f"Event {event_name} not found in ABI.")

    # e.g. "Transfer(address,address,uint256)"
    arg_types = [param["type"] for param in event_def.get("inputs", [])]
    signature = f"{event_name}({','.join(arg_types)})"
    return encode_hex(keccak(text=signature))

def block_to_utc(block_number):
    """
    Convert a block number into its UTC timestamp.

    The block is fetched through the module-level `w3` instance.

    Parameters:
        block_number (int): The block number

    Returns:
        str: The block timestamp in UTC, formatted with `datetime.isoformat()`
    """
    block_timestamp = w3.eth.get_block(block_number)["timestamp"]
    utc_moment = datetime.fromtimestamp(block_timestamp, tz=timezone.utc)
    return utc_moment.isoformat()

def read_and_sort_jsonl(file_path):
    """
    Load a JSONL file and return its records ordered by `blockNumber`.

    Blank lines are ignored. Lines that are not valid JSON, and records without
    a `blockNumber` field, are reported with `print` and skipped. Sorting is
    ascending on `int(blockNumber)`; ties keep their order in the file.
    """
    records = []
    with open(file_path, "r", encoding="utf-8") as handle:
        for raw_line in handle:
            text = raw_line.strip()
            if text:
                try:
                    record = json.loads(text)
                except json.JSONDecodeError as e:
                    # Malformed line: show it, then move on.
                    print(text)
                    print(f"Skipping bad JSON line: {e}")
                else:
                    if "blockNumber" in record:
                        records.append(record)
                    else:
                        print(f"Skipping line with no blockNumber: {record}")
    # blockNumber may be stored as int or as a numeric string.
    records.sort(key=lambda rec: int(rec["blockNumber"]))
    return records

def get_address_abi_contract(contract_address, etherscan_api_key=ETHERSCAN_API_KEY):
    """
    Build the (checksum address, ABI, contract object) triple for a contract.

    Parameters:
        contract_address (str): Contract address in any casing.
        etherscan_api_key (str): Key forwarded to `get_abi`.

    Returns:
        tuple: (address, contract_abi, contract) where `address` is the
        checksummed address, `contract_abi` comes from `get_abi` and
        `contract` is the result of `w3.eth.contract(...)`.
    """
    address = w3.to_checksum_address(contract_address)
    contract_abi = get_abi(address, etherscan_api_key)
    # Pass the checksummed address: the raw input used previously could be
    # lower/upper-case, defeating the normalization done just above.
    contract = w3.eth.contract(address=address, abi=contract_abi)

    return address, contract_abi, contract

# Query the token balance of an address at one specific block number.
# The token ABI is resolved through the Etherscan API (a possible alternative:
# reconstruct the balance from all Transfer events, but that is slow).
# Not very useful for the moment.
def get_erc20_balance_at_block(user_address, token_address, block_number):
    """
        Query ERC-20 balance of an address at a specific block.

        user_address = "0xe2dFC8F41DB4169A24e7B44095b9E92E20Ed57eD"
        token_address = "0x514910771AF9Ca656af840dff83E8264EcF986CA"
        block_number = 23405236
        balance = get_erc20_balance_at_block(user_address, token_address, block_number)

        Parameters:
            user_address: string, account to check
            token_address: string, address of the ERC-20 token contract
            block_number: int, historical block

        Returns:
            int: raw token balance as returned by `balanceOf`
            None if calling `name()` or `symbol()` on the token fails
            (the original author attributes this to proxy contracts)
    """
    token_address, _token_abi, token_contract = get_address_abi_contract(token_address)
    user_address = w3.to_checksum_address(user_address)
    try:
        # name() is only called as a probe; its value is not used.
        token_contract.functions.name().call()
        token_symbol = token_contract.functions.symbol().call()
    except Exception as e:
        print(f"Error {e}")
        print(f"{token_address}")
        return None
    balance = token_contract.functions.balanceOf(user_address).call(
        block_identifier=block_number
    )
    # Display only. from_wei(..., "ether") divides by 10**18, so tokens with a
    # different number of decimals are printed at the wrong scale.
    # Computed outside the f-string: nesting the same quote type inside an
    # f-string is a syntax error before Python 3.12.
    human_balance = w3.from_wei(balance, "ether")
    print(
        f"Address {user_address} had {human_balance} of {token_symbol} at block {block_number}"
    )
    return balance


def _save_token_cache(cache, cache_file):
    """Write `cache` as JSON to `cache_file` atomically; failures are only printed."""
    tmp = None
    try:
        dirn = os.path.dirname(cache_file) or "."
        fd, tmp = tempfile.mkstemp(dir=dirn, text=True)
        with os.fdopen(fd, "w", encoding="utf-8") as f:
            json.dump(cache, f, indent=2, ensure_ascii=False)
        os.replace(tmp, cache_file)
    except Exception as e:
        print(f"Warning: failed to save token cache: {e}")
        # Do not leave a stray temp file behind when the write/replace failed.
        if tmp is not None and os.path.exists(tmp):
            try:
                os.remove(tmp)
            except OSError:
                pass


def get_token_name_by_contract(token_address, TOKEN_NAME_FILE=TOKEN_NAME_FILE, proxy_address=None, global_cache=GLOBAL_DICT_TOKEN_SYMBOL):
    """
    Returns the token info for `token_address`, using a JSON-backed cache.

    Parameters:
        token_address (str): Address to look up. It is always the cache key.
        TOKEN_NAME_FILE (str): Path of the JSON file the cache is saved to.
        proxy_address (str | None): When given, `getToken(token_address)` is
            first called on this contract and name/symbol are read from the
            address it returns.
        global_cache (dict): In-memory cache, mutated in place. The default is
            the module-level dict, deliberately shared across calls.

    Returns:
        dict: {"name": ..., "symbol": ..., "address": ...}. All three values
        are None when the lookup failed; failures are cached too, so a failing
        address is not queried again.
    """
    cache = global_cache
    # The cache is read AND written under the address the caller asked for.
    # Storing under the resolved token address (as before) meant lookups made
    # through a proxy never hit the cache.
    requested_address = token_address

    if requested_address in cache:
        return cache[requested_address]

    def _as_text(raw):
        # Some tokens return bytes32 for name/symbol: decode and strip padding.
        if isinstance(raw, (bytes, bytearray)):
            return raw.decode("utf-8", errors="ignore").rstrip("\x00")
        return str(raw)

    entry = {"name": None, "symbol": None, "address": None}
    try:
        if proxy_address:
            _, _, proxy_contract = get_address_abi_contract(proxy_address)
            token_address = proxy_contract.functions.getToken(token_address).call()
        _, _, token_contract = get_address_abi_contract(token_address)
        name_raw = token_contract.functions.name().call()
        symbol_raw = token_contract.functions.symbol().call()
        entry = {
            "name": _as_text(name_raw),
            "symbol": _as_text(symbol_raw),
            "address": token_contract.address,
        }
    except Exception as e:
        print(f"Error fetching token name/symbol for {requested_address}: {e}")

    if requested_address:
        cache[requested_address] = entry
    _save_token_cache(cache, TOKEN_NAME_FILE)
    return entry


def decode_topics(log):
    """
    Decode a raw log against the ABI of the contract that emitted it.

    The first topic is compared with the keccak hash of every event signature
    in the contract ABI; the first match is decoded with `process_log`.

    Parameters:
        log: Log entry exposing "address" and "topics".

    Returns:
        dict: {"event": <event name>, "args": <decoded args as dict>}, or {}
        when the log has no topics or no ABI event matches.
    """
    topics = log["topics"]
    # A log without topics has no signature to match; return early instead of
    # raising IndexError on topics[0].
    if not topics:
        return {}

    _, abi, contract = get_address_abi_contract(log["address"])
    log_signature = topics[0].hex()  # invariant across ABI entries

    # Try matching this log against the ABI events
    for item in abi:
        if item.get("type") != "event":
            continue
        event_signature = (
            f'{item["name"]}({",".join(i["type"] for i in item["inputs"])})'
        )
        if w3.keccak(text=event_signature).hex() != log_signature:
            continue
        # Found matching event
        decoded = contract.events[item["name"]]().process_log(log)
        return {
            "event": item["name"],
            "args": dict(decoded["args"]),
        }

    return {}  # no matching event in ABI


def release_list(a):
    """
    Empty the list `a` in place so that its items can be garbage-collected.

    Every reference to the same list sees it empty afterwards. The caller's
    own name binding is not removed.
    """
    del a[:]

In [None]:
def append_jsonl(OUTPUT_FILE, txs):
    """Append each item of `txs` to `OUTPUT_FILE` as one JSON document per line."""
    with open(OUTPUT_FILE, "a", encoding="utf-8") as sink:
        sink.writelines(
            json.dumps(record, ensure_ascii=False) + "\n" for record in txs
        )

def load_state(STATE_FILE):
    """
    Read the intervals stored in `STATE_FILE`.

    Returns a list with one tuple per stored entry, or an empty list when the
    file does not exist.
    """
    if os.path.exists(STATE_FILE):
        with open(STATE_FILE, "r", encoding="utf-8") as fh:
            stored = json.load(fh, object_pairs_hook=OrderedDict)
        return list(map(tuple, stored))
    return []

def save_state(interval, STATE_FILE):
    """
    Save the `(left, right)` pairs of `interval` to `STATE_FILE`, sorted.

    The JSON is first written to `STATE_FILE + ".tmp"`, which then replaces
    the target file.
    """
    pairs = [[left, right] for left, right in interval]
    pairs.sort()
    scratch_path = STATE_FILE + ".tmp"
    with open(scratch_path, "w", encoding="utf-8") as fh:
        json.dump(pairs, fh)
    os.replace(scratch_path, STATE_FILE)


In [None]:
# Block range and chunking used by the crawl loop.
START_BLOCK = 0

# END_BLOCK = 'latest'
END_BLOCK = 10000000
CHUNK_SIZE = 10000

# Subset of contract addresses used to build PARTIAL_EVENT_BY_CONTRACTS.
token_filter = [
    "0x2C4BD064B998838076FA341A83D007FC2FA50957",
    "0x255E60C9D597DCAA66006A904ED36424F7B26286",
    "0xE8E45431B93215566BA923a7E611B7342EA954DF",
    "0xF173214C720F58E03E194085B1DB28B50ACDEEAD",
    "0xC6581CE3A005E2801C1E0903281BBD318EC5B5C2",
    "0x494D82667C3ED3AC859CCA94B1BE65B0540EE3BB",
    "0x077D52B047735976DFDA76FEF74D4D988AC25196",
    "0xC4A1C45D5546029FD57128483AE65B56124BFA6A",
    "0x7DC095A5CF7D6208CC680FA9866F80A53911041A",
    "0x2135D193BF81ABBEAD93906166F2BE32B2492C04",
    "0x4D2F5CFBA55AE412221182D8475BC85799A5644B",
    "0x87D80DBD37E551F58680B4217B23AF6A752DA83F",
    "0x060A0D4539623B6AA28D9FC39B9D6622AD495F41",
    "0x6B4540F5EE32DDD5616C792F713435E6EE4F24AB",
    "0xB99A23B1A4585FC56D0EC3B76528C27CAD427473",
    "0x04045481B044534ED3CB1E24254B471CFADDEB3D",
    "0xC0E77CDD039A3F731AE0F5C6C9F4A91D4BC28880",
]
# Normalize casing so the addresses compare equal to the checksummed keys below.
token_filter = [Web3.to_checksum_address(k) for k in token_filter]
# Load with a context manager so the file handle is closed deterministically
# (the previous `json.load(open(...))` left it to the garbage collector).
with open(r"real/FULL_EVENT_BY_CONTRACTS.json") as f:
    FULL_EVENT_BY_CONTRACTS = json.load(f)
FULL_EVENT_BY_CONTRACTS = {
    Web3.to_checksum_address(k): v for k, v in FULL_EVENT_BY_CONTRACTS.items()
}
# Same mapping restricted to the addresses of token_filter that are present.
PARTIAL_EVENT_BY_CONTRACTS = {
    k: FULL_EVENT_BY_CONTRACTS[k] for k in token_filter if k in FULL_EVENT_BY_CONTRACTS
}

# We start to crawl
# Scans [START_BLOCK, END_BLOCK] in chunks with eth_getLogs for every contract
# in EVENTS, appends the decoded logs to OUTPUT_FILE and records each finished
# interval in STATE_FILE so that a later run can resume.
EVENTS = FULL_EVENT_BY_CONTRACTS
END_BLOCK = w3.eth.get_block(END_BLOCK).number
validated_interval = load_state(STATE_FILE)
delay = 3
try:
    logging.info(f"Scanning blocks {START_BLOCK} to {END_BLOCK}")
    l_current_block = START_BLOCK
    r_current_block = min(l_current_block + CHUNK_SIZE, END_BLOCK)
    if validated_interval:
        # Resume from the last recorded interval.
        # NOTE(review): the interval is popped and therefore scanned again, so
        # its logs are appended to OUTPUT_FILE a second time. Confirm whether
        # resuming at r + 1 was intended.
        l_current_block, r_current_block = validated_interval.pop()
        if r_current_block == END_BLOCK:
            # Everything up to END_BLOCK is already done: skip the loop.
            l_current_block = END_BLOCK + 1
    # `<=` so that a final one-block interval [END_BLOCK, END_BLOCK] is still
    # scanned (with `<` the last block could be skipped).
    while l_current_block <= END_BLOCK:
        if (l_current_block, r_current_block) in validated_interval:
            # Already done: move to the next chunk instead of spinning forever.
            l_current_block = r_current_block + 1
            r_current_block = min(l_current_block + CHUNK_SIZE, END_BLOCK)
            continue
        txs = []
        logging.info(f"Processing blocks [{l_current_block}, {r_current_block}]")
        try:
            params = {
                "fromBlock": l_current_block,
                "toBlock": r_current_block,
                "address": list(EVENTS.keys()),
            }
            logs = w3.eth.get_logs(params)
            for log in logs:
                transaction = {
                    "transactionHash": w3.to_hex(log["transactionHash"]),
                    "blockNumber": log["blockNumber"],
                    "address": log["address"],
                    "data": w3.to_hex(log["data"]),
                }
                # Adds "event" and "args" when the log matches an ABI event.
                topics = decode_topics(log)
                transaction.update(topics)
                txs.append(transaction)
            # Persist the data first, then mark the interval as done.
            append_jsonl(OUTPUT_FILE, txs)
            validated_interval.append((l_current_block, r_current_block))
            save_state(validated_interval, STATE_FILE)
            l_current_block = r_current_block + 1
            r_current_block = min(l_current_block + CHUNK_SIZE, END_BLOCK)
        except requests.exceptions.HTTPError as e:
            status_code = e.response.status_code if e.response is not None else None
            if status_code == 429:
                # Rate limited: wait, then retry the same interval.
                jitter = random.uniform(3, delay)
                total_delay = (delay + jitter) / 10
                logging.warning(
                    f"Delay: {total_delay}: {e}"
                )
                time.sleep(total_delay)
                delay += 2  # linear backoff
                continue
            elif status_code == 402:
                logging.critical(
                    f"Payment Required Code {status_code}, Stopping Blocks [{l_current_block}, {r_current_block}]: {e}"
                )
                break
            else:
                logging.error(f"HTTP error occurred: {e}")
                break
        except Web3RPCError as e:
            logging.warning(f"{e}")
            if r_current_block <= l_current_block:
                # A one-block range cannot be halved any further; stop rather
                # than retry the same failing request forever.
                logging.error(
                    f"RPC error on single block {l_current_block}, stopping: {e}"
                )
                break
            # Presumably the range returned too many results: halve and retry.
            r_current_block = (l_current_block + r_current_block) // 2
        except Exception as e:
            logging.error(
                f"Exception when processing interval [{l_current_block}, {r_current_block}]: {e}"
            )
            # Log the traceback itself (print() returns None, so the previous
            # logging.error(print(...)) logged "None").
            logging.error(traceback.format_exc())
            break
except KeyboardInterrupt:
    # NOTE(review): this drops the most recently validated interval so that it
    # is scanned again on the next run; combined with the resume logic above
    # this may duplicate rows in OUTPUT_FILE. Confirm the intent.
    if validated_interval:  # guard: pop() on an empty list raises IndexError
        validated_interval.pop()
    save_state(validated_interval, STATE_FILE)
    logging.info(f"Interrupted by user — exiting loop.")
except Exception as e:
    logging.fatal(f"Unexpected fatal error in main: {e}")
logging.info(f"Finished parsing the last block")

In [None]:
# Crawled events, ordered by block number, to be replayed one by one.
sorted_transactions = read_and_sort_jsonl(OUTPUT_FILE)

# Accumulators filled by analyze_transaction().
FAILED_EVENTS = []      # names of events that no branch handles
L_UNI_LIQUIDITY = []    # one row per Transfer from/to the zero address
L_LIQUIDITY_BOOK = []   # one row per AddLiquidity / RemoveLiquidity
# block -> contract address -> net minted amount; entries start at Decimal 0.
D_BLOCK_TOTAL_LIQUIDITY_BY_CONTRACT_BY_BLOCK = defaultdict(
    lambda: defaultdict(Decimal)
)
DISTINCT_PROVIDER = set()  # every provider seen in an AddLiquidity event

# The zero address has no letters, so its checksum form is this same string.
_ZERO_ADDRESS = "0x0000000000000000000000000000000000000000"


def analyze_transaction(transaction):
    """
    Dispatch one crawled log record into the module-level accumulators.

    Parameters:
        transaction (dict): Record with "blockNumber" and "address", plus
            "event" and "args" when the log could be decoded.

    Side effects:
        - AddLiquidity / RemoveLiquidity: append a row to L_LIQUIDITY_BOOK
          (amounts are negated for removals); AddLiquidity also registers the
          provider in DISTINCT_PROVIDER.
        - Transfer from the zero address (mint) or to it (burn): update
          D_BLOCK_TOTAL_LIQUIDITY_BY_CONTRACT_BY_BLOCK and append a row to
          L_UNI_LIQUIDITY. Other transfers are ignored.
        - TokenPurchase / EthPurchase / Approval: recognized, nothing recorded.
        - Anything else, including logs that could not be decoded (no
          "event" key): the event name (or None) goes to FAILED_EVENTS.

    Amounts are converted with from_wei(..., "ether"), i.e. divided by 10**18.
    """
    block = transaction["blockNumber"]
    address = transaction["address"]
    # Logs that matched no ABI event carry neither "event" nor "args"; use
    # .get so they end up in FAILED_EVENTS instead of raising KeyError.
    event = transaction.get("event")
    event_args = transaction.get("args") or {}

    # Event part
    if event == "AddLiquidity":
        provider = event_args["provider"]
        # Providers are liquidity participants; track all of them for counting.
        DISTINCT_PROVIDER.add(provider)
        L_LIQUIDITY_BOOK.append(
            {
                "block": block,
                "address": address,
                "event": event,
                "provider": provider,
                "token_amount": w3.from_wei(event_args["token_amount"], "ether"),
                "eth_amount": w3.from_wei(event_args["eth_amount"], "ether"),
            }
        )
    elif event == "RemoveLiquidity":
        L_LIQUIDITY_BOOK.append(
            {
                "block": block,
                "address": address,
                "event": event,
                "provider": event_args["provider"],
                "token_amount": -w3.from_wei(event_args["token_amount"], "ether"),
                "eth_amount": -w3.from_wei(event_args["eth_amount"], "ether"),
            }
        )
    elif event == "Transfer":
        _from = event_args["_from"]
        _to = event_args["_to"]
        # UniswapV1-TOKEN-Liquidity-debt
        value = w3.from_wei(event_args["_value"], "ether")
        if _from == _ZERO_ADDRESS:
            # We Mint Liquidity
            D_BLOCK_TOTAL_LIQUIDITY_BY_CONTRACT_BY_BLOCK[block][address] += value
            L_UNI_LIQUIDITY.append(
                {
                    "block": block,
                    "address": address,
                    "event": event,
                    "provider": _to,
                    "value": value,
                }
            )
        if _to == _ZERO_ADDRESS:
            # We Burn Liquidity
            D_BLOCK_TOTAL_LIQUIDITY_BY_CONTRACT_BY_BLOCK[block][address] -= value
            L_UNI_LIQUIDITY.append(
                {
                    "block": block,
                    "address": address,
                    "event": event,
                    "provider": _from,
                    "value": -value,
                }
            )
    elif event in ("TokenPurchase", "EthPurchase"):
        # Purchases will be used to compute swaps (volume, fees) later.
        pass
    elif event == "Approval":
        # Not that useful for the moment.
        pass
    else:
        FAILED_EVENTS.append(event)


# Replay every crawled event, in block order, through analyze_transaction(),
# which fills the module-level accumulators.
result = []  # NOTE(review): not used anywhere in this cell
print(len(sorted_transactions))  # number of events to replay
for tx in sorted_transactions:
    analyze_transaction(tx)

print(len(DISTINCT_PROVIDER))  # distinct liquidity providers seen
print(len(FAILED_EVENTS))  # events that no branch of analyze_transaction handled
# Empty the (potentially large) event list in place to free memory.
release_list(sorted_transactions)

In [None]:
def make_dummy_fast(
    n_blocks=100_000,
    n_providers=200,
    n_addresses=100,
    block_max=24_000_000,
    max_events_per_block=5,
    max_mint_amount=1000,
    seed=None,
):
    """
    Generate a synthetic liquidity-event DataFrame for testing.

    Parameters:
        n_blocks (int): Number of block numbers drawn; duplicates are dropped,
            so fewer distinct blocks may remain.
        n_providers (int): Number of synthetic providers ("prov_i").
        n_addresses (int): Number of synthetic addresses ("add_j").
        block_max (int): Largest block number that can be drawn (inclusive).
        max_events_per_block (int): Events per block, capped at n_providers;
            each event in a block belongs to a different provider.
        max_mint_amount (float): Upper bound of the magnitude of one event.
        seed: Seed forwarded to `np.random.default_rng`.

    Returns:
        pandas.DataFrame with columns
        - block, provider, address
        - value: signed delta of the event, after clamping
        - cum_value_provider: running total of the provider, never negative
        - share: cum_value_provider as a percentage of the pool at that block
          (sum over providers of their latest running total), 0 if the pool
          is empty.
    """
    rng = np.random.default_rng(seed)
    providers = np.array([f"prov_{i}" for i in range(n_providers)], dtype=object)
    addresses = np.array([f"add_{j}" for j in range(n_addresses)], dtype=object)

    # np.unique already returns its result sorted.
    blocks = np.unique(rng.integers(0, block_max + 1, size=n_blocks))

    # build arrays for all events upfront
    k = min(max_events_per_block, n_providers)  # loop invariant
    event_list = []
    for blk in blocks:
        chosen = rng.choice(n_providers, size=k, replace=False)
        for pi in chosen:
            addr_idx = rng.choice(n_addresses)
            sign = rng.choice([-1, 1])
            mag = rng.uniform(0, max_mint_amount)
            event_list.append((blk, pi, addr_idx, sign * mag))

    # Convert to a structured numpy array
    arr = np.array(
        event_list,
        dtype=[
            ("block", np.int64),
            ("prov_i", np.int64),
            ("addr_i", np.int64),
            ("value", np.float64),
        ],
    )
    # sort by block
    arr.sort(order="block")

    # Contiguous working copies handed to the compiled loop.
    prov_idx = np.ascontiguousarray(arr["prov_i"])
    values = np.ascontiguousarray(arr["value"])

    # initialize cumulative array (provider-level)
    cum = np.zeros(n_providers, dtype=np.float64)
    cum_values = np.zeros(len(arr), dtype=np.float64)

    # Numba function to enforce non-negative cumulative:
    @njit
    def clamp_cumulative(arr_prov, arr_val, cum_out, cum):
        for i in range(len(arr_val)):
            pi = arr_prov[i]
            v = arr_val[i]
            prev = cum[pi]
            new = prev + v
            if new < 0:
                # Withdraw at most what the provider holds, and write the
                # clamped delta back so that "value" stays consistent with
                # "cum_value_provider" (it used to be computed and dropped).
                v = -prev
                new = 0.0
                arr_val[i] = v
            cum_out[i] = new
            cum[pi] = new

    clamp_cumulative(prov_idx, values, cum_values, cum)

    # Build DataFrame
    df = pd.DataFrame(
        {
            "block": arr["block"],
            "provider": providers[prov_idx],
            "address": addresses[arr["addr_i"]],
            "value": values,
            "cum_value_provider": cum_values,
        }
    )

    # Pool per block: forward-fill every provider's latest running total, then
    # sum across providers.
    df_wide = (
        df.pivot(index="block", columns="provider", values="cum_value_provider")
        .ffill()
        .fillna(0.0)
    )
    pool = df_wide.clip(lower=0).sum(axis=1)

    # Vectorized share; rows whose pool is not positive get 0.
    pool_at_row = df["block"].map(pool)
    share = df["cum_value_provider"] / pool_at_row * 100.0
    df["share"] = share.where(pool_at_row > 0, 0.0)
    return df

# df = make_dummy_fast(
#     n_blocks=100_000,
#     n_providers=250,
#     n_addresses=10,
#     block_max=100_100,
#     max_events_per_block=10,
#     max_mint_amount=1000,
#     seed=None,
# )

In [None]:
# Build one DataFrame out of the two event lists filled by analyze_transaction().
print(len(L_LIQUIDITY_BOOK) + len(L_UNI_LIQUIDITY))  # total number of rows expected
df_liquidity_book = pd.DataFrame(L_LIQUIDITY_BOOK)
df_uni_liquidity = pd.DataFrame(L_UNI_LIQUIDITY)
# ignore_index=False keeps each source frame's own index, so index labels
# repeat across the two halves of the concatenated frame.
df = pd.concat([df_liquidity_book, df_uni_liquidity], ignore_index=False)
# Each source only has some of these columns; the gaps left by concat become 0.
# NOTE(review): this raises KeyError if one of the two lists is empty and a
# column is therefore missing altogether; confirm both are always populated.
numeric_cols = ["token_amount", "eth_amount", "value"]
df[numeric_cols] = df[numeric_cols].fillna(0)
df["event"] = df["event"].astype("category")
df["provider"] = df["provider"].apply(Web3.to_checksum_address)
df["address"] = df["address"].apply(Web3.to_checksum_address)
unique = df["address"].unique()
# Cache-only alternative, without any contract call:
# addr_to_symbol = {
#     addr: GLOBAL_DICT_TOKEN_SYMBOL[addr]["symbol"]
#     for addr in unique
# }
# One lookup per distinct address; each address is resolved to its token
# through getToken() on UNISWAP_V1_CONTRACT.
addr_to_symbol = {
    addr: get_token_name_by_contract(token_address=addr, proxy_address=UNISWAP_V1_CONTRACT)["symbol"]
    for addr in unique
}
df["symbol"] = df["address"].map(addr_to_symbol)
df["symbol"] = df["symbol"].astype("category")
# NOTE(review): int32 holds block numbers up to about 2.1 billion.
df["block"] = df["block"].astype(np.int32)
# df["value"] = df["value"].astype(float)
# NOTE(review): downcast="float" may reduce the column to float32 (roughly 7
# significant digits); confirm that precision is enough for liquidity amounts.
df["value"] = pd.to_numeric(
    df["value"], downcast="float"
)  # Downcast float to save memory

# df["token_amount"] = df["token_amount"].astype(float)
# df["eth_amount"] = df["eth_amount"].astype(float)

# Only "value" is used from here on.
df = df.drop(columns=["token_amount", "eth_amount"])

df
#df.to_pickle("out/V1/df_before_graph")

In [None]:
# Reload a previously saved DataFrame from disk.
# NOTE(review): the matching `df.to_pickle("out/V1/df_before_graph")` call in
# the cell above is commented out, so this file must already exist.
# NOTE: unpickling can execute arbitrary code; only load files you created.
df = pd.read_pickle("out/V1/df_before_graph")
df

In [None]:
# group by block and address, and aggregate into dicts of provider → value
# We use a custom aggregator:
def prov_val_dict(subdf):
    """
    Map each provider of `subdf` to its value.

    `subdf` is the slice for one (block, address) group. When a provider
    appears more than once, the last value wins.
    """
    provider_column = subdf["provider"]
    value_column = subdf["value"]
    return dict(zip(provider_column, value_column))

# One row per (block, address), holding a {provider: value} dict.
g = (
    df.groupby(["block", "address"])[["provider", "value"]]
    .apply(prov_val_dict, include_groups=False)
    .reset_index(name="prov_value_dict")
)

# Wide layout: one row per block, one column per address, dict-valued cells.
df_pivot = g.pivot(index="block", columns="address", values="prov_value_dict")
# Intended to replace missing cells with an empty dict.
# NOTE(review): confirm that pandas accepts a dict as a per-column fill value.
df_pivot = df_pivot.fillna({addr: {} for addr in df["address"].unique()})
df_pivot = df_pivot.reset_index()

def make_empty_if_nan(cell):
    """Return an empty dict when `cell` is pd.NA or a float NaN, else `cell` unchanged."""
    is_na_sentinel = cell is pd.NA
    is_float_nan = isinstance(cell, float) and np.isnan(cell)
    return {} if (is_na_sentinel or is_float_nan) else cell


# df_pivot = df_pivot.applymap(make_empty_if_nan)
# df_pivot = df_pivot.map(make_empty_if_nan)

# Drop the wide pivot table and force a collection pass to give memory back.
# NOTE(review): the next cell starts with `df_pivot.copy()`, which fails on
# None when the cells are run in order; rebuild df_pivot first.
del df_pivot
df_pivot = None
gc.collect()

In [None]:
# TODO: this cell still needs to be adapted to the real data.
# ---------------------------
# ASSUMPTION:
# df_pivot has columns: 'block', 'add_0', 'add_1', ... each add_* cell is a dict (provider->value)
# Example head: df_pivot[['block','add_0','add_1']].head()
# NOTE(review): the "add_" prefix matches the synthetic addresses produced by
# make_dummy_fast; a real pivot table would need a different column filter.
# ---------------------------

df = df_pivot.copy()  # keep original safe
addr_cols = [c for c in df.columns if c.startswith("add_")]  # adjust pattern if needed


# safe cell-sum (handles dict, empty dict, NaN)
def sum_dict_cell(x):
    """Collapse one pivot cell into a single float.

    * ``dict`` cell  -> sum of its values (``0.0`` for an empty dict)
    * missing cell   -> ``0.0``
    * anything else  -> ``float(x)`` when convertible, otherwise ``0.0``

    The missing-value test lives inside the ``try`` block on purpose:
    ``pd.isna`` returns an array for list-like cells, and evaluating that
    array in an ``if`` raises ``ValueError``. Such cells fall through to the
    same ``0.0`` fallback instead of aborting the whole column map.
    """
    if isinstance(x, dict):
        # sum provider values inside the cell
        return float(sum(x.values()))
    try:
        # handle missing / NaN cell
        if pd.isna(x):
            return 0.0
        # fallback (if cell already numeric or numeric-like)
        return float(x)
    except (TypeError, ValueError, OverflowError):
        return 0.0


# 1) sort by block to ensure temporal order
df = df.sort_values("block").reset_index(drop=True)

# 2) compute per-address net change (delta) at each block
# use Series.map inside apply to avoid applymap deprecation
df_delta = df[addr_cols].apply(lambda col: col.map(sum_dict_cell))
# rename columns to make intent clear
df_delta = df_delta.rename(columns={c: f"{c}_delta" for c in df_delta.columns})

# attach block back (index aligned)
df_delta.insert(0, "block", df["block"].values)

# 3) compute cumulative liquidity per address (cumsum across rows in block order)
# set index to block for convenience
df_delta_idx = df_delta.set_index("block")
df_cum = (
    df_delta_idx.cumsum()
)  # running total per column = cumulative liquidity per address

# 4) merge the numeric results back into the original table (optional)
# - for each address add two columns: {add_X}_delta and {add_X}_liquidity
for addr in addr_cols:
    delta_col = f"{addr}_delta"
    liq_col = f"{addr}_liquidity"
    # .values assigns positionally; df, df_delta and df_cum share the same row order
    df[delta_col] = df_delta[delta_col].values
    df[liq_col] = df_cum[delta_col].values

# Now `df` has, per row (block), for each address:
#   - add_X_delta      : net change at that block (sum of providers' values in that address cell)
#   - add_X_liquidity  : cumulative liquidity for that address up to that block

# ---------------------------
# OPTIONAL: Reindex to full block range and forward-fill (WARNING: can be enormous)
# If you truly need a row for every integer block between min->max:
#  - this will create (max_block - min_block + 1) rows and can be memory/time-expensive.
#  - prefer using the "query last event" approach below if range is large.
# ---------------------------
# Only needed by the optional reindexing below.
min_b, max_b = int(df["block"].min()), int(df["block"].max())

# build full block index (only if you're sure it's acceptable memory-wise)
# blocks_full = np.arange(min_b, max_b + 1, dtype=int)
# df_cum_full = df_cum.reindex(blocks_full).ffill().fillna(0.0)   # reindex + forward-fill
# df_cum_full.index.name = "block"

# ---------------------------
# FAST & memory-friendly query: "what are per-address liquidities at arbitrary block B?"
# without reindexing entire range:
# ---------------------------
# Sorted because df was sorted by block in step 1.
# NOTE(review): uniqueness relies on df_pivot holding one row per block - confirm.
blocks_sorted = df["block"].values  # sorted unique event blocks


def liquidity_at_block(B):
    """
    Snapshot of the cumulative liquidity of every address column at block B.

    Looks up the last recorded event block that is <= B in the notebook-level
    ``blocks_sorted`` array and reads the ``*_liquidity`` columns of the
    matching ``df`` row. When B precedes the first event, every address maps
    to 0.0.
    """
    # Position of the last event block <= B (-1 when there is none).
    pos = np.searchsorted(blocks_sorted, B, side="right") - 1
    if pos >= 0:
        snapshot = df.iloc[pos]
        return {addr: float(snapshot[f"{addr}_liquidity"]) for addr in addr_cols}
    # before first event
    return dict.fromkeys(addr_cols, 0.0)


# Example:
# liquidity_at_block(1000)  -> gives per-address liquidity snapshot at block 1000

In [None]:
# 1ST GRAPH, evolution of the UNISWAP v1 (UNI-V1) amount of token issued/burned (GLOBAL TOTAL over block)
# Important to compare the size of every pool but we need to link "value" to either $ or something relevant for comparison
# NEED: df
# Net "value" per (block, address), ordered so the cumsum below runs in block order per pool.
totals = (
    df.groupby(["block", "address"], as_index=False)["value"]
    .sum()
    .sort_values(["address", "block"])
)
# Running total per pool address.
totals["cum_value"] = totals.groupby("address")["value"].cumsum()

# # 2) fill missing blocks only inside each address' span (min..max), then cumulate
# totals = totals.groupby("address", group_keys=False).apply(
#     lambda g: (
#         g.set_index("block")
#         .reindex(range(g["block"].min(), g["block"].max() + 1), fill_value=0)
#         .rename_axis("block")
#         .reset_index()
#         .assign(address=g.name)
#     )
# )
# totals = (
#     totals[["block", "address", "value"]]
#     .sort_values(["address", "block"])
#     .reset_index(drop=True)
# )

# Pools to display, normalised to checksum form before matching.
pools_of_interest = [
    w3.to_checksum_address("0x006B6E89EE1531CFE5B6D32DA0D80CC30506A339"),
    w3.to_checksum_address("0x010E2558EAB0639EDADC9F83C81CC87DF66F8029"),
    w3.to_checksum_address("0x01A700DC924D837740B2CF5EA8C9FC46A5A76A3A"),
]

# pools_of_interest = ["add_1","add_2","add_3"]
cum_long_sub = totals[totals["address"].isin(pools_of_interest)]

# Stacked-area variant.
# NOTE(review): this figure is overwritten by the px.line call below before it
# is shown, so only the line chart is displayed.
fig = px.area(
    cum_long_sub,
    x="block",
    y="cum_value",
    color="address",
    line_group="address",
    title="Cumulative liquidity evolution per pool",
    labels={"cum_value": "Cumulative liquidity", "address": "Pool address"},
)

# Line variant (no fill); this is the figure that is actually displayed.
fig = px.line(cum_long_sub, x="block", y="cum_value", color="address",
              title="Cumulative liquidity per pool")
# You can also make it not stacked (i.e. overlayed) by doing:
# fig = px.area(
#     cum_long_sub,
#     x="block",
#     y="cum_value",
#     color="address",
#     line_group="address",
#     facet_col=None,
#     # maybe set `groupnorm=None` or other arguments
# )

fig.update_layout(legend_title="Pool address")
fig.show()

In [None]:
# Pool address to inspect, normalised to checksum form.
chosen_address = w3.to_checksum_address("0x006B6e89eE1531cfE5b6d32da0d80CC30506A339")

# Keep only the Transfer events of that pool, in block order.
df_one_addr = df[(df["address"] == chosen_address) & (df["event"] == 'Transfer')].copy()
df_one_addr = df_one_addr.sort_values(["block"])


def forward_backward_fill_blocks(df, step=100):
    """
    Expand every event row into a window of ``2 * step + 1`` consecutive blocks.

    Each input row is replicated for the blocks ``block - step`` .. ``block + step``.
    Copies at or after the event block keep the event's ``value``; copies
    before it are blanked and then filled from the closest preceding row in
    block order (forward fill), or with 0 when no earlier row exists.

    Parameters:
    -----------
    df : DataFrame
        Input dataframe; must contain the columns ``block`` and ``value``.
        Every other column is copied unchanged onto the replicated rows.
    step : int
        Number of blocks generated before and after each event

    Returns:
    --------
    DataFrame
        Expanded frame sorted by ``block`` with a fresh RangeIndex; the
        temporary ``offset`` helper column is removed.
    """
    # Offsets applied to each event block: [-step, ..., 0, ..., step]
    offsets = np.arange(-step, step + 1)
    n_repeats = len(offsets)  # 2*step + 1

    # Replicate rows by POSITION. Label-based ``.loc`` returns every matching
    # row for a duplicated index label, which multiplies rows and breaks the
    # pairing between replicated rows and their offsets.
    row_positions = np.repeat(np.arange(len(df)), n_repeats)
    expanded = df.iloc[row_positions].reset_index(drop=True)

    # One full offset cycle per original row, in the same order as the repeats.
    expanded["offset"] = np.tile(offsets, len(df))

    # Calculate actual block numbers
    expanded["block"] = expanded["block"] + expanded["offset"]

    # Blocks before the event carry no value of their own yet
    expanded.loc[expanded["offset"] < 0, "value"] = np.nan

    # Stable sort: rows landing on the same block keep their input order, so
    # the forward fill below is deterministic.
    expanded = expanded.sort_values("block", kind="mergesort").reset_index(drop=True)

    # Forward fill then fill remaining with 0
    expanded["value"] = expanded["value"].ffill().fillna(0)

    # Drop helper column
    return expanded.drop(columns=["offset"])


# Usage:
# smoothed = forward_backward_fill_blocks(df, step=100)

In [None]:
# Display the Transfer events selected for the chosen pool.
df_one_addr

In [None]:
# Expand each Transfer event into a window of 5 blocks on either side, then display the result.
smoothed = forward_backward_fill_blocks(df_one_addr, step=5)
smoothed

In [None]:
# token = "add_1"

# filtered_df = detailed.xs(token, level="address", drop_level=True)

# # Reset index to have columns
# df_d = filtered_df.reset_index()  # columns: block, address, provider, value

# # Sort by pool address, then provider, then block
# df_d = df_d.sort_values(["block"])


# # Compute cumulative sum per (address, provider)
# df_d["cum_value_provider"] = df_d.groupby(["provider"])["value"].cumsum()
# df_d["total_liquidity"] = df_d["value"].cumsum()
# df_d["share"] = (df_d["cum_value_provider"] / df_d["total_liquidity"]) * 100
# df_d["cum_value_provider"] = df_d["cum_value_provider"].astype(float)
# df_d["total_liquidity"] = df_d["total_liquidity"].astype(float)
# df_d["share"] = df_d["share"].astype(float).fillna(0)
# # with pd.option_context('display.max_rows', None, 'display.max_columns', None):
# #     display(
# #         df_d[
# #             [
# #                 "block",
# #                 "provider",
# #                 "value",
# #                 "cum_value_provider",
# #                 "share",
# #                 #"total_liquidity",
# #             ]
# #         ]
# #     )
def forward_backward_fill_blocks(df, step=100):
    """
    Expand every event row into a window of ``2 * step + 1`` consecutive blocks.

    Each input row is replicated for the blocks ``block - step`` .. ``block + step``.
    Copies at or after the event block keep the event's ``value``; copies
    before it are blanked and then filled from the closest preceding row in
    block order (forward fill), or with 0 when no earlier row exists.

    Parameters:
    -----------
    df : DataFrame
        Input dataframe; must contain the columns ``block`` and ``value``.
        Every other column is copied unchanged onto the replicated rows.
    step : int
        Number of blocks generated before and after each event

    Returns:
    --------
    DataFrame
        Expanded frame sorted by ``block`` with a fresh RangeIndex; the
        temporary ``offset`` helper column is removed.
    """
    # Offsets applied to each event block: [-step, ..., 0, ..., step]
    offsets = np.arange(-step, step + 1)
    n_repeats = len(offsets)  # 2*step + 1

    # Replicate rows by POSITION. Label-based ``.loc`` returns every matching
    # row for a duplicated index label, which multiplies rows and breaks the
    # pairing between replicated rows and their offsets.
    row_positions = np.repeat(np.arange(len(df)), n_repeats)
    expanded = df.iloc[row_positions].reset_index(drop=True)

    # One full offset cycle per original row, in the same order as the repeats.
    expanded["offset"] = np.tile(offsets, len(df))

    # Calculate actual block numbers
    expanded["block"] = expanded["block"] + expanded["offset"]

    # Blocks before the event carry no value of their own yet
    expanded.loc[expanded["offset"] < 0, "value"] = np.nan

    # Stable sort: rows landing on the same block keep their input order, so
    # the forward fill below is deterministic.
    expanded = expanded.sort_values("block", kind="mergesort").reset_index(drop=True)

    # Forward fill then fill remaining with 0
    expanded["value"] = expanded["value"].ffill().fillna(0)

    # Drop helper column
    return expanded.drop(columns=["offset"])


# 1. Filter for the token address of interest
chosen_address = w3.to_checksum_address("0x006B6e89eE1531cfE5b6d32da0d80CC30506A339")

# All rows of that pool (no event-type filter here).
df_one_addr = df[df["address"] == chosen_address ].copy()

# aggregate per (block, provider): net value a provider moved in one block
df_one_addr_grouped = df_one_addr.groupby(["block", "provider"], as_index=False)["value"].sum()

# cumulative per provider (sorted so the cumsum runs in block order within each provider)
gno = df_one_addr_grouped.sort_values(["provider", "block"])
gno["cum_provider"] = gno.groupby("provider")["value"].cumsum()

# pool cumulative by block (sum the block deltas, then cumsum)
pool = gno.groupby("block", as_index=False)["value"].sum().sort_values("block")
pool["cum_pool"] = pool["value"].cumsum()

# merge pool cumulative into provider rows, compute share %
# NOTE(review): a provider only has rows at blocks where it had an event, so
# the share is only defined at those blocks.
liquidity_df = gno.merge(pool[["block", "cum_pool"]], on="block", how="left")
liquidity_df["share_pct"] = (
    liquidity_df["cum_provider"] / liquidity_df["cum_pool"] * 100
)
# Sparse storage: entries equal to 0.0 are not stored explicitly.
liquidity_df["share_pct"] = liquidity_df["share_pct"].astype(
    pd.SparseDtype("float64", fill_value=0.0)
)

# Expand each row into a window of 5 blocks on either side (operates on the "value" column).
smoothed = forward_backward_fill_blocks(liquidity_df, step=5)

In [None]:
# Normalized (100 %) stacked area of each provider's cumulative liquidity.

# One tick per distinct event block. Tick positions and tick labels are both
# derived from this single array so they always have the same length and
# order; the block column itself repeats a block once per provider and must
# not be used for the labels.
tick_blocks = liquidity_df["block"].unique()

fig_norm = px.area(
    liquidity_df,
    x="block",
    y="cum_provider",
    color="provider",
    line_group="provider",
    groupnorm="percent",  # normalize to 100%
    labels={
        "cum_provider": "Liquidity (normalized)",
        "block": "Block",
        "provider": "Provider",
    },
    title=f"Normalized Cumulative Liquidity Share for {chosen_address}",
)
fig_norm.update_layout(
    xaxis=dict(
        tickmode="array",
        tickvals=tick_blocks,
        ticktext=[str(block) for block in tick_blocks],
    )
)
fig_norm.show()

In [None]:
# ============================================================
# COMPLETE LIQUIDITY POOL ANALYSIS CODE
# ============================================================
def forward_backward_fill_blocks(df, step=100):
    """
    Expand every event row into a window of ``2 * step + 1`` consecutive blocks.

    Each input row is replicated for the blocks ``block - step`` .. ``block + step``.
    Copies at or after the event block keep the event's ``value``; copies
    before it are blanked and then filled from the closest preceding row in
    block order (forward fill), or with 0 when no earlier row exists.

    Parameters:
    -----------
    df : DataFrame
        Input dataframe; must contain the columns ``block`` and ``value``.
        Every other column is copied unchanged onto the replicated rows.
    step : int
        Number of blocks generated before and after each event

    Returns:
    --------
    DataFrame
        Expanded frame sorted by ``block`` with a fresh RangeIndex; the
        temporary ``offset`` helper column is removed.
    """
    # Offsets applied to each event block: [-step, ..., 0, ..., step]
    offsets = np.arange(-step, step + 1)
    n_repeats = len(offsets)  # 2*step + 1

    # Replicate rows by POSITION. Label-based ``.loc`` returns every matching
    # row for a duplicated index label, which multiplies rows and breaks the
    # pairing between replicated rows and their offsets.
    row_positions = np.repeat(np.arange(len(df)), n_repeats)
    expanded = df.iloc[row_positions].reset_index(drop=True)

    # One full offset cycle per original row, in the same order as the repeats.
    expanded["offset"] = np.tile(offsets, len(df))

    # Calculate actual block numbers
    expanded["block"] = expanded["block"] + expanded["offset"]

    # Blocks before the event carry no value of their own yet
    expanded.loc[expanded["offset"] < 0, "value"] = np.nan

    # Stable sort: rows landing on the same block keep their input order, so
    # the forward fill below is deterministic.
    expanded = expanded.sort_values("block", kind="mergesort").reset_index(drop=True)

    # Forward fill then fill remaining with 0
    expanded["value"] = expanded["value"].ffill().fillna(0)

    # Drop helper column
    return expanded.drop(columns=["offset"])


def _provider_letters(position):
    """Return the spreadsheet-style letter code of a zero-based position (0 -> A, 25 -> Z, 26 -> AA)."""
    letters = ""
    remaining = position + 1
    while remaining > 0:
        remaining, digit = divmod(remaining - 1, 26)
        letters = chr(65 + digit) + letters
    return letters


def create_provider_labels(providers, w3):
    """
    Create human-readable labels for provider addresses.

    Format: ``Provider <letters> (<first 6 chars>...<last 4 chars>)``. The
    letters run A..Z and then continue AA, AB, ... so that more than 26
    providers still receive alphabetic labels.

    Parameters:
    -----------
    providers : iterable of str
        Provider addresses; the iteration order determines the letters.
    w3 : Web3
        Object exposing ``to_checksum_address``.

    Returns:
    --------
    dict
        Maps each checksummed address to its label.
    """
    labels = {}
    for idx, provider in enumerate(providers):
        # Ensure checksum address
        checksum_provider = w3.to_checksum_address(provider)
        # Shorten address: first 6 + last 4 characters
        short_addr = f"{checksum_provider[:6]}...{checksum_provider[-4:]}"
        labels[checksum_provider] = (
            f"Provider {_provider_letters(idx)} ({short_addr})"
        )
    return labels


def calculate_pool_liquidity_sparse(df, token_address, w3):
    """
    Calculate pool liquidity distribution WITHOUT smoothing.
    Only the pool's actual event blocks appear in the result.

    For every event block of the pool, each provider's balance is the running
    sum of its ``value`` entries up to and including that block. Providers
    with a balance of at most 1e-8, or a pool share below 0.1 %, are left out
    of that block.

    Parameters:
    -----------
    df : DataFrame
        Transfer events with columns: block, address, provider, value
        (``value`` is expected to be numeric).
    token_address : str
        Token contract address to analyze
    w3 : Web3
        Web3 instance for address checksumming

    Returns:
    --------
    DataFrame
        Columns: block, provider, cum_provider, cum_pool, share_pct,
        provider_label; sorted by (block, provider). Empty (with these
        columns) when the address has no events or no positive balances.
    """
    result_columns = [
        "block",
        "provider",
        "cum_provider",
        "cum_pool",
        "share_pct",
        "provider_label",
    ]

    def checksummed(series):
        # Convert each distinct address once instead of once per row
        # (assumes to_checksum_address is deterministic for a given input).
        lookup = {raw: w3.to_checksum_address(raw) for raw in series.unique()}
        return series.map(lookup)

    # 1. Keep only the rows of the requested pool (compared in checksum form)
    token_address = w3.to_checksum_address(token_address)
    in_pool = checksummed(df["address"]) == token_address
    events = df.loc[in_pool, ["block", "provider", "value"]].copy()
    events["provider"] = checksummed(events["provider"])

    # 2. Net transfer per (block, provider)
    deltas = events.groupby(["block", "provider"], as_index=False)["value"].sum()
    if deltas.empty:
        return pd.DataFrame(columns=result_columns)

    # 3. Labels follow the sorted order of every provider seen in the pool
    provider_labels = create_provider_labels(
        sorted(deltas["provider"].unique()), w3
    )

    # 4. Balance of every provider at every event block: a block x provider
    #    table of net transfers (0 where a provider had no event) accumulated
    #    down the block axis.
    balances = (
        deltas.pivot(index="block", columns="provider", values="value")
        .sort_index()
        .fillna(0)
        .cumsum()
    )
    df_full = balances.reset_index().melt(
        id_vars="block", var_name="provider", value_name="cum_provider"
    )

    # 5. Only keep providers that actually hold liquidity at a block; this
    #    also removes the zero rows preceding a provider's first event.
    df_full = df_full[df_full["cum_provider"] > 1e-8].copy()
    if df_full.empty:
        return pd.DataFrame(columns=result_columns)

    # 6. Pool total per block and each provider's share of it
    df_full["cum_pool"] = df_full.groupby("block")["cum_provider"].transform("sum")
    df_full["share_pct"] = np.where(
        df_full["cum_pool"].abs() < 1e-10,
        0.0,
        (df_full["cum_provider"] / df_full["cum_pool"] * 100),
    )
    df_full["share_pct"] = df_full["share_pct"].clip(0, 100)

    # 7. Add human-readable provider labels
    df_full["provider_label"] = df_full["provider"].map(provider_labels)

    # 8. Final filter: remove any remaining near-zero shares
    df_full = df_full[df_full["share_pct"] >= 0.1]

    return df_full.sort_values(["block", "provider"]).reset_index(drop=True)


def plot_staircase_ownership(df):
    """
    Build a stacked step ("staircase") area chart of ownership share per block.

    ``df`` needs the columns ``block``, ``provider_label`` and ``share_pct``.
    One stacked trace is added per provider label (labels in sorted order),
    plus a dashed vertical marker at every multiple of one million blocks
    inside the data's block range. Returns the plotly figure.
    """
    first_block = df["block"].min()
    last_block = df["block"].max()

    fig = go.Figure()

    # One trace per provider, using plotly's default colour cycle.
    for label in sorted(df["provider_label"].unique()):
        rows = df[df["provider_label"] == label].sort_values("block")
        trace = go.Scatter(
            x=rows["block"],
            y=rows["share_pct"],
            name=label,
            mode="lines",
            line={"width": 0.5, "shape": "hv"},
            stackgroup="one",
            groupnorm="",
            hovertemplate="<b>%{fullData.name}</b><br>Block: %{x}<br>Share: %{y:.4f}%<extra></extra>",
        )
        fig.add_trace(trace)

    # Candidate markers: every million-block multiple around the data range,
    # keeping only those that fall inside it.
    range_start = int(first_block // 1_000_000) * 1_000_000
    range_stop = int(last_block // 1_000_000 + 1) * 1_000_000 + 1
    markers = [
        candidate
        for candidate in range(range_start, range_stop, 1_000_000)
        if first_block <= candidate <= last_block
    ]
    for marker in markers:
        fig.add_vline(
            x=marker,
            line_width=2,
            line_dash="dash",
            line_color="black",
            opacity=0.4,
            annotation_text=f"{marker / 1_000_000:.0f}M",
            annotation_position="top",
            annotation_font_size=12,
        )

    legend_style = {
        "title": "Provider",
        "orientation": "v",
        "yanchor": "top",
        "y": 1,
        "xanchor": "left",
        "x": 1.02,
    }
    fig.update_layout(
        title="Pool Ownership Distribution (Staircase View)",
        hovermode="x",
        yaxis_title="Ownership Share (%)",
        xaxis_title="Block Number",
        legend=legend_style,
        yaxis={"range": [0, 100]},
    )

    return fig


def plot_absolute_liquidity_staircase(df):
    """
    Build a stacked step ("staircase") area chart of absolute liquidity per block.

    ``df`` needs the columns ``block``, ``provider_label`` and
    ``cum_provider``. One stacked trace is added per provider label (labels in
    sorted order), plus a dashed vertical marker at every multiple of one
    million blocks inside the data's block range. Returns the plotly figure.
    """
    first_block = df["block"].min()
    last_block = df["block"].max()

    fig = go.Figure()

    # One trace per provider, using plotly's default colour cycle.
    for label in sorted(df["provider_label"].unique()):
        rows = df[df["provider_label"] == label].sort_values("block")
        trace = go.Scatter(
            x=rows["block"],
            y=rows["cum_provider"],
            name=label,
            mode="lines",
            line={"width": 0.5, "shape": "hv"},
            stackgroup="one",
            hovertemplate="<b>%{fullData.name}</b><br>Block: %{x}<br>Amount: %{y:.6f}<extra></extra>",
        )
        fig.add_trace(trace)

    # Candidate markers: every million-block multiple around the data range,
    # keeping only those that fall inside it.
    range_start = int(first_block // 1_000_000) * 1_000_000
    range_stop = int(last_block // 1_000_000 + 1) * 1_000_000 + 1
    markers = [
        candidate
        for candidate in range(range_start, range_stop, 1_000_000)
        if first_block <= candidate <= last_block
    ]
    for marker in markers:
        fig.add_vline(
            x=marker,
            line_width=2,
            line_dash="dash",
            line_color="black",
            opacity=0.4,
            annotation_text=f"{marker / 1_000_000:.0f}M",
            annotation_position="top",
            annotation_font_size=12,
        )

    legend_style = {
        "title": "Provider",
        "orientation": "v",
        "yanchor": "top",
        "y": 1,
        "xanchor": "left",
        "x": 1.02,
    }
    fig.update_layout(
        title="Pool Liquidity by Provider (Absolute Values)",
        hovermode="x",
        yaxis_title="Liquidity Amount (Token Units)",
        xaxis_title="Block Number",
        legend=legend_style,
    )

    return fig


def _hhi_per_block(df):
    """Return one row per block with the HHI and the number of active providers.

    Non-finite shares (NaN, +/-inf) count as 0. Blocks appear in order of
    first occurrence in ``df``. An empty input yields an empty frame that
    still has the ``block``, ``hhi`` and ``active_providers`` columns.
    """
    # to_numpy() also densifies a sparse share column
    shares = df["share_pct"].to_numpy()
    shares = np.where(np.isfinite(shares), shares, 0)

    per_row = pd.DataFrame(
        {
            "block": df["block"].to_numpy(),
            # HHI = sum of squared market shares
            "hhi": shares**2,
            # Active provider = share above 0.01 %
            "active_providers": (shares > 0.01).astype(int),
        }
    )
    # A single grouped sum replaces filtering the whole frame once per block.
    return per_row.groupby("block", sort=False, as_index=False).sum()


def plot_ownership_concentration(df):
    """
    Calculate and plot concentration metrics (Herfindahl-Hirschman Index).

    With shares expressed in percent, the HHI of a block is the sum of the
    squared shares: close to 0 for many small holders, 10,000 for a single
    holder owning 100 %.

    Parameters:
    -----------
    df : DataFrame
        Needs the columns ``block`` and ``share_pct``.

    Returns:
    --------
    (Figure, DataFrame)
        Dual-axis figure (HHI on the primary axis, active provider count on
        the secondary axis) and the per-block metrics frame with columns
        ``block``, ``hhi``, ``active_providers``.
    """
    hhi_df = _hhi_per_block(df)

    # Create dual-axis plot
    fig = make_subplots(specs=[[{"secondary_y": True}]])

    # Add HHI trace
    fig.add_trace(
        go.Scatter(
            x=hhi_df["block"],
            y=hhi_df["hhi"],
            name="HHI (Concentration)",
            line=dict(color="#F46821", width=2),
        ),
        secondary_y=False,
    )

    # Add provider count trace
    fig.add_trace(
        go.Scatter(
            x=hhi_df["block"],
            y=hhi_df["active_providers"],
            name="Active Providers",
            line=dict(color="#29BEFD", width=2),
        ),
        secondary_y=True,
    )

    fig.update_layout(title="Pool Concentration Analysis", hovermode="x unified")

    fig.update_xaxes(title_text="Block Number")
    fig.update_yaxes(title_text="HHI Score", secondary_y=False)
    fig.update_yaxes(title_text="Number of Providers", secondary_y=True)

    # Add interpretation zones as shaded bands on the HHI axis
    fig.add_hrect(
        y0=0,
        y1=1500,
        fillcolor="green",
        opacity=0.1,
        annotation_text="Competitive",
        secondary_y=False,
    )
    fig.add_hrect(
        y0=1500,
        y1=2500,
        fillcolor="yellow",
        opacity=0.1,
        annotation_text="Moderate",
        secondary_y=False,
    )
    fig.add_hrect(
        y0=2500,
        y1=10000,
        fillcolor="red",
        opacity=0.1,
        annotation_text="Concentrated",
        secondary_y=False,
    )

    return fig, hhi_df


# ============================================================
# MAIN EXECUTION CODE
# ============================================================

# 1. Define the token address you want to analyze
chosen_address = w3.to_checksum_address("0x006B6e89eE1531cfE5b6d32da0d80CC30506A339")

# 2. Calculate liquidity distribution (sparse, no smoothing)
# NOTE: this rebinds the notebook-wide name liquidity_df used by the earlier cells.
print("Calculating pool liquidity distribution...")
liquidity_df = calculate_pool_liquidity_sparse(df, chosen_address, w3)

print(f"Total rows in liquidity data: {len(liquidity_df)}")
print(f"Block range: {liquidity_df['block'].min()} to {liquidity_df['block'].max()}")
print(f"Number of unique providers: {liquidity_df['provider'].nunique()}")

# 3. Plot percentage ownership (stacked 0-100%)
print("\nGenerating percentage ownership chart...")
fig_pct = plot_staircase_ownership(liquidity_df)
fig_pct.show()

# 4. Plot absolute liquidity amounts
print("Generating absolute liquidity chart...")
fig_abs = plot_absolute_liquidity_staircase(liquidity_df)
fig_abs.show()

# 5. Plot concentration metrics
print("Generating concentration analysis...")
fig_conc, concentration_metrics = plot_ownership_concentration(liquidity_df)
fig_conc.show()

# 6. Print summary statistics
print("\n" + "=" * 60)
print("LIQUIDITY SUMMARY")
print("=" * 60)

# Last recorded balance of every provider, largest first.
# liquidity_df is sorted by block, so .last() is the most recent row per provider.
summary = (
    liquidity_df.groupby(["provider", "provider_label"])["cum_provider"]
    .last()
    .sort_values(ascending=False)
)

for (provider, label), amount in summary.items():
    provider_checksum = w3.to_checksum_address(provider)
    # Share at the final block; no row there means the provider is reported as exited.
    final_share = liquidity_df[
        (liquidity_df["provider"] == provider_checksum)
        & (liquidity_df["block"] == liquidity_df["block"].max())
    ]["share_pct"].values

    if len(final_share) > 0:
        print(f"{label}: {amount:.6f} tokens ({final_share[0]:.2f}% of pool)")
    else:
        print(f"{label}: {amount:.6f} tokens (exited)")

print("\n" + "=" * 60)
print("CONCENTRATION METRICS")
print("=" * 60)
# NOTE(review): the .iloc[-1] lookups below raise IndexError when concentration_metrics is empty.
print(f"Average HHI: {concentration_metrics['hhi'].mean():.2f}")
print(f"Current HHI: {concentration_metrics['hhi'].iloc[-1]:.2f}")
print(f"Max providers at any block: {concentration_metrics['active_providers'].max()}")
print(f"Current active providers: {concentration_metrics['active_providers'].iloc[-1]}")

# Interpretation (same thresholds as the shaded zones in the concentration chart)
current_hhi = concentration_metrics["hhi"].iloc[-1]
if current_hhi < 1500:
    print("Pool status: ✅ COMPETITIVE (Decentralized)")
elif current_hhi < 2500:
    print("Pool status: ⚠️  MODERATE CONCENTRATION")
else:
    print("Pool status: 🔴 HIGHLY CONCENTRATED")

print("=" * 60)

In [None]:
def plot_bubble_ownership(df):
    """
    Build a single-row bubble chart of provider ownership at the latest block.

    ``df`` needs the columns ``block``, ``provider_label``, ``share_pct`` and
    ``cum_provider``. Only rows of the highest block are plotted, ordered by
    descending share; bubble diameter and colour both follow ``share_pct``.
    Returns the plotly figure.
    """
    latest_block = df["block"].max()
    latest_data = (
        df[df["block"] == latest_block]
        .copy()
        .sort_values("share_pct", ascending=False)
    )

    # Per-point strings used inside the bubbles and in the hover box.
    share_labels = latest_data["share_pct"].apply(lambda x: f"{x:.2f}%")
    share_hover = latest_data["share_pct"].apply(lambda x: f"{x:.4f}%")
    amount_hover = latest_data["cum_provider"].apply(lambda x: f"{x:.6f}")
    hover_text = (
        "<b>%{x}</b><br>Share: "
        + share_hover
        + "<br>Amount: "
        + amount_hover
        + "<extra></extra>"
    )

    bubble_style = dict(
        size=latest_data["share_pct"] * 10,  # Scale up for visibility
        sizemode="diameter",
        sizemin=20,
        color=latest_data["share_pct"],
        colorscale="Viridis",
        showscale=True,
        colorbar=dict(title="Share (%)", thickness=15, len=0.7),
        line=dict(color="white", width=2),
    )

    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=latest_data["provider_label"],
            y=[1] * len(latest_data),  # All on same horizontal line
            mode="markers+text",
            marker=bubble_style,
            text=share_labels,
            textposition="middle center",
            textfont=dict(size=14, color="white", family="Arial Black"),
            hovertemplate=hover_text,
            customdata=latest_data[["share_pct", "cum_provider"]],
        )
    )

    fig.update_layout(
        title=f"Pool Ownership at Block {latest_block} (Bubble Size = Share %)",
        xaxis=dict(title="", tickangle=-45, showgrid=False),
        yaxis=dict(visible=False, range=[0.5, 1.5]),
        height=500,
        showlegend=False,
        hovermode="closest",
        plot_bgcolor="rgba(240, 240, 240, 0.5)",
    )

    return fig


def plot_bubble_ownership_2d(df):
    """
    Build a grid-layout bubble chart of provider ownership at the latest block.

    ``df`` needs the columns ``block``, ``provider_label``, ``share_pct`` and
    ``cum_provider``. Rows of the highest block are ordered by descending
    share and placed row by row on a roughly square grid; bubble diameter and
    colour both follow ``share_pct``. Returns the plotly figure.
    """
    latest_block = df["block"].max()
    latest_data = (
        df[df["block"] == latest_block]
        .copy()
        .sort_values("share_pct", ascending=False)
        .reset_index(drop=True)
    )

    # Grid width = ceil(sqrt(n)); the 0-based rank fixes column and row.
    n_providers = len(latest_data)
    cols = int(np.ceil(np.sqrt(n_providers)))
    latest_data["x_pos"] = latest_data.index % cols
    latest_data["y_pos"] = latest_data.index // cols

    bubble_style = dict(
        size=latest_data["share_pct"] * 15,  # Scale for visibility
        sizemode="diameter",
        sizemin=30,
        color=latest_data["share_pct"],
        colorscale="RdYlGn_r",  # Red (high concentration) to Green (low)
        showscale=True,
        colorbar=dict(title="Share (%)", thickness=20, len=0.7),
        line=dict(color="darkgray", width=3),
        opacity=0.8,
    )
    # Label text before the first "(" of the provider label, e.g. "Provider A"
    short_names = latest_data["provider_label"].str.split("(").str[0].str.strip()
    hover_text = (
        "<b>%{customdata[0]}</b><br>"
        + "Share: %{customdata[1]:.4f}%<br>"
        + "Amount: %{customdata[2]:.6f}<br>"
        + "<extra></extra>"
    )
    hover_values = latest_data[["provider_label", "share_pct", "cum_provider"]].values

    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=latest_data["x_pos"],
            y=latest_data["y_pos"],
            mode="markers+text",
            marker=bubble_style,
            text=short_names,
            textposition="middle center",
            textfont=dict(size=12, color="black", family="Arial Black"),
            hovertemplate=hover_text,
            customdata=hover_values,
        )
    )

    # Percentage caption slightly below each bubble centre
    for _, row in latest_data.iterrows():
        fig.add_annotation(
            x=row["x_pos"],
            y=row["y_pos"] - 0.15,
            text=f"{row['share_pct']:.2f}%",
            showarrow=False,
            font=dict(size=10, color="white", family="Arial Black"),
            bgcolor="rgba(0,0,0,0.5)",
            borderpad=2,
        )

    fig.update_layout(
        title=f"Pool Ownership Distribution at Block {latest_block}",
        xaxis=dict(visible=False, range=[-0.5, cols - 0.5]),
        yaxis=dict(visible=False, scaleanchor="x", scaleratio=1),
        height=600,
        width=800,
        showlegend=False,
        hovermode="closest",
        plot_bgcolor="white",
    )

    return fig


# Render both bubble views of the latest-block ownership.
# Requires liquidity_df from the "MAIN EXECUTION CODE" cell.
print("\nGenerating bubble ownership chart...")
fig_bubble = plot_bubble_ownership(liquidity_df)
fig_bubble.show()

print("\nGenerating 2D bubble ownership chart...")
fig_bubble_2d = plot_bubble_ownership_2d(liquidity_df)
fig_bubble_2d.show()

In [None]:
# CODE TO RETRIEVE THE UNISWAP V1 EXCHANGE ADDRESSES

# We focus on the Uniswap V1 Factory contract "0xc0a47dFe034B400B47bDaD5FecDa2621de6c4d95".
# The ProviderNode must already be initialized.
#
# Why debug_traceTransaction is the best option:
# - It replays the transaction within the exact historical state using your
#   archive node and returns a detailed call graph, including internal calls
#   and value flows, not just high-level transfers. Tracers such as callTracer
#   output call frames with their nested structure.
# - Alternatives like event logs or transaction receipts won't capture
#   internal calls, since those are not emitted as events. A trace API is
#   needed to follow what happens inside smart contract execution.
uniswap_v1_factory_address, uniswap_v1_factory_abi, uniswap_v1_factory_contract = get_address_abi_contract(
    "0xc0a47dFe034B400B47bDaD5FecDa2621de6c4d95"
)
def trace_internal_transactions(tx_hash: str, tracer: str = "callTracer") -> dict:
    """
    Issue a raw ``debug_traceTransaction`` request for ``tx_hash`` through the
    module-level ``w3`` provider, using the given tracer (``callTracer`` by default).

    Returns the ``result`` field of the JSON-RPC response, or an empty dict when
    the response carries no ``result`` key.
    """
    tracer_options = {"tracer": tracer}
    rpc_params = [tx_hash, tracer_options]
    response = w3.provider.make_request("debug_traceTransaction", rpc_params)
    return response.get("result", {})


def extract_internal_transfers_from_trace(trace: dict) -> list:
    """
    Collect every value-carrying call frame from a call-tree trace.

    The tree is walked iteratively in pre-order (parent first, then its
    ``calls`` left to right), so arbitrarily deep traces cannot exhaust
    Python's recursion limit.

    Args:
        trace: Root call frame; each frame may hold hex-string fields
            (``value``, ``gas``, ``gasUsed``) and a ``calls`` list of sub-frames.
            An empty or ``None`` trace yields an empty list.

    Returns:
        List of dicts with keys ``from``, ``to``, ``value``, ``gas``,
        ``gasUsed``, ``type`` and ``error`` — one per frame whose value is > 0.
        Numeric fields are converted to ``int``.
    """

    def _hex_to_int(raw) -> int:
        # Missing, null or empty ("0x") quantities are treated as zero.
        if raw in (None, "", "0x"):
            return 0
        return int(raw, 16)

    transfers = []
    if not trace:
        return transfers

    pending = [trace]
    while pending:
        call = pending.pop()
        value = _hex_to_int(call.get("value"))
        if value > 0:
            transfers.append(
                {
                    "from": call.get("from"),
                    "to": call.get("to"),
                    "value": value,
                    "gas": _hex_to_int(call.get("gas")),
                    "gasUsed": _hex_to_int(call.get("gasUsed")),
                    "type": call.get("type"),
                    "error": call.get("error"),
                }
            )
        # Push children reversed so they are popped in their original order.
        pending.extend(reversed(call.get("calls") or []))

    return transfers


def get_internal_transactions_for_contract(
    contract_address: str, from_block: int, to_block: int
):
    """Scan a block range for factory ``createExchange`` calls and print what they created.

    Every transaction sent to (or from) the module-level
    ``uniswap_v1_factory_address`` is decoded with ``uniswap_v1_factory_contract``.
    For each ``createExchange`` call this prints the token's name and symbol, the
    exchange address the factory reports for that token, and the exchange's own
    ``tokenAddress``.

    Args:
        contract_address: Not used for matching; the module-level
            ``uniswap_v1_factory_address`` is compared instead. Kept so existing
            callers keep working.
        from_block: First block to scan (inclusive).
        to_block: Last block to scan (inclusive).

    Returns:
        list: Always empty at the moment — the tracing step that would populate
        it is disabled (see the commented block at the end of the loop).
    """
    results = []
    for block_num in range(from_block, to_block + 1):
        block = w3.eth.get_block(block_num, full_transactions=True)
        for tx in block.transactions:
            involves_factory = (
                tx["to"] and tx["to"] == uniswap_v1_factory_address
            ) or tx["from"] == uniswap_v1_factory_address
            if not involves_factory:
                continue

            try:
                function, params = uniswap_v1_factory_contract.decode_function_input(
                    tx["input"]
                )
            except ValueError:
                # Input does not match any function of the factory ABI (e.g. empty
                # calldata); skip it instead of aborting the whole scan.
                continue
            if function.fn_name != "createExchange":
                continue

            uni_created_token = params["token"]
            univ1_token_address = w3.to_checksum_address(uni_created_token)
            token_abi = get_abi(univ1_token_address, ETHERSCAN_API_KEY)
            token_contract = w3.eth.contract(
                address=univ1_token_address, abi=token_abi
            )
            token_name = None
            token_symbol = None
            try:
                token_name = token_contract.functions.name().call()
                token_symbol = token_contract.functions.symbol().call()
                print(f"Token Name: {token_name}")
                print(f"Token Symbol: {token_symbol}")
            except Exception:
                # `Exception` (not a bare except) so Ctrl-C can still stop a long scan.
                print(f"Contract is a proxy {uni_created_token}")

            token_created_exchange_address = (
                uniswap_v1_factory_contract.functions.getExchange(
                    uni_created_token
                ).call()
            )
            print(
                f"Token {token_name} UniExchange_address: {token_created_exchange_address}"
            )
            exchange_address = w3.to_checksum_address(token_created_exchange_address)
            exchange_abi = get_abi(exchange_address, ETHERSCAN_API_KEY)
            exchange_contract = w3.eth.contract(
                address=exchange_address, abi=exchange_abi
            )
            try:
                # The contract function must be instantiated with () before .call().
                print(
                    f"Token Address: {exchange_contract.functions.tokenAddress().call()}"
                )
            except Exception:
                print(f"proxy: {exchange_contract}")

            # Tracing is currently disabled; re-enable to populate `results`:
            #     tx_hash = tx.hash.hex()
            #     trace = trace_internal_transactions(tx_hash)
            #     transfers = extract_internal_transfers_from_trace(trace)
            #     results.append(
            #         {
            #             "tx_hash": tx_hash,
            #             "block": block_num,
            #             "internal_transfers": transfers,
            #         }
            #     )
    return results

# Scan from block 6627900 up to the current head.
# NOTE(review): the first argument is the contract object although the parameter is
# annotated `contract_address: str`; the function matches against the module-level
# factory address instead, so the argument is effectively ignored.
internal_txs = get_internal_transactions_for_contract(
    uniswap_v1_factory_contract, 6627900, w3.eth.get_block("latest").number
    #uniswap_v1_factory_contract, 6500000, w3.eth.get_block("latest").number
)
# Each entry is expected to expose "tx_hash" and "internal_transfers" keys.
for entry in internal_txs:
    print(entry["tx_hash"], entry["internal_transfers"])

In [None]:
# Key exploration: for a single transaction, decode every Transfer event found in its
# receipt and report which token moved, between which addresses, together with the ETH
# value carried by the transaction. Repeating this for every transaction of one exchange
# should make it possible to deduce all the liquidity tokens issued by that pair exchange.
# NOTE(review): the hash below has no "0x" prefix — confirm the provider accepts it.
transaction = w3.eth.get_transaction(
    "1b53439a36b357c712a4abe860607c6e4d88a002dd26f97244a8ef3208b2f8b6"
)
(
    uniswap_v1_BNB_exchange_address,
    uniswap_v1_BNB_exchange_abi,
    uniswap_v1_BNB_exchange_contract,
) = get_address_abi_contract("0x255e60c9d597dCAA66006A904eD36424F7B26286")

d_transaction = event_to_dict(transaction)
# ETH sent along with the transaction, converted from wei.
tx_eth_value = w3.from_wei(d_transaction["value"], "ether")
# DISCARD is the error-handling flag imported from web3.logs.
decoded = uniswap_v1_BNB_exchange_contract.events.Transfer().process_receipt(
    w3.eth.get_transaction_receipt(transaction.hash),
    DISCARD,
)
for ev in decoded:
    d_ev = event_to_dict(ev)
    d_from = d_ev['args']['_from']
    d_to = d_ev['args']['_to']
    # NOTE(review): from_wei(..., "ether") scales by 18 decimals; `decimals` is fetched
    # below but not applied — confirm this is right for tokens with other precisions.
    d_value = w3.from_wei(d_ev["args"]["_value"], "ether")
    d_address = d_ev['address']
    d_block = d_ev['blockNumber']
    d_tx_hash = d_ev["transactionHash"]
    # Look up the emitting contract to display the token symbol.
    token_address = w3.to_checksum_address(d_address)
    token_abi = get_abi(token_address, ETHERSCAN_API_KEY)
    token_contract = w3.eth.contract(address=token_address, abi=token_abi)
    symbol = token_contract.functions.symbol().call()
    decimals = token_contract.functions.decimals().call()
    print(
        f"Block number {d_block}, {tx_eth_value} ETH was used, {d_to} received {d_value} of {symbol} from {d_from} (tx_hash is {d_tx_hash})"
    )

def analyze_transaction_transfers(tx_hash, pair_exchange_contract, etherscan_api_key):
    """Decode the Transfer events of one transaction and describe each token movement.

    Token metadata (ABI, symbol, decimals) is looked up once per emitting
    address and reused for later events, so a receipt with many transfers of the
    same token does not repeat the Etherscan and node calls.

    Args:
        tx_hash: Hash of the transaction to analyze.
        pair_exchange_contract: Contract whose ``Transfer`` event ABI is used to
            decode the receipt logs.
        etherscan_api_key: API key forwarded to ``get_abi``.

    Returns:
        list[dict]: One dict per decoded event with keys ``d_block``, ``d_from``,
        ``d_to``, ``d_value``, ``d_address``, ``d_tx_hash``, ``symbol`` and
        ``decimals``.
    """
    transaction = w3.eth.get_transaction(tx_hash)
    tx_eth_value = w3.from_wei(event_to_dict(transaction)["value"], "ether")
    receipt = w3.eth.get_transaction_receipt(transaction.hash)
    decoded = pair_exchange_contract.events.Transfer().process_receipt(
        receipt,
        DISCARD,
    )

    token_metadata = {}  # checksum address -> (symbol, decimals)
    result = []
    for ev in decoded:
        d_ev = event_to_dict(ev)
        d_from = d_ev["args"]["_from"]
        d_to = d_ev["args"]["_to"]
        # NOTE(review): "ether" scaling assumes 18 decimals; `decimals` is reported
        # alongside but not applied — confirm for tokens with other precisions.
        d_value = w3.from_wei(d_ev["args"]["_value"], "ether")
        d_address = d_ev["address"]
        d_block = d_ev["blockNumber"]
        d_tx_hash = d_ev["transactionHash"]

        token_address = w3.to_checksum_address(d_address)
        if token_address not in token_metadata:
            token_abi = get_abi(token_address, etherscan_api_key)
            token_contract = w3.eth.contract(address=token_address, abi=token_abi)
            token_metadata[token_address] = (
                token_contract.functions.symbol().call(),
                token_contract.functions.decimals().call(),
            )
        symbol, decimals = token_metadata[token_address]

        print(
            f"Block number {d_block}, {tx_eth_value} ETH was used, {d_to} received {d_value} of {symbol} from {d_from} (tx_hash is {d_tx_hash})"
        )
        result.append(
            {
                "d_block": d_block,
                "d_from": d_from,
                "d_to": d_to,
                "d_value": d_value,
                "d_address": d_address,
                "d_tx_hash": d_tx_hash,
                "symbol": symbol,
                "decimals": decimals,
            }
        )
    return result

In [None]:
# Copy of the exploration block above, used to trace function calls block by block.
# Very useful for recovering all the liquidity operations.
# The difficulty is that we need each transaction's receipt, and the amount of liquidity
# is not straightforward to read directly from those calls.

# NOTE(review): these variables are named "factory", but the address loaded here is the
# one bound to `uniswap_v1_BNB_exchange_*` elsewhere in this file — confirm the naming.
uniswap_v1_factory_address, uniswap_v1_factory_abi, uniswap_v1_factory_contract = (
    get_address_abi_contract("0x255e60c9d597dCAA66006A904eD36424F7B26286")
)
def get_internal_transactions_for_contract(
    contract_address: str, from_block: int, to_block: int
):
    """Scan a block range and print details of decoded ``createExchange`` calls.

    Transactions with a non-empty ``to`` that target (or originate from) the
    module-level ``uniswap_v1_factory_address`` are decoded with
    ``uniswap_v1_factory_contract``; matching calls have their arguments,
    deadline, ETH value and receipt printed.

    Args:
        contract_address: Not used for matching; the module-level
            ``uniswap_v1_factory_address`` is compared instead. Kept so existing
            callers keep working.
        from_block: First block to scan (inclusive).
        to_block: Last block to scan (inclusive).

    Returns:
        list: Always empty — nothing is appended to the result list.
    """
    results = []
    for block_num in range(from_block, to_block + 1):
        block = w3.eth.get_block(block_num, full_transactions=True)
        for tx in block.transactions:
            is_relevant = tx["to"] and (
                tx["to"] == uniswap_v1_factory_address
                or tx["from"] == uniswap_v1_factory_address
            )
            if not is_relevant:
                continue

            try:
                function, params = uniswap_v1_factory_contract.decode_function_input(
                    tx["input"]
                )
            except ValueError:
                # Calldata matches no function of the loaded ABI (e.g. a plain ETH
                # transfer); skip it rather than aborting the whole scan.
                continue
            if function.fn_name != "createExchange":
                continue

            print(tx)
            print('Called function:', function.fn_name)
            print('With arguments:', params)
            # NOTE(review): assumes the decoded call has a `deadline` argument — confirm
            # against the ABI in use.
            print(f"Deadline: {datetime.fromtimestamp(params['deadline'], tz=timezone.utc)}")
            print(f"Value: {w3.from_wei(tx.value, 'ether')}")
            try:
                print(f"Receipt from transaction: {w3.eth.get_transaction_receipt(tx.hash)}")
            except Exception:
                # `Exception` (not a bare except) so Ctrl-C can still stop a long scan.
                print(f"Can't find 0x{tx.hash.hex()}")
    return results

# trace_transaction would be interesting here, but (per the original note) the node in
# use is pruned, so the call is left disabled:
# trace = w3.provider.make_request("trace_transaction", [tx_hash])
# print(trace)
# Fixed block window used for this exploration run.
block_1 = 6845140
block_2 = 6850000
internal_txs = get_internal_transactions_for_contract(
    uniswap_v1_factory_contract,
    block_1,
    block_2
    #w3.eth.get_block("latest").number,
)

In [None]:
# Very old code, kept only because it shows how to obtain event signatures.
# Let's keep it just in case.

# Example: get the Transfer event signature (plus the two liquidity events).
# NOTE(review): `token_abi` and `token_address` come from earlier notebook cells.
transfer_sig = get_event_signature("Transfer", token_abi)
add_liq_sig = get_event_signature("AddLiquidity", token_abi)
remove_liq_sig = get_event_signature("RemoveLiquidity", token_abi)
print("Transfer signature hash:", transfer_sig)

# -- Step 3: Determine Token Genesis Block and Set Starting Block --
# Uses the helper get_contract_creation_block_etherscan() to obtain the creation block
# number; scanning starts one block earlier (clamped at 0), or at 0 if the lookup fails.
try:
    genesis_block = get_contract_creation_block_etherscan(token_address, ETHERSCAN_API_KEY)
    start_block = max(genesis_block - 1, 0)
except Exception as e:
    print("Error retrieving genesis block, defaulting to block 0:", e)
    start_block = 0

# -- Step 4: Fetch Transfer Events and Dump to a File (JSON Serializing) --
def get_transfer_events_paginated(
    token_contract,
    from_block: int,
    to_block: int,
    chunk_size: int = 5000,
    max_workers: int = 1,
    event_name: str = "AddLiquidity",
    request_delay: tuple = (1, 3),
) -> list:
    """
    Fetches contract events in the block range [from_block, to_block], paginating by
    chunk_size so each request stays under the provider's result limit.

    Despite the historical name, the event fetched is selected by ``event_name``
    (default "AddLiquidity", which is what this function has always queried).

    Args:
        token_contract: A Web3 contract instance with a loaded ABI.
        from_block (int): The starting block number (inclusive).
        to_block (int): The ending block number (inclusive).
        chunk_size (int): How many blocks to query per chunk (default 5000).
        max_workers (int): Maximum number of parallel workers (default 1).
        event_name (str): Name of the contract event to fetch.
        request_delay (tuple): (min, max) seconds of random sleep before each
            request, to mitigate rate limits.

    Returns:
        List of events, in ascending chunk order. Chunks that fail (non-429 error,
        or 429 after 5 attempts) contribute no events.

    Raises:
        ValueError: If chunk_size is not a positive integer.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    # Divide the full range into inclusive (start, end) chunks.
    block_ranges = [
        (start_blk, min(start_blk + chunk_size - 1, to_block))
        for start_blk in range(from_block, to_block + 1, chunk_size)
    ]
    if not block_ranges:
        return []

    def fetch_range(brange):
        print(f"Fetching for {brange}")
        start_blk, end_blk = brange
        max_retries = 5
        for _attempt in range(max_retries):
            try:
                # Add delay to mitigate rate limits.
                time.sleep(random.uniform(*request_delay))
                event = getattr(token_contract.events, event_name)
                events = event.get_logs(from_block=start_blk, to_block=end_blk)
                print(len(events))
                return events
            except Exception as e:
                if "429" not in str(e):
                    print(f"Error fetching logs for blocks {start_blk}-{end_blk}: {e}")
                    return []
                sleep_time = random.uniform(1, 5)
                print(f"429 error for blocks {start_blk}-{end_blk}: retrying after {sleep_time:.2f} seconds...")
                time.sleep(sleep_time)
        return []  # Return empty list if all retries fail.

    events_collected = []
    # The file imports ThreadPoolExecutor directly (the bare `concurrent` name is not
    # bound). `map` yields results in submission order, keeping events chronological.
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        for events in executor.map(fetch_range, block_ranges):
            events_collected.extend(events)

    return events_collected

# Custom function to convert a web3 event (and its custom types) to a plain dict.
def serialize_event(event):
    """Convert a web3 event (mapping-like) into a plain, JSON-friendly dict.

    Byte strings (``bytes``/``bytearray`` and subclasses such as ``HexBytes``)
    are replaced by their ``.hex()`` text, at the top level, inside ``args``,
    and inside lists/tuples. Every other value is left untouched — in
    particular floats, which also expose a ``.hex()`` method and must not be
    converted.

    Args:
        event: Mapping of event fields; may contain a nested ``args`` mapping.

    Returns:
        dict: A new dict; the input event is not modified.
    """

    def _to_jsonable(value):
        if isinstance(value, (bytes, bytearray)):
            return value.hex()
        if isinstance(value, list):
            return [_to_jsonable(item) for item in value]
        if isinstance(value, tuple):
            return tuple(_to_jsonable(item) for item in value)
        return value

    event_dict = {key: _to_jsonable(value) for key, value in dict(event).items()}
    if "args" in event_dict:
        # `args` is itself mapping-like; flatten it to a plain dict as well.
        event_dict["args"] = {
            key: _to_jsonable(value) for key, value in dict(event_dict["args"]).items()
        }
    return event_dict

# Fetch logs from start_block to the current block (latest)
latest_block = w3.eth.block_number


event_list = get_transfer_events_paginated(token_contract, start_block, latest_block)
# Convert the raw web3 events to plain dicts so json.dump can serialize them.
serialized_events = [serialize_event(ev) for ev in event_list]
# Dump the result to a file (pretty-printing the JSON)
# NOTE(review): `contract_address` is not assigned in this cell — confirm it is defined
# by an earlier cell before running.
output_filename = f"transfer_events_{contract_address}.json"
with open(output_filename, "w") as f:
    json.dump(serialized_events, f, indent=4)

print(f"Dumped {len(serialized_events)} events to {output_filename}")


# Target function signature and selector.
# Three candidate targets are listed; each pair overwrites the previous one, so only the
# last assignment ("multicall") is in effect when the cell finishes.
target_types = "address,int24,int24,uint128,bytes"
function_name = "mint"

target_types = "uint16"
function_name = "increaseObservationCardinalityNext"

target_types = ("bytes[]",)
function_name = "multicall"

# Selector = first 4 bytes of the keccak hash of the canonical signature, as hex text.
target_selector = Web3.keccak(text="multicall(bytes[])")[:4].hex()
print(target_selector)
# NOTE(review): despite its name, this value is presumably the selector printed above for
# multicall(bytes[]) — confirm.
# MINT_SELECTOR = "ac9650d8"


# ----- Helper Functions -----
def load_processed_tx_hashes(file_path):
    """Return the set of non-blank, stripped lines of ``file_path`` (empty set if the file is missing)."""
    hashes = set()
    try:
        handle = open(file_path, "r")
    except FileNotFoundError:
        return hashes
    with handle:
        for raw_line in handle:
            cleaned = raw_line.strip()
            if cleaned:
                hashes.add(cleaned)
    return hashes


def append_processed_tx_hashes(new_hashes, file_path):
    """Append transaction hashes to ``file_path``, one per line.

    The file is opened in append mode (created if missing). ``None`` entries
    are skipped so the file only ever contains real hashes; the written format
    is the one ``load_processed_tx_hashes`` reads back.

    Args:
        new_hashes: Iterable of transaction hashes.
        file_path: Path of the text file to append to.
    """
    with open(file_path, "a") as f:
        f.writelines(
            f"{tx_hash}\n" for tx_hash in new_hashes if tx_hash is not None
        )


def append_mint_calls(mint_calls, file_path):
    """Append each entry of ``mint_calls`` to ``file_path`` as one JSON document per line."""
    with open(file_path, "a") as sink:
        sink.writelines(f"{json.dumps(entry)}\n" for entry in mint_calls)


def load_transactions_data(file_path):
    """Parse ``file_path`` as JSON and return the result; return ``{}`` when the file is missing or is not valid JSON."""
    try:
        with open(file_path, "r") as source:
            raw_text = source.read()
        return json.loads(raw_text)
    except (FileNotFoundError, json.JSONDecodeError):
        return {}


def sanitize_value(value):
    """
    Replace ``bytes`` with their hex text, descending into lists and tuples
    (the container type is preserved); any other value is returned as-is.
    """
    if isinstance(value, (list, tuple)):
        container = type(value)
        return container(sanitize_value(item) for item in value)
    return value.hex() if isinstance(value, bytes) else value


def decode_tx_by_function(tx, contract, target_function_name):
    """
    Decode ``tx["input"]`` against the contract ABI and, when it is a call to
    ``target_function_name``, return its arguments keyed by parameter name.

    Parameters:
      tx (dict): The transaction dictionary.
      contract: The contract instance (with ABI loaded).
      target_function_name (str): Name of the function of interest (e.g. "mint").

    Returns:
      dict or None: For a matching call:
         {
           "transaction_hash": <tx hash>,
           "blockNumber": <block number>,
           "<target_function_name>_args": { <param>: <sanitized value>, ... }
         }
         ``None`` when the input is empty, targets another function, or any
         error occurs while decoding (the error is printed).
    """
    calldata = tx.get("input", "")
    if not calldata:
        return None
    try:
        # Let the contract ABI pick the function matching the calldata.
        func_obj, params = contract.decode_function_input(calldata)
        if func_obj.fn_name == target_function_name:
            # Map each declared input name to its sanitized decoded value.
            decoded_args = {
                spec.get("name"): sanitize_value(params.get(spec.get("name")))
                for spec in func_obj.abi.get("inputs", [])
            }
            return {
                "transaction_hash": tx.get("hash"),
                "blockNumber": tx.get("blockNumber"),
                f"{target_function_name}_args": decoded_args,
            }
    except Exception as e:
        print(f"Error decoding {target_function_name} call in tx {tx.get('hash')}: {e}")
    return None


def process_single_transaction(tx, target_selector, target_types, function_name):
    """
    Process a single transaction.
    If the transaction's input starts with the target selector, decode the parameters
    according to target_types and return a dict with the function call details;
    otherwise return None.

    The selector and the input are compared on their bare lowercase hex digits, so
    either may be given with or without a leading "0x". Exactly the selector is
    stripped before decoding.

    Parameters:
      tx (dict): A transaction dictionary.
      target_selector (str): The 4-byte function selector as hex, with or without "0x"
                             (e.g. "0xabcdef12" or "abcdef12").
      target_types (list | tuple | str): ABI types of the function's parameters
                           (e.g. ["address", "int24", "int24", "uint128", "bytes"]);
                           a comma-separated string is split into a list.
      function_name (str): A label for the function being decoded (e.g. "mint").

    Returns:
      dict or None: A dictionary containing the transaction hash, block number, and a key
                    named "<function_name>_args" mapped to the tuple of decoded, sanitized
                    parameters; None if the selector does not match or decoding fails
                    (the error is printed).
    """

    def _bare_hex(data):
        # Lowercase hex digits without any "0x" prefix; accepts str or bytes-like.
        if isinstance(data, (bytes, bytearray)):
            data = data.hex()
        text = data.lower()
        return text[2:] if text.startswith("0x") else text

    input_data = tx.get("input", "")
    if not input_data:
        return None

    calldata_hex = _bare_hex(input_data)
    selector_hex = _bare_hex(target_selector)
    if not calldata_hex.startswith(selector_hex):
        return None

    if isinstance(target_types, str):
        # A plain string would otherwise be iterated character by character.
        target_types = [t.strip() for t in target_types.split(",") if t.strip()]

    try:
        # NOTE(review): `decode` is expected to be an ABI decoder (e.g. eth_abi.decode)
        # brought into scope elsewhere in the notebook — confirm it is imported.
        encoded_args = bytes.fromhex(calldata_hex[len(selector_hex):])
        raw_params = decode(target_types, encoded_args)
        # Sanitize: convert any bytes into hex strings.
        sanitized_params = tuple(sanitize_value(p) for p in raw_params)
        return {
            "transaction_hash": tx.get("hash"),
            "blockNumber": tx.get("blockNumber"),
            f"{function_name}_args": sanitized_params,
        }
    except Exception as e:
        print(f"Error decoding {function_name} call in tx {tx.get('hash')}: {e}")
        return None


def process_transactions_in_batches(
    transactions_file,
    processed_tx_file,
    mint_calls_file,
    batch_size,
    target_selector,
    target_types,
    function_name,
):
    """
    Scan the transactions stored in transactions_file for calls to the target function.

    Steps:
      1. Load all transactions (dict of tx_hash -> tx data) and the set of hashes
         already recorded in processed_tx_file.
      2. Submit every not-yet-processed transaction to a 10-worker thread pool
         running process_single_transaction.
      3. As results complete, buffer matching calls and handled hashes; each time
         the completed count reaches a multiple of batch_size, flush the call
         buffer to mint_calls_file (one JSON object per line) and the hash buffer
         to processed_tx_file.
      4. After the pool drains, flush whatever is left in both buffers.
    """
    all_tx = load_transactions_data(transactions_file)
    processed = load_processed_tx_hashes(processed_tx_file)

    # Skip anything whose hash was recorded by a previous run.
    pending = [tx for tx in all_tx.values() if tx.get("hash") not in processed]
    print(f"Total transactions to process: {len(pending)}")

    hashes_buffer = set()
    calls_buffer = []

    with ThreadPoolExecutor(max_workers=10) as pool:
        tx_by_future = {}
        for tx in pending:
            job = pool.submit(
                process_single_transaction,
                tx,
                target_selector,
                target_types,
                function_name,
            )
            tx_by_future[job] = tx

        for done_count, job in enumerate(as_completed(tx_by_future), start=1):
            hashes_buffer.add(tx_by_future[job].get("hash"))
            outcome = job.result()
            if outcome is not None:
                calls_buffer.append(outcome)

            # Only flush on batch boundaries.
            if done_count % batch_size != 0:
                continue

            if calls_buffer:
                print(type(calls_buffer))
                print(calls_buffer)
                append_mint_calls(calls_buffer, mint_calls_file)
                print(
                    f"Flushed {len(calls_buffer)} mint call entries to {mint_calls_file}."
                )
                calls_buffer = []
            if hashes_buffer:
                append_processed_tx_hashes(hashes_buffer, processed_tx_file)
                hashes_buffer = set()

    # Final flush for the partial last batch.
    if calls_buffer:
        append_mint_calls(calls_buffer, mint_calls_file)
        print(
            f"Flushed remaining {len(calls_buffer)} mint call entries to {mint_calls_file}."
        )
    if hashes_buffer:
        append_processed_tx_hashes(hashes_buffer, processed_tx_file)
        print(
            f"Updated processed transaction file with remaining {len(hashes_buffer)} entries."
        )

    print("Transaction processing complete.")

In [None]:
# This code scans blocks one by one to find every transaction sent from or to a single address.
# It is very slow, but it allows a very thorough investigation.

def get_internal_transactions_with_trace(
    contract_address: str,
    from_block: int,
    to_block: int,
    max_workers: int = 16,
):
    """
    Collect every top-level transaction sent to or from ``contract_address`` in
    the block range, attaching the ``trace_transaction`` output when available.

    Blocks are fetched through the module-level ``w3`` instance, one task per
    block, on a thread pool.

    Parameters:
        contract_address: Ethereum contract address (string).
        from_block: Starting block number (int, inclusive).
        to_block: Ending block number (int, inclusive).
        max_workers: Number of threads for parallel fetching.

    Returns:
        List of dictionaries (in task-completion order), each containing:
            - 'transaction': The transaction object.
            - 'internal_calls': The trace result, or an empty list if the
              trace request raised.
    """

    def process_block(block_number: int):
        """Return the matching transactions of one block, each with its trace."""
        block = w3.eth.get_block(block_number, full_transactions=True)
        matches = []

        for tx in block.transactions:
            # Keep only top-level transactions to/from the contract.
            touches_contract = (
                tx["to"] == contract_address or tx["from"] == contract_address
            )
            if not touches_contract:
                continue

            try:
                internal_calls = w3.manager.request_blocking(
                    "trace_transaction", [tx["hash"].hex()]
                )
            except Exception:
                # Trace request failed (e.g. trace API not enabled): no internal calls.
                internal_calls = []

            matches.append({"transaction": tx, "internal_calls": internal_calls})

        return matches

    collected = []
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        pending = [
            pool.submit(process_block, number)
            for number in range(from_block, to_block + 1)
        ]
        for finished in as_completed(pending):
            collected.extend(finished.result())

    return collected

# Use the snake_case helper, consistent with the other `to_checksum_address` calls in this
# file; the camelCase `toChecksumAddress` name is not available in recent web3.py releases.
contract_address = Web3.to_checksum_address("0x255e60c9d597dCAA66006A904eD36424F7B26286")
from_block = 6845140
to_block = 6850000
txs_with_internal = get_internal_transactions_with_trace(
    contract_address, from_block, to_block
)
for tx_entry in txs_with_internal:
    print(tx_entry)

In [None]:
# Important code.
# We load the genesis Uniswap factory and list all of its events (the factory has only
# one: 'NewExchange').
# Then we scan from block 0 to the latest block for every NewExchange created by this
# factory.
# (The event list below acts as a filter: for contracts that emit several events, comment
# out the ones we do not care about.)
address, abi, contract = get_address_abi_contract("0xC0A47DFE034B400B47BDAD5FECDA2621DE6C4D95") # Uniswap Genesis Factory
start_block = 0
end_block = 'latest'
# list all event names
event_names = [ev.event_name for ev in contract.events]
print(event_names)

# define which events you want and filters directly
events_to_scan = [
    contract.events.NewExchange().get_logs,
    #contract.events.Transfer().get_logs,
    #contract.events.Approval().get_logs,
]
# Accumulates every log returned by the selected events; consumed by the next step.
L_LOGS = [] # IMPORTANT
for get_logs_fn in events_to_scan:
    logs = get_logs_fn(
        from_block=start_block,
        to_block=end_block,
        argument_filters={},  # or {"from": some_address}, {"to": [addr1, addr2]}
    )
    for log in logs:
        # print(log["transactionHash"].hex(), log["blockNumber"], log["event"])
        L_LOGS.append(log)

# Important code, used together with the event filter above.
# From the list of exchanges created by the Uniswap V1 factory contract, we list all the
# events each exchange exposes and build the dictionary:
# "exchange_address_1": {"event_1": {}, "event_2": {}, "event_3": {}}
# This dictionary later lets us retrieve every transaction together with the events (logs)
# of each exchange, from which liquidity can be derived.

FULL_EVENT_BY_CONTRACTS = {}  # IMPORTANT
for i in L_LOGS:
    add, abi, contract = get_address_abi_contract(i.args.exchange)
    event_names = [ev.event_name for ev in contract.events]
    FULL_EVENT_BY_CONTRACTS[add] = {event: {} for event in event_names}
    # Pause one second between contract lookups.
    time.sleep(1)

print(len(FULL_EVENT_BY_CONTRACTS)) # ~ 4019
filename = "real/FULL_EVENT_BY_CONTRACTS.json"
if os.path.exists(filename):
    # Never overwrite an existing dump.
    print(f"{filename} already exists!")
else:
    with open(filename, "w", encoding="utf-8") as f:
        # Persist the mapping itself (not the file name).
        json.dump(FULL_EVENT_BY_CONTRACTS, f, ensure_ascii=False, indent=4)

In [None]:
# Prints the name of the token behind each Uniswap V1 pair exchange.
# The file is opened with a context manager (so the handle is closed) and with the same
# encoding it was written with.
with open(r"real/FULL_EVENT_BY_CONTRACTS.json", encoding="utf-8") as f:
    FULL_EVENT_BY_CONTRACTS = json.load(f)
# We often need the initial factory information.
uniswap_factory_address, uniswap_factory_abi, uniswap_factory_contract = (
    get_address_abi_contract(Web3.to_checksum_address("0xC0A47DFE034B400B47BDAD5FECDA2621DE6C4D95"))
)  # Uniswap Genesis Factory
result = []
for exchange_address in FULL_EVENT_BY_CONTRACTS:
    # Reset per iteration so the error message never reports an unbound or stale token.
    token_addr = None
    try:
        token_addr = uniswap_factory_contract.functions.getToken(exchange_address).call()
        token_name = get_token_name_by_contract(token_addr)
    except Exception as e:
        print(
            f"Something went wrong: {e} for: Exchange {exchange_address}, underlying token {token_addr}"
        )
        continue
    if isinstance(token_name, bytes):
        # Byte names are decoded leniently and stripped of NUL padding.
        name = token_name.decode("utf-8", errors="ignore").replace("\x00", "").strip()
    else:
        name = token_name.strip()
    result.append((name, exchange_address))

print(result)

In [None]:
# Bubble chart comparing each provider's pool share at the latest block with the share at
# the previous recorded block (when there is one).
latest = liquidity_df["block"].max()
blocks = sorted(liquidity_df["block"].unique())
# Second-to-last distinct block, or None when only one block is present.
prev = blocks[-2] if len(blocks) >= 2 else None

df_now = liquidity_df[liquidity_df["block"] == latest][["provider", "share_pct"]].copy()
if prev is not None:
    df_prev = (
        liquidity_df[liquidity_df["block"] == prev][["provider", "share_pct"]]
        .copy()
        .rename(columns={"share_pct": "share_prev"})
    )
    # Left join: providers absent from the previous block get a previous share of 0.
    df = df_now.merge(df_prev, on="provider", how="left")
    df["share_prev"] = df["share_prev"].fillna(0.0)
else:
    df = df_now.copy()
    df["share_prev"] = 0.0

# Scale marker areas so the largest share maps to a bubble of about `desired_px` pixels;
# the 1e-9 floor avoids a zero sizeref when all shares are 0.
max_share = max(df["share_pct"].max(), df["share_prev"].max(), 1e-9)
desired_px = 80
sizeref = 2 * max_share / (desired_px**2)

fig = go.Figure()

# Previous-block bubbles are drawn first, in grey, behind the current ones.
if prev is not None:
    fig.add_trace(
        go.Scatter(
            x=df["provider"],
            y=df["share_prev"],
            mode="markers",
            name=f"Block {prev}",
            marker=dict(
                size=df["share_prev"],
                sizemode="area",
                sizeref=sizeref,
                color="lightgrey",
                opacity=0.4,
                line=dict(color="grey", width=1),
            ),
            hovertemplate="Provider: %{x}<br>Prev share: %{marker.size:.2%}<extra></extra>",
        )
    )

# NOTE(review): the "%" format specifiers used for the labels and hover text multiply the
# value by 100, while another chart in this file prints `share_pct` with a literal "%"
# suffix — confirm whether `share_pct` is a fraction (0-1) or already a percentage.
fig.add_trace(
    go.Scatter(
        x=df["provider"],
        y=df["share_pct"],
        mode="markers+text",
        name=f"Block {latest}",
        marker=dict(
            size=df["share_pct"],
            sizemode="area",
            sizeref=sizeref,
            color="steelblue",
            line=dict(color="DarkSlateGrey", width=1),
        ),
        text=df["share_pct"].apply(lambda s: f"{s:.1%}"),
        textposition="top center",
        hovertemplate="Provider: %{x}<br>Current share: %{marker.size:.2%}<extra></extra>",
    )
)

fig.update_layout(
    title=f"Liquidity share per provider at block {latest}",
    xaxis=dict(title="Provider"),
    yaxis=dict(title="Share of pool"),
    showlegend=True,
    margin=dict(t=50, b=100),
)

fig.show()