### Imports

In [None]:
from web3 import Web3
import requests
import json


from web3 import AsyncWeb3
from web3.providers.persistent import WebSocketProvider
import asyncio
import threading
from time import sleep
from typing import List
import random
import os

### Funcs

In [None]:
# 1. Fetch a verified contract's ABI through the Etherscan API
def fetch_abi(address: str) -> list:
    """Return the ABI of the verified contract at *address*.

    The API key is taken from the ``ETHERSCAN_API_KEY`` environment variable
    when it is set; otherwise the key embedded below is used.

    Args:
        address: Contract address to look up.

    Returns:
        The ABI decoded from the JSON string in the response's ``result`` field.

    Raises:
        requests.HTTPError: The endpoint answered with an HTTP error status.
        ValueError: Etherscan reported a failure, or ``result`` is not valid JSON.
    """
    # NOTE(review): a key committed to source should be rotated and supplied
    # through the environment only; the literal remains as a fallback so
    # existing runs keep working.
    ETHERSCAN_API_KEY = os.environ.get(
        "ETHERSCAN_API_KEY", "QGS1VSEPNEHZ8W4M686QCNM1Z6WGIT14Q1"
    )
    resp = requests.get(
        "https://api.etherscan.io/api",
        params={
            "module": "contract",
            "action": "getabi",
            "address": address,
            "apikey": ETHERSCAN_API_KEY,
        },
        timeout=30,  # without a timeout a stalled connection blocks forever
    )
    resp.raise_for_status()
    payload = resp.json()
    if payload.get("status") != "1":
        # On failure "result" carries an error message rather than ABI JSON,
        # so report it directly instead of letting json.loads choke on it.
        raise ValueError(
            f"Etherscan getabi failed for {address}: "
            f"{payload.get('message')} ({payload.get('result')})"
        )
    return json.loads(payload["result"])


### Initializations

In [None]:
# 0. Set up the RPC (HTTP) w3 connection
# The Infura key is read from the INFURA_API_KEY environment variable when it
# is set; the embedded literal remains as a fallback so existing runs keep working.
# NOTE(review): a key committed to source should be rotated and kept out of the file.
api_key = os.environ.get("INFURA_API_KEY", "75db4a2f907d4525866a728681b3b458")
infura_url = f"https://mainnet.infura.io/v3/{api_key}" # select mainnet or sepolia here
w3 = Web3(Web3.HTTPProvider(infura_url))

### Keeping track & serializing all pairs in factory contract

In [None]:
# Instantiate factory contract
# The raw address string is kept alongside the checksummed form because the
# log filters further down pass factory_address_string as their "address".
factory_address_string = "0x5C69bEe701ef814a2B6a3EDD4B1652CB9cc5aA6f"
factory_address = Web3.to_checksum_address(factory_address_string)
# The ABI is downloaded at run time through fetch_abi
factory_abi     = fetch_abi(factory_address)
factory         = w3.eth.contract(address=factory_address, abi=factory_abi)

# Shared state globals
# Location of the JSON snapshot read by load_state and written by dump_state
PAIRADDR_PATH = "/Volumes/Extreme SSD/arbot_data/pairaddr.json"
# Scanning starts here when no snapshot exists (load_state falls back to
# DEPLOY_BLOCK - 1); presumably the factory's deployment block -- TODO confirm
DEPLOY_BLOCK = 10_008_355
# Pair addresses, in the order they were appended
pairaddr: List[str] = []
# Last block recorded as synced; updated by process_log_range and
# _handle_pair_creation, and stored in the snapshot
last_synced_block = 0
# Held around dump_state calls
pairaddr_lock = asyncio.Lock()
# Set by initial_pairaddr_sync when its catch-up is done; _handle_pair_creation
# waits on it before touching the list
initial_update = asyncio.Event()
# Hash of the event signature, used as the topic filter for PairCreated logs
pair_created_topic = w3.keccak(text="PairCreated(address,address,address,uint256)")


# (0) Functions to read/write from disk
def load_state():
    """Populate the globals pairaddr and last_synced_block from PAIRADDR_PATH.

    When no file exists at that path the list starts empty and
    last_synced_block is set to DEPLOY_BLOCK - 1. Keys missing from an
    existing file fall back to those same defaults.
    """
    global pairaddr, last_synced_block

    # Nothing saved yet: start from scratch
    if not os.path.isfile(PAIRADDR_PATH):
        last_synced_block = DEPLOY_BLOCK - 1
        pairaddr          = []
        return

    with open(PAIRADDR_PATH, "r") as snapshot_file:
        snapshot = json.load(snapshot_file)

    last_synced_block = snapshot.get("last_synced_block", DEPLOY_BLOCK - 1)
    pairaddr          = snapshot.get("pairs", [])

async def dump_state():
    """Write last_synced_block and pairaddr to PAIRADDR_PATH as JSON.

    The data goes to a sibling ".tmp" file first and is then moved over the
    real path with os.replace, so the existing file is only ever swapped for
    a completely written one. The caller is expected to hold pairaddr_lock.
    """
    staging_path = f"{PAIRADDR_PATH}.tmp"
    snapshot = {"last_synced_block": last_synced_block, "pairs": pairaddr}

    with open(staging_path, "w") as out:
        json.dump(snapshot, out)

    # Swap the finished file into place
    os.replace(staging_path, PAIRADDR_PATH)


# (1) Functions to fetch all new pairs created up to current block
async def process_log_range(from_blk: int, to_blk: int) -> int:
    """Fetch PairCreated logs for blocks from_blk..to_blk and record new pairs.

    Logs are ordered by (block number, transaction index, log index); every
    pair address not already present is appended to the global pairaddr.
    last_synced_block is then set to to_blk, and the state is written to disk
    under pairaddr_lock when at least one pair was added.

    Args:
        from_blk: First block of the range (sent as "fromBlock").
        to_blk: Last block of the range (sent as "toBlock").

    Returns:
        Number of pair addresses appended by this call.
    """
    global last_synced_block

    # NOTE(review): w3 is the synchronous HTTP client, so this call is not
    # awaited and the coroutine does not yield while the request is in flight.
    logs = w3.eth.get_logs({
        "fromBlock": from_blk,
        "toBlock": to_blk,
        "address": factory_address_string,
        "topics": [pair_created_topic],
    })

    # sort by (block, tx, log index)
    ordered = sorted(logs, key=lambda entry: (
        entry["blockNumber"],
        entry["transactionIndex"],
        entry["logIndex"],
    ))

    # Build the lookup set once per call: testing membership against the list
    # itself costs a full scan for every log. Skipped when there is nothing
    # to check.
    known = set(pairaddr) if ordered else set()
    new = 0
    for log in ordered:
        decoded_log = factory.events.PairCreated().process_log(log)
        pair_addr = decoded_log["args"]["pair"]
        if pair_addr in known:
            continue
        known.add(pair_addr)
        pairaddr.append(pair_addr)
        new += 1
    last_synced_block = to_blk

    # persist under lock
    if new:
        async with pairaddr_lock:
            await dump_state()

    return new


async def initial_pairaddr_sync(chunk_size: int = 50_000):
    """Catch pairaddr up with the chain, then set the initial_update event.

    Starting at last_synced_block + 1, block ranges of at most chunk_size
    blocks are handed to process_log_range, with a 2-second pause after each.
    A final pass covers blocks that appeared while the chunks were processed.

    Args:
        chunk_size: Maximum number of blocks requested per process_log_range call.
    """
    # 1) Historical catch-up in chunks
    start = last_synced_block + 1
    latest = w3.eth.block_number
    for blk in range(start, latest + 1, chunk_size):
        end = min(blk + chunk_size - 1, latest)
        new = await process_log_range(blk, end)
        print(f"Processed blocks {blk}-{end}: {new} new → total {len(pairaddr)}")
        await asyncio.sleep(2)

    # 2) Final "bridge" to catch anything that happened during step 1
    head = w3.eth.block_number
    if head > last_synced_block:
        # Remember the start before the call: process_log_range moves
        # last_synced_block to head, so reading it afterwards would make the
        # message report an empty range.
        bridge_start = last_synced_block + 1
        new = await process_log_range(bridge_start, head)
        print(f"Bridge blocks {bridge_start}-{head}: {new} new → total {len(pairaddr)}")

    # 3) Now that the list is in sync with the chain head, open the gate
    initial_update.set()


# (2) Functions to continuously monitor and update pairs
async def _handle_pair_creation(log):
    """Record the pair announced by one live PairCreated log.

    Waits until initial_update is set, decodes the log, and, if the pair
    address is not yet in pairaddr, appends it, moves the checkpoint and
    writes the state to disk while holding pairaddr_lock.

    Args:
        log: Raw log entry as delivered by the subscription.
    """
    global last_synced_block

    # Wait until completion of initial sync
    await initial_update.wait()

    decoded_log = factory.events.PairCreated().process_log(log)
    addr = decoded_log["args"]["pair"]
    async with pairaddr_lock:
        if addr in pairaddr:
            return
        pairaddr.append(addr)
        # Checkpoint the block *before* this log's block: more PairCreated
        # logs from the same block may still be pending, and marking the block
        # itself as synced would make a restart skip them. Re-scanning this
        # block later is harmless because known addresses are ignored.
        # max() keeps the checkpoint from ever moving backwards.
        last_synced_block = max(last_synced_block, log["blockNumber"] - 1)
        await dump_state()
        print(f"Appended live pair {addr} (total now {len(pairaddr)})")

async def monitor_pairs():
    """Subscribe to the factory's PairCreated logs over WebSocket.

    Every message received on the subscription has its "result" field passed
    to _handle_pair_creation.
    """
    ws_endpoint = f"wss://mainnet.infura.io/ws/v3/{api_key}"
    async with AsyncWeb3(WebSocketProvider(ws_endpoint)) as w3s:
        log_filter = {
            "address": factory_address_string,
            "topics": [pair_created_topic]
        }
        # The returned subscription id is not needed afterwards
        await w3s.eth.subscribe("logs", log_filter)

        async for notification in w3s.socket.process_subscriptions():
            await _handle_pair_creation(notification["result"])

In [None]:
# Entrypoint: keeps pairaddr up to date for as long as it runs
async def main():
    """Load saved state, then run the historical sync and the live monitor together."""
    # 1. Restore whatever was previously dumped to disk
    load_state()

    # 2. Historical catch-up and 3. continuous monitoring, awaited concurrently
    catch_up = initial_pairaddr_sync()
    live_feed = monitor_pairs()
    await asyncio.gather(catch_up, live_feed)

# Top-level await: only valid where the environment supports it (this file is
# laid out as notebook cells); a plain script would need asyncio.run(main()).
await main()

In [None]:
# Testing
def test_rand_pair():
    """Spot-check one random index: the factory's allPairs(i) must equal pairaddr[i]."""
    test_i = random.randint(0, len(pairaddr) - 1)

    on_chain = factory.functions.allPairs(test_i).call()
    recorded = pairaddr[test_i]
    assert on_chain == recorded

    print(f"Assertion passed for pair # {test_i}")

# Spot-check one random pair per second. The loop has no exit condition: it
# stops only when interrupted or when test_rand_pair raises.
while True:
    test_rand_pair()
    sleep(1)

In [17]:
# Up to date? Compare the local list length with the factory's allPairsLength()
len(pairaddr) == factory.functions.allPairsLength().call()

True

In [12]:
# Compare the entry 10 positions below the on-chain pair count with the local list
curr_num = factory.functions.allPairsLength().call() - 10
factory.functions.allPairs(curr_num).call() == pairaddr[curr_num]

True

In [18]:
# First entry of the local pair list
pairaddr[0]

'0xB4e16d0168e52d35CaCD2c6185b44281Ec28C9Dc'

In [None]:
# Do all pair contracts use the same ABI? (It looks like they do, so the ABI
# fetched for a single pair is kept in pair_abi for reuse.)
pair_num = -1  # index into pairaddr; -1 selects the last entry

pair_address = Web3.to_checksum_address(pairaddr[pair_num])
pair_abi     = fetch_abi(pair_address)