In [1]:
import pandas as pd

In [2]:
# Raw string: the Windows backslashes are kept literally instead of relying on
# "\p" and "\s" being unrecognised escape sequences (which triggers a warning
# on recent Python versions). The resulting path is unchanged.
fn = r"X:\pomabuster\swap\swap-000000000000"

In [3]:
# Load the swap dump; lines=True parses the file as one JSON object per line.
df = pd.read_json(fn, lines=True)

In [4]:
# Display one randomly chosen row to inspect the column layout.
df.sample()

Unnamed: 0,transaction_hash,token_address,from_address,to_address,value,operator,recipient,is_trade
34512,0x5f208ed113c84bbc8e99e544ee9ce22f23b7d3b45df1...,"[0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2, 0...","[0x8661ae7918c0115af9e3691662f605e9c550ddc9, 0...","[0xb0402c6dbd0d8dd4cc5c6d928f2a27a1935fe902, 0...","[1455298939524891316, 1520000000000000000000]",0x8661ae7918c0115af9e3691662f605e9c550ddc9,0x8661ae7918c0115af9e3691662f605e9c550ddc9,True


In [5]:
from dataclasses import dataclass

@dataclass
class Log:
    """One transfer entry of a transaction record.

    Instances are created by ``parse_record``, which takes the i-th element of
    each list-valued column of a DataFrame row.
    """
    sender: str    # i-th element of the record's ``from_address`` list
    receiver: str  # i-th element of the record's ``to_address`` list
    amount: str    # i-th element of the record's ``value`` list; compared to the string "0" by the transfer predicates
    asset: str     # i-th element of the record's ``token_address`` list
    idx: int       # position i of this entry within the record's lists
    
@dataclass
class Trade:
    """An exchange of one asset for another, derived from two ``Log`` entries.

    Instances are produced by the ``has_*`` generator functions of this
    notebook; which side of which transfer fills each field depends on the
    generator that built the trade.
    """
    operator: str    # address the generator treats as initiating the trade
    recipient: str   # address the generator treats as receiving the output asset
    pool: str        # address the generator treats as the counterparty between the two transfers
    asset_in: str    # asset given up
    asset_out: str   # asset received
    amount_in: str   # amount of ``asset_in``; converted with int() by is_arbitrage
    amount_out: str  # amount of ``asset_out``; converted with int() by is_arbitrage
    log_idx: list    # ``idx`` values of the Log entries the trade was derived from

In [16]:
import itertools

# Address that marks a mint when it is the sender and a burn when it is the
# receiver (see is_transfer_minting / is_transfer_burning).
ZERO_ADDRESS = "0x0000000000000000000000000000000000000000"
# Amounts are compared as strings, so a zero-value transfer is the literal "0".
ZERO_AMOUNT = "0"
def is_transfer_normal(log):
    """Tell whether ``log`` is an ordinary transfer.

    A transfer is ordinary when sender and receiver differ, neither of them is
    ``ZERO_ADDRESS``, and the amount is not ``ZERO_AMOUNT``.
    """
    src, dst = log.sender, log.receiver
    if src == dst or src == ZERO_ADDRESS or dst == ZERO_ADDRESS:
        return False
    return log.amount != ZERO_AMOUNT

def is_transfer_minting(log):
    """Tell whether ``log`` is a mint: sent from ``ZERO_ADDRESS`` with a non-zero amount."""
    if log.sender != ZERO_ADDRESS:
        return False
    return log.amount != ZERO_AMOUNT

def is_transfer_burning(log):
    """Tell whether ``log`` is a burn: sent to ``ZERO_ADDRESS`` with a non-zero amount."""
    if log.receiver != ZERO_ADDRESS:
        return False
    return log.amount != ZERO_AMOUNT

def has_transfer_normal(logs):
    """Return, in order, the entries of ``logs`` accepted by ``is_transfer_normal``."""
    return list(filter(is_transfer_normal, logs))

def has_transfer_minting(logs):
    """Return, in order, the entries of ``logs`` accepted by ``is_transfer_minting``."""
    return list(filter(is_transfer_minting, logs))

def has_transfer_burning(logs):
    """Return, in order, the entries of ``logs`` accepted by ``is_transfer_burning``."""
    return list(filter(is_transfer_burning, logs))

def has_liquidity_mining(logs):
    """Yield a ``Trade`` for every (normal transfer, mint) combination with different assets.

    Args:
        logs: iterable of ``Log`` entries belonging to one transaction.

    Yields:
        Trade: the normal transfer supplies operator (its sender), pool (its
        receiver), ``asset_in`` and ``amount_in``; the mint supplies recipient
        (its receiver), ``asset_out`` and ``amount_out``. ``log_idx`` holds
        the ``idx`` of both entries.

    Nothing is yielded when the transaction has no normal transfer or no mint.
    """
    ts = has_transfer_normal(logs)
    tms = has_transfer_minting(logs)
    if not ts or not tms:
        return
    for t, tm in pairwise(ts, tms):
        if t.asset != tm.asset:
            yield Trade(
                operator=t.sender,
                recipient=tm.receiver,
                pool=t.receiver,
                asset_in=t.asset,
                asset_out=tm.asset,
                amount_in=t.amount,
                amount_out=tm.amount,
                log_idx=[t.idx, tm.idx],
            )
    # The generator ends only after every combination has been examined; a
    # return inside the loop body would stop it after the first pair.

def has_liquidity_cancel(logs):
    """Yield a ``Trade`` for every (normal transfer, burn) combination with different assets.

    The burn supplies operator (its sender), ``asset_in`` and ``amount_in``;
    the normal transfer supplies recipient (its receiver), pool (its sender),
    ``asset_out`` and ``amount_out``. Nothing is yielded when either kind of
    transfer is absent from ``logs``.
    """
    transfers = has_transfer_normal(logs)
    burns = has_transfer_burning(logs)
    if transfers and burns:
        for transfer, burn in pairwise(transfers, burns):
            if not transfer or not burn or transfer.asset == burn.asset:
                continue
            yield Trade(
                operator=burn.sender,
                recipient=transfer.receiver,
                pool=transfer.sender,
                asset_in=burn.asset,
                asset_out=transfer.asset,
                amount_in=burn.amount,
                amount_out=transfer.amount,
                log_idx=[transfer.idx, burn.idx],
            )

def pairwise(iterable_1, iterable_2=None):
    """Return the Cartesian product of the two iterables.

    When ``iterable_2`` is omitted, ``iterable_1`` is combined with itself.
    Note that this is a full product, unlike ``itertools.pairwise``, which
    only pairs adjacent elements.
    """
    second = iterable_1 if iterable_2 is None else iterable_2
    return itertools.product(iterable_1, second)

def has_trade(logs):
    t = has_transfer_normal(logs)
    if not t:
        return None
    for t1, t2 in itertools.pairwise(t):
        if t1 and t2 and t1.asset != t2.asset and t1.receiver == t2.sender:
            trade_dict = {"operator": t1.sender, 
                    "recipient": t2.receiver, 
                    "pool": t1.receiver, 
                    "asset_in": t1.asset,
                    "asset_out": t2.asset,
                    "amount_in": t1.amount,
                    "amount_out": t2.amount,
                    "log_idx": [t1.idx, t2.idx]}
            yield Trade(**trade_dict)
    return None

def has_trade_alt(logs):
    """Yield a ``Trade`` for each liquidity-mining trade that is undone by a liquidity-cancel trade.

    Every trade from ``has_liquidity_mining`` is combined with every trade from
    ``has_liquidity_cancel``. A combination is reported when the mining
    recipient is the cancel operator, the mining ``asset_in`` differs from the
    cancel ``asset_out``, and the mining ``asset_out`` differs from the cancel
    ``asset_in``.

    Args:
        logs: iterable of ``Log`` entries belonging to one transaction.

    Yields:
        Trade: input side taken from the mining trade, output side from the
        cancel trade; ``log_idx`` is the concatenation of both ``log_idx``
        lists.
    """
    # Both helpers are generators, and a generator object is always truthy, so
    # they must be materialised before emptiness can be tested.
    lms = list(has_liquidity_mining(logs))
    lcs = list(has_liquidity_cancel(logs))
    if not lms or not lcs:
        return
    # Every mining trade is compared with every cancel trade, so the local
    # product helper is needed; itertools.pairwise accepts only one iterable.
    for lm, lc in pairwise(lms, lcs):
        if (lm.recipient == lc.operator
                and lm.asset_in != lc.asset_out
                and lm.asset_out != lc.asset_in):
            yield Trade(
                operator=lm.operator,
                recipient=lc.recipient,
                pool=lm.recipient,
                asset_in=lm.asset_in,
                asset_out=lc.asset_out,
                amount_in=lm.amount_in,
                amount_out=lc.amount_out,
                # Trade carries ``log_idx`` (a list), not ``idx``.
                log_idx=[*lm.log_idx, *lc.log_idx],
            )

In [9]:
# Pick the row at position 34512 (the row shown by df.sample() above) for a step-by-step walk-through.
sample = df.iloc[34512]

In [13]:
def parse_record(record):
    """Turn one DataFrame row into a list of ``Log`` entries.

    The row's ``token_address``, ``from_address``, ``to_address`` and ``value``
    attributes are read as parallel sequences; entry ``i`` of each becomes one
    ``Log`` whose ``idx`` is ``i``. The number of entries is the length of
    ``token_address``.
    """
    assets = record.token_address
    senders = record.from_address
    receivers = record.to_address
    amounts = record.value
    return [
        Log(
            idx=position,
            asset=assets[position],
            sender=senders[position],
            receiver=receivers[position],
            amount=amounts[position],
        )
        for position in range(len(assets))
    ]

In [14]:
# Expand the selected row into its individual Log entries.
sample_log = parse_record(sample)

In [17]:
# has_trade is a generator; list() materialises it so the cell displays the trades it finds.
list(has_trade(sample_log))

[Trade(operator='0x8661ae7918c0115af9e3691662f605e9c550ddc9', recipient='0x8661ae7918c0115af9e3691662f605e9c550ddc9', pool='0xb0402c6dbd0d8dd4cc5c6d928f2a27a1935fe902', asset_in='0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2', asset_out='0x0f5d2fb29fb7d3cfee444a200298f468908cc942', amount_in='1455298939524891316', amount_out='1520000000000000000000', log_idx=[0, 1])]

In [21]:
# Scan the frame for the first record that produces at least two trades,
# print its index label followed by each trade, then stop.
for i, sample in df.iterrows():
    sample_log = parse_record(sample)
    trades = list(has_trade(sample_log))
    if len(trades) < 2:
        continue
    print(i)
    for t in trades:
        print(t)
    break

12
Trade(operator='0xdb38ae75c5f44276803345f7f02e95a0aeef5944', recipient='0xdb38ae75c5f44276803345f7f02e95a0aeef5944', pool='0x7924a818013f39cf800f5589ff1f1f0def54f31f', asset_in='0x0000000000095413afc295d19edeb1ad7b71c952', asset_out='0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2', amount_in='4000000000000000000000', amount_out='17710896833533647813', log_idx=[1, 2])
Trade(operator='0x7924a818013f39cf800f5589ff1f1f0def54f31f', recipient='0x55d31f68975e446a40a2d02ffa4b0e1bfb233c2f', pool='0xdb38ae75c5f44276803345f7f02e95a0aeef5944', asset_in='0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2', asset_out='0x0000000000095413afc295d19edeb1ad7b71c952', amount_in='17710896833533647813', amount_out='6000000000000000000000', log_idx=[2, 3])
Trade(operator='0xdb38ae75c5f44276803345f7f02e95a0aeef5944', recipient='0xdb38ae75c5f44276803345f7f02e95a0aeef5944', pool='0x55d31f68975e446a40a2d02ffa4b0e1bfb233c2f', asset_in='0x0000000000095413afc295d19edeb1ad7b71c952', asset_out='0xdac17f958d2ee523a220620699459

In [22]:
# Row 12 is the record the search loop above printed as the first one with two or more trades.
sample = df.iloc[12]
sample_log = parse_record(sample)
# Keep the trades of this record; they are passed to is_arbitrage in a later cell.
sample_trade = list(has_trade(sample_log))

In [41]:
def is_profitable(d: dict) -> bool:
    """Tell whether a set of per-token balances represents a pure gain.

    Args:
        d: mapping of token to net balance; each value must be accepted by
            ``int()``.

    Returns:
        True when no balance is negative and at least one is positive.
        An empty mapping, or one whose balances are all zero, gives False.

    Raises:
        ValueError: if a value cannot be converted by ``int()``.
    """
    balances = [int(v) for v in d.values()]
    # With every balance >= 0, "the sum is positive" is the same statement as
    # "some balance is positive".
    return all(b >= 0 for b in balances) and any(b > 0 for b in balances)

In [31]:
def is_arbitrage(trades) -> bool:
    """Net out the token flows of ``trades`` and report whether they are profitable.

    For every trade, ``amount_in`` is subtracted from the balance of
    ``asset_in`` and ``amount_out`` is added to the balance of ``asset_out``.
    The resulting token-to-balance mapping is judged by ``is_profitable``.
    """
    profit = {}  # token => net balance
    for trade in trades:
        profit[trade.asset_in] = profit.get(trade.asset_in, 0) - int(trade.amount_in)
        profit[trade.asset_out] = profit.get(trade.asset_out, 0) + int(trade.amount_out)
    return is_profitable(profit)

# Check the trades collected in sample_trade (built from row 12 in an earlier cell).
is_arbitrage(sample_trade)

False

In [57]:
# Classify every record of the frame and count how many are flagged as arbitrage.
trade_is_arb = []
for i, sample in df.iterrows():
    sample_log = parse_record(sample)
    trades = list(has_trade(sample_log))
    res = is_arbitrage(trades)
    trade_is_arb.append(res)
# The flags are booleans, so their sum is the number of True entries.
print(sum(trade_is_arb))

1383


In [52]:
# Spot check: run the parse -> trades -> arbitrage pipeline on row 0.
sample = df.iloc[0]
sample_log = parse_record(sample)
trades = list(has_trade(sample_log))
res = is_arbitrage(trades)
res

False

In [54]:
# Spot check: run the same pipeline on row 1222, which is classified as arbitrage.
sample = df.iloc[1222]
sample_log = parse_record(sample)
trades = list(has_trade(sample_log))
res = is_arbitrage(trades)
res

True

In [56]:
# Collect the transaction hashes of all records flagged as arbitrage ...
arb_txns = []
for _, record in df.iterrows():
    logs = parse_record(record)
    trades = list(has_trade(logs))
    if is_arbitrage(trades):
        arb_txns.append(record.transaction_hash)
# ... and keep every row whose hash is in that collection.
arb_df = df.loc[df["transaction_hash"].isin(arb_txns)]
arb_df

Unnamed: 0,transaction_hash,token_address,from_address,to_address,value,operator,recipient,is_trade
1222,0x0e9168923be674e50ed6ccbd2ed1369aefede5f279cc...,"[0x111111111117dc0aa78b770fa6a738034120c302, 0...","[0x56178a0d5f301baf6cf3e1cd53d9863437345bf9, 0...","[0xbf3f6477dbd514ef85b7d3ec6ac2205fd0962039, 0...","[1155597197212263846744952, 193400311000000000...",0x56178a0d5f301baf6cf3e1cd53d9863437345bf9,0x56178a0d5f301baf6cf3e1cd53d9863437345bf9,True
1664,0xd1c5806298f10cf69d481454f0dedf15f5b0d42323e7...,"[0x1776e1f26f98b1a5df9cd347953a26dd3cb46671, 0...","[0xb2c2f9412c42a469dccd1ea95d5bf7461d7d8faf, 0...","[0xf2f400c138f9fb900576263af0bc7fcde2b1b8a8, 0...","[133971359000000000000, 18755990260000000000, ...",0xb2c2f9412c42a469dccd1ea95d5bf7461d7d8faf,0x2bf5a5ba29e60682fc56b2fcf9ce07bef4f6196f,True
1666,0x3e709df7c0531aa0df29921688ea9f1a4104ba09c164...,"[0x1776e1f26f98b1a5df9cd347953a26dd3cb46671, 0...","[0xe201da81da8ceb36a2b1d0582ca4b41325c0f6e3, 0...","[0x220bda5c8994804ac96ebe4df184d25e5c2196d4, 0...","[154993870000000000000, 71297180200000000000, ...",0xe201da81da8ceb36a2b1d0582ca4b41325c0f6e3,0xe201da81da8ceb36a2b1d0582ca4b41325c0f6e3,True
2033,0x3ae8583b807a6c4e856d40bd44569278ba7807a2bbc7...,"[0x1f573d6fb3f13d689ff844b4ce37794d79a7ff1c, 0...","[0x56178a0d5f301baf6cf3e1cd53d9863437345bf9, 0...","[0xbf3f6477dbd514ef85b7d3ec6ac2205fd0962039, 0...","[275217811885254567151882, 5263125560218512738...",0x56178a0d5f301baf6cf3e1cd53d9863437345bf9,0x56178a0d5f301baf6cf3e1cd53d9863437345bf9,True
2152,0x417a7d2b1887265d6ff93096f8b22ee842a29681a210...,"[0x1f573d6fb3f13d689ff844b4ce37794d79a7ff1c, 0...","[0x56178a0d5f301baf6cf3e1cd53d9863437345bf9, 0...","[0xbf3f6477dbd514ef85b7d3ec6ac2205fd0962039, 0...","[38755184787733954274739, 11100640446712779898...",0x56178a0d5f301baf6cf3e1cd53d9863437345bf9,0x56178a0d5f301baf6cf3e1cd53d9863437345bf9,True
...,...,...,...,...,...,...,...,...
85953,0xeffd8d050fef7cdb6d90cca0c494eaf054248271d000...,"[0xeb4c2781e4eba804ce9a9803c67d0893436bb27d, 0...","[0x4f868c1aa37fcf307ab38d215382e88fca6275e2, 0...","[0x1c073d5045b1abb6924d5f0f8b2f667b1653a4c3, 0...","[1703680000, 1703680000, 1702100195, 170210019...",0x4f868c1aa37fcf307ab38d215382e88fca6275e2,0x9a67f1940164d0318612b497e8e6038f902a00a4,True
85983,0xb0187776cb8f4d881843c4291df1dddd75969bd9fd9b...,"[0xeb4c2781e4eba804ce9a9803c67d0893436bb27d, 0...","[0x4f868c1aa37fcf307ab38d215382e88fca6275e2, 0...","[0x1c073d5045b1abb6924d5f0f8b2f667b1653a4c3, 0...","[1548800000, 1548800000, 15481378474323147683,...",0x4f868c1aa37fcf307ab38d215382e88fca6275e2,0x9a67f1940164d0318612b497e8e6038f902a00a4,True
85984,0x84296a97993b1f480b75f4c360cfd69772c5ffa6bcf8...,"[0xeb4c2781e4eba804ce9a9803c67d0893436bb27d, 0...","[0xaae0633e15200bc9c50d45cd762477d268e126bd, 0...","[0x445f0381482017043dc84c6f6b265abf4c20dcc1, 0...","[3000000000, 3000000000, 29668828581616499117,...",0xaae0633e15200bc9c50d45cd762477d268e126bd,0x21711c69387dcc6cebc3caff45d9e7c7dee5476a,True
85985,0xb21e8025ce292eb4cc9a1e644319f1b9b9b8ef5b3b67...,"[0xeb4c2781e4eba804ce9a9803c67d0893436bb27d, 0...","[0x4f868c1aa37fcf307ab38d215382e88fca6275e2, 0...","[0x1c073d5045b1abb6924d5f0f8b2f667b1653a4c3, 0...","[1408000000, 1408000000, 13981350487709091844,...",0x4f868c1aa37fcf307ab38d215382e88fca6275e2,0x9a67f1940164d0318612b497e8e6038f902a00a4,True


In [60]:
from tqdm import tqdm

# Process every input shard and append its arbitrage rows to one JSONL file.
# Raw string: the Windows backslashes are kept literally instead of relying on
# "\p" and "\s" being unrecognised escape sequences; the path is unchanged.
fn_template = r"X:\pomabuster\swap-new\swap-000000000" # 000 - 399 # swap-000000000000
for i in tqdm(range(400)):
    # Shard numbers are zero-padded to three digits (000 ... 399).
    file_number = f"{i:03d}"
    df = pd.read_json(fn_template + file_number, lines=True)
    arb_txns = []
    for _, record in df.iterrows():
        logs = parse_record(record)
        trades = list(has_trade(logs))
        if is_arbitrage(trades):
            arb_txns.append(record.transaction_hash)
    arb_df = df.loc[df["transaction_hash"].isin(arb_txns)]
    arb_json = arb_df.to_json(orient='records', lines=True)
    # Chunks from consecutive shards are appended back to back. Some pandas
    # versions do not end the lines=True output with a newline, which would
    # fuse the last record of one shard with the first record of the next.
    if arb_json and not arb_json.endswith("\n"):
        arb_json += "\n"
    # Append mode: results of earlier shards (and earlier runs) are preserved.
    with open("X:\\pomabuster\\arb\\arb2.jsonl", "a") as f:
        f.write(arb_json)

100%|█████████████████████████████████████████████████████████████████████████| 400/400 [56:44<00:00,  8.51s/it]
