In [1]:
import pandas as pd

In [2]:
fn = "../trade_dataset/trade-part4-000000000000"
df = pd.read_json(fn, lines=True)
df.sample(1)

Unnamed: 0,transaction_hash,token_address,from_address,to_address,value,operator,recipient,is_trade
27579,0xc33998c23f5c1f71eda3ed7fecd4c0a67f5278701387...,"[0x514910771af9ca656af840dff83e8264ecf986ca, 0...","[0x8df51a9714ae6357a5b829cc8d677b43d7e8bd53, 0...","[0xebcc959479634eec5a4d7162e36f8b8cc763f491, 0...","[25011998775975344965, 159377409878541566084, ...",0x8df51a9714ae6357a5b829cc8d677b43d7e8bd53,0x83a5544bcfaeba596a3f4769e52ff6e473665851,True


In [3]:
from dataclasses import dataclass

@dataclass
class Log:
    sender: str
    receiver: str
    amount: str
    asset: str
    idx: int
    
@dataclass
class Trade:
    operator: str
    recipient: str
    pool: str
    asset_in: str
    asset_out: str
    amount_in: str
    amount_out: str
    log_idx: list


In [4]:
import itertools

ZERO_ADDRESS = "0x0000000000000000000000000000000000000000"
ZERO_AMOUNT = "0"
def is_transfer_normal(log):
    return (log.sender != log.receiver 
        and log.sender != ZERO_ADDRESS
        and log.receiver != ZERO_ADDRESS
        and log.amount != ZERO_AMOUNT)

def is_transfer_minting(log):
    return (log.sender == ZERO_ADDRESS and log.amount != ZERO_AMOUNT)

def is_transfer_burning(log):
    return (log.receiver == ZERO_ADDRESS and log.amount != ZERO_AMOUNT)

def has_transfer_normal(logs):
    return [log for log in logs if is_transfer_normal(log)]

def has_transfer_minting(logs):
    return [log for log in logs if is_transfer_minting(log)]

def has_transfer_burning(logs):
    return [log for log in logs if is_transfer_burning(log)]

def has_liquidity_mining(logs):
    ts = has_transfer_normal(logs)
    tms = has_transfer_minting(logs)
    if not ts or not tms:
        return None
    for t, tm in pairwise(ts, tms):
        if t and tm and t.asset != tm.asset:
            trade_dict = {"operator": t.sender, 
                    "recipient": tm.receiver, 
                    "pool": t.receiver, 
                    "asset_in": t.asset,
                    "asset_out": tm.asset,
                    "amount_in": t.amount,
                    "amount_out": tm.amount,
                    "log_idx": [t.idx, tm.idx]}
            yield Trade(**trade_dict)
        return None

def has_liquidity_cancel(logs):
    ts = has_transfer_normal(logs)
    tbs = has_transfer_burning(logs)
    if not ts or not tbs:
        return None
    for t, tb in pairwise(ts, tbs):
        if t and tb and t.asset != tb.asset:
            trade_dict =  {"operator": tb.sender, 
                    "recipient": t.receiver, 
                    "pool": t.sender, 
                    "asset_in": tb.asset,
                    "asset_out": t.asset,
                    "amount_in": tb.amount,
                    "amount_out": t.amount,
                    "log_idx": [t.idx, tb.idx]}
            yield Trade(**trade_dict)
    return None

def pairwise(iterable_1, iterable_2=None):
    if iterable_2 is None:
        return itertools.product(iterable_1, iterable_1)
    return itertools.product(iterable_1, iterable_2)

def has_trade(logs):
    t = has_transfer_normal(logs)
    if not t:
        return None
    for t1, t2 in pairwise(t):
        if t1 and t2 and t1.asset != t2.asset and t1.receiver == t2.sender:
            trade_dict = {"operator": t1.sender, 
                    "recipient": t2.receiver, 
                    "pool": t1.receiver, 
                    "asset_in": t1.asset,
                    "asset_out": t2.asset,
                    "amount_in": t1.amount,
                    "amount_out": t2.amount,
                    "log_idx": [t1.idx, t2.idx]}
            yield Trade(**trade_dict)
    return None

def has_trade_alt(logs):
    lms = has_liquidity_mining(logs)
    lcs = has_liquidity_cancel(logs)
    if not lms or lcs:
        return None
    for lm, lc in pairwise(lms, lcs):
        if (lm and lc 
            and lm.recipient == lc.operator 
            and lm.asset_in != lc.asset_out 
            and lm.asset_out != lc.asset_in):
            trade_dict =  {"operator": lm.operator, 
                    "recipient": lc.recipient, 
                    "pool": lm.recipient, 
                    "asset_in": lm.asset_in,
                    "asset_out": lc.asset_out,
                    "amount_in": lm.amount_in,
                    "amount_out": lc.amount_out,
                    "log_idx": [lm.idx, lc.idx]}
            yield Trade(**trade_dict)
    return None



In [5]:
def compare_trade(trade1, trade2, direct=True):
    return (trade1.operator == trade1.recipient
            and trade2.operator == trade2.recipient
            and trade1.pool == trade2.pool
            and trade1.asset_in == trade2.asset_out
            and trade1.asset_out == trade2.asset_in
            and trade1.amount_out == trade2.amount_in
            and (trade1.amount_in < trade2.amount_out if direct 
                 else trade1.amount_in == trade2.amount_out))

def basic_action_in_between(logs, trade1, trade2):
    used_idx = trade1.log_idx + trade2.log_idx
    new_logs = [log for log in logs if log.idx not in used_idx]
    return (has_transfer_minting(new_logs) 
            + has_transfer_normal(new_logs) 
            + has_transfer_burning(new_logs))

def trade_in_between(logs, trade1, trade2):
    used_idx = trade1.log_idx + trade2.log_idx
    new_logs = [log for log in logs if log.idx not in used_idx]
    return has_trade(new_logs)

def advance_action_in_between(logs, trade1, trade2):
    used_idx = trade1.log_idx + trade2.log_idx
    new_logs = [log for log in logs if log.idx not in used_idx]
    return (has_trade(new_logs)
            or has_liquidity_cancel(new_logs) 
            or has_liquidity_mining(new_logs) )
    return False
        
def is_dierct_pm(logs):
    trades = list(has_trade(logs)) + list(has_trade_alt(logs))
    if not trades:
        return False
    for t1, t2 in pairwise(trades):
        if compare_trade(t1, t2, direct=True):
            bas = basic_action_in_between(logs, t1, t2) 
            trs = trade_in_between(logs, t1, t2)
            for ba in bas:
                if ba.receiver == t1.pool and ba.asset != t1.asset_out:
                    return True
            for t3 in trs:
                if (t3.operator != t1.operator 
                    and t3.pool == t1.pool 
                    and t3.asset_out == t1.asset_out):
                    return True
    return False

        
def is_indierct_pm(logs):
    trades = list(has_trade(logs)) + list(has_trade_alt(logs))
    if not trades:
        return False
    for t1, t2 in pairwise(trades):
        if compare_trade(t1, t2, direct=False):
            bas = basic_action_in_between(logs, t1, t2) 
            aas = advance_action_in_between(logs, t1, t2)
            for ba in bas:
                if ba.receiver == t1.operator:
                    return True
            for aa in aas:
                if (aa.pool != t1.pool 
                    and aa.recipient == t1.operator):
                    return True
    return False

In [6]:
def parse_record(record):
    assets = record.token_address
    senders = record.from_address
    receivers = record.to_address
    amounts = record.value
    logs = []
    for i in range(len(assets)):      
        log_dict = {
            "idx": i,
            "asset": assets[i],
            "sender": senders[i],
            "receiver": receivers[i],
            "amount": amounts[i],
        }
        logs.append(Log(**log_dict))
    return logs


In [7]:
pm_txns = []
for _, record in df.iterrows():
    logs = parse_record(record)
    if is_dierct_pm(logs) or is_indierct_pm(logs):
        pm_txns.append(record.transaction_hash)

In [8]:
len(pm_txns)

457

In [9]:
def inspect_pm_log(txn_hash):
    logs = parse_record(df[df.transaction_hash == txn_hash].iloc[0])
    return pd.DataFrame(logs)

def inspect_pm_trade(txn_hash):
    logs = parse_record(df[df.transaction_hash == txn_hash].iloc[0])
    # print(logs)
    return pd.DataFrame(list(has_trade(logs)) + list(has_trade_alt(logs)))

In [10]:
inspect_pm_log(pm_txns[-1])

Unnamed: 0,sender,receiver,amount,asset,idx
0,0xd51a44d3fae010294c616388b506acda1bfaae46,0x81c46feca27b31f3adc2b91ee4be9717d1cd3dd7,140298390190,0xdac17f958d2ee523a2206206994597c13d831ec7,0
1,0x81c46feca27b31f3adc2b91ee4be9717d1cd3dd7,0xbadc0defafcf6d4239bdf0b66da4d7bd36fcf05a,140298390190,0xdac17f958d2ee523a2206206994597c13d831ec7,1
2,0xbadc0defafcf6d4239bdf0b66da4d7bd36fcf05a,0x81c46feca27b31f3adc2b91ee4be9717d1cd3dd7,140298390190,0xdac17f958d2ee523a2206206994597c13d831ec7,2
3,0x81c46feca27b31f3adc2b91ee4be9717d1cd3dd7,0xd632f22692fac7611d2aa1c0d552930d43caed3b,140298390190,0xdac17f958d2ee523a2206206994597c13d831ec7,3
4,0xd632f22692fac7611d2aa1c0d552930d43caed3b,0xbebc44782c7db0a1a60cb6fe97d0b483032ff1c7,140298390190,0xdac17f958d2ee523a2206206994597c13d831ec7,4
5,0xd632f22692fac7611d2aa1c0d552930d43caed3b,0x81c46feca27b31f3adc2b91ee4be9717d1cd3dd7,140262565544754008195852,0x853d955acef822db058eb8505911ed77f175b99e,5
6,0x81c46feca27b31f3adc2b91ee4be9717d1cd3dd7,0xbadc0defafcf6d4239bdf0b66da4d7bd36fcf05a,140262565544754008195852,0x853d955acef822db058eb8505911ed77f175b99e,6
7,0x9a834b70c07c81a9fcd6f22e842bf002fbffbe4d,0xbadc0defafcf6d4239bdf0b66da4d7bd36fcf05a,140248391978,0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48,7
8,0xbadc0defafcf6d4239bdf0b66da4d7bd36fcf05a,0x9a834b70c07c81a9fcd6f22e842bf002fbffbe4d,140262565544754008195852,0x853d955acef822db058eb8505911ed77f175b99e,8
9,0xbadc0defafcf6d4239bdf0b66da4d7bd36fcf05a,0x8ad599c3a0ff1de082011efddc58f1908eb6e6d8,140248391978,0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48,9


In [11]:
inspect_pm_trade(pm_txns[-1])

Unnamed: 0,operator,recipient,pool,asset_in,asset_out,amount_in,amount_out,log_idx
0,0xd51a44d3fae010294c616388b506acda1bfaae46,0xbadc0defafcf6d4239bdf0b66da4d7bd36fcf05a,0x81c46feca27b31f3adc2b91ee4be9717d1cd3dd7,0xdac17f958d2ee523a2206206994597c13d831ec7,0x853d955acef822db058eb8505911ed77f175b99e,140298390190,140262565544754008195852,"[0, 6]"
1,0x81c46feca27b31f3adc2b91ee4be9717d1cd3dd7,0x9a834b70c07c81a9fcd6f22e842bf002fbffbe4d,0xbadc0defafcf6d4239bdf0b66da4d7bd36fcf05a,0xdac17f958d2ee523a2206206994597c13d831ec7,0x853d955acef822db058eb8505911ed77f175b99e,140298390190,140262565544754008195852,"[1, 8]"
2,0x81c46feca27b31f3adc2b91ee4be9717d1cd3dd7,0x8ad599c3a0ff1de082011efddc58f1908eb6e6d8,0xbadc0defafcf6d4239bdf0b66da4d7bd36fcf05a,0xdac17f958d2ee523a2206206994597c13d831ec7,0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48,140298390190,140248391978,"[1, 9]"
3,0xbadc0defafcf6d4239bdf0b66da4d7bd36fcf05a,0xbadc0defafcf6d4239bdf0b66da4d7bd36fcf05a,0x81c46feca27b31f3adc2b91ee4be9717d1cd3dd7,0xdac17f958d2ee523a2206206994597c13d831ec7,0x853d955acef822db058eb8505911ed77f175b99e,140298390190,140262565544754008195852,"[2, 6]"
4,0x81c46feca27b31f3adc2b91ee4be9717d1cd3dd7,0x81c46feca27b31f3adc2b91ee4be9717d1cd3dd7,0xd632f22692fac7611d2aa1c0d552930d43caed3b,0xdac17f958d2ee523a2206206994597c13d831ec7,0x853d955acef822db058eb8505911ed77f175b99e,140298390190,140262565544754008195852,"[3, 5]"
5,0xd632f22692fac7611d2aa1c0d552930d43caed3b,0xbadc0defafcf6d4239bdf0b66da4d7bd36fcf05a,0x81c46feca27b31f3adc2b91ee4be9717d1cd3dd7,0x853d955acef822db058eb8505911ed77f175b99e,0xdac17f958d2ee523a2206206994597c13d831ec7,140262565544754008195852,140298390190,"[5, 1]"
6,0xd632f22692fac7611d2aa1c0d552930d43caed3b,0xd632f22692fac7611d2aa1c0d552930d43caed3b,0x81c46feca27b31f3adc2b91ee4be9717d1cd3dd7,0x853d955acef822db058eb8505911ed77f175b99e,0xdac17f958d2ee523a2206206994597c13d831ec7,140262565544754008195852,140298390190,"[5, 3]"
7,0x81c46feca27b31f3adc2b91ee4be9717d1cd3dd7,0x81c46feca27b31f3adc2b91ee4be9717d1cd3dd7,0xbadc0defafcf6d4239bdf0b66da4d7bd36fcf05a,0x853d955acef822db058eb8505911ed77f175b99e,0xdac17f958d2ee523a2206206994597c13d831ec7,140262565544754008195852,140298390190,"[6, 2]"
8,0x81c46feca27b31f3adc2b91ee4be9717d1cd3dd7,0x8ad599c3a0ff1de082011efddc58f1908eb6e6d8,0xbadc0defafcf6d4239bdf0b66da4d7bd36fcf05a,0x853d955acef822db058eb8505911ed77f175b99e,0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48,140262565544754008195852,140248391978,"[6, 9]"
9,0x9a834b70c07c81a9fcd6f22e842bf002fbffbe4d,0x81c46feca27b31f3adc2b91ee4be9717d1cd3dd7,0xbadc0defafcf6d4239bdf0b66da4d7bd36fcf05a,0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48,0xdac17f958d2ee523a2206206994597c13d831ec7,140248391978,140298390190,"[7, 2]"


In [12]:
pm_txns[-1]

'0xf9f20949ed9989978d2be9f55b3ff827871c50ee25dbd3cf9a94b69794589779'

In [15]:
filepath = "drf_000"
pd.DataFrame(pm_txns).to_csv(filepath, index=False)