In [1]:
import pandas as pd

In [4]:
fn = "../tokens/scrape_erc20/erc20.jsonlines"
token_df = pd.read_json(fn, lines=True)
token_df.sample()

Unnamed: 0,index,name,address,price_in_usd,volume,market_cap_circulating,market_cap_onchain,holders,market_cap_fully_diluted,total_supply,decimals,official_website
834,429,UniBright (UBT),0x8400d94a5cb0fa0d041a3788e395285d61c9ee5e,0.0717,40497,10801015,10758900.0,22845,10713900.0,150000000,8,unibright.io


In [5]:
token_supply = {}
for _, row in token_df.iterrows():
    token_supply[row["address"]] = int(row["total_supply"])
    
token_decimal = {}
for _, row in token_df.iterrows():
    token_decimal[row["address"]] = int(row["decimals"])

In [48]:
fn = "X:\pomabuster\swap\swap-000000000000"
df = pd.read_json(fn, lines=True)
df.sample(1)

Unnamed: 0,transaction_hash,token_address,from_address,to_address,value,operator,recipient,is_trade
23472,0xfb85101e676b32727747973578a39292b068fd419f5f...,"[0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48, 0...","[0xea5db10aacf3178aec750d1df3fc97ad8be553ef, 0...","[0x74de5d4fcbf63e00296fd95d33236b9794016631, 0...","[7000000000, 7000000000, 2288815836654671872]",0xea5db10aacf3178aec750d1df3fc97ad8be553ef,0xdef1c0ded9bec7f1a1670819833240f027b25eff,True


In [7]:
filepath = "drf_000"
poma = pd.read_csv(filepath)
poma.sample()

Unnamed: 0,0
418,0x5f6f124b8dbb9ddd59ee6206febbd4aaa9ceab705ed8...


In [8]:
from dataclasses import dataclass

@dataclass
class Log:
    sender: str
    receiver: str
    amount: str
    asset: str
    idx: int
    
@dataclass
class Trade:
    operator: str
    recipient: str
    pool: str
    asset_in: str
    asset_out: str
    amount_in: str
    amount_out: str
    log_idx: list

In [9]:
import itertools

ZERO_ADDRESS = "0x0000000000000000000000000000000000000000"
ZERO_AMOUNT = "0"
def is_transfer_normal(log):
    return (log.sender != log.receiver 
        and log.sender != ZERO_ADDRESS
        and log.receiver != ZERO_ADDRESS
        and log.amount != ZERO_AMOUNT)

def is_transfer_minting(log):
    return (log.sender == ZERO_ADDRESS and log.amount != ZERO_AMOUNT)

def is_transfer_burning(log):
    return (log.receiver == ZERO_ADDRESS and log.amount != ZERO_AMOUNT)

def has_transfer_normal(logs):
    return [log for log in logs if is_transfer_normal(log)]

def has_transfer_minting(logs):
    return [log for log in logs if is_transfer_minting(log)]

def has_transfer_burning(logs):
    return [log for log in logs if is_transfer_burning(log)]

def has_liquidity_mining(logs):
    ts = has_transfer_normal(logs)
    tms = has_transfer_minting(logs)
    if not ts or not tms:
        return None
    for t, tm in pairwise(ts, tms):
        if t and tm and t.asset != tm.asset:
            trade_dict = {"operator": t.sender, 
                    "recipient": tm.receiver, 
                    "pool": t.receiver, 
                    "asset_in": t.asset,
                    "asset_out": tm.asset,
                    "amount_in": t.amount,
                    "amount_out": tm.amount,
                    "log_idx": [t.idx, tm.idx]}
            yield Trade(**trade_dict)
        return None

def has_liquidity_cancel(logs):
    ts = has_transfer_normal(logs)
    tbs = has_transfer_burning(logs)
    if not ts or not tbs:
        return None
    for t, tb in pairwise(ts, tbs):
        if t and tb and t.asset != tb.asset:
            trade_dict =  {"operator": tb.sender, 
                    "recipient": t.receiver, 
                    "pool": t.sender, 
                    "asset_in": tb.asset,
                    "asset_out": t.asset,
                    "amount_in": tb.amount,
                    "amount_out": t.amount,
                    "log_idx": [t.idx, tb.idx]}
            yield Trade(**trade_dict)
    return None

def pairwise(iterable_1, iterable_2=None):
    if iterable_2 is None:
        return itertools.product(iterable_1, iterable_1)
    return itertools.product(iterable_1, iterable_2)

def has_trade(logs):
    t = has_transfer_normal(logs)
    if not t:
        return None
    for t1, t2 in pairwise(t):
        if t1 and t2 and t1.asset != t2.asset and t1.receiver == t2.sender:
            trade_dict = {"operator": t1.sender, 
                    "recipient": t2.receiver, 
                    "pool": t1.receiver, 
                    "asset_in": t1.asset,
                    "asset_out": t2.asset,
                    "amount_in": t1.amount,
                    "amount_out": t2.amount,
                    "log_idx": [t1.idx, t2.idx]}
            yield Trade(**trade_dict)
    return None

def has_trade_alt(logs):
    lms = has_liquidity_mining(logs)
    lcs = has_liquidity_cancel(logs)
    if not lms or lcs:
        return None
    for lm, lc in pairwise(lms, lcs):
        if (lm and lc 
            and lm.recipient == lc.operator 
            and lm.asset_in != lc.asset_out 
            and lm.asset_out != lc.asset_in):
            trade_dict =  {"operator": lm.operator, 
                    "recipient": lc.recipient, 
                    "pool": lm.recipient, 
                    "asset_in": lm.asset_in,
                    "asset_out": lc.asset_out,
                    "amount_in": lm.amount_in,
                    "amount_out": lc.amount_out,
                    "log_idx": [lm.idx, lc.idx]}
            yield Trade(**trade_dict)
    return None



In [10]:
def parse_record(record):
    assets = record.token_address
    senders = record.from_address
    receivers = record.to_address
    amounts = record.value
    logs = []
    for i in range(len(assets)):      
        log_dict = {
            "idx": i,
            "asset": assets[i],
            "sender": senders[i],
            "receiver": receivers[i],
            "amount": amounts[i],
        }
        logs.append(Log(**log_dict))
    return logs

In [11]:
PERCENTAGE = 0.02

def has_trade_huge(trades, percentage=PERCENTAGE):
    for trade in trades:
        if (int(trade.amount_in) / (10 ** token_decimal[trade.asset_in]) >= token_supply[trade.asset_in] * PERCENTAGE 
                or int(trade.amount_out) / (10 ** token_decimal[trade.asset_out]) >= token_supply[trade.asset_out] * PERCENTAGE ):
            yield trade
            
def has_wash_trade(trades, percentage=PERCENTAGE):
    total_amount = {}
    for trade in trades:
        if trade.asset_out not in total_amount:
            total_amount[trade.asset_out] = int(trade.amount_out)
        else:
            total_amount[trade.asset_out] += int(trade.amount_out)
    for token, amount in total_amount.items():
        if total_amount[token] / (10 ** token_decimal[token]) >= token_supply[token] * PERCENTAGE:
            return True
    return False

In [12]:
def is_direct_pm(logs, percentage):
    trades = list(has_trade(logs)) + list(has_trade_alt(logs))
    huge_trades = list(has_trade_huge(trades, percentage))
    if huge_trades:
        return True
    if has_wash_trade(trades, percentage):
        return True
    return False

In [24]:
pm_txns = []
for _, record in df.iterrows():
    logs = parse_record(record)
    if is_direct_pm(logs, PERCENTAGE):
        pm_txns.append(record.transaction_hash)

In [25]:
len(pm_txns)

148

In [16]:
len(df)

88103

In [17]:
def inspect_pm_log(txn_hash):
    logs = parse_record(df[df.transaction_hash == txn_hash].iloc[0])
    return pd.DataFrame(logs)

def inspect_pm_trade(txn_hash):
    logs = parse_record(df[df.transaction_hash == txn_hash].iloc[0])
    # print(logs)
    return pd.DataFrame(list(has_trade(logs)) + list(has_trade_alt(logs)))

In [26]:
pm_txns[-1]

'0x9f44b4a8584c3f4a53910330c65ca96c9adb60569bb60d0e19f4912ce4dcdbc1'

In [19]:
inspect_pm_log(pm_txns[-2])

Unnamed: 0,sender,receiver,amount,asset,idx
0,0x8f893cef5611d17b5c421ced8279c9c4b0bece4f,0x0000000000000000000000000000000000000000,5323257118107965,0xfe18be6b3bd88a2d2a7f928d00292e7a9963cfc6,0
1,0x8f893cef5611d17b5c421ced8279c9c4b0bece4f,0xd1602f68cc7c4c7b59d686243ea35a9c73b0c6a2,3206418950788923120,0xfe18be6b3bd88a2d2a7f928d00292e7a9963cfc6,1
2,0xd1602f68cc7c4c7b59d686243ea35a9c73b0c6a2,0x7fc77b5c7614e1533320ea6ddc2eb61fa00a9714,3206418950788923120,0xfe18be6b3bd88a2d2a7f928d00292e7a9963cfc6,2
3,0x7fc77b5c7614e1533320ea6ddc2eb61fa00a9714,0xd1602f68cc7c4c7b59d686243ea35a9c73b0c6a2,325276327,0xeb4c2781e4eba804ce9a9803c67d0893436bb27d,3
4,0xd1602f68cc7c4c7b59d686243ea35a9c73b0c6a2,0x05c31f9623a5b691f18e60e68f23c64cd708badb,325276327,0xeb4c2781e4eba804ce9a9803c67d0893436bb27d,4


In [20]:
inspect_pm_trade(pm_txns[-2])

Unnamed: 0,operator,recipient,pool,asset_in,asset_out,amount_in,amount_out,log_idx
0,0x8f893cef5611d17b5c421ced8279c9c4b0bece4f,0x05c31f9623a5b691f18e60e68f23c64cd708badb,0xd1602f68cc7c4c7b59d686243ea35a9c73b0c6a2,0xfe18be6b3bd88a2d2a7f928d00292e7a9963cfc6,0xeb4c2781e4eba804ce9a9803c67d0893436bb27d,3206418950788923120,325276327,"[1, 4]"
1,0xd1602f68cc7c4c7b59d686243ea35a9c73b0c6a2,0xd1602f68cc7c4c7b59d686243ea35a9c73b0c6a2,0x7fc77b5c7614e1533320ea6ddc2eb61fa00a9714,0xfe18be6b3bd88a2d2a7f928d00292e7a9963cfc6,0xeb4c2781e4eba804ce9a9803c67d0893436bb27d,3206418950788923120,325276327,"[2, 3]"
2,0x7fc77b5c7614e1533320ea6ddc2eb61fa00a9714,0x7fc77b5c7614e1533320ea6ddc2eb61fa00a9714,0xd1602f68cc7c4c7b59d686243ea35a9c73b0c6a2,0xeb4c2781e4eba804ce9a9803c67d0893436bb27d,0xfe18be6b3bd88a2d2a7f928d00292e7a9963cfc6,325276327,3206418950788923120,"[3, 2]"


In [None]:
pm_number = []
for percentage in range(1000, 1, -1):
    pm_txns = []
    percentage = percentage/1000
    for _, record in df.iterrows():
        logs = parse_record(record)
        if is_direct_pm(logs, percentage):
            pm_txns.append(record.transaction_hash)
    #print(f"{percentage}: {len(pm_txns)}")
    pm_number.append((percentage, len(pm_txns)))

In [None]:
len(pm_number)

In [27]:
import pickle
with open('pm_number.pkl', 'wb') as file:
    pickle.dump(pm_number, file)

In [32]:
pm_number[998]

(0.002, 220)

In [50]:
def parse_record(record):
    assets = record.token_address
    senders = record.from_address
    receivers = record.to_address
    amounts = record.value
    logs = []
    for i in range(len(assets)):      
        log_dict = {
            "idx": i,
            "asset": assets[i],
            "sender": senders[i],
            "receiver": receivers[i],
            "amount": amounts[i],
        }
        logs.append(Log(**log_dict))
    return logs

trades = []
for _, record in df.iterrows():
    logs = parse_record(record)
    trade = list(has_trade(logs)) + list(has_trade_alt(logs))
    trades += trade

In [51]:
len(trades)

213157

In [52]:
pm_number = []
for percentage in range(1000, 1, -1):
    percentage = percentage/1000
    count = 0
    for trade in trades:
        if (int(trade.amount_in) / 
            (10 ** token_decimal[trade.asset_in]) 
            >= token_supply[trade.asset_in] * percentage 
            or int(trade.amount_out) / 
            (10 ** token_decimal[trade.asset_out]) 
            >= token_supply[trade.asset_out] * percentage ):
            count += 1
    pm_number.append((percentage, count))

In [53]:
pm_df = df.loc[df["transaction_hash"].isin(pm_txns)]
with open("X:\pomabuster\pom\pom.jsonl", "a") as f:
    f.write(pm_df.to_json(orient='records', lines=True))

In [55]:
from tqdm import tqdm

fn_template = "X:\pomabuster\swap-new\swap-000000000" # 000 - 399 # swap-000000000000
for i in tqdm(range(400)):
    if len(str(i)) == 1:
        file_number = "00" + str(i)
    elif len(str(i)) == 2:
        file_number = "0" + str(i)
    else:
        file_number = str(i)
    # print(file_number)
    df = pd.read_json(fn_template + file_number, lines=True)
    pm_txns = []
    for _, record in df.iterrows():
        logs = parse_record(record)
        if is_direct_pm(logs, PERCENTAGE):
            pm_txns.append(record.transaction_hash)
    pm_df = df.loc[df["transaction_hash"].isin(pm_txns)]
    with open("X:\pomabuster\pom\pom2.jsonl", "a") as f:
        f.write(pm_df.to_json(orient='records', lines=True))

100%|█████████████████████████████████████████████████████████████████████████| 400/400 [58:17<00:00,  8.74s/it]
