In [2]:
import os
from collections import Counter
from datetime import datetime, timezone
from statistics import stdev as std
from time import sleep

import pandas as pd
import requests

# Etherscan settings. The API key is read from the ETHERSCAN_API_KEY
# environment variable; when that variable is unset or empty, the placeholder
# string below is used instead and has to be replaced by hand with a real key.
ETHERSCAN_API = "https://api.etherscan.io/api"
ETHERSCAN_KEY = os.environ.get("ETHERSCAN_API_KEY") or "ISI_API_KEY_MU_DI_SINI"

In [3]:
def most_common(lst):
    """Return the most frequent element of ``lst``, or ``None`` when ``lst`` is empty/falsy."""
    if not lst:
        return None
    # most_common(1) yields a single (element, count) pair; keep the element.
    ((winner, _count),) = Counter(lst).most_common(1)
    return winner

def fetch_transactions(address, timeout=30):
    """Fetch the transaction list of ``address`` from the Etherscan ``txlist`` action.

    Args:
        address: Address to query. It is passed as a query parameter, so
            ``requests`` takes care of URL-encoding it.
        timeout: Seconds to wait for the HTTP response. Optional; without a
            timeout a single stalled connection would block the caller forever.

    Returns:
        list: The ``result`` list of the response (requested in ascending
        order), or an empty list when the API reports no transactions.

    Raises:
        requests.RequestException: On network failure, timeout, or a non-2xx
            HTTP status.
        ValueError: If the response body is not valid JSON.
        RuntimeError: If the API answers with a non-"1" status whose ``result``
            is not a list (e.g. a rate-limit or invalid-key message). These
            used to be returned as ``[]`` and were indistinguishable from an
            address that simply has no transactions.

    NOTE(review): Etherscan documents a cap of 10,000 records per ``txlist``
    call, so very active addresses are presumably truncated here -- confirm
    whether pagination is needed.
    """
    params = {
        "module": "account",
        "action": "txlist",
        "address": address,
        "startblock": 0,
        "endblock": 99999999,
        "sort": "asc",
        "apikey": ETHERSCAN_KEY,
    }
    response = requests.get(ETHERSCAN_API, params=params, timeout=timeout)
    response.raise_for_status()
    data = response.json()

    if data.get("status") == "1":
        return data["result"]

    # A non-"1" status covers both "no transactions found" (result is a list)
    # and real API errors (result is an error string). Only the former is an
    # empty answer; the latter must not be mistaken for one.
    if isinstance(data.get("result"), list):
        return []

    raise RuntimeError(
        f"Etherscan error for {address}: "
        f"{data.get('message')} - {data.get('result')}"
    )

def extract_features(wallet):
    """Compute activity features for one wallet from its transaction list.

    Transactions come from ``fetch_transactions(wallet)``. Timestamps are
    interpreted as UTC, and each amount is the transaction ``value`` divided
    by 1e18.

    Args:
        wallet: Address string. It is compared case-insensitively against the
            ``from`` / ``to`` fields of every transaction.

    Returns:
        dict | None: ``None`` when no transactions were returned. Otherwise a
        dict with the keys:

        - ``chain``: always ``"ETH"``.
        - ``address``: ``wallet`` exactly as passed in.
        - ``tx_total``: number of transactions.
        - ``avg_tx_per_day``: ``tx_total`` / number of distinct UTC dates with
          at least one transaction (2 decimals).
        - ``avg_tx_per_block``: ``tx_total`` / number of distinct blocks
          (4 decimals).
        - ``max_tx_per_block`` / ``min_tx_per_block``: largest / smallest
          number of transactions sharing one block.
        - ``std_tx_amount``: sample standard deviation of the amounts
          (6 decimals); 0 when there is only one transaction.
        - ``tx_between_00_04``: share of transactions whose UTC hour is in
          [0, 4) (4 decimals).
        - ``num_unique_counterparties``: number of distinct non-empty
          addresses on the other side of the wallet's transactions.
        - ``same_day_deposit_withdraw``: 1 if at least one date has both an
          incoming and an outgoing transaction, else 0.
        - ``most_common_amount``: most frequent amount.
    """
    txs = fetch_transactions(wallet)
    if not txs:
        return None

    wallet_lc = wallet.lower()  # loop-invariant, so lowercase it only once

    active_days = set()
    hours = []
    amounts = []
    tx_per_block = Counter()
    counterparties = set()
    in_out_by_day = {}

    for tx in txs:
        # Timezone-aware replacement for the deprecated utcfromtimestamp();
        # .date() and .hour are the same UTC values as before.
        dt = datetime.fromtimestamp(int(tx['timeStamp']), tz=timezone.utc)
        date = dt.date()
        from_addr = tx['from'].lower()
        to_addr = tx['to'].lower() if tx['to'] else ""

        active_days.add(date)
        hours.append(dt.hour)
        amounts.append(int(tx['value']) / 1e18)
        tx_per_block[int(tx['blockNumber'])] += 1

        if from_addr == wallet_lc:
            direction, other = "out", to_addr
        elif to_addr == wallet_lc:
            direction, other = "in", from_addr
        else:
            continue  # transaction does not involve the wallet on either side

        # An empty address (e.g. the blank `to` of a contract creation) is not
        # a counterparty; counting "" would inflate num_unique_counterparties.
        if other:
            counterparties.add(other)
        in_out_by_day.setdefault(date, {"out": 0, "in": 0})[direction] += 1

    has_same_day_in_out = any(
        day["in"] > 0 and day["out"] > 0 for day in in_out_by_day.values()
    )
    tx_total = len(txs)

    # txs is non-empty here, so active_days, tx_per_block and amounts all hold
    # at least one entry and the divisions / min / max below are safe.
    feats = dict(
        chain="ETH",
        address=wallet,
        tx_total=tx_total,
        avg_tx_per_day=round(tx_total / len(active_days), 2),
        avg_tx_per_block=round(tx_total / len(tx_per_block), 4),
        max_tx_per_block=max(tx_per_block.values()),
        min_tx_per_block=min(tx_per_block.values()),
        std_tx_amount=round(std(amounts), 6) if len(amounts) > 1 else 0,
        tx_between_00_04=round(sum(1 for h in hours if 0 <= h < 4) / tx_total, 4),
        num_unique_counterparties=len(counterparties),
        same_day_deposit_withdraw=int(has_same_day_in_out),
        most_common_amount=most_common(amounts),
    )
    return feats

In [10]:
# Load the labelled address dataset and keep only the rows whose
# address_type equals 1.
df = pd.read_csv("../datasets/ETH/gambling_address_dataset.csv")
is_type_1 = df['address_type'] == 1
count_1 = df.loc[is_type_1]

In [11]:
# Extract features for every selected wallet. A failure on one wallet is
# reported and skipped so the remaining wallets are still processed; wallets
# for which extract_features returns nothing are left out of `results`.
results = []
total_wallets = len(count_1)
for idx, wallet in enumerate(count_1['address_hash']):
    print(f"[{idx+1}/{total_wallets}] Processing {wallet}")
    try:
        feats = extract_features(wallet)
    except Exception as e:
        print(f"Error on {wallet}: {e}")
    else:
        if feats:
            results.append(feats)
    # Short pause between wallets to space out the API requests.
    sleep(0.2)

[1/10423] Processing 0xd1ceeee271fd5a8b0e2bfc12ea5b5b2e5cedec95
[2/10423] Processing 0x4a7a92fe5fdee55e90e199aaeb99f5b0f36e026d
[3/10423] Processing 0x00000000c0293c8ca34dac9bcc0f953532d34e4d
[4/10423] Processing 0xddfd7f68662bef333bb7891580948e83dcd3c988
[5/10423] Processing 0xfdc84ce4b42acccd34df47ce88c09a9ec7da14bf
[6/10423] Processing 0x04fc0ec03551ae1919c124b74da35ef7de4d3067
[7/10423] Processing 0xf5492e21132e4a81cb1823e43e747203cc3eac1a
[8/10423] Processing 0xfea0c2e840b3679e82003fcc1441634fe40832ec
[9/10423] Processing 0xfa2944cb867e9e76c0668b9d2e8cd4957542b0ef
[10/10423] Processing 0x9424bfc9f19d2c40d79ca20094a7da9d4024c44b
[11/10423] Processing 0x1bdae00202724067e441e55e4b13538c6c5b3eb4
[12/10423] Processing 0x89247ef46707cb6ffc93208938bf08d3cba10e56
[13/10423] Processing 0x5730c66a374e23946624917bae081ceb90e81932
[14/10423] Processing 0xe7813792da36c7d52940405fa59e9ffd50edf611
[15/10423] Processing 0x81fbc6cba20b65391d69c3ce883ad557ae50c0c9
[16/10423] Processing 0x320d7e2366

In [12]:
# Assemble the collected per-wallet feature dicts into a single DataFrame.
features_df = pd.DataFrame(results)

In [13]:
# Save the feature table to CSV, leaving out the DataFrame index.
features_df.to_csv("wallet_features_1.csv", index=False)

In [14]:
# Preview the first rows of the feature table.
features_df.head()

Unnamed: 0,chain,address,tx_total,avg_tx_per_day,avg_tx_per_block,max_tx_per_block,min_tx_per_block,std_tx_amount,tx_between_00_04,num_unique_counterparties,same_day_deposit_withdraw,most_common_amount
0,ETH,0xd1ceeee271fd5a8b0e2bfc12ea5b5b2e5cedec95,5527,6.49,1.3677,39,1,0.905637,0.0329,39,1,0.0
1,ETH,0x4a7a92fe5fdee55e90e199aaeb99f5b0f36e026d,5404,16.28,1.0048,3,1,0.939983,0.2185,98,1,0.1
2,ETH,0x00000000c0293c8ca34dac9bcc0f953532d34e4d,10000,123.46,1.0857,9,1,0.003174,0.1612,7,1,0.0
3,ETH,0xddfd7f68662bef333bb7891580948e83dcd3c988,10000,416.67,1.2142,7,1,13.28501,0.0689,11,1,5.0
4,ETH,0xfdc84ce4b42acccd34df47ce88c09a9ec7da14bf,8635,239.86,1.2079,8,1,9.890027,0.0563,13,1,5.0
