# 📚 Импорт библиотек

In [1]:
import requests
import time
from datetime import datetime
import pandas as pd
from tqdm import tqdm

import pickle

# 👛 Импорт адресов

In [2]:
# Load the address table from a local pickle file into `df`.
# NOTE(review): unpickling can execute arbitrary code — only load this file
# if it comes from a trusted source.
df = pd.read_pickle("df_addresses.pickle")
# Bare expression: renders the loaded frame as the notebook cell output.
df

Unnamed: 0,index,user,airdropped_tokens,get_drop
0,0,0x934204e4617792b8ec2a34f0330a19ab3393041f,5.464521,0
1,1,0xa99979dca47c7c41262936521d59d9a922f7a1d2,4.161325,0
2,2,0x5fc7b11e3281b7404573465bc99d9aebc87ec59b,5.756350,0
3,3,0x7c56d095261c0c9921adf95212f499018340bdde,19.357885,1
4,4,0xa33f601b04ba846586266bfae01ae47d5362eff4,6.113811,1
...,...,...,...,...
745228,745228,0x5e73cf9cebd82d8c192f538f41186b55965e6fc6,25.185039,1
745229,745229,0x335b7c6633a29e43dbe2f7804803cc8243f2a830,65.942124,1
745230,745230,0xecebbda750e02674eab8929c69e9de5b30c4446f,26.084444,1
745231,745231,0x45d1d1124a10dafdf0fe6c6e12a1dea9c1c6db25,159.581385,1


In [3]:
# Keep the rows whose "index" value is below 740000, then pull their wallet
# addresses (column "user") into a plain Python list.
# NOTE(review): the "_15k" suffix does not match the 740000 threshold — confirm
# which of the two is intended.
df_15k = df.loc[df["index"].lt(740000)]
list_of_addresses_15k = df_15k["user"].tolist()

# 🧰 Нужные функции

## Перевод строки в UNIX-timestamp 

In [4]:
def date_to_timestamp(date_str: str) -> int:
    """
    Convert a 'YYYY-MM-DD' date string to a UNIX timestamp in seconds.

    The date is interpreted as midnight UTC, so the result is the same on
    every machine. The previous implementation used ``time.mktime``, which
    interprets the date in the local time zone and therefore shifted the
    cutoff by the host's UTC offset.

    Args:
        date_str: Calendar date formatted as '%Y-%m-%d'.

    Returns:
        Whole seconds since 1970-01-01 00:00:00 UTC.

    Raises:
        ValueError: If ``date_str`` does not match '%Y-%m-%d'.
    """
    # Function-scope import: the file only imports `datetime` from this module.
    from datetime import timezone

    midnight_utc = datetime.strptime(date_str, "%Y-%m-%d").replace(
        tzinfo=timezone.utc
    )
    return int(midnight_utc.timestamp())

## Функции сбора транзакций и трансферов

In [5]:
# Shared settings for the Arbiscan account endpoints queried below.
_ARBISCAN_API_URL = "https://api.arbiscan.io/api"
_PAGE_SIZE = 10000  # records requested per page
_REQUEST_TIMEOUT = 30  # seconds; without a timeout a stalled connection never returns
_RATE_LIMIT_RETRIES = 5  # extra attempts after a rate-limit error payload
_RATE_LIMIT_PAUSE = 1.0  # seconds, multiplied by the attempt number between retries


def _request_page(params: dict) -> list:
    """
    Request one result page and return its records.

    A response counts as successful when ``status`` is "1" and ``result`` is a
    list. An error payload whose text contains "rate limit" is retried with a
    growing pause; any other non-success payload yields an empty list, which
    the caller treats as "no more records" (same as the previous behaviour).

    Raises:
        requests.RequestException: On network failures, timeouts or HTTP
            error status codes.
        RuntimeError: If the API still reports rate limiting after all retries.
    """
    for attempt in range(1, _RATE_LIMIT_RETRIES + 2):
        resp = requests.get(
            _ARBISCAN_API_URL, params=params, timeout=_REQUEST_TIMEOUT
        )
        resp.raise_for_status()
        data = resp.json()
        result = data.get("result")
        if data.get("status") == "1" and isinstance(result, list):
            return result

        error_text = f"{data.get('message')} {result}".lower()
        if "rate limit" not in error_text:
            # Not rate limiting (e.g. an empty result set): end of data.
            return []
        if attempt <= _RATE_LIMIT_RETRIES:
            time.sleep(_RATE_LIMIT_PAUSE * attempt)

    raise RuntimeError(
        f"Arbiscan rate limit persisted after {_RATE_LIMIT_RETRIES} retries "
        f"(action={params['action']}, address={params['address']})"
    )


def _fetch_until_cutoff(
    action: str, address: str, api_key: str, end_date: str
) -> list:
    """
    Page through one account endpoint and collect records up to the cutoff.

    Records are requested in ascending order, so collection stops at the first
    record whose ``timeStamp`` is later than the cutoff.
    """
    cutoff_ts = date_to_timestamp(end_date)
    params = {
        "module": "account",
        "action": action,
        "address": address,
        "startblock": 0,
        "endblock": 99999999,
        "page": 1,
        "offset": _PAGE_SIZE,
        "sort": "asc",
        "apikey": api_key,
    }

    collected = []
    while True:
        batch = _request_page(params)
        for tx in batch:
            if int(tx["timeStamp"]) > cutoff_ts:
                return collected
            collected.append(tx)

        # A short page means the endpoint has nothing further to return.
        if len(batch) < params["offset"]:
            return collected
        # NOTE(review): Etherscan-family APIs document a result window of
        # page * offset <= 10000. If that applies here, page 2 comes back as an
        # error payload and histories longer than one page are cut off at
        # 10000 records — confirm, and switch to block-range pagination if so.
        params["page"] += 1


def fetch_transactions(
    address: str, api_key: str, end_date: str, tx_type: str = "normal"
) -> list:
    """
    Collect all transactions of ``address`` up to ``end_date``.

    Args:
        address: Wallet address to query.
        api_key: Arbiscan API key.
        end_date: Cutoff date as 'YYYY-MM-DD'; records whose ``timeStamp`` is
            greater than ``date_to_timestamp(end_date)`` are excluded.
        tx_type: 'normal' queries the ``txlist`` action; any other value
            queries ``txlistinternal``.

    Returns:
        A list of transaction dicts exactly as returned by the API, in
        ascending order.

    Raises:
        requests.RequestException: On network failures, timeouts or HTTP
            error status codes.
        RuntimeError: If the API keeps reporting rate limiting after retries.
    """
    action = "txlist" if tx_type == "normal" else "txlistinternal"
    return _fetch_until_cutoff(action, address, api_key, end_date)


def fetch_token_transfers(address: str, api_key: str, end_date: str) -> list:
    """
    Collect all ERC-20 transfers of ``address`` up to ``end_date``.

    Args:
        address: Wallet address to query.
        api_key: Arbiscan API key.
        end_date: Cutoff date as 'YYYY-MM-DD'; records whose ``timeStamp`` is
            greater than ``date_to_timestamp(end_date)`` are excluded.

    Returns:
        A list of transfer dicts exactly as returned by the ``tokentx``
        action, in ascending order.

    Raises:
        requests.RequestException: On network failures, timeouts or HTTP
            error status codes.
        RuntimeError: If the API keeps reporting rate limiting after retries.
    """
    return _fetch_until_cutoff("tokentx", address, api_key, end_date)

## Функция объединения собранных данных

In [6]:
def collect_all_to_dataframe(
    addresses: list, api_key: str, end_date: str
) -> pd.DataFrame:
    """
    Gather normal, internal and ERC-20 records for every address.

    For each address the three sources are queried in the order normal,
    internal, erc20. Every returned record becomes one row that keeps all of
    its original fields and gains two extra columns:
      - tx_type : 'normal', 'internal' or 'erc20'
      - address : the wallet address the record was fetched for

    Returns:
        A DataFrame with one row per record (empty if nothing was fetched).
    """
    # (label, fetcher) pairs, applied to each wallet in this order.
    fetchers = (
        (
            "normal",
            lambda wallet: fetch_transactions(
                wallet, api_key, end_date, tx_type="normal"
            ),
        ),
        (
            "internal",
            lambda wallet: fetch_transactions(
                wallet, api_key, end_date, tx_type="internal"
            ),
        ),
        (
            "erc20",
            lambda wallet: fetch_token_transfers(wallet, api_key, end_date),
        ),
    )

    records = []
    for wallet in addresses:
        for label, fetch in fetchers:
            records.extend(
                {**tx, "tx_type": label, "address": wallet}
                for tx in fetch(wallet)
            )

    return pd.DataFrame(records)

# 🦍 Сбор данных с Arbiscan

In [None]:
# Parameters
import os  # cell-local import: only needed for the environment override below

# SECURITY(review): these API keys are committed in plain text. Rotate them and
# supply the key through the ARBISCAN_API_KEY environment variable instead.
# The literals are kept only as a fallback so existing runs behave as before.
_FALLBACK_API_KEYS = (
    "9FWKQXMRZXCSFV428DC3EJZJN5SG6NBP2U",
    "PUMC3UX9SZSVKZH7FK7ZJCKFKJR2Y1JZNH",
)
# The second fallback key is the one the notebook selected originally.
ARBISCAN_API_KEY = os.environ.get("ARBISCAN_API_KEY") or _FALLBACK_API_KEYS[1]
# Cutoff date passed to the fetch functions as `end_date`.
END_DATE = "2024-07-04"

# Accumulates the per-batch DataFrames until they are written to disk.
list_of_ours_df = []

In [None]:
# Collection loop: each outer step processes BATCHES_PER_FILE batches of
# BATCH_SIZE addresses and writes their DataFrames to one pickle file.
BATCH_SIZE = 5  # addresses per collect_all_to_dataframe call
BATCHES_PER_FILE = 300  # batches stored in each pickle file
# (batch index, error) for every batch skipped after an exception, so the
# missing address ranges can be re-collected later instead of vanishing.
failed_batches = []

for lll in tqdm(range(37, 50)):
    for i in range(BATCHES_PER_FILE * lll, BATCHES_PER_FILE * (lll + 1)):
        ADDRESSES = list_of_addresses_15k[i * BATCH_SIZE : (i + 1) * BATCH_SIZE]
        try:
            # Bound to `batch_df`, not `df`, so the address table loaded
            # earlier in the notebook is not overwritten.
            batch_df = collect_all_to_dataframe(
                ADDRESSES, ARBISCAN_API_KEY, END_DATE
            )
        except Exception as exc:
            # Best effort: keep going, but leave a trace of what was skipped.
            failed_batches.append((i, repr(exc)))
            tqdm.write(f"batch {i} skipped: {exc!r}")
            time.sleep(1)
            continue

        list_of_ours_df.append(batch_df)
        time.sleep(0.3)  # pause between batches

    with open(f"list_of_df_15k_offset_300_{lll}.pkl", "wb") as f:
        pickle.dump(list_of_ours_df, f)

    # Start the next file with an empty accumulator.
    list_of_ours_df = []

 23%|██▎       | 3/13 [2:08:43<7:05:05, 2550.54s/it]