In [2]:
import requests
import time
import csv
import pandas as pd
import os
from dotenv import load_dotenv

In [1]:
# Load variables from a .env file (if python-dotenv finds one) into the
# process environment before the key is looked up.
load_dotenv()

# Arbiscan API key; this is None when ARBISCAN_API_KEY is not set.
api_key = os.environ.get('ARBISCAN_API_KEY')

def get_transfers_in_block_range(start_block, end_block, contract_address):
    """Fetch token transfer events for one contract within a block range.

    Sends a single ``module=account&action=tokentx`` request to the Arbiscan
    API using the module-level ``api_key``.

    Args:
        start_block: First block of the range to query.
        end_block: Last block of the range to query.
        contract_address: Address of the token contract whose transfers are
            requested.

    Returns:
        A list of dicts with the keys ``timestamp``, ``value``, ``from`` and
        ``to``, copied from each record of the response in the order the API
        returned them (ascending sort is requested). The list is empty when
        the response has no ``result`` entry or the entry is an empty list.

    Raises:
        requests.HTTPError: If the API answers with an HTTP error status.
        requests.Timeout: If the API does not answer within the timeout.
        RuntimeError: If ``result`` is not a list, which is how the API
            reports failures such as rate limiting or an invalid key.
    """
    # Documented per-request record cap of Etherscan-family APIs.
    # NOTE(review): confirm the cap against the current Arbiscan docs.
    max_records_per_request = 10000

    url = "https://api.arbiscan.io/api"

    params = {
        "module": "account",
        "action": "tokentx",
        "contractaddress": contract_address,
        # The API documentation spells these parameter names in lowercase.
        "startblock": start_block,
        "endblock": end_block,
        "sort": "asc",
        "apikey": api_key
    }

    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.get(url, params=params, timeout=30)
    response.raise_for_status()
    data = response.json()

    if 'result' not in data:
        return []

    records = data['result']
    if not isinstance(records, list):
        # Error responses carry a message string in `result`; iterating it
        # would yield characters and fail with an unhelpful TypeError.
        raise RuntimeError(
            f"Arbiscan request for blocks {start_block}-{end_block} failed: "
            f"{data.get('message')!r} / {records!r}"
        )

    if len(records) >= max_records_per_request:
        # The range probably holds more transfers than one response can carry.
        print(
            f"Warning: blocks {start_block}-{end_block} returned {len(records)} records; "
            "the result may be truncated, consider a smaller block range."
        )

    return [
        {
            'timestamp': record['timeStamp'],
            'value': record['value'],
            'from': record['from'],
            'to': record['to']
        }
        for record in records
    ]

def save_transfers_to_csv(transfers, filename):
    """Write transfer records to a CSV file, one row per record.

    The header row is taken from the keys of the first record.

    Args:
        transfers: List of dicts that all share the keys of the first dict.
        filename: Path of the CSV file to create or overwrite.

    Returns:
        None. When ``transfers`` is empty nothing is written and the target
        file is left untouched, because there is no record to derive a header
        from.
    """
    if not transfers:
        # Indexing transfers[0] on an empty list would raise IndexError.
        print("No transfers to save")
        return

    print("Saving")
    keys = list(transfers[0].keys())
    # newline='' lets the csv module control line endings itself.
    with open(filename, 'w', newline='', encoding='utf-8') as output_file:
        dict_writer = csv.DictWriter(output_file, fieldnames=keys)
        dict_writer.writeheader()
        dict_writer.writerows(transfers)
        
def get_all_transfers_in_chunks(start_block, end_block, contract_address, block_chunk_size=10000):
    """Collect transfers over a block range by querying it chunk by chunk.

    The range is walked from ``start_block`` to ``end_block``. Each chunk is
    passed to ``get_transfers_in_block_range`` and the next chunk starts one
    block after the previous chunk's last block.

    Args:
        start_block: First block to query.
        end_block: Last block to query.
        contract_address: Token contract address forwarded to each query.
        block_chunk_size: Distance between the first and last block of a
            chunk. Must not be negative.

    Returns:
        A single list with the transfers of all chunks, in query order.

    Raises:
        ValueError: If ``block_chunk_size`` is negative, which would make the
            loop walk backwards and never terminate.
    """
    if block_chunk_size < 0:
        raise ValueError("block_chunk_size must not be negative")

    all_transfers = []
    current_block = start_block

    # `<=` rather than `<`: a final chunk that starts exactly on end_block
    # (or a range where start_block == end_block) still has one block to query.
    while current_block <= end_block:
        next_block = min(current_block + block_chunk_size, end_block)
        print(f"Querying transfers from block {current_block} to {next_block}...")

        transfers = get_transfers_in_block_range(current_block, next_block, contract_address)
        all_transfers.extend(transfers)

        current_block = next_block + 1

        # Short pause between requests to stay under the API rate limit.
        time.sleep(0.2)

    return all_transfers

In [None]:
# Download the transfers of one token contract in 500,000-block chunks and
# store them as a CSV file.
transfers = get_all_transfers_in_chunks(
    start_block=127151615,
    end_block=259085543,
    contract_address="0x9c4ec768c28520B50860ea7a15bd7213a9fF58bf",
    block_chunk_size=500000,
)
# Alternative kept for reference: a single unchunked query for another contract.
# transfers = get_transfers_in_block_range(21304174, 259085543, "0xf329e36C7bF6E5E86ce2150875a84Ce77f477375")
save_transfers_to_csv(transfers, filename="../data/arbitrum/cArbUSDC.csv")

In [3]:
folder_path = '../data/arbitrum'

# Only rows with a timestamp strictly greater than this Unix time are kept
# (1696118399 is 2023-09-30 23:59:59 UTC).
CUTOFF_TIMESTAMP = 1696118399

# File written by this cell. It lives in folder_path, so it must be skipped
# when scanning for inputs: it has no 'timestamp' column and reading it back
# on a re-run would raise KeyError.
OUTPUT_FILENAME = 'distinct_addresses_1_year.csv'

# 'distinct_addresses.csv' was already excluded by the original filter;
# presumably it is an address list from an earlier run.
NON_TRANSFER_FILES = {'distinct_addresses.csv', OUTPUT_FILENAME}

all_addresses = set()

for filename in os.listdir(folder_path):
    if filename.endswith('.csv') and filename not in NON_TRANSFER_FILES:
        file_path = os.path.join(folder_path, filename)
        df = pd.read_csv(file_path)

        df = df[df['timestamp'] > CUTOFF_TIMESTAMP]

        # dropna() keeps missing values out of the address set.
        all_addresses.update(df['from'].dropna().unique())
        all_addresses.update(df['to'].dropna().unique())

# Sorting makes the output identical between runs; set order is not stable.
distinct_addresses_df = pd.DataFrame(sorted(all_addresses), columns=['address'])

distinct_addresses_df.to_csv(os.path.join(folder_path, OUTPUT_FILENAME), index=False)