In [4]:
import requests
import pandas as pd
from datetime import datetime
from test import predicted

import os

# Etherscan API key
# SECURITY NOTE(review): a real key is committed here in plain text. Prefer
# exporting ETHERSCAN_API_KEY in the environment; the literal below is kept
# only as a backward-compatible fallback and should be rotated and removed.
api_key = os.environ.get('ETHERSCAN_API_KEY') or '1RY32CCK7M9Z9AQPFZIMXB6HZE8PW2XMDN'

# Function to get the latest block number
def get_latest_block():
    """Return the latest block number reported by the Etherscan proxy API.

    Calls the ``eth_blockNumber`` action and decodes the hexadecimal
    ``result`` field of the JSON response.

    Returns:
        The block number as an ``int``, or ``None`` (after printing a
        diagnostic) when the request fails, the body is not JSON, or
        ``result`` is missing or not a hexadecimal string.
    """
    url = f"https://api.etherscan.io/api?module=proxy&action=eth_blockNumber&apikey={api_key}"
    try:
        # Without a timeout a stalled connection would block the script forever.
        response = requests.get(url, timeout=30)
        data = response.json()
    except (requests.RequestException, ValueError) as exc:
        print(f"Error fetching latest block: {exc}")
        return None

    result = data.get('result') if isinstance(data, dict) else None
    try:
        return int(result, 16)
    except (TypeError, ValueError):
        # 'result' may be absent or hold a non-hex value (e.g. an error
        # string); report whatever explanation the payload offers.
        detail = data
        if isinstance(data, dict):
            detail = data.get('message') or data.get('error') or result or data
        print(f"Error fetching latest block: {detail}")
        return None

# Function to get transactions in a block
def get_block_transactions(block_number):
    """Return the transaction objects contained in one block.

    Calls the Etherscan proxy ``eth_getBlockByNumber`` action with
    ``boolean=true`` so the response carries full transaction objects.

    Args:
        block_number: Block height as an ``int``; it is sent hex-encoded.

    Returns:
        The block's ``transactions`` list, or ``[]`` (after printing a
        diagnostic) when the request fails or the payload does not contain
        a block object.
    """
    url = f"https://api.etherscan.io/api?module=proxy&action=eth_getBlockByNumber&tag={hex(block_number)}&boolean=true&apikey={api_key}"
    try:
        # Without a timeout a stalled connection would block the script forever.
        response = requests.get(url, timeout=30)
        data = response.json()
    except (requests.RequestException, ValueError) as exc:
        print(f"Error fetching block transactions: {exc}")
        return []

    block = data.get('result') if isinstance(data, dict) else None
    if isinstance(block, dict):
        return block.get('transactions') or []

    # 'result' can be missing, null, or a plain string instead of a block
    # object; indexing it blindly would raise.
    detail = data
    if isinstance(data, dict):
        detail = data.get('message') or data.get('error') or block or data
    print(f"Error fetching block transactions: {detail}")
    return []

# Shared pager behind get_ethereum_transactions / get_erc20_transactions
def _fetch_account_transactions(action, address, start_block, end_block, total_transactions):
    """Page through an Etherscan ``module=account`` listing.

    Args:
        action: Etherscan account action name (``'txlist'`` or ``'tokentx'``).
        address: Account address to query.
        start_block: First block of the range.
        end_block: Last block of the range.
        total_transactions: Maximum number of rows to return.

    Returns:
        A ``pandas.DataFrame`` with at most ``total_transactions`` rows, one
        per record returned by the API (empty when nothing was fetched).
    """
    collected = []
    page = 1
    offset = 100  # page size requested from the API

    while len(collected) < total_transactions:
        params = {
            'module': 'account',
            'action': action,
            'address': address,
            'startblock': start_block,
            'endblock': end_block,
            'page': page,
            'offset': offset,
            'apikey': api_key,
        }
        try:
            # Without a timeout a stalled connection would block forever.
            response = requests.get("https://api.etherscan.io/api", params=params, timeout=30)
            data = response.json()
        except (requests.RequestException, ValueError) as exc:
            print(f"Error fetching data: {exc}")
            break

        succeeded = (
            isinstance(data, dict)
            and data.get('status') == '1'
            and data.get('message') == 'OK'
        )
        batch = data.get('result') if succeeded else None
        if not isinstance(batch, list):
            detail = data.get('message') if isinstance(data, dict) else data
            print(f"Error fetching data: {detail}")
            break

        collected.extend(batch)
        if len(batch) < offset:
            # A short page is the last one; asking for the next page would
            # only produce a spurious "No transactions found" error.
            break
        page += 1

    return pd.DataFrame(collected[:total_transactions])


# Function to get Ethereum transactions
def get_ethereum_transactions(address, start_block, end_block, total_transactions=500):
    """Fetch up to ``total_transactions`` normal transactions for ``address``.

    Uses the Etherscan ``txlist`` action over ``start_block``..``end_block``
    and returns the rows as a ``pandas.DataFrame`` (empty on failure).
    """
    return _fetch_account_transactions('txlist', address, start_block, end_block, total_transactions)


# Function to get ERC20 transactions
def get_erc20_transactions(address, start_block, end_block, total_transactions=500):
    """Fetch up to ``total_transactions`` ERC20 transfer records for ``address``.

    Uses the Etherscan ``tokentx`` action over ``start_block``..``end_block``
    and returns the rows as a ``pandas.DataFrame`` (empty on failure).
    """
    return _fetch_account_transactions('tokentx', address, start_block, end_block, total_transactions)

# Function to get account balance
def get_account_balance(address):
    """Return the current balance of ``address`` divided by 10**18.

    Queries the Etherscan ``balance`` action with ``tag=latest``.

    Returns:
        The balance as a ``float``, or ``0.0`` (after printing a diagnostic)
        when the request fails, the API does not answer ``status == '1'`` /
        ``message == 'OK'``, or ``result`` is not numeric.
    """
    url = f"https://api.etherscan.io/api?module=account&action=balance&address={address}&tag=latest&apikey={api_key}"
    try:
        # Without a timeout a stalled connection would block the script forever.
        response = requests.get(url, timeout=30)
        data = response.json()
    except (requests.RequestException, ValueError) as exc:
        print(f"Error fetching balance: {exc}")
        return 0.0

    succeeded = (
        isinstance(data, dict)
        and data.get('status') == '1'
        and data.get('message') == 'OK'
    )
    if succeeded:
        try:
            return float(data.get('result')) / 10**18
        except (TypeError, ValueError):
            # Reported success but 'result' is not a number.
            print(f"Error fetching balance: unexpected result {data.get('result')!r}")
            return 0.0

    detail = data.get('message') if isinstance(data, dict) else data
    print(f"Error fetching balance: {detail}")
    return 0.0

# Function to calculate unique from addresses for each transaction
def calculate_unique_from_addresses(transactions_df):
    """Add a constant 'Unique Received From Addresses' column, in place.

    Every row receives the number of distinct non-null values in the
    ``'from'`` column minus one. The same DataFrame object is returned.
    """
    distinct_senders = transactions_df['from'].dropna().unique()
    # NOTE(review): the "- 1" presumably discounts the queried address itself;
    # that only holds when the address appears as a sender — confirm.
    transactions_df['Unique Received From Addresses'] = len(distinct_senders) - 1
    return transactions_df

# Function to calculate time difference between first and last transaction in minutes
def calculate_time_difference(transactions_df):
    """Add the span between the earliest and latest transaction, in minutes.

    When a ``'timeStamp'`` column exists it is converted in place from epoch
    seconds to datetimes, and a constant
    ``'Time Diff between first and last (Mins)'`` column is added. Without
    that column the frame is returned unchanged.

    Args:
        transactions_df: DataFrame whose ``'timeStamp'`` column, if present,
            holds epoch seconds (numbers or numeric strings) or datetimes.

    Returns:
        The same DataFrame object, mutated in place.
    """
    if 'timeStamp' not in transactions_df.columns:
        return transactions_df

    stamps = transactions_df['timeStamp']
    if not pd.api.types.is_datetime64_any_dtype(stamps):
        # Cast to numbers first: parsing *strings* together with ``unit`` is
        # deprecated in pandas. Skipping already-converted columns keeps a
        # second call from re-interpreting datetimes as seconds.
        stamps = pd.to_datetime(pd.to_numeric(stamps), unit='s')
    transactions_df['timeStamp'] = stamps

    span = stamps.max() - stamps.min()
    transactions_df['Time Diff between first and last (Mins)'] = span.total_seconds() / 60
    return transactions_df

def _rows_matching(frame, column, address):
    """Return the rows of ``frame`` whose ``column`` equals ``address`` (case-insensitive)."""
    if column not in frame.columns:
        return frame.iloc[0:0]
    # astype(str) keeps missing values from breaking the string comparison.
    return frame[frame[column].astype(str).str.lower() == address.lower()]


def _to_ether(amounts):
    """Convert a Series of raw amounts (strings or numbers) to floats divided by 10**18."""
    return amounts.astype(float) / 10**18


def _avg_minutes_between(frame):
    """Return the mean gap in minutes between consecutive 'timeStamp' values, or 0 if undefined."""
    if 'timeStamp' not in frame.columns:
        return 0
    stamps = frame['timeStamp']
    if not pd.api.types.is_datetime64_any_dtype(stamps):
        stamps = pd.to_datetime(pd.to_numeric(stamps), unit='s')
    # Sorting makes the gaps non-negative regardless of the row order.
    gap = stamps.sort_values().diff().mean()
    # Fewer than two rows yields NaT; report 0 rather than NaN.
    return 0 if pd.isna(gap) else gap.total_seconds() / 60


def _count_created_contracts(sent):
    """Count rows of ``sent`` that look like successful contract creations."""
    if sent.empty:
        return 0
    # NOTE(review): assumes creation transactions carry a non-empty
    # 'contractAddress' (and an empty 'to') — confirm against the API docs.
    if 'contractAddress' in sent.columns:
        created = sent['contractAddress'].fillna('').astype(str).str.strip() != ''
    elif 'to' in sent.columns:
        created = sent['to'].fillna('').astype(str).str.strip() == ''
    else:
        return 0
    if 'isError' in sent.columns:
        created &= sent['isError'].astype(str) != '1'
    return int(created.sum())


def calculate_additional_features(transactions_df, erc20_transactions_df, address):
    """Attach per-address aggregate features to every row of ``transactions_df``.

    Each feature is a single scalar computed for ``address`` and written as
    a constant column. Features whose inputs are missing stay at ``0``.

    Args:
        transactions_df: Normal transactions involving ``address``; expected
            to provide ``from``, ``to`` and ``value`` columns.
        erc20_transactions_df: ERC20 transfer records involving ``address``
            (may be empty / column-less).
        address: The account the features describe; compared
            case-insensitively against ``from`` / ``to``.

    Returns:
        ``transactions_df`` itself, mutated in place with one column per
        feature.

    Notes:
        * ERC20 amounts are divided by 10**18 regardless of each token's own
          decimals, as in the original implementation.
        * The "sent to contract" group selects destinations starting with
          ``'0x'``, which does not actually distinguish contracts from plain
          accounts; doing so needs an extra lookup that is not performed.
        * " ERC20 total Ether sent contract" mirrors the plain ERC20 sent
          total for the same reason.
    """
    features = {
        " ERC20 max val rec": 0,
        " Total ERC20 tnxs": 0,
        "total transactions (including tnx to create contract": len(transactions_df),
        " ERC20 total Ether received": 0,
        "total ether balance": get_account_balance(address),
        "avg val received": 0,
        "total ether received": 0,
        "Avg min between received tnx": 0,
        " ERC20 total ether sent": 0,
        " ERC20 avg val rec": 0,
        "Received Tnx": 0,
        "min val sent": 0,
        " ERC20 min val rec": 0,
        "min value received": 0,
        " ERC20 uniq rec contract addr": 0,
        "max value received ": 0,
        "total Ether sent": 0,
        " ERC20 uniq sent token name": 0,
        " ERC20 uniq sent addr": 0,
        "max val sent": 0,
        " ERC20 avg val sent": 0,
        "Unique Sent To Addresses": 0,
        " ERC20 max val sent": 0,
        " ERC20 uniq rec token name": 0,
        "Number of Created Contracts": 0,
        "Avg min between sent tnx": 0,
        "Sent tnx": 0,
        "avg val sent": 0,
        "min value sent to contract": 0,
        "max val sent to contract": 0,
        "avg value sent to contract": 0,
        "total ether sent contracts": 0,
        " ERC20 total Ether sent contract": 0,
        " ERC20 uniq rec addr": 0
    }

    # --- ERC20 transfers -------------------------------------------------
    # The total covers transfers in both directions, not only received ones.
    features[" Total ERC20 tnxs"] = len(erc20_transactions_df)

    erc20_received = _rows_matching(erc20_transactions_df, 'to', address)
    if not erc20_received.empty:
        received_tokens = _to_ether(erc20_received['value'])
        features[" ERC20 max val rec"] = received_tokens.max()
        features[" ERC20 min val rec"] = received_tokens.min()
        features[" ERC20 avg val rec"] = received_tokens.mean()
        features[" ERC20 total Ether received"] = received_tokens.sum()
        features[" ERC20 uniq rec token name"] = erc20_received['tokenName'].nunique()
        features[" ERC20 uniq rec contract addr"] = erc20_received['contractAddress'].nunique()
        features[" ERC20 uniq rec addr"] = erc20_received['from'].nunique()

    erc20_sent = _rows_matching(erc20_transactions_df, 'from', address)
    if not erc20_sent.empty:
        sent_tokens = _to_ether(erc20_sent['value'])
        features[" ERC20 max val sent"] = sent_tokens.max()
        features[" ERC20 avg val sent"] = sent_tokens.mean()
        features[" ERC20 total ether sent"] = sent_tokens.sum()
        features[" ERC20 uniq sent token name"] = erc20_sent['tokenName'].nunique()
        features[" ERC20 uniq sent addr"] = erc20_sent['to'].nunique()
        features[" ERC20 total Ether sent contract"] = sent_tokens.sum()

    # --- Normal transactions sent by the address -------------------------
    sent_transactions = _rows_matching(transactions_df, 'from', address)
    if not sent_transactions.empty:
        # Values are kept in a separate Series so no filtered slice of the
        # caller's frame is ever assigned to.
        sent_values = _to_ether(sent_transactions['value'])
        features["Avg min between sent tnx"] = _avg_minutes_between(sent_transactions)
        features["Sent tnx"] = len(sent_transactions)
        features["avg val sent"] = sent_values.mean()
        features["min val sent"] = sent_values.min()
        features["max val sent"] = sent_values.max()
        features["total Ether sent"] = sent_values.sum()
        features["Unique Sent To Addresses"] = sent_transactions['to'].nunique()

        # astype(str) makes a missing destination compare as False instead
        # of raising.
        to_prefixed = sent_transactions['to'].astype(str).str.startswith('0x')
        contract_values = sent_values[to_prefixed]
        if not contract_values.empty:
            features["min value sent to contract"] = contract_values.min()
            features["max val sent to contract"] = contract_values.max()
            features["avg value sent to contract"] = contract_values.mean()
            features["total ether sent contracts"] = contract_values.sum()

    # --- Normal transactions received by the address ---------------------
    received_transactions = _rows_matching(transactions_df, 'to', address)
    if not received_transactions.empty:
        received_values = _to_ether(received_transactions['value'])
        features["Avg min between received tnx"] = _avg_minutes_between(received_transactions)
        features["Received Tnx"] = len(received_transactions)
        features["avg val received"] = received_values.mean()
        features["min value received"] = received_values.min()
        features["max value received "] = received_values.max()
        features["total ether received"] = received_values.sum()

    # Count contract creations, not merely transactions that succeeded.
    features["Number of Created Contracts"] = _count_created_contracts(sent_transactions)

    for key, value in features.items():
        transactions_df[key] = value

    return transactions_df

# Function to process an address
def process_address(address, latest_block):
    """Fetch, featurize, save and score the transactions of one address.

    Downloads up to 500 normal and 500 ERC20 transactions from block 0 to
    ``latest_block``. When normal transactions exist, the feature columns
    are added, the frame is written to
    ``<address>_transactions_with_additional_features.csv`` and the return
    value of ``predicted()`` is printed. Otherwise a notice is printed.

    NOTE(review): ``predicted()`` is called without arguments; how it finds
    its input is not visible here — confirm against the ``test`` module.
    """
    eth_history = get_ethereum_transactions(address, start_block=0, end_block=latest_block, total_transactions=500)
    token_history = get_erc20_transactions(address, start_block=0, end_block=latest_block, total_transactions=500)

    if eth_history.empty:
        print(f"No transactions found for address {address}.")
        return

    # Each step mutates and returns the frame, so the calls chain in order.
    eth_history = calculate_unique_from_addresses(eth_history)
    eth_history = calculate_time_difference(eth_history)
    eth_history = calculate_additional_features(eth_history, token_history, address)

    out_path = f'{address}_transactions_with_additional_features.csv'
    eth_history.to_csv(out_path, index=False)
    print(f"Transaction data with additional features for {address} has been saved to '{out_path}'.")
    print(predicted())

# Fetch the latest block number
latest_block = get_latest_block()

# Fetch the transactions of the latest block and process their addresses
if latest_block is not None:
    transactions = get_block_transactions(latest_block)

    # Collect each sender/recipient once, in first-seen order: an address
    # that appears in several transactions would otherwise be fetched,
    # written and scored repeatedly with identical results.
    addresses = []
    seen = set()
    for tx in transactions:
        for party in (tx.get('from'), tx.get('to')):
            # 'to' can be empty/None, so falsy parties are skipped.
            if party and party not in seen:
                seen.add(party)
                addresses.append(party)

    # Process every extracted address
    for address in addresses:
        process_address(address, latest_block)
else:
    print("Failed to retrieve the latest block.")


Error fetching data: No transactions found
Transaction data with additional features for 0x6b75d8af000000e20b7a7ddf000ba900b4009a80 has been saved to '0x6b75d8af000000e20b7a7ddf000ba900b4009a80_transactions_with_additional_features.csv'.
[LightGBM] [Info] Number of positive: 1765, number of negative: 6191
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001448 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6701
[LightGBM] [Info] Number of data points in the train set: 7956, number of used features: 34
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.221845 -> initscore=-1.254946
[LightGBM] [Info] Start training from score -1.254946
Training until validation scores don't improve for 5 rounds
Early stopping, best iteration is:
[63]	valid_0's binary_logloss: 0.0373683
0
Transaction data with additional features for 0xae2fc483527b8ef99eb5d9b44875f005ba1fae13 has been saved to '0xae2fc483527b8ef99eb5d9b

KeyboardInterrupt: 

In [None]:
# Binds the imported function object itself; `predicted` is not called here.
x = predicted

In [None]:
# Display `x`; the recorded output shows the function object, not a prediction.
x

<function test.predicted()>