In [1]:
# Passo 1: Instalar todas as bibliotecas que vamos usar na sessão
# O '!' permite rodar comandos de terminal na célula do notebook
!pip install pandas sqlalchemy psycopg2-binary web3 tqdm

# Passo 2: Importar as bibliotecas para o nosso script
import pandas as pd

print("--- Bibliotecas prontas ---")

# Passo 3: Carregar o dataset do arquivo CSV
file_path = 'proxy_transactions.csv' # Use o nome exato do seu arquivo CSV

try:
    df = pd.read_csv(file_path)
    print(f"\nArquivo CSV '{file_path}' carregado com sucesso!")
    print(f"Total de {len(df)} transações históricas para análise.")
    display(df.head())
except FileNotFoundError:
    print(f"\n--- ERRO: Arquivo '{file_path}' não encontrado! ---")
    print("Verifique se o arquivo CSV está na mesma pasta que este notebook.")

--- Bibliotecas prontas ---

Arquivo CSV 'proxy_transactions.csv' carregado com sucesso!
Total de 5000 transações históricas para análise.


Unnamed: 0,Transaction Hash,Blockno,UnixTimestamp,DateTime (UTC),From,To,ContractAddress,Value_IN(ETH),Value_OUT(ETH),CurrentValue @ $0/Eth,TxnFee(ETH),TxnFee(USD),Historical $Price/Eth,Status,ErrCode,Method
0,0xe3e0649f7b70d03aa49e8f97e782c4cec38546d0f179...,8449777,1748736000,2025-06-01 00:00:00,0xc43497d9566193c37b16bf17d1bdb2b403ff450c,0x5fbe74a283f7954f10aa04c2edf55578811aeb03,,0,0,0,0.000202,0,,,,Send
1,0x4072bacb65a90e293b6fc0a842f6ac732e6e04e37d5c...,8449777,1748736000,2025-06-01 00:00:00,0x157867eee328bc6b0a2fa132db19b49b5660e370,0x5fbe74a283f7954f10aa04c2edf55578811aeb03,,0,0,0,0.000221,0,,,,Send
2,0xd994f29d8b7640815937945310b08ac98fe453b4ade1...,8449777,1748736000,2025-06-01 00:00:00,0x4bd970a520dead065877eaca7605065e5d678853,0x5fbe74a283f7954f10aa04c2edf55578811aeb03,,0,0,0,0.000221,0,,,,Send
3,0xb0eb945227d30078b72ac2aa553af2ad87d5878a381d...,8449777,1748736000,2025-06-01 00:00:00,0x4063889e1de1c6153bf3cf4d7ebde88ef58b7984,0x5fbe74a283f7954f10aa04c2edf55578811aeb03,,0,0,0,0.000221,0,,,,Send
4,0x9e6f7c2a5a9b47ee79aa4ec5b0851d8e6ca56f19451a...,8449777,1748736000,2025-06-01 00:00:00,0x0259a9427db645da006f5fada5850c4050b54cb5,0x5fbe74a283f7954f10aa04c2edf55578811aeb03,,0,0,0,0.000221,0,,,,Send


In [3]:
from web3 import Web3
from tqdm.auto import tqdm
from concurrent.futures import ThreadPoolExecutor, as_completed

# -------- Configuração da Conexão --------
# COLOQUE AQUI A SUA URL HTTP DA MAINNET OBTIDA NA ALCHEMY
alchemy_url = "https://eth-mainnet.g.alchemy.com/v2/OPbbmuEc74NysSD6jK28Z" 
w3 = Web3(Web3.HTTPProvider(alchemy_url))

# -------- Função para buscar dados de UMA transação --------
def fetch_full_tx_data(tx_hash):
    try:
        tx = w3.eth.get_transaction(tx_hash)
        receipt = w3.eth.get_transaction_receipt(tx_hash)
        return {
            'hash': tx_hash,
            'input_data': tx.input if tx else None,
            'gas_used': receipt.gasUsed if receipt else 0,
            'gas_price': tx.gasPrice if tx else 0,
            'status': receipt.status if receipt else -1
        }
    except Exception:
        return {'hash': tx_hash, 'input_data': 'error', 'gas_used': 0, 'gas_price': 0, 'status': -1}

# -------- Processo Concorrente para buscar todos os dados --------
print("Iniciando busca de dados completos para cada transação... Isso levará alguns minutos.")
tasks = []
enriched_data = []
tx_hashes = df['Transaction Hash'].tolist()

with ThreadPoolExecutor(max_workers=50) as executor:
    tasks = [executor.submit(fetch_full_tx_data, tx_hash) for tx_hash in tx_hashes]
    for future in tqdm(as_completed(tasks), total=len(tasks)):
        enriched_data.append(future.result())

enriched_df = pd.DataFrame(enriched_data)
df_enriquecido = pd.merge(df, enriched_df, left_on='Transaction Hash', right_on='hash', how='left')

print("\nEnriquecimento de dados concluído!")
display(df_enriquecido.head())

Iniciando busca de dados completos para cada transação... Isso levará alguns minutos.


  0%|          | 0/5000 [00:00<?, ?it/s]


Enriquecimento de dados concluído!


Unnamed: 0,Transaction Hash,Blockno,UnixTimestamp,DateTime (UTC),From,To,ContractAddress,Value_IN(ETH),Value_OUT(ETH),CurrentValue @ $0/Eth,...,TxnFee(USD),Historical $Price/Eth,Status,ErrCode,Method,hash,input_data,gas_used,gas_price,status
0,0xe3e0649f7b70d03aa49e8f97e782c4cec38546d0f179...,8449777,1748736000,2025-06-01 00:00:00,0xc43497d9566193c37b16bf17d1bdb2b403ff450c,0x5fbe74a283f7954f10aa04c2edf55578811aeb03,,0,0,0,...,0,,,,Send,0xe3e0649f7b70d03aa49e8f97e782c4cec38546d0f179...,error,0,0,-1
1,0x4072bacb65a90e293b6fc0a842f6ac732e6e04e37d5c...,8449777,1748736000,2025-06-01 00:00:00,0x157867eee328bc6b0a2fa132db19b49b5660e370,0x5fbe74a283f7954f10aa04c2edf55578811aeb03,,0,0,0,...,0,,,,Send,0x4072bacb65a90e293b6fc0a842f6ac732e6e04e37d5c...,error,0,0,-1
2,0xd994f29d8b7640815937945310b08ac98fe453b4ade1...,8449777,1748736000,2025-06-01 00:00:00,0x4bd970a520dead065877eaca7605065e5d678853,0x5fbe74a283f7954f10aa04c2edf55578811aeb03,,0,0,0,...,0,,,,Send,0xd994f29d8b7640815937945310b08ac98fe453b4ade1...,error,0,0,-1
3,0xb0eb945227d30078b72ac2aa553af2ad87d5878a381d...,8449777,1748736000,2025-06-01 00:00:00,0x4063889e1de1c6153bf3cf4d7ebde88ef58b7984,0x5fbe74a283f7954f10aa04c2edf55578811aeb03,,0,0,0,...,0,,,,Send,0xb0eb945227d30078b72ac2aa553af2ad87d5878a381d...,error,0,0,-1
4,0x9e6f7c2a5a9b47ee79aa4ec5b0851d8e6ca56f19451a...,8449777,1748736000,2025-06-01 00:00:00,0x0259a9427db645da006f5fada5850c4050b54cb5,0x5fbe74a283f7954f10aa04c2edf55578811aeb03,,0,0,0,...,0,,,,Send,0x9e6f7c2a5a9b47ee79aa4ec5b0851d8e6ca56f19451a...,error,0,0,-1


In [4]:
import json
import ast

print("--- Iniciando Engenharia de Features ---")

# Carrega o ABI do Roteador V2 para decodificar
router_v2_abi_str = '[{"inputs":[{"type":"uint256","name":"amountIn","internalType":"uint256"},{"type":"uint256","name":"amountOutMin","type":"uint256"},{"type":"address[]","name":"path","internalType":"address[]"},{"type":"address","name":"to","type":"address"},{"type":"uint256","name":"deadline","type":"uint256"}],"name":"swapExactTokensForTokens","outputs":[{"type":"uint256[]","name":"amounts","type":"uint256[]"}],"stateMutability":"nonpayable","type":"function"},{"inputs":[{"type":"uint256","name":"amountOutMin","type":"uint256"},{"type":"address[]","name":"path","type":"address[]"},{"type":"address","name":"to","type":"address"},{"type":"uint256","name":"deadline","type":"uint256"}],"name":"swapExactETHForTokens","outputs":[{"type":"uint256[]","name":"amounts","type":"uint256[]"}],"stateMutability":"payable","type":"function"}]'
router_abi = json.loads(router_v2_abi_str)
router_contract = w3.eth.contract(abi=router_abi)

# Função para decodificar uma única linha de dados de input
def decode_input_data(input_data):
    if not isinstance(input_data, str) or len(input_data) <= 10:
        return pd.Series([None, None, None]) # Retorna nulo se o input for inválido
    try:
        input_bytes = bytes.fromhex(input_data[2:])
        func_obj, func_params = router_contract.decode_function_input(input_bytes)
        
        path = func_params.get('path', [])
        token_in = path[0] if path else None
        token_out = path[-1] if path else None
        
        return pd.Series([func_obj.fn_name, token_in, token_out])
    except Exception:
        return pd.Series(['desconhecido', None, None])

# Aplica a função para criar as novas colunas
df_enriquecido[['function_name', 'token_in', 'token_out']] = df_enriquecido['input_data'].apply(decode_input_data)

# Cria a feature de custo da transação
df_enriquecido['custo_real_txn_eth'] = (df_enriquecido['gas_price'] * df_enriquecido['gas_used']) / 1e18

print("\nEngenharia de Features concluída!")

# Mostra as colunas mais importantes da nossa tabela final
display(df_enriquecido[['Transaction Hash', 'Method', 'function_name', 'token_in', 'token_out', 'custo_real_txn_eth', 'status']].head())

--- Iniciando Engenharia de Features ---

Engenharia de Features concluída!


Unnamed: 0,Transaction Hash,Method,function_name,token_in,token_out,custo_real_txn_eth,status
0,0xe3e0649f7b70d03aa49e8f97e782c4cec38546d0f179...,Send,,,,0.0,-1
1,0x4072bacb65a90e293b6fc0a842f6ac732e6e04e37d5c...,Send,,,,0.0,-1
2,0xd994f29d8b7640815937945310b08ac98fe453b4ade1...,Send,,,,0.0,-1
3,0xb0eb945227d30078b72ac2aa553af2ad87d5878a381d...,Send,,,,0.0,-1
4,0x9e6f7c2a5a9b47ee79aa4ec5b0851d8e6ca56f19451a...,Send,,,,0.0,-1
