# In [1]:
import json
import pandas as pd
import os
from tqdm import tqdm

def extract_compound_features(
    input_path=r"C:\Users\Suyash\Desktop\compound-risk-scoring\data\all_wallets_data.json",
    output_path=r"C:\Users\Suyash\Desktop\compound-risk-scoring\data\features.csv",
    *,
    stablecoins=("usdc", "usdt", "dai"),
    large_tx_threshold=1000,
):
    """Build one row of aggregate features per wallet and save them as CSV.

    The input JSON is read as a mapping of wallet id -> list of transaction
    records. For every record the ``"value"`` field (default ``0``) is
    converted to ``float`` and the ``"token_symbol"`` field (default ``""``)
    is lower-cased. A record for which either step fails is skipped, but it
    still counts towards ``n_transactions`` because that figure is simply the
    length of the wallet's transaction list.

    Note that a record without a ``"token_symbol"`` key is recorded under the
    empty-string symbol, which therefore counts as one distinct token in
    ``unique_tokens``.

    Args:
        input_path: Path of the JSON file to read.
        output_path: Path of the CSV file to write. Its parent directory is
            created if it does not exist.
        stablecoins: Token symbols (compared case-insensitively) that count
            towards ``stablecoin_tx``.
        large_tx_threshold: A transaction whose value is strictly greater
            than this counts towards ``large_tx_count``.

    Returns:
        pandas.DataFrame with columns ``wallet_id``, ``n_transactions``,
        ``unique_tokens``, ``total_volume``, ``stablecoin_tx`` and
        ``large_tx_count``; the same frame that is written to ``output_path``.

    Raises:
        OSError: If the input file cannot be read or the output cannot be
            written.
        json.JSONDecodeError: If the input file is not valid JSON.
    """
    columns = [
        "wallet_id",
        "n_transactions",
        "unique_tokens",
        "total_volume",
        "stablecoin_tx",
        "large_tx_count",
    ]

    # JSON is UTF-8 by specification; do not rely on the platform's locale
    # encoding (e.g. cp1252 on Windows).
    with open(input_path, "r", encoding="utf-8") as f:
        all_data = json.load(f)

    # Build the lookup set once instead of scanning a list per transaction.
    stablecoin_symbols = frozenset(s.lower() for s in stablecoins)

    features = []
    skipped = 0

    for wallet, txs in tqdm(all_data.items(), desc="Extracting features"):
        n_tx = len(txs)
        total_volume = 0
        token_set = set()
        stablecoin_tx = 0
        large_tx = 0

        for tx in txs:
            # Only the parsing can fail; keep the try body minimal and catch
            # just the errors malformed records produce (non-dict record,
            # null/non-string symbol, non-numeric or overflowing value).
            try:
                value = float(tx.get("value", 0))
                symbol = tx.get("token_symbol", "").lower()
            except (AttributeError, TypeError, ValueError, OverflowError):
                skipped += 1
                continue

            token_set.add(symbol)
            total_volume += value
            if symbol in stablecoin_symbols:
                stablecoin_tx += 1
            if value > large_tx_threshold:
                large_tx += 1

        features.append({
            "wallet_id": wallet,
            "n_transactions": n_tx,
            "unique_tokens": len(token_set),
            "total_volume": total_volume,
            "stablecoin_tx": stablecoin_tx,
            "large_tx_count": large_tx,
        })

    # Pinning the columns keeps the CSV header stable even for empty input.
    df = pd.DataFrame(features, columns=columns)

    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    df.to_csv(output_path, index=False)

    if skipped:
        print(f"⚠️ Skipped {skipped} malformed transaction record(s).")
    print(f"✅ Features extracted for {len(df)} wallets. Saved to {output_path}")
    return df
