In [13]:
import requests
import csv
import json
import time

# 🔧 CONFIGURATION
API_KEY = "cqt_rQ9PHYdJqwYdb63kCrGc9xPfwxck"
CHAIN_ID = 1  # Ethereum Mainnet
INPUT_FILE = r'C:\Users\HP\OneDrive\Desktop\wallets.csv'
OUTPUT_FILE = 'covalent_wallet_data1.json'

def load_wallets(file_path):
    with open(file_path, 'r') as file:
        reader = csv.reader(file)
        wallets = [row[0].strip().lower() for row in reader if row]
    return wallets

def fetch_wallet_transactions(wallet):
    url = f"https://api.covalenthq.com/v1/{CHAIN_ID}/address/{wallet}/transactions_v2/"
    params = {'key': API_KEY}
    try:
        response = requests.get(url, params=params)
        data = response.json()
        if 'data' in data and data['data']:
            print(f"✅ Data fetched for {wallet}")
            return {'wallet': wallet, 'data': data['data']}
        else:
            print(f"⚠️ No data found for {wallet}")
            return {'wallet': wallet, 'data': None}
    except requests.exceptions.RequestException as e:
        print(f"❌ Request failed for {wallet}: {str(e)}")
        return {'wallet': wallet, 'error': str(e)}

def main():
    wallets = load_wallets(INPUT_FILE)
    all_data = []

    for idx, wallet in enumerate(wallets):
        print(f"\nFetching {idx + 1}/{len(wallets)}: {wallet}")
        result = fetch_wallet_transactions(wallet)
        all_data.append(result)
        time.sleep(1.2)  # Avoid rate limit

    with open(OUTPUT_FILE, 'w') as outfile:
        json.dump(all_data, outfile, indent=2)

    print(f"\n✅ Done. Data saved to {OUTPUT_FILE}")

if __name__ == '__main__':
    main()



Fetching 1/104: ï»¿wallet_id
⚠️ No data found for ï»¿wallet_id

Fetching 2/104: 0x0039f22efb07a647557c7c5d17854cfd6d489ef3
✅ Data fetched for 0x0039f22efb07a647557c7c5d17854cfd6d489ef3

Fetching 3/104: 0x06b51c6882b27cb05e712185531c1f74996dd988
✅ Data fetched for 0x06b51c6882b27cb05e712185531c1f74996dd988

Fetching 4/104: 0x0795732aacc448030ef374374eaae57d2965c16c
✅ Data fetched for 0x0795732aacc448030ef374374eaae57d2965c16c

Fetching 5/104: 0x0aaa79f1a86bc8136cd0d1ca0d51964f4e3766f9
✅ Data fetched for 0x0aaa79f1a86bc8136cd0d1ca0d51964f4e3766f9

Fetching 6/104: 0x0fe383e5abc200055a7f391f94a5f5d1f844b9ae
✅ Data fetched for 0x0fe383e5abc200055a7f391f94a5f5d1f844b9ae

Fetching 7/104: 0x104ae61d8d487ad689969a17807ddc338b445416
✅ Data fetched for 0x104ae61d8d487ad689969a17807ddc338b445416

Fetching 8/104: 0x111c7208a7e2af345d36b6d4aace8740d61a3078
✅ Data fetched for 0x111c7208a7e2af345d36b6d4aace8740d61a3078

Fetching 9/104: 0x124853fecb522c57d9bd5c21231058696ca6d596
✅ Data fetched for 0x1

In [4]:
import json
import math
from collections import defaultdict
import networkx as nx
import xgboost as xgb
import numpy as np

# --- Load and clean data ---
with open(r"C:\Users\HP\covalent_wallet_data1.json", "r") as f:
    raw_data = json.load(f)

# Flatten tx list
transactions = []
for wallet in raw_data:
    if wallet.get("data") and wallet["data"].get("items"):
        transactions.extend(wallet["data"]["items"])

# --- Build OutTxn and InTxn ---
out_txn = defaultdict(int)
in_txn = defaultdict(int)

for tx in transactions:
    out_txn[tx["from_address"]] += 1
    in_txn[tx["to_address"]] += 1

max_out = max(out_txn.values(), default=1)
max_in = max(in_txn.values(), default=1)

# --- Step 1: De-anonymous Score ---
def calc_score(u, v):
    out_u = out_txn.get(u, 0)
    in_v = in_txn.get(v, 0)

    if out_u == 0 or in_v == 0:
        return -1.0

    s1 = (2 * math.log(out_u) - math.log(max_out)) / math.log(max_out)
    s2 = (2 * math.log(in_v) - math.log(max_in)) / math.log(max_in)
    return 0.5 * (s1 + s2)

# --- Step 2: Build Graph with De-anonymous Scores ---
G = nx.DiGraph()
score_dict = {}
for tx in transactions:
    u = tx.get("from_address")
    v = tx.get("to_address")

    # Skip if from or to address is None
    if u is None or v is None:
        continue

    score = calc_score(u, v)
    if score == -1:
        continue  # skip unscorable edges

    G.add_edge(u, v, score=score)
    score_dict[(u, v)] = score


# --- Step 3: RiskProp Initialization ---
R = defaultdict(lambda: 0.7)  # Reliability
T = defaultdict(float)        # Trustiness
Conf = defaultdict(float)     # Confidence

# --- Step 4: RiskProp Iteration ---
def update_risk(iterations=10):
    for _ in range(iterations):
        # Update T
        for v in G.nodes:
            incoming = G.in_edges(v, data=True)
            if not incoming:
                continue
            sum_scores = sum(score_dict[(u, v)] * Conf[(u, v)] for u, v, d in incoming)
            T[v] = sum_scores / len(incoming)

        # Update Conf
        for u, v, d in G.edges(data=True):
            s_uv = score_dict[(u, v)]
            T_v = T[v]
            Conf[(u, v)] = (R[u] + (1 - abs(s_uv - T_v))) / 2

        # Update R
        for u in G.nodes:
            outgoing = G.out_edges(u)
            if not outgoing:
                continue
            sum_conf = sum(Conf[(u, v)] for u, v in outgoing)
            R[u] = sum_conf / len(outgoing)

update_risk()

# --- Step 5: Final Risk Score ---
risk_scores = {u: (1 - R[u]) * 10 for u in G.nodes}

# --- Print Top Risky Accounts ---
top_risky = sorted(risk_scores.items(), key=lambda x: x[1], reverse=True)[:10]
print("⚠️ Top High-Risk Wallets:")
for addr, risk in top_risky:
    print(f"{addr}: Risk Score = {risk:.2f}")

# --- Step 6: Optional XGBoost Training (if you have labels) ---
# You can use features like: [R, T, average_score, total_txn, in/out ratio] as input to XGBoost

# Example only (pseudo-code)
"""
X = []
y = []  # 1 for scam, 0 for normal
for addr in known_labeled_addresses:
    features = [
        R[addr],
        T[addr],
        len(list(G.successors(addr))),
        len(list(G.predecessors(addr))),
        score_agg_for_addr
    ]
    X.append(features)
    y.append(label_dict[addr])

model = xgb.XGBClassifier()
model.fit(X, y)
"""

# --- GNN Prep Placeholder (if you want to build a GNN later) ---
# Use torch_geometric or DGL for GNN on G with features from R, T, degrees, etc.


⚠️ Top High-Risk Wallets:
0x8a7b54864080b3aeb3321836c1b891c7485e35ec: Risk Score = 8.25
0xc4d2571e7f21538e109ef7fbde95bfd3b6be5ef5: Risk Score = 7.87
0x80121123bd2850946a0ddd399ade4449ca95cd0d: Risk Score = 7.71
0xce18a5129969984f54b88ca860343c3f32c843df: Risk Score = 7.35
0x712089481691cbe78ad40e85300eb01294f58a44: Risk Score = 7.11
0xaafd0db2f4874b080b40a467b54f0548cd1d88fc: Risk Score = 7.11
0x21a31ee1afc51d94c2efccaa2092ad1028285549: Risk Score = 6.73
0x09dd9e830d57eca2e683cb5a6706cecef72609c6: Risk Score = 6.70
0x87af0ea309642e90dacfadb0c526cc72eb9c89bf: Risk Score = 6.42
0xeb4b407df8cf53861507c8302bb40b3e977f0b77: Risk Score = 6.42


'\nX = []\ny = []  # 1 for scam, 0 for normal\nfor addr in known_labeled_addresses:\n    features = [\n        R[addr],\n        T[addr],\n        len(list(G.successors(addr))),\n        len(list(G.predecessors(addr))),\n        score_agg_for_addr\n    ]\n    X.append(features)\n    y.append(label_dict[addr])\n\nmodel = xgb.XGBClassifier()\nmodel.fit(X, y)\n'