In [1]:
import json
import os
from collections import defaultdict
from datetime import datetime
import math
import pandas as pd

# ---------- STEP 1: Load JSON data ----------
# Directory that holds the transaction chunk files.
data_dir = "/content/drive/MyDrive/compoundv2/dataset"
# Absolute chunk paths, derived from data_dir so the location is written only once.
# (Previously each entry repeated the directory and was then passed through
# os.path.join(data_dir, ...), which ignores data_dir for absolute paths.)
files = [
    f"{data_dir}/compoundV2_transactions_ethereum_chunk_{chunk}.json"
    for chunk in range(3)
]

# Records accumulated across all chunk files.
all_deposits = []
all_liquidates = []

for file in files:
    # Pass the encoding explicitly: JSON text is UTF-8, while open() would
    # otherwise fall back to the platform's locale encoding.
    with open(file, "r", encoding="utf-8") as f:
        data = json.load(f)
    # A chunk without one of these keys simply contributes no records.
    all_deposits.extend(data.get("deposits", []))
    all_liquidates.extend(data.get("liquidates", []))

print(f"Loaded {len(all_deposits)} deposits and {len(all_liquidates)} liquidations.")

Loaded 30000 deposits and 20737 liquidations.


In [2]:
# ---------- STEP 2: Feature Engineering ----------
def _empty_wallet_features():
    """Return a fresh, zeroed feature record for a wallet seen for the first time."""
    return dict(
        total_deposit_usd=0.0,
        num_deposits=0,
        first_deposit_ts=float("inf"),  # sentinel meaning "no deposit recorded yet"
        last_deposit_ts=0,
        assets_deposited=set(),  # a new set for every record, never shared
        times_liquidated=0,
        total_liquidated_usd=0.0,
    )


# Per-wallet accumulators; looking up an unseen wallet creates its record on the fly.
wallet_features = defaultdict(_empty_wallet_features)

# Process deposits: fold every deposit into the running totals of its wallet.
for deposit in all_deposits:
    # Read all fields up front so a malformed record fails before any
    # accumulator is created or modified.
    account_id = deposit["account"]["id"]
    deposit_usd = float(deposit.get("amountUSD", 0))
    deposited_at = int(deposit["timestamp"])
    asset_symbol = deposit["asset"]["symbol"]

    stats = wallet_features[account_id]
    stats["total_deposit_usd"] += deposit_usd
    stats["num_deposits"] += 1
    # Track the earliest and latest deposit timestamps seen for this wallet.
    if deposited_at < stats["first_deposit_ts"]:
        stats["first_deposit_ts"] = deposited_at
    if deposited_at > stats["last_deposit_ts"]:
        stats["last_deposit_ts"] = deposited_at
    stats["assets_deposited"].add(asset_symbol)

# Process liquidations: count each event against the liquidated wallet
# and add its USD amount to that wallet's total.
for liquidation in all_liquidates:
    liquidatee_id = liquidation["liquidatee"]["id"]
    liquidated_usd = float(liquidation.get("amountUSD", 0))

    stats = wallet_features[liquidatee_id]
    stats["total_liquidated_usd"] += liquidated_usd
    stats["times_liquidated"] += 1

# Finalize features: flatten each wallet's accumulators into one tabular row.
wallet_records = []
for wallet_id, stats in wallet_features.items():
    # A first-deposit timestamp still at the +inf sentinel means the wallet never
    # deposited (it was only seen in liquidations), so its duration is 0.
    never_deposited = stats["first_deposit_ts"] == float("inf")
    if never_deposited:
        span_days = 0
    else:
        span_days = (stats["last_deposit_ts"] - stats["first_deposit_ts"]) / (60 * 60 * 24)

    row = {
        "wallet": wallet_id,
        "total_deposit_usd": stats["total_deposit_usd"],
        "num_deposits": stats["num_deposits"],
        "deposit_duration_days": span_days,
        "unique_assets": len(stats["assets_deposited"]),
        "times_liquidated": stats["times_liquidated"],
        "total_liquidated_usd": stats["total_liquidated_usd"],
    }
    wallet_records.append(row)

# One row per wallet; columns follow the key order of the row dicts above.
df = pd.DataFrame(wallet_records)

# Persist the feature table (without the index column).
output_csv = "wallet_features.csv"
df.to_csv(output_csv, index=False)
print(f"Wallet features saved to: {output_csv}")

Wallet features saved to: wallet_features.csv


In [3]:
# ---------- STEP 3: Score Wallets (0 to 100 scale) ----------
# Define scoring formula
def score_wallet(row):
    """Compute the raw (un-normalized) score for one wallet.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) providing
            "total_deposit_usd", "deposit_duration_days", "unique_assets",
            "times_liquidated" and "total_liquidated_usd".

    Returns:
        The weighted sum of the positive terms (deposit volume, deposit
        duration, asset count) minus the liquidation terms. USD amounts are
        passed through log1p before weighting.
    """
    # Terms that raise the score.
    deposit_term = 0.4 * math.log1p(row["total_deposit_usd"])
    duration_term = 0.2 * row["deposit_duration_days"]
    assets_term = 0.2 * row["unique_assets"]
    # Terms that lower the score.
    liquidation_count_term = 0.2 * row["times_liquidated"]
    liquidation_usd_term = 0.2 * math.log1p(row["total_liquidated_usd"])

    return (
        deposit_term
        + duration_term
        + assets_term
        - liquidation_count_term
        - liquidation_usd_term
    )

# Raw (unbounded) score for every wallet, computed row by row.
df["raw_score"] = df.apply(score_wallet, axis=1)

# Normalize scores to 0–100 with min-max scaling.
min_score = df["raw_score"].min()
max_score = df["raw_score"].max()
score_range = max_score - min_score
if score_range > 0:
    df["credit_score"] = ((df["raw_score"] - min_score) / score_range) * 100
else:
    # Every wallet has the same raw score (for example, there is only one
    # wallet). Dividing by the zero range would silently turn every credit
    # score into NaN, so assign the bottom of the scale instead.
    df["credit_score"] = 0.0



In [4]:
# ---------- STEP 4: Output ----------
# Rank wallets from highest to lowest credit score and keep the first 1000 rows.
ranked_wallets = df.sort_values("credit_score", ascending=False)
top_1000 = ranked_wallets.head(1000)

# Only the wallet id and its score are exported.
export_columns = ["wallet", "credit_score"]
top_1000[export_columns].to_csv("wallet_scores.csv", index=False)

print("Scoring complete. Output saved as 'wallet_scores.csv'")

Scoring complete. Output saved as 'wallet_scores.csv'
