In [1]:
from collections import defaultdict
import json
import pandas as pd

In [3]:
# Load the raw transaction export into `data`.
# The encoding is pinned to UTF-8 so decoding does not depend on the
# platform's default locale encoding (JSON interchange text is UTF-8).
with open("user-wallet-transactions.json", "r", encoding="utf-8") as f:
    data = json.load(f)

In [17]:
# Per-wallet integer counters, in the order they appear in each record.
_COUNT_FIELDS = (
    'num_transactions',
    'num_deposit',
    'num_borrow',
    'num_repay',
    'num_liquidationcall',
    'num_redeemunderlying',
)

# Per-wallet floating-point USD accumulators.
_USD_TOTAL_FIELDS = (
    'total_deposit_usd',
    'total_borrow_usd',
    'total_repay_usd',
)


def _new_wallet_record():
    """Return a fresh statistics record for a wallet that has not been seen yet.

    Counters start at 0, USD totals at 0.0, and the first/last timestamps at
    +inf / -inf so that any real timestamp replaces them under min()/max().
    """
    record = dict.fromkeys(_COUNT_FIELDS, 0)
    record.update(dict.fromkeys(_USD_TOTAL_FIELDS, 0.0))
    record['first_tx'] = float('inf')
    record['last_tx'] = float('-inf')
    return record


# Maps wallet -> statistics record; a missing wallet gets its own new record
# on first access.
wallet_stats = defaultdict(_new_wallet_record)

In [19]:
# Fold every transaction record into its wallet's statistics record:
# per-action counters, USD totals for deposit/borrow/repay, and the
# earliest/latest timestamp seen.
for tx in data:
    wallet = tx['userWallet']
    action = tx['action'].lower()
    timestamp = tx['timestamp']

    # Parse USD value. Records whose amount/price is missing or non-numeric
    # contribute 0.0 to the totals but are still counted below.
    # NOTE(review): amount is multiplied by the price as-is; confirm that
    # 'amount' is already expressed in the units 'assetPriceUSD' refers to.
    try:
        amount = float(tx['actionData']['amount'])
        price = float(tx['actionData'].get('assetPriceUSD', 0))
        usd_value = amount * price
    except (KeyError, ValueError, TypeError):
        usd_value = 0.0

    # Look the record up once; the defaultdict creates it on first access.
    stats = wallet_stats[wallet]

    # Update counts and totals. The per-action counter is incremented for
    # every transaction, whether or not the key was predeclared in the
    # default record (action types not predeclared start from 0 here).
    stats['num_transactions'] += 1
    action_key = f'num_{action}'
    stats[action_key] = stats.get(action_key, 0) + 1

    if action == 'deposit':
        stats['total_deposit_usd'] += usd_value
    elif action == 'borrow':
        stats['total_borrow_usd'] += usd_value
    elif action == 'repay':
        stats['total_repay_usd'] += usd_value

    # Track activity window (earliest and latest timestamp for this wallet).
    stats['first_tx'] = min(stats['first_tx'], timestamp)
    stats['last_tx'] = max(stats['last_tx'], timestamp)

In [21]:
# One row per wallet: the dict keys of wallet_stats become the 'userWallet'
# column and each statistics field becomes its own column.
df = (
    pd.DataFrame.from_dict(wallet_stats, orient='index')
    .rename_axis('userWallet')
    .reset_index()
)

# Add additional derived features.
# The 1e-6 offset avoids division by zero; note that a wallet with zero
# borrow volume therefore gets a very large ratio rather than NaN/inf.
borrow_denominator = df['total_borrow_usd'] + 1e-6
df['deposit_to_borrow_ratio'] = df['total_deposit_usd'] / borrow_denominator
df['repay_to_borrow_ratio'] = df['total_repay_usd'] / borrow_denominator

# Span between first and last transaction divided by 86400, plus one so a
# single-transaction wallet counts as one active day.
# NOTE(review): assumes timestamps are numeric Unix seconds — confirm.
activity_span = df['last_tx'] - df['first_tx']
df['active_days'] = activity_span / 86400 + 1
df['avg_tx_per_day'] = df['num_transactions'] / df['active_days']

df.to_csv("wallet_features.csv", index=False)
print("Saved wallet_features.csv with extracted features.")

Saved wallet_features.csv with extracted features.


In [23]:
from sklearn.preprocessing import MinMaxScaler

In [31]:
# Weight of each scaled feature in the raw score, keyed by column name so the
# feature/weight pairing is explicit.
feature_weights = {
    'total_deposit_usd': 0.2,
    'total_borrow_usd': 0.1,
    'total_repay_usd': 0.2,
    'deposit_to_borrow_ratio': 0.15,
    'repay_to_borrow_ratio': 0.25,
    'avg_tx_per_day': 0.1,
}
features = list(feature_weights)
weights = list(feature_weights.values())

# Replace infinite or NaN values with 0 across the whole frame.
df = df.replace([float('inf'), -float('inf')], 0).fillna(0)

# Scale every feature with MinMaxScaler, then take the weighted sum per row.
scaler = MinMaxScaler()
scaled = scaler.fit_transform(df[features])
weighted = scaled * weights
df['raw_score'] = weighted.sum(axis=1)

# Penalize: wallets with at least one liquidation call keep half their score.
was_liquidated = df['num_liquidationcall'].gt(0)
df['penalty'] = was_liquidated.map({True: 0.5, False: 1.0})

# Final score scaled to 0–1000, clipped to that range and truncated to int.
scaled_score = df['raw_score'] * df['penalty'] * 1000
df['creditScore'] = scaled_score.clip(0, 1000).astype(int)

score_columns = ['userWallet', 'creditScore']
df[score_columns].to_csv("wallet_credit_scores.csv", index=False)
print("✅ Saved wallet_credit_scores.csv with credit scores.")
print(df[score_columns].head(10))

✅ Saved wallet_credit_scores.csv with credit scores.
                                   userWallet  creditScore
0  0x00000000001accfa9cef68cf5371a23025b6d4b6            0
1  0x000000000051d07a4fb3bd10121a343d85818da6            0
2  0x000000000096026fb41fc39f9875d164bd82e2dc            0
3  0x0000000000e189dd664b9ab08a33c4839953852c            0
4  0x0000000002032370b971dabd36d72f3e5a7bf1ee            2
5  0x000000000a38444e0a6e37d3b630d7e855a7cb13            0
6  0x000000003853fcedcd0355fec98ca3192833f00b            0
7  0x000000003ce0cf2c037493b1dc087204bd7f713e            3
8  0x000000007858e6f2668e1e06111cfa24403a5466            0
9  0x00000001a0f57e850c9db68b4a9bc34677437c5c            0
