In [1]:
import json
import pandas as pd
from datetime import datetime, timedelta
import math
import os 

# Asset Decimals Mapping 
ASSET_DECIMALS = {
    "USDC": 6,
    "USDT": 6,
    "DAI": 18,
    "WETH": 18,
    "WMATIC": 18,
    "WBTC": 8,
    "AAVE": 18,
    
}

# Converts a raw amount string from transaction data to its USD equivalent.
def amount_to_usd(amount_str, asset_symbol, asset_price_usd_str):
    
    try:
        raw_amount = int(amount_str)
        price_usd = float(asset_price_usd_str)
        decimals = ASSET_DECIMALS.get(asset_symbol, 18) 

       
        human_readable_amount = raw_amount / (10 ** decimals)
        return human_readable_amount * price_usd
    except (ValueError, TypeError):
        return 0.0 
    
# Main Credit Score Calculation Function
def calculate_wallet_scores(transactions_data):
    """
    Calculates a credit score (0-1000) for each wallet based on Aave V2 transaction behavior.

    Args:
        transactions_data (list of dict): List of raw transaction records.

    Returns:
        dict: A dictionary where keys are wallet addresses and values are their scores (0-1000).
    """

    wallet_features = {}

    # Phase 1: Initial Feature Extraction and Aggregation 
    # Iterate through each transaction to build up raw features per wallet
    for tx in transactions_data:
        wallet = tx.get('userWallet')
        if not wallet:
            continue

        # Initialize wallet features if not already present
        if wallet not in wallet_features:
            wallet_features[wallet] = {
                'total_transactions': 0,
                'deposit_count': 0,
                'borrow_count': 0,
                'repay_count': 0,
                'redeem_count': 0,
                'liquidation_count': 0,
                'total_deposit_usd': 0.0,
                'total_borrow_usd': 0.0,
                'total_repay_usd': 0.0,
                'total_redeem_usd': 0.0,
                'total_liquidation_usd': 0.0,
                'first_transaction_timestamp': None, 
                'last_transaction_timestamp': None, 
                'borrow_timestamps': [],
                'repay_timestamps': [],
                'unique_deposited_assets': set(),
                'unique_borrowed_assets': set(),
                'highest_single_deposit_usd': 0.0,
                'highest_single_borrow_usd': 0.0,
            }

        # Update basic transaction counts and timestamps
        wallet_features[wallet]['total_transactions'] += 1
        
        # Convert timestamp to datetime object for easier calculations
        tx_datetime = datetime.fromtimestamp(tx['timestamp']) 
        
        # Update first_transaction_timestamp
        if wallet_features[wallet]['first_transaction_timestamp'] is None or \
           tx_datetime < wallet_features[wallet]['first_transaction_timestamp']:
            wallet_features[wallet]['first_transaction_timestamp'] = tx_datetime
        
        # Update last_transaction_timestamp
        if wallet_features[wallet]['last_transaction_timestamp'] is None or \
           tx_datetime > wallet_features[wallet]['last_transaction_timestamp']:
            wallet_features[wallet]['last_transaction_timestamp'] = tx_datetime

        action = tx.get('action')
        action_data = tx.get('actionData', {})
        amount_str = action_data.get('amount')
        asset_symbol = action_data.get('assetSymbol')
        asset_price_usd_str = action_data.get('assetPriceUSD')

        # Convert amount to USD for consistent calculations
        amount_in_usd = amount_to_usd(amount_str, asset_symbol, asset_price_usd_str)

        # Update action-specific counts and total USD amounts
        if action == 'deposit':
            wallet_features[wallet]['deposit_count'] += 1
            wallet_features[wallet]['total_deposit_usd'] += amount_in_usd
            wallet_features[wallet]['unique_deposited_assets'].add(asset_symbol)
            wallet_features[wallet]['highest_single_deposit_usd'] = max(
                wallet_features[wallet]['highest_single_deposit_usd'], amount_in_usd
            )
        elif action == 'borrow':
            wallet_features[wallet]['borrow_count'] += 1
            wallet_features[wallet]['total_borrow_usd'] += amount_in_usd
            wallet_features[wallet]['borrow_timestamps'].append(tx_datetime)
            wallet_features[wallet]['unique_borrowed_assets'].add(asset_symbol)
            wallet_features[wallet]['highest_single_borrow_usd'] = max(
                wallet_features[wallet]['highest_single_borrow_usd'], amount_in_usd
            )
        elif action == 'repay':
            wallet_features[wallet]['repay_count'] += 1
            wallet_features[wallet]['total_repay_usd'] += amount_in_usd
            wallet_features[wallet]['repay_timestamps'].append(tx_datetime)
        elif action == 'redeemunderlying':
            wallet_features[wallet]['redeem_count'] += 1
            wallet_features[wallet]['total_redeem_usd'] += amount_in_usd
        elif action == 'liquidationcall':
            wallet_features[wallet]['liquidation_count'] += 1
            wallet_features[wallet]['total_liquidation_usd'] += amount_in_usd

    # Phase 2: Derive Advanced Features and Calculate Scores 
    wallet_scores = {}
    for wallet, features in wallet_features.items():
        score = 500 
        
        activity_duration_days = 0
        if features['first_transaction_timestamp'] is not None and \
           features['last_transaction_timestamp'] is not None:
            activity_duration_days = (features['last_transaction_timestamp'] - \
                                      features['first_transaction_timestamp']).days
        
       
        score += min(100, activity_duration_days * 0.2) 

        # Repay to Borrow Ratio (Most Critical Positive/Negative)
      
        repay_to_borrow_ratio = 0.0
        if features['total_borrow_usd'] > 0:
            repay_to_borrow_ratio = features['total_repay_usd'] / features['total_borrow_usd']
            
            if repay_to_borrow_ratio >= 1.0: # Fully repaid or overpaid
                score += 350
            elif repay_to_borrow_ratio >= 0.8:
                score += 250
            elif repay_to_borrow_ratio >= 0.5:
                score += 100
            elif repay_to_borrow_ratio > 0:
                score -= 100 
                score -= 300 
        elif features['borrow_count'] == 0 and features['total_deposit_usd'] > 0:
            
            score += 200 

        #  Liquidation Events (Strong Negative)
       
        if features['liquidation_count'] > 0:
            score -= min(400, features['liquidation_count'] * 150)

        # Net Borrowed Amount (Outstanding Debt - Negative)      
        net_borrowed_usd = features['total_borrow_usd'] - features['total_repay_usd']
        if net_borrowed_usd > 0:
            
            score -= min(150, net_borrowed_usd / 100) 

        #  Consistency of Repayments (Approximation - Positive/Negative)
        avg_repay_time_days = 0
        if features['borrow_timestamps'] and features['repay_timestamps']:
           
            sorted_borrows = sorted(features['borrow_timestamps'])
            sorted_repays = sorted(features['repay_timestamps'])

            time_diffs = []
            repay_idx = 0
            for borrow_ts in sorted_borrows:
                while repay_idx < len(sorted_repays) and sorted_repays[repay_idx] < borrow_ts:
                    repay_idx += 1
                
                if repay_idx < len(sorted_repays):
                    time_taken = sorted_repays[repay_idx] - borrow_ts
                    if time_taken.total_seconds() > 0:
                        time_diffs.append(time_taken.total_seconds() / (60 * 60 * 24)) 
                    repay_idx += 1

            if time_diffs:
                avg_repay_time_days = sum(time_diffs) / len(time_diffs)
                if avg_repay_time_days < 14: 
                    score += 70
                elif avg_repay_time_days < 60: 
                    score += 30
                else: 
                    score -= 50

        #  Activity Level (Positive)
        
        if features['total_transactions'] > 5:
            score += min(50, (features['total_transactions'] - 5) * 1) 

        #  Asset Diversification (Slightly Positive)
        score += min(20, len(features['unique_deposited_assets']) * 5)
        score += min(10, len(features['unique_borrowed_assets']) * 5)

        #  Net Deposited Value (Positive)
        net_deposited_usd = features['total_deposit_usd'] - features['total_redeem_usd']
        if net_deposited_usd > 0:
            score += min(50, net_deposited_usd / 1000) 

        #  High Single Transaction Amounts (Neutral to Slightly Positive)
        if features['highest_single_deposit_usd'] > 10000:
            score += 10
        if features['highest_single_borrow_usd'] > 10000:
            score += 5 
            
        # Final Score Clamping 
        # Ensure score stays within the 0-1000 range
        score = max(0, min(1000, score))
        wallet_scores[wallet] = round(score)

    return wallet_scores



In [3]:
# Define the local file path for your transactions JSON
if __name__ == "__main__":
    
    JSON_FILE_NAME = r"C:\Users\sahit\Downloads\user-wallet-transactions.json"
    OUTPUT_SCORES_FILE = r"C:\Users\sahit\Downloads\wallet_scores.json"
    
    # Load the large JSON file
    try:
       
        with open(JSON_FILE_NAME, 'r') as f:
            raw_data = json.load(f)
        print(f"Data loaded successfully. Total transactions: {len(raw_data)}. Starting score calculation...")

        # Calculate scores
        wallet_scores = calculate_wallet_scores(raw_data)

        # Output scores for a few wallets
        print("\n--- Sample Wallet Scores ---")
        
        # Print first 10 wallets to avoid flooding console
        for i, (wallet, score) in enumerate(wallet_scores.items()):
            if i >= 100:
                break
            print(f"Wallet: {wallet}, Score: {score}")
        print(f"\nTotal unique wallets scored: {len(wallet_scores)}")

        # Save all scores to a JSON file
        with open(OUTPUT_SCORES_FILE, 'w') as f:
            json.dump(wallet_scores, f, indent=4)
        print(f"\nAll wallet scores saved to '{OUTPUT_SCORES_FILE}'")

    except json.JSONDecodeError:
        print(f"Error: Could not decode JSON from '{JSON_FILE_NAME}'. Make sure it's a valid JSON file.")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")

Data loaded successfully. Total transactions: 100000. Starting score calculation...

--- Sample Wallet Scores ---
Wallet: 0x00000000001accfa9cef68cf5371a23025b6d4b6, Score: 707
Wallet: 0x000000000051d07a4fb3bd10121a343d85818da6, Score: 705
Wallet: 0x000000000096026fb41fc39f9875d164bd82e2dc, Score: 706
Wallet: 0x0000000000e189dd664b9ab08a33c4839953852c, Score: 538
Wallet: 0x0000000002032370b971dabd36d72f3e5a7bf1ee, Score: 691
Wallet: 0x000000000a38444e0a6e37d3b630d7e855a7cb13, Score: 942
Wallet: 0x000000003853fcedcd0355fec98ca3192833f00b, Score: 724
Wallet: 0x000000003ce0cf2c037493b1dc087204bd7f713e, Score: 190
Wallet: 0x000000007858e6f2668e1e06111cfa24403a5466, Score: 705
Wallet: 0x00000001a0f57e850c9db68b4a9bc34677437c5c, Score: 705
Wallet: 0x0000000506063a51c6ce59906d8c40f7d7fe92a7, Score: 961
Wallet: 0x00000029ff545c86524ade7caf132527707948c4, Score: 793
Wallet: 0x00000087c4cebffb95746d1935de7fbcab092f40, Score: 792
Wallet: 0x000000e28faa823d5b53ff6c2922c28335840375, Score: 692
Wall