<a href="https://colab.research.google.com/github/2303A52486/Aave-Wallet-Credit-Scoring/blob/main/Aave_Wallet_Credit_Scoring_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import json
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from tqdm import tqdm

def load_data(json_file):
    """Load transaction records from a JSON file into a DataFrame.

    Args:
        json_file: Path to a JSON document. Its parsed content is passed
            unchanged to ``pd.DataFrame``.

    Returns:
        A ``pd.DataFrame`` built from the parsed JSON.
    """
    # JSON text is UTF-8 by specification; being explicit avoids depending on
    # the platform's locale-derived default encoding.
    with open(json_file, encoding="utf-8") as f:
        data = json.load(f)
    return pd.DataFrame(data)

def _sum_action_amounts(group, action):
    """Sum ``actionData['amount']`` over the rows of *group* matching *action*."""
    payloads = group.loc[group['action'] == action, 'actionData']
    return sum(float(payload['amount']) for payload in payloads)


def extract_features(df):
    """Aggregate per-wallet features from a transaction-level DataFrame.

    Rows are grouped by ``userWallet``. For each wallet the function records
    the space-joined sequence of ``action`` values in ``timestamp`` order, the
    summed ``actionData['amount']`` for ``deposit``, ``borrow`` and ``repay``
    rows, the number of ``liquidationcall`` rows, and two derived ratios.

    Amounts are converted with ``float`` and added as-is; no per-asset or
    price normalisation is applied here.

    Args:
        df: DataFrame with ``userWallet``, ``timestamp``, ``action`` and
            ``actionData`` columns, where each ``actionData`` entry can be
            indexed with ``'amount'``.

    Returns:
        A DataFrame with one row per wallet and the columns ``wallet``,
        ``action_seq``, ``deposit``, ``borrow``, ``repay``, ``liquidation``,
        ``leverage`` and ``repay_ratio``. The columns are present even when
        *df* contains no rows.
    """
    feature_columns = [
        'wallet', 'action_seq', 'deposit', 'borrow', 'repay',
        'liquidation', 'leverage', 'repay_ratio',
    ]
    results = []
    grouped = df.groupby('userWallet')

    for wallet, group in tqdm(grouped, desc="Extracting features"):
        # A stable sort keeps rows that share a timestamp in their input
        # order, so the action sequence (and the n-grams later derived from
        # it) is reproducible instead of depending on an unstable sort.
        ordered = group.sort_values('timestamp', kind='mergesort')
        seq = ' '.join(ordered['action'])

        deposit = _sum_action_amounts(group, 'deposit')
        borrow = _sum_action_amounts(group, 'borrow')
        repay = _sum_action_amounts(group, 'repay')
        liquidation = int((group['action'] == 'liquidationcall').sum())

        # Wallets that never deposited get leverage 0; wallets that never
        # borrowed are treated as fully repaid (ratio 1).
        leverage = borrow / deposit if deposit > 0 else 0
        repay_ratio = repay / borrow if borrow > 0 else 1

        results.append({
            'wallet': wallet,
            'action_seq': seq,
            'deposit': deposit,
            'borrow': borrow,
            'repay': repay,
            'liquidation': liquidation,
            'leverage': leverage,
            'repay_ratio': repay_ratio
        })

    # Passing the column list keeps the schema stable when there are no
    # wallets; a bare DataFrame([]) would have no columns at all.
    return pd.DataFrame(results, columns=feature_columns)

def create_pseudo_score(df):
    """Compute the heuristic target score used to train the model.

    The score is the sum of three terms, limited to the range 0..1000:

    * repayment: ``repay_ratio`` times 500
    * health: ``(deposit - borrow) / (deposit + 1e-6)`` times 300
    * liquidation: minus 200 for every recorded liquidation

    Args:
        df: Frame providing ``repay_ratio``, ``deposit``, ``borrow`` and
            ``liquidation`` columns.

    Returns:
        The clipped score, one value per row of *df*.
    """
    repayment_term = df['repay_ratio'] * 500

    net_position = df['deposit'] - df['borrow']
    # The small epsilon keeps the division defined for zero deposits.
    safe_deposit = df['deposit'] + 1e-6
    health_term = net_position / safe_deposit * 300

    penalty_term = df['liquidation'] * -200

    raw_score = repayment_term + health_term + penalty_term
    return np.clip(raw_score, 0, 1000)

def prepare_dataset(df):
    """Assemble the model input matrix from the per-wallet feature frame.

    The matrix has two column groups, side by side:

    1. the five numeric features (``deposit``, ``borrow``, ``repay``,
       ``leverage``, ``repay_ratio``) after min-max scaling;
    2. TF-IDF weights of 2- and 3-grams taken from ``action_seq``, limited
       to at most 100 features.

    Both transformers are fitted inside this function and are not returned,
    so the result describes only the rows of *df* passed in.

    Args:
        df: Frame with the numeric feature columns and ``action_seq``.

    Returns:
        A dense 2-D array with one row per row of *df*.
    """
    numeric_columns = ['deposit', 'borrow', 'repay', 'leverage', 'repay_ratio']
    numeric_block = MinMaxScaler().fit_transform(df[numeric_columns])

    vectorizer = TfidfVectorizer(ngram_range=(2,3), max_features=100)
    sequence_block = vectorizer.fit_transform(df['action_seq']).toarray()

    return np.hstack((numeric_block, sequence_block))

def train_credit_model(X, y):
    """Fit a gradient-boosting regressor on *X* and *y* and return it.

    The estimator uses 200 boosting stages and a fixed ``random_state`` of
    42 so repeated runs on the same data give the same model.

    Args:
        X: Feature matrix.
        y: Target values, one per row of *X*.

    Returns:
        The fitted ``GradientBoostingRegressor``.
    """
    regressor = GradientBoostingRegressor(n_estimators=200, random_state=42)
    # ``fit`` returns the estimator itself, so it can be returned directly.
    return regressor.fit(X, y)

def plot_score_distribution(scores, output_img='credit_score_distribution.png'):
    """Save a 10-bin histogram of *scores* to an image file.

    Args:
        scores: Sequence of score values to plot.
        output_img: Path of the image file to write.
    """
    fig, ax = plt.subplots(figsize=(8,5))
    ax.hist(scores, bins=10, color='orange', edgecolor='black')
    ax.set_title('Wallet Credit Score Distribution')
    ax.set_xlabel('Credit Score')
    ax.set_ylabel('Wallet Count')
    fig.savefig(output_img)
    # Close the figure explicitly so repeated calls do not accumulate
    # open figures.
    plt.close(fig)

def generate_wallet_scores(input_json, output_csv):
    """Run the full pipeline: load, featurise, train, score and export.

    Steps:
        1. Load the transactions and derive per-wallet features.
        2. Attach the heuristic pseudo score as the training target.
        3. Build the feature matrix and fit the regressor on it.
        4. Predict on that same matrix, clip to 0..1000 and round to integers.
        5. Write ``wallet``/``score`` rows to *output_csv* and save the
           score histogram under its default file name.

    Args:
        input_json: Path of the transaction JSON file.
        output_csv: Path of the CSV file to write.
    """
    features = extract_features(load_data(input_json))
    features['pseudo_score'] = create_pseudo_score(features)

    design_matrix = prepare_dataset(features)
    fitted_model = train_credit_model(design_matrix, features['pseudo_score'])

    # Scores are predicted for the same wallets the model was fitted on.
    raw_scores = fitted_model.predict(design_matrix)
    final_scores = np.round(np.clip(raw_scores, 0, 1000)).astype(int)

    score_table = pd.DataFrame({'wallet': features['wallet'], 'score': final_scores})
    score_table.to_csv(output_csv, index=False)

    plot_score_distribution(final_scores)
    print(f"Saved wallet scores to {output_csv} and distribution plot.")

# Notebook entry point: score every wallet found in the transaction dump and
# write the results next to it.
generate_wallet_scores(
    input_json='user-wallet-transactions.json',
    output_csv='wallet_credit_scores.csv',
)

Extracting features: 100%|██████████| 3497/3497 [00:04<00:00, 716.24it/s]


Saved wallet scores to wallet_credit_scores.csv and distribution plot.
