In [None]:
# Uploading data to colab notebook
# Colab-only: files.upload() opens a browser file picker and blocks until a
# file is chosen. Select "user-wallet-transactions.json", the name the loading
# cell reads.
from google.colab import files
# NOTE(review): per the Colab docs the return value maps each uploaded file
# name to its bytes; `u` is not read by any other cell visible here.
u = files.upload()

In [22]:
# Using pandas to load data into dataframe
import pandas as pd
# Relative path: the JSON file has to be in the current working directory,
# which is where the Colab upload cell places it.
df = pd.read_json("user-wallet-transactions.json")

In [None]:
# Preview the first five records (head() defaults to n=5).
df.head()

In [None]:
# Print each column's dtype and non-null count, plus memory usage.
df.info()

In [None]:
# Show the column labels of the loaded frame.
df.columns

In [26]:
# Parsing action data if it's a string
import ast
def safe_parse_action_data(x):
    """Coerce one ``actionData`` cell into a dict.

    Parameters
    ----------
    x : object
        A cell value: an already-parsed dict, a string holding a Python
        literal or a JSON object, or a missing value (None / NaN).

    Returns
    -------
    dict
        ``x`` itself when it is already a dict; the parsed mapping when ``x``
        is a string that evaluates to a dict; otherwise an empty dict. The
        result is always a dict, so callers can use ``.get`` unconditionally.
    """
    import json  # local import keeps this cell self-contained

    if isinstance(x, dict):
        return x
    if not isinstance(x, str):
        # None, NaN and any other non-string payload carry no usable fields.
        return {}

    # Python-literal syntax is tried first (single quotes, True/None); JSON is
    # the fallback for payloads that use true/false/null.
    for parse in (ast.literal_eval, json.loads):
        try:
            parsed = parse(x)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            # Only parsing failures are absorbed; interrupts still propagate.
            continue
        if isinstance(parsed, dict):
            return parsed
    # Unparseable text, or a valid literal that is not a mapping (e.g. "123").
    return {}

# Run the parser over every actionData cell and store the results back in place.
df['actionData'] = df['actionData'].map(safe_parse_action_data)

In [None]:
# Feature Engineering by Wallet
from tqdm import tqdm

# Counter column incremented for each recognised (lower-cased) action name.
# Actions not listed here still count toward total_tx and total_amount.
ACTION_COUNTERS = {
    "deposit": "deposit_count",
    "borrow": "borrow_count",
    "repay": "repay_count",
    "redeemunderlying": "redeem_count",
    "liquidationcall": "liquidation_count",
}


def _action_amount(action_data):
    """Return the ``amount`` field of one actionData payload as a float.

    A non-dict payload, a missing ``amount`` key and an explicit ``None`` all
    yield 0.0. Any other value is passed to ``float``; a malformed amount
    still raises so that bad data is not silently hidden.
    """
    if not isinstance(action_data, dict):
        return 0.0
    raw_amount = action_data.get("amount")
    return 0.0 if raw_amount is None else float(raw_amount)


wallets = df['userWallet'].unique()
rows = []

# A single groupby pass hands out each wallet's transactions, instead of
# re-filtering the whole frame once per wallet. sort=False keeps wallets in
# order of first appearance, the same order unique() returns.
# Rows whose userWallet is missing are skipped by groupby.
wallet_groups = df.groupby('userWallet', sort=False)

# NOTE(review): amounts are summed as-is across all transactions; if they are
# raw token units of different assets the totals mix scales — confirm.
for wallet, user_df in tqdm(wallet_groups, total=wallet_groups.ngroups):
    features = {
        "wallet": wallet,
        "total_tx": len(user_df),
        "deposit_count": 0,
        "borrow_count": 0,
        "repay_count": 0,
        "redeem_count": 0,
        "liquidation_count": 0,
        "total_amount": 0.0,
        "borrow_amount": 0.0,
        "repaid_amount": 0.0,
    }

    # Only these two columns are read, so iterate over them directly rather
    # than materialising a full row Series per transaction.
    for raw_action, data in zip(user_df["action"], user_df["actionData"]):
        action = str(raw_action).lower()
        amount = _action_amount(data)

        features["total_amount"] += amount

        counter = ACTION_COUNTERS.get(action)
        if counter is not None:
            features[counter] += 1

        if action == "borrow":
            features["borrow_amount"] += amount
        elif action == "repay":
            features["repaid_amount"] += amount

    rows.append(features)

wallet_df = pd.DataFrame(rows)
wallet_df.head()

In [None]:
# Calculating and assigning credit scores
from sklearn.preprocessing import MinMaxScaler
import numpy as np

# Weight of each normalized feature in the raw score; the weights sum to 1.0.
SCORE_WEIGHTS = {
    "norm_repay": 0.4,
    "norm_liquidation": 0.3,
    "norm_tx": 0.2,
    "norm_borrow": 0.1,
}
MAX_CREDIT_SCORE = 1000

scaler = MinMaxScaler()


def _min_max(column):
    """Min-max scale one ``wallet_df`` column and return it as a 1-D array.

    The shared ``scaler`` is refitted on every call, so each feature is scaled
    against its own minimum and maximum. ``ravel()`` flattens the (n, 1)
    scaler output so it is assigned to a column as a plain vector.
    """
    return scaler.fit_transform(wallet_df[[column]]).ravel()


# The epsilon only guards the division. Without the cap, a wallet that repays
# while its borrow_amount is 0 gets a ratio around 1e6 times the repaid
# amount, and that single outlier squashes every other wallet's normalized
# repay score towards 0. Capping at 1.0 treats "repaid at least what was
# borrowed" as the best possible value.
wallet_df["repay_ratio"] = (
    wallet_df["repaid_amount"] / (wallet_df["borrow_amount"] + 1e-6)
).clip(upper=1.0)
wallet_df["liquidation_risk"] = wallet_df["liquidation_count"] / (wallet_df["total_tx"] + 1e-6)

# Normalize features
wallet_df["norm_repay"] = _min_max("repay_ratio")
wallet_df["norm_liquidation"] = 1 - _min_max("liquidation_risk")  # invert: lower risk is better
wallet_df["norm_tx"] = _min_max("total_tx")
wallet_df["norm_borrow"] = _min_max("borrow_amount")

# Weighted sum
wallet_df["score_raw"] = sum(
    wallet_df[feature] * weight for feature, weight in SCORE_WEIGHTS.items()
)

# Rescale to 0–1000
wallet_df["credit_score"] = _min_max("score_raw") * MAX_CREDIT_SCORE
wallet_df["credit_score"] = wallet_df["credit_score"].round(2)

# Top 10 scores
wallet_df_sorted = wallet_df[["wallet", "credit_score"]].sort_values(by="credit_score", ascending=False)
wallet_df_sorted.head(10)

In [30]:
# Save and Download the Scores
output_file = "wallet_credit_scores.csv"
# wallet_df_sorted holds only the wallet and credit_score columns, ordered by
# descending score; index=False keeps the row index out of the CSV.
wallet_df_sorted.to_csv(output_file, index=False)

# Colab-only: asks the browser to download the CSV that was just written.
from google.colab import files
files.download(output_file)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Read the exported scores back from disk so the chart reflects the saved CSV
output_file = "wallet_credit_scores.csv"
wallet_scores_from_csv = pd.read_csv(output_file)

# Bucket edges 0, 100, ..., 1000: ten score bands of width 100
bins = [100 * edge for edge in range(11)]

# Draw the histogram on an explicit figure/axes pair
fig, ax = plt.subplots(figsize=(12, 6))
ax.hist(wallet_scores_from_csv['credit_score'], bins=bins, color='skyblue', edgecolor='black')
ax.set_title('Wallet Credit Score Distribution', fontsize=16)
ax.set_xlabel('Score Range', fontsize=14)
ax.set_ylabel('Number of Wallets', fontsize=14)
ax.grid(axis='y', linestyle='--', alpha=0.7)
ax.set_xticks(bins)
plt.show()