In [21]:
import json
import pandas as pd
import numpy as np
import joblib
from sklearn.preprocessing import MinMaxScaler

In [22]:
# Load the ML model that was trained in assignment 1.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code, so
# only load model files that come from a trusted source.
model = joblib.load("trained_wallet_model.pkl")

In [23]:
# Load the new wallets' transaction data.
# The encoding is stated explicitly so that decoding the JSON file does not
# depend on the platform's default locale encoding.
with open("compound_transactions_filtered.json", "r", encoding="utf-8") as f:
    new_data = json.load(f)

# One row per record in the JSON file.
df = pd.DataFrame(new_data)

In [24]:
# Preview the first rows to check the columns of the loaded transactions.
df.head()

Unnamed: 0,userWallet,network,protocol,txHash,logId,timestamp,blockNumber,action,actionData
0,0x0039f22efb07a647557c7c5d17854cfd6d489ef3,ethereum,compound_v2,0x278bf9dbf8b23e259a3dfa3e2d2f8908555023b38462...,0x278bf9dbf8b23e259a3dfa3e2d2f8908555023b38462...,1638889167,13759193,redeemunderlying,"{'type': 'Redeemunderlying', 'amount': '0', 'a..."
1,0x0039f22efb07a647557c7c5d17854cfd6d489ef3,ethereum,compound_v2,0x2a23706388058e55dce35bad628d511280c47f795e31...,0x2a23706388058e55dce35bad628d511280c47f795e31...,1638303263,13716853,deposit,"{'type': 'Deposit', 'amount': '0', 'assetSymbo..."
2,0x0039f22efb07a647557c7c5d17854cfd6d489ef3,ethereum,compound_v2,0xb58878d9c2733ce7c0791f7969efec1e9ff3a79d69c9...,0xb58878d9c2733ce7c0791f7969efec1e9ff3a79d69c9...,1624693565,12708396,deposit,"{'type': 'Deposit', 'amount': '0', 'assetSymbo..."
3,0x0039f22efb07a647557c7c5d17854cfd6d489ef3,ethereum,compound_v2,0x72f35f938c73944ce06628159b3abad785f976e3d066...,0x72f35f938c73944ce06628159b3abad785f976e3d066...,1616099263,12064851,redeemunderlying,"{'type': 'Redeemunderlying', 'amount': '0', 'a..."
4,0x0039f22efb07a647557c7c5d17854cfd6d489ef3,ethereum,compound_v2,0x8f4c35854a7fd6fcda351970b0c1a977b9c1bd390d42...,0x8f4c35854a7fd6fcda351970b0c1a977b9c1bd390d42...,1615696754,12034580,redeemunderlying,"{'type': 'Redeemunderlying', 'amount': '0', 'a..."


In [25]:
# Null-value check: count the missing values in each column.
# (This cell only reports the counts; it does not modify df.)
df.isnull().sum()

userWallet     0
network        0
protocol       0
txHash         0
logId          0
timestamp      0
blockNumber    0
action         0
actionData     0
dtype: int64

In [26]:
# Preprocessing: build the feature columns the trained model expects
def extract_amount(row):
    """Return the ``amount`` stored in ``row['actionData']`` divided by 1e18.

    Parameters
    ----------
    row : mapping-like
        A record (for example a DataFrame row) whose ``'actionData'`` entry
        offers a dict-style ``.get``.

    Returns
    -------
    float or int
        ``float(amount) / 1e18``. ``0`` is returned when ``'actionData'`` is
        missing or has no ``.get``, or when the amount cannot be converted to
        a float. A missing ``'amount'`` key is treated as ``0``.
    """
    try:
        # NOTE(review): dividing by 1e18 presumably converts 18-decimal base
        # units to whole tokens; confirm this holds for every asset in the data.
        return float(row['actionData'].get('amount', 0)) / 1e18
    except (AttributeError, KeyError, IndexError, TypeError, ValueError, OverflowError):
        # Only malformed-data failures fall back to 0. A bare ``except`` would
        # also swallow KeyboardInterrupt/SystemExit and hide real bugs.
        return 0

# Add the two helper columns used by the feature builder:
#   amount   - value returned by extract_amount for each row
#   datetime - the "timestamp" column interpreted as epoch seconds
df["amount"] = df.apply(extract_amount, axis="columns")
df["datetime"] = pd.to_datetime(df.loc[:, "timestamp"], unit="s")

def generate_wallet_features(df):
    grouped = df.groupby("userWallet")
    features = pd.DataFrame()
    features["total_transactions"] = grouped.size()
    features["deposit_count"] = grouped.apply(lambda x: (x["action"] == "deposit").sum())
    features["borrow_count"] = grouped.apply(lambda x: (x["action"] == "borrow").sum())
    features["repay_count"] = grouped.apply(lambda x: (x["action"] == "repay").sum())
    features["redeem_count"] = grouped.apply(lambda x: (x["action"] == "redeemunderlying").sum())
    features["liquidation_count"] = grouped.apply(lambda x: (x["action"] == "liquidationcall").sum())
    features["total_deposit_amount"] = grouped.apply(lambda x: x.loc[x["action"] == "deposit", "amount"].sum())
    features["total_borrow_amount"] = grouped.apply(lambda x: x.loc[x["action"] == "borrow", "amount"].sum())
    features["total_repay_amount"] = grouped.apply(lambda x: x.loc[x["action"] == "repay", "amount"].sum())
    features["active_days"] = grouped["datetime"].apply(lambda x: x.dt.date.nunique())
    features["last_active_days_ago"] = grouped["datetime"].apply(lambda x: (pd.Timestamp.now() - x.max()).days)
    return features.reset_index()

# Build the per-wallet feature table, append the repay/borrow ratio (the small
# epsilon keeps the division defined when nothing was borrowed), and replace
# any remaining missing values with 0.
features_df = (
    generate_wallet_features(df)
    .assign(
        repay_to_borrow_ratio=lambda frame: frame["total_repay_amount"]
        / (frame["total_borrow_amount"] + 1e-6)
    )
    .fillna(0)
)

  features["deposit_count"] = grouped.apply(lambda x: (x["action"] == "deposit").sum())
  features["borrow_count"] = grouped.apply(lambda x: (x["action"] == "borrow").sum())
  features["repay_count"] = grouped.apply(lambda x: (x["action"] == "repay").sum())
  features["redeem_count"] = grouped.apply(lambda x: (x["action"] == "redeemunderlying").sum())
  features["liquidation_count"] = grouped.apply(lambda x: (x["action"] == "liquidationcall").sum())
  features["total_deposit_amount"] = grouped.apply(lambda x: x.loc[x["action"] == "deposit", "amount"].sum())
  features["total_borrow_amount"] = grouped.apply(lambda x: x.loc[x["action"] == "borrow", "amount"].sum())
  features["total_repay_amount"] = grouped.apply(lambda x: x.loc[x["action"] == "repay", "amount"].sum())


In [27]:
# Build the model input: every engineered feature column, without the wallet
# identifier. "predicted_score" is also removed when present, so re-running
# this cell after the scoring cell does not feed earlier predictions back
# into the model.
# NOTE(review): the remaining columns and their order are assumed to match
# what the model was trained on - confirm against the training notebook.
X_new = (
    features_df
    .drop(columns=["userWallet"])
    .drop(columns=["predicted_score"], errors="ignore")
)

In [28]:
# Predict a credit score for every wallet (one prediction per row of X_new).
predicted_scores = model.predict(X_new)

In [29]:
# Attach the predictions to the feature table as a new column.
# This mutates features_df in place; rows are in the same order as X_new,
# which was derived from features_df.
features_df["predicted_score"] = predicted_scores

In [30]:
# === Final wallet score output ===
# Show the wallet address and its predicted score for the first rows.
score_columns = ["userWallet", "predicted_score"]
print(features_df.loc[:, score_columns].head())

                                   userWallet  predicted_score
0  0x0039f22efb07a647557c7c5d17854cfd6d489ef3       626.238098
1  0x06b51c6882b27cb05e712185531c1f74996dd988         7.564269
2  0x0795732aacc448030ef374374eaae57d2965c16c         7.564269
3  0x0aaa79f1a86bc8136cd0d1ca0d51964f4e3766f9         7.564269
4  0x0fe383e5abc200055a7f391f94a5f5d1f844b9ae         7.564269


In [31]:
# Top wallets by predicted score
# Order wallets from highest to lowest predicted score and print the first five.
sorted_scores = features_df.sort_values("predicted_score", ascending=False)
print(sorted_scores.iloc[:5])

                                    userWallet  total_transactions  \
0   0x0039f22efb07a647557c7c5d17854cfd6d489ef3                  33   
25  0x427f2ac5fdf4245e027d767e7c3ac272a1f40a65                  41   
46  0x70d8e4ab175dfe0eab4e9a7f33e0a2d19f44001e                  36   
26  0x4814be124d7fe3b240eb46061f7ddfab468fe122                  46   
90  0xf340b9f2098f80b86fbc5ede586c319473aa11f3                  14   

    deposit_count  borrow_count  repay_count  redeem_count  liquidation_count  \
0              15             1            1            16                  0   
25             20             7            5             9                  0   
46             12             4            2            18                  0   
26             21             6            5            14                  0   
90              3             6            3             2                  0   

    total_deposit_amount  total_borrow_amount  total_repay_amount  \
0             200.00638

In [32]:
# Write only the wallet address and its score to CSV, without the row index.
wallet_scores = features_df.loc[:, ["userWallet", "predicted_score"]]
wallet_scores.to_csv("wallet_scores.csv", index=False)
print("Saved 'wallet_scores.csv' with userWallet and predicted_score.")

Saved 'wallet_scores.csv' with userWallet and predicted_score.
