In [1]:
import pandas as pd
from collections import defaultdict
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from sklearn.cluster import KMeans

In [2]:
dataset = pd.read_json('dataset/user-wallet-transactions.json')
wallets = dataset['userWallet'].unique()
userInteractions = defaultdict(lambda: defaultdict(list))

In [3]:
userInteractions

defaultdict(<function __main__.<lambda>()>, {})

In [4]:
for data in dataset['actionData']:
    userInteractions[data['userId']][data['type']].append(data['amount'])

In [5]:
import numpy as np
import pandas as pd

# feature lists
wallet_ids = []
repay_ratio = []
redeem_ratio = []
times_redeemed = []
liquidation_count = []
times_deposit = []
times_repayed = []

# Loop through all user interactions
for wallet_id in userInteractions:
    wallet_ids.append(wallet_id)
    user = userInteractions[wallet_id]

    # Liquidation if any
    if user.get('LiquidationCall'):
        liquidation_count.append(len(user['LiquidationCall']))
    else:
        liquidation_count.append(0)

    # Deposit and Redeem
    if user.get('Deposit'):
        deposits = list(map(int, user['Deposit']))
        total_deposited = np.sum(deposits)
        times_deposit.append(len(deposits))

        if user.get('RedeemUnderlying'):
            redeems = list(map(int, user['RedeemUnderlying']))
            total_redeemed = np.sum(redeems)
            times_redeemed.append(len(redeems))
            if total_deposited > 0:
                redeem_ratio.append(total_redeemed / total_deposited)
            else:
                redeem_ratio.append(0)
        else:
            times_redeemed.append(0)
            redeem_ratio.append(0)
    else:
        times_deposit.append(0)
        times_redeemed.append(0)
        # If there are no deposits
        redeem_ratio.append(-1) 

    # Borrow and Repay
    if user.get('Borrow'):
        total_borrowed = np.sum(list(map(int, user['Borrow'])))
        if user.get('Repay'):
            repays = list(map(int, user['Repay']))
            total_repayed = np.sum(repays)
            times_repayed.append(len(repays))
            if total_borrowed > 0:
                repay_ratio.append(total_repayed / total_borrowed)
            else:
                repay_ratio.append(0)
        else:
            # If there is no repayment
            repay_ratio.append(-1)
            times_repayed.append(0)
    else:
        repay_ratio.append(0)
        times_repayed.append(0)

# Create dataframe
df = pd.DataFrame({
    'wallet_address': wallet_ids,
    'repay_ratio': repay_ratio,
    'redeem_ratio': redeem_ratio,
    'times_redeemed': times_redeemed,
    'liquidation_count': liquidation_count,
    'times_deposit': times_deposit,
    'times_repayed': times_repayed
})


In [6]:
df.head(10)

Unnamed: 0,wallet_address,repay_ratio,redeem_ratio,times_redeemed,liquidation_count,times_deposit,times_repayed
0,0x00000000001accfa9cef68cf5371a23025b6d4b6,0.0,0.0,0,0,1,0
1,0x000000000051d07a4fb3bd10121a343d85818da6,0.0,0.0,0,0,1,0
2,0x000000000096026fb41fc39f9875d164bd82e2dc,0.0,0.0,0,0,2,0
3,0x7f90122bf0700f9e7e1f688fe926940e8839f353,0.0,-1.0,0,0,0,0
4,0x445fe580ef8d70ff569ab36e80c647af338db351,0.0,-1.0,0,0,0,0
5,0x7e13d3b4845db1508cc5f311e067925e3cf77b64,0.0,-1.0,0,0,0,0
6,0x4244eb811d6e0ef302326675207a95113db4e1f8,0.0,-1.0,0,0,0,0
7,0x3fcd5de6a9fc8a99995c406c77dda3ed7e406f81,0.0,-1.0,0,0,0,0
8,0x1d8b86e3d88cdb2d34688e87e72f388cb541b7c8,0.0,-1.0,0,0,0,0
9,0x0000000002032370b971dabd36d72f3e5a7bf1ee,1.774571e-12,0.417211,126,0,250,4


In [7]:
df.describe()

Unnamed: 0,repay_ratio,redeem_ratio,times_redeemed,liquidation_count,times_deposit,times_repayed
count,3452.0,3452.0,3452.0,3452.0,3452.0,3452.0
mean,0.123935,58514040.0,3.634415,0.071842,10.952491,3.605156
std,0.918085,3367584000.0,12.865921,0.697344,30.037416,13.426305
min,-1.35235,-1.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,1.0,0.0
50%,0.0,0.0,0.0,0.0,2.0,0.0
75%,0.341157,0.1549115,2.0,0.0,6.0,2.0
max,41.238836,197816100000.0,234.0,26.0,511.0,291.0


In [8]:
df.to_csv('dataset/new_dataset.csv', index=False)