# In [4]:
import json
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from datetime import datetime
from collections import defaultdict

# ------------------------ Feature Engineering ------------------------

def extract_features(df):
    """Aggregate raw transaction records into one feature row per wallet.

    Args:
        df: DataFrame of transactions. The columns read are ``userWallet``,
            ``timestamp`` (converted with ``unit='s'``), ``action`` and
            ``actionData`` (a mapping providing ``amount``,
            ``assetPriceUSD`` and ``assetSymbol``). The frame is left
            unmodified; all derived columns live on a private working frame.

    Returns:
        DataFrame with one row per wallet, in order of first appearance,
        and the columns ``wallet``, ``total_tx``, ``deposit_count``,
        ``redeem_count``, ``total_deposit_usd``, ``total_redeem_usd``,
        ``active_days`` and ``redeem_deposit_ratio``. An input without rows
        yields an empty frame that still carries these columns.

    Note:
        Rows whose wallet is missing (NaN) are dropped by the grouping step.
    """
    feature_columns = [
        'wallet', 'total_tx', 'deposit_count', 'redeem_count',
        'total_deposit_usd', 'total_redeem_usd', 'active_days',
        'redeem_deposit_ratio',
    ]
    if len(df) == 0:
        return pd.DataFrame(columns=feature_columns)

    def usd_value(action_data):
        # Amounts are divided by 10**6 for USDC and 10**18 for every other
        # symbol (presumably the token's decimals -- TODO confirm that no
        # other asset in the data uses a different scale).
        decimals = 6 if action_data['assetSymbol'] == 'USDC' else 18
        return (float(action_data['amount'])
                * float(action_data['assetPriceUSD']) / 10 ** decimals)

    usd_amount = df['actionData'].apply(usd_value)
    is_deposit = df['action'] == 'deposit'
    is_redeem = df['action'] == 'redeemunderlying'

    # Working frame: per-row indicators and amounts, so that a single
    # groupby pass replaces one full-frame scan per wallet.
    tx = pd.DataFrame({
        'wallet': df['userWallet'],
        'is_deposit': is_deposit.astype(int),
        'is_redeem': is_redeem.astype(int),
        'deposit_usd': usd_amount.where(is_deposit, 0.0),
        'redeem_usd': usd_amount.where(is_redeem, 0.0),
        # Truncate to midnight so nunique() counts distinct calendar days.
        'day': pd.to_datetime(df['timestamp'], unit='s').dt.normalize(),
    })

    # sort=False keeps wallets in order of first appearance.
    features = tx.groupby('wallet', sort=False).agg(
        total_tx=('is_deposit', 'size'),
        deposit_count=('is_deposit', 'sum'),
        redeem_count=('is_redeem', 'sum'),
        total_deposit_usd=('deposit_usd', 'sum'),
        total_redeem_usd=('redeem_usd', 'sum'),
        active_days=('day', 'nunique'),
    ).reset_index()

    # Ratio feature: defined as 0 when the wallet has no positive deposits.
    deposited = features['total_deposit_usd']
    features['redeem_deposit_ratio'] = (
        features['total_redeem_usd'] / deposited.where(deposited > 0)
    ).fillna(0.0)

    return features[feature_columns]

# ------------------------ Scoring Logic ------------------------

def score_wallets(features_df):
    """Score each wallet from 0 to 1000 with a weighted sum of scaled features.

    Every feature column is min-max scaled, combined with fixed weights,
    and the combined value is rescaled so that the lowest wallet gets 0 and
    the highest gets 1000.

    Args:
        features_df: DataFrame with a ``wallet`` column and the feature
            columns ``total_tx``, ``deposit_count``, ``redeem_count``,
            ``total_deposit_usd``, ``total_redeem_usd``, ``active_days``
            and ``redeem_deposit_ratio``.

    Returns:
        A new DataFrame with the columns ``wallet`` and ``score`` (int).
        Empty input yields an empty frame with those two columns.

    Side effects:
        For non-empty input a ``score`` column is added to ``features_df``
        (kept for backward compatibility). Empty input is left untouched.
    """
    # Guard first: MinMaxScaler rejects input with zero samples.
    if len(features_df) == 0:
        return pd.DataFrame({
            'wallet': pd.Series(dtype=object),
            'score': pd.Series(dtype=int),
        })

    # Weight applied to each min-max scaled feature.
    feature_weights = {
        'total_tx': 0.1,
        'deposit_count': 0.1,
        'redeem_count': 0.05,
        'total_deposit_usd': 0.25,
        'total_redeem_usd': 0.2,
        'active_days': 0.2,
        'redeem_deposit_ratio': 0.1,
    }
    features = list(feature_weights)
    weights = np.array(list(feature_weights.values()))

    scaler = MinMaxScaler()
    scaled = scaler.fit_transform(features_df[features])
    weighted_scores = np.dot(scaled, weights)

    # Scale to 0-1000. When every wallet has the same weighted score (for
    # example a single wallet) the range is zero; dividing by it would give
    # NaN, which cannot be cast to int, so all wallets receive 0 instead.
    lowest = weighted_scores.min()
    spread = weighted_scores.max() - lowest
    if spread > 0:
        normalized = (weighted_scores - lowest) / spread
    else:
        normalized = np.zeros_like(weighted_scores)
    scores = (normalized * 1000).round().astype(int)

    features_df['score'] = scores
    # Return a copy so callers can modify the result without touching (or
    # getting chained-assignment warnings from) features_df.
    return features_df[['wallet', 'score']].copy()

# ------------------------ Score Distribution Plot ------------------------

def plot_score_distribution(df, output_path="score_distribution.png"):
    """Save a histogram of wallet scores to an image file.

    Args:
        df: DataFrame whose ``score`` column holds the values to plot.
        output_path: File the figure is written to.
    """
    # Bin edges 0, 100, ..., 1000; they double as the x-axis tick positions.
    bin_edges = [step * 100 for step in range(11)]

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.hist(df['score'], bins=bin_edges, edgecolor='black')
    ax.set_xticks(bin_edges)
    ax.set_xlabel("Score Range")
    ax.set_ylabel("Number of Wallets")
    ax.set_title("Wallet Score Distribution")
    ax.grid(True, linestyle='--', alpha=0.5)

    fig.savefig(output_path)
    # Close the figure once it has been written to disk.
    plt.close(fig)

# ------------------------ Main Function ------------------------

def main(input_path='C:/Users/m0ham/Saved Games/problem statement/user-wallet-transactions.json/user-wallet-transactions.json',
         scores_path="wallet_scores.csv",
         plot_path="score_distribution.png"):
    """Run the wallet-scoring pipeline: load, featurize, score, save, plot.

    Args:
        input_path: JSON file with the transaction records. Defaults to the
            location originally hard-coded in this script.
        scores_path: CSV file the wallet/score table is written to.
        plot_path: Image file the score histogram is written to.
    """
    # Load data. The encoding is given explicitly so the file is not decoded
    # with a platform-dependent default.
    with open(input_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    df = pd.DataFrame(data)

    # Step 1: Feature Extraction
    features_df = extract_features(df)

    # Step 2: Score Wallets
    scored_df = score_wallets(features_df)

    # Step 3: Save to CSV
    scored_df.to_csv(scores_path, index=False)

    # Step 4: Plot Distribution
    plot_score_distribution(scored_df, plot_path)

    print(f"✅ Wallet scoring completed. Check '{scores_path}' and '{plot_path}'.")

# Script entry point: run the pipeline only when this module's __name__ is
# "__main__" (i.e. it is executed directly rather than imported).
if __name__ == "__main__":
    main()


# Output: ✅ Wallet scoring completed. Check 'wallet_scores.csv' and 'score_distribution.png'.
