In [2]:
import json
import pandas as pd
import os

# Default location of the transaction export on the author's machine.
# Backslashes are escaped explicitly because this is a Windows path.
file_path = 'C:\\Users\\sahit\\Downloads\\user-wallet-transactions.json'

def analyze_user_wallet_transactions(file_path):
    """
    Print a summary of the wallet transactions stored in a JSON file.

    The file is expected to hold a JSON array of objects, one object per
    transaction. The summary reports the number of rows, the number of columns
    and their labels, the number of distinct user wallet IDs, and the five
    wallets with the most transactions.

    Every result and every error is reported with ``print``; nothing is raised
    for a missing, unreadable, or malformed file.

    Args:
        file_path (str): The path to the user-wallet-transactions.json file.

    Returns:
        None
    """
    # Check existence first so the "found locally" message is never printed
    # for a file that is then reported as missing.
    if not os.path.exists(file_path):
        print(f"Error: File not found at '{file_path}'. Please ensure the path is correct.")
        return

    print(f"'{file_path}' found locally. Loading data...")

    try:
        # Decode explicitly instead of relying on the platform default codec
        # (cp1252 on many Windows setups). 'utf-8-sig' reads plain UTF-8 and
        # also strips a leading byte-order mark, which json.load would reject.
        with open(file_path, 'r', encoding='utf-8-sig') as f:
            transactions_data = json.load(f)
    except json.JSONDecodeError:
        print(f"Error: Could not decode JSON from '{file_path}'. Please check if the file contains valid JSON.")
        return
    except Exception as e:
        # Deliberately broad: this is the reporting boundary for I/O and
        # decoding problems, and the caller expects a message, not a traceback.
        print(f"An unexpected error occurred while loading data: {e}")
        return

    print("Data loaded successfully.")

    # The DataFrame conversion below assumes a list of dictionaries.
    if not isinstance(transactions_data, list) or not all(isinstance(item, dict) for item in transactions_data):
        print("Error: Expected JSON data to be a list of dictionaries (transactions).")
        return

    if not transactions_data:
        print("The transaction file is empty. No data to analyze.")
        print("\n--- Analysis Summary ---")
        print("Total transactions (rows): 0")
        print("Number of columns: 0")
        print("Column labels: None")
        print("Number of unique user wallet IDs: 0")
        return

    df = pd.DataFrame(transactions_data)

    # 1. Total number of transactions (rows)
    print(f"Total transactions (rows): {len(df)}")

    # 2. Number of columns and their labels
    print(f"Number of columns: {len(df.columns)}")
    print(f"Column labels: {df.columns.tolist()}")

    # Prefer the 'userWallet' column; otherwise fall back to the first
    # commonly used alternative that is present.
    user_id_column = 'userWallet'
    if user_id_column not in df.columns:
        print(f"\nWarning: '{user_id_column}' column not found. Cannot group by user IDs.")
        possible_id_columns = ('user_id', 'wallet_id', 'customer_id', 'id')
        fallback_column = next((col for col in possible_id_columns if col in df.columns), None)
        if fallback_column is None:
            print("Could not find a suitable user ID column. Skipping unique ID count.")
            return
        print(f"Attempting to use '{fallback_column}' as the user ID column instead.")
        user_id_column = fallback_column

    # 3. Merge same IDs and count unique IDs
    # One row per distinct ID with its transaction count. Note that groupby
    # leaves out rows whose ID is missing (NaN) by default.
    grouped_by_user = df.groupby(user_id_column).size().reset_index(name='transaction_count')

    print(f"\nNumber of unique user wallet IDs: {len(grouped_by_user)}")

    print("\n--- Transactions per unique user (top 5) ---")
    # head() defaults to the first five rows of the sorted frame.
    print(grouped_by_user.sort_values(by='transaction_count', ascending=False).head())

    print("\nAnalysis complete.")

# --- IMPORTANT: point this at your own copy of the data ---
# The value below is the author's local Windows path; change it to wherever
# your user-wallet-transactions.json file lives. A raw string keeps the
# backslashes literal.
file_path_to_analyze = r"C:\Users\sahit\Downloads\user-wallet-transactions.json"

# Load the file and print the transaction summary.
analyze_user_wallet_transactions(file_path_to_analyze)


'C:\Users\sahit\Downloads\user-wallet-transactions.json' found locally. Loading data...
Data loaded successfully.
Total transactions (rows): 100000
Number of columns: 13
Column labels: ['_id', 'userWallet', 'network', 'protocol', 'txHash', 'logId', 'timestamp', 'blockNumber', 'action', 'actionData', '__v', 'createdAt', 'updatedAt']

Number of unique user wallet IDs: 3497

--- Transactions per unique user (top 5) ---
                                      userWallet  transaction_count
3322  0x05c9db563db8e38cc2899297da41ce430b61a484              14265
1554  0x0298b2ecdef68bc139b098461217a5b3161b69c8               1227
2793  0x04d9f6ecd792e48a09fa5dc2138baed8e628a7e5               1089
2579  0x047a96ef72d7ee6a3f193bdb92e998fb300265df                820
259   0x005f16f017aa933bb41965b52848ceb8ee48b171                767

Analysis complete.
