## Section 1: Overview Metrics
 * How many unique addresses are in the dataset?
 * What percentage of those addresses are flagged as scams?
 * How many total transactions are represented?

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Location of the input CSV, relative to this notebook's working directory.
FEATURES_CSV = '../Data/Features/feature_ethereum_data_with_ratios.csv'

# Load the whole table into memory; later cells read it through `df`.
df = pd.read_csv(FEATURES_CSV)

In [3]:
# Distinct values in the `address` column; missing values are not counted.
address_column = df['address']
num_addresses = address_column.nunique(dropna=True)
print(f"Number of unique addresses: {num_addresses}")



Number of unique addresses: 9816


In [7]:
# Count scams per unique address rather than per row. `num_addresses` (set in
# the unique-address cell) is a distinct-address count, so summing `flag` over
# raw rows would double-count any address that appears on more than one row
# and skew both the non-scam count and the percentage.
# An address is treated as a scam if ANY of its rows is flagged
# (assumes `flag` is 0/1 -- TODO confirm against the dataset schema).
flag_by_address = df.groupby('address')['flag'].max()
num_scams = flag_by_address.sum()
print(f"Number of scams: {num_scams}")
num_non_scams = num_addresses - num_scams
print(f"Number of non-scams: {num_non_scams}")
# Guard against an empty dataset so the percentage stays well-defined.
scam_percentage = num_scams / num_addresses * 100 if num_addresses else 0.0
print(f"Percentage of scams: {scam_percentage:.2f}%")

Number of scams: 2179
Number of non-scams: 7637
Percentage of scams: 22.20%


## Section 2: User Segmentation
 * What percentage of wallets are sender-only, receiver-only, or both?
 * How does this differ between scam and non-scam addresses?


In [9]:
# Addresses that appear on at least one row with a positive sent / received
# transaction count.
senders = set(df.loc[df['sent_tnx'].gt(0), 'address'])
receivers = set(df.loc[df['received_tnx'].gt(0), 'address'])

# Partition the active addresses into three disjoint groups.
both = senders.intersection(receivers)
sender_only = senders.difference(receivers)
receiver_only = receivers.difference(senders)

# Group sizes, keyed by display label. Addresses with no positive sent or
# received count are in neither set and therefore in none of these buckets.
segmentation = {
    'Sender Only': len(sender_only),
    'Receiver Only': len(receiver_only),
    'Both': len(both),
}