In [2]:
import pandas as pd
import numpy as np

# Statistics

- #Customer
- #Alerted Customer
- %Alerted Customer
- #Transactions
- Total Transaction Amount
- Avg. Transaction Amount

# SAML-D

In [3]:
# Read the SAML-D transaction table from its CSV file.
data = pd.read_csv(filepath_or_buffer="data/SAML-D/SAML-D.csv")

In [6]:
# Show the column labels of the loaded frame; the statistics cell below selects columns by these names.
data.columns

Index(['Time', 'Date', 'Sender_account', 'Receiver_account', 'Amount',
       'Payment_currency', 'Received_currency', 'Sender_bank_location',
       'Receiver_bank_location', 'Payment_type', 'Is_laundering',
       'Laundering_type'],
      dtype='object')

In [7]:
# Pool the sender and receiver columns so that an account appearing on both
# sides of transactions is still counted a single time.
party_columns = ['Sender_account', 'Receiver_account']
unique_accounts = pd.concat([data[name] for name in party_columns]).drop_duplicates()

# Accounts on either side of at least one transaction flagged as laundering.
flagged = data['Is_laundering'] == 1
alerted_accounts = pd.concat(
    [data.loc[flagged, name] for name in party_columns]
).drop_duplicates()

# Headline counts, computed once and reused below.
customer_count = len(unique_accounts)
alerted_count = len(alerted_accounts)
if customer_count > 0:
    alerted_share = (alerted_count / customer_count) * 100
else:
    alerted_share = 0  # empty frame: report 0 instead of dividing by zero

amounts = data['Amount']
stats = {
    '#Customer': customer_count,
    '#Alerted Customer': alerted_count,
    '%Alerted Customer': alerted_share,
    '#Transactions': len(data),
    'Total Transaction Amount': amounts.sum(),
    'Avg. Transaction Amount': amounts.mean()
}

# Emit one "name: value" line per statistic, in insertion order.
for label in stats:
    print(f"{label}: {stats[label]}")

#Customer: 855460
#Alerted Customer: 7902
%Alerted Customer: 0.9237135576181235
#Transactions: 9504852
Total Transaction Amount: 83290710127.60838
Avg. Transaction Amount: 8762.96760092723


# LI-Small

In [9]:
# Read the LI-Small transaction table (AML-World) from its CSV file.
data = pd.read_csv(filepath_or_buffer="data/AML-World/LI-Small_Trans.csv")

In [13]:
# Show the column labels of the loaded frame; the second account column is labelled 'Account.1' in the output below.
data.columns

Index(['Timestamp', 'From Bank', 'Account', 'To Bank', 'Account.1',
       'Amount Received', 'Receiving Currency', 'Amount Paid',
       'Payment Currency', 'Payment Format', 'Is Laundering'],
      dtype='object')

In [20]:
# Convert 'From Bank' and 'Account' to string and create unique customer IDs for sender
data['Sender_Customer_ID'] = data['From Bank'].astype(str) + data['Account'].astype(str)

# Convert 'To Bank' and 'Account.1' to string and create unique customer IDs for receiver
data['Receiver_Customer_ID'] = data['To Bank'].astype(str) + data['Account.1'].astype(str)

# Preprocessing to ensure that accounts are not double-counted as both senders and receivers
unique_accounts = pd.concat([data['Sender_Customer_ID'], data['Receiver_Customer_ID']]).drop_duplicates()

# Filter to find accounts involved in laundering, again creating unique IDs and then finding unique ones
alerted_accounts = pd.concat([
    data.loc[data['Is Laundering'] == 1, 'Sender_Customer_ID'],
    data.loc[data['Is Laundering'] == 1, 'Receiver_Customer_ID']
]).drop_duplicates()

# Statistics calculations
stats = {
    '#Customer': len(unique_accounts),
    '#Alerted Customer': len(alerted_accounts),
    '%Alerted Customer': (len(alerted_accounts) / len(unique_accounts)) * 100 if len(unique_accounts) > 0 else 0,
    '#Transactions': len(data),
    'Total Transaction Amount': data[['Amount Received', 'Amount Paid']].sum().sum(),
    'Avg. Transaction Amount': data[['Amount Received', 'Amount Paid']].mean().mean()
}

# Printing the statistics
for key, value in stats.items():
    print(f"{key}: {value}")

#Customer: 515088
#Alerted Customer: 6357
%Alerted Customer: 1.234158046780356
#Transactions: 5078345
Total Transaction Amount: 53312462955026.55
Avg. Transaction Amount: 5248999.718907099


# HI-Small

In [18]:
# Read the HI-Small transaction table (AML-World) from its CSV file.
data = pd.read_csv(filepath_or_buffer="data/AML-World/HI-Small_Trans.csv")

In [19]:
# Convert 'From Bank' and 'Account' to string and create unique customer IDs for sender
data['Sender_Customer_ID'] = data['From Bank'].astype(str) + data['Account'].astype(str)

# Convert 'To Bank' and 'Account.1' to string and create unique customer IDs for receiver
data['Receiver_Customer_ID'] = data['To Bank'].astype(str) + data['Account.1'].astype(str)

# Preprocessing to ensure that accounts are not double-counted as both senders and receivers
unique_accounts = pd.concat([data['Sender_Customer_ID'], data['Receiver_Customer_ID']]).drop_duplicates()

# Filter to find accounts involved in laundering, again creating unique IDs and then finding unique ones
alerted_accounts = pd.concat([
    data.loc[data['Is Laundering'] == 1, 'Sender_Customer_ID'],
    data.loc[data['Is Laundering'] == 1, 'Receiver_Customer_ID']
]).drop_duplicates()

# Statistics calculations
stats = {
    '#Customer': len(unique_accounts),
    '#Alerted Customer': len(alerted_accounts),
    '%Alerted Customer': (len(alerted_accounts) / len(unique_accounts)) * 100 if len(unique_accounts) > 0 else 0,
    '#Transactions': len(data),
    'Total Transaction Amount': data[['Amount Received', 'Amount Paid']].sum().sum(),
    'Avg. Transaction Amount': data[['Amount Received', 'Amount Paid']].mean().mean()
}

# Printing the statistics
for key, value in stats.items():
    print(f"{key}: {value}")

#Customer: 515088
#Alerted Customer: 6357
%Alerted Customer: 1.234158046780356
#Transactions: 5078345
Avg Transaction Count: 53312462955026.55
Avg. Transaction Amount: 5248999.718907099
