In [1]:
# 📦 Dependencies
import os
import random
import sqlite3
from datetime import datetime, timedelta

from faker import Faker
from tqdm import tqdm

In [2]:
# 🎲 Reproducibility: seed both the stdlib RNG and Faker with the same value
SEED = 42
fake = Faker()
random.seed(SEED)
Faker.seed(SEED)

In [3]:
# 📂 Create SQLite DB
# Opens (or creates) the simulation database and exposes `conn` / `cursor`
# for the cells below.
db_path = "../data/aml_simulation.db"

# sqlite3.connect() creates the database file itself but NOT its parent
# directory; without this a fresh checkout fails with
# "OperationalError: unable to open database file".
db_dir = os.path.dirname(db_path)
if db_dir:
    os.makedirs(db_dir, exist_ok=True)

conn = sqlite3.connect(db_path)
cursor = conn.cursor()

In [4]:
# 🧹 Start from a clean slate: remove any tables left over from a previous run
tables = ["customers", "accounts", "transactions", "watchlist_entities", "flagged_txns"]
for table_name in tables:
    # Table names come from the fixed list above, so interpolation is safe here
    # (SQLite cannot bind identifiers as parameters).
    cursor.execute(f"DROP TABLE IF EXISTS {table_name}")

In [5]:
# 📐 Schema creation
# Five tables: customers -> accounts -> transactions form the core chain
# (linked by customer_id / account_id columns, no declared foreign keys),
# plus watchlist_entities and flagged_txns, the latter created empty here.
schema_sql = """
CREATE TABLE customers (
    customer_id INTEGER PRIMARY KEY,
    name TEXT,
    dob TEXT,
    country TEXT,
    risk_category TEXT
);

CREATE TABLE accounts (
    account_id INTEGER PRIMARY KEY,
    customer_id INTEGER,
    account_type TEXT,
    open_date TEXT
);

CREATE TABLE transactions (
    txn_id INTEGER PRIMARY KEY,
    account_id INTEGER,
    timestamp TEXT,
    amount REAL,
    currency TEXT,
    origin_country TEXT,
    dest_country TEXT,
    channel TEXT,
    counterparty TEXT
);

CREATE TABLE watchlist_entities (
    entity_id INTEGER PRIMARY KEY,
    name TEXT,
    country TEXT
);

CREATE TABLE flagged_txns (
    flagged_id INTEGER PRIMARY KEY,
    txn_id INTEGER,
    rule_triggered TEXT,
    reason TEXT
);
"""
cursor.executescript(schema_sql)
conn.commit()

In [6]:
# 👤 Generate 10,000 customers
# Each row is (customer_id, name, dob, country, risk_category), matching the
# column order of the `customers` table.
countries = ['US', 'DE', 'GB', 'RU', 'CN', 'IR', 'PL', 'FR', 'BR', 'NG']
risk_weights = ['Low', 'Medium', 'High']  # risk category labels
customer_rows = []

for i in range(1, 10001):
    full_name = fake.name()
    birth_date = fake.date_of_birth(minimum_age=18, maximum_age=85).isoformat()
    home_country = random.choice(countries)
    # Skewed draw: 70% Low, 25% Medium, 5% High
    (risk_category,) = random.choices(risk_weights, weights=[0.7, 0.25, 0.05])
    customer_rows.append((i, full_name, birth_date, home_country, risk_category))

cursor.executemany("INSERT INTO customers VALUES (?, ?, ?, ?, ?)", customer_rows)
conn.commit()

In [7]:
# Generate accounts
# Every customer id 1..10000 gets between one and three accounts; each row is
# (account_id, customer_id, account_type, open_date) with sequential account ids.
account_types = ['Checking', 'Savings', 'Investment']
account_rows = []
account_id = 1

for cust_id in tqdm(range(1, 10001)):
    accounts_for_customer = random.randint(1, 3)
    for _ in range(accounts_for_customer):
        acct_type = random.choice(account_types)
        # Opening date somewhere in the last ten years, up to yesterday
        opened_on = fake.date_between(start_date='-10y', end_date='-1d').isoformat()
        account_rows.append((account_id, cust_id, acct_type, opened_on))
        account_id += 1

cursor.executemany("INSERT INTO accounts VALUES (?, ?, ?, ?)", account_rows)
conn.commit()

100%|██████████| 10000/10000 [00:00<00:00, 13330.91it/s]


In [8]:
# Generate transactions (50-200 per account)
#
# Each row is (txn_id, account_id, timestamp, amount, currency, origin_country,
# dest_country, channel, counterparty), matching the `transactions` table.
# Rows are buffered and written in batches so the full data set never has to
# sit in memory at once; everything is committed in one transaction at the end.
txn_id = 1
channels = ['Online', 'ATM', 'Branch', 'Mobile']
currencies = ['USD', 'EUR', 'GBP', 'PLN']
txn_batch = []

TXN_BATCH_SIZE = 100_000
TXN_INSERT_SQL = "INSERT INTO transactions VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)"
MAX_TXN_OFFSET = timedelta(days=3650)  # a transaction is at most ~10 years after opening

# Upper bound for every generated timestamp. Accounts open anywhere from ten
# years ago to yesterday, so adding up to 3650 days unconditionally would date
# a large share of transactions in the future.
generation_time = datetime.now()

for acc in tqdm(account_rows):
    acc_id, _, _, open_date = acc
    num_txns = random.randint(50, 200)
    open_dt = datetime.fromisoformat(open_date)

    # Seconds during which this account can transact: from its opening until
    # now, capped at MAX_TXN_OFFSET. Clamped at 0 so an unexpected future
    # open_date cannot produce an empty randint range.
    activity_window = min(generation_time - open_dt, MAX_TXN_OFFSET)
    window_seconds = max(int(activity_window.total_seconds()), 0)

    for _ in range(num_txns):
        txn_dt = open_dt + timedelta(seconds=random.randint(0, window_seconds))
        txn_batch.append((
            txn_id,
            acc_id,
            txn_dt.isoformat(),
            round(random.uniform(10, 10000), 2),
            random.choice(currencies),
            random.choice(countries),
            random.choice(countries),
            random.choice(channels),
            fake.company()
        ))
        txn_id += 1

        # Flush as soon as the buffer reaches the batch size
        if len(txn_batch) >= TXN_BATCH_SIZE:
            cursor.executemany(TXN_INSERT_SQL, txn_batch)
            txn_batch = []

# Final flush
if txn_batch:
    cursor.executemany(TXN_INSERT_SQL, txn_batch)
conn.commit()

100%|██████████| 20054/20054 [08:19<00:00, 40.17it/s]


In [9]:
# Generate watchlist entities
# Twenty synthetic companies, each row (entity_id, name, country).
watchlist = [
    (entity_id, fake.company(), random.choice(countries))
    for entity_id in range(1, 21)
]
cursor.executemany("INSERT INTO watchlist_entities VALUES (?, ?, ?)", watchlist)
conn.commit()

In [10]:
# Close the SQLite connection opened earlier in the notebook, then report
# where the database file was written.
conn.close()
print(f"✅ Database created: {db_path}")

✅ Database created: ../data/aml_simulation.db
