In [18]:
import sqlite3
import pandas as pd
from datetime import datetime

In [19]:
# Paths
db_path = "../data/aml_simulation.db"
report_dir = "../data/output/regulatory_reports"

# Connect to DB
conn = sqlite3.connect(db_path)

In [20]:
query_top_customers = """
SELECT c.customer_id, c.name, COUNT(*) AS flagged_count
FROM flagged_txns ft
JOIN transactions t ON ft.txn_id = t.txn_id
JOIN accounts a ON t.account_id = a.account_id
JOIN customers c ON a.customer_id = c.customer_id
GROUP BY c.customer_id
ORDER BY flagged_count DESC
LIMIT 3;
"""
top_customers_df = pd.read_sql(query_top_customers, conn)
top_customers_df


Unnamed: 0,customer_id,name,flagged_count
0,4729,Rebecca Newman,108
1,3288,Jimmy Ortega,106
2,2444,Susan Jones,106


In [21]:
customer_ids = top_customers_df['customer_id'].tolist()

placeholders = ', '.join(['?'] * len(customer_ids))

detailed_report_query = f"""
SELECT 
    c.customer_id,
    c.name AS customer_name,
    c.dob,
    c.country,
    c.risk_category,
    a.account_id,
    t.txn_id,
    t.timestamp,
    t.amount,
    t.origin_country,
    t.dest_country,
    t.channel,
    ft.rule_triggered,
    ft.reason
FROM customers c
JOIN accounts a ON c.customer_id = a.customer_id
JOIN transactions t ON a.account_id = t.account_id
JOIN flagged_txns ft ON t.txn_id = ft.txn_id
WHERE c.customer_id IN ({placeholders})
ORDER BY c.customer_id, t.timestamp;
"""

detailed_sar_df = pd.read_sql(detailed_report_query, conn, params=customer_ids)
detailed_sar_df.to_csv(f"{report_dir}/customer_sar_batch_report.csv", index=False)


In [22]:
top_customers_df.to_csv(f"{report_dir}/top_suspicious_customers.csv", index=False)