In [20]:
import pandas as pd
from sqlalchemy import create_engine

In [21]:
# Read the three raw input tables. The paths are relative, so they are
# resolved against the current working directory of the kernel.
customers, accounts, transactions = map(
    pd.read_csv,
    (
        "../data/customers.csv",
        "../data/accounts.csv",
        "../data/transactions.csv",
    ),
)

In [22]:
# Normalise column types on the transactions table: parse `date` into
# pandas datetimes and cast `amount` to float.
transactions = transactions.assign(
    date=pd.to_datetime(transactions["date"]),
    amount=transactions["amount"].astype(float),
)

In [23]:
# Enrich every transaction with its account row (matched on account_id)
# and then with the customer row (matched on customer_id).
# Both joins are inner joins, so rows without a match on either key are dropped.
merged_df = (
    transactions
    .merge(accounts, how="inner", on="account_id")
    .merge(customers, how="inner", on="customer_id")
)

In [24]:
# Derive three boolean risk flags as columns on the merged table.
merged_df = merged_df.assign(
    # amount strictly greater than 10000
    high_value=merged_df["amount"].gt(10000),
    # country is not one of the listed codes (missing values are flagged too)
    offshore=~merged_df["country"].isin(["PL", "US", "ES", "AE", "CN"]),
    # channel is exactly "cash" or "atm" (case-sensitive match)
    suspicious_channel=merged_df["channel"].isin(["cash", "atm"]),
)

In [25]:
# Persist the enriched transactions to a local SQLite file.
# The database path is relative to the current working directory, and
# if_exists="replace" drops and recreates `transaction_log` on every run.
engine = create_engine("sqlite:///aml_database.db")
try:
    merged_df.to_sql("transaction_log", con=engine, if_exists="replace", index=False)
finally:
    # Release any pooled connections so the kernel does not keep the
    # database file open after the write; the engine stays usable afterwards.
    engine.dispose()

print("ETL pipeline completed and saved to 'aml_database.db'")

ETL pipeline completed and saved to 'aml_database.db'
