In [1]:
import pandas as pd
import time
import warnings
warnings.filterwarnings("ignore")

def logga(meddelande):
    """Log a timestamped message to stdout and to the notebook's log file.

    The message is prefixed with the local time formatted as
    ``[YYYY-MM-DD HH:MM:SS]``, printed, and then appended as one line to
    ``transactions_logs.txt`` (UTF-8) in the current working directory.

    Args:
        meddelande: The text to log.

    Returns:
        None.
    """
    stamp = time.strftime("%Y-%m-%d %H:%M:%S")
    line = "[" + stamp + "] " + f"{meddelande}"
    print(line)
    # Append mode: earlier runs' entries are kept, so the file grows over time.
    with open("transactions_logs.txt", "a", encoding="utf-8") as log_file:
        print(line, file=log_file)


In [2]:
# Load the two raw input tables: customers (with accounts) and transactions.
logga("Loading CSV-files...")

customers_path = "./data/sebank_customers_with_accounts.csv"
transactions_path = "./data/transactions.csv"

df_customers = pd.read_csv(customers_path)
df_transactions = pd.read_csv(transactions_path)

n_customers = len(df_customers)
n_transactions = len(df_transactions)
logga(f"Extracted {n_customers} customers and {n_transactions} transactions.")

# Parse the timestamp column into datetimes so that the .dt accessor can be
# used on it in later cells.
parsed_timestamps = pd.to_datetime(df_transactions["timestamp"])
df_transactions["timestamp"] = parsed_timestamps


[2025-06-04 17:00:06] Loading CSV-files...
[2025-06-04 17:00:06] Extracted 1000 customers and 100000 transactions.


In [3]:
# Convert every transaction amount to SEK and store it in `amount_sek`.
logga("Starting currency converter to SEK...")

# Multipliers applied to `amount` to obtain the SEK value.
# "SEK" must be present with rate 1.0: without it, transactions that are
# already in SEK fall through to the fallback rate of 0 and end up with
# amount_sek == 0.
# NOTE(review): "RMB" and "CNY" carry different rates here although they
# presumably denote the same currency — confirm which value is intended.
conversion_rates = {
    "SEK": 1.0,
    "DKK": 1.5527, "EUR": 11.6084, "GBP": 13.7361, "JPY": 0.0660, "NOK": 0.9830,
    "RMB": 1.4368, "USD": 10.2930, "ZAR": 0.5792, "ZMW": 0.3584, "AUD": 6.9731,
    "BGN": 5.8452, "BRL": 1.9697, "CAD": 7.7143, "CHF": 12.0045, "CNY": 1.4680,
    "CZK": 0.4550, "HKD": 1.3536, "HUF": 0.0289, "IDR": 0.0007, "ILS": 2.8540,
    "INR": 0.1262, "ISK": 0.0766, "KRW": 0.0078, "MXN": 0.5796, "MYR": 2.3127,
    "NZD": 6.3946, "PHP": 0.1844, "PLN": 2.6551, "RON": 2.2981, "SGD": 7.9076,
    "THB": 0.2997, "TRY": 0.3220
}

# Vectorised lookup of the rate for each row; currencies that are missing or
# not in the table map to NaN.
rate_per_row = df_transactions["currency"].map(conversion_rates)

# Surface rows without a rate instead of zeroing them silently.
rows_without_rate = rate_per_row.isna()
if rows_without_rate.any():
    unknown_codes = sorted(
        df_transactions.loc[rows_without_rate, "currency"].dropna().astype(str).unique()
    )
    logga(
        f"Warning: {int(rows_without_rate.sum())} transactions have no conversion rate "
        f"(unknown currencies: {unknown_codes}); amount_sek set to 0 for these rows."
    )

# Rows without a rate keep the previous fallback (rate 0 -> amount_sek 0) so
# downstream cells see the same column semantics as before.
df_transactions["amount_sek"] = (
    df_transactions["amount"] * rate_per_row.fillna(0)
).round(2)

logga("Currency convertions done.")


[2025-06-04 17:00:11] Starting currency converter to SEK...
[2025-06-04 17:00:12] Currency convertions done.


In [4]:
# Data-quality checks: rows with missing values, and missing/blank notes.
logga("Extracting and controlling data...")

print("Columns in df_transactions:", list(df_transactions.columns))

# Flag rows that have a null in any column other than "notes"; notes are
# checked separately below.
columns_except_notes = df_transactions.drop(columns=["notes"])
null_check = columns_except_notes.isna().any(axis=1)
invalid_transactions = df_transactions.loc[null_check]
# The logged figure is the number of flagged rows, even though the label
# says "columns".
logga(f"Invalid columns (except for notes): {len(invalid_transactions)}")

# A note counts as missing when it is null or contains only whitespace.
notes = df_transactions["notes"]
missing_notes = notes.isna() | notes.str.strip().eq("")
logga(f"Invalid notes: {missing_notes.sum()}")


[2025-06-04 17:00:22] Extracting and controlling data...
Columns in df_transactions: ['transaction_id', 'timestamp', 'amount', 'currency', 'sender_account', 'receiver_account', 'sender_country', 'sender_municipality', 'receiver_country', 'receiver_municipality', 'transaction_type', 'notes', 'amount_sek']
[2025-06-04 17:00:22] Invalid columns (except for notes): 1990
[2025-06-04 17:00:22] Invalid notes: 9948


In [5]:
# Select transactions whose timestamp falls in the hours 00-05 (inclusive).
logga("Extracting nightly transactions (00:00–05:59)...")

# Adds an "hour" column to df_transactions (hour of day taken from timestamp).
df_transactions["hour"] = df_transactions["timestamp"].dt.hour

# between() is inclusive on both ends, i.e. 0 <= hour <= 5.
is_night = df_transactions["hour"].between(0, 5)
night_tx = df_transactions.loc[is_night]

logga(f"Nighlt transactions: {len(night_tx)}")

# Last expression of the cell: preview of the first night-time rows.
preview_columns = ["transaction_id", "timestamp", "amount", "currency"]
night_tx[preview_columns].head()


[2025-06-04 17:01:45] Extracting nightly transactions (00:00–05:59)...
[2025-06-04 17:01:45] Nighlt transactions: 23576


Unnamed: 0,transaction_id,timestamp,amount,currency
0,62cacc89-95ca-41c0-a06f-d12950c85a37,2025-01-08 03:17:00,1174.47,SEK
11,0308a664-8b78-40bd-9281-8623b3d2584a,2025-03-08 00:00:00,10977.56,SEK
14,ccc76011-9857-40a8-b951-b8c90144449a,2025-04-09 05:56:00,31999.11,SEK
16,e0827340-f916-4f38-b268-ed6570416886,2025-02-26 04:23:00,15452.26,SEK
17,4fab6c87-e40f-4e77-8dc2-43aa086c946c,2025-03-20 01:11:00,23696.98,SEK
