In [2]:
import pandas as pd
import time
import warnings
# NOTE(review): with no category/module argument this installs an "ignore"
# filter for every warning, so deprecation and dtype warnings raised by later
# cells are not shown. Consider narrowing it to the specific warning that
# motivated it.
warnings.filterwarnings("ignore")

def logga(meddelande, logfil="customers_logs.txt"):
    """Print a timestamped message and append the same line to a log file.

    Parameters
    ----------
    meddelande : object
        Text to log. It is interpolated into an f-string, so any value with a
        string representation is accepted.
    logfil : str or path-like, default "customers_logs.txt"
        File the line is appended to. The default is the notebook's original
        log location, so existing one-argument calls behave exactly as before.

    Returns
    -------
    None
        The function is used for its side effects (stdout + file append).
    """
    tid = time.strftime("%Y-%m-%d %H:%M:%S")
    rad = f"[{tid}] {meddelande}"
    print(rad)
    # Append mode preserves earlier entries; explicit UTF-8 keeps non-ASCII
    # text independent of the platform's default encoding.
    with open(logfil, "a", encoding="utf-8") as f:
        f.write(rad + "\n")


In [3]:
# Load both CSV extracts; a log line is written before and after the read.
logga("Extracting customers and transactions data...")

df_customers = pd.read_csv("./data/sebank_customers_with_accounts.csv")
# Convert the timestamp column with pd.to_datetime while building the frame.
df_transactions = pd.read_csv("./data/transactions.csv").assign(
    timestamp=lambda frame: pd.to_datetime(frame["timestamp"])
)

logga(f"Extracted {len(df_customers)} customers {len(df_transactions)} transactions.")


[2025-06-07 10:33:09] Extracting customers and transactions data...
[2025-06-07 10:33:10] Extracted 1000 customers 100000 transactions.


In [3]:
logga("Extracting customers with multiple accounts...")

# Number of non-null BankAccount rows per Personnummer, broadcast back onto
# every row of that person (transform keeps the original row alignment).
rows_per_person = df_customers.groupby("Personnummer")["BankAccount"].transform("count")
df_customers["KontonPerPerson"] = rows_per_person

has_several_accounts = df_customers["KontonPerPerson"] > 1
multiple_accounts = df_customers[has_several_accounts]

# Count people, not rows: each person appears once per account row.
antal_personer = multiple_accounts["Personnummer"].nunique()
logga(f"Customers with multiple accounts: {antal_personer}")

preview_columns = ["Personnummer", "BankAccount", "KontonPerPerson"]
multiple_accounts[preview_columns].drop_duplicates().head(10)


[2025-06-04 17:09:01] Extracting customers with multiple accounts...
[2025-06-04 17:09:01] Customers with multiple accounts: 281


Unnamed: 0,Personnummer,BankAccount,KontonPerPerson
0,400118-5901,SE8902EPWK73250364544965,2
1,400118-5901,SE8902IDSK51225196610969,2
2,391117-9285,SE8902OGIV86383792142837,2
3,391117-9285,SE8902QZEZ52320024971424,2
4,981215-7254,SE8902DWZI85436013187521,5
5,981215-7254,SE8902UJGR36411837401781,5
6,981215-7254,SE8902VAPD72818464374400,5
7,981215-7254,SE8902VEIB72029945509184,5
8,981215-7254,SE8902CHSI75448480838708,5
9,960709-2138,SE8902XPHQ15953762308017,2


In [4]:
logga("Extracting phone numbers starting with '00'...")

# Cast to str before the prefix test so every cell is compared as text.
phone_text = df_customers["Phone"].astype(str)
starts_with_00 = df_customers[phone_text.str.startswith("00")]
# One row per distinct (Customer, Phone) pair.
unique_phones = starts_with_00.loc[:, ["Customer", "Phone"]].drop_duplicates()

logga(f"Phone numbers starting with '00': {len(unique_phones)}")
unique_phones.head()


[2025-06-04 17:09:06] Extracting phone numbers starting with '00'...
[2025-06-04 17:09:06] Phone numbers starting with '00': 14


Unnamed: 0,Customer,Phone
165,Zahra Svensson-Johansson,0026-601 74
252,Caroline Sjölander,0080-904 34
350,Iris Bäck,0049-114 10
416,Alice Carlsson,005-124 78 23
471,Maria Nyström,002-864 34 55


In [5]:
logga("Extracting phone numbers starting with '+46'...")

# Boolean mask: phone, compared as text, begins with the literal "+46".
is_plus46 = df_customers["Phone"].astype(str).str.startswith("+46")
plus46 = df_customers[is_plus46]
# Collapse repeated (Customer, Phone) pairs before counting.
plus46_unique = plus46.loc[:, ["Customer", "Phone"]].drop_duplicates()

logga(f"Phone numbers starting with '+46': {len(plus46_unique)}")
plus46_unique.head()


[2025-06-04 17:09:09] Extracting phone numbers starting with '+46'...
[2025-06-04 17:09:09] Phone numbers starting with '+46': 178


Unnamed: 0,Customer,Phone
2,Mona Lundgren,+46 (0)396 101 64
4,Tuulikki Blomqvist,+46 (0)918 939 10
11,Isabelle Hjelm-Larsson,+46 (0)20 79 17 12
23,Ida Haraldsson,+46 (0)507 376 03
31,Karin Nilsson,+46 (0)975 970 92


In [6]:
logga("Extracting phone numbers containing spaces and/or dashes...")

# Cast to str first, as the prefix checks on Phone elsewhere in this notebook
# do. Without the cast, a missing or non-string phone makes str.contains
# return NaN for that row, and a mask containing NaN cannot be used for
# boolean indexing. After the cast such cells simply do not match.
has_space_or_dash = df_customers["Phone"].astype(str).str.contains(r"[ \-]")
space_or_dash = df_customers[has_space_or_dash]
# One row per distinct (Customer, Phone) pair.
space_or_dash_unique = space_or_dash[["Customer", "Phone"]].drop_duplicates()

logga(f"Phone numbers containing spaces and/or dashes: {len(space_or_dash_unique)}")
space_or_dash_unique.head()


[2025-06-04 17:09:13] Extracting phone numbers containing spaces and/or dashes...
[2025-06-04 17:09:13] Phone numbers containing spaces and/or dashes: 581


Unnamed: 0,Customer,Phone
0,Sofie Ibrahim,061-608 60 88
2,Mona Lundgren,+46 (0)396 101 64
4,Tuulikki Blomqvist,+46 (0)918 939 10
9,Maria Lund,011-396 09 07
11,Isabelle Hjelm-Larsson,+46 (0)20 79 17 12


In [7]:
logga("Extracting addresses...")

# Street part: the greedy match takes everything up to the last comma in
# Address. expand=False makes a single-group extract return a Series, so both
# derived columns are assigned the same way (previously one used [0] and the
# other assigned a one-column DataFrame to a column).
df_customers["Street"] = df_customers["Address"].str.extract(r"^(.*),", expand=False)
# Trailing run of digits of the street part; missing when it does not end in a digit.
df_customers["StreetNumber"] = df_customers["Street"].str.extract(r"(\d+)$", expand=False)

# astype(str) stringifies missing numbers (e.g. "nan"), which never start with "0".
has_leading_zero = df_customers["StreetNumber"].astype(str).str.startswith("0")
gator_som_borjar_pa_0 = df_customers[has_leading_zero]
# One row per distinct (Customer, Address) pair.
gator_unique = gator_som_borjar_pa_0[["Customer", "Address"]].drop_duplicates()

logga(f"Addresses with street numbers starting with '0': {len(gator_unique)}")
gator_unique.head()


[2025-06-04 17:09:16] Extracting addresses...
[2025-06-04 17:09:16] Addresses with street numbers starting with '0': 76


Unnamed: 0,Customer,Address
0,Sofie Ibrahim,"Ängsvägen 03, 14010 Gävle"
2,Mona Lundgren,"Kyrkvägen 084, 49722 Göteborg"
12,Camilla Johansson,"Åkertorget 069, 39757 Motala"
28,Mari Granath Åkesson,"Ringvägen 046, 63456 Karlstad"
40,Margareta Eriksson,"Parkgatan 083, 83788 Göteborg"


In [8]:
logga("Extracting addresses with street number '00'...")

# Exact string comparison against the StreetNumber column; rows whose number
# is missing compare unequal and are left out.
is_double_zero = df_customers["StreetNumber"] == "00"
gator_med_00 = df_customers.loc[is_double_zero]

logga(f"Addresses with street number '00': {len(gator_med_00)}")
gator_med_00.loc[:, ["Customer", "Address"]].head()


[2025-06-04 17:09:21] Extracting addresses with street number '00'...
[2025-06-04 17:09:21] Addresses with street number '00': 1


Unnamed: 0,Customer,Address
447,Christina Dahl,"Parkstigen 00, 80634 Alingsås"
