In [7]:
import pandas as pd

# === 1. Load the customer file ===
# Read every column as text so identifier-like fields (personal identity
# number, phone, bank account) are never parsed as numbers, which could drop
# leading zeros or a leading "+".
df = pd.read_csv(
    r"/Bank_transntioner/data/sebank_customers_with_accounts.csv",
    dtype=str,
)

# === 2. Strip and clean strings ===
# The .str accessor leaves missing values missing. The earlier .astype(str)
# turned NaN into the literal text "nan", which made the missing-value report
# below always show 0 and wrote "nan" into the exported files.
text_columns = ["Personnummer", "Customer", "Phone", "Address", "BankAccount"]
for column in text_columns:
    stripped = df[column].str.strip()
    # A cell that is empty after stripping carries no data; treat it as missing.
    df[column] = stripped.replace({"": pd.NA})

# === 3. Validations ===
# Check: unique personal identity numbers.
# keep=False flags every row that shares its Personnummer with another row,
# so the count is the number of affected rows, not the number of extra copies.
# NOTE(review): if the file holds one row per account, customers with several
# accounts are counted here too - confirm that this is the intended meaning.
duplicates = df["Personnummer"].duplicated(keep=False)
print(f"Dubbletter i personnummer: {duplicates.sum()} st")

# Check: missing fields (meaningful now that NaN is preserved by step 2).
missing_counts = df.isnull().sum()
print("Saknade värden per kolumn:\n", missing_counts)

# Check: phone numbers - simple rule: only digits, whitespace, "+", "-" and
# parentheses are allowed. na=False marks a missing phone as invalid so the
# column stays strictly boolean. The flag is kept on df only; it is not part
# of either exported table.
df["valid_phone"] = df["Phone"].str.match(r"^[\d\s\+\-\(\)]+$", na=False)

# === 4. Build two separate tables ===
# Unique customers: the first row seen for each Personnummer wins.
df_customers = df.drop_duplicates(subset="Personnummer")[
    ["Personnummer", "Customer", "Phone", "Address"]
].copy()

# Link customer -> accounts: one row per distinct (Personnummer, BankAccount).
df_accounts = df[["Personnummer", "BankAccount"]].drop_duplicates()

# === 5. Save to files ===
df_customers.to_csv("customers_cleaned.csv", index=False)
df_accounts.to_csv("accounts_cleaned.csv", index=False)

print("Sparade:")
print("→ customers_cleaned.csv")
print("→ accounts_cleaned.csv")


Dubbletter i personnummer: 700 st
Saknade värden per kolumn:
 Customer        0
Address         0
Phone           0
Personnummer    0
BankAccount     0
dtype: int64
Sparade:
→ customers_cleaned.csv
→ accounts_cleaned.csv
