In [6]:
import great_expectations as gx
import pandas as pd
import warnings

# Stäng specifika varningar från GX
warnings.filterwarnings("ignore", message="`result_format` configured at the Validator-level*")

# 1. Läs in data
df = pd.read_csv("./data/sebank_customer_FINAL.csv")
df.index += 2

# Lägg till fix för telefonnummer
df["Phone"] = df["Phone"].astype(str)
df["Phone"] = df["Phone"].apply(lambda x: x if x.startswith("0") else "0" + x)

# 2. Skapa Great Expectations-kontext
context = gx.get_context()

# 3. Lägg till pandas-datakälla
data_source = context.data_sources.add_pandas(name="pandas")
data_asset = data_source.add_dataframe_asset(name="accounts_data")
batch_def = data_asset.add_batch_definition_whole_dataframe(name="batch_def")
batch = batch_def.get_batch(batch_parameters={"dataframe": df})

# 4. Skapa förväntningssuite
suite = gx.core.expectation_suite.ExpectationSuite(name="accounts_suite")
validator = context.get_validator(batch=batch, expectation_suite=suite)

# 5. Funktionsbaserade valideringar
def validate_customer(v):
    v.expect_column_values_to_not_be_null("Customer")

def validate_address(v):
    v.expect_column_values_to_not_be_null("Street")
    v.expect_column_values_to_not_be_null("PostalCode")
    v.expect_column_values_to_match_regex("PostalCode", r"^\d{5}$")
    v.expect_column_values_to_not_be_null("City")



def validate_phone(v):
    v.expect_column_values_to_not_be_null("Phone")
    v.expect_column_values_to_match_regex("Phone", r"^0[1-9]\d{6,9}$")

def validate_personnummer(v):
    v.expect_column_values_to_not_be_null("Personnummer")
    v.expect_column_values_to_match_regex("Personnummer", r"^\d{6,8}[-]?\d{4}$")


def validate_bank_account(v):
    v.expect_column_values_to_not_be_null("BankAccount")
    v.expect_column_values_to_match_regex("BankAccount", r"^SE8902")
    v.expect_column_values_to_be_unique("BankAccount")

# 6. Kör alla valideringar
validate_customer(validator)
validate_address(validator)
validate_phone(validator)
validate_personnummer(validator)
validate_bank_account(validator)

# 7. Kör validering en gång
results = validator.validate()

# 8. Skriv ut resultat per kolumn
for result in results["results"]:
    config = result.expectation_config.to_json_dict()
    result_data = result.result

    column = config.get("kwargs", {}).get("column", "❓(okänd kolumn)")
    expectation = config.get("expectation_type") or config.get("expectationType") or "Okänd förväntning"
    success = result.success

    print(f"{column} → {expectation}: {'✅' if success else '❌'}")

    if not success:
        unexpected_count = result_data.get("unexpected_count", "okänt")
        unexpected_values = result_data.get("partial_unexpected_list", [])

        print(f"      Number of unexpected values: {unexpected_count}")
        if unexpected_values:
            print(f"     Unexpected values: {unexpected_values}")
        print()



Calculating Metrics: 100%|██████████| 6/6 [00:00<00:00, 698.53it/s] 
Calculating Metrics: 100%|██████████| 6/6 [00:00<00:00, 700.06it/s] 
Calculating Metrics: 100%|██████████| 6/6 [00:00<00:00, 793.10it/s]
Calculating Metrics: 100%|██████████| 8/8 [00:00<00:00, 720.61it/s] 
Calculating Metrics: 100%|██████████| 6/6 [00:00<00:00, 779.20it/s] 
Calculating Metrics: 100%|██████████| 6/6 [00:00<00:00, 1131.96it/s]
Calculating Metrics: 100%|██████████| 8/8 [00:00<00:00, 1026.69it/s]
Calculating Metrics: 100%|██████████| 6/6 [00:00<00:00, 1102.17it/s]
Calculating Metrics: 100%|██████████| 8/8 [00:00<00:00, 790.89it/s] 
Calculating Metrics: 100%|██████████| 6/6 [00:00<00:00, 657.00it/s] 
Calculating Metrics: 100%|██████████| 8/8 [00:00<00:00, 614.57it/s] 
Calculating Metrics: 100%|██████████| 8/8 [00:00<00:00, 752.81it/s] 
Calculating Metrics: 100%|██████████| 39/39 [00:00<00:00, 900.41it/s]

Customer → Okänd förväntning: ✅
Street → Okänd förväntning: ✅
PostalCode → Okänd förväntning: ✅
PostalCode → Okänd förväntning: ✅
City → Okänd förväntning: ✅
Phone → Okänd förväntning: ✅
Phone → Okänd förväntning: ❌
      Number of unexpected values: 5
     Unexpected values: ['0341247', '0341247', '0341247', '0341247', '03387']

Personnummer → Okänd förväntning: ✅
Personnummer → Okänd förväntning: ✅
BankAccount → Okänd förväntning: ✅
BankAccount → Okänd förväntning: ✅
BankAccount → Okänd förväntning: ✅



