In [11]:
import great_expectations as gx
import pandas as pd
import warnings

# Stäng specifika varningar från GX
warnings.filterwarnings("ignore", message="`result_format` configured at the Validator-level*")

# 1. Läs in data
df = pd.read_csv("./data/sebank_customers_with_accounts.csv")
df.index += 2

# 2. Skapa Great Expectations-kontext
context = gx.get_context()

# 3. Lägg till pandas-datakälla
data_source = context.data_sources.add_pandas(name="pandas")
data_asset = data_source.add_dataframe_asset(name="accounts_data")
batch_def = data_asset.add_batch_definition_whole_dataframe(name="batch_def")
batch = batch_def.get_batch(batch_parameters={"dataframe": df})

# 4. Skapa förväntningssuite
suite = gx.core.expectation_suite.ExpectationSuite(name="accounts_suite")
validator = context.get_validator(batch=batch, expectation_suite=suite)

# 5. Funktionsbaserade valideringar
def validate_customer(v):
    v.expect_column_values_to_not_be_null("Customer")

def validate_address(v):
    v.expect_column_values_to_not_be_null("Address")
"""
def validate_postal_code(v):
    v.expect_column_values_to_not_be_null("PostalCode")
    v.expect_column_values_to_match_regex("PostalCode", r"^\d{5}$")
"""
def validate_phone(v):
    v.expect_column_values_to_not_be_null("Phone")
    v.expect_column_values_to_match_regex("Phone", r"^(\+46\s?\(0\)|0)?\d{1,4}[- ]\d{2,4}([ -]\d{2,4}){1,3}$")

def validate_personnummer(v):
    v.expect_column_values_to_not_be_null("Personnummer")
    v.expect_column_values_to_match_regex("Personnummer", r"^\d{6,8}[-]?\d{4}$")


def validate_bank_account(v):
    v.expect_column_values_to_not_be_null("BankAccount")
    v.expect_column_values_to_match_regex("BankAccount", r"^SE8902")
    v.expect_column_values_to_be_unique("BankAccount")

# 6. Kör alla valideringar
validate_customer(validator)
validate_address(validator)
# validate_postal_code(validator)
validate_phone(validator)
validate_personnummer(validator)
validate_bank_account(validator)

# 7. Kör validering en gång
results = validator.validate()

# 8. Skriv ut resultat per kolumn
for result in results["results"]:
    config = result.expectation_config.to_json_dict()
    result_data = result.result

    column = config.get("kwargs", {}).get("column", "❓(okänd kolumn)")
    expectation = config.get("expectation_type") or config.get("expectationType") or "Okänd förväntning"
    success = result.success

    print(f"{column} → {expectation}: {'✅' if success else '❌'}")

    if not success:
        unexpected_count = result_data.get("unexpected_count", "okänt")
        unexpected_values = result_data.get("partial_unexpected_list", [])

        print(f"    ⚠️  Antal oväntade värden: {unexpected_count}")
        if unexpected_values:
            print(f"    🚫 Exempel på oväntade värden: {unexpected_values}")
        print()





# 9. Spara suiten (valfritt)
# context.save_expectation_suite(expectation_suite=suite)


  """
Calculating Metrics: 100%|██████████| 6/6 [00:00<00:00, 743.32it/s] 
Calculating Metrics: 100%|██████████| 6/6 [00:00<00:00, 680.58it/s] 
Calculating Metrics: 100%|██████████| 6/6 [00:00<00:00, 412.89it/s] 
  return column.astype(str).str.contains(regex)
Calculating Metrics: 100%|██████████| 8/8 [00:00<00:00, 955.59it/s] 
Calculating Metrics: 100%|██████████| 6/6 [00:00<00:00, 892.06it/s] 
Calculating Metrics: 100%|██████████| 8/8 [00:00<00:00, 702.62it/s] 
Calculating Metrics: 100%|██████████| 6/6 [00:00<00:00, 931.76it/s] 
Calculating Metrics: 100%|██████████| 8/8 [00:00<00:00, 992.24it/s] 
Calculating Metrics: 100%|██████████| 8/8 [00:00<00:00, 989.66it/s] 
  return column.astype(str).str.contains(regex)
Calculating Metrics: 100%|██████████| 30/30 [00:00<00:00, 1879.90it/s]

Customer → Okänd förväntning: ✅
Address → Okänd förväntning: ✅
Phone → Okänd förväntning: ✅
Phone → Okänd förväntning: ✅
Personnummer → Okänd förväntning: ✅
Personnummer → Okänd förväntning: ✅
BankAccount → Okänd förväntning: ✅
BankAccount → Okänd förväntning: ✅
BankAccount → Okänd förväntning: ✅



