In [8]:
import great_expectations as gx
import pandas as pd
import warnings
warnings.filterwarnings("ignore", message="`result_format` configured at the Validator-level*")

# Load the data
df = pd.read_csv("./data/sebank_customers_with_accounts.csv")
df.index += 2

# Create the ephemeral GX context
context = gx.get_context()

# Add a pandas datasource
data_source = context.data_sources.add_pandas(name="pandas")

# Add a dataframe asset
data_asset = data_source.add_dataframe_asset(name="accounts_data")

# Define the batch (entire DataFrame)
batch_definition = data_asset.add_batch_definition_whole_dataframe(name="batch_def")
batch = batch_definition.get_batch(batch_parameters={"dataframe": df})

# Create the expectation suite with a name
suite = gx.core.expectation_suite.ExpectationSuite(name="accounts_suite")

# Get the validator using the suite
validator = context.get_validator(batch=batch, expectation_suite=suite)

# Add expectations
validator.expect_column_values_to_not_be_null("Customer")
validator.expect_column_values_to_match_regex("Phone", r"^(\+46\s?\(0\)|0)?\d{1,4}[- ]\d{2,4}([ -]\d{2,4}){1,3}$")
validator.expect_column_values_to_match_regex("Personnummer", r"^\d{6,8}[-]?\d{4}$")
validator.expect_column_values_to_match_regex("BankAccount", r"^[A-Z]{2}\d{4}[A-Z]{4}\d{14}$")
validator.expect_column_values_to_be_unique("BankAccount")

# Validate
results = validator.validate()

# Personer med fler än ett konto
multi_account_holders = df.groupby("Personnummer").size().reset_index(name="AntalKonton")
multi_account_holders = multi_account_holders[multi_account_holders["AntalKonton"] > 1]
multi_account_details = df[df["Personnummer"].isin(multi_account_holders["Personnummer"])]
print(f"\nPersoner med fler än ett konto: {len(multi_account_holders)} personer")
display(multi_account_details)

# Print results
print(results)

Calculating Metrics: 100%|██████████| 6/6 [00:00<00:00, 376.12it/s]
  return column.astype(str).str.contains(regex)
Calculating Metrics: 100%|██████████| 8/8 [00:00<00:00, 507.42it/s]
Calculating Metrics: 100%|██████████| 8/8 [00:00<00:00, 500.04it/s]
Calculating Metrics: 100%|██████████| 8/8 [00:00<00:00, 498.22it/s]
Calculating Metrics: 100%|██████████| 8/8 [00:00<00:00, 718.97it/s]
  return column.astype(str).str.contains(regex)
Calculating Metrics: 100%|██████████| 24/24 [00:00<00:00, 1525.36it/s]


Personer med fler än ett konto: 281 personer





Unnamed: 0,Customer,Address,Phone,Personnummer,BankAccount
2,Sofie Ibrahim,"Ängsvägen 03, 14010 Gävle",061-608 60 88,400118-5901,SE8902EPWK73250364544965
3,Sofie Ibrahim,"Ängsvägen 03, 14010 Gävle",061-608 60 88,400118-5901,SE8902IDSK51225196610969
4,Mona Lundgren,"Kyrkvägen 084, 49722 Göteborg",+46 (0)396 101 64,391117-9285,SE8902OGIV86383792142837
5,Mona Lundgren,"Kyrkvägen 084, 49722 Göteborg",+46 (0)396 101 64,391117-9285,SE8902QZEZ52320024971424
6,Tuulikki Blomqvist,"Kvarnvägen 654, 94181 Borås",+46 (0)918 939 10,981215-7254,SE8902DWZI85436013187521
...,...,...,...,...,...
993,Maj Hammar,"Furuvägen 576, 49096 Västerås",08-987 29 10,380222-3051,SE8902PDPB59162046614533
994,Maj Hammar,"Furuvägen 576, 49096 Västerås",08-987 29 10,380222-3051,SE8902GYOM68592348894619
997,Klara Robertsson,"Ängstorget 866, 51673 Sundsvall",08-960 721 45,480314-7307,SE8902FXEO42999261877496
998,Klara Robertsson,"Ängstorget 866, 51673 Sundsvall",08-960 721 45,480314-7307,SE8902YFAI41572077231677


{
  "success": true,
  "results": [
    {
      "success": true,
      "expectation_config": {
        "type": "expect_column_values_to_not_be_null",
        "kwargs": {
          "batch_id": "pandas-accounts_data",
          "column": "Customer"
        },
        "meta": {}
      },
      "result": {
        "element_count": 1000,
        "unexpected_count": 0,
        "unexpected_percent": 0.0,
        "partial_unexpected_list": []
      },
      "meta": {},
      "exception_info": {
        "raised_exception": false,
        "exception_traceback": null,
        "exception_message": null
      }
    },
    {
      "success": true,
      "expectation_config": {
        "type": "expect_column_values_to_match_regex",
        "kwargs": {
          "batch_id": "pandas-accounts_data",
          "column": "Phone",
          "regex": "^(\\+46\\s?\\(0\\)|0)?\\d{1,4}[- ]\\d{2,4}([ -]\\d{2,4}){1,3}$"
        },
        "meta": {}
      },
      "result": {
        "element_count": 1000,
      