In [7]:
import great_expectations as gx
import pandas as pd
import warnings
warnings.filterwarnings("ignore", message="`result_format` configured at the Validator-level*")


# Load the data
df = pd.read_csv("C:/Users/johan/Desktop/Johan/python-bank-project-start-main/data/sebank_customers_with_accounts.csv")

# Create the ephemeral GX context
context = gx.get_context()

# Add a pandas datasource
data_source = context.data_sources.add_pandas(name="pandas")

# Add a dataframe asset
data_asset = data_source.add_dataframe_asset(name="customers_with_accounts_data")

# Define the batch (entire DataFrame)
batch_definition = data_asset.add_batch_definition_whole_dataframe(name="batch_def")
batch = batch_definition.get_batch(batch_parameters={"dataframe": df})

# Create the expectation suite with a name
suite = gx.core.expectation_suite.ExpectationSuite(name="customers_with_accounts_suite")

# Get the validator using the suite
validator = context.get_validator(batch=batch, expectation_suite=suite)

# Add expectations:
#Customer
validator.expect_column_values_to_not_be_null("Customer") #kollar att inga rader är NULL


#Address
validator.expect_column_values_to_not_be_null("Address") #kollar att inga rader är NULL
#Phone
validator.expect_column_values_to_not_be_null("Phone")#inget NULL-värde


#Personnummer
validator.expect_column_values_to_not_be_null("Personnummer") #kollar att inga rader är NULL
validator.expect_column_values_to_match_regex("Personnummer", r"^\d{6}-\d{4}$|^\d{8}-\d{4}$") #validera personnummer där både YYMMDD-XXXX och YYYYMMDD-XXXXq

#BankAccount
validator.expect_column_values_to_be_unique("BankAccount") # kollar så att alla kontonummer är unika
validator.expect_column_values_to_not_be_null("BankAccount") #kollar att inget kontonumme är NULL
validator.expect_column_values_to_match_regex("BankAccount", r"^SE8902.*$") #  konton börjar på SE8902.
null_rows = df[df['BankAccount'].isnull()]
print(null_rows)



# Validate
results = validator.validate()

# Print results
print(results)


Calculating Metrics: 100%|██████████| 6/6 [00:00<00:00, 600.40it/s] 
Calculating Metrics: 100%|██████████| 6/6 [00:00<00:00, 1000.19it/s]
Calculating Metrics: 100%|██████████| 6/6 [00:00<00:00, 1000.03it/s]
Calculating Metrics: 100%|██████████| 6/6 [00:00<00:00, 1329.56it/s]
Calculating Metrics: 100%|██████████| 8/8 [00:00<00:00, 954.31it/s] 
Calculating Metrics: 100%|██████████| 8/8 [00:00<00:00, 1333.85it/s]
Calculating Metrics: 100%|██████████| 6/6 [00:00<00:00, 1200.20it/s]
Calculating Metrics: 100%|██████████| 8/8 [00:00<00:00, 1142.71it/s]


Empty DataFrame
Columns: [Customer, Address, Phone, Personnummer, BankAccount]
Index: []


Calculating Metrics: 100%|██████████| 27/27 [00:00<00:00, 2701.03it/s]

{
  "success": false,
  "results": [
    {
      "success": true,
      "expectation_config": {
        "type": "expect_column_values_to_not_be_null",
        "kwargs": {
          "batch_id": "pandas-customers_with_accounts_data",
          "column": "Customer"
        },
        "meta": {}
      },
      "result": {
        "element_count": 1001,
        "unexpected_count": 0,
        "unexpected_percent": 0.0,
        "partial_unexpected_list": []
      },
      "meta": {},
      "exception_info": {
        "raised_exception": false,
        "exception_traceback": null,
        "exception_message": null
      }
    },
    {
      "success": true,
      "expectation_config": {
        "type": "expect_column_values_to_not_be_null",
        "kwargs": {
          "batch_id": "pandas-customers_with_accounts_data",
          "column": "Address"
        },
        "meta": {}
      },
      "result": {
        "element_count": 1001,
        "unexpected_count": 0,
        "unexpected_percent




In [8]:
import json
import os

output_path = os.path.join("report", "validation_results_customers_with_accounts.json")
with open(output_path, "w") as f:
    json.dump(results.to_json_dict(), f, indent=4)
