In [49]:
import great_expectations as gx
import pandas as pd
import warnings

# Silence the warning whose text starts with "`result_format` configured at the
# Validator-level". `message` is interpreted as a regular expression matched
# against the start of the warning text, so the trailing `*` merely makes the
# final "l" repeatable; the intended prefix still matches.
warnings.filterwarnings(
    "ignore",
    message="`result_format` configured at the Validator-level*",
)

# Great Expectations plumbing:
# context -> pandas data source -> dataframe asset -> whole-dataframe batch definition.
context = gx.get_context()
datasource = context.data_sources.add_pandas(name="pandas")
data_asset = datasource.add_dataframe_asset(name="transactions_data")
batch_def = data_asset.add_batch_definition_whole_dataframe(name="batch_def")

# Load the transactions CSV and supply the frame to the batch definition at runtime.
df_trans = pd.read_csv("./data/transactions.csv")
batch = batch_def.get_batch(
    batch_parameters={"dataframe": df_trans},
)


In [50]:
# Create a fresh suite named "transactions_suite" and bind it, together with
# the batch built above, to a validator used by the expectation cells below.
suite = gx.core.ExpectationSuite(name="transactions_suite")
validator = context.get_validator(
    batch=batch,
    expectation_suite=suite,
)


In [51]:
# transaction_id: every row must carry an id, and no id may repeat.
validator.expect_column_values_to_not_be_null(column="transaction_id")
# Last expression of the cell, so its result is what the notebook displays.
validator.expect_column_values_to_be_unique(column="transaction_id")

Calculating Metrics: 100%|██████████| 6/6 [00:00<?, ?it/s]
Calculating Metrics: 100%|██████████| 8/8 [00:00<00:00, 154.76it/s]


{
  "success": true,
  "expectation_config": {
    "type": "expect_column_values_to_be_unique",
    "kwargs": {
      "batch_id": "pandas-transactions_data",
      "column": "transaction_id"
    },
    "meta": {}
  },
  "result": {
    "element_count": 100000,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [52]:
# timestamp: values must parse with the "YYYY-MM-DD HH:MM:SS" strftime layout.
validator.expect_column_values_to_match_strftime_format(
    column="timestamp",
    strftime_format="%Y-%m-%d %H:%M:%S",
)

Calculating Metrics: 100%|██████████| 8/8 [00:00<00:00, 16.14it/s] 


{
  "success": true,
  "expectation_config": {
    "type": "expect_column_values_to_match_strftime_format",
    "kwargs": {
      "batch_id": "pandas-transactions_data",
      "column": "timestamp",
      "strftime_format": "%Y-%m-%d %H:%M:%S"
    },
    "meta": {}
  },
  "result": {
    "element_count": 100000,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [53]:
# amount: values are expected to lie between 0.01 and 100000.
validator.expect_column_values_to_be_between(
    column="amount",
    min_value=0.01,
    max_value=100000,
)

Calculating Metrics: 100%|██████████| 8/8 [00:00<00:00, 274.16it/s]


{
  "success": true,
  "expectation_config": {
    "type": "expect_column_values_to_be_between",
    "kwargs": {
      "batch_id": "pandas-transactions_data",
      "column": "amount",
      "min_value": 0.01,
      "max_value": 100000.0
    },
    "meta": {}
  },
  "result": {
    "element_count": 100000,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [54]:
# currency: only the codes listed below are accepted.
# NOTE(review): "RMB" is not an ISO 4217 code (renminbi is "CNY" there);
# presumably it is listed because the source data uses it - confirm.
validator.expect_column_values_to_be_in_set(
    column="currency",
    value_set=[
        "SEK",
        "USD",
        "EUR",
        "DKK",
        "JPY",
        "ZMW",
        "NOK",
        "ZAR",
        "RMB",
        "GBP",
    ],
)

Calculating Metrics: 100%|██████████| 8/8 [00:00<00:00, 253.35it/s]


{
  "success": true,
  "expectation_config": {
    "type": "expect_column_values_to_be_in_set",
    "kwargs": {
      "batch_id": "pandas-transactions_data",
      "column": "currency",
      "value_set": [
        "SEK",
        "USD",
        "EUR",
        "DKK",
        "JPY",
        "ZMW",
        "NOK",
        "ZAR",
        "RMB",
        "GBP"
      ]
    },
    "meta": {}
  },
  "result": {
    "element_count": 100000,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [55]:
# transaction_type: a transaction is either "incoming" or "outgoing".
validator.expect_column_values_to_be_in_set(
    column="transaction_type",
    value_set=["incoming", "outgoing"],
)

Calculating Metrics: 100%|██████████| 8/8 [00:00<00:00, 251.78it/s]


{
  "success": true,
  "expectation_config": {
    "type": "expect_column_values_to_be_in_set",
    "kwargs": {
      "batch_id": "pandas-transactions_data",
      "column": "transaction_type",
      "value_set": [
        "incoming",
        "outgoing"
      ]
    },
    "meta": {}
  },
  "result": {
    "element_count": 100000,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [56]:
# sender/receiver account format
# Both account columns must satisfy the same pattern, so it is defined once
# and shared by the two expectations to keep them from drifting apart.
# Accepted shapes (anchored with ^ and $):
#   "SE" + 4 digits + 4 uppercase letters + 14 digits
#   "GB" + 2 digits + 4 uppercase letters + 14 digits
ACCOUNT_NUMBER_PATTERN = r"^(?:SE\d{4}[A-Z]{4}\d{14}|GB\d{2}[A-Z]{4}\d{14})$"

validator.expect_column_values_to_match_regex(
    "receiver_account",
    ACCOUNT_NUMBER_PATTERN
)

# Kept as the final expression so the notebook still displays this result.
validator.expect_column_values_to_match_regex(
    "sender_account",
    ACCOUNT_NUMBER_PATTERN
)

Calculating Metrics: 100%|██████████| 8/8 [00:00<00:00, 111.58it/s]
Calculating Metrics: 100%|██████████| 8/8 [00:00<00:00, 111.53it/s]


{
  "success": true,
  "expectation_config": {
    "type": "expect_column_values_to_match_regex",
    "kwargs": {
      "batch_id": "pandas-transactions_data",
      "column": "sender_account",
      "regex": "^(?:SE\\d{4}[A-Z]{4}\\d{14}|GB\\d{2}[A-Z]{4}\\d{14})$"
    },
    "meta": {}
  },
  "result": {
    "element_count": 100000,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [57]:
# sender/receiver country: neither column may contain nulls.
validator.expect_column_values_to_not_be_null(column="sender_country")
validator.expect_column_values_to_not_be_null(column="receiver_country")

Calculating Metrics: 100%|██████████| 6/6 [00:00<00:00, 934.70it/s]
Calculating Metrics: 100%|██████████| 6/6 [00:00<00:00, 2574.77it/s]


In [58]:
# Flag null values in receiver_country.
# The boolean flag is stored on df_trans itself as "receiver_country_missing",
# then used as a row mask to preview the first affected rows.
df_trans["receiver_country_missing"] = df_trans["receiver_country"].isna()
missing_rows = df_trans.loc[df_trans["receiver_country_missing"]]
print(missing_rows.head())

                           transaction_id            timestamp    amount  \
675  99723967-1bc3-4f9f-85ed-cc2bfe675f5c  2025-04-17 11:12:00  46995.74   
742  1dd877ea-185c-4f6c-b3f4-668032000d9e  2025-05-12 23:53:00  49040.75   
762  39794385-3319-4f1d-b73a-5830638aa2a6  2025-04-10 23:55:00  15630.05   
845  5b2636bb-70eb-4204-bc42-1b5bc87f3e6e  2025-05-12 03:55:00  29449.80   
906  b2daea58-16d5-40ae-ba90-3d844362b519  2025-03-14 14:17:00  43954.08   

    currency            sender_account          receiver_account  \
675      SEK  SE8902JWPZ44810854846338  SE8902PVDU32397262267370   
742      SEK  SE8902JFXZ40334167631989  SE8902DGRO18476288845448   
762      SEK  SE8902OLFT37200388827050  SE8902FZSN55947389902845   
845      SEK  SE8902GZKC59110904284187  SE8902CFEY43701881421531   
906      SEK  SE8902UFZY41218181108055  SE8902XBIN60287954526260   

    sender_country sender_municipality receiver_country receiver_municipality  \
675         Sweden            Nyköping              N

In [59]:
# sender/receiver municipality: neither column may contain nulls.
validator.expect_column_values_to_not_be_null(column="sender_municipality")
validator.expect_column_values_to_not_be_null(column="receiver_municipality")

Calculating Metrics: 100%|██████████| 6/6 [00:00<00:00, 382.98it/s]
Calculating Metrics: 100%|██████████| 6/6 [00:00<00:00, 371.34it/s]


{
  "success": false,
  "expectation_config": {
    "type": "expect_column_values_to_not_be_null",
    "kwargs": {
      "batch_id": "pandas-transactions_data",
      "column": "receiver_municipality"
    },
    "meta": {}
  },
  "result": {
    "element_count": 100000,
    "unexpected_count": 500,
    "unexpected_percent": 0.5,
    "partial_unexpected_list": [
      null,
      null,
      null,
      null,
      null,
      null,
      null,
      null,
      null,
      null,
      null,
      null,
      null,
      null,
      null,
      null,
      null,
      null,
      null,
      null
    ]
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [60]:
# Flag null values in receiver_municipality.
# The flag is stored on df_trans itself as "missing_receiver_municipality".
df_trans['missing_receiver_municipality'] = df_trans['receiver_municipality'].isnull()
# The flag column is already boolean, so it serves directly as the row mask
# (no `== True` comparison needed).
missing_rows = df_trans[df_trans['missing_receiver_municipality']]
print(missing_rows.head())

                            transaction_id            timestamp    amount  \
179   0af039ef-fc14-481b-a528-98b1913056fc  2025-02-11 10:49:00  32588.84   
185   04ce7b53-4291-4e4c-b30c-fb43ab7cacf8  2025-03-07 02:09:00   9593.38   
559   995b1a26-2f92-4d9f-b9f5-0841ed45c935  2025-01-03 16:02:00  45182.86   
1322  43028e8c-31f5-40f2-87ea-53ecacb06efb  2025-01-27 04:13:00  49186.59   
1332  42099361-786a-4001-8965-831dd69df345  2025-05-10 00:06:00   8806.56   

     currency            sender_account          receiver_account  \
179       SEK  SE8902ZRZI47417483372579  SE8902PBJH90393502972632   
185       SEK  SE8902ERJA13877835143324  SE8902ARJI52424267786590   
559       SEK  SE8902GXKS66714923613080  SE8902ZRRV53604198996393   
1322      SEK  SE8902XNTB41827621868449  SE8902IWYC64669423845053   
1332      SEK  SE8902EQCK63728076800609  SE8902JIJC96071565969922   

     sender_country sender_municipality receiver_country  \
179          Sweden           Sundsvall           Sweden   
18

In [61]:
# Run the validator in a single pass and keep the returned result object in
# `results`, which the export cell later serialises to JSON.
# Presumably this re-evaluates every expectation registered through the
# validator.expect_* calls above - confirm against the GX Validator docs.
results = validator.validate()
# Prints the complete result object; the output is long.
print(results)


Calculating Metrics: 100%|██████████| 54/54 [00:00<00:00, 73.54it/s]

{
  "success": false,
  "results": [
    {
      "success": true,
      "expectation_config": {
        "type": "expect_column_values_to_not_be_null",
        "kwargs": {
          "batch_id": "pandas-transactions_data",
          "column": "transaction_id"
        },
        "meta": {}
      },
      "result": {
        "element_count": 100000,
        "unexpected_count": 0,
        "unexpected_percent": 0.0,
        "partial_unexpected_list": []
      },
      "meta": {},
      "exception_info": {
        "raised_exception": false,
        "exception_traceback": null,
        "exception_message": null
      }
    },
    {
      "success": true,
      "expectation_config": {
        "type": "expect_column_values_to_be_unique",
        "kwargs": {
          "batch_id": "pandas-transactions_data",
          "column": "transaction_id"
        },
        "meta": {}
      },
      "result": {
        "element_count": 100000,
        "unexpected_count": 0,
        "unexpected_percent": 0.0,




In [63]:
import json

# Persist the validation results as JSON indented by 4 spaces.
# The encoding is pinned so the file is written the same way regardless of the
# platform's default locale encoding.
with open("validation_results_transactions.json", "w", encoding="utf-8") as f:
    json.dump(results.to_json_dict(), f, indent=4)
