In [6]:
import great_expectations as gx
import pandas as pd
import warnings
warnings.filterwarnings("ignore", message="`result_format` configured at the Validator-level*")

# Load the data
df = pd.read_csv("./data/transactions.csv")
df.index += 2
df["timestamp"] = pd.to_datetime(df["timestamp"])


# Create the ephemeral GX context
context = gx.get_context()

# Add a pandas datasource
data_source = context.data_sources.add_pandas(name="pandas")

# Add a dataframe asset
data_asset = data_source.add_dataframe_asset(name="transactions_data")

# Define the batch (entire DataFrame)
batch_definition = data_asset.add_batch_definition_whole_dataframe(name="batch_def")
batch = batch_definition.get_batch(batch_parameters={"dataframe": df})

# Create the expectation suite with a name
suite = gx.core.expectation_suite.ExpectationSuite(name="transactions_suite")

# Get the validator using the suite
validator = context.get_validator(batch=batch, expectation_suite=suite)

# Add expectations
validator.expect_column_values_to_be_between("amount", min_value=0.01, max_value=100000)

# Add expectations for transaction data
validator.expect_column_values_to_not_be_null("transaction_id")
validator.expect_column_values_to_be_unique("transaction_id")
validator.expect_column_values_to_match_regex(
    "transaction_id",
    r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$"
)
validator.expect_column_value_lengths_to_equal("transaction_id", 36)

validator.expect_column_values_to_not_be_null("timestamp")

validator.expect_column_values_to_be_between("amount", min_value=0.01)
validator.expect_column_values_to_not_be_null("currency")
validator.expect_column_values_to_match_regex("currency", r"^[A-Z]{3}$")

validator.expect_column_values_to_not_be_null("sender_account")
validator.expect_column_values_to_not_be_null("receiver_account")

validator.expect_column_values_to_not_be_null("sender_country")
validator.expect_column_values_to_not_be_null("receiver_country")
validator.expect_column_values_to_not_be_null("sender_municipality")
validator.expect_column_values_to_not_be_null("receiver_municipality")

validator.expect_column_values_to_not_be_null("transaction_type")

validator.expect_column_values_to_not_be_null("notes")

# Validate
results = validator.validate()

# Print results
print(results)



Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s][A
Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s][A
Calculating Metrics:  25%|██▌       | 2/8 [00:00<00:00, 331.25it/s][A
Calculating Metrics:  25%|██▌       | 2/8 [00:00<00:00, 198.77it/s][A
Calculating Metrics:  38%|███▊      | 3/8 [00:00<00:00, 248.58it/s][A
Calculating Metrics:  38%|███▊      | 3/8 [00:00<00:00, 189.47it/s][A
Calculating Metrics:  62%|██████▎   | 5/8 [00:00<00:00, 85.26it/s] [A
Calculating Metrics:  62%|██████▎   | 5/8 [00:00<00:00, 79.06it/s][A
Calculating Metrics: 100%|██████████| 8/8 [00:00<00:00, 73.99it/s][A
Calculating Metrics: 100%|██████████| 8/8 [00:00<00:00, 73.99it/s][A
Calculating Metrics: 100%|██████████| 8/8 [00:00<00:00, 73.99it/s][A
Calculating Metrics: 100%|██████████| 8/8 [00:00<00:00, 66.29it/s][A

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s][A
Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s][A
Calculating Metrics:  33%|███▎      | 2/6 [00

MetricResolutionError: Values passed to expect_column_values_to_match_strftime_format must be of type string.
If you want to validate a column of dates or timestamps, please call the expectation before converting from string format.

In [7]:
validator.expect_column_values_to_match_strftime_format(
    "timestamp",
    "%Y-%m-%d %H:%M:%S"
)



Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s][A[A

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s][A[A

Calculating Metrics:  25%|██▌       | 2/8 [00:00<00:00, 670.45it/s][A[A

Calculating Metrics:  25%|██▌       | 2/8 [00:00<00:00, 309.46it/s][A[A

Calculating Metrics:  38%|███▊      | 3/8 [00:00<00:00, 354.02it/s][A[A

Calculating Metrics:  38%|███▊      | 3/8 [00:00<00:00, 227.26it/s][A[A

MetricResolutionError: Values passed to expect_column_values_to_match_strftime_format must be of type string.
If you want to validate a column of dates or timestamps, please call the expectation before converting from string format.