In [None]:
# Import the great_expectations library, which is a Python package for data validation and data quality
# Great Expectations is a framework for data validation and data quality that allows you to define expectations for your data
# and then validate your data against those expectations.
import great_expectations as gx

In [None]:
# Obtain the Great Expectations data context, the main entry point to the library.
# The datasource, expectation suite, validator and checkpoint used in this
# notebook are all requested from this one object.
context = gx.get_context()

In [None]:
# Register a pandas datasource on the context. A datasource describes where the
# data to validate comes from (a CSV file, a database, or other storage); the
# pandas flavour is the one used with Pandas DataFrames.
# add_or_update_pandas is used so this cell matches the add_or_update_* calls
# made for the expectation suite and the checkpoint, and so that re-running the
# cell against a context that already holds "example datasource" updates it
# instead of failing on the duplicate name.
datasource = context.sources.add_or_update_pandas(name="example datasource")

In [None]:
# Declare a CSV asset on the datasource. An asset is one concrete dataset or
# file that Great Expectations can validate -- here, a single CSV file.
# filepath_or_buffer is the path to the CSV file, or a buffer holding the CSV data.
asset = datasource.add_csv_asset(
    name="example asset",
    filepath_or_buffer="data/example.csv",
)

In [None]:
# Create the expectation suite, or update it if one with this name already exists.
# An expectation is a single check applied to the data; a suite groups such
# checks into the set of rules the data is validated against.
expectation_suite = context.add_or_update_expectation_suite(
    "example expectation suite"
)

In [None]:
# Build the batch request that selects the data to validate from the asset.
example_batch_request = asset.build_batch_request()

# Obtain a validator that pairs that data with the expectation suite.
# The validator is used to define expectations interactively: the batch acts
# as example data and gives immediate feedback on whether each new
# expectation is met.
validator = context.get_validator(
    batch_request=example_batch_request,
    expectation_suite=expectation_suite,
)

In [None]:
# Preview the first few rows of the batch held by the validator -- a quick
# sanity check of the data before writing expectations against it.
validator.head()

In [None]:
# Table-level expectation: the data must contain between 1 and 100 rows.
validator.expect_table_row_count_to_be_between(min_value=1, max_value=100)

In [None]:
# Column-level expectation: every value in the "fruit" column must be one of
# "apple", "banana" or "grapefruit".
validator.expect_column_values_to_be_in_set(
    column="fruit",
    value_set=["apple", "banana", "grapefruit"],
)

In [None]:
# Save the expectation suite to the context so it can be reused later.
# discard_failed_expectations=False keeps every expectation defined above in
# the saved suite, including any that did not hold for the example batch.
# Without it, failing expectations are dropped on save and the checkpoint run
# later in this notebook could never report them.
validator.save_expectation_suite(discard_failed_expectations=False)

In [None]:
# Create the checkpoint, or update it if one with this name already exists.
# A checkpoint bundles data, a set of expectations, and the actions to take
# once validation has finished; it can hold any number of data/expectation
# pairings. Examples of post-validation actions:
# - store the results
# - generate a report
# - send out a message
# By default the results are stored in a file and a report is generated.
# The validator passed in supplies both the data and the expectations.
checkpoint = context.add_or_update_checkpoint(
    name="example checkpoint",
    validator=validator,
)

In [None]:
# Run the checkpoint: validate the data against the expectations and keep the
# returned results in `result`.
# NOTE(review): the outcome is not inspected here; presumably `result.success`
# reports whether validation passed -- confirm before relying on it.
result = checkpoint.run()