In [14]:
import pandas as pd
from ruamel import yaml

import great_expectations as ge
from great_expectations.core.batch import RuntimeBatchRequest


In [15]:
# Three sample people, one [name, age] row each.
data = [
    ['tom', 10],
    ['nick', 15],
    ['juli', 14],
]
column_names = ['Name', 'Age']

# Tabulate the rows under the column labels above.
df = pd.DataFrame(data, columns=column_names)

# Leave the frame as the last expression so the notebook renders it.
df

Unnamed: 0,Name,Age
0,tom,10
1,nick,15
2,juli,14


In [16]:
# Obtain the Great Expectations context object. Every later cell (datasource,
# expectation suite, checkpoint, data docs) is driven through this `context`.
context = ge.get_context()

In [17]:
# Execution engine section: a pandas-backed engine.
pandas_engine_config = {
    "module_name": "great_expectations.execution_engine",
    "class_name": "PandasExecutionEngine",
}

# Runtime data connector section; batches passed in at runtime are labelled
# with the single identifier key listed here.
runtime_connector_config = {
    "class_name": "RuntimeDataConnector",
    "module_name": "great_expectations.datasource.data_connector",
    "batch_identifiers": ["default_identifier_name"],
}

# Full datasource definition, assembled from the two sections above.
# Key order is kept the same as a single literal would give.
datasource_config = {
    "name": "example_datasource",
    "class_name": "Datasource",
    "module_name": "great_expectations.datasource",
    "execution_engine": pandas_engine_config,
    "data_connectors": {
        "default_runtime_data_connector_name": runtime_connector_config,
    },
}

In [18]:
# Serialise the datasource settings to YAML, then have the context try to
# instantiate them. The call stays as the cell's last expression so that its
# return value is echoed by the notebook.
datasource_yaml = yaml.dump(datasource_config)
context.test_yaml_config(datasource_yaml)

Attempting to instantiate class from config...
	Instantiating as a Datasource, since class_name is Datasource
	Successfully instantiated Datasource


ExecutionEngine class name: PandasExecutionEngine
Data Connectors:
	default_runtime_data_connector_name:RuntimeDataConnector

	Available data_asset_names (0 of 0):
		Note : RuntimeDataConnector will not have data_asset_names until they are passed in through RuntimeBatchRequest

	Unmatched data_references (0 of 0): []



<great_expectations.datasource.new_datasource.Datasource at 0x246289d8eb0>

In [19]:
# NOTE: this cell is a bare string literal, not live code. Nothing inside the
# quotes executes; the cell just evaluates to the string, which the notebook
# echoes as its output. The quoted text is a guard that would call
# context.add_datasource only when context.get_datasource raises ValueError.
"""# add_datasource only if it doesn't already exist in your Data Context
try:
    context.get_datasource(datasource_config["name"])
except ValueError:
    context.add_datasource(**datasource_config)
else:
    print(
        f"The datasource {datasource_config['name']} already exists in your Data Context!"
    )"""

'# add_datasource only if it doesn\'t already exist in your Data Context\ntry:\n    context.get_datasource(datasource_config["name"])\nexcept ValueError:\n    context.add_datasource(**datasource_config)\nelse:\n    print(\n        f"The datasource {datasource_config[\'name\']} already exists in your Data Context!"\n    )'

In [20]:
# Register the datasource with the context, unpacking the config dict as keyword
# arguments. This call is unconditional: the guarded variant in the previous
# cell is only a string literal and never runs.
# NOTE(review): what happens when a datasource with this name is already
# registered is not visible here — confirm before re-running the notebook.
context.add_datasource(**datasource_config)

<great_expectations.datasource.new_datasource.Datasource at 0x24628c124f0>

In [21]:
# The in-memory frame to validate (df is your dataframe).
batch_runtime_parameters = {"batch_data": df}

# Value for the identifier key declared on the runtime data connector.
batch_identifier_values = {"default_identifier_name": "default_identifier"}

# Describe the batch: which datasource/connector to use, what to call the
# asset, and the data plus identifiers prepared above.
batch_request = RuntimeBatchRequest(
    datasource_name="example_datasource",
    data_connector_name="default_runtime_data_connector_name",
    # This can be anything that identifies this data_asset for you
    data_asset_name="poc_data_asset_name",
    runtime_parameters=batch_runtime_parameters,
    batch_identifiers=batch_identifier_values,
)


In [22]:
# Single place for the suite name used by both calls below.
suite_name = "test_suite"

# Create the suite, replacing any existing one with the same name.
context.create_expectation_suite(
    expectation_suite_name=suite_name,
    overwrite_existing=True,
)

# Bind the runtime batch to that suite.
validator = context.get_validator(
    batch_request=batch_request,
    expectation_suite_name=suite_name,
)

# Print a preview of the batch as plain text.
batch_preview = validator.head()
print(batch_preview)

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

   Name  Age
0   tom   10
1  nick   15
2  juli   14


# Checkpoint settings, written as keyword arguments; the resulting dict has the
# same keys, values and ordering as the equivalent literal.
checkpoint_config = dict(
    name="poc_batch_request_checkpoint",
    config_version=1,
    class_name="SimpleCheckpoint",
    expectation_suite_name="test_suite",
)

# Register the checkpoint with the context under the name given above.
context.add_checkpoint(**checkpoint_config)

In [23]:
# Add a "values are null" expectation on the Name column and keep its result.
# NOTE(review): the sample rows defined earlier all carry a name, so this
# expectation presumably fails on purpose for the demo — confirm intent.
res = validator.expect_column_values_to_be_null(
    column="Name",
    result_format="COMPLETE",
    catch_exceptions=True,
)

Calculating Metrics:   0%|          | 0/7 [00:00<?, ?it/s]

In [24]:
# Fetch the validator's current expectation suite. The result is neither
# assigned nor the cell's last expression, so nothing is displayed for it.
validator.get_expectation_suite()

# Save the suite through the validator.
# NOTE(review): discard_failed_expectations=False presumably keeps expectations
# that did not pass on this batch in the saved suite — confirm against the
# Great Expectations documentation.
validator.save_expectation_suite(discard_failed_expectations=False)

In [25]:
# One validation entry: the runtime batch request built earlier.
checkpoint_validations = [{"batch_request": batch_request}]

# Run the named checkpoint against that batch under a fixed run id and keep
# the returned result object.
results = context.run_checkpoint(
    checkpoint_name="poc_batch_request_checkpoint",
    validations=checkpoint_validations,
    run_id="poc_id",
)


Calculating Metrics:   0%|          | 0/7 [00:00<?, ?it/s]

In [26]:
# Narrative note kept as an unassigned string literal. It is not the cell's
# last expression, so the notebook does not echo it.
"""We can render these results to a friendly report, called a data doc. These data docs will describe the expectations that the data should meet, 
as well as the metrics detailing how well the data meets the requirements. This is how Great Expectations combines testing with documenting."""


# Ask the context to open the data docs described in the note above.
context.open_data_docs()