# Great Expectations Configuration 

Before running this notebook, make sure the project root contains a directory structure similar to this:

<pre>
├── conf
├── data
└── gx
    ├── checkpoints
    ├── expectations
    ├── great_expectations.yml
</pre>

In [None]:
from pathlib import Path
import os
import pandas as pd

# Move path to root folder
os.chdir(str(Path("../")))

%load_ext kedro.extras.extensions.ipython
%reload_kedro

In [14]:
import great_expectations as ge

# Dataset (name of the Kedro catalog entry) and data layer the suite targets.
dataset = "olist_order_items_dataset"
layer = "raw"

# The suite name follows the "<dataset>.<layer>.suite" pattern. Deriving it from
# the two values above keeps the name and `metadata` in sync when either changes.
name_suite = f"{dataset}.{layer}.suite"

# Attached as `meta` to every expectation created for this dataset.
metadata = {"source": dataset, "layer": layer}

## Layers

The `layer` value in `metadata` (also the middle part of the suite name) should be one of:

- raw
- intermediate
- final

In [15]:
# Data context, plus the catalog dataset wrapped as a Great Expectations dataset.
context = ge.get_context()
data = catalog.load(dataset)
df = ge.from_pandas(data)

# Baseline checks: every column must be present and must not contain nulls.
for col in df.columns:
    df.expect_column_to_exist(col, meta=metadata)
    df.expect_column_values_to_not_be_null(col, meta=metadata)

# Collect the expectations recorded on `df` (failed ones are kept) and name the suite.
suite = df.get_expectation_suite(discard_failed_expectations=False)
suite.expectation_suite_name = name_suite

# Save the suite in the context; the returned value is shown as the cell output.
context.add_or_update_expectation_suite(expectation_suite=suite)


[1m{[0m
  [32m"expectation_suite_name"[0m: [32m"olist_order_items_dataset.raw.suite"[0m,
  [32m"ge_cloud_id"[0m: null,
  [32m"expectations"[0m: [1m[[0m
    [1m{[0m
      [32m"expectation_type"[0m: [32m"expect_column_to_exist"[0m,
      [32m"kwargs"[0m: [1m{[0m
        [32m"column"[0m: [32m"order_id"[0m
      [1m}[0m,
      [32m"meta"[0m: [1m{[0m
        [32m"source"[0m: [32m"olist_order_items_dataset"[0m,
        [32m"layer"[0m: [32m"raw"[0m
      [1m}[0m
    [1m}[0m,
    [1m{[0m
      [32m"expectation_type"[0m: [32m"expect_column_values_to_not_be_null"[0m,
      [32m"kwargs"[0m: [1m{[0m
        [32m"column"[0m: [32m"order_id"[0m
      [1m}[0m,
      [32m"meta"[0m: [1m{[0m
        [32m"source"[0m: [32m"olist_order_items_dataset"[0m,
        [32m"layer"[0m: [32m"raw"[0m
      [1m}[0m
    [1m}[0m,
    [1m{[0m
      [32m"expectation_type"[0m: [32m"expect_column_to_exist"[0m,
      [32m"kwargs"[0m: [1m{[

## Create Checkpoint

- https://docs.greatexpectations.io/docs/reference/checkpoints_and_actions
- https://docs.greatexpectations.io/docs/guides/validation/checkpoints/how_to_create_a_new_checkpoint
- https://docs.greatexpectations.io/docs/guides/validation/checkpoints/how_to_configure_a_new_checkpoint_using_test_yaml_config

In [8]:
# Show the data asset names the context knows about, grouped by datasource
# and then by data connector.
context.get_available_data_asset_names()

[1m{[0m[32m'pandas_dataframe'[0m: [1m{[0m[32m'default_runtime_data_connector_name'[0m: [1m[[0m[1m][0m[1m}[0m[1m}[0m

In [9]:
# Show the names of the expectation suites available in the context.
context.list_expectation_suite_names()

[1m[[0m[32m'olist_order_items_dataset.intermediate.suite'[0m[1m][0m