In [7]:
# Question: Data Quality Automation Tools - Introduction to Great Expectations
# Description: Set up a simple Great Expectations check for missing values in a numeric column.



In [8]:
import pandas as pd
import great_expectations as ge
from great_expectations.core.batch import BatchRequest
from great_expectations.data_context import BaseDataContext
from great_expectations.validator.validator import Validator
import os

# Step 1: Create CSV file programmatically
csv_path = "sample_data.csv"
data = {
    "id": [1, 2, 3, 4, 5],
    "score": [95, 88, None, 76, 85],  # 'score' column has one missing value
}
df = pd.DataFrame(data)
df.to_csv(csv_path, index=False)

# Step 2: Create a minimal Great Expectations context and datasource for pandas
context = BaseDataContext(
    project_config={
        "datasources": {
            "my_pandas_datasource": {
                "class_name": "Datasource",
                "execution_engine": {
                    "class_name": "PandasExecutionEngine"
                },
                "data_connectors": {
                    "default_runtime_data_connector": {
                        "class_name": "RuntimeDataConnector",
                        "batch_identifiers": ["default_identifier_name"],
                    }
                },
            }
        },
        "stores": {
            "expectations_store": {"class_name": "InMemoryStore"},
            "validations_store": {"class_name": "InMemoryStore"},
            "checkpoint_store": {"class_name": "InMemoryStore"},
        },
        "expectations_store_name": "expectations_store",
        "validations_store_name": "validations_store",
        "checkpoint_store_name": "checkpoint_store",
        "validation_operators": {},
        "data_docs_sites": {},
    }
)

# Step 3: Load data and create a Validator for it
batch_request = BatchRequest(
    datasource_name="my_pandas_datasource",
    data_connector_name="default_runtime_data_connector",
    data_asset_name="my_data_asset",  # arbitrary name
    runtime_parameters={"batch_data": pd.read_csv(csv_path)},
    batch_identifiers={"default_identifier_name": "default_identifier"},
)

validator: Validator = context.get_validator(batch_request=batch_request)

# Step 4: Add expectation - no missing values in 'score' column
validator.expect_column_values_to_not_be_null("score")

# Step 5: Run validation
results = validator.validate()

# Step 6: Print summary
print(results)

# Optional: print just the missing values check result
for res in results["results"]:
    if res["expectation_config"]["expectation_type"] == "expect_column_values_to_not_be_null":
        print("\nExpectation:", res["expectation_config"]["expectation_type"])
        print("Success:", res["success"])
        print("Details:", res["result"])


ImportError: cannot import name 'BaseDataContext' from 'great_expectations.data_context' (/home/vscode/.local/lib/python3.10/site-packages/great_expectations/data_context/__init__.py)

AttributeError: module 'great_expectations' has no attribute 'from_pandas'