In [1]:
# Activity 4: Data Quality Automation Tools
# Task A: Using Great Expectations
# 19. Setting Up Expectations:
# - Install Great Expectations and set up a basic expectation suite.
# - Validate a dataset and list unmet expectations.

# 20. Testing for Expectation:
# - Create expectations such as “column values must fall within a certain range.”

# 21. Generating Data Docs:
# - Automatically generate data quality documentation.

import great_expectations as gx
from great_expectations.expectations.expectation_configuration import ExpectationConfiguration

def setup_ge_validation(df, expectations, context=None):
    """Validate a pandas DataFrame against a list of expectation specs.

    Registers a pandas datasource named "pandas_ds", a dataframe asset named
    "df_asset" and an expectation suite named "validation_suite" on the data
    context, fills the suite from ``expectations`` and validates ``df``
    against it.

    Args:
        df: The pandas DataFrame to validate.
        expectations: Iterable of dicts, each with a "type" key (the
            expectation name) and a "kwargs" key (the arguments for that
            expectation).
        context: Optional Great Expectations data context to register
            everything on. When omitted, one is obtained from
            ``gx.get_context()``. Pass your own context if you need to keep
            working with it afterwards (for example to build Data Docs).

    Returns:
        The object returned by ``validator.validate()`` for the suite.
    """
    if context is None:
        context = gx.get_context()

    # add_or_update_* rather than add_*: calling this function again on a
    # context that already holds these names (a reused context, or a notebook
    # cell that is re-run against a persistent project) must not raise.
    datasource = context.sources.add_or_update_pandas("pandas_ds")
    suite = context.add_or_update_expectation_suite(
        expectation_suite_name="validation_suite"
    )

    for exp in expectations:
        expectation = ExpectationConfiguration(
            expectation_type=exp["type"],
            kwargs=exp["kwargs"]
        )
        suite.add_expectation(expectation)

    asset = datasource.add_dataframe_asset("df_asset")
    batch_request = asset.build_batch_request(dataframe=df)
    validator = context.get_validator(batch_request=batch_request, expectation_suite=suite)
    return validator.validate()

# Example usage
if __name__ == "__main__":
    import pandas as pd

    # The sample deliberately violates both expectations below: "age" holds a
    # missing value and "score" holds 120, which is above the allowed maximum.
    data = pd.DataFrame({
        "age": [25, 30, None, 40],
        "score": [85, 92, 88, 120]
    })

    expectations = [
        {"type": "expect_column_values_to_not_be_null", "kwargs": {"column": "age"}},
        {"type": "expect_column_values_to_be_between", "kwargs": {"column": "score", "min_value": 0, "max_value": 100}}
    ]

    # Use ONE context for validation and for the docs. A second
    # gx.get_context() call may hand back a different (e.g. in-memory) context
    # that knows nothing about the suite registered during validation.
    context = gx.get_context()
    results = setup_ge_validation(data, expectations, context=context)
    print(results)
    # NOTE(review): validator.validate() presumably does not write its result
    # to the validation store, so the docs may only show the suite itself;
    # running a checkpoint may be needed for results to appear - confirm.
    context.build_data_docs()






ImportError: cannot import name 'contextfilter' from 'jinja2' (/home/vscode/.local/lib/python3.10/site-packages/jinja2/__init__.py)

In [None]:
# Task B: Using DQ Labs

# 22. Tool Setup and Configuration:
# - Download and configure DQ Labs on your local environment.
# - Create a new data quality project.

# 23. Data Analysis Automation:
# - Apply DQ Labs for automating data profiling and quality checks.

# 24. Quality Rule Creation:
# - Create quality rules for detecting and handling duplicates or enforcing standards.








