## Automated Data Quality Monitoring
**Objective**: Use Great Expectations to perform data profiling and write validation rules.

1. Data Profiling with Great Expectations

### Profile a JSON dataset with product sales data to check for null values in the 'ProductID' and 'Price' fields.
- Create an expectation suite and connect it to the data context.
- Use the `expect_column_values_to_not_be_null` expectation to profile these fields.
- Review the summary to identify any unexpected null values.

In [None]:
# write your code from here

import json
import os

import great_expectations as gx
import pandas as pd

# Profile a small product-sales JSON dataset for null values in the
# 'ProductID' and 'Price' columns.
#
# NOTE(review): written against the Great Expectations 1.x Fluent API
# (context.data_sources / context.suites / Batch.validate). The earlier
# version of this cell mixed 0.x and 1.x calls and failed with an
# AttributeError before any validation ran -- confirm the installed
# version is 1.x.

# Initialize Great Expectations context
context = gx.get_context()

# Create a sample JSON dataset for product sales data.
# Two records deliberately contain nulls so the profiling has something to find.
json_data = [
    {"ProductID": "PROD101", "ProductName": "Laptop", "Price": 1200.00, "QuantitySold": 10},
    {"ProductID": None, "ProductName": "Mouse", "Price": 25.00, "QuantitySold": 50},
    {"ProductID": "PROD104", "ProductName": "Keyboard", "Price": 75.00, "QuantitySold": 30},
    {"ProductID": "PROD105", "ProductName": "Monitor", "Price": None, "QuantitySold": 15},
    {"ProductID": "PROD106", "ProductName": "Webcam", "Price": 50.00, "QuantitySold": 25},
]

# Save the JSON data to a file, load it back as a DataFrame, and always
# remove the sample file -- even when loading fails -- so reruns start clean.
json_file_path = 'product_sales.json'
with open(json_file_path, 'w', encoding='utf-8') as f:
    json.dump(json_data, f)
try:
    sales_df = pd.read_json(json_file_path)
finally:
    os.remove(json_file_path)

# Define datasource parameters
datasource_name = 'product_json_datasource'
data_asset_name = 'product_sales_data'
batch_definition_name = 'product_sales_whole_dataframe'

# Register (or replace) a pandas data source so that rerunning the cell does
# not fail on an already-existing name, then expose the DataFrame as a batch.
data_source = context.data_sources.add_or_update_pandas(name=datasource_name)
data_asset = data_source.add_dataframe_asset(name=data_asset_name)
batch_definition = data_asset.add_batch_definition_whole_dataframe(batch_definition_name)
batch = batch_definition.get_batch(batch_parameters={"dataframe": sales_df})
print(f"Validating data asset: {data_asset_name}")

# Create an Expectation Suite for profiling null values (or reuse it on rerun)
expectation_suite_name = 'product_sales_null_profiling'
try:
    suite = context.suites.get(expectation_suite_name)
except gx.exceptions.DataContextError:
    suite = context.suites.add(gx.ExpectationSuite(name=expectation_suite_name))
    print(f"Created Expectation Suite: {expectation_suite_name}")
else:
    print(f"Loaded existing Expectation Suite: {expectation_suite_name}")

# Profile for null values in 'ProductID' and 'Price'.
# NOTE(review): the suite is registered with the context above, so added
# expectations are expected to be persisted with it -- confirm for your version.
for profiled_column in ('ProductID', 'Price'):
    suite.add_expectation(
        gx.expectations.ExpectColumnValuesToNotBeNull(column=profiled_column)
    )
print(f"Saved Expectation Suite: {expectation_suite_name}")

# Run the validation (which acts as our profiling run in this case)
validation_result = batch.validate(suite)

print("\nValidation Results (Summary of Null Values):\n")
for result in validation_result.results:
    expectation_config = result.expectation_config
    if expectation_config.type != 'expect_column_values_to_not_be_null':
        continue
    column_name = expectation_config.kwargs['column']
    if result.success:
        print(f"Column '{column_name}': No null values found.")
    else:
        unexpected_count = result.result['unexpected_count']
        print(f"Column '{column_name}': Found {unexpected_count} null values.")

# Build Data Docs and report where each site was actually written; the
# location depends on the context type, so it is not hard-coded here.
docs_sites = context.build_data_docs()
for site_name, index_location in docs_sites.items():
    print(f"\nData Docs generated for site '{site_name}': {index_location}")

AttributeError: 'EphemeralDataContext' object has no attribute 'add_json'

2. Writing Validation Rules for Data Ingestion

### Define validation rules for an API data source to confirm that the 'Status' field contains only predefined statuses ('Active', 'Inactive').

- Apply `expect_column_values_to_be_in_set` to check field values during data ingestion.
- Execute the validation and review any mismatches.