### Implementing Basic Data Validation with Great Expectations
**Description**: Set up a simple data validation using Great Expectations to check the completeness of a dataset.

**Steps**:
1. Installation
2. Initialize Great Expectations
3. Create a Data Context in Python
4. Create an Expectation Suite
5. Load Sample Data and Validate Completeness
6. Run Validations

In [1]:
import pandas as pd
import great_expectations as ge

def create_sample_data():
    """
    Creates a sample healthcare DataFrame with intentional missing and invalid values.
    """
    data = {
        "patient_id": [1, 2, 3, 4],
        "age": [25, None, 35, "fifty"],  # 'fifty' is invalid type to test error handling
        "heart_rate": [72, 80, 76, None],
        "blood_pressure": [120, 130, 115, 140]
    }
    return pd.DataFrame(data)

def validate_healthcare_data(df):
    """
    Validates healthcare data completeness and range using Great Expectations.
    Returns the validation results dictionary.
    """
    try:
        # Convert to GE DataFrame
        ge_df = ge.from_pandas(df)

        # Define expectations:
        # - patient_id, age, heart_rate, blood_pressure should NOT be null
        ge_df.expect_column_values_to_not_be_null("patient_id")
        ge_df.expect_column_values_to_not_be_null("age")
        ge_df.expect_column_values_to_not_be_null("heart_rate")
        ge_df.expect_column_values_to_not_be_null("blood_pressure")

        # - age should be between 0 and 120 (skip invalid types will cause failure)
        ge_df.expect_column_values_to_be_between("age", min_value=0, max_value=120)

        # - heart_rate realistic bounds
        ge_df.expect_column_values_to_be_between("heart_rate", min_value=40, max_value=180)

        # Run validation
        results = ge_df.validate()

        return results

    except Exception as e:
        print(f"❌ Exception during validation: {e}")
        return None

def print_validation_results(results):
    """
    Nicely print validation summary.
    """
    if results is None:
        print("No validation results to show due to error.")
        return

    print("\n✅ Validation Completed")
    print("----------------------------")
    print(f"Overall Success: {results['success']}")
    for r in results["results"]:
        exp = r["expectation_config"]["expectation_type"]
        col = r["expectation_config"]["kwargs"].get("column", "")
        print(f"{exp} on {col}: {r['success']}")

def unit_test_validation():
    """
    Basic unit test to verify that validation catches errors.
    """
    print("\nRunning Unit Test for Validation...")

    # Create invalid data purposely
    test_data = {
        "patient_id": [1, 2, None],
        "age": [25, -5, 150],  # -5 and 150 are invalid ages
        "heart_rate": [72, 200, None],  # 200 out of range, None missing
        "blood_pressure": [120, None, 115]
    }
    df = pd.DataFrame(test_data)

    results = validate_healthcare_data(df)
    print_validation_results(results)

    assert results is not None, "Validation returned None due to exception"
    assert results["success"] is False, "Validation should fail with invalid data"
    failed = [r for r in results["results"] if not r["success"]]
    assert len(failed) >= 1, "Expected at least one failing expectation"

    print("✅ Unit Test Passed: Validation correctly detects invalid data.\n")

if __name__ == "__main__":
    # Create sample data and validate
    df = create_sample_data()
    results = validate_healthcare_data(df)
    print_validation_results(results)

    # Run unit test
    unit_test_validation()

❌ Exception during validation: module 'great_expectations' has no attribute 'from_pandas'
No validation results to show due to error.

Running Unit Test for Validation...
❌ Exception during validation: module 'great_expectations' has no attribute 'from_pandas'
No validation results to show due to error.


AssertionError: Validation returned None due to exception