## Real-World Case Studies

### Healthcare - Medical Prediction Errors:
**Description**: Define automated data quality checks on a healthcare dataset so that
invalid records (implausible ages, missing measurements, inconsistent dates) are caught
before they can introduce errors into predictive models.

In [3]:
import pandas as pd
from great_expectations.dataset import PandasDataset

# STEP 1: Build a small patient table for the validation demo.
# The values are kept exactly as given: they include an age of 140, missing
# heart-rate and glucose readings, and one diagnosis date that is earlier
# than the patient's birth date.
data = {
    "patient_id": [1, 2, 3, 4],
    "age": [25, 140, 35, 50],
    "heart_rate": [72, 80, None, 88],
    "blood_pressure": [120, 130, 115, 140],
    "glucose_level": [90, 110, 85, None],
    "birth_date": ["1998-03-12", "1880-01-01", "1989-07-22", "1975-10-02"],
    "diagnosis_date": ["2022-05-10", "2021-11-03", "2020-08-15", "1970-01-01"],
}
df = pd.DataFrame(data)

# Parse the date strings into pandas datetime columns so they can be
# compared as dates rather than as text.
for date_column in ("birth_date", "diagnosis_date"):
    df[date_column] = pd.to_datetime(df[date_column])

# STEP 2: Wrap the DataFrame in a Great Expectations dataset
# NOTE(review): PandasDataset is imported from `great_expectations.dataset`;
# the ModuleNotFoundError recorded in this cell's output indicates the
# installed release does not provide that module. Presumably an older
# release has to be pinned, or the example migrated to the current API --
# confirm against the Great Expectations release notes.
class HealthcareDataset(PandasDataset):
    """Patient DataFrame on which the expect_* checks below are declared."""


ge_df = HealthcareDataset(df)

# STEP 3: Define expectations
# Each expect_* call declares one data quality rule on ge_df.

# Ages outside the 0-120 range are treated as invalid.
ge_df.expect_column_values_to_be_between("age", min_value=0, max_value=120)

# Every patient must have a value for each of these measurements.
ge_df.expect_column_values_to_not_be_null("heart_rate")
ge_df.expect_column_values_to_not_be_null("blood_pressure")
ge_df.expect_column_values_to_not_be_null("glucose_level")

# A diagnosis must come after the patient's birth. The dataset API offers
# only an "A_to_be_greater_than_B" pair expectation (there is no
# "A_to_be_less_than_B"), so the rule is written as
# diagnosis_date > birth_date with the column order swapped.
ge_df.expect_column_pair_values_A_to_be_greater_than_B(
    "diagnosis_date", "birth_date"
)

# STEP 4: Validate the dataset against the expectations declared on it
results = ge_df.validate()

# STEP 5: Print the overall verdict, then one line per expectation
print("\n✅ Validation Summary")
print("---------------------")
print(f"Overall Success: {results['success']}")
for outcome in results["results"]:
    expectation_name = outcome["expectation_config"]["expectation_type"]
    passed = outcome["success"]
    print(f"{expectation_name}: {passed}")

ModuleNotFoundError: No module named 'great_expectations.dataset'