In [None]:
# Step 1: Install Great Expectations
!pip install great_expectations --quiet

# Step 2: Import necessary libraries
import pandas as pd
import great_expectations as ge
from great_expectations.dataset import PandasDataset

# Step 3: Create sample dataset (or load your own)
# The sample intentionally contains missing values (age, salary, email) and
# one malformed email address so that some expectations below fail.
data = {
    "id": [101, 102, 103, 104, 105],
    "age": [23, 45, None, 29, 40],
    "salary": [50000, 70000, 60000, None, 80000],
    "email": ["a@x.com", "b@y.com", "invalidemail", "d@z.com", None],
}
df = pd.DataFrame(data)

# Step 4: Convert to a GE-compatible dataset
ge_df = ge.from_pandas(df)

# Step 5: Define validation rules
# Identifier column: must be present and unique.
ge_df.expect_column_values_to_not_be_null("id")
ge_df.expect_column_values_to_be_unique("id")
# Age must fall inside the 18-65 range.
ge_df.expect_column_values_to_be_between("age", min_value=18, max_value=65)
# Salary must be present.
ge_df.expect_column_values_to_not_be_null("salary")
# Email must contain exactly one "@" and at least one "." after it.
ge_df.expect_column_values_to_match_regex("email", r"^[^@]+@[^@]+\.[^@]+$")

# Step 6: Validate
results = ge_df.validate()

# Optional: Pretty summary
import json

# The validation result is a Great Expectations result object rather than a
# plain dict in current 0.x releases, so passing it straight to json.dumps
# raises TypeError. Convert it to JSON-safe primitives first; fall back to the
# raw value for versions whose validate() already returns a dict.
summary = results.to_json_dict() if hasattr(results, "to_json_dict") else results
print(json.dumps(summary, indent=2))
