In [None]:
import great_expectations as gx
from great_expectations.core.batch import BatchRequest
from great_expectations.checkpoint import SimpleCheckpoint
from great_expectations.data_context import FileDataContext
import pandas as pd

# Set up the Great Expectations project rooted at ../services.
project_root = "../services"

# The instance built on the next line is discarded; only the context returned
# by gx.get_context() is used afterwards.
# NOTE(review): presumably kept for a side effect of constructing it (e.g.
# creating the project on disk) - confirm whether this call is still needed.
FileDataContext(project_root_dir=project_root)
context = gx.get_context(project_root_dir=project_root)

# Register the expectation suite under a fixed name; overwrite_existing=True is
# passed so re-running this script does not stop on an already-present suite.
suite_name = "feature_validation_suite"
context.create_expectation_suite(suite_name, overwrite_existing=True)

# Step 3: Create expectations to validate all features
# Load the feature table the expectations are declared against.
data = pd.read_csv("../data/data.csv")

# Wrap the DataFrame so expectation methods can be called on it directly.
df_ge = gx.from_pandas(data)

# One-hot encoded columns are recognised by a name fragment (substring match,
# not prefix match) and may only contain 0 or 1.
one_hot_markers = ("flat_type_", "flat_model_", "storey_range_")
one_hot_columns = [
    col for col in data.columns
    if any(marker in col for marker in one_hot_markers)
]
for col in one_hot_columns:
    df_ge.expect_column_values_to_be_in_set(col, [0, 1])

# 'floor_area_sqm' must lie in [-2, 2].
# NOTE(review): these bounds look like they assume a scaled/standardised
# column rather than raw square metres - confirm against the preprocessing.
df_ge.expect_column_values_to_be_between("floor_area_sqm", -2, 2)

# 'latitude' and 'longitude' must be valid geographic coordinates.
df_ge.expect_column_values_to_be_between("latitude", -90, 90)
df_ge.expect_column_values_to_be_between("longitude", -180, 180)

# 'remaining_lease' must be strictly positive. Great Expectations has no
# expect_column_values_to_be_greater_than expectation, so the rule is written
# as a one-sided range with an exclusive lower bound.
df_ge.expect_column_values_to_be_between(
    "remaining_lease", min_value=0, strict_min=True
)

# Persist the collected expectations under the suite name.
# NOTE(review): get_expectation_suite() is called with its defaults; check
# whether expectations that fail on this data should be kept in the suite.
context.save_expectation_suite(
    expectation_suite=df_ge.get_expectation_suite(),
    expectation_suite_name=suite_name,
)

# Step 4: Create a batch request naming the data asset to validate
# BatchRequest identifies a batch by datasource, data connector and asset name.
# It has no batch_identifiers parameter (that keyword belongs to
# RuntimeBatchRequest), so passing one fails when the request is constructed.
# NOTE(review): this request does not reference the in-memory `data` frame.
# Validating that DataFrame directly would need a RuntimeBatchRequest and a
# runtime data connector on the datasource - confirm which is intended.
batch_request = BatchRequest(
    datasource_name="pandas",
    data_connector_name="default_inferred_data_connector_name",
    data_asset_name="your_file",  # this should match the name of your file
)

# Step 5: Create a checkpoint to check the validity of the features
# Each checkpoint action is a (name, action class name) pair; the nested dict
# layout expected in "action_list" is built from these pairs below.
checkpoint_actions = (
    ("store_validation_result", "StoreValidationResultAction"),
    ("store_evaluation_parameters", "StoreEvaluationParametersAction"),
    ("update_data_docs", "UpdateDataDocsAction"),
)

checkpoint_config = {
    "name": "feature_validation_checkpoint",
    "config_version": 1.0,
    "class_name": "SimpleCheckpoint",
    "run_name_template": "%Y%m%d-%H%M%S-validation",
    "expectation_suite_name": suite_name,
    "batch_request": batch_request,
    "action_list": [
        {"name": action_name, "action": {"class_name": action_class}}
        for action_name, action_class in checkpoint_actions
    ],
}

# Register the checkpoint with the data context. This only adds it; the
# checkpoint is not run here.
context.add_checkpoint(**checkpoint_config)