In [None]:
pip install "great-expectations<0.18"

In [None]:
from great_expectations.dataset import PandasDataset
import pandas as pd

# Read the cleaned CSV and wrap the frame so expectation methods can be called on it
actual_data = PandasDataset(
    pd.read_csv(filepath_or_buffer="Clean_data.csv")
)

# Basic Expectations
def test_actual_data_integrity():
    """Assert that the required columns exist and the key columns contain no nulls."""
    # Columns that must be present in the dataset.
    for required_column in ("sideb", "sidea"):
        assert actual_data.expect_column_to_exist(required_column).success

    # Columns that must not contain any null values.
    for non_null_column in ("country_primary", "NSAdyad_id"):
        assert actual_data.expect_column_values_to_not_be_null(
            non_null_column
        ).success

# Schema Validation
def test_actual_data_schema():
    """Assert that every column listed in the schema holds values of its expected type.

    The type names are passed to ``expect_column_values_to_be_of_type``. For a
    ``PandasDataset`` they must be numpy dtype names or native Python type
    names, so text columns are declared as ``"str"``; ``"string"`` is not a
    recognised name and makes the expectation raise instead of validating.

    Raises:
        AssertionError: if any column fails its type expectation; the message
            names the offending column and the expected type.
    """
    schema = {
        "sideb": "str",
        "sideb_full": "str",
        "country_primary": "str",
        "sidea": "str",
        "sideb_id": "int",
        "NSAdyad_id": "int",
        "frontline_prev_best": "float",
        "frontline": "float",
        "lead": "float",
    }
    for col, dtype in schema.items():
        result = actual_data.expect_column_values_to_be_of_type(col, dtype)
        assert result.success, f"column {col!r} is not of type {dtype!r}"

# Relationship Checks
def test_actual_data_relationships(allowed_side_pairs=None):
    """Assert the cross-column and range relationships of the dataset.

    Checks performed:
      * ``frontline`` >= ``frontline_prev_best`` on every row.
      * ``lead`` is strictly greater than zero.
      * optionally, every (``sideb``, ``sidea``) pair is one of
        ``allowed_side_pairs``.

    Args:
        allowed_side_pairs: optional iterable of ``(sideb, sidea)`` tuples that
            are considered valid. The pair-in-set expectation cannot run
            without an explicit set of pairs, so the check is skipped when this
            is ``None`` (the default). The parameter has a default so existing
            zero-argument callers keep working.

    Raises:
        AssertionError: if any of the executed expectations fails.
    """
    # The pair comparison expectation is "A greater than B"; or_equal=True
    # turns it into the intended greater-than-or-equal check.
    assert actual_data.expect_column_pair_values_A_to_be_greater_than_B(
        "frontline", "frontline_prev_best", or_equal=True
    ).success

    # "Greater than zero" is expressed as an open-ended range with a strict
    # lower bound (no upper bound).
    assert actual_data.expect_column_values_to_be_between(
        "lead", min_value=0, strict_min=True
    ).success

    # Only meaningful when the caller supplies the set of valid pairs.
    if allowed_side_pairs is not None:
        assert actual_data.expect_column_pair_values_to_be_in_set(
            "sideb", "sidea", value_pairs_set=list(allowed_side_pairs)
        ).success

if __name__ == "__main__":
    # Run each suite in order; a failing assertion stops the script before
    # the success message is printed.
    for run_suite in (
        test_actual_data_integrity,
        test_actual_data_schema,
        test_actual_data_relationships,
    ):
        run_suite()

    print("All tests passed successfully!")