In [32]:
import great_expectations as gx
from great_expectations.core.expectation_configuration import ExpectationConfiguration
from great_expectations.core.expectation_suite import ExpectationSuite

In [33]:

# Obtain the Great Expectations data context; later cells use it to manage
# expectation suites, datasources and validators.
context = gx.get_context()
print("Great Expectations context loaded successfully!")

Great Expectations context loaded successfully!


In [34]:
# Project-local loader for the raw dataset.
from src.data.load_data import load_raw_data

# Load the raw frame, then print its shape and per-column dtypes so the
# expectations defined below can be checked against the actual schema.
df = load_raw_data()
print(df.shape)
print(df.dtypes)

Loaded dataset with shape: (7043, 21)
(7043, 21)
customerID           object
gender               object
SeniorCitizen         int64
Partner              object
Dependents           object
tenure                int64
PhoneService         object
MultipleLines        object
InternetService      object
OnlineSecurity       object
OnlineBackup         object
DeviceProtection     object
TechSupport          object
StreamingTV          object
StreamingMovies      object
Contract             object
PaperlessBilling     object
PaymentMethod        object
MonthlyCharges      float64
TotalCharges         object
Churn                object
dtype: object


In [35]:
# Name under which the expectation suite is registered in the context and
# by which it is later looked up when building the validator.
suite_name = "telco_churn.raw"


In [36]:
# Register the suite under `suite_name` (adding it, or updating an existing
# one with the same name). Expectations are attached to it in a later cell.
suite: ExpectationSuite = context.add_or_update_expectation_suite(expectation_suite_name=suite_name)


In [40]:
# Start from an empty list so re-running this cell rebuilds the suite
# instead of depending on expectations left over from an earlier execution.
suite.expectations = []


def _expect(expectation_type, **kwargs):
    """Append one expectation of `expectation_type` with `kwargs` to `suite`."""
    suite.add_expectation(
        ExpectationConfiguration(expectation_type=expectation_type, kwargs=kwargs)
    )


# 1. Table-level: the frame must have exactly 21 columns
#    (this expectation takes the count through the `value` kwarg).
_expect("expect_table_column_count_to_equal", value=21)

# 2. Required columns exist
required_columns = ["customerID", "tenure", "MonthlyCharges", "TotalCharges", "Churn"]
for col in required_columns:
    _expect("expect_column_to_exist", column=col)

# 3. Churn values valid
_expect("expect_column_values_to_be_in_set", column="Churn", value_set=["Yes", "No"])

# 4. tenure: type and range
_expect("expect_column_values_to_be_of_type", column="tenure", type_="int64")
_expect("expect_column_values_to_be_between", column="tenure", min_value=0, max_value=80)

# 5. MonthlyCharges: non-negative (lower bound only, no upper bound)
_expect("expect_column_values_to_be_between", column="MonthlyCharges", min_value=0)

# 6. customerID unique
_expect("expect_column_values_to_be_unique", column="customerID")

# 7. SeniorCitizen binary
_expect("expect_column_values_to_be_in_set", column="SeniorCitizen", value_set=[0, 1])

# 8. TotalCharges: loaded as an object (string) column, so only two things are
#    checked here: the dtype is object, and at least 99.8% of values are
#    non-null (tolerating ~0.16% missing, i.e. 11 of 7043 rows).
#    Numeric convertibility of the values is NOT validated by this suite.
#    NOTE(review): if missing values arrive as blank strings rather than nulls,
#    the null check will not flag them -- confirm against load_raw_data.
_expect("expect_column_values_to_be_of_type", column="TotalCharges", type_="object")
_expect("expect_column_values_to_not_be_null", column="TotalCharges", mostly=0.998)





# Cell: Validate against current df (modern fluent Pandas way)
# Create (or refresh) a temporary Pandas datasource used only for validation.
datasource = context.sources.add_or_update_pandas(name="temp_pandas_datasource")

# Add dataframe as asset
asset = datasource.add_dataframe_asset(name="telco_raw_asset", dataframe=df)

# Build batch request
batch_request = asset.build_batch_request()

# Persist the populated suite BEFORE building the validator.
# get_validator(expectation_suite_name=...) looks the suite up by name in the
# context's store, so without this save a fresh run would validate against the
# still-empty suite registered earlier rather than the expectations built in
# memory on `suite`.
context.add_or_update_expectation_suite(expectation_suite=suite)

# Get a validator bound to this batch and the freshly saved suite.
validator = context.get_validator(
    batch_request=batch_request,
    expectation_suite_name=suite_name
)





In [41]:
# Evaluate the validator's suite against its batch.
results = validator.validate()

# Store the in-memory suite in the context so it can be reused by name.
context.add_or_update_expectation_suite(expectation_suite=suite)
print(f"Expectation Suite '{suite_name}' created and saved successfully!")

# Full result payload first, then a compact per-expectation pass/fail summary.
print(results)
print("\nSuccess:", results.success)
expectation_results = results.results
print("Number of evaluated expectations:", len(expectation_results))
for res in expectation_results:
    checked_type = res.expectation_config.expectation_type
    print(f"- {checked_type}: {res.success}")

Calculating Metrics:   0%|          | 0/32 [00:00<?, ?it/s]

Expectation Suite 'telco_churn.raw' created and saved successfully!
{
  "success": true,
  "results": [
    {
      "success": true,
      "expectation_config": {
        "expectation_type": "expect_table_column_count_to_equal",
        "kwargs": {
          "value": 21,
          "batch_id": "temp_pandas_datasource-telco_raw_asset"
        },
        "meta": {}
      },
      "result": {
        "observed_value": 21
      },
      "meta": {},
      "exception_info": {
        "raised_exception": false,
        "exception_traceback": null,
        "exception_message": null
      }
    },
    {
      "success": true,
      "expectation_config": {
        "expectation_type": "expect_column_to_exist",
        "kwargs": {
          "column": "customerID",
          "batch_id": "temp_pandas_datasource-telco_raw_asset"
        },
        "meta": {}
      },
      "result": {},
      "meta": {},
      "exception_info": {
        "raised_exception": false,
        "exception_traceback": null,


In [23]:
# Sanity check: list the suite names currently registered in the context.
print(context.list_expectation_suite_names())

['telco_churn.raw']
