In [1]:
import pandas
import great_expectations as gx
from great_expectations import expectations as gxe

In [2]:
# Создаем контекст
context = gx.get_context()

In [3]:
# Загружаем датасет
df = pandas.read_csv("./../data/raw/UCI_Credit_Card.csv")

In [4]:
# Создаем датасурс на основе датасета
data_source_name = "df"
data_source = context.data_sources.add_pandas(name=data_source_name)

In [5]:
# Создаем дата_ассет для нашего датасурса
data_asset_name = "df_asset"
data_asset = data_source.add_dataframe_asset(name=data_asset_name)

In [6]:
# Создаем батч для дата_ассета
batch_definition_name = "df_batch_definition"
batch_definition = data_asset.add_batch_definition_whole_dataframe(
    batch_definition_name
)

In [7]:
# Задаем параметры батча
batch_parameters = {"dataframe": df}

In [8]:
# Получаем датафрейм как батч
batch = batch_definition.get_batch(batch_parameters=batch_parameters)

In [9]:
# Создаем сьют для проверок
suite_name = "df_expectation_suite"
suite = gx.ExpectationSuite(name=suite_name)
suite = context.suites.add(suite)

In [10]:
# Создаем правила для проверки
expectations = [
    gxe.ExpectColumnToExist(column="LIMIT_BAL"),
    gxe.ExpectColumnValuesToNotBeNull(column="LIMIT_BAL"),
    gxe.ExpectColumnValuesToBeBetween(column="AGE", max_value=100, min_value=18),
    gxe.ExpectColumnDistinctValuesToBeInSet(column="default.payment.next.month", value_set=[0,1])
]

In [11]:
for exp in expectations:
    suite.add_expectation(exp)

In [12]:
# Определяем валидирующий объект
definition_name = "df_validation_definition"
validation_definition = gx.ValidationDefinition(
    data=batch_definition, suite=suite, name=definition_name
)

In [13]:
# Добавляем валидацию в контекст
validation_definition = context.validation_definitions.add(validation_definition)

In [14]:
# Производим валидацию
validation_results = validation_definition.run(batch_parameters=batch_parameters)

Calculating Metrics:   0%|          | 0/16 [00:00<?, ?it/s]

In [23]:
if validation_results.success:
    print('Все проверки пройдены')
else:
    raise ValueError(f'''
Статус проверки: {validation_results.success}\n
{validation_results}
''')

ValueError: 
Статус проверки: False

{
  "success": false,
  "results": [
    {
      "success": true,
      "expectation_config": {
        "type": "expect_column_to_exist",
        "kwargs": {
          "batch_id": "df-df_asset",
          "column": "LIMIT_BAL"
        },
        "meta": {},
        "id": "48a7b210-c5c9-4341-84fb-ba800d7d2ad2"
      },
      "result": {},
      "meta": {},
      "exception_info": {
        "raised_exception": false,
        "exception_traceback": null,
        "exception_message": null
      }
    },
    {
      "success": true,
      "expectation_config": {
        "type": "expect_column_values_to_not_be_null",
        "kwargs": {
          "batch_id": "df-df_asset",
          "column": "LIMIT_BAL"
        },
        "meta": {},
        "id": "68904901-f9e8-48c1-9592-0522ce2edb5d"
      },
      "result": {
        "element_count": 30000,
        "unexpected_count": 0,
        "unexpected_percent": 0.0,
        "partial_unexpected_list": [],
        "partial_unexpected_counts": [],
        "partial_unexpected_index_list": []
      },
      "meta": {},
      "exception_info": {
        "raised_exception": false,
        "exception_traceback": null,
        "exception_message": null
      }
    },
    {
      "success": false,
      "expectation_config": {
        "type": "expect_column_values_to_be_between",
        "kwargs": {
          "batch_id": "df-df_asset",
          "column": "AGE",
          "min_value": 18.0,
          "max_value": 20.0
        },
        "meta": {},
        "id": "58f7f768-1986-46d8-80e9-37a2a6a1ba4a"
      },
      "result": {
        "element_count": 30000,
        "unexpected_count": 30000,
        "unexpected_percent": 100.0,
        "partial_unexpected_list": [
          24,
          26,
          34,
          37,
          57,
          37,
          29,
          23,
          28,
          35,
          34,
          51,
          41,
          30,
          29,
          23,
          24,
          49,
          49,
          29
        ],
        "missing_count": 0,
        "missing_percent": 0.0,
        "unexpected_percent_total": 100.0,
        "unexpected_percent_nonmissing": 100.0,
        "partial_unexpected_counts": [
          {
            "value": 29,
            "count": 3
          },
          {
            "value": 23,
            "count": 2
          },
          {
            "value": 24,
            "count": 2
          },
          {
            "value": 34,
            "count": 2
          },
          {
            "value": 37,
            "count": 2
          },
          {
            "value": 49,
            "count": 2
          },
          {
            "value": 26,
            "count": 1
          },
          {
            "value": 28,
            "count": 1
          },
          {
            "value": 30,
            "count": 1
          },
          {
            "value": 35,
            "count": 1
          },
          {
            "value": 41,
            "count": 1
          },
          {
            "value": 51,
            "count": 1
          },
          {
            "value": 57,
            "count": 1
          }
        ],
        "partial_unexpected_index_list": [
          0,
          1,
          2,
          3,
          4,
          5,
          6,
          7,
          8,
          9,
          10,
          11,
          12,
          13,
          14,
          15,
          16,
          17,
          18,
          19
        ]
      },
      "meta": {},
      "exception_info": {
        "raised_exception": false,
        "exception_traceback": null,
        "exception_message": null
      }
    },
    {
      "success": true,
      "expectation_config": {
        "type": "expect_column_distinct_values_to_be_in_set",
        "kwargs": {
          "batch_id": "df-df_asset",
          "column": "default.payment.next.month",
          "value_set": [
            0,
            1
          ]
        },
        "meta": {},
        "id": "8a4c6f81-8d5c-4271-8025-a20ba1b232f1"
      },
      "result": {
        "observed_value": [
          0,
          1
        ],
        "details": {
          "value_counts": [
            {
              "value": 0,
              "count": 23364
            },
            {
              "value": 1,
              "count": 6636
            }
          ]
        }
      },
      "meta": {},
      "exception_info": {
        "raised_exception": false,
        "exception_traceback": null,
        "exception_message": null
      }
    }
  ],
  "suite_name": "df_expectation_suite",
  "suite_parameters": {},
  "statistics": {
    "evaluated_expectations": 4,
    "successful_expectations": 3,
    "unsuccessful_expectations": 1,
    "success_percent": 75.0
  },
  "meta": {
    "great_expectations_version": "1.5.8",
    "batch_spec": {
      "batch_data": "PandasDataFrame"
    },
    "batch_markers": {
      "ge_load_time": "20251030T202049.133364Z",
      "pandas_data_fingerprint": "afe5c415a15c571fef10348ec8f30bb2"
    },
    "active_batch_definition": {
      "datasource_name": "df",
      "data_connector_name": "fluent",
      "data_asset_name": "df_asset",
      "batch_identifiers": {
        "dataframe": "<DATAFRAME>"
      }
    },
    "validation_id": "9924d179-9704-4034-b062-18040f8994e9",
    "checkpoint_id": null,
    "run_id": {
      "run_name": null,
      "run_time": "2025-10-30T23:20:49.194421+03:00"
    },
    "validation_time": "2025-10-30T20:20:49.194421+00:00",
    "batch_parameters": {
      "dataframe": "<DATAFRAME>"
    }
  },
  "id": null
}
