In [2]:
import great_expectations as gx
import pandas as pd
from great_expectations.data_context import FileDataContext

## Create data context

In [11]:
# context = gx.get_context()
context = FileDataContext(project_root_dir="../services")

## Create data source

In [12]:
ds1 = context.sources.add_or_update_pandas(name="my_pandas_ds")
ds1

PandasDatasource(type='pandas', name='my_pandas_ds', id=None, assets=[])

## Create data assets

In [13]:
da1 = ds1.add_csv_asset(
    name = "asset01",
    filepath_or_buffer="../data/raw/Equity_Apartments_Data.csv"
)

In [14]:
batch_request = da1.build_batch_request()
batches = da1.get_batch_list_from_batch_request(batch_request)
for batch in batches:
    print(batch.batch_spec)

{'reader_method': 'read_csv', 'reader_options': {'filepath_or_buffer': PosixPath('../data/raw/Equity_Apartments_Data.csv')}}


In [15]:
context.add_or_update_expectation_suite("my_expectation_suite")

{
  "expectation_suite_name": "my_expectation_suite",
  "ge_cloud_id": null,
  "expectations": [],
  "data_asset_type": null,
  "meta": {
    "great_expectations_version": "0.18.15"
  }
}

In [17]:
validator = context.get_validator(
    batch_request=batch_request,
    expectation_suite_name="my_expectation_suite"
)

In [18]:
validator.head()

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

Unnamed: 0.1,Unnamed: 0,Price,Beds,Baths,sq.ft,Floor,Move_in_date,building_id,unit_id,URL,...,Fireplace,City_Skyline,Kitchen_Island,Stainless_Appliances,Renovated,Office_Space,Days_Till_Available,Day_of_the_week_recorded,Unique_ID,Estiamted_Vacancy
0,1,2377,0,1.0,523,5,2021-09-02,1,0507,https://www.equityapartments.com/washington-dc...,...,0.0,0.0,0.0,1.0,1.0,0.0,47.0,Wednesday,0105071210MassApartments,0.020833
1,2,2816,1,1.0,713,2,2021-09-13,1,0204,https://www.equityapartments.com/washington-dc...,...,0.0,0.0,0.0,1.0,1.0,0.0,58.0,Wednesday,0102041210MassApartments,0.020833
2,3,3811,2,2.0,1252,6,2021-10-08,1,0608,https://www.equityapartments.com/washington-dc...,...,1.0,0.0,0.0,0.0,0.0,0.0,83.0,Wednesday,0106081210MassApartments,0.020833
3,4,1549,0,1.0,456,2,2021-09-17,1,256 \r\n,https://www.equityapartments.com/washington-dc...,...,0.0,0.0,0.0,0.0,0.0,0.0,62.0,Wednesday,1256\r\n1500MassApartments,0.003597
4,5,1753,1,1.0,580,3,2021-10-05,1,337 \r\n,https://www.equityapartments.com/washington-dc...,...,0.0,0.0,0.0,0.0,0.0,0.0,80.0,Wednesday,1337\r\n1500MassApartments,0.003597


In [23]:
ex1 = validator.expect_column_values_to_be_unique("Unique_ID")
ex1

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": false,
  "result": {
    "element_count": 62810,
    "unexpected_count": 61807,
    "unexpected_percent": 99.75628651667259,
    "partial_unexpected_list": [
      "0105071210MassApartments",
      "0102041210MassApartments",
      "0106081210MassApartments",
      "1256\r\n1500MassApartments",
      "1337\r\n1500MassApartments",
      "1423\r\n455EyeStreetApartments",
      "1512\r\n455EyeStreetApartments",
      "401812\r\n\r\n425MassApartments",
      "401802\r\n\r\n425MassApartments",
      "010101CorcoranHouseatDupontCircleApartments",
      "010306CorcoranHouseatDupontCircleApartments",
      "010426TheFlatsatDupontCircleApartments",
      "010831TheFlatsatDupontCircleApartments",
      "010702TheFlatsatDupontCircleApartments",
      "001126\r\n\r\n2400MApartments",
      "001131\r\n\r\n2400MApartments",
      "001508\r\n\r\n2400MApartments",
      "11109\r\n100KApartments",
      "1801\r\n\r\n100KApartments",
      "1817\r\n\r\n100KApartments"
    ],
    "missing_

In [24]:
ex2 = validator.expect_column_values_to_be_between("Beds", min_value=0, max_value=10)
ex2

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 62810,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [25]:
validator.head()

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

Unnamed: 0.1,Unnamed: 0,Price,Beds,Baths,sq.ft,Floor,Move_in_date,building_id,unit_id,URL,...,Fireplace,City_Skyline,Kitchen_Island,Stainless_Appliances,Renovated,Office_Space,Days_Till_Available,Day_of_the_week_recorded,Unique_ID,Estiamted_Vacancy
0,1,2377,0,1.0,523,5,2021-09-02,1,0507,https://www.equityapartments.com/washington-dc...,...,0.0,0.0,0.0,1.0,1.0,0.0,47.0,Wednesday,0105071210MassApartments,0.020833
1,2,2816,1,1.0,713,2,2021-09-13,1,0204,https://www.equityapartments.com/washington-dc...,...,0.0,0.0,0.0,1.0,1.0,0.0,58.0,Wednesday,0102041210MassApartments,0.020833
2,3,3811,2,2.0,1252,6,2021-10-08,1,0608,https://www.equityapartments.com/washington-dc...,...,1.0,0.0,0.0,0.0,0.0,0.0,83.0,Wednesday,0106081210MassApartments,0.020833
3,4,1549,0,1.0,456,2,2021-09-17,1,256 \r\n,https://www.equityapartments.com/washington-dc...,...,0.0,0.0,0.0,0.0,0.0,0.0,62.0,Wednesday,1256\r\n1500MassApartments,0.003597
4,5,1753,1,1.0,580,3,2021-10-05,1,337 \r\n,https://www.equityapartments.com/washington-dc...,...,0.0,0.0,0.0,0.0,0.0,0.0,80.0,Wednesday,1337\r\n1500MassApartments,0.003597


In [21]:
# validator.save_expectation_suite("my_expectation_suite")

In [26]:
validator.save_expectation_suite(discard_failed_expectations=False)

In [27]:
validator.get_expectation_suite

<bound method Validator.get_expectation_suite of <great_expectations.validator.validator.Validator object at 0x168d3c940>>

In [28]:
checkpoint = context.add_or_update_checkpoint(
    name = "initial_data_validation_checkpoint",
    validations=[
        {
            "batch_request":batch_request,
            "expectation_suite_name" : "my_expectation_suite"
        }
    ]
)
checkpoint

{
  "action_list": [
    {
      "name": "store_validation_result",
      "action": {
        "class_name": "StoreValidationResultAction"
      }
    },
    {
      "name": "store_evaluation_params",
      "action": {
        "class_name": "StoreEvaluationParametersAction"
      }
    },
    {
      "name": "update_data_docs",
      "action": {
        "class_name": "UpdateDataDocsAction"
      }
    }
  ],
  "batch_request": {},
  "class_name": "Checkpoint",
  "config_version": 1.0,
  "evaluation_parameters": {},
  "module_name": "great_expectations.checkpoint",
  "name": "initial_data_validation_checkpoint",
  "profilers": [],
  "runtime_configuration": {},
  "validations": [
    {
      "batch_request": {
        "datasource_name": "my_pandas_ds",
        "data_asset_name": "asset01",
        "options": {}
      },
      "expectation_suite_name": "my_expectation_suite"
    }
  ]
}

In [29]:
checkpoint_result = checkpoint.run()

Calculating Metrics:   0%|          | 0/17 [00:00<?, ?it/s]

In [30]:
checkpoint_result.success

False

In [31]:
mysuite = context.get_expectation_suite("my_expectation_suite")
context.save_expectation_suite(mysuite)

'/Users/ninelco/Documents/MLOps/ApartmentPrice/notebooks/../services/gx/expectations/my_expectation_suite.json'

### Present validation results

In [32]:
context.build_data_docs()

{'local_site': 'file:///Users/ninelco/Documents/MLOps/ApartmentPrice/notebooks/../services/gx/uncommitted/data_docs/local_site/index.html'}

In [33]:
context.open_data_docs()