In [1]:
from great_expectations.data_context import FileDataContext

# Set up the file-backed Great Expectations context, rooted at the current
# working directory.
project_root_dir = './'
context = FileDataContext.create(project_root_dir=project_root_dir)

In [2]:
# Give a name to a Datasource. This name must be unique between Datasources.
datasource_name = 'population-data-by-contry'

# Use the add_or_update_* variant so this cell stays re-runnable: the file
# context persists the datasource, and a plain add_pandas() with a name that is
# already registered is rejected on the next "Restart & Run All". This also
# mirrors the add_or_update_* calls used for the suite and the checkpoint.
datasource = context.sources.add_or_update_pandas(datasource_name)

# Give a name to a data asset and point it at the cleaned CSV file
# (path is relative to the notebook's working directory).
asset_name = 'population-data-2010'
path_to_data = 'data_clean_fp.csv'
asset = datasource.add_csv_asset(asset_name, filepath_or_buffer=path_to_data)

# Build the batch request that the validator will use to load the asset
batch_request = asset.build_batch_request()

In [3]:
# Create (or update) the expectation suite that will hold the checks below
expectation_suite_name = 'expectation-population-dataset'
context.add_or_update_expectation_suite(expectation_suite_name)

# Bind the batch request and the suite together in a validator
validator_options = {
    'batch_request': batch_request,
    'expectation_suite_name': expectation_suite_name,
}
validator = context.get_validator(**validator_options)

# Check the validator
validator.head()

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

Unnamed: 0,country,gender,age,employed,unemployed,economically_active_population,not_economically_active_population,attending_school,not_attending_school,literate,illiterate,population,population_<15,population_>60,population_15_59
0,Albania,Female,15-59,294458.0,122033.0,416491.0,623460.0,0.0,0.0,0,0,1039951.0,,,1039951.0
1,Albania,Female,>60,5236.0,1722.0,6958.0,223705.0,0.0,0.0,0,0,230663.0,,230663.0,
2,Albania,Male,15-59,468212.0,178668.0,646880.0,381826.0,0.0,0.0,0,0,1028706.0,,,1028706.0
3,Albania,Male,>60,34556.0,8253.0,42809.0,174729.0,0.0,0.0,0,0,217538.0,,217538.0,
4,Andorra,Female,15-59,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,,,0.0


In [4]:
# Every value in 'age' must be one of the three age brackets used by the dataset.
allowed_age_groups = ['<15', '15-59', '>60']
validator.expect_column_values_to_be_in_set(column='age', value_set=allowed_age_groups)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 484,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [5]:
# Every value in 'gender' must be one of the two labels present in the dataset.
allowed_genders = ['Male', 'Female']
validator.expect_column_values_to_be_in_set(column='gender', value_set=allowed_genders)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 484,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [6]:
# Each row must name its country: no missing values allowed in 'country'.
validator.expect_column_values_to_not_be_null(
    column='country',
)

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 484,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": []
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [7]:
# The dataset must expose a 'population' column.
required_column = 'population'
validator.expect_column_to_exist(column=required_column)

Calculating Metrics:   0%|          | 0/2 [00:00<?, ?it/s]

{
  "success": true,
  "result": {},
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [9]:
# The 'population' column must be numeric.
# For a pandas-backed validator the type names are NumPy dtype strings
# (e.g. 'int64') or Python builtin type names (e.g. 'int'). 'integer' is
# neither, so it could never match; the check previously passed only because
# the column is float64 (see observed_value in the result). Explicit dtype
# names make the expectation also accept an integer-typed column as intended.
validator.expect_column_values_to_be_in_type_list(
    column='population',
    type_list=['int64', 'float64'],
)

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "observed_value": "float64"
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [10]:
# Persist the expectations collected on the validator into the suite.
# discard_failed_expectations=False keeps expectations even if they failed.
validator.save_expectation_suite(
    discard_failed_expectations=False,
)

In [11]:
# Register (or update) a checkpoint that reuses the validator built above
checkpoint_name = 'validate-data'
checkpoint_1 = context.add_or_update_checkpoint(name=checkpoint_name, validator=validator)

In [12]:
# Execute the checkpoint and keep its result object for later inspection
checkpoint_result = checkpoint_1.run()

Calculating Metrics:   0%|          | 0/22 [00:00<?, ?it/s]

In [13]:
# Render the Data Docs site; the call returns a mapping of site name to the
# URL of its index page, which is shown as the cell output.
context.build_data_docs()

{'local_site': 'file://d:\\CODA_001\\git\\coda-001-final-project-coda-001\\gx\\uncommitted/data_docs/local_site/index.html'}