In [1]:
from great_expectations.data_context import FileDataContext

# Instantiate Data Context

In [2]:
# Build a file-backed Data Context whose project root is the current working directory
context = FileDataContext.create(
    project_root_dir='./',
)

# Connect to a `Datasource`

In [3]:
# Register the pandas Datasource. The name must be unique between Datasources, and the
# file-backed context keeps its configuration on disk, so a plain `add_pandas` fails with
# a "datasource already exists" error the second time this notebook is run.
# `add_or_update_pandas` keeps the cell re-runnable, in line with the `add_or_update_*`
# calls used for the expectation suite and the checkpoint.
datasource_name = 'csv-data-dandy'
datasource = context.sources.add_or_update_pandas(datasource_name)

# Register the cleaned CSV file as a data asset of that Datasource
asset_name = 'car_accident_dataset1'
path_to_data = 'P2M3_dandy_data_cleaned.csv'
asset = datasource.add_csv_asset(asset_name, filepath_or_buffer=path_to_data)

# Build the batch request used to hand the asset's data to a validator
batch_request = asset.build_batch_request()

# Create an Expectation Suite

In [4]:
# Name the expectation suite and make sure it exists in the context
expectation_suite_name = 'expectation-car-accident-dataset'
context.add_or_update_expectation_suite(
    expectation_suite_name=expectation_suite_name,
)

# Bind the batch request to that suite through a validator
validator = context.get_validator(
    batch_request=batch_request,
    expectation_suite_name=expectation_suite_name,
)

# Preview the first rows to confirm the validator can read the data
validator.head()

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

Unnamed: 0,accident_id,accident_index,accident_date,day_of_week,junction_detail,accident_severity,light_conditions,number_of_casualties,number_of_vehicles,road_surface_conditions,road_type,speed_limit,time,urban_or_rural_area,weather_conditions,vehicle_type
0,0,2010000000000.0,2021-07-27,Monday,T or staggered junction,Slight,Daylight,1,2,Dry,Single carriageway,30,20:11:00,Urban,Other,Car
1,1,2010000000000.0,2021-08-01,Saturday,Crossroads,Slight,Daylight,4,2,Wet or damp,Single carriageway,30,13:00:00,Urban,Other,Van
2,2,2010000000000.0,2021-07-31,Friday,T or staggered junction,Slight,Daylight,1,2,Dry,Single carriageway,30,18:03:00,Urban,Other,Car
3,3,2010000000000.0,2021-08-04,Tuesday,Private drive or entrance,Slight,Daylight,2,2,Wet or damp,Single carriageway,40,15:32:00,Rural,Other,Taxi
4,4,2010000000000.0,2021-08-03,Monday,Not at junction or within 20 metres,Slight,Daylight,2,3,Dry,Dual carriageway,30,17:15:00,Urban,Other,Car


## Expectations

In [5]:
# Expectation 1 : every value in column `accident_id` must be unique (no duplicates)

validator.expect_column_values_to_be_unique(column='accident_id')

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 29870,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [6]:
# Expectation 2 : values in column `speed_limit` must lie between 0 and 70
# (both bounds are included, so a speed limit of exactly 70 passes)

validator.expect_column_values_to_be_between(
    column='speed_limit',
    min_value=0,
    max_value=70,
)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 29870,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [7]:
# Expectation 3 : column `road_surface_conditions` may only contain one of these 5 values:
# 'Dry', 'Wet or damp', 'Frost or ice', 'Snow', 'Flood over 3cm. deep'

allowed_surface_conditions = [
    'Dry',
    'Wet or damp',
    'Frost or ice',
    'Snow',
    'Flood over 3cm. deep',
]
validator.expect_column_values_to_be_in_set(
    column='road_surface_conditions',
    value_set=allowed_surface_conditions,
)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 29870,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [8]:
# Expectation 4 : column `number_of_casualties` must be stored as type int64

validator.expect_column_values_to_be_in_type_list(
    column='number_of_casualties',
    type_list=['int64'],
)

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "observed_value": "int64"
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [9]:
# Expectation 5 : the median of `number_of_casualties` must be in the range 0 to 1

validator.expect_column_median_to_be_between(
    column='number_of_casualties',
    min_value=0,
    max_value=1,
)

Calculating Metrics:   0%|          | 0/4 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "observed_value": 1.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [10]:
# Expectation 6 : the standard deviation of `number_of_vehicles` must be in the range 0 to 0.8

validator.expect_column_stdev_to_be_between(
    column='number_of_vehicles',
    min_value=0,
    max_value=0.8,
)

Calculating Metrics:   0%|          | 0/4 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "observed_value": 0.7339571238720937
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [11]:
# Expectation 7 : every value in column `accident_date` must be parseable by dateutil,
# i.e. written in one of the commonly known date and/or time formats

validator.expect_column_values_to_be_dateutil_parseable(column='accident_date')

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 29870,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

**All expectations returned `success: true`, so they are now saved into the expectation suite.**

In [12]:
# Persist the expectations collected on the validator into its expectation suite.
# `discard_failed_expectations=False` asks for every expectation to be kept in the
# saved suite, including any that did not pass on this batch.

validator.save_expectation_suite(discard_failed_expectations=False)

In [13]:
# Register (or refresh) a checkpoint in the context that is tied to the validator above

checkpoint_1 = context.add_or_update_checkpoint(
    name='checkpoint_1',
    validator=validator,
)

In [14]:
# Run the checkpoint, which validates the data against the saved expectation suite

checkpoint_result = checkpoint_1.run()

# Show the overall outcome so that a failed validation is visible in the notebook
# instead of being silently stored in a variable
checkpoint_result.success

Calculating Metrics:   0%|          | 0/33 [00:00<?, ?it/s]