
Objective: Membuat Great Expectations

In [29]:
# Create the Great Expectations data context (file-based, rooted at './').

from great_expectations.data_context import FileDataContext

context = FileDataContext.create(
    project_root_dir='./',
)

In [30]:
# Connect the dataset to the Great Expectations context as a pandas datasource.
# add_or_update_pandas is used instead of add_pandas so that re-running this
# cell against the file-backed context does not fail because the datasource
# name is already registered (same approach as the
# add_or_update_expectation_suite call used for the suite).
datasource_name = 'India-Power-Infrastructure-Data'.lower()
datasource = context.sources.add_or_update_pandas(datasource_name)

# The CSV asset deliberately shares the datasource's (lower-cased) name.
asset_name = datasource_name
path_to_data = 'clean.csv'
asset = datasource.add_csv_asset(asset_name, filepath_or_buffer=path_to_data)

# Batch request for the asset; consumed when the validator is built.
batch_request = asset.build_batch_request()

In [31]:
# Register the expectation suite under a fixed name.
expectation_suite_name = 'expectation-dataset'
context.add_or_update_expectation_suite(
    expectation_suite_name=expectation_suite_name
)

# Build a validator that ties the batch request (the connected data)
# to the expectation suite.
validator = context.get_validator(
    expectation_suite_name=expectation_suite_name,
    batch_request=batch_request,
)

# Sanity check: display the first rows seen by the validator.
validator.head()

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

Unnamed: 0,teritory,year,power_spec,power_needed,kwh_needed,megawatt_capacity
0,Andaman and Nicobar Islands,2004.01.01,0,0,0.0,65
1,Andhra Pradesh,2004.01.01,5042,5006,656.9,10809
2,Arunachal Pradesh,2004.01.01,16,16,143.9,187
3,Assam,2004.01.01,379,358,134.4,1133
4,Bihar,2004.01.01,720,648,78.0,1644


In [32]:
# Expectation 1: column `year` must not contain missing values.
validator.expect_column_values_to_not_be_null(column='year')

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 612,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": []
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [33]:
# Expectation 2: column `teritory` must not contain missing values.
validator.expect_column_values_to_not_be_null(column='teritory')

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 612,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": []
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [34]:
# Expectation 3: column `power_spec` must be an integer column (int64).
validator.expect_column_values_to_be_in_type_list(
    column='power_spec',
    type_list=['int64'],
)

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "observed_value": "int64"
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [35]:
# Expectation 4: column `power_needed` must be integer or float.
# 'float64' is included so the check matches the stated rule and the
# equivalent checks on `kwh_needed` and `megawatt_capacity`.
validator.expect_column_values_to_be_in_type_list('power_needed', ['int64', 'float64'])

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "observed_value": "int64"
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [36]:
# Expectation 5: column `kwh_needed` must be integer or float.
validator.expect_column_values_to_be_in_type_list(
    column='kwh_needed',
    type_list=['int64', 'float64'],
)

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "observed_value": "float64"
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [37]:
# Expectation 6: column `megawatt_capacity` must be integer or float.
validator.expect_column_values_to_be_in_type_list(
    column='megawatt_capacity',
    type_list=['int64', 'float64'],
)

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "observed_value": "int64"
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [39]:
# Expectation 7: values in column `year` must be date strings
# matching the strftime format '%Y.%m.%d' (e.g. 2004.01.01).
validator.expect_column_values_to_match_strftime_format(
    column='year',
    strftime_format='%Y.%m.%d',
)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 612,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}