# A. Install Great Expectation

In [1]:
# Install the library

!pip install -q great-expectations

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
apache-airflow-providers-common-io 1.4.0 requires apache-airflow>=2.8.0, but you have apache-airflow 2.3.4 which is incompatible.
apache-airflow-providers-fab 1.2.2 requires apache-airflow>=2.9.0, but you have apache-airflow 2.3.4 which is incompatible.
apache-airflow-providers-fab 1.2.2 requires flask-appbuilder==4.5.0, but you have flask-appbuilder 4.1.3 which is incompatible.
apache-airflow-providers-smtp 1.7.1 requires apache-airflow>=2.7.0, but you have apache-airflow 2.3.4 which is incompatible.


# B. Instantiate Data Context

In [2]:
# Create a data context

from great_expectations.data_context import FileDataContext

context = FileDataContext.create(project_root_dir='./')

# C. Connect to A Datasource

In [3]:
# Give a name to a Datasource. This name must be unique between Datasources.
datasource_name = 'P2M3_Muhammad_Rozzaaq_data_clean'
datasource = context.sources.add_pandas(datasource_name)

# Give a name to a data asset
asset_name = 'Data_Milestone'
path_to_data = 'D:\HACKTIV8\HACKTIV 2\PHASE 2\GITHUB\p2-ftds033-rmt-m3-MuhammadRozzaaq\P2M3_Muhammad_Rozzaaq_data_clean.csv'
asset = datasource.add_csv_asset(asset_name, filepath_or_buffer=path_to_data)

# Build batch request
batch_request = asset.build_batch_request()

# D. Create an Expectation Suite

In [4]:
# Creat an expectation suite
expectation_suite_name = 'Data_Milestone_Expecttion'
context.add_or_update_expectation_suite(expectation_suite_name)

# Create a validator using above expectation suite
validator = context.get_validator(
    batch_request = batch_request,
    expectation_suite_name = expectation_suite_name
)

# Check the validator
validator.head()

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

Unnamed: 0,loan_id,gender,married,dependents,education,self_employed,applicantincome,coapplicantincome,loanamount,loan_amount_term,credit_history,property_area,loan_status
0,LP001002,Male,No,0,Graduate,No,5849,0.0,146.412162,360.0,1.0,Urban,Y
1,LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
2,LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
3,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
4,LP001008,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y


## D.1. Expectations

In [5]:
# Expectation 1 : Column `pickup_datetime` can not contain missing values

validator.expect_column_values_to_not_be_null('gender')

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

{
  "result": {
    "element_count": 614,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": []
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  },
  "success": true
}

In [6]:
# Expectation 2 : Column `dropoff_datetime` must be unique

validator.expect_column_values_to_be_unique('loan_id')

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "result": {
    "element_count": 614,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  },
  "success": true
}

In [8]:
# Expectation 3 : Column `tip_amount` must be less than 361 months

validator.expect_column_values_to_be_between(
    column='loan_amount_term', min_value=11, max_value=361
)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "result": {
    "element_count": 614,
    "unexpected_count": 15,
    "unexpected_percent": 2.44299674267101,
    "partial_unexpected_list": [
      480.0,
      480.0,
      480.0,
      480.0,
      480.0,
      480.0,
      480.0,
      480.0,
      480.0,
      480.0,
      480.0,
      480.0,
      480.0,
      480.0,
      480.0
    ],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 2.44299674267101,
    "unexpected_percent_nonmissing": 2.44299674267101
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  },
  "success": false
}

In [12]:
# Expectation 4

validator.expect_column_to_exist(column='self_employed')

Calculating Metrics:   0%|          | 0/2 [00:00<?, ?it/s]

{
  "result": {},
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  },
  "success": true
}

In [10]:
# Expectation 5 : Column `dependents` must contain one of the following 4 things :
# 0 = tidak ada tanggungan
# 1 = tanggungan sebanyak 1 orang
# 2 = tanggungan sebanyak 2 orang
# 3+ = tanggungan lebih dari 3 orang

validator.expect_column_values_to_be_in_set('dependents', [0, 1, 2, '3+'])

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "result": {
    "element_count": 614,
    "unexpected_count": 563,
    "unexpected_percent": 91.69381107491856,
    "partial_unexpected_list": [
      "0",
      "1",
      "0",
      "0",
      "0",
      "2",
      "0",
      "2",
      "1",
      "2",
      "2",
      "2",
      "0",
      "2",
      "0",
      "1",
      "0",
      "0",
      "0",
      "0"
    ],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 91.69381107491856,
    "unexpected_percent_nonmissing": 91.69381107491856
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  },
  "success": false
}

In [11]:
# Expectation 6 : Column `total_amount` must in form of integer or float

validator.expect_column_values_to_be_in_type_list('loanamount', ['integer', 'float'])

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

{
  "result": {
    "observed_value": "float64"
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  },
  "success": true
}

In [None]:
# Save into Expectation Suite

validator.save_expectation_suite(discard_failed_expectations=False)