In [6]:
import great_expectations as gx

from great_expectations.checkpoint import Checkpoint

In [7]:
# Obtain the Great Expectations data context that every later cell
# (datasource, suite, checkpoint, data docs) is registered against.
context = gx.get_context()

In [8]:
import pandas as pd

# Location of the cleaned dataset, relative to the notebook's working directory.
CLEAN_DATA_CSV = 'P2M3_Sagara_data_clean.csv'

# Load the cleaned data into memory; this frame is the batch that gets validated.
dataframe = pd.read_csv(CLEAN_DATA_CSV)

In [10]:
# Register the pandas datasource idempotently. Plain `add_pandas` raises when a
# datasource called "pandas_datasource" is already stored in the context (for
# example when this notebook is re-run against the same project directory),
# whereas `add_or_update_pandas` creates it or replaces the existing one.
datasource = context.sources.add_or_update_pandas(name="pandas_datasource")

In [11]:
# Name under which the in-memory DataFrame is registered as a data asset.
name = "loan_prediction"
# Declare a DataFrame-backed asset on the pandas datasource.
data_asset = datasource.add_dataframe_asset(name=name)
# Bind the loaded DataFrame to the asset; this batch request is reused by the
# checkpoint further down.
my_batch_request = data_asset.build_batch_request(dataframe=dataframe)

In [14]:
# Register the expectation suite that the validator below will populate.
expectation_suite_name = "hacktiv8_car_assignment"
context.add_or_update_expectation_suite(expectation_suite_name=expectation_suite_name)

# Reuse the batch request already built for the data asset instead of building
# a second, identical one, so the validator and the checkpoint are guaranteed
# to look at the same batch.
validator = context.get_validator(
    batch_request=my_batch_request,
    expectation_suite_name=expectation_suite_name,
)

# A bare last expression gives the notebook's rich DataFrame display rather
# than the wrapped plain-text dump produced by print().
validator.head()

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

    loan_id gender married dependents     education self_employed  \
0  LP001002   Male      No          0      Graduate            No   
1  LP001003   Male     Yes          1      Graduate            No   
2  LP001005   Male     Yes          0      Graduate           Yes   
3  LP001006   Male     Yes          0  Not Graduate            No   
4  LP001008   Male      No          0      Graduate            No   

   applicantincome  coapplicantincome  loanamount  loan_amount_term  \
0             5849                  0         128               360   
1             4583               1508         128               360   
2             3000                  0          66               360   
3             2583               2358         120               360   
4             6000                  0         141               360   

   credit_history property_area loan_status  
0               1         Urban           Y  
1               1         Rural           N  
2               1   

### 1. Validasi Nilai Unik (loan_id):

In [16]:
# loan_id acts as the record identifier, so no value may appear more than once.
validator.expect_column_values_to_be_unique(column='loan_id')

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 614,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

### 2. Validasi Nilai Tidak Null dan Batas Minimum (loanamount, applicantincome, coapplicantincome):

In [18]:
# Smallest value accepted for each numeric column. The co-applicant income is
# allowed to be 0; the other two columns must be at least 1.
minimum_by_column = {
    'loanamount': 1,
    'applicantincome': 1,
    'coapplicantincome': 0,
}

# For every column: first require no missing values, then enforce the floor.
for numeric_column, floor_value in minimum_by_column.items():
    validator.expect_column_values_to_not_be_null(column=numeric_column)
    minimum_result = validator.expect_column_values_to_be_between(
        column=numeric_column,
        min_value=floor_value,
    )

# Show the result of the final expectation as the cell output.
minimum_result

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 614,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

### 3. Validasi Rentang Nilai (loanamount, applicantincome, coapplicantincome):

In [20]:
# Inclusive (lower, upper) bounds accepted for each numeric column.
bounds_by_column = {
    'loanamount': (1, 1000),
    'applicantincome': (1, 100000),
    'coapplicantincome': (0, 50000),
}

# Apply the range expectation column by column, in the order listed above.
for bounded_column, (lower_bound, upper_bound) in bounds_by_column.items():
    range_result = validator.expect_column_values_to_be_between(
        column=bounded_column,
        min_value=lower_bound,
        max_value=upper_bound,
    )

# Show the result of the final expectation as the cell output.
range_result

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 614,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

### 4. Validasi Nilai Kategorikal (gender, married, education, self_employed, property_area, loan_status):

In [21]:
# Closed set of labels permitted in each categorical column.
allowed_labels_by_column = {
    'gender': ['Male', 'Female'],
    'married': ['Yes', 'No'],
    'education': ['Graduate', 'Not Graduate'],
    'self_employed': ['Yes', 'No'],
    'property_area': ['Urban', 'Rural', 'Semiurban'],
    'loan_status': ['Y', 'N'],
}

# The distinct values observed in each column must all belong to its label set.
for categorical_column, allowed_labels in allowed_labels_by_column.items():
    category_result = validator.expect_column_distinct_values_to_be_in_set(
        column=categorical_column,
        value_set=allowed_labels,
    )

# Show the result of the final expectation as the cell output.
category_result

Calculating Metrics:   0%|          | 0/4 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/4 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/4 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/4 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/4 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/4 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "observed_value": [
      "N",
      "Y"
    ],
    "details": {
      "value_counts": [
        {
          "value": "N",
          "count": 192
        },
        {
          "value": "Y",
          "count": 422
        }
      ]
    }
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

### 5. Validasi Jumlah Pinjaman (loanamount):

In [27]:
# Tighten the upper limit on the loan amount to 700.
# An expectation of the same type on the same column replaces the one already
# stored in the suite, so the lower bound of 1 set by the earlier range check
# has to be restated here; otherwise the saved suite would only enforce the
# maximum and silently accept zero or negative loan amounts.
validator.expect_column_values_to_be_between(column='loanamount', min_value=1, max_value=700)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 614,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

### 6. Validasi Durasi Pinjaman (loan_amount_term):

In [24]:
# The loan term must lie between 1 and 480 inclusive
# (unit presumably months — TODO confirm against the data dictionary).
validator.expect_column_values_to_be_between(column='loan_amount_term', min_value=1, max_value=480)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 614,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

### 7. Validasi Riwayat Kredit (credit_history):

In [25]:
# credit_history is treated as a binary flag: only 0 and 1 may appear.
validator.expect_column_distinct_values_to_be_in_set(column='credit_history', value_set=[0, 1])

Calculating Metrics:   0%|          | 0/4 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "observed_value": [
      0,
      1
    ],
    "details": {
      "value_counts": [
        {
          "value": 0,
          "count": 89
        },
        {
          "value": 1,
          "count": 525
        }
      ]
    }
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [28]:
# Persist the expectations collected on the validator into the suite.
# discard_failed_expectations=False keeps expectations in the saved suite even
# if they did not pass on this batch.
validator.save_expectation_suite(discard_failed_expectations=False)

In [29]:
my_checkpoint_name = "loan_prediction"

# Actions executed after validation, in order: store the validation result,
# then refresh the Data Docs.
checkpoint_actions = [
    {
        "name": "store_validation_result",
        "action": {"class_name": "StoreValidationResultAction"},
    },
    {
        "name": "update_data_docs",
        "action": {"class_name": "UpdateDataDocsAction"},
    },
]

# Checkpoint that validates the registered batch against the saved suite.
# Each run is named from the timestamp template below.
checkpoint = Checkpoint(
    name=my_checkpoint_name,
    run_name_template="%Y%m%d-%H%M%S-loan_prediction",
    data_context=context,
    batch_request=my_batch_request,
    expectation_suite_name=expectation_suite_name,
    action_list=checkpoint_actions,
)

In [30]:
# Register the checkpoint with the context, creating it or replacing an
# existing checkpoint of the same name.
context.add_or_update_checkpoint(checkpoint=checkpoint)

{
  "action_list": [
    {
      "name": "store_validation_result",
      "action": {
        "class_name": "StoreValidationResultAction"
      }
    },
    {
      "name": "update_data_docs",
      "action": {
        "class_name": "UpdateDataDocsAction"
      }
    }
  ],
  "batch_request": {
    "datasource_name": "pandas_datasource",
    "data_asset_name": "loan_prediction",
    "options": {}
  },
  "class_name": "Checkpoint",
  "config_version": 1.0,
  "evaluation_parameters": {},
  "expectation_suite_name": "hacktiv8_car_assignment",
  "module_name": "great_expectations.checkpoint",
  "name": "loan_prediction",
  "profilers": [],
  "run_name_template": "%Y%m%d-%H%M%S-loan_prediction",
  "runtime_configuration": {},
  "validations": []
}

In [31]:
# Run the checkpoint: validates the batch against the suite and then executes
# the checkpoint's configured action list. The result object is kept for
# later inspection.
checkpoint_result = checkpoint.run()

Calculating Metrics:   0%|          | 0/54 [00:00<?, ?it/s]

In [32]:
# Build the Data Docs site; the call returns a mapping of site name to the
# URL of its index page.
context.build_data_docs()

{'local_site': 'file://d:\\Bootcamp Hacktiv8\\Phase 2\\Milestone\\p2-ftds007-hck-m3-Sagara7\\gx\\uncommitted/data_docs/local_site/index.html'}

In [33]:
# Open the generated Data Docs for viewing (needs a desktop/browser
# environment, so this has no visible effect on a headless run).
context.open_data_docs()