# Introduction

**Milestone 3**
```
Name : Rio Ardiarta Makhiyyuddin

Batch : SBY-003

Objective : Great Expectations
```

# Instantiate the Data Context

In [6]:
# Set up a file-backed Data Context for this project

from great_expectations.data_context import FileDataContext

# The project root is the notebook's current working directory
context = FileDataContext.create(
    project_root_dir='./',
)

# Connect to a Datasource

In [7]:
# Register the pandas Datasource under a name that is unique among Datasources.
# add_or_update_pandas (rather than add_pandas) keeps this cell re-runnable:
# the file-backed context persists Datasources in the project directory, so a
# second run with add_pandas fails because the name is already taken.
datasource_name = 'P2M3_rio_ardiarta_data'
datasource = context.sources.add_or_update_pandas(datasource_name)

# Register the cleaned CSV file as a data asset of that Datasource
asset_name = 'P2M3_rio_ardiarta_travel_asset'
path_to_data = 'P2M3_rio_ardiarta_data_clean.csv'
asset = datasource.add_csv_asset(asset_name, filepath_or_buffer=path_to_data)

# Build the batch request used later to create the validator
batch_request = asset.build_batch_request()

# Create an Expectation Suite

In [8]:
# Create the expectation suite (or replace it if it already exists)
expectation_suite_name = 'P2M3_rio_ardiarta_travel_asset'
context.add_or_update_expectation_suite(expectation_suite_name=expectation_suite_name)

# Bind the batch request to the suite so expectations can be run against the data
validator = context.get_validator(
    batch_request=batch_request,
    expectation_suite_name=expectation_suite_name,
)

# Preview the first rows to confirm the validator loaded the data
validator.head()

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

Unnamed: 0,age,attrition,business_travel,daily_rate,department,distance_from_home,education,education_field,employee_count,employee_number,...,relationship_satisfaction,standard_hours,stock_option_level,total_working_years,training_times_last_year,work_life_balance,years_at_company,years_in_current_role,years_since_last_promotion,year_swith_curr_manager
0,41,Yes,Travel_Rarely,1102,Sales,1,2,Life Sciences,1,1,...,1,80,0,8,0,1,6,4,0,5
1,49,No,Travel_Frequently,279,Research & Development,8,1,Life Sciences,1,2,...,4,80,1,10,3,3,10,7,1,7
2,37,Yes,Travel_Rarely,1373,Research & Development,2,2,Other,1,4,...,2,80,0,7,3,3,0,0,0,0
3,33,No,Travel_Frequently,1392,Research & Development,3,4,Life Sciences,1,5,...,3,80,0,8,3,3,8,7,3,0
4,27,No,Travel_Rarely,591,Research & Development,2,1,Medical,1,7,...,4,80,1,6,3,3,2,2,2,2


## **Expectation 1** - Column `monthly_income`

In [9]:
# Every row must have a value in `monthly_income`
validator.expect_column_values_to_not_be_null(column='monthly_income')

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 1470,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": []
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

## **Expectation 2** - Column `employee_number` 

In [10]:
# No two rows may share the same `employee_number`
validator.expect_column_values_to_be_unique(column='employee_number')

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 1470,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

## **Expectation 3** - Column `years_since_last_promotion`

In [11]:
# `years_since_last_promotion` must lie between min_value and max_value
validator.expect_column_values_to_be_between(
    column='years_since_last_promotion',
    min_value=0,
    max_value=15,
)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 1470,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

## **Expectation 4** - Column `marital_status`

In [12]:
# `marital_status` may only take one of the listed categories
validator.expect_column_values_to_be_in_set(
    column='marital_status',
    value_set=['Single', 'Married', 'Divorced'],
)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 1470,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

## **Expectation 5** - Column `monthly_income` is of type `int64`

In [13]:
# The dtype of `monthly_income` must be one of the listed types
validator.expect_column_values_to_be_in_type_list(
    column='monthly_income',
    type_list=['int64'],
)

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "observed_value": "int64"
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

## **Expectation 6** - Column `attrition`

In [14]:
# The `attrition` column must be present in the dataset
validator.expect_column_to_exist(column='attrition')

Calculating Metrics:   0%|          | 0/2 [00:00<?, ?it/s]

{
  "success": true,
  "result": {},
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

## **Expectation 7** - Column `work_life_balance`

In [15]:
# `work_life_balance` may contain at most 4 distinct values.
# NOTE(review): a lower bound of 0 never constrains a count; consider
# min_value=1 if the column is required to hold at least one value.
validator.expect_column_unique_value_count_to_be_between(
    column='work_life_balance',
    min_value=0,
    max_value=4,
)

Calculating Metrics:   0%|          | 0/4 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "observed_value": 4
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

# Save the Expectation Suite

In [16]:
# Persist every expectation defined above, including any that failed
validator.save_expectation_suite(
    discard_failed_expectations=False,
)

In [17]:
# Register (or replace) a checkpoint that runs the validator built above
checkpoint = context.add_or_update_checkpoint(
    name='checkpoint',
    validator=validator,
)

In [18]:
# Run the checkpoint to validate the data against the saved suite
checkpoint_result = checkpoint.run()

# Show the overall outcome so a failed validation run is visible in the notebook
checkpoint_result.success

Calculating Metrics:   0%|          | 0/30 [00:00<?, ?it/s]

In [19]:
# Build the Data Docs site; the call's return value is displayed as the cell output
context.build_data_docs()

{'local_site': 'file://c:\\Users\\lenovo\\Documents\\HACKTIV8\\PHASE 2\\EXAM\\ML 3\\gx\\uncommitted/data_docs/local_site/index.html'}