# **IMPORT LIBRARIES**

In [1]:
import os

import great_expectations as gx
import pandas as pd

from great_expectations.checkpoint import Checkpoint

# **BUILD CONTEXT**

In [2]:
# Obtain the Great Expectations Data Context that every later cell works through
# (datasource, expectation suite, checkpoint, Data Docs).
# NOTE(review): the Data Docs output further down lands in a temp directory, which
# suggests this is an ephemeral (non-persisted) context — confirm that is intended.
context = gx.get_context()

# **BUILD DATA SOURCE**

In [3]:
# Location of the cleaned house-price CSV.
# Set the HOUSE_PRICE_CSV environment variable to run this notebook on another
# machine; when it is unset, the original author's local path is used, so the
# existing behaviour is unchanged.
DATA_PATH = os.environ.get(
    "HOUSE_PRICE_CSV",
    r'D:\git_practice\Phase 2\Week 2\MS3\data\P2M3_rais_yufli_data_clean.csv',
)

# Load the dataset that all expectations below are validated against.
df = pd.read_csv(DATA_PATH)

In [4]:
# Register an in-memory pandas datasource on the context.
# add_or_update_* (rather than add_*) keeps this cell re-runnable: a second
# execution against the same context replaces the datasource instead of
# failing because "pandas_datasource" already exists. This matches the
# add_or_update_* calls used for the suite and the checkpoint below.
datasource = context.sources.add_or_update_pandas(name="pandas_datasource")

# Declare a dataframe asset on that datasource under a readable name.
name = "House Price"
data_asset = datasource.add_dataframe_asset(name=name)

# Bind the loaded frame to the asset; this request is what the validator
# and the checkpoint use to fetch the batch.
my_batch_request = data_asset.build_batch_request(dataframe=df)

In [5]:
# Schema overview of the loaded frame: column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 545 entries, 0 to 544
Data columns (total 13 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   price              545 non-null    int64 
 1   area               545 non-null    int64 
 2   berdrooms          545 non-null    int64 
 3   bath_rooms         545 non-null    int64 
 4   stories            545 non-null    int64 
 5   main_road          545 non-null    object
 6   guest_room         545 non-null    object
 7   basement           545 non-null    object
 8   hot_water_heating  545 non-null    object
 9   air_conditioning   545 non-null    object
 10  parking            545 non-null    int64 
 11  prefarea           545 non-null    object
 12  furnishing_status  545 non-null    object
dtypes: int64(6), object(7)
memory usage: 55.5+ KB


In [6]:
# Summary statistics of the numeric columns; used to pick sensible bounds
# for the range expectations defined later.
df.describe()

Unnamed: 0,price,area,berdrooms,bath_rooms,stories,parking
count,545.0,545.0,545.0,545.0,545.0,545.0
mean,4766729.0,5150.541284,2.965138,1.286239,1.805505,0.693578
std,1870440.0,2170.141023,0.738064,0.50247,0.867492,0.861586
min,1750000.0,1650.0,1.0,1.0,1.0,0.0
25%,3430000.0,3600.0,2.0,1.0,1.0,0.0
50%,4340000.0,4600.0,3.0,1.0,2.0,0.0
75%,5740000.0,6360.0,3.0,2.0,2.0,1.0
max,13300000.0,16200.0,6.0,4.0,4.0,3.0


In [7]:
# Echo the batch request to confirm which datasource and data asset it targets.
my_batch_request

BatchRequest(datasource_name='pandas_datasource', data_asset_name='House Price', options={})

# **BUILD EXPECTATION SUITE AND VALIDATOR**

In [8]:
# Name shared by the suite registration and the validator lookup.
suite_name = "my_expectation_suite"

# Register the expectation suite on the context (created if missing,
# replaced if it already exists).
context.add_or_update_expectation_suite(suite_name)

# Pair the batch with the suite; expectations called on this validator
# are evaluated against the batch and recorded for the suite.
validator = context.get_validator(
    expectation_suite_name=suite_name,
    batch_request=my_batch_request,
)

# Preview the first rows of the batch through the validator.
validator.head()

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

Unnamed: 0,price,area,berdrooms,bath_rooms,stories,main_road,guest_room,basement,hot_water_heating,air_conditioning,parking,prefarea,furnishing_status
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished


In [10]:
# "price" must be at least 1 (strictly positive for this integer column);
# no upper bound is enforced.
validator.expect_column_values_to_be_between(
    column="price",
    min_value=1,
)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 545,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [11]:
# "area" must be at least 1 (strictly positive for this integer column);
# no upper bound is enforced.
validator.expect_column_values_to_be_between(
    column="area",
    min_value=1,
)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 545,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [13]:
# Bedroom count must lie between 1 and 10 (inclusive).
# NOTE: the column really is spelled "berdrooms" in this dataset (see df.info()),
# so the misspelling must be kept here until the source data is renamed.
validator.expect_column_values_to_be_between(
    column="berdrooms",
    min_value=1,
    max_value=10,
)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 545,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [15]:
# Bathroom count ("bath_rooms") must lie between 1 and 10 (inclusive).
validator.expect_column_values_to_be_between(
    column="bath_rooms",
    min_value=1,
    max_value=10,
)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 545,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [17]:
# Number of stories must lie between 1 and 4 (inclusive).
validator.expect_column_values_to_be_between(
    column="stories",
    min_value=1,
    max_value=4,
)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 545,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [18]:
# "air_conditioning" is a yes/no flag; any other value is unexpected.
validator.expect_column_values_to_be_in_set(
    column="air_conditioning",
    value_set=["yes", "no"],
)


Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 545,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [19]:
# "furnishing_status" may only take one of the three known categories.
validator.expect_column_values_to_be_in_set(
    column="furnishing_status",
    value_set=["furnished", "semi-furnished", "unfurnished"],
)


Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 545,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [20]:
# Persist the expectations accumulated on the validator into its expectation suite.
# discard_failed_expectations=False keeps every expectation in the suite,
# including any that did not pass on this batch.
validator.save_expectation_suite(discard_failed_expectations=False)

# **BUILD CHECKPOINT**

In [21]:
# One validation entry: run the saved suite against the house-price batch.
validation_entry = {
    "batch_request": my_batch_request,
    "expectation_suite_name": "my_expectation_suite",
}

# Register the checkpoint on the context (created if missing, replaced otherwise).
checkpoint = context.add_or_update_checkpoint(
    name="my_checkpoint",
    validations=[validation_entry],
)

In [22]:
# Execute every validation configured on the checkpoint.
checkpoint_result = checkpoint.run()

# Show the overall outcome so a failed validation is visible in the notebook
# instead of being silently ignored (True means every expectation passed).
checkpoint_result.success

Calculating Metrics:   0%|          | 0/52 [00:00<?, ?it/s]

In [23]:
# Render the Data Docs site from the stored suites and validation results.
# The returned mapping gives each site's name and the URL of its index.html.
context.build_data_docs()

{'local_site': 'file://C:\\Users\\RYX\\AppData\\Local\\Temp\\tmpjuz8a48u\\index.html'}

In [24]:
# Open the rendered Data Docs for viewing (presumably in the default web browser).
context.open_data_docs()