# Import Librarie

In [89]:
from great_expectations.data_context import FileDataContext
context = FileDataContext.create(project_root_dir='C:/Users/ASUS ROG -STRIX')

In [90]:
# Give a name to a Datasource. This name must be unique between Datasources.
datasource_name = 'csv-mobil'
datasource = context.sources.add_pandas(datasource_name)

# Give a name to a data asset
asset_name = 'My_Milsstone'
path_to_data = 'P2M3_Thariq_data_raw.csv'
asset = datasource.add_csv_asset(asset_name, filepath_or_buffer=path_to_data)

# Build batch request
batch_request = asset.build_batch_request()

In [91]:
# Creat an expectation suite
expectation_suite_name = 'expectation-trip-dataset'
context.add_or_update_expectation_suite(expectation_suite_name)

# Create a validator using above expectation suite
validator = context.get_validator(
    batch_request = batch_request,
    expectation_suite_name = expectation_suite_name
)

# Check the validator
validator.head()

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

Unnamed: 0,S.No.,Name,Location,Year,Kilometers_Driven,Fuel_Type,Transmission,Owner_Type,Mileage,Engine,Power,Seats,New_Price,Price
0,0,Maruti Wagon R LXI CNG,Mumbai,2010,72000,CNG,Manual,First,26.6 km/kg,998 CC,58.16 bhp,5.0,,1.75
1,1,Hyundai Creta 1.6 CRDi SX Option,Pune,2015,41000,Diesel,Manual,First,19.67 kmpl,1582 CC,126.2 bhp,5.0,,12.5
2,2,Honda Jazz V,Chennai,2011,46000,Petrol,Manual,First,18.2 kmpl,1199 CC,88.7 bhp,5.0,8.61 Lakh,4.5
3,3,Maruti Ertiga VDI,Chennai,2012,87000,Diesel,Manual,First,20.77 kmpl,1248 CC,88.76 bhp,7.0,,6.0
4,4,Audi A4 New 2.0 TDI Multitronic,Coimbatore,2013,40670,Diesel,Automatic,Second,15.2 kmpl,1968 CC,140.8 bhp,5.0,,17.74


## Expectation

In [92]:
#  Column `pickup_datetime` can not contain missing values

validator.expect_column_values_to_not_be_null('Year')

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 7253,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": []
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [93]:
# Expectation 4 : Column `price` must be less than $ 100

validator.expect_column_values_to_be_between(
    column='Price', min_value=0, max_value=100
)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": false,
  "result": {
    "element_count": 7253,
    "unexpected_count": 2,
    "unexpected_percent": 0.033228110981890685,
    "partial_unexpected_list": [
      160.0,
      120.0
    ],
    "missing_count": 1234,
    "missing_percent": 17.01364952433476,
    "unexpected_percent_total": 0.02757479663587481,
    "unexpected_percent_nonmissing": 0.033228110981890685
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [94]:
#  Column `name` must be unique

validator.expect_column_values_to_be_unique('Name')

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": false,
  "result": {
    "element_count": 7253,
    "unexpected_count": 6391,
    "unexpected_percent": 88.11526264993795,
    "partial_unexpected_list": [
      "Maruti Wagon R LXI CNG",
      "Hyundai Creta 1.6 CRDi SX Option",
      "Honda Jazz V",
      "Maruti Ertiga VDI",
      "Audi A4 New 2.0 TDI Multitronic",
      "Nissan Micra Diesel XV",
      "Toyota Innova Crysta 2.8 GX AT 8S",
      "Volkswagen Vento Diesel Comfortline",
      "Tata Indica Vista Quadrajet LS",
      "Maruti Ciaz Zeta",
      "Honda City 1.5 V AT Sunroof",
      "Maruti Swift VDI BSIV",
      "Land Rover Range Rover 2.2L Pure",
      "Land Rover Freelander 2 TD4 SE",
      "Mitsubishi Pajero Sport 4X4",
      "Honda Amaze S i-Dtech",
      "Maruti Swift DDiS VDI",
      "Mercedes-Benz New C-Class C 220 CDI BE Avantgare",
      "BMW 3 Series 320d",
      "Maruti S Cross DDiS 200 Alpha"
    ],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 88.1152626499379

In [95]:
 #Column `kilometer driven` must be exist to calculate the price 

validator.expect_column_to_exist(column='Kilometers_driven')

Calculating Metrics:   0%|          | 0/2 [00:00<?, ?it/s]

{
  "success": false,
  "result": {},
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [96]:
# Column `price` must in form of integer or float

validator.expect_column_values_to_be_in_type_list('Price', ['integer', 'float'])

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "observed_value": "float64"
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [97]:
# Minmax column price

validator.expect_column_values_to_be_between(
    column='Price', min_value=0.440000, max_value=160000000
)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 7253,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 1234,
    "missing_percent": 17.01364952433476,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [98]:
# Save into Expectation Suite

validator.save_expectation_suite(discard_failed_expectations=False)

## Checkpoint

In [99]:
# Create a checkpoint

checkpoint_1 = context.add_or_update_checkpoint(
    name = 'checkpoint_1',
    validator = validator,
)

In [100]:
# Run a checkpoint

checkpoint_result = checkpoint_1.run()

Calculating Metrics:   0%|          | 0/22 [00:00<?, ?it/s]

## Data Docs

In [101]:
# Build data docs

context.build_data_docs()

{'local_site': 'file://C:\\Users\\ASUS ROG -STRIX\\gx\\uncommitted/data_docs/local_site/index.html'}

## Data Validation using another file

In [102]:
# Connect to a data source

import great_expectations as gx

context_jan = gx.get_context(context_root_dir='./gx/')

In [109]:
# Give a name to a Datasource. This name must be unique between Datasources.
datasource_name = 'csv-mobil2'
datasource = context_jan.sources.add_pandas(datasource_name)

# Give a name to a data asset
asset_name = 'My_Milsstone2'
path_to_data = 'P2M3_Thariq_data_raw_clean.csv'
asset = datasource.add_csv_asset(asset_name, filepath_or_buffer=path_to_data)

# Build batch request
batch_request_mob = asset.build_batch_request()

In [111]:
# Create a checkpoint

checkpoint_2 = context_jan.add_or_update_checkpoint(
    name = 'checkpoint_2',
    batch_request = batch_request_mob,
    expectation_suite_name = expectation_suite_name
)

checkpoint_result = checkpoint_2

In [112]:
# Build data docs

context.build_data_docs()

{'local_site': 'file://C:\\Users\\ASUS ROG -STRIX\\gx\\uncommitted/data_docs/local_site/index.html'}