In [1]:
# Install the library

!pip install -q great-expectations

In [2]:
# Set up a file-backed Great Expectations data context rooted at the current directory

from great_expectations.data_context import FileDataContext

context = FileDataContext.create(
    project_root_dir='./',
)

In [3]:
# Give a name to a Datasource. This name must be unique between Datasources.
# add_or_update_pandas (instead of add_pandas) keeps this cell re-runnable: the file-backed
# context persists the Datasource, so a plain add fails on the next "Restart & Run All".
datasource_name = 'BigMarts'
datasource = context.sources.add_or_update_pandas(datasource_name)

# Give a name to a data asset
asset_name = 'Clean'
# NOTE(review): absolute, machine-specific path - the notebook only runs on the author's
# machine as written; consider a path relative to the project directory.
path_to_data = r'C:\Users\Richard E.V\Desktop\project-m3\dags\P2M3_Richard_Edgina_data_clean.csv'
asset = datasource.add_csv_asset(asset_name, filepath_or_buffer=path_to_data)

# Build the batch request that later cells hand to the validator
batch_request = asset.build_batch_request()

In [4]:
# Register the expectation suite under its name (add it, or update it if it already exists)
expectation_suite_name = 'MilestoneExpectationProject'
context.add_or_update_expectation_suite(expectation_suite_name=expectation_suite_name)

# Build a validator that ties the batch request to the suite above
validator = context.get_validator(batch_request=batch_request, expectation_suite_name=expectation_suite_name)

# Preview the first rows of the batch through the validator
validator.head()

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

Unnamed: 0,productid,weight,fatcontent,productvisibility,producttype,mrp,outletid,establishmentyear,outletsize,locationtype,outlettype,outletsales,id
0,FDA15,9.3,Low Fat,0.016047,Dairy,249.8092,OUT049,1999,Medium,Tier 1,Supermarket Type1,3735.138,1
1,DRC01,5.92,Regular,0.019278,Soft Drinks,48.2692,OUT018,2009,Medium,Tier 3,Supermarket Type2,443.4228,2
2,FDN15,17.5,Low Fat,0.01676,Meat,141.618,OUT049,1999,Medium,Tier 1,Supermarket Type1,2097.27,3
3,FDX07,19.2,Regular,0.0,Fruits and Vegetables,182.095,OUT010,1998,Medium,Tier 3,Grocery Store,732.38,4
4,NCD19,8.93,Low Fat,0.0,Household,53.8614,OUT013,1987,High,Tier 3,Supermarket Type1,994.7052,5


### ***Expectation 1: Column `id` must be unique***

In [5]:
# Expect no repeated values in the `id` column
validator.expect_column_values_to_be_unique(
    column='id',
)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 8523,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

### ***Expectation 2: Column `weight` must be between 4.55 and 21.35 (inclusive)***

In [6]:
# Expect every `weight` value to lie between the given minimum and maximum
validator.expect_column_values_to_be_between(
    column='weight',
    min_value=4.55,
    max_value=21.35,
)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 8523,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

### ***Expectation 3: The most common value of column `fatcontent` must be in the set {`Low Fat`, `Regular`}***

In [7]:
# Expect the most frequent `fatcontent` value to be one of the listed categories
# (ties_okay=True is passed so that tied most-common values are tolerated)
validator.expect_column_most_common_value_to_be_in_set(column="fatcontent", value_set=['Low Fat', 'Regular'], ties_okay=True)

Calculating Metrics:   0%|          | 0/4 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "observed_value": [
      "Low Fat"
    ]
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

### ***Expectation 4: Values in column `outletsales` must have an integer or float data type***

In [8]:
# Validate the data type of the column.
# 'integer' is not a dtype name the pandas backend resolves, so that entry could never match an
# integer column (the check only passed through 'float'). Use the concrete dtype names instead,
# consistent with the "float64" used for the `mrp` type check in this notebook.
validator.expect_column_values_to_be_in_type_list(
    column='outletsales',
    type_list=['int64', 'float64'],
)

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "observed_value": "float64"
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

### ***Expectation 5: Values in column `establishmentyear` must be exactly 4 characters long***

In [9]:
# Expect each `establishmentyear` value to have a length of 4
validator.expect_column_value_lengths_to_equal(
    column='establishmentyear',
    value=4,
)

Calculating Metrics:   0%|          | 0/9 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 8523,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

### ***Expectation 6: Values in column `mrp` must have data type `float64`***

In [10]:
# Expect the `mrp` column to be stored with the float64 data type
validator.expect_column_values_to_be_of_type(
    column='mrp',
    type_="float64",
)

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "observed_value": "float64"
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

### ***Expectation 7: The table must contain exactly 8523 rows***

In [11]:
# Expect the number of rows in the batch to equal the given value
validator.expect_table_row_count_to_equal(
    value=8523,
)

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "observed_value": 8523
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}