In [1]:

import os

from great_expectations.data_context import FileDataContext

In [2]:
# Initialise a file-backed Great Expectations data context rooted at the
# current working directory.
project_root = './'
context = FileDataContext.create(project_root_dir=project_root)

In [4]:
# Register the pandas Datasource. The name must be unique between Datasources.
# `add_or_update_pandas` is used instead of `add_pandas` so that re-running this
# cell (e.g. Restart Kernel -> Run All) does not fail because a Datasource with
# this name was already stored in the file-backed context by a previous run.
datasource_name = 'csv-invistico-airline-gx-m3'
datasource = context.sources.add_or_update_pandas(datasource_name)

# Location of the CSV to validate. Set the GX_DATA_PATH environment variable to
# point at a different copy of the dataset; when it is unset, the original
# author's local path is used so existing behaviour is preserved.
path_to_data = os.environ.get(
    'GX_DATA_PATH',
    '/Users/daniyalkahfi/Desktop/Data/Phase 2/Milestone 3/Dataset_Modified_For_GX.csv',
)

# Register the CSV file as a data asset on the Datasource
asset_name = 'satisfication-report'
asset = datasource.add_csv_asset(asset_name, filepath_or_buffer=path_to_data)

# Build the batch request that the validator will consume
batch_request = asset.build_batch_request()

In [5]:
# Create the expectation suite (or update it if one with this name already exists)
expectation_suite_name = 'expectation-invistico-airline-dataset'
context.add_or_update_expectation_suite(expectation_suite_name=expectation_suite_name)

# Build a validator that evaluates the batch against the suite defined above
validator = context.get_validator(
    expectation_suite_name=expectation_suite_name,
    batch_request=batch_request,
)

# Sanity check: preview the first rows of the batch through the validator
validator.head()

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

Unnamed: 0,satisfaction,gender,customer_type,age,type_of_travel,class_type,flight_distance,seat_comfort,time_convenient,food_and_drink,...,ease_of_online_booking,on_board_service,leg_room_service,baggage_handling,checkin_service,cleanliness,online_boarding,departure_delay_in_minutes,arrival_delay_in_minute,unique
0,satisfied,Female,Loyal Customer,65,Personal Travel,Eco,265,0,0,0,...,3,3,0,3,5,3,2,0,0.0,65_Eco_265
1,satisfied,Male,Loyal Customer,47,Personal Travel,Business,2464,0,0,0,...,3,4,4,4,2,3,2,310,305.0,47_Business_2464
2,satisfied,Female,Loyal Customer,15,Personal Travel,Eco,2138,0,0,0,...,2,3,3,4,4,4,2,0,0.0,15_Eco_2138
3,satisfied,Female,Loyal Customer,60,Personal Travel,Eco,623,0,0,0,...,1,1,0,1,4,1,3,0,0.0,60_Eco_623
4,satisfied,Female,Loyal Customer,70,Personal Travel,Eco,354,0,0,0,...,2,2,0,2,4,2,5,0,0.0,70_Eco_354


# Expectations

In [6]:
# Expectation 1 : every row must have a value in the `satisfaction` column (no nulls)

validator.expect_column_values_to_not_be_null(column='satisfaction')

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 129487,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": []
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [7]:
# Expectation 2 : every row must have a value in the `class_type` column (no nulls)

validator.expect_column_values_to_not_be_null(column='class_type')

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 129487,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": []
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [8]:
# Expectation 3 : no value in the `unique` column may appear more than once
# NOTE(review): the recorded run of this cell reports success=false, with roughly
# 30.9% of rows flagged as duplicates — confirm whether this column is really
# expected to be a unique key.

validator.expect_column_values_to_be_unique(column='unique')

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": false,
  "result": {
    "element_count": 129487,
    "unexpected_count": 40039,
    "unexpected_percent": 30.921250781931775,
    "partial_unexpected_list": [
      "47_Business_2464",
      "10_Eco_1812",
      "56_Business_73",
      "34_Eco_3633",
      "47_Eco_84",
      "25_Eco_2122",
      "30_Eco_1817",
      "20_Eco_2485",
      "48_Eco_1449",
      "57_Eco_2840",
      "31_Eco_2162",
      "22_Eco_2352",
      "33_Eco_2045",
      "30_Eco_1826",
      "65_Eco_423",
      "24_Eco_1473",
      "34_Eco_1952",
      "52_Eco_1744",
      "8_Eco_2457",
      "49_Eco_2316"
    ],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 30.921250781931775,
    "unexpected_percent_nonmissing": 30.921250781931775
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [9]:
# Expectation 4 : Column `seat_comfort` must lie between 0 and 5.
# No strict_min / strict_max is passed, so both bounds are inclusive by default:
# a value of exactly 0 or exactly 5 is accepted.

validator.expect_column_values_to_be_between(
    column='seat_comfort',
    min_value=0,
    max_value=5,
)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 129487,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [10]:
# Expectation 5 : values in `online_boarding` must be stored as an integer type
validator.expect_column_values_to_be_in_type_list(
    column='online_boarding',
    type_list=['int'],
)


Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "observed_value": "int64"
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [11]:
# Expectation 6 : `flight_distance` must not contain negative values.
# This is checked through the column minimum: it must be >= 0, with no upper bound.
validator.expect_column_min_to_be_between(
    column='flight_distance',
    min_value=0,
    max_value=None,
)


Calculating Metrics:   0%|          | 0/4 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "observed_value": 50
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [12]:
# Expectation 7 : the distinct values of `satisfaction` must all be drawn from
# the set {'satisfied', 'dissatisfied'}
validator.expect_column_distinct_values_to_be_in_set(
    column='satisfaction',
    value_set=['satisfied', 'dissatisfied'],
)

Calculating Metrics:   0%|          | 0/4 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "observed_value": [
      "dissatisfied",
      "satisfied"
    ],
    "details": {
      "value_counts": [
        {
          "value": "dissatisfied",
          "count": 58605
        },
        {
          "value": "satisfied",
          "count": 70882
        }
      ]
    }
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}