In [3]:
# Install the library

!pip install -q "great-expectations==0.18.19"

In [None]:
import great_expectations as gx

# Load the Great Expectations Data Context rooted at the local ./gx directory.
GX_ROOT_DIR = "./gx"
context = gx.get_context(context_root_dir=GX_ROOT_DIR)


In [None]:
# Register the pandas datasource.
# add_or_update_pandas is used instead of add_pandas so that this cell can be
# re-run (Restart & Run All) against the same ./gx context: a plain add_pandas
# raises once a datasource with this name has already been saved.
datasource_name = 'csv-data-megamart'
datasource = context.sources.add_or_update_pandas(datasource_name)

# Add the CSV file as a data asset of that datasource
asset_name = 'data-megamart'
path_to_data = 'data_clean.csv'
asset = datasource.add_csv_asset(asset_name, filepath_or_buffer=path_to_data)

# Build the batch request that the validator uses to read the asset
batch_request = asset.build_batch_request()


In [None]:
# Create or update the expectation suite that will collect the checks below
expectation_suite_name = "megamart-dataset"
context.add_or_update_expectation_suite(
    expectation_suite_name=expectation_suite_name,
)

# Build a validator that ties the batch request to that suite
validator = context.get_validator(
    batch_request=batch_request,
    expectation_suite_name=expectation_suite_name,
)

# Preview the first rows of the batch
validator.head()

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

Unnamed: 0,row_id,order_id,order_date,ship_date,ship_mode,customer_id,customer_name,segment,country,city,state,postal_code,region,product_id,category,sub_category,product_name,sales
0,1,CA-2017-152156,2017-11-08,2017-11-11,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,Kentucky,42420.0,South,FUR-BO-10001798,Furniture,Bookcases,Bush Somerset Collection Bookcase,26196.0
1,2,CA-2017-152156,2017-11-08,2017-11-11,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,Kentucky,42420.0,South,FUR-CH-10000454,Furniture,Chairs,"Hon Deluxe Fabric Upholstered Stacking Chairs,...",73194.0
2,3,CA-2017-138688,2017-06-12,2017-06-16,Second Class,DV-13045,Darrin Van Huff,Corporate,United States,Los Angeles,California,90036.0,West,OFF-LA-10000240,Office Supplies,Labels,Self-Adhesive Address Labels for Typewriters b...,1462.0
3,4,US-2016-108966,2016-10-11,2016-10-18,Standard Class,SO-20335,Sean O Donnel,Consumer,United States,Fort Lauderdale,Florida,33311.0,South,FUR-TA-10000577,Furniture,Tables,Bretford CR4500 Series Slim Rectangular Table,9575775.0
4,5,US-2016-108966,2016-10-11,2016-10-18,Standard Class,SO-20335,Sean O Donnel,Consumer,United States,Fort Lauderdale,Florida,33311.0,South,OFF-ST-10000760,Office Supplies,Storage,Eldon Fold N Roll Cart System,22368.0


In [None]:
# 1. Check that every value in the row_id column is unique
validator.expect_column_values_to_be_unique(column="row_id")

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 9789,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

Kolom order_id berisi ID pesanan (order) yang dibuat di Megamart. Nilainya tidak unik karena satu pesanan dapat mencakup beberapa produk, sehingga order_id yang sama dapat muncul di beberapa baris. Sebaliknya, kolom row_id berisi ID untuk setiap baris transaksi Megamart, sehingga kolom inilah yang diperiksa keunikannya.

In [None]:
# 2. Check that every value in the sales column lies between 0 and 25,000,000
validator.expect_column_values_to_be_between(
    column="sales",
    min_value=0,
    max_value=25_000_000,
)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 9789,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [None]:
# 3. Check that ship_mode only contains the known shipping classes:
#    'Standard Class', 'Second Class', 'First Class', 'Same Day'
validator.expect_column_values_to_be_in_set(
    column="ship_mode",
    value_set=["Standard Class", "Second Class", "First Class", "Same Day"],
)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 9789,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [None]:
# 4. Check that the sales column has a float type
validator.expect_column_values_to_be_of_type(column="sales", type_="float")

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "observed_value": "float64"
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [None]:
# 5. Check that every order_date value is exactly 10 characters long
validator.expect_column_value_lengths_to_equal(column="order_date", value=10)

Calculating Metrics:   0%|          | 0/9 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 9789,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [None]:
# 6. Check that the dataset has exactly 18 columns
validator.expect_table_column_count_to_equal(value=18)


Calculating Metrics:   0%|          | 0/3 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "observed_value": 18
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [None]:
# 7. Check that the dataset has exactly 9789 rows
validator.expect_table_row_count_to_equal(value=9789)


Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "observed_value": 9789
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [33]:
# Persist the expectations recorded on the validator into its expectation suite.
# Expectations that failed are kept as well (discard_failed_expectations=False).
validator.save_expectation_suite(
    discard_failed_expectations=False,
)

In [34]:
# Register (or replace) a checkpoint bound to the validator built earlier
checkpoint_name = "checkpoint_megamart"
checkpoint_1 = context.add_or_update_checkpoint(
    name=checkpoint_name,
    validator=validator,
)

In [35]:
# Run the checkpoint and keep the result for inspection.
checkpoint_result = checkpoint_1.run()

# Show the overall outcome so a failed validation is visible in the notebook
# instead of passing silently.
checkpoint_result.success

Calculating Metrics:   0%|          | 0/50 [00:00<?, ?it/s]

In [36]:
# Build the Data Docs site; the cell displays the returned mapping of
# site name to index-page URL.
data_docs_sites = context.build_data_docs()
data_docs_sites

{'local_site': 'file://f:\\Hacktiv8\\M3\\gx\\uncommitted/data_docs/local_site/index.html'}