### **Konfigurasi dan inisialisasi Great Expectations**

In [1]:
# Import the file-backed data context class from the Great Expectations library.
from great_expectations.data_context import FileDataContext

# Create the data context, using the current directory ('./') as the
# Great Expectations project root.
context = FileDataContext.create(project_root_dir='./')

In [2]:
# Name under which the pandas datasource is registered in the context.
datasource_name = 'data_m3_fix'

# Register the pandas datasource. `add_or_update_pandas` is used instead of
# `add_pandas` so the cell can be re-run (Restart & Run All) when the
# file-backed context already contains a datasource with this name.
datasource = context.sources.add_or_update_pandas(datasource_name)

# Name of the CSV data asset inside the datasource.
# NOTE(review): the previewed data is a phone-listing table, so 'birdstrikes_fix'
# looks like a leftover name; it is kept because it is a runtime identifier.
asset_name = 'birdstrikes_fix'
# Path to the cleaned CSV under test, relative to the project root (the
# notebook's working directory) so the notebook is not tied to one user's
# machine. Forward slashes are accepted on Windows as well.
path_to_data = 'airflow/data/P2M3_antonius_daeng_data_clean.csv'
# Attach the CSV file to the datasource as a data asset.
asset = datasource.add_csv_asset(asset_name, filepath_or_buffer=path_to_data)

# Build the batch request that the validator uses to load the asset.
batch_request = asset.build_batch_request()

#### **ekspektasi suite** ***(expectation suite)***

In [26]:
# Name of the expectation suite that collects the checks defined in this notebook.
expectation_suite_name = 'expectation-data-m3_fix'

# Add the suite to the context, or update it if one with this name is already stored.
context.add_or_update_expectation_suite(expectation_suite_name=expectation_suite_name)

# Create a validator that ties the batch request to the suite; every
# expectation below is evaluated through this object.
validator = context.get_validator(
    batch_request=batch_request,
    expectation_suite_name=expectation_suite_name,
)

# Preview the first rows of the loaded batch (this shows data, not validation results).
validator.head()

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

Unnamed: 0,brand,model,sd_card,main_camera,resolution,display,sim_card,Sistem_operasi,color,region,location,screen_size,battery,storage,ram,selfie_camera,price
0,Samsung,Galaxy S10,yes,"3 Cameras: 12, 12, 16 MP",1440 x 3040,AMOLED,Single,Android,White,Greater Accra,Circle,6.1,3400.0,128.0,8.0,10.0,2450
1,Samsung,Galaxy Note 20 Ultra,yes,"3 Cameras: 108, 12, 12 MP",1440 x 3088,AMOLED,Dual,Android,Bronze,Greater Accra,Circle,6.9,4500.0,128.0,12.0,10.0,5000
2,Samsung,Galaxy A54 5G,yes,"3 Cameras: 50, 12, 5 MP",1080 x 2340,Super AMOLED,Dual,Android,Black,Greater Accra,Accra Metropolitan,6.4,5000.0,256.0,8.0,32.0,3000
3,Tecno,Camon 20 Pro,no,"2 Cameras: 64, 2 MP",1080 x 2400,AMOLED,Dual,Android,Black,Greater Accra,Accra Metropolitan,6.67,5000.0,256.0,8.0,32.0,1900
4,Samsung,Galaxy S22 Ultra,no,"4 Cameras: 108, 10, 10, 12 MP",1400 x 3088,AMOLED,Dual,Android,Black,Greater Accra,Accra Metropolitan,6.8,5000.0,512.0,12.0,40.0,5750


#### **1. apakah setidaknya 8% nilai dalam kolom `price` adalah unik (`mostly=0.08`)**

In [18]:
# Uniqueness check on `price`. With mostly=0.08 the expectation succeeds as
# long as at least 8% of the values are unique.
# NOTE(review): the recorded output reports success with ~91.6% unexpected
# (duplicated) values, so this threshold does not enforce uniqueness — confirm
# that such a loose check is intended.
validator.expect_column_values_to_be_unique(column='price', mostly=0.08)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 1724,
    "unexpected_count": 1579,
    "unexpected_percent": 91.5893271461717,
    "partial_unexpected_list": [
      2450,
      5000,
      3000,
      1900,
      2800,
      3710,
      3710,
      2450,
      2700,
      1930,
      1400,
      2050,
      2550,
      1410,
      2100,
      7500,
      5500,
      5000,
      1430,
      1930
    ],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 91.5893271461717,
    "unexpected_percent_nonmissing": 91.5893271461717
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

#### **2. apakah semua nilai dalam kolom `battery` berada dalam rentang antara 0 hingga 100,000**

In [19]:
# Range check: every `battery` value must lie between 0 and 100,000.
validator.expect_column_values_to_be_between(
    column='battery',
    min_value=0,
    max_value=100_000,
)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 1724,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

#### **3. apakah semua nilai dalam kolom `brand` adalah anggota dari set nilai yang telah ditentukan.**

In [24]:
# Membership check: every `brand` value must be one of the brands listed below.
validator.expect_column_values_to_be_in_set(
    column='brand',
    value_set=[
        'Samsung', 'Tecno', 'LG', 'Vivo', 'Huawei', 'Infinix', 'Apple',
        'Google', 'Oppo', 'Nokia', 'Other Brand', 'Itel', 'Motorola',
        'Realme', 'Xiaomi', 'Meizu', 'OnePlus', 'Bontel', 'HTC', 'X-Tigi',
        'Honor',
    ],
)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 1724,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

#### **4. apakah semua nilai dalam kolom `main_camera` adalah tipe data list**

In [37]:
# Type check: every `main_camera` value is expected to be a Python list.
# NOTE(review): the recorded output shows this expectation failing for 100% of
# rows because the values are strings such as "3 Cameras: 12, 12, 16 MP".
# Confirm whether a failing expectation is intended here.
validator.expect_column_values_to_be_of_type(column='main_camera', type_='list')


Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": false,
  "result": {
    "element_count": 1724,
    "unexpected_count": 1724,
    "unexpected_percent": 100.0,
    "partial_unexpected_list": [
      "3 Cameras: 12, 12, 16 MP",
      "3 Cameras: 108, 12, 12 MP",
      "3 Cameras: 50, 12, 5 MP",
      "2 Cameras: 64, 2 MP",
      "4 Cameras: 108, 10, 10, 12 MP",
      "4 Cameras: 108, 12, 5, 5 MP",
      "3 Cameras: 50, 12, 5 MP",
      "4 Cameras: 200, 10, 10, 12 MP",
      "3 Cameras: 50, 12, 5 MP",
      "3 Cameras: 50, 12, 5 MP",
      "3 Cameras: 50, 108, 2 MP",
      "3 Cameras: 50, 108, 2 MP",
      "4 Cameras: 200, 10, 10, 12 MP",
      "3 Cameras: 50, 12, 5 MP",
      "3 Cameras: 50, 12, 5 MP",
      "4 Cameras: 200, 10, 10, 12 MP",
      "2 Cameras: 64, 2 MP",
      "1 Cameras: 50 MP",
      "3 Cameras: 50, 12, 5 MP",
      "3 Cameras: 50, 108, 2 MP"
    ],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 100.0,
    "unexpected_percent_nonmissing": 100.0
  },
  "meta": {},
  "

#### **5. memeriksa apakah semua nilai dalam kolom `price` adalah tipe data int64**

In [33]:
# Type check: the `price` column must have dtype int64.
validator.expect_column_values_to_be_of_type(column='price', type_='int64')

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "observed_value": "int64"
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

#### **6. memeriksa panjang (length) dari setiap nilai pada kolom `brand`**

In [39]:
# Length check: every `brand` value must be between 1 and 11 characters long.
# The upper bound is 11 (not 10) so that this check agrees with the allowed
# brand set used by the `brand` membership expectation, whose longest entry,
# 'Other Brand', has 11 characters. With a bound of 10 the two expectations
# would contradict each other for that value.
validator.expect_column_value_lengths_to_be_between(
    column='brand',
    min_value=1,
    max_value=11,
)

Calculating Metrics:   0%|          | 0/9 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 1724,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

#### **7. apakah setidaknya 90% nilai dalam kolom `price` memiliki panjang tepat 4 karakter (`mostly=0.9`)**

In [48]:
# Length check on `price`: at least 90% of the values (mostly=0.9) must be
# exactly 4 characters long. The recorded output lists 3- and 5-digit prices
# as the unexpected values.
validator.expect_column_value_lengths_to_equal(
    column='price',
    value=4,
    mostly=0.9,
)

Calculating Metrics:   0%|          | 0/9 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 1724,
    "unexpected_count": 141,
    "unexpected_percent": 8.178654292343387,
    "partial_unexpected_list": [
      14800,
      980,
      650,
      600,
      850,
      13550,
      26650,
      14650,
      770,
      699,
      980,
      950,
      680,
      13800,
      650,
      650,
      990,
      22000,
      880,
      650
    ],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 8.178654292343387,
    "unexpected_percent_nonmissing": 8.178654292343387
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

#### **menyimpan ekspektasi suite yang telah dibuat**

In [49]:
# Save the expectation suite built above. discard_failed_expectations=False
# keeps expectations that failed in this session (e.g. the `main_camera`
# type check) in the saved suite instead of dropping them.
validator.save_expectation_suite(discard_failed_expectations=False)

####  **Membuat sebuah checkpoint**

In [51]:
# Add (or update) a checkpoint named 'Checkpoint' that is bound to the validator.
# NOTE(review): the capitalised variable name is kept because the next cell
# calls `Checkpoint.run()`; a lowercase name would be more conventional.
Checkpoint = context.add_or_update_checkpoint(name='Checkpoint', validator=validator)

#### **menjalankan checkpoint**

In [52]:
# Run the checkpoint and keep the returned result object for later inspection.
checkpoint_result = Checkpoint.run()

Calculating Metrics:   0%|          | 0/53 [00:00<?, ?it/s]

#### **membangun (build) Data Docs**

In [53]:

# Build the Data Docs site; the cell output is a dict mapping the site name
# ('local_site') to the URL of the generated index.html.
context.build_data_docs()

{'local_site': 'file://c:\\Users\\noufa\\github-classroom\\FTDS-assignment-bay\\p2-ftds008-hck-m3-Antonio1206\\gx\\uncommitted/data_docs/local_site/index.html'}