# Import Libraries

In [27]:
from great_expectations.data_context import FileDataContext

# Create a file-backed Great Expectations project rooted at the current
# working directory and keep a handle to its data context.
context = FileDataContext.create(
    project_root_dir='./',
)

# Great Expectations

In [2]:
# Register a pandas Datasource. The name must be unique between Datasources.
# `add_or_update_pandas` is used instead of `add_pandas` so that re-running
# this cell replaces the Datasource already stored in the file-backed context
# rather than failing on the duplicate name.
datasource_name = 'app-unduh'
datasource = context.sources.add_or_update_pandas(datasource_name)

# Register the cleaned CSV file as a data asset of that Datasource.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# only run where this file exists. Consider a path relative to the project.
asset_name = 'data-m3-app-unduhan'
path_to_data = r'\Users\dwina\hacktiv8_latian\project-m3\dags\P2M3_dendy_dwinanda_data_clean.csv'
asset = datasource.add_csv_asset(asset_name, filepath_or_buffer=path_to_data)

# Build the batch request that the validator will use to load the asset.
batch_request = asset.build_batch_request()

In [3]:
# Create the expectation suite (or update it if it already exists).
expectation_suite_name = 'expectation-data-app-m3'
context.add_or_update_expectation_suite(expectation_suite_name)

# Bind the batch request to the suite: expectations run through this
# validator are evaluated against the batch and recorded in the suite.
validator = context.get_validator(
    batch_request=batch_request,
    expectation_suite_name=expectation_suite_name,
)

# Preview the first rows to confirm the data was loaded.
validator.head()

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

Unnamed: 0,app,category,rating,review,size,installs,type,price,content_rating,genres,last_updated,current_version,android_ver
0,PhotoEditorCandyCameraGridScrapBook,ART_AND_DESIGN,4.1,159,19000000.0,10000,Free,0.0,Everyone,Art Design,2018-01-07,1.0.0,4.0.3
1,Coloringbookmoana,ART_AND_DESIGN,3.9,967,14000000.0,500000,Free,0.0,Everyone,Art Design Pretend Play,2018-01-15,2.0.0,4.0.3
2,"ULauncherLite–FREELiveCoolThemes,HideApps",ART_AND_DESIGN,4.7,87510,8.7,5000000,Free,0.0,Everyone,Art Design,2018-08-01,1.2.4,4.0.3
3,SketchDrawPaint,ART_AND_DESIGN,4.5,215644,25000000.0,50000000,Free,0.0,Teen,Art Design,2018-06-08,Varies with device,4.2
4,PixelDrawNumberArtColoringBook,ART_AND_DESIGN,4.3,967,2.8,100000,Free,0.0,Everyone,Art Design Creativity,2018-06-20,1.1,4.4


## Expectation 1

In [4]:
# Expectation 1: every value in column `app` must be unique.
validator.expect_column_values_to_be_unique(column='app')

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": false,
  "result": {
    "element_count": 7418,
    "unexpected_count": 697,
    "unexpected_percent": 9.396063629010515,
    "partial_unexpected_list": [
      "Coloringbookmoana",
      "UNICORNColorByNumberPixelArtColoring",
      "OxfordDictionaryofEnglish:Free",
      "EnglishDictionaryOffline",
      "OfficeSuite:FreeOffice+PDFEditor",
      "CurriculumvitaeAppCVBuilderFreeResumeMaker",
      "CallBlocker",
      "GoogleAds",
      "HelixJump",
      "OfficeSuite:FreeOffice+PDFEditor",
      "DailyMangaComicWebtoon",
      "MangaAZMangaComicReader",
      "YahooMail\u2013StayOrganized",
      "imofreevideocallsandchat",
      "FirefoxFocus:Theprivacybrowser",
      "UCBrowserMiniTinyFastPrivateSecure",
      "WhatsAppBusiness",
      "UCBrowserFastDownloadPrivateSecure",
      "MicrosoftEdge",
      "imofreevideocallsandchat"
    ],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 9.396063629010515,
    "unexpected_percent_nonmiss

## Expectation 2

In [5]:
# Expectation 2: column `size` must exist, so that it can be used to judge
# whether an app can be downloaded.
validator.expect_column_to_exist(
    column='size',
)

Calculating Metrics:   0%|          | 0/2 [00:00<?, ?it/s]

{
  "success": true,
  "result": {},
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

## Expectation 3

In [19]:
# Expectation 3: column `rating` must be one of the one-decimal values
# 0.0, 0.1, ..., 4.9, 5.0.
# The allowed set is generated as tenths / 10 for tenths = 0..50, which
# yields the same floats as writing the 51 literals out by hand.
validator.expect_column_values_to_be_in_set(
    column='rating',
    value_set=[tenths / 10 for tenths in range(51)],
)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 7418,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

## Expectation 4

In [21]:
# Expectation 4: column `rating` must lie between 0 and 5 stars
# (min_value=0 and max_value=5 are the bounds passed to the check).
validator.expect_column_values_to_be_between(column='rating', min_value=0, max_value=5)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 7418,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

## Expectation 5

In [22]:
# Expectation 5: column `installs` must be stored as an integer or a float
# (dtype int64 or float64).
validator.expect_column_values_to_be_in_type_list(
    column='installs',
    type_list=['int64', 'float64'],
)

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "observed_value": "int64"
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

## Expectation 6

In [20]:
# Expectation 6: for every row, `installs` must be greater than `review`.
validator.expect_column_pair_values_a_to_be_greater_than_b(
    column_A='installs',
    column_B='review',
)

Calculating Metrics:   0%|          | 0/7 [00:00<?, ?it/s]

{
  "success": false,
  "result": {
    "element_count": 7418,
    "unexpected_count": 8,
    "unexpected_percent": 0.10784578053383662,
    "partial_unexpected_list": [
      [
        1,
        4
      ],
      [
        1,
        2
      ],
      [
        10,
        10
      ],
      [
        5,
        7
      ],
      [
        5,
        5
      ],
      [
        10,
        11
      ],
      [
        10,
        20
      ],
      [
        1,
        2
      ]
    ],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.10784578053383662,
    "unexpected_percent_nonmissing": 0.10784578053383662
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

## Expectation 7

In [25]:
# Expectation 7: the table must contain at least 100 rows (no upper bound).
validator.expect_table_row_count_to_be_between(
    min_value=100,
)

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "observed_value": 7418
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

# Save Expectation Suite

In [26]:
# Persist the expectations to the suite. discard_failed_expectations=False
# keeps expectations that did not pass on this batch as well.
validator.save_expectation_suite(
    discard_failed_expectations=False,
)