In [1]:
import synapseclient

import great_expectations as gx

# Open the Great Expectations project rooted at the given relative path.
# NOTE(review): the `expectations.*` imports that follow presumably rely on
# this context having been created first - confirm before reordering.
context = gx.get_context(
    project_root_dir='../src/agoradatatools/great_expectations',
)

from expectations.expect_column_values_to_have_list_length import ExpectColumnValuesToHaveListLength
from expectations.expect_column_values_to_have_list_members import ExpectColumnValuesToHaveListMembers
from expectations.expect_column_values_to_have_list_members_of_type import ExpectColumnValuesToHaveListMembersOfType


ModuleNotFoundError: No module named 'synapseclient'

# Create Expectation Suite for Biomarkers Data

## Get Example Data File

In [None]:
# Create a Synapse client and authenticate it for the downloads below.
# NOTE(review): login() is called without arguments, so credentials are
# presumably picked up from a cached session or local config - confirm.
syn = synapseclient.Synapse()
syn.login()


In [None]:
# Fetch the example biomarkers entity from Synapse.
# NOTE(review): the ".1" suffix presumably pins the entity version - confirm.
biomarkers_entity = syn.get("syn61250724.1")
# Keep only the file path of the fetched entity; it is what gets validated.
biomarkers_data_file = biomarkers_entity.path


## Create Validator Object on Data File

In [None]:
# Read the biomarkers JSON file through the default pandas datasource; the
# returned validator is the object expectations are attached to.
# The path must be `biomarkers_data_file`: that is the only data-file variable
# this notebook defines, so any other name raises NameError on a fresh kernel.
validator = context.sources.pandas_default.read_json(
    biomarkers_data_file
)
# Expectations added to this validator are collected under this suite name.
validator.expectation_suite_name = "biomarkers"


## Add Expectations to Validator Object For Each Column

In [None]:
# model
model_col = "model"
# permitted characters: ASCII letters, digits, underscores, periods, dashes
model_pattern = "^[A-Za-z0-9_.-]+$"
validator.expect_column_values_to_be_of_type(model_col, "str")
validator.expect_column_values_to_not_be_null(model_col)
validator.expect_column_values_to_match_regex(model_col, model_pattern)


In [None]:
# type
type_col = "type"
# permitted characters: ASCII letters, digits, underscores, periods, dashes
type_pattern = "^[A-Za-z0-9_.-]+$"
validator.expect_column_values_to_be_of_type(type_col, "str")
validator.expect_column_values_to_not_be_null(type_col)
validator.expect_column_values_to_match_regex(type_col, type_pattern)

In [None]:
# measurement
validator.expect_column_values_to_be_of_type("measurement", "float")
validator.expect_column_values_to_not_be_null("measurement")
# Values must lie in (0, 100]. The exclusive lower bound is spelled with
# min_value plus strict_min=True, which are the documented arguments of
# expect_column_values_to_be_between; a `strict_min_value` keyword is not one
# of them, so it does not express the bound in a form the expectation reads.
validator.expect_column_values_to_be_between(
    "measurement", min_value=0, strict_min=True, max_value=100
)

In [None]:
# units
units_col = "units"
# permitted characters: ASCII letters, digits, underscores, periods, dashes
units_pattern = "^[A-Za-z0-9_.-]+$"
validator.expect_column_values_to_be_of_type(units_col, "str")
validator.expect_column_values_to_not_be_null(units_col)
validator.expect_column_values_to_match_regex(units_col, units_pattern)

In [None]:
# age_death
validator.expect_column_values_to_be_of_type("age_death", "float")
validator.expect_column_values_to_not_be_null("age_death")
# Values must lie in (0, 100]. The exclusive lower bound is spelled with
# min_value plus strict_min=True, which are the documented arguments of
# expect_column_values_to_be_between; a `strict_min_value` keyword is not one
# of them, so it does not express the bound in a form the expectation reads.
validator.expect_column_values_to_be_between(
    "age_death", min_value=0, strict_min=True, max_value=100
)

In [None]:
# tissue
tissue_col = "tissue"
# permitted characters: ASCII letters, digits, underscores, periods, dashes
tissue_pattern = "^[A-Za-z0-9_.-]+$"
validator.expect_column_values_to_be_of_type(tissue_col, "str")
validator.expect_column_values_to_not_be_null(tissue_col)
validator.expect_column_values_to_match_regex(tissue_col, tissue_pattern)

In [None]:
# sex
sex_col = "sex"
# permitted characters: ASCII letters, digits, underscores, periods, dashes
sex_pattern = "^[A-Za-z0-9_.-]+$"
validator.expect_column_values_to_be_of_type(sex_col, "str")
validator.expect_column_values_to_not_be_null(sex_col)
validator.expect_column_values_to_match_regex(sex_col, sex_pattern)

In [None]:
# genotype
genotype_col = "genotype"
# permitted characters: ASCII letters, digits, underscores, periods, dashes
genotype_pattern = "^[A-Za-z0-9_.-]+$"
validator.expect_column_values_to_be_of_type(genotype_col, "str")
validator.expect_column_values_to_not_be_null(genotype_col)
validator.expect_column_values_to_match_regex(genotype_col, genotype_pattern)

In [None]:
# Save the suite accumulated on the validator. Expectations that failed
# against the example file are kept (discard_failed_expectations=False).
validator.save_expectation_suite(discard_failed_expectations=False)


## Create Checkpoint and View Results

In [None]:
# Register the checkpoint (or update it if one with this name already exists),
# bound to the validator built above.
checkpoint = context.add_or_update_checkpoint(name="agora-test-checkpoint", validator=validator)

# Run the checkpoint, then hand its result to the context for viewing.
checkpoint_result = checkpoint.run()
context.view_validation_result(checkpoint_result)


## Build Data Docs - Click on Expectation Suite to View All Expectations

In [None]:
# Build the Data Docs for this project, then open them for browsing.
context.build_data_docs()
context.open_data_docs()
