In [None]:
import synapseclient

import great_expectations as gx

# Load the Great Expectations data context whose project root is the
# repository's great_expectations directory (path is relative to where the
# notebook is run from).
context = gx.get_context(
    project_root_dir="../src/agoradatatools/great_expectations"
)


# Create Expectation Suite for RNA Distribution Data

## Get Example Data File

In [None]:
# Create a Synapse client and log in. No credentials are passed explicitly,
# so login() presumably falls back to locally configured credentials —
# confirm against the synapseclient documentation for your environment.
syn = synapseclient.Synapse()
syn.login()


In [None]:
# Fetch Synapse entity syn28094691 and keep the path of its file; that path
# is what gets read into the validator.
rna_distribution_entity = syn.get("syn28094691")
rna_distribution_data_file = rna_distribution_entity.path


## Create Validator Object on Data File

In [None]:
# Read the JSON data file through the context's default pandas data source;
# the returned validator is the object the expectations are attached to.
validator = context.sources.pandas_default.read_json(rna_distribution_data_file)
# Name under which this validator's expectation suite is recorded.
validator.expectation_suite_name = "rna_distribution_data"


## Add Expectations to Validator Object For Each Column

In [None]:
# model: a non-null string restricted to the four allowed values listed here.
validator.expect_column_values_to_be_of_type(column="model", type_="str")
validator.expect_column_values_to_not_be_null(column="model")
validator.expect_column_values_to_be_in_set(
    column="model",
    value_set=[
        "AD Diagnosis (males and females)",
        "AD Diagnosis x AOD (males and females)",
        "AD Diagnosis x Sex (females only)",
        "AD Diagnosis x Sex (males only)",
    ],
)

In [None]:
# tissue: a non-null string restricted to the nine allowed codes listed here.
validator.expect_column_values_to_be_of_type(column="tissue", type_="str")
validator.expect_column_values_to_not_be_null(column="tissue")
validator.expect_column_values_to_be_in_set(
    column="tissue",
    value_set=["CBE", "DLPFC", "FP", "IFG", "PHG", "STG", "TCX", "ACC", "PCC"],
)

In [None]:
# min: every value must be a float and must not be null.
validator.expect_column_values_to_be_of_type(column="min", type_="float")
validator.expect_column_values_to_not_be_null(column="min")

In [None]:
# max: every value must be a float and must not be null.
validator.expect_column_values_to_be_of_type(column="max", type_="float")
validator.expect_column_values_to_not_be_null(column="max")

In [None]:
# median: every value must be a float and must not be null.
validator.expect_column_values_to_be_of_type(column="median", type_="float")
validator.expect_column_values_to_not_be_null(column="median")

In [None]:
# first_quartile: every value must be a float and must not be null.
validator.expect_column_values_to_be_of_type(column="first_quartile", type_="float")
validator.expect_column_values_to_not_be_null(column="first_quartile")

In [None]:
# third_quartile: every value must be a float and must not be null.
validator.expect_column_values_to_be_of_type(column="third_quartile", type_="float")
validator.expect_column_values_to_not_be_null(column="third_quartile")

In [None]:
# multi-field: within each row the summary statistics must be ordered
# max > third_quartile > median > first_quartile > min.
# NOTE(review): or_equal is not passed, so these comparisons are presumably
# strict and a row with two tied adjacent statistics would fail — confirm
# that rejecting ties is intended.
validator.expect_column_pair_values_a_to_be_greater_than_b(
    column_A="max", column_B="third_quartile"
)
validator.expect_column_pair_values_a_to_be_greater_than_b(
    column_A="third_quartile", column_B="median"
)
validator.expect_column_pair_values_a_to_be_greater_than_b(
    column_A="median", column_B="first_quartile"
)
validator.expect_column_pair_values_a_to_be_greater_than_b(
    column_A="first_quartile", column_B="min"
)

## Save Expectation Suite

In [None]:
# Save the expectation suite built on the validator.
# discard_failed_expectations=False asks the save to keep expectations even
# if they did not pass against the example data file.
validator.save_expectation_suite(discard_failed_expectations=False)


## Create Checkpoint and View Results

In [None]:
# Register (or overwrite) a checkpoint bound to the validator, run it, and
# open the validation result for inspection.
checkpoint = context.add_or_update_checkpoint(validator=validator, name="agora-test-checkpoint")
checkpoint_result = checkpoint.run()
context.view_validation_result(checkpoint_result)


## Build Data Docs - Click on Expectation Suite to View All Expectations

In [None]:
# Build the Data Docs for this context, then open them for viewing.
context.build_data_docs()
context.open_data_docs()
