In [None]:
import synapseclient

import pandas as pd
import great_expectations as gx

from agoradatatools.gx import GreatExpectationsRunner

# Load the Great Expectations data context stored inside the agoradatatools package.
# The root is a relative path, so it presumably resolves against the kernel's
# working directory — run the notebook from its own folder.
context = gx.get_context(
    project_root_dir="../src/agoradatatools/great_expectations"
)

# Create Expectation Suite for Neuropath Correlation Data

## Get Example Data File

In [None]:
# Create a Synapse client and authenticate it. login() is called without
# arguments, so credentials must come from outside this notebook
# (presumably a cached session or Synapse config file — confirm).
syn = synapseclient.Synapse()
syn.login()


In [None]:
# Fetch Synapse entity syn22130742 and keep the local path of the retrieved file.
# NOTE(review): the variable name says "overall_scores", but this notebook builds
# the "neuropath_corr" suite — presumably a copy-paste leftover. If confirmed,
# rename it here and where it is passed to pd.read_json.
overall_scores_data_file = syn.get("syn22130742").path


## Create Validator Object on Data File

In [None]:
# No columns are listed for JSON conversion in this dataset.
nested_columns = []

# Read the example file and pass it through the runner's nested-column
# conversion (with the empty list above) before handing it to GX.
df = GreatExpectationsRunner.convert_nested_columns_to_json(
    pd.read_json(overall_scores_data_file),
    nested_columns,
)

# Wrap the dataframe in a validator and name the suite it will build.
validator = context.sources.pandas_default.read_dataframe(df)
validator.expectation_suite_name = "neuropath_corr"

## Add Expectations to Validator Object For Each Column

In [None]:
# ensg: non-null string identifier of fixed length
validator.expect_column_values_to_be_of_type("ensg", "str")
validator.expect_column_values_to_not_be_null("ensg")
validator.expect_column_value_lengths_to_equal("ensg", 15)
# checks format and allowed characters: "ENSG" followed by exactly 11 digits.
# Written as a raw string so that \d reaches the regex engine without relying on
# Python's deprecated pass-through of unrecognized escape sequences; the
# resulting pattern text is unchanged.
validator.expect_column_values_to_match_regex("ensg", r"^ENSG\d{11}$")

In [None]:
# gname: string of 1-25 characters drawn from letters, digits, and hyphens
# (no not-null expectation is set for this column)
validator.expect_column_values_to_be_of_type(column="gname", type_="str")
validator.expect_column_value_lengths_to_be_between(
    column="gname",
    min_value=1,
    max_value=25,
)
validator.expect_column_values_to_match_regex(
    column="gname",
    regex="^[a-zA-Z0-9-]*$",
)

In [None]:
# oddsratio: required float, expected to fall between 0 and 2
validator.expect_column_values_to_be_of_type(column="oddsratio", type_="float")
validator.expect_column_values_to_be_between(
    column="oddsratio",
    min_value=0,
    max_value=2,
)
validator.expect_column_values_to_not_be_null(column="oddsratio")

In [None]:
# ci_lower: required float, expected to fall between 0 and 2
validator.expect_column_values_to_be_of_type(column="ci_lower", type_="float")
validator.expect_column_values_to_be_between(
    column="ci_lower",
    min_value=0,
    max_value=2,
)
validator.expect_column_values_to_not_be_null(column="ci_lower")

In [None]:
# ci_upper: required float, expected to fall between 0 and 3
validator.expect_column_values_to_be_of_type(column="ci_upper", type_="float")
validator.expect_column_values_to_be_between(
    column="ci_upper",
    min_value=0,
    max_value=3,
)
validator.expect_column_values_to_not_be_null(column="ci_upper")

In [None]:
# pval: required float, expected to fall between 0 and 1
validator.expect_column_values_to_be_of_type(column="pval", type_="float")
validator.expect_column_values_to_be_between(
    column="pval",
    min_value=0,
    max_value=1,
)
validator.expect_column_values_to_not_be_null(column="pval")

In [None]:
# pval_adj: required float, expected to fall between 0 and 1
validator.expect_column_values_to_be_of_type(column="pval_adj", type_="float")
validator.expect_column_values_to_be_between(
    column="pval_adj",
    min_value=0,
    max_value=1,
)
validator.expect_column_values_to_not_be_null(column="pval_adj")

In [None]:
# neuropath_type: required string restricted to a fixed set of labels
validator.expect_column_values_to_be_of_type(column="neuropath_type", type_="str")
validator.expect_column_values_to_not_be_null(column="neuropath_type")
validator.expect_column_values_to_be_in_set(
    column="neuropath_type",
    value_set=["BRAAK", "CERAD", "DCFDX", "COGDX"],
)

In [None]:
# multi-field: each (ensg, neuropath_type) pair may appear only once
validator.expect_compound_columns_to_be_unique(
    column_list=["ensg", "neuropath_type"],
)
# The three-column check is logically implied by the two-column one above
# (adding a column cannot make unique rows collide); it is kept as written.
validator.expect_compound_columns_to_be_unique(
    column_list=["ensg", "gname", "neuropath_type"],
)

## Save Expectation Suite

In [None]:
# Persist the suite accumulated on the validator. discard_failed_expectations=False
# keeps every expectation, including any that did not pass on the example data.
validator.save_expectation_suite(discard_failed_expectations=False)


## Create Checkpoint and View Results

In [None]:
# Register (or overwrite) a checkpoint bound to this validator.
checkpoint = context.add_or_update_checkpoint(name="agora-test-checkpoint", validator=validator)

# Run the checkpoint, then display its validation result.
checkpoint_result = checkpoint.run()
context.view_validation_result(checkpoint_result)


## Build Data Docs - Click on Expectation Suite to View All Expectations

In [None]:
# Regenerate the Data Docs site from the current context, then open it for browsing.
context.build_data_docs()
context.open_data_docs()
