In [None]:
import synapseclient

import pandas as pd
import great_expectations as gx

from agoradatatools.gx import GreatExpectationsRunner

# Load the Great Expectations data context from the project's GX directory.
# NOTE(review): the path is relative, so it presumably depends on the
# notebook's working directory -- confirm before running from elsewhere.
context = gx.get_context(project_root_dir='../src/agoradatatools/great_expectations')

# Create Expectation Suite for Biodomain Info Data

## Get Example Data File

In [None]:
# Create a Synapse client and log in. No credentials are passed explicitly,
# so login presumably relies on locally configured ones -- confirm for your setup.
syn = synapseclient.Synapse()
syn.login()


In [None]:
# Fetch the example biodomain info entity from Synapse, then keep the path
# of its file so later cells can read it.
biodomain_info_entity = syn.get("syn51709751")
biodomain_info_file = biodomain_info_entity.path

## Create Validator Object on Data File

In [None]:
# Column names handed to the runner's nested-column conversion; none are
# listed for this dataset.
nested_columns = []

# Read the JSON file into a DataFrame and run it through
# GreatExpectationsRunner.convert_nested_columns_to_json.
df = GreatExpectationsRunner.convert_nested_columns_to_json(
    pd.read_json(biodomain_info_file),
    nested_columns,
)

# Build a validator on the DataFrame and name the suite it will populate.
validator = context.sources.pandas_default.read_dataframe(df)
validator.expectation_suite_name = "biodomain_info"

## Add Expectations to Validator Object For Each Column

In [None]:
# name
# Every value in "name" must be one of these biodomain labels.
biodomain_names = [
    "Apoptosis",
    "APP Metabolism",
    "Autophagy",
    "Cell Cycle",
    "DNA Repair",
    "Endolysosome",
    "Epigenetic",
    "Immune Response",
    "Lipid Metabolism",
    "Metal Binding and Homeostasis",
    "Mitochondrial Metabolism",
    "Myelination",
    "Oxidative Stress",
    "Proteostasis",
    "RNA Spliceosome",
    "Structural Stabilization",
    "Synapse",
    "Tau Homeostasis",
    "Vasculature",
]

# "name" must be a string, non-null, unique, and drawn from the list above.
validator.expect_column_values_to_be_of_type("name", "str")
validator.expect_column_values_to_not_be_null("name")
validator.expect_column_values_to_be_unique("name")
validator.expect_column_values_to_be_in_set("name", biodomain_names)

## Save Expectation Suite

In [None]:
# Save the suite built up on the validator. discard_failed_expectations=False
# keeps every expectation, including any that failed against the example data.
validator.save_expectation_suite(discard_failed_expectations=False)


## Create Checkpoint and View Results

In [None]:
# Register (or refresh) a checkpoint bound to the validator built above.
checkpoint = context.add_or_update_checkpoint(name="agora-test-checkpoint", validator=validator)

# Run the checkpoint and hand its result to the context for viewing.
checkpoint_result = checkpoint.run()
context.view_validation_result(checkpoint_result)


## Build Data Docs - Click on Expectation Suite to View All Expectations

In [None]:
# Build the Data Docs for this context, then open them for review.
context.build_data_docs()
context.open_data_docs()
