In [None]:
import synapseclient
import json

import pandas as pd
import great_expectations as gx

from agoradatatools.gx import GreatExpectationsRunner

# Great Expectations project directory, given relative to this notebook's working directory.
gx_project_root = '../src/agoradatatools/great_expectations'
context = gx.get_context(project_root_dir=gx_project_root)

# Create Expectation Suite for Team Info Data

## Get Example Data File

In [None]:
# Create the Synapse client that the following cell uses to fetch the example data file.
syn = synapseclient.Synapse()
# No credentials are passed here; presumably login() falls back to locally
# configured/cached Synapse credentials -- TODO confirm for your environment.
syn.login()


In [None]:
# Fetch the Synapse entity for the example team info file and keep its `.path`,
# which the next cell passes to pd.read_json.
team_info_entity = syn.get("syn17015358")
team_info_data_file = team_info_entity.path


## Create Validator Object on Data File

In [None]:
# Columns passed to convert_nested_columns_to_json before validation.
nested_columns = ['members']

# Read the example file and convert the nested columns in a single step, so this
# cell can be re-run without depending on a previously converted `df`.
df = GreatExpectationsRunner.convert_nested_columns_to_json(
    pd.read_json(team_info_data_file),
    nested_columns,
)

# Wrap the prepared dataframe in a validator and name the suite it will build.
validator = context.sources.pandas_default.read_dataframe(df)
validator.expectation_suite_name = "team_info"

## Add Expectations to Validator Object For Each Column

In [None]:
# team
validator.expect_column_values_to_be_of_type("team", "str")
validator.expect_column_values_to_not_be_null("team")
validator.expect_column_value_lengths_to_be_between("team", min_value=3, max_value=50)
# The allowed team names are passed as a list rather than a set literal:
# iteration order of a set of strings can differ between kernel sessions, which
# would make the order of the recorded value_set change from run to run.
validator.expect_column_values_to_be_in_set(
    "team",
    [
        "Columbia-Rush",
        "Duke",
        "Emory",
        "Harvard-MIT",
        "MSSM - Roussos Lab",
        "MSSM - Zhang Lab",
        "Sage Bionetworks",
        "Mayo-UFL-ISB",
        "Chang Lab",
        "JAX-VUMC-UW Resilience",
        "ASU",
        "Longo Lab",
        "Duke BARU",
        "Emory-Sage-SGC",
        "IUSM-Purdue",
        "Mayo",
    ],
)
validator.expect_column_values_to_be_unique("team")


In [None]:
# team_full
# Allowed characters: letters, digits, hyphen, parentheses and space.
# (The '-' following the 0-9 range is intended as a literal hyphen.)
team_full_pattern = "^[a-zA-Z0-9-() ]+$"

validator.expect_column_values_to_be_of_type("team_full", "str")
validator.expect_column_values_to_not_be_null("team_full")
validator.expect_column_value_lengths_to_be_between(
    "team_full",
    min_value=10,
    max_value=100,
)
validator.expect_column_values_to_match_regex("team_full", team_full_pattern)
validator.expect_column_values_to_be_unique("team_full")


In [None]:
# program
validator.expect_column_values_to_be_of_type("program", "str")
# The allowed values are passed as a list rather than a set literal so the
# recorded value_set keeps the same order on every run (set iteration order for
# strings can differ between kernel sessions).
# None is carried over unchanged from the original definition.
# NOTE(review): null rows may already be skipped by this expectation by default,
# which would make the None entry redundant -- confirm before removing it.
validator.expect_column_values_to_be_in_set(
    "program",
    ['Community Contributed', None, 'AMP-AD', 'Resilience-AD', 'TREAT-AD'],
)

In [None]:
# description
# Allowed characters: letters, digits, hyphen, parentheses, comma, period,
# apostrophe, colon, slash and space.
# (The '-' following the 0-9 range is intended as a literal hyphen.)
description_pattern = "^[a-zA-Z0-9-(),.':/ ]+$"

validator.expect_column_values_to_be_of_type("description", "str")
validator.expect_column_values_to_not_be_null("description")
validator.expect_column_values_to_match_regex("description", description_pattern)
validator.expect_column_value_lengths_to_be_between(
    "description",
    min_value=10,
    max_value=2000,
)

In [None]:
# members
# "members" is listed in nested_columns and passed through
# convert_nested_columns_to_json earlier, so it is validated here as a string
# column (presumably holding JSON text -- confirm against that helper).
validator.expect_column_values_to_be_of_type("members", "str")
validator.expect_column_values_to_not_be_null("members")
# Load the JSON schema that each "members" value must conform to.
# The encoding is stated explicitly so the schema file is decoded as UTF-8
# regardless of the platform's default locale encoding.
with open(
    "../src/agoradatatools/great_expectations/gx/json_schemas/team_info/members_schema.json",
    "r",
    encoding="utf-8",
) as file:
    members_schema = json.load(file)
validator.expect_column_values_to_match_json_schema("members", json_schema=members_schema)

## Save Expectation Suite

In [None]:
# Save the expectation suite accumulated on the validator.
# discard_failed_expectations=False asks for expectations to be kept in the saved
# suite even if they did not pass against the example data file.
validator.save_expectation_suite(discard_failed_expectations=False)


## Create Checkpoint and View Results

In [None]:
# Register (or overwrite) a checkpoint bound to the validator, run it once, and
# open the resulting validation result for inspection.
checkpoint_name = "agora-test-checkpoint"
checkpoint = context.add_or_update_checkpoint(name=checkpoint_name, validator=validator)

checkpoint_result = checkpoint.run()
context.view_validation_result(checkpoint_result)


## Build Data Docs - Click on Expectation Suite to View All Expectations

In [None]:
# Build the Data Docs for this context, then open them for viewing.
context.build_data_docs()
context.open_data_docs()
