In [None]:
import synapseclient
import json

import pandas as pd
import great_expectations as gx

from agoradatatools.gx import GreatExpectationsRunner

# Root of the Great Expectations project, relative to this notebook's
# working directory.
GX_PROJECT_ROOT = "../src/agoradatatools/great_expectations"

# Data context used by every later cell to read dataframes, store the suite,
# run checkpoints and build Data Docs.
context = gx.get_context(project_root_dir=GX_PROJECT_ROOT)

# Create Expectation Suite for Distribution Data

## Get Example Data File

In [None]:
# Create a Synapse client and log in. No credentials are passed here, so they
# are presumably picked up from the local Synapse configuration — confirm your
# environment is set up before running this cell.
syn = synapseclient.Synapse()
syn.login()


In [None]:
# Fetch the example distribution data entity from Synapse, then keep the
# local path of the file so it can be read with pandas.
distribution_data_entity = syn.get("syn27572407")
distribution_data_file = distribution_data_entity.path


## Create Validator Object on Data File

In [None]:
# Columns handed to the runner's nested-column converter before validation.
nested_columns = ["target_risk_score", "genetics_score", "multi_omics_score"]

# Keep the frame exactly as read from disk under its own name, and derive the
# converted frame from it instead of overwriting the input.
raw_df = pd.read_json(distribution_data_file)
df = GreatExpectationsRunner.convert_nested_columns_to_json(raw_df, nested_columns)

# Wrap the converted frame in a validator and name the suite that the
# expectations added below will belong to.
validator = context.sources.pandas_default.read_dataframe(df)
validator.expectation_suite_name = "distribution_data"

## Add Expectations to Validator Object for Each Column

In [None]:
# target_risk_score
# Load the JSON schema first so that a missing or malformed schema file fails
# before any expectation has been added to the validator. The file is opened
# explicitly as UTF-8 so decoding does not depend on the platform's locale.
with open(
    "../src/agoradatatools/great_expectations/gx/json_schemas/distribution_data/target_risk_score.json",
    "r",
    encoding="utf-8",
) as schema_file:
    target_risk_score_schema = json.load(schema_file)

# The column must hold non-null strings whose content matches the schema.
validator.expect_column_values_to_be_of_type("target_risk_score", "str")
validator.expect_column_values_to_not_be_null("target_risk_score")
validator.expect_column_values_to_match_json_schema("target_risk_score", json_schema=target_risk_score_schema)

In [None]:
# genetics_score
# Load the JSON schema first so that a missing or malformed schema file fails
# before any expectation has been added to the validator. The file is opened
# explicitly as UTF-8 so decoding does not depend on the platform's locale.
with open(
    "../src/agoradatatools/great_expectations/gx/json_schemas/distribution_data/genetics_score.json",
    "r",
    encoding="utf-8",
) as schema_file:
    genetics_score_schema = json.load(schema_file)

# The column must hold non-null strings whose content matches the schema.
validator.expect_column_values_to_be_of_type("genetics_score", "str")
validator.expect_column_values_to_not_be_null("genetics_score")
validator.expect_column_values_to_match_json_schema("genetics_score", json_schema=genetics_score_schema)

In [None]:
# multi_omics_score
# Load the JSON schema first so that a missing or malformed schema file fails
# before any expectation has been added to the validator. The file is opened
# explicitly as UTF-8 so decoding does not depend on the platform's locale.
with open(
    "../src/agoradatatools/great_expectations/gx/json_schemas/distribution_data/multi_omics_score.json",
    "r",
    encoding="utf-8",
) as schema_file:
    multi_omics_score_schema = json.load(schema_file)

# The column must hold non-null strings whose content matches the schema.
validator.expect_column_values_to_be_of_type("multi_omics_score", "str")
validator.expect_column_values_to_not_be_null("multi_omics_score")
validator.expect_column_values_to_match_json_schema("multi_omics_score", json_schema=multi_omics_score_schema)

## Save Expectation Suite

In [None]:
# Save the suite built up on the validator. discard_failed_expectations=False
# asks Great Expectations to keep every expectation, including any that did
# not pass against the example data.
validator.save_expectation_suite(discard_failed_expectations=False)


## Create Checkpoint and View Results

In [None]:
# Register (or overwrite) a checkpoint bound to this validator.
checkpoint = context.add_or_update_checkpoint(
    name="agora-test-checkpoint",
    validator=validator,
)
# Run the checkpoint, then hand its result to the context for viewing.
checkpoint_result = checkpoint.run()
context.view_validation_result(checkpoint_result)


## Build Data Docs - Click on Expectation Suite to View All Expectations

In [None]:
# Rebuild the Data Docs for this context, then open them for browsing.
context.build_data_docs()
context.open_data_docs()
