In [1]:
import synapseclient

import pandas as pd
import great_expectations as gx

from agoradatatools.gx import GreatExpectationsRunner

# Build the Great Expectations data context from the GX project directory
# inside the package source tree (path is relative to this notebook).
gx_project_root = '../src/agoradatatools/great_expectations'
context = gx.get_context(project_root_dir=gx_project_root)

# Create Expectation Suite for Proteomics Distribution Data

## Get Example Data File

In [2]:
# Create a Synapse client and log in with the locally configured credentials
# (no credentials are passed explicitly here).
syn = synapseclient.login()



UPGRADE AVAILABLE

A more recent version of the Synapse Client (4.0.0) is available. Your version (3.1.1) can be upgraded by typing:
    pip install --upgrade synapseclient

Python Synapse Client version 4.0.0 release notes

https://python-docs.synapse.org/news/



Welcome, Brad Macdonald!

INFO: 2024-02-02 10:03:31 | synapseclient_default | Welcome, Brad Macdonald!



In [5]:
# Fetch the example file entity from Synapse by its ID and keep the path
# attribute so the file can be read in the next step.
proteomics_distribution_entity = syn.get("syn31510062")
proteomics_distribution_data_file = proteomics_distribution_entity.path


## Create Validator Object on Data File

In [9]:
# The list of nested columns is empty here, so no column names are handed to
# the JSON-conversion helper.
nested_columns = []

# Read the example JSON file and pass it through the runner's nested-column
# conversion before handing the DataFrame to Great Expectations.
df = GreatExpectationsRunner.convert_nested_columns_to_json(
    pd.read_json(proteomics_distribution_data_file),
    nested_columns,
)

# Wrap the DataFrame in a validator and name the suite it will record into.
validator = context.sources.pandas_default.read_dataframe(df)
validator.expectation_suite_name = "proteomics_distribution_data"

## Add Expectations to Validator Object For Each Column

In [10]:
# tissue: string-typed, never null, and limited to the listed values
validator.expect_column_values_to_be_of_type(column="tissue", type_="str")
validator.expect_column_values_to_not_be_null(column="tissue")
validator.expect_column_values_to_be_in_set(
    column="tissue",
    value_set=['AntPFC', 'DLPFC', 'MFG', 'TCX'],
)







Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]







Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]







Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps.dtype], [])



See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps.dtype], [])



{
  "success": true,
  "result": {
    "element_count": 6,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [13]:
# min: float-typed, between -0.5 and 0, and never null
validator.expect_column_values_to_be_of_type(column="min", type_="float")
validator.expect_column_values_to_be_between(
    column="min",
    min_value=-0.5,
    max_value=0,
)
validator.expect_column_values_to_not_be_null(column="min")







Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]







Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 6,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": []
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [15]:
# max: float-typed, between 0 and 0.5, and never null
validator.expect_column_values_to_be_of_type(column="max", type_="float")
validator.expect_column_values_to_be_between(
    column="max",
    min_value=0,
    max_value=0.5,
)
validator.expect_column_values_to_not_be_null(column="max")







Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]







Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 6,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": []
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [17]:
# first_quartile: float-typed, between -0.2 and 0, and never null
validator.expect_column_values_to_be_of_type(column="first_quartile", type_="float")
validator.expect_column_values_to_be_between(
    column="first_quartile",
    min_value=-0.2,
    max_value=0,
)
validator.expect_column_values_to_not_be_null(column="first_quartile")







Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]







Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 6,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": []
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [19]:
# median: float-typed, between -0.1 and 0.1, and never null
validator.expect_column_values_to_be_of_type(column="median", type_="float")
validator.expect_column_values_to_be_between(
    column="median",
    min_value=-0.1,
    max_value=0.1,
)
validator.expect_column_values_to_not_be_null(column="median")







Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]







Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 6,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": []
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [21]:
# third_quartile: float-typed, between 0 and 0.1, and never null
validator.expect_column_values_to_be_of_type(column="third_quartile", type_="float")
validator.expect_column_values_to_be_between(
    column="third_quartile",
    min_value=0,
    max_value=0.1,
)
validator.expect_column_values_to_not_be_null(column="third_quartile")







Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]







Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 6,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": []
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [23]:
# type: string-typed, never null, and limited to the listed values
validator.expect_column_values_to_be_of_type(column="type", type_="str")
validator.expect_column_values_to_not_be_null(column="type")
validator.expect_column_values_to_be_in_set(
    column="type",
    value_set=['SRM', 'TMT', 'LFQ'],
)







Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]







Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]







Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps.dtype], [])



See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps.dtype], [])



{
  "success": true,
  "result": {
    "element_count": 6,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [25]:
# multi-field logical: each summary statistic must be greater than the one
# below it, i.e. max > third_quartile > median > first_quartile > min.
# The calls are kept separate (not looped) so the final result is displayed.
validator.expect_column_pair_values_A_to_be_greater_than_B(
    column_A="max", column_B="third_quartile"
)
validator.expect_column_pair_values_A_to_be_greater_than_B(
    column_A="third_quartile", column_B="median"
)
validator.expect_column_pair_values_A_to_be_greater_than_B(
    column_A="median", column_B="first_quartile"
)
validator.expect_column_pair_values_A_to_be_greater_than_B(
    column_A="first_quartile", column_B="min"
)








Calculating Metrics:   0%|          | 0/7 [00:00<?, ?it/s]







Calculating Metrics:   0%|          | 0/7 [00:00<?, ?it/s]







Calculating Metrics:   0%|          | 0/7 [00:00<?, ?it/s]







Calculating Metrics:   0%|          | 0/7 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 6,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

## Save Expectation Suite

In [None]:
# Save the suite recorded on the validator; failed expectations are kept
# rather than discarded.
discard_failed = False
validator.save_expectation_suite(discard_failed_expectations=discard_failed)


## Create Checkpoint and View Results

In [None]:
# Register (or update) a checkpoint bound to the validator built above.
checkpoint_settings = {
    "name": "agora-test-checkpoint",
    "validator": validator,
}
checkpoint = context.add_or_update_checkpoint(**checkpoint_settings)

# Run the checkpoint and view the resulting validation result.
checkpoint_result = checkpoint.run()
context.view_validation_result(checkpoint_result)


## Build Data Docs - Click on Expectation Suite to View All Expectations

In [None]:
# Build the Data Docs for this context, then open them for viewing.
context.build_data_docs()
context.open_data_docs()
