Skip to content

Commit

Permalink
- BiolinkValidator.PREDICATE_INCLUSIONS added (just with "biolink:int…
Browse files Browse the repository at this point in the history
…eracts_with") to bypass 'mixin' Biolink Model validation error (pending full community review of the 'mixin' status of this predicate); unit tests modified to suit

- plus a small internal DRY refactor in the test suite with respect to LATEST_BIOLINK_MODEL_VERSION

otherwise preparing for patch release 3.9.1
  • Loading branch information
RichardBruskiewich committed Sep 11, 2023
1 parent b59956c commit 8d0b452
Show file tree
Hide file tree
Showing 4 changed files with 113 additions and 48 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Expand Up @@ -2,6 +2,12 @@

The Reasoner Validator package is evolving along with progress in TRAPI and Biolink Model standards within the NCATS Biomedical Knowledge Translator.

## 3.9.1
- Upgrade to Biolink Model Toolkit 1.1.2
- Removed all residual references to 'sanitize_trapi_response()' (warning: validation with pre-release versions of TRAPI 1.4 earlier than 1.4.2 may trigger some funny false positive validation messages)
- BiolinkValidator.PREDICATE_INCLUSIONS added (just with "biolink:interacts_with") to bypass 'mixin' Biolink Model validation error (pending full community review of the 'mixin' status of this predicate); unit tests modified to suit
- plus a small internal DRY refactor in the test suite with respect to LATEST_BIOLINK_MODEL_VERSION

## 3.9.0
- Detect missing knowledge_graph names (resolves part of issue#35)
- detection of uninformative QNodes (resolve issue#14)
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "reasoner-validator"
version = "3.9.0"
version = "3.9.1"
description = "Validation tools for Reasoner API"
authors = [
"Richard Bruskiewich <richard.bruskiewich@delphinai.com>",
Expand Down
67 changes: 38 additions & 29 deletions reasoner_validator/biolink/__init__.py
Expand Up @@ -1225,6 +1225,10 @@ def validate_sources(self, edge_id: str, edge: Dict) -> Optional[str]:

return source_trail # may be empty if required RetrievalSource 'sources' entries are missing

# TODO: 11-Sept-2023: Certain specific 'mixin' predicates used in Knowledge Graphs are, for now, exempted from
# predicate validation (no message is reported for them), for short term validation purposes (see reasoner-validator issue #97)
PREDICATE_INCLUSIONS = ["biolink:interacts_with"]

def validate_predicate(
self,
edge_id: str,
Expand All @@ -1233,41 +1237,46 @@ def validate_predicate(
source_trail: Optional[str] = None
):
"""
Validates predicates based on their meta-nature: existence, mixin, deprecation, etc.
with notable hard-coded explicit PREDICATE_INCLUSIONS exceptions.
:param edge_id: str, identifier of the edge whose predicate is being validated
:param predicate: str, putative Biolink Model predicate to be validated
:param source_trail: Optional[str], source trail associated with the edge (may be None), passed along with any validation message reported
:param graph_type: TRAPIGraphType, type of TRAPI graph component being validated
:return: None (validation messages are reported through the class instance)
"""
graph_type_context: str = graph_type.name.lower()
if graph_type_context != "input_edge":
graph_type_context += ".edge"
context: str = f"{graph_type_context}.predicate"

# Validate the putative predicate as *not* being abstract, deprecated or a mixin
biolink_class = self.validate_element_status(
graph_type=graph_type,
context=context,
identifier=predicate,
edge_id=edge_id,
source_trail=source_trail
)
if biolink_class:
if not self.bmt.is_predicate(predicate):
self.report(
code=f"error.{context}.invalid",
source_trail=source_trail,
identifier=predicate,
edge_id=edge_id
)
elif self.minimum_required_biolink_version("2.2.0") and \
not self.bmt.is_translator_canonical_predicate(predicate):
self.report(
code=f"warning.{context}.non_canonical",
source_trail=source_trail,
identifier=predicate,
edge_id=edge_id
)
if predicate not in self.PREDICATE_INCLUSIONS:

graph_type_context: str = graph_type.name.lower()
if graph_type_context != "input_edge":
graph_type_context += ".edge"
context: str = f"{graph_type_context}.predicate"

# Validate the putative predicate as *not* being abstract, deprecated or a mixin
biolink_class = self.validate_element_status(
graph_type=graph_type,
context=context,
identifier=predicate,
edge_id=edge_id,
source_trail=source_trail
)
if biolink_class:
if not self.bmt.is_predicate(predicate):
self.report(
code=f"error.{context}.invalid",
source_trail=source_trail,
identifier=predicate,
edge_id=edge_id
)
elif self.minimum_required_biolink_version("2.2.0") and \
not self.bmt.is_translator_canonical_predicate(predicate):
self.report(
code=f"warning.{context}.non_canonical",
source_trail=source_trail,
identifier=predicate,
edge_id=edge_id
)

@staticmethod
def build_source_trail(sources: Optional[Dict[str, List[str]]]) -> Optional[str]:
Expand Down
86 changes: 68 additions & 18 deletions tests/test_biolink_compliance_validation.py
Expand Up @@ -69,12 +69,16 @@ def test_minimum_required_biolink_version():


def test_message():
reporter = BiolinkValidator(prefix="Test Message", trapi_version="v1.3", biolink_version="v3.5.4")
reporter = BiolinkValidator(
prefix="Test Message",
trapi_version="v1.3",
biolink_version=f"v{LATEST_BIOLINK_MODEL_VERSION}"
)
assert reporter.get_trapi_version() == "v1.3.0"

# Note: BMT is a bit tricky in resolving Biolink Model versions:
# the version is reported without the 'v' prefix of the GitHub release!
assert reporter.get_biolink_version() == "3.5.4"
assert reporter.get_biolink_version() == LATEST_BIOLINK_MODEL_VERSION

assert not reporter.has_messages()
reporter.report("info.compliant")
Expand Down Expand Up @@ -2884,6 +2888,7 @@ def test_validate_biolink_curie_in_qualifiers(
{
"nodes": {
"HGNC:3059": {
"name": "Heparin binding EGF like growth factor",
"categories": [
"biolink:Gene"
],
Expand All @@ -2900,7 +2905,7 @@ def test_validate_biolink_curie_in_qualifiers(
"edges": {
"edge_1": {
"subject": "HGNC:3059",
"predicate": "biolink:interacts_with",
"predicate": "biolink:increases_amount_or_activity_of",
"object": "HGNC:391",
"attributes": [
{
Expand All @@ -2921,10 +2926,55 @@ def test_validate_biolink_curie_in_qualifiers(
),
(
LATEST_BIOLINK_MODEL_VERSION,
# Query 20: predicate is abstract - not allowed in Knowledge Graphs
# Query 20: predicate is a mixin - not allowed in Knowledge Graphs, but "biolink:interacts_with"
# is tagged as an exception (see https://github.com/NCATSTranslator/reasoner-validator/issues/97)
{
"nodes": {
"HGNC:3059": {
"name": "Heparin binding EGF like growth factor",
"categories": [
"biolink:Gene"
],
"description": "heparin binding EGF like growth factor"
},
"HGNC:391": {
"name": "AKT serine/threonine kinase 1",
"categories": [
"biolink:Gene"
],
"description": "AKT serine/threonine kinase 1"
}
},
"edges": {
"edge_1": {
"subject": "HGNC:3059",
"predicate": "biolink:interacts_with",
"object": "HGNC:391",
"attributes": [
{
"attribute_type_id": "biolink:stoichiometry",
"value": 2
}
],
"sources": [
{
"resource_id": "infores:molepro",
"resource_role": "primary_knowledge_source"
}
]
}
}
},
"" # predicate "biolink:interacts_with" is now on
# the PREDICATE_INCLUSIONS list so 'mixin' validation is skipped
),
(
LATEST_BIOLINK_MODEL_VERSION,
# Query 21: predicate is abstract - not allowed in Knowledge Graphs
{
"nodes": {
"PMID:1234": {
"name": "article title",
"categories": [
"biolink:InformationContentEntity"
],
Expand Down Expand Up @@ -2962,7 +3012,7 @@ def test_validate_biolink_curie_in_qualifiers(
),
(
LATEST_BIOLINK_MODEL_VERSION,
# Query 21: predicate is non-canonical
# Query 22: predicate is non-canonical
{
"nodes": SIMPLE_SAMPLE_NODES,
"edges": {
Expand All @@ -2989,7 +3039,7 @@ def test_validate_biolink_curie_in_qualifiers(
),
(
LATEST_BIOLINK_MODEL_VERSION,
# Query 22: 'object' id is missing from the nodes catalog
# Query 23: 'object' id is missing from the nodes catalog
{
"nodes": SIMPLE_SAMPLE_NODES,
"edges": {
Expand All @@ -3016,7 +3066,7 @@ def test_validate_biolink_curie_in_qualifiers(
),
(
LATEST_BIOLINK_MODEL_VERSION,
# Query 23: attribute 'attribute_type_id' is missing
# Query 24: attribute 'attribute_type_id' is missing
{
"nodes": SIMPLE_SAMPLE_NODES,
"edges": {
Expand All @@ -3042,7 +3092,7 @@ def test_validate_biolink_curie_in_qualifiers(
),
(
LATEST_BIOLINK_MODEL_VERSION,
# Query 24: attribute 'value' is missing?
# Query 25: attribute 'value' is missing?
{
"nodes": SIMPLE_SAMPLE_NODES,
"edges": {
Expand All @@ -3069,7 +3119,7 @@ def test_validate_biolink_curie_in_qualifiers(
),
(
LATEST_BIOLINK_MODEL_VERSION,
# Query 25: 'attribute_type_id' is not a CURIE
# Query 26: 'attribute_type_id' is not a CURIE
{
"nodes": SIMPLE_SAMPLE_NODES,
"edges": {
Expand All @@ -3096,7 +3146,7 @@ def test_validate_biolink_curie_in_qualifiers(
),
(
LATEST_BIOLINK_MODEL_VERSION,
# Query 26: 'attribute_type_id' is a Biolink (node) category - not sure yet
# Query 27: 'attribute_type_id' is a Biolink (node) category - not sure yet
# whether this reflects "best practices" so issue a warning for now
{
"nodes": SIMPLE_SAMPLE_NODES,
Expand Down Expand Up @@ -3124,7 +3174,7 @@ def test_validate_biolink_curie_in_qualifiers(
),
(
LATEST_BIOLINK_MODEL_VERSION,
# Query 27: 'attribute_type_id' is a Biolink (edge) predicate - not sure yet
# Query 28: 'attribute_type_id' is a Biolink (edge) predicate - not sure yet
# whether this reflects "best practices" so issue a warning for now
{
"nodes": SIMPLE_SAMPLE_NODES,
Expand Down Expand Up @@ -3152,7 +3202,7 @@ def test_validate_biolink_curie_in_qualifiers(
),
(
LATEST_BIOLINK_MODEL_VERSION,
# Query 28: 'attribute_type_id' is not a 'biolink:association_slot' (biolink:synonym is a node property)
# Query 29: 'attribute_type_id' is not a 'biolink:association_slot' (biolink:synonym is a node property)
{
"nodes": SIMPLE_SAMPLE_NODES,
"edges": {
Expand All @@ -3179,7 +3229,7 @@ def test_validate_biolink_curie_in_qualifiers(
),
(
LATEST_BIOLINK_MODEL_VERSION,
# Query 29: 'attribute_type_id' has a 'biolink' CURIE prefix and
# Query 30: 'attribute_type_id' has a 'biolink' CURIE prefix and
# is an association_slot, so it should pass
{
"nodes": SIMPLE_SAMPLE_NODES,
Expand Down Expand Up @@ -3211,7 +3261,7 @@ def test_validate_biolink_curie_in_qualifiers(
),
(
LATEST_BIOLINK_MODEL_VERSION,
# Query 30: 'attribute_type_id' has a CURIE prefix namespace unknown to Biolink?
# Query 31: 'attribute_type_id' has a CURIE prefix namespace unknown to Biolink?
{
"nodes": SIMPLE_SAMPLE_NODES,
"edges": {
Expand All @@ -3236,7 +3286,7 @@ def test_validate_biolink_curie_in_qualifiers(
},
"warning.knowledge_graph.edge.attribute.type_id.non_biolink_prefix"
),
( # Query 31: # An earlier Biolink Model won't recognize a category not found in its specified release
( # Query 32: # An earlier Biolink Model won't recognize a category not found in its specified release
"1.8.2",
{
# Sample nodes
Expand Down Expand Up @@ -3265,7 +3315,7 @@ def test_validate_biolink_curie_in_qualifiers(
},
"error.knowledge_graph.node.category.unknown"
),
( # Query 32: # 'attribute_type_id' has a CURIE prefix namespace unknown to Biolink but...
( # Query 33: # 'attribute_type_id' has a CURIE prefix namespace unknown to Biolink but...
SUPPRESS_BIOLINK_MODEL_VALIDATION,
{
"nodes": SIMPLE_SAMPLE_NODES,
Expand Down Expand Up @@ -3295,7 +3345,7 @@ def test_validate_biolink_curie_in_qualifiers(
),
(
SUPPRESS_BIOLINK_MODEL_VALIDATION,
# Query 33: 'attribute_type_id' is not a 'biolink:association_slot'
# Query 34: 'attribute_type_id' is not a 'biolink:association_slot'
# (biolink:synonym is a node property) but...
{
"nodes": SIMPLE_SAMPLE_NODES,
Expand Down Expand Up @@ -3326,7 +3376,7 @@ def test_validate_biolink_curie_in_qualifiers(
(
LATEST_BIOLINK_MODEL_VERSION,
# Query 34: Knowledge Graph dangling nodes error
# Query 35: Knowledge Graph dangling nodes error
{
# Sample nodes with extra unused node
'nodes': SAMPLE_NODES_WITH_UNUSED_NODE,
Expand Down

0 comments on commit 8d0b452

Please sign in to comment.