Skip to content

Commit

Permalink
adds is_valid_predicate_name to ensure that positive examples are val…
Browse files Browse the repository at this point in the history
…id
  • Loading branch information
JSybrandt committed May 20, 2020
1 parent 1e5ee6a commit ffbf00e
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 1 deletion.
5 changes: 4 additions & 1 deletion agatha/ml/hypothesis_predictor/hypothesis_predictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,10 @@ def prepare_for_training(self)->None:
entities = self.embeddings.keys()
assert len(entities) > 0, "Failed to find embedding entities."
self.coded_terms = list(filter(is_umls_term_type, entities))
self.predicates = list(filter(is_predicate_type, entities))
self.predicates = list(filter(
predicate_util.is_valid_predicate_name,
entities
))
self._vprint("Splitting train/validation")
validation_size = int(
len(self.predicates) * self.hparams.validation_fraction
Expand Down
9 changes: 9 additions & 0 deletions agatha/ml/hypothesis_predictor/predicate_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,15 @@ def clean_coded_term(term:str)->str:
return f"{UMLS_TERM_TYPE}:{term}".lower()


def is_valid_predicate_name(predicate_name:str)->bool:
    """Checks whether a name is a fully specified predicate.

    A name is accepted when it passes `is_predicate_type` and splits on ":"
    into exactly four fields, read here as type:subject:verb:object (for
    example "p:c123:CAUSES:c456"), with a non-empty subject and a non-empty
    object. The type and verb fields are not inspected by this function
    beyond whatever `is_predicate_type` already checks.

    Args:
      predicate_name: Candidate name to check.

    Returns:
      True if the name has four colon-separated fields with a non-empty
      subject and object, False otherwise.
    """
    if not is_predicate_type(predicate_name):
        return False
    fields = predicate_name.split(":")
    # Test the field count directly instead of relying on a failed tuple
    # unpacking caught by a broad `except Exception`, which would also hide
    # unrelated errors.
    if len(fields) != 4:
        return False
    _, subj, _, obj = fields
    return bool(subj) and bool(obj)

def parse_predicate_name(predicate_name:str)->Tuple[str, str]:
"""Parses subject and object from predicate name strings.
Expand Down
12 changes: 12 additions & 0 deletions agatha/ml/hypothesis_predictor/test_predicate_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,15 @@ def test_clean_coded_term_passthrough_lower():
expected = "m:c0444567"
actual = predicate_util.clean_coded_term(term)
assert actual == expected

def test_is_valid_predicate_name():
    """Accepts four-field names with subject and object; rejects the rest."""
    # Names with all four colon-separated fields and a non-empty subject and
    # object.
    accepted = (
        "p:c123:CAUSES:c456",
        "p:123:CAUSES:456",
        "p:1:FOO:4",
    )
    # Names with a missing object, missing subject, missing type prefix, or
    # the wrong number of fields, plus the empty string.
    rejected = (
        "p:c123:CAUSES:",
        "p::CAUSES:c456",
        "c123:CAUSES:c456",
        "p:",
        "p:1:2",
        "p:::",
        "",
    )
    for name in accepted:
        assert predicate_util.is_valid_predicate_name(name)
    for name in rejected:
        assert not predicate_util.is_valid_predicate_name(name)

0 comments on commit ffbf00e

Please sign in to comment.