Skip to content

Commit

Permalink
Include category for missing nodes
Browse files Browse the repository at this point in the history
  • Loading branch information
deepakunni3 committed Jun 27, 2020
1 parent 37a1302 commit 01109d9
Showing 1 changed file with 15 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -66,17 +66,30 @@ def run(self, data_file: str = None):
edge_data = self.gpa_to_edge_data(rec)
subject_node = edge_data[0]
if subject_node not in seen:
subject_node_data = [subject_node] + [""] * 5 + [self.source_name]
subject_node_data = [subject_node, self.guess_category(subject_node)] + [""] * 4 + [self.source_name]
write_node_edge_item(node, self.node_header, subject_node_data)
seen.add(subject_node)
object_node = edge_data[2]
if object_node not in seen:
object_node_data = [object_node] + [""] * 5 + [self.source_name]
object_node_data = [object_node, self.guess_category(object_node)] + [""] * 4 + [self.source_name]
write_node_edge_item(node, self.node_header, object_node_data)
seen.add(object_node)

write_node_edge_item(edge, self.edge_header, edge_data)

def guess_category(self, identifier: str) -> str:
    """Guess the Biolink category for a prefixed identifier.

    The decision is based only on the text that precedes the first ':'
    in ``identifier``; when there is no ':' the whole string is treated
    as the prefix. The match is case-sensitive.

    Args:
        identifier: Node identifier such as ``'UniProtKB:P12345'``.

    Returns:
        ``'biolink:Protein'`` for the ``UniProtKB`` and ``ComplexPortal``
        prefixes, ``'biolink:OntologyClass'`` for the ``GO`` prefix, and
        ``'biolink:NamedThing'`` for every other prefix.
    """
    # Known prefixes and the category assigned to each of them.
    prefix_to_category = {
        'UniProtKB': 'biolink:Protein',
        'ComplexPortal': 'biolink:Protein',
        'GO': 'biolink:OntologyClass',
    }
    # partition() stops at the first ':' and returns the full string as
    # the head when no ':' is present, so unprefixed identifiers fall
    # through to the generic category below.
    prefix, _, _ = identifier.partition(':')
    return prefix_to_category.get(prefix, 'biolink:NamedThing')

def gpa_to_edge_data(self, rec: dict) -> list:
"""given a parsed gpa entry, return an edge with the annotations
Expand Down

0 comments on commit 01109d9

Please sign in to comment.