Skip to content

Commit

Permalink
Move guess_category to transform_utils
Browse files Browse the repository at this point in the history
  • Loading branch information
deepakunni3 committed Jun 27, 2020
1 parent 01109d9 commit c2622ca
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import os
from typing import Generator, TextIO, List, Optional

from kg_covid_19.utils.transform_utils import get_item_by_priority, ItemInDictNotFound
from kg_covid_19.utils.transform_utils import get_item_by_priority, ItemInDictNotFound, guess_category

from kg_covid_19.transform_utils.transform import Transform
from kg_covid_19.utils import write_node_edge_item
Expand Down Expand Up @@ -66,30 +66,17 @@ def run(self, data_file: str = None):
edge_data = self.gpa_to_edge_data(rec)
subject_node = edge_data[0]
if subject_node not in seen:
subject_node_data = [subject_node, self.guess_category(subject_node)] + [""] * 4 + [self.source_name]
subject_node_data = [subject_node, guess_category(subject_node)] + [""] * 4 + [self.source_name]
write_node_edge_item(node, self.node_header, subject_node_data)
seen.add(subject_node)
object_node = edge_data[2]
if object_node not in seen:
object_node_data = [object_node, self.guess_category(object_node)] + [""] * 4 + [self.source_name]
object_node_data = [object_node, guess_category(object_node)] + [""] * 4 + [self.source_name]
write_node_edge_item(node, self.node_header, object_node_data)
seen.add(object_node)

write_node_edge_item(edge, self.edge_header, edge_data)

def guess_category(self, identifier: str) -> str:
    """Guess category for a given identifier.

    Args:
        identifier: A CURIE; only the text before the first ':' is
            inspected.

    Returns:
        'biolink:Protein' for the UniProtKB and ComplexPortal prefixes,
        'biolink:OntologyClass' for the GO prefix, and
        'biolink:NamedThing' for every other prefix.

    """
    category_by_prefix = {
        'UniProtKB': 'biolink:Protein',
        'ComplexPortal': 'biolink:Protein',
        'GO': 'biolink:OntologyClass',
    }
    # An identifier without ':' is treated as being entirely prefix.
    prefix = identifier.split(':')[0]
    # Unrecognised prefixes fall back to the most generic category.
    return category_by_prefix.get(prefix, 'biolink:NamedThing')

def gpa_to_edge_data(self, rec: dict) -> list:
"""given a parsed gpa entry, return an edge with the annotations
Expand Down
22 changes: 22 additions & 0 deletions kg_covid_19/utils/transform_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,3 +154,25 @@ def parse_header(header_string: str, sep: str = '\t') -> List:
def unzip_to_tempdir(zip_file_name: str, tempdir: str) -> None:
    """Extract every member of a zip archive into a directory.

    Args:
        zip_file_name: Path of the zip archive to read.
        tempdir: Directory that the archive members are extracted into.

    """
    # NOTE(review): extractall() trusts the member names stored in the
    # archive; only call this on archives from a trusted source.
    with zipfile.ZipFile(zip_file_name, 'r') as archive:
        archive.extractall(tempdir)


def guess_category(identifier: str) -> str:
    """Guess category for a given identifier.

    Note: This is a temporary solution and should not be used long term.

    Args:
        identifier: A CURIE

    Returns:
        The category for the given CURIE

    """
    # Only the text before the first ':' is inspected; an identifier with
    # no ':' is treated as being entirely prefix.
    prefix = identifier.partition(':')[0]
    if prefix in {'UniProtKB', 'ComplexPortal'}:
        return 'biolink:Protein'
    if prefix == 'GO':
        return 'biolink:OntologyClass'
    # Unrecognised prefixes fall back to the most generic category.
    return 'biolink:NamedThing'

0 comments on commit c2622ca

Please sign in to comment.