Skip to content

Commit

Permalink
Merge pull request #222 from Knowledge-Graph-Hub/fix_sars_cov_2_taxon_ids
Browse files Browse the repository at this point in the history

Prettify taxon id in sars-cov-2 gene annotations
  • Loading branch information
deepakunni3 committed Jun 12, 2020
2 parents c983cfd + 0c91a69 commit 9cab8b4
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,15 @@ def __init__(self, input_dir: Optional[str] = None, output_dir: str = None):
source_name = "sars_cov_2_gene_annot"
super().__init__(source_name, input_dir, output_dir)

self.node_header = ['id', 'name', 'category', 'synonym', 'taxon', 'provided_by']
self.node_header = ['id', 'name', 'category', 'synonym', 'in_taxon', 'provided_by']
self.edge_header = ['subject', 'edge_label', 'object', 'relation',
'provided_by', 'DB_References', 'ECO_code', 'With',
'Interacting_taxon_ID',
'Date', 'Assigned_by', 'Annotation_Extension',
'Annotation_Properties']

self.protein_node_type = "biolink:Protein"
self.ncbi_taxon_prefix = "NCBITaxon:"
self.ncbi_taxon_prefix = "NCBITaxon"

# translate edge labels to RO term, for the 'relation' column in edge
self.edge_label_prefix = "biolink:" # prepend to edge label
Expand Down Expand Up @@ -88,6 +88,8 @@ def gpa_to_edge_data(self, rec: dict) -> list:
item = get_item_by_priority(rec, [key])
if type(item) is list:
item = item[0]
if key == 'Interacting_taxon_ID':
item = ":".join([self.ncbi_taxon_prefix, item])
except (ItemInDictNotFound, IndexError):
item = ''
edge_data.append(item)
Expand All @@ -109,7 +111,7 @@ def gpi_to_gene_node_data(self, rec: dict) -> list:
:param rec: record from gpi iterator
:return: list of node items, one for each thing in self.node_header
"""
# ['id', 'name', 'category', 'synonym', 'taxon']
# ['id', 'name', 'category', 'synonym', 'in_taxon']
id: str = self._rec_to_id(rec)

try:
Expand All @@ -127,6 +129,7 @@ def gpi_to_gene_node_data(self, rec: dict) -> list:
except (IndexError, ItemInDictNotFound):
synonym = ''
taxon = get_item_by_priority(rec, ['Taxon'])
taxon = ":".join([self.ncbi_taxon_prefix, taxon.split(":")[1]])
return [id, name, category, synonym, taxon, self.source_name]


Expand Down
2 changes: 1 addition & 1 deletion tests/test_sars_cov_2_gene_annot.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def test_gpi_to_gene_node(self):
node = self.sc2ga.gpi_to_gene_node_data(item)
self.assertEqual(len(self.sc2ga.node_header), len(node))
self.assertEqual(node,
['UniProtKB:P0DTD2', 'Protein 9b', 'biolink:Protein', '', 'taxon:2697049', 'sars_cov_2_gene_annot'])
['UniProtKB:P0DTD2', 'Protein 9b', 'biolink:Protein', '', 'NCBITaxon:2697049', 'sars_cov_2_gene_annot'])

def test_gpa_to_edge_data(self):
gpa_iter = _gpa11iterator(self.gpa_fh)
Expand Down

0 comments on commit 9cab8b4

Please sign in to comment.