Skip to content

Commit

Permalink
Merge 01109d9 into 1a0a7ba
Browse files Browse the repository at this point in the history
  • Loading branch information
justaddcoffee committed Jun 27, 2020
2 parents 1a0a7ba + 01109d9 commit 05742d3
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 2 deletions.
2 changes: 1 addition & 1 deletion download.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@
-
# Gene Product information for SARS-CoV-2 genes in GPI format
# http://geneontology.org/docs/gene-product-information-gpi-format/
-url: ftp://ftp.ebi.ac.uk/pub/contrib/goa/uniprot_sars-cov-2.gpi
+url: https://raw.githubusercontent.com/Knowledge-Graph-Hub/kg-covid-19/master/curated/ORFs/uniprot_sars-cov-2.gpi
local_name: uniprot_sars-cov-2.gpi

#
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,17 +54,42 @@ def run(self, data_file: str = None):
# write headers
node.write("\t".join(self.node_header) + "\n")
edge.write("\t".join(self.edge_header) + "\n")

seen = set()
with open(gpi_file, 'r') as gpi_fh:
for rec in _gpi12iterator(gpi_fh):
node_data = self.gpi_to_gene_node_data(rec)
seen.add(node_data[0])
write_node_edge_item(node, self.node_header, node_data)

with open(gpa_file, 'r') as gpa_fh:
for rec in _gpa11iterator(gpa_fh):
edge_data = self.gpa_to_edge_data(rec)
subject_node = edge_data[0]
if subject_node not in seen:
subject_node_data = [subject_node, self.guess_category(subject_node)] + [""] * 4 + [self.source_name]
write_node_edge_item(node, self.node_header, subject_node_data)
seen.add(subject_node)
object_node = edge_data[2]
if object_node not in seen:
object_node_data = [object_node, self.guess_category(object_node)] + [""] * 4 + [self.source_name]
write_node_edge_item(node, self.node_header, object_node_data)
seen.add(object_node)

write_node_edge_item(edge, self.edge_header, edge_data)

def guess_category(self, identifier):
    """Return the biolink category implied by an identifier's prefix.

    The prefix is the text before the first ':' in ``identifier`` (the
    whole string when it contains no ':').  ``UniProtKB`` and
    ``ComplexPortal`` map to ``biolink:Protein``, ``GO`` maps to
    ``biolink:OntologyClass``, and every other prefix falls back to
    ``biolink:NamedThing``.
    """
    # Lookup table of the prefixes this method recognises.
    categories_by_prefix = {
        'UniProtKB': 'biolink:Protein',
        'ComplexPortal': 'biolink:Protein',
        'GO': 'biolink:OntologyClass',
    }
    namespace, _, _ = identifier.partition(':')
    # Unrecognised prefixes get the generic category.
    return categories_by_prefix.get(namespace, 'biolink:NamedThing')

def gpa_to_edge_data(self, rec: dict) -> list:
"""given a parsed gpa entry, return an edge with the annotations
Expand Down

0 comments on commit 05742d3

Please sign in to comment.