Skip to content

Commit

Permalink
Wikipathways adding 'chembl.compound', 'ncbiprotein' and 'ena.embl' cases.
Browse files: browse the repository at this point in the history
  • Loading branch information
jmarinllao committed Oct 7, 2018
1 parent b1bccda commit 2b34504
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 4 deletions.
2 changes: 2 additions & 0 deletions src/pathme/wikipathways/convert_to_bel.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ def convert_to_bel(nodes: Dict[str, Dict], complexes: Dict[str, Dict], interacti
contact='daniel.domingo.fernandez@scai.fraunhofer.de',
)

print(pathway_info['pathway_id'])

nodes = nodes_to_bel(nodes, hgnc_manager)
nodes.update(complexes_to_bel(complexes, nodes, graph))

Expand Down
1 change: 1 addition & 0 deletions src/pathme/wikipathways/rdf_sparql.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ def get_wp_statistics(resource_files, resource_folder, hgnc_manager) -> Tuple[
all_pathways_statistics = {}

for rdf_file in tqdm.tqdm(resource_files, desc='Parsing WikiPathways'):
print(rdf_file)
# Parse pathway rdf_file
pathway_path = os.path.join(resource_folder, rdf_file)
rdf_graph = parse_rdf(pathway_path, format='turtle')
Expand Down
24 changes: 20 additions & 4 deletions src/pathme/wikipathways/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,12 @@ def get_valid_gene_identifier(node_ids_dict, hgnc_manager):
return _validate_query(hgnc_manager, hgnc_entry, hgnc_symbol, HGNC)

# Try to get ENTREZ id
elif 'bdb_ncbigene' in node_ids_dict:
entrez_id = check_multiple(node_ids_dict['bdb_ncbigene'], 'bdb_ncbigene')
elif 'bdb_ncbigene' in node_ids_dict or 'ncbiprotein' in node_ids_dict['uri_id']:
if 'bdb_ncbigene' in node_ids_dict:
entrez_id = check_multiple(node_ids_dict['bdb_ncbigene'], 'bdb_ncbigene')
elif 'ncbiprotein' in node_ids_dict['uri_id']:
entrez_id = check_multiple(node_ids_dict['identifier'], 'ncbiprotein')

hgnc_entry = hgnc_manager.get_gene_by_entrez_id(entrez_id)

return _validate_query(hgnc_manager, hgnc_entry, entrez_id, ENTREZ)
Expand All @@ -106,8 +110,13 @@ def get_valid_gene_identifier(node_ids_dict, hgnc_manager):
return _validate_query(hgnc_manager, hgnc_entry, uniprot_id, UNIPROT)

# Try to get ENSEMBL id
elif 'bdb_ensembl' in node_ids_dict:
ensembl_id = check_multiple(node_ids_dict['bdb_ensembl'], 'bdb_ensembl')
elif 'bdb_ensembl' in node_ids_dict or 'ena.embl' in node_ids_dict['uri_id']:
if 'bdb_ensembl' in node_ids_dict:
ensembl_id = check_multiple(node_ids_dict['bdb_ensembl'], 'bdb_ensembl')

elif 'ena.embl' in node_ids_dict['uri_id']:
ensembl_id = check_multiple(node_ids_dict['identifier'], 'bdb_ensembl')

hgnc_entry = hgnc_manager.get_gene_by_uniprot_id(ensembl_id)

return _validate_query(hgnc_manager, hgnc_entry, ensembl_id, ENSEMBL)
Expand Down Expand Up @@ -167,6 +176,13 @@ def get_valid_gene_identifier(node_ids_dict, hgnc_manager):

return PFAM, mirbase_name, mirbase_id

elif 'chembl.compound' in node_ids_dict['uri_id']:
chembl_id = check_multiple(node_ids_dict['identifier'], 'chembl_id')
chembl_name = check_multiple(node_ids_dict['name'], 'chembl_name')
log.warning('Adding MIRBASE node %s ', chembl_id)

return PFAM, chembl_name, chembl_id

This comment has been minimized.

Copy link
@cthoyt

cthoyt Oct 7, 2018

Member

why PFAM?


raise Exception('Unknown identifier for node %s', node_ids_dict)


Expand Down

0 comments on commit 2b34504

Please sign in to comment.