Skip to content

Commit

Permalink
Merge pull request #21 from PathwayMerger/bel-specification
Browse files Browse the repository at this point in the history
Bel specification
  • Loading branch information
ddomingof committed Jul 1, 2019
2 parents a5acc00 + 3531cf5 commit 5da4fb7
Show file tree
Hide file tree
Showing 8 changed files with 130 additions and 47 deletions.
4 changes: 2 additions & 2 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ install_requires =
bio2bel==0.2.1
bio2bel_hgnc==0.2.2
bio2bel_chebi==0.2.1
bio2bel_kegg==0.2.3
bio2bel_wikipathways==0.2.2
bio2bel_kegg==0.2.5
bio2bel_wikipathways==0.2.3
bio2bel_reactome==0.2.3
pybel==0.13.2
pybel-tools>=0.7.2
Expand Down
5 changes: 3 additions & 2 deletions src/pathme/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,15 +90,16 @@ def ensure_pathme_folders(): # TODO why is this a function?
'methylation': 'Me',
}
KEGG_CITATION = '10592173'
REACTOME_CITATION = '29145629'

# FIXME why doesn't this just import the compath_resources package?
KEGG_WIKIPATHWAYS_MAPPINGS = 'https://github.com/ComPath/curation/raw/master/mappings/kegg_wikipathways.xlsx'
KEGG_REACTOME_MAPPINGS = 'https://github.com/ComPath/curation/raw/master/mappings/kegg_reactome.xlsx'
WIKIPATHWAYS_REACTOME_MAPPINGS = 'https://github.com/ComPath/curation/raw/master/mappings/wikipathways_reactome.xlsx'

KEGG_KGML_URL = 'http://rest.kegg.jp/get/{}/kgml'
RDF_REACTOME = ' ftp://ftp.ebi.ac.uk/pub/databases/RDF/reactome/r67/reactome-biopax.tar.bz2'
RDF_WIKIPATHWAYS = 'http://data.wikipathways.org/20190310/rdf/wikipathways-20190310-rdf-wp.zip'
RDF_REACTOME = 'ftp://ftp.ebi.ac.uk/pub/databases/RDF/reactome/r67/reactome-biopax.tar.bz2'
RDF_WIKIPATHWAYS = 'http://data.wikipathways.org/20190610/rdf/wikipathways-20190610-rdf-wp.zip'

KEGG_STATS_COLUMN_NAMES = {
'nodes': 'BEL Nodes',
Expand Down
4 changes: 2 additions & 2 deletions src/pathme/export_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import logging
import os
from typing import List, Iterable
from typing import Iterable, List, Tuple

import click
import networkx as nx
Expand Down Expand Up @@ -36,7 +36,7 @@ def add_annotation_key(graph):
graph[u][v][k][ANNOTATIONS] = {}


def get_all_pickles(kegg_path, reactome_path, wikipathways_path):
def get_all_pickles(kegg_path: str, reactome_path: str, wikipathways_path: str) -> Tuple[List, List, List]:
"""Return a list with all pickle paths."""
kegg_pickles = get_paths_in_folder(kegg_path)

Expand Down
65 changes: 50 additions & 15 deletions src/pathme/kegg/convert_to_bel.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from pybel import BELGraph, to_pickle
from pybel.dsl.edges import activity
from pybel.dsl.node_classes import CentralDogma
from pybel.dsl.nodes import abundance, bioprocess, complex_abundance, composite_abundance, pmod, protein, reaction
from pybel.dsl.nodes import abundance, bioprocess, complex_abundance, composite_abundance, pmod, protein, reaction, rna
from pybel.struct.summary import count_functions, edge_summary

from pathme.constants import *
Expand Down Expand Up @@ -171,7 +171,8 @@ def gene_to_bel_node(graph, node):
return protein_node

elif UNIPROT in attribute:
protein_node = protein(namespace=UNIPROT.upper(), name=attribute[UNIPROT], identifier=attribute[UNIPROT])
protein_node = protein(namespace=UNIPROT.upper(), name=attribute[UNIPROT],
identifier=attribute[UNIPROT])
graph.add_node_from_data(protein_node)
return protein_node

Expand Down Expand Up @@ -505,13 +506,23 @@ def add_simple_edge(graph, u, v, relation_type):

# Add increases edge if pmod subtype is coupled with activation subtype
if relation_type[0] == 'activation':
graph.add_increases(u, v_modified, citation='', evidence='', subject_modifier=activity(),
annotations={})
graph.add_increases(
u, v_modified,
citation=KEGG_CITATION, evidence='Extracted from KEGG',
subject_modifier=activity() if u in {protein, complex_abundance, rna} else None,
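# Apply the activity() modifier only when the subject is a protein, complex, or RNA node (BEL 2.0 specification)
# NOTE(review): `u in {protein, complex_abundance, rna}` tests membership against the DSL constructors, not the type of `u` - confirm whether isinstance() was intended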
annotations={},
)

# Add decreases edge if pmod subtype is coupled with inhibition subtype
elif relation_type[0] == 'inhibition':
graph.add_decreases(u, v_modified, citation='', evidence='', subject_modifier=activity(),
annotations={})
graph.add_decreases(
u, v_modified,
citation=KEGG_CITATION, evidence='Extracted from KEGG',
subject_modifier=activity() if u in {protein, complex_abundance, rna} else None,
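# Apply the activity() modifier only when the subject is a protein, complex, or RNA node (BEL 2.0 specification)
# NOTE(review): `u in {protein, complex_abundance, rna}` tests membership against the DSL constructors, not the type of `u` - confirm whether isinstance() was intended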
annotations={},
)

# TODO: add pmod of v activates v
# TODO: how to represent abundance modification in BEL?
Expand All @@ -522,48 +533,72 @@ def add_simple_edge(graph, u, v, relation_type):
# If the object is a gene, miRNA, RNA, or protein, add protein modification
if isinstance(v, CentralDogma):
v_modified = v.with_variants(pmod(KEGG_MODIFICATIONS[relation_type]))
graph.add_increases(u, v_modified, citation='', evidence='', subject_modifier=activity(),
annotations={})
graph.add_increases(
u, v_modified,
citation=KEGG_CITATION, evidence='Extracted from KEGG',
subject_modifier=activity() if u in {protein, complex_abundance, rna} else None,
annotations={},
)

# Subject activity decreases protein modification (i.e. dephosphorylation) of object
elif relation_type == 'dephosphorylation':

# If the object is a gene, miRNA, RNA, or protein, add protein modification
if isinstance(v, CentralDogma):
v = v.with_variants(pmod('Ph'))
graph.add_decreases(u, v, citation=KEGG_CITATION, evidence='', subject_modifier=activity(), annotations={})
graph.add_decreases(
u, v,
citation=KEGG_CITATION, evidence='Extracted from KEGG',
subject_modifier=activity() if u in {protein, complex_abundance, rna} else None,
annotations={},
)

# Subject increases activity of object
elif relation_type == 'activation':
graph.add_increases(u, v, citation=KEGG_CITATION, evidence='', object_modifier=activity(), annotations={})
graph.add_increases(
u, v,
citation=KEGG_CITATION, evidence='Extracted from KEGG',
object_modifier=activity() if v in {protein, complex_abundance, rna} else None,
annotations={},
)

# Catalytic activity of subject increases transformation of reactant(s) to product(s)
elif relation_type in {'reversible', 'irreversible'}:
graph.add_increases(u, v, citation=KEGG_CITATION, evidence='', subject_modifier=activity('cat'), annotations={})
graph.add_increases(
u, v,
citation=KEGG_CITATION, evidence='Extracted from KEGG',
subject_modifier=activity('cat') if u in {protein, complex_abundance, rna} else None,
annotations={},
)

# Subject decreases activity of object
elif relation_type == 'inhibition':
graph.add_decreases(u, v, citation=KEGG_CITATION, evidence='', object_modifier=activity(), annotations={})
graph.add_decreases(
u, v,
citation=KEGG_CITATION, evidence='Extracted from KEGG',
object_modifier=activity() if v in {protein, complex_abundance, rna} else None,
annotations={},
)

# Indirect effect and binding/association are noted to be equivalent relation types
elif relation_type in {'indirect effect', 'binding/association'}:
graph.add_association(u, v, citation=KEGG_CITATION, evidence='', annotations={})
graph.add_association(u, v, citation=KEGG_CITATION, evidence='Extracted from KEGG', annotations={})

# Subject increases expression of object
elif relation_type == 'expression':

# Expression object is converted to RNA abundance
if isinstance(v, CentralDogma):
v = v.get_rna()
graph.add_increases(u, v, citation=KEGG_CITATION, evidence='', annotations={})
graph.add_increases(u, v, citation=KEGG_CITATION, evidence='Extracted from KEGG', annotations={})

# Subject decreases expression of object
elif relation_type == 'repression':

# Repression object is converted to RNA abundance
if isinstance(v, CentralDogma):
v = v.get_rna()
graph.add_decreases(u, v, citation=KEGG_CITATION, evidence='', annotations={})
graph.add_decreases(u, v, citation=KEGG_CITATION, evidence='Extracted from KEGG', annotations={})

elif relation_type in {'dissociation', 'hidden compound', 'missing interaction', 'state change'}:
pass
Expand Down
12 changes: 7 additions & 5 deletions src/pathme/kegg/kegg_xml_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
import json
import logging
import os
from xml.etree.ElementTree import parse
from collections import defaultdict
from xml.etree.ElementTree import parse

import requests
from bio2bel_kegg.constants import API_KEGG_GET
Expand Down Expand Up @@ -92,10 +92,12 @@ def _post_process_api_query(node_meta_data, hgnc_manager, chebi_manager):
for chebi_id in identifier.split(' '):
chebi_entry = chebi_manager.get_chemical_by_chebi_id(chebi_id)

if not chebi_entry:
continue

node_dict[CHEBI_NAME] = chebi_entry.name
# If the ChEBI ID is found in the database, use the name of the matching entry
if chebi_entry:
node_dict[CHEBI_NAME] = chebi_entry.name
# Otherwise fall back to the "ENTRY_NAME" value from the node metadata so the node still gets a name in the graph
elif "ENTRY_NAME" in node_meta_data:
node_dict[CHEBI_NAME] = node_meta_data["ENTRY_NAME"]

return node_dict

Expand Down
33 changes: 20 additions & 13 deletions src/pathme/reactome/convert_to_bel.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

from bio2bel_chebi import Manager as ChebiManager
from bio2bel_hgnc import Manager as HgncManager

from pybel import BELGraph
from pybel.dsl import (
abundance,
Expand All @@ -23,7 +22,7 @@
NamedComplexAbundance
)

from pathme.constants import UNKNOWN
from pathme.constants import UNKNOWN, REACTOME_CITATION
from pathme.reactome.utils import get_valid_node_parameters, process_multiple_proteins
from pathme.utils import parse_id_uri

Expand All @@ -36,6 +35,7 @@

def convert_to_bel(nodes: Dict[str, Dict], interactions: List[Tuple[str, str, Dict]], pathway_info: Dict,
hgnc_manager: HgncManager, chebi_manager: ChebiManager) -> BELGraph:
"""Convert RDF graph dictionary into BEL graph."""
uri_id = pathway_info['uri_reactome_id']

if uri_id != UNKNOWN:
Expand Down Expand Up @@ -69,7 +69,8 @@ def convert_to_bel(nodes: Dict[str, Dict], interactions: List[Tuple[str, str, Di
return graph


def nodes_to_bel(nodes: Dict[str, Dict], graph: BELGraph, hgnc_manager: HgncManager, chebi_manager: ChebiManager) -> Dict[str, BaseEntity]:
def nodes_to_bel(nodes: Dict[str, Dict], graph: BELGraph, hgnc_manager: HgncManager, chebi_manager: ChebiManager) -> \
Dict[str, BaseEntity]:
"""Convert dictionary values to BEL nodes."""
return {
node_id: node_to_bel(node_att, graph, hgnc_manager, chebi_manager)
Expand Down Expand Up @@ -125,18 +126,16 @@ def node_to_bel(node: Dict, graph, hgnc_manager: HgncManager, chebi_manager: Che
namespace=namespace.upper()
)


elif 'Pathway' in node_types:
bioprocess_node = bioprocess(identifier=identifier, name=name, namespace=namespace.upper())
graph.add_node_from_data(bioprocess_node)
return bioprocess_node

else:
log.warning('Entity type not recognized', node_types)


def add_edges(graph: BELGraph, participants, nodes, att: Dict):
uri_id = att['uri_id']
"""Add edges into the graph."""
edge_types = att['interaction_type']

if isinstance(participants, dict):
Expand All @@ -157,17 +156,25 @@ def add_edges(graph: BELGraph, participants, nodes, att: Dict):
elif isinstance(participants, tuple):
u = nodes[participants[0]]
v = nodes[participants[1]]
add_simple_edge(graph, u, v, edge_types, uri_id)
add_simple_edge(graph, u, v, edge_types)


def add_simple_edge(graph: BELGraph, u, v, edge_types, uri_id):
def add_simple_edge(graph: BELGraph, u, v, edge_types):
"""Add a simple edge into the graph."""
if 'ACTIVATION' in edge_types:
# TODO add PubMed reference and description
graph.add_increases(u, v, citation=uri_id, evidence='', object_modifier=activity(), annotations={})
graph.add_increases(
u, v,
citation=REACTOME_CITATION, evidence='Extracted from Reactome',
object_modifier=activity() if v in {protein, complex_abundance, rna} else None,
annotations={},
)

elif 'INHIBITION' in edge_types:
# TODO add PubMed reference and description
graph.add_decreases(u, v, citation=uri_id, evidence='', object_modifier=activity(), annotations={})

graph.add_decreases(
u, v,
citation=REACTOME_CITATION, evidence='Extracted from Reactome',
object_modifier=activity() if v in {protein, complex_abundance, rna} else None,
annotations={},
)
else:
log.warning('edge type %s', edge_types)
23 changes: 19 additions & 4 deletions src/pathme/wikipathways/convert_to_bel.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,20 +200,35 @@ def add_simple_edge(graph: BELGraph, u, v, edge_types, uri_id):
:param uri_id: citation URI
"""
if 'Stimulation' in edge_types:
graph.add_increases(u, v, citation=uri_id, evidence='', object_modifier=activity())
graph.add_increases(
u, v,
citation=uri_id, evidence='Extracted from WikiPathways',
object_modifier=activity() if v in {protein, complex_abundance, rna} else None,
annotations={},
)

elif 'Inhibition' in edge_types:
graph.add_decreases(u, v, citation=uri_id, evidence='', object_modifier=activity())
graph.add_decreases(
u, v,
citation=uri_id, evidence='Extracted from WikiPathways',
object_modifier=activity() if v in {protein, complex_abundance, rna} else None,
annotations={},
)

elif 'Catalysis' in edge_types:
graph.add_increases(u, v, citation=uri_id, evidence='', object_modifier=activity())
graph.add_increases(
u, v,
citation=uri_id, evidence='Extracted from WikiPathways',
object_modifier=activity() if v in {protein, complex_abundance, rna} else None,
annotations={},
)

elif 'DirectedInteraction' in edge_types:
graph.add_qualified_edge(
u, v,
relation=REGULATES,
citation=uri_id,
evidence='',
evidence='Extracted from WikiPathways',
annotations={
'EdgeTypes': edge_types,
},
Expand Down

0 comments on commit 5da4fb7

Please sign in to comment.