Skip to content

Commit

Permalink
Update logging and add INDRA exporter
Browse files Browse the repository at this point in the history
  • Loading branch information
cthoyt committed Jul 16, 2019
1 parent cbd8a6e commit fd951c7
Show file tree
Hide file tree
Showing 5 changed files with 155 additions and 145 deletions.
17 changes: 11 additions & 6 deletions src/pathme/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
import pathme.kegg.cli
import pathme.reactome.cli
import pathme.wikipathways.cli
from pathme.constants import KEGG_BEL, REACTOME_BEL, SPIA_DIR, UNIVERSE_DIR, WIKIPATHWAYS_BEL
from pathme.export_utils import get_universe_graph, spia_export_helper
from .constants import KEGG_BEL, REACTOME_BEL, SPIA_DIR, UNIVERSE_DIR, WIKIPATHWAYS_BEL
from .export_utils import get_universe_graph, spia_export_helper

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -45,7 +45,12 @@ def export():
def spia(kegg_path, reactome_path, wikipathways_path, output):
"""Export BEL Pickles to SPIA Excel."""
click.echo(f'Results will be exported to {output}')
spia_export_helper(kegg_path, reactome_path, wikipathways_path, output)
spia_export_helper(
kegg_path=kegg_path,
reactome_path=reactome_path,
wikipathways_path=wikipathways_path,
output=output,
)


@export.command()
Expand Down Expand Up @@ -73,9 +78,9 @@ def universe(kegg_path, reactome_path, wikipathways_path, output, no_flatten, no

# Not explode will flip the boolean coming from the cli
universe_graph = get_universe_graph(
kegg_path,
reactome_path,
wikipathways_path,
kegg_path=kegg_path,
reactome_path=reactome_path,
wikipathways_path=wikipathways_path,
flatten=flatten,
normalize_names=normalize_names,
)
Expand Down
184 changes: 95 additions & 89 deletions src/pathme/export_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,69 +4,69 @@

import logging
import os
from typing import Iterable, List, Tuple
from typing import Iterable, List, Optional, Tuple

import click
import networkx as nx
import pybel
from bio2bel_reactome import Manager as ReactomeManager
from bio2bel_reactome.models import Pathway
from pybel import BELGraph, union
from pybel import from_pickle
from pathme.constants import KEGG, PATHME_DIR, REACTOME, WIKIPATHWAYS
from pathme.normalize_names import normalize_graph_names
from pathme.pybel_utils import flatten_complex_nodes
from pybel import BELGraph, from_pickle, union
from pybel.constants import ANNOTATIONS, RELATION
from pybel.struct import add_annotation_value
from pybel.struct.mutation import collapse_all_variants, collapse_to_genes
from pybel_tools.analysis.spia import bel_to_spia_matrices, spia_matrices_to_excel
from tqdm import tqdm

from pathme.constants import KEGG, PATHME_DIR, REACTOME, WIKIPATHWAYS
from pathme.normalize_names import normalize_graph_names
from pathme.pybel_utils import flatten_complex_nodes
from .constants import KEGG_BEL, REACTOME_BEL, WIKIPATHWAYS_BEL

logger = logging.getLogger(__name__)


def add_annotation_key(graph):
"""Add annotation key in data (in place operation).
:param pybel.BELGraph graph: BEL Graph
"""
def add_annotation_key(graph: BELGraph):
    """Ensure every edge of the graph carries an annotations entry (in place).

    Edges that already have an entry under ``ANNOTATIONS`` are left untouched;
    edges without one get an empty dictionary.
    """
    for source, target, key in graph.edges(keys=True):
        edge_data = graph[source][target][key]
        # Only inserts the empty dictionary when the key is missing.
        edge_data.setdefault(ANNOTATIONS, {})


def get_all_pickles(kegg_path: str, reactome_path: str, wikipathways_path: str) -> Tuple[List, List, List]:
def get_all_pickles(
*,
kegg_path: Optional[str] = None,
reactome_path: Optional[str] = None,
wikipathways_path: Optional[str] = None,
) -> Tuple[List[str], List[str], List[str]]:
"""Return a list with all pickle paths."""
kegg_pickles = get_paths_in_folder(kegg_path)

kegg_pickles = get_paths_in_folder(kegg_path or KEGG_BEL)
if not kegg_pickles:
logger.warning('No KEGG files found. Please create the BEL KEGG files')

reactome_pickles = get_paths_in_folder(reactome_path)

reactome_pickles = get_paths_in_folder(reactome_path or REACTOME_BEL)
if not reactome_pickles:
logger.warning('No Reactome files found. Please create the BEL Reactome files')

wp_pickles = get_paths_in_folder(wikipathways_path)

wp_pickles = get_paths_in_folder(wikipathways_path or WIKIPATHWAYS_BEL)
if not wp_pickles:
logger.warning('No WikiPathways files found. Please create the BEL WikiPathways files')

return kegg_pickles, reactome_pickles, wp_pickles


def get_universe_graph(
kegg_path: str,
reactome_path: str,
wikipathways_path: str,
*,
kegg_path: str = KEGG_BEL,
reactome_path: str = REACTOME_BEL,
wikipathways_path: str = WIKIPATHWAYS_BEL,
flatten: bool = True,
normalize_names: bool = True,
) -> BELGraph:
"""Return universe graph."""
universe_graphs = _iterate_universe_graphs(
kegg_path, reactome_path, wikipathways_path,
universe_graphs = iterate_universe_graphs(
kegg_path=kegg_path,
reactome_path=reactome_path,
wikipathways_path=wikipathways_path,
flatten=flatten,
normalize_names=normalize_names
)
Expand All @@ -75,31 +75,33 @@ def get_universe_graph(


def spia_export_helper(
kegg_path: str,
reactome_path: str,
wikipathways_path: str,
output: str
):
*,
output: str,
kegg_path: Optional[str] = None,
reactome_path: Optional[str] = None,
wikipathways_path: Optional[str] = None,
) -> None:
"""Export PathMe pickles to SPIA excel like file.
:param output: output directory
:param kegg_path: directory to KEGG pickles
:param reactome_path: directory to Reactome pickles
:param wikipathways_path: directory to WikiPathways pickles
:param output: output directory
:param merge: merge equivalent pathways (default False)
:return:
"""
kegg_pickles, reactome_pickles, wp_pickles = get_all_pickles(kegg_path, reactome_path, wikipathways_path)
kegg_pickles, reactome_pickles, wp_pickles = get_all_pickles(
kegg_path=kegg_path,
reactome_path=reactome_path,
wikipathways_path=wikipathways_path,
)

all_pickles = kegg_pickles + reactome_pickles + wp_pickles

click.echo(f'A total of {len(all_pickles)} will be exported')
logger.info(f'A total of {len(all_pickles)} will be exported')

iterator = tqdm(all_pickles, desc='Exporting SPIA excel files')

# Call Reactome manager and check that is populated
reactome_manager = ReactomeManager()

if not reactome_manager.is_populated():
logger.warning('Reactome Manager is not populated')

Expand All @@ -113,7 +115,6 @@ def spia_export_helper(
normalize_graph_names(pathway_graph, KEGG)

elif file in reactome_pickles:

# Load BELGraph
pathway_graph = from_pickle(os.path.join(reactome_path, file))

Expand Down Expand Up @@ -168,77 +169,80 @@ def spia_export_helper(
spia_matrices_to_excel(spia_matrices, output_file)


def _iterate_universe_graphs(
kegg_path: str,
reactome_path: str,
wikipathways_path: str,
def iterate_indra_statements(**kwargs) -> Iterable['indra.statements.Statement']:
    """Yield the INDRA statements of every graph in the universe.

    All keyword arguments are forwarded unchanged to :func:`iterate_universe_graphs`;
    each graph it produces is converted with :func:`pybel.to_indra_statements`.
    """
    for universe_member in iterate_universe_graphs(**kwargs):
        for statement in pybel.to_indra_statements(universe_member):
            yield statement


def iterate_universe_graphs(
*,
kegg_path: Optional[str] = None,
reactome_path: Optional[str] = None,
wikipathways_path: Optional[str] = None,
flatten: bool = True,
normalize_names: bool = True,
) -> BELGraph:
) -> Iterable[BELGraph]:
"""Return universe graph."""
kegg_pickles, reactome_pickles, wp_pickles = get_all_pickles(kegg_path, reactome_path, wikipathways_path)

all_pickles = kegg_pickles + reactome_pickles + wp_pickles

logger.info(f'A total of {len(all_pickles)} will be merged into the universe')
kegg_pickles, reactome_pickles, wp_pickles = get_all_pickles(
kegg_path=kegg_path,
reactome_path=reactome_path,
wikipathways_path=wikipathways_path,
)

iterator = tqdm(all_pickles, desc='Loading of the graph pickles')
logger.info(f'{len(kegg_pickles) + len(reactome_pickles) + len(wp_pickles)} graphs will be put inthe universe')

# Export KEGG
for file in iterator:
for file in tqdm(wp_pickles, desc=f'Loading WP pickles from {wikipathways_path}'):
if not file.endswith('.pickle'):
continue

if file in kegg_pickles:
graph = from_pickle(os.path.join(kegg_path, file), check_version=False)

if flatten:
flatten_complex_nodes(graph)
graph = from_pickle(os.path.join(wikipathways_path, file), check_version=False)

if normalize_names:
normalize_graph_names(graph, KEGG)
if flatten:
flatten_complex_nodes(graph)

graph.annotation_list['database'] = {KEGG, REACTOME, WIKIPATHWAYS}
add_annotation_key(graph)
add_annotation_value(graph, 'database', KEGG)
if normalize_names:
normalize_graph_names(graph, WIKIPATHWAYS)

elif file in reactome_pickles:
graph = from_pickle(os.path.join(reactome_path, file), check_version=False)

if flatten:
flatten_complex_nodes(graph)
_update_graph(graph, file, WIKIPATHWAYS)
yield graph

if normalize_names:
normalize_graph_names(graph, REACTOME)
for file in tqdm(kegg_pickles, desc=f'Loading KEGG pickles from {kegg_path}'):
if not file.endswith('.pickle'):
continue
graph = from_pickle(os.path.join(kegg_path, file), check_version=False)

graph.annotation_list['database'] = {KEGG, REACTOME, WIKIPATHWAYS}
add_annotation_key(graph)
add_annotation_value(graph, 'database', REACTOME)
if flatten:
flatten_complex_nodes(graph)

if normalize_names:
normalize_graph_names(graph, KEGG)

elif file in wp_pickles:
graph = from_pickle(os.path.join(wikipathways_path, file), check_version=False)
_update_graph(graph, file, KEGG)
yield graph

if flatten:
flatten_complex_nodes(graph)
for file in tqdm(reactome_pickles, desc=f'Loading Reactome pickles from {reactome_path}'):
if not file.endswith('.pickle'):
continue

if normalize_names:
normalize_graph_names(graph, WIKIPATHWAYS)
graph = from_pickle(os.path.join(reactome_path, file), check_version=False)

graph.annotation_list['database'] = {KEGG, REACTOME, WIKIPATHWAYS}
add_annotation_key(graph)
add_annotation_value(graph, 'database', WIKIPATHWAYS)
if flatten:
flatten_complex_nodes(graph)

if normalize_names:
normalize_graph_names(graph, REACTOME)

else:
logger.warning(f'Unknown pickle file: {file}')
continue
_update_graph(graph, file, REACTOME)
yield graph

graph.annotation_pattern['PathwayID'] = '.*'
add_annotation_value(graph, 'PathwayID', file.strip(".pickle"))

yield graph
def _update_graph(graph, file, database):
    """Tag a loaded pathway graph with its source database and pathway identifier (in place).

    :param graph: BEL graph loaded from a pathway pickle
    :param file: file name of the pickle the graph was loaded from
    :param database: name of the database the pathway comes from (callers pass KEGG, REACTOME or WIKIPATHWAYS)
    """
    # Declare the allowed values of the 'database' annotation on the graph.
    graph.annotation_list['database'] = {KEGG, REACTOME, WIKIPATHWAYS}
    # Make sure every edge has an annotations dictionary before values are added to it.
    add_annotation_key(graph)
    add_annotation_value(graph, 'database', database)

    # Any string is accepted as a pathway identifier.
    graph.annotation_pattern['PathwayID'] = '.*'
    # Remove only the trailing extension. ``str.strip('.pickle')`` treats its argument as a
    # set of characters and strips them from BOTH ends, which also eats letters that belong
    # to the identifier itself (e.g. 'cell_cycle.pickle' -> '_cy').
    extension = '.pickle'
    pathway_id = file[:-len(extension)] if file.endswith(extension) else file
    add_annotation_value(graph, 'PathwayID', pathway_id)


def _munge_node_attribute(node, attribute='name'):
Expand All @@ -250,13 +254,15 @@ def _munge_node_attribute(node, attribute='name'):


def to_gml(graph: pybel.BELGraph, path: str = PATHME_DIR) -> None:
"""Write this graph to GML file using :func:`networkx.write_gml`.
"""
"""Write this graph to GML file using :func:`networkx.write_gml`."""
rv = nx.MultiDiGraph()

for node in graph:
rv.add_node(_munge_node_attribute(node, 'name'), namespace=str(node.get('namespace')),
function=node.get('function'))
rv.add_node(
_munge_node_attribute(node, 'name'),
namespace=str(node.get('namespace')),
function=node.get('function'),
)

for u, v, key, edge_data in graph.edges(data=True, keys=True):
rv.add_edge(
Expand Down Expand Up @@ -284,7 +290,7 @@ def get_paths_in_folder(directory: str) -> List[str]:


def yield_all_children(pathway: Pathway) -> Iterable[Pathway]:
"""Transverse recursively reactome hierarchy and return all children for a given pathway."""
"""Transverse recursively the Reactome hierarchy and return all children for a given pathway."""
if pathway.children:
for child in pathway.children:
yield child
Expand Down
2 changes: 1 addition & 1 deletion src/pathme/kegg/convert_to_bel.py
Original file line number Diff line number Diff line change
Expand Up @@ -684,7 +684,7 @@ def kegg_to_pickles(resource_files, resource_folder, hgnc_manager, chebi_manager
if export_folder is None:
export_folder = resource_folder

for kgml_file in tqdm.tqdm(resource_files, desc='Exporting KEGG to BEL'):
for kgml_file in tqdm.tqdm(resource_files, desc=f'Exporting KEGG to BEL in {export_folder}'):

# Name of file created will be: "hsaXXX_unflatten.pickle" or "hsaXXX_flatten.pickle"
pickle_path = os.path.join(
Expand Down
13 changes: 6 additions & 7 deletions src/pathme/reactome/rdf_sparql.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,13 @@
from typing import Any, Dict, List, Set, Tuple, Union

import rdflib
import tqdm
from pybel import to_pickle
from rdflib import URIRef
from rdflib.namespace import DC, DCTERMS, Namespace, OWL, RDF, RDFS, SKOS, XSD

from pathme.constants import REACTOME_BEL
from pathme.reactome.convert_to_bel import convert_to_bel
from pathme.utils import get_pathway_statitics, parse_rdf, query_result_to_dict
from pybel import to_pickle
from rdflib import URIRef
from rdflib.namespace import DC, DCTERMS, Namespace, OWL, RDF, RDFS, SKOS, XSD
from tqdm import tqdm

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -275,7 +274,7 @@ def get_reactome_statistics(resource_file, hgnc_manager, chebi_manager):

global_statistics = defaultdict(lambda: defaultdict(int))

for pathway_uri, pathway_title in tqdm.tqdm(spaqrl_all_pathways, desc='Generating Reactome Statistics'):
for pathway_uri, pathway_title in tqdm(spaqrl_all_pathways, desc='Generating Reactome Statistics'):
nodes, edges = _get_pathway_components(pathway_uri, rdf_graph)
pathway_metadata = _get_pathway_metadata(pathway_uri, rdf_graph)

Expand Down Expand Up @@ -321,7 +320,7 @@ def reactome_to_bel(resource_file, hgnc_manager, chebi_manager, export_folder=RE

pathways_uris_to_names = rdf_graph.query(GET_ALL_PATHWAYS, initNs=PREFIXES)

for pathway_uri, pathway_name in tqdm.tqdm(pathways_uris_to_names, desc='Creating Reactome BELGraphs'):
for pathway_uri, pathway_name in tqdm(pathways_uris_to_names, desc=f'Exporting Reactome BEL to {export_folder}'):

# Take the identifier of the pathway which is placed at the end of the URL and also strip the number
# next to it. (probably version of pathway)
Expand Down

0 comments on commit fd951c7

Please sign in to comment.