Skip to content

Commit

Permalink
Add CX exporter and update iterate_universe_graphs
Browse files Browse the repository at this point in the history
  • Loading branch information
cthoyt committed Oct 4, 2019
1 parent 2c54ba0 commit e8fc775
Show file tree
Hide file tree
Showing 4 changed files with 130 additions and 64 deletions.
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ install_requires =
bio2bel_reactome==0.2.3
pybel==0.13.2
pybel-tools>=0.7.2
pybel-cx==0.1.4

This comment has been minimized.

Copy link
@ddomingof

ddomingof Nov 8, 2019

Member

0.1.4 is not released yet, so I changed it to 0.1.3. Is that a problem for this to work?

This comment has been minimized.

Copy link
@cthoyt

cthoyt Nov 10, 2019

Author Member

Uh... I merged all of this back into the master branch of PyBEL for v0.14, but I don't think you want to upgrade PathMe to the new one yet, right?


# Random options
zip_safe = false
Expand Down
71 changes: 61 additions & 10 deletions src/pathme/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import logging
import os
import sys

import click
import networkx as nx
Expand All @@ -14,8 +15,8 @@
import pathme.kegg.cli
import pathme.reactome.cli
import pathme.wikipathways.cli
from .constants import KEGG_BEL, REACTOME_BEL, SPIA_DIR, UNIVERSE_DIR, WIKIPATHWAYS_BEL
from .export_utils import get_universe_graph, spia_export_helper
from .constants import CX_DIR, KEGG_BEL, REACTOME_BEL, SPIA_DIR, UNIVERSE_DIR, WIKIPATHWAYS_BEL
from .export_utils import get_universe_graph, iterate_universe_graphs, spia_export_helper

logger = logging.getLogger(__name__)

Expand All @@ -37,10 +38,33 @@ def export():
"""Export commands."""


# Reusable click options shared by the export commands, so that every command
# exposes the same flags, help texts, and defaults.

# Folders holding the BEL pickles of each source database.
kegg_path_option = click.option('-k', '--kegg-path', help='KEGG BEL folder', default=KEGG_BEL, show_default=True)
reactome_path_option = click.option(
    '-r', '--reactome-path', help='Reactome BEL folder.', default=REACTOME_BEL, show_default=True,
)
wikipathways_path_option = click.option(
    '-w', '--wikipathways-path', help='WikiPathways BEL folder', default=WIKIPATHWAYS_BEL, show_default=True,
)

# Opt-out flags: the corresponding processing step runs unless the flag is given.
no_flatten = click.option(
    '--no-flatten',
    is_flag=True,
    help='Do not flatten complex/reactions nodes',
)
no_normalize_names = click.option(
    '--no-normalize-names',
    is_flag=True,
    help='Do not normalize names',
)


@export.command()
@click.option('-k', '--kegg_path', help='KEGG BEL folder', default=KEGG_BEL, show_default=True)
@click.option('-r', '--reactome_path', help='Reactome BEL folder.', default=REACTOME_BEL, show_default=True)
@click.option('-w', '--wikipathways_path', help='WikiPathways BEL folder', default=WIKIPATHWAYS_BEL, show_default=True)
@kegg_path_option
@reactome_path_option
@wikipathways_path_option
@click.option('-o', '--output', help='Output directory', default=SPIA_DIR, show_default=True)
def spia(kegg_path, reactome_path, wikipathways_path, output):
"""Export BEL Pickles to SPIA Excel."""
Expand All @@ -54,12 +78,39 @@ def spia(kegg_path, reactome_path, wikipathways_path, output):


@export.command()
@kegg_path_option
@reactome_path_option
@wikipathways_path_option
@click.option('-o', '--output', help='Output directory', default=CX_DIR, show_default=True)
@no_flatten
@no_normalize_names
def cx(kegg_path, reactome_path, wikipathways_path, output, no_flatten, no_normalize_names):
    """Export BEL Pickles to CX.

    Each graph produced by ``iterate_universe_graphs`` is written to
    ``<output>/<pickle name without extension>.cx.json``.

    :param kegg_path: folder with the KEGG BEL pickles
    :param reactome_path: folder with the Reactome BEL pickles
    :param wikipathways_path: folder with the WikiPathways BEL pickles
    :param output: directory in which the CX files are written
    :param no_flatten: if set, complex/reaction nodes are not flattened
    :param no_normalize_names: if set, names are not normalized
    """
    # pybel_cx is imported lazily so the other commands keep working without it.
    try:
        from pybel_cx import to_cx_file
    except ImportError:
        click.secho('Could not import pybel_cx. Use pip install pybel-cx.')
        sys.exit(1)

    # Make sure the export directory exists before opening files inside it.
    os.makedirs(output, exist_ok=True)

    click.echo(f'Results will be exported to {output}')
    for _source, path, graph in iterate_universe_graphs(
        kegg_path=kegg_path,
        reactome_path=reactome_path,
        wikipathways_path=wikipathways_path,
        flatten=(not no_flatten),
        normalize_names=(not no_normalize_names),
    ):
        # str.strip('.pickle') removes any of the characters ".pickle" from both
        # ends of the name instead of the extension; drop the extension explicitly.
        name, _extension = os.path.splitext(path)
        with open(os.path.join(output, f"{name}.cx.json"), 'w') as file:
            to_cx_file(graph, file)


@export.command()
@kegg_path_option
@reactome_path_option
@wikipathways_path_option
@click.option('-o', '--output', help='Output directory', default=UNIVERSE_DIR, show_default=True)
@click.option('--no-flatten', is_flag=True, help='Do not flatten complex/reactions nodes')
@click.option('--no-normalize-names', is_flag=True, help='Do not normalize names')
@no_flatten
@no_normalize_names
def universe(kegg_path, reactome_path, wikipathways_path, output, no_flatten, no_normalize_names):
"""Export harmonized PathMe universe."""
logging.basicConfig(level=logging.info, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
Expand Down
1 change: 1 addition & 0 deletions src/pathme/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ def get_data_dir() -> str:
WIKIPATHWAYS_FILES = os.path.join(WIKIPATHWAYS_DIR, 'rdf')

SPIA_DIR = os.path.join(DATA_DIR, 'spia')
CX_DIR = os.path.join(DATA_DIR, 'cx')
UNIVERSE_DIR = os.path.join(DATA_DIR, 'universe')


Expand Down
121 changes: 67 additions & 54 deletions src/pathme/export_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,16 @@
import pybel
from bio2bel_reactome import Manager as ReactomeManager
from bio2bel_reactome.models import Pathway
from pathme.constants import KEGG, PATHME_DIR, REACTOME, WIKIPATHWAYS
from pathme.normalize_names import normalize_graph_names
from pathme.pybel_utils import flatten_complex_nodes
from pybel import BELGraph, from_pickle, union
from pybel.constants import ANNOTATIONS, RELATION
from pybel.struct import add_annotation_value
from pybel.struct.mutation import collapse_all_variants, collapse_to_genes
from pybel_tools.analysis.spia import bel_to_spia_matrices, spia_matrices_to_excel
from tqdm import tqdm

from pathme.constants import KEGG, PATHME_DIR, REACTOME, WIKIPATHWAYS
from pathme.normalize_names import normalize_graph_names
from pathme.pybel_utils import flatten_complex_nodes
from .constants import KEGG_BEL, REACTOME_BEL, WIKIPATHWAYS_BEL

logger = logging.getLogger(__name__)
Expand All @@ -33,10 +33,10 @@ def add_annotation_key(graph: BELGraph):


def get_all_pickles(
*,
kegg_path: Optional[str] = None,
reactome_path: Optional[str] = None,
wikipathways_path: Optional[str] = None,
*,
kegg_path: Optional[str] = None,
reactome_path: Optional[str] = None,
wikipathways_path: Optional[str] = None,
) -> Tuple[List[str], List[str], List[str]]:
"""Return a list with all pickle paths."""
kegg_pickles = get_paths_in_folder(kegg_path or KEGG_BEL)
Expand All @@ -55,12 +55,12 @@ def get_all_pickles(


def get_universe_graph(
*,
kegg_path: str = KEGG_BEL,
reactome_path: str = REACTOME_BEL,
wikipathways_path: str = WIKIPATHWAYS_BEL,
flatten: bool = True,
normalize_names: bool = True,
*,
kegg_path: Optional[str] = None,
reactome_path: Optional[str] = None,
wikipathways_path: Optional[str] = None,
flatten: bool = True,
normalize_names: bool = True,
) -> BELGraph:
"""Return universe graph."""
universe_graphs = iterate_universe_graphs(
Expand All @@ -70,16 +70,18 @@ def get_universe_graph(
flatten=flatten,
normalize_names=normalize_names
)
# Just keep the graph and not the source
universe_graphs = (graph for _, _, graph in universe_graphs)
logger.info('Merging all into a hairball...')
return union(universe_graphs)


def spia_export_helper(
*,
output: str,
kegg_path: Optional[str] = None,
reactome_path: Optional[str] = None,
wikipathways_path: Optional[str] = None,
*,
output: str,
kegg_path: Optional[str] = None,
reactome_path: Optional[str] = None,
wikipathways_path: Optional[str] = None,
) -> None:
"""Export PathMe pickles to SPIA excel like file.
Expand All @@ -94,32 +96,32 @@ def spia_export_helper(
wikipathways_path=wikipathways_path,
)

all_pickles = kegg_pickles + reactome_pickles + wp_pickles
paths = kegg_pickles + reactome_pickles + wp_pickles

logger.info(f'A total of {len(all_pickles)} will be exported')
logger.info(f'A total of {len(paths)} will be exported')

iterator = tqdm(all_pickles, desc='Exporting SPIA excel files')
paths = tqdm(paths, desc='Exporting SPIA excel files')

# Call Reactome manager and check that is populated
reactome_manager = ReactomeManager()
if not reactome_manager.is_populated():
logger.warning('Reactome Manager is not populated')

# Load each pickle and export it as excel file
for file in iterator:
if not file.endswith('.pickle'):
for path in paths:
if not path.endswith('.pickle'):
continue

if file in kegg_pickles:
pathway_graph = from_pickle(os.path.join(kegg_path, file))
if path in kegg_pickles:
pathway_graph = from_pickle(os.path.join(kegg_path, path))
normalize_graph_names(pathway_graph, KEGG)

elif file in reactome_pickles:
elif path in reactome_pickles:
# Load BELGraph
pathway_graph = from_pickle(os.path.join(reactome_path, file))
pathway_graph = from_pickle(os.path.join(reactome_path, path))

# Check if pathway has children to build the merge graph
pathway_id = file.strip('.pickle')
pathway_id = path.strip('.pickle')

# Look up in Bio2BEL Reactome
pathway = reactome_manager.get_pathway_by_id(pathway_id)
Expand All @@ -143,12 +145,12 @@ def spia_export_helper(
# Normalize graph names
normalize_graph_names(pathway_graph, REACTOME)

elif file in wp_pickles:
pathway_graph = from_pickle(os.path.join(wikipathways_path, file))
elif path in wp_pickles:
pathway_graph = from_pickle(os.path.join(wikipathways_path, path))
normalize_graph_names(pathway_graph, WIKIPATHWAYS)

else:
logger.warning(f'Unknown pickle file: {file}')
logger.warning(f'Unknown pickle file: {path}')
continue

# Explode complex nodes
Expand All @@ -160,7 +162,7 @@ def spia_export_helper(

spia_matrices = bel_to_spia_matrices(pathway_graph)

output_file = os.path.join(output, f"{file.strip('.pickle')}.xlsx")
output_file = os.path.join(output, f"{path.strip('.pickle')}.xlsx")

if os.path.isfile(output_file):
continue
Expand All @@ -171,57 +173,68 @@ def spia_export_helper(

def iterate_indra_statements(**kwargs) -> Iterable['indra.statements.Statement']:
    """Iterate over INDRA statements for the universe.

    :param kwargs: keyword arguments forwarded unchanged to ``iterate_universe_graphs``
    :return: the INDRA statements of every graph, one graph after the other
    """
    # iterate_universe_graphs yields (source, path, graph); only the graph is converted.
    for _, _, graph in iterate_universe_graphs(**kwargs):
        yield from pybel.to_indra_statements(graph)


def iterate_universe_graphs(
    *,
    kegg_path: Optional[str] = None,
    reactome_path: Optional[str] = None,
    wikipathways_path: Optional[str] = None,
    flatten: bool = True,
    normalize_names: bool = True,
) -> Iterable[Tuple[str, str, BELGraph]]:
    """Iterate over the graphs that make up the universe.

    WikiPathways graphs are yielded first, then KEGG, then Reactome.

    :param kegg_path: folder with the KEGG BEL pickles; ``KEGG_BEL`` if not given
    :param reactome_path: folder with the Reactome BEL pickles; ``REACTOME_BEL`` if not given
    :param wikipathways_path: folder with the WikiPathways BEL pickles; ``WIKIPATHWAYS_BEL`` if not given
    :param flatten: forwarded to the per-database loaders
    :param normalize_names: forwarded to the per-database loaders
    :return: triples of (source database, pickle file name, graph)
    """
    # Resolve omitted folders up front: the loaders below join these folders
    # with file names, which fails on None.
    kegg_path = kegg_path or KEGG_BEL
    reactome_path = reactome_path or REACTOME_BEL
    wikipathways_path = wikipathways_path or WIKIPATHWAYS_BEL

    kegg_pickle_paths, reactome_pickle_paths, wp_pickle_paths = get_all_pickles(
        kegg_path=kegg_path,
        reactome_path=reactome_path,
        wikipathways_path=wikipathways_path,
    )

    n_paths = len(kegg_pickle_paths) + len(reactome_pickle_paths) + len(wp_pickle_paths)
    logger.info(f'{n_paths} graphs will be put in the universe')

    yield from _iterate_wp(wp_pickle_paths, wikipathways_path, flatten, normalize_names)
    yield from _iterate_kegg(kegg_pickle_paths, kegg_path, flatten, normalize_names)
    yield from _iterate_reactome(reactome_pickle_paths, reactome_path, flatten, normalize_names)


def _iterate_wp(wp_pickle_paths, wikipathways_path, flatten, normalize_names):
    """Yield ``(WIKIPATHWAYS, path, graph)`` for each WikiPathways pickle.

    Entries that do not end with ``.pickle`` are skipped.

    :param wp_pickle_paths: file names to load, joined onto ``wikipathways_path``
    :param wikipathways_path: folder containing the WikiPathways BEL pickles
    :param flatten: if true, ``flatten_complex_nodes`` is applied to each graph
    :param normalize_names: if true, ``normalize_graph_names`` is applied to each graph
    """
    for path in tqdm(wp_pickle_paths, desc=f'Loading WP pickles from {wikipathways_path}'):
        if not path.endswith('.pickle'):
            continue

        graph = from_pickle(os.path.join(wikipathways_path, path), check_version=False)

        if flatten:
            flatten_complex_nodes(graph)

        if normalize_names:
            normalize_graph_names(graph, WIKIPATHWAYS)

        _update_graph(graph, path, WIKIPATHWAYS)
        yield WIKIPATHWAYS, path, graph

for file in tqdm(kegg_pickles, desc=f'Loading KEGG pickles from {kegg_path}'):
if not file.endswith('.pickle'):

def _iterate_kegg(kegg_pickle_paths, kegg_path, flatten, normalize_names):
    """Yield ``(KEGG, path, graph)`` for each KEGG pickle.

    Entries that do not end with ``.pickle`` are skipped.

    :param kegg_pickle_paths: file names to load, joined onto ``kegg_path``
    :param kegg_path: folder containing the KEGG BEL pickles
    :param flatten: if true, ``flatten_complex_nodes`` is applied to each graph
    :param normalize_names: if true, ``normalize_graph_names`` is applied to each graph
    """
    for path in tqdm(kegg_pickle_paths, desc=f'Loading KEGG pickles from {kegg_path}'):
        if not path.endswith('.pickle'):
            continue

        graph = from_pickle(os.path.join(kegg_path, path), check_version=False)

        if flatten:
            flatten_complex_nodes(graph)

        if normalize_names:
            normalize_graph_names(graph, KEGG)

        _update_graph(graph, path, KEGG)
        yield KEGG, path, graph


for file in tqdm(reactome_pickles, desc=f'Loading Reactome pickles from {reactome_path}'):
def _iterate_reactome(reactome_pickle_paths, reactome_path, flatten, normalize_names):
for file in tqdm(reactome_pickle_paths, desc=f'Loading Reactome pickles from {reactome_path}'):
if not file.endswith('.pickle'):
continue

Expand All @@ -234,7 +247,7 @@ def iterate_universe_graphs(
normalize_graph_names(graph, REACTOME)

_update_graph(graph, file, REACTOME)
yield graph
yield REACTOME,path, graph


def _update_graph(graph, file, database):
Expand Down

0 comments on commit e8fc775

Please sign in to comment.