Skip to content

Commit

Permalink
Add normalization code
Browse files Browse the repository at this point in the history
  • Loading branch information
Daniel Domingo-Fernandez committed Mar 6, 2019
1 parent 2745639 commit 2ece010
Show file tree
Hide file tree
Showing 8 changed files with 224 additions and 144 deletions.
22 changes: 5 additions & 17 deletions src/pathme/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,21 +9,18 @@
from bio2bel_chebi import Manager as ChebiManager
from bio2bel_hgnc import Manager as HgncManager
from pathme.constants import *
from pathme.export_universe import get_all_pickles
from pathme.export_universe import get_universe_graph
from pathme.export_utils import get_all_pickles, get_files_in_folder, get_universe_graph
from pathme.kegg.convert_to_bel import kegg_to_pickles
from pathme.kegg.utils import download_kgml_files, get_kegg_pathway_ids
from pathme.reactome.rdf_sparql import get_reactome_statistics, reactome_to_bel
from pathme.reactome.utils import untar_file
from pathme.utils import CallCounted, get_files_in_folder, make_downloader, statistics_to_df, summarize_helper
from pathme.utils import CallCounted, make_downloader, statistics_to_df, summarize_helper
from pathme.wikipathways.rdf_sparql import get_wp_statistics, wikipathways_to_pickles
from pathme.wikipathways.utils import get_file_name_from_url, get_wikipathways_files, unzip_file
from pybel import from_pickle, to_pickle
from pybel.struct.mutation import collapse_to_genes, collapse_all_variants
from tqdm import tqdm

from pybel_tools.node_utils import list_abundance_cartesian_expansion, reaction_cartesian_expansion

logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -351,20 +348,11 @@ def export_to_spia(kegg_path, reactome_path, wikipathways_path, output):
@click.option('-r', '--reactome_path', help='Reactome BEL folder.', default=REACTOME_BEL, show_default=True)
@click.option('-w', '--wikipathways_path', help='WikiPathways BEL folder', default=WIKIPATHWAYS_BEL, show_default=True)
@click.option('-o', '--output', help='Output directory', default=SPIA_DIR, show_default=True)
def get_harmonize_universe(kegg_path, reactome_path, wikipathways_path, output):
@click.option('-f', '--flatten', is_flag=True, default=True)
def get_harmonize_universe(kegg_path, reactome_path, wikipathways_path, output, flatten):
"""Return harmonized universe BELGraph of all the databases included in PathMe."""
universe_graph = get_universe_graph(kegg_path, reactome_path, wikipathways_path)

# Step 1: Flat complexes and composites
logger.info("Flat complexes and composites")
list_abundance_cartesian_expansion(universe_graph)
reaction_cartesian_expansion(universe_graph)

logger.info("Harmonize entity names")

# TODO: Harmonize entity names
universe_graph = get_universe_graph(kegg_path, reactome_path, wikipathways_path, flatten)

# Step: 3. Merge to genes and variants
logger.info("Merging variants and genes")
collapse_all_variants(universe_graph)
collapse_to_genes(universe_graph)
Expand Down
103 changes: 103 additions & 0 deletions src/pathme/export_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
# -*- coding: utf-8 -*-

"""Export harmonized universe."""

import logging
import os
from typing import List

from pathme.pybel_utils import flatten_complex_nodes
from pathme.normalize_names import normalize_graph_names
from pybel import BELGraph
from pybel import from_pickle
from pybel import union
from tqdm import tqdm

logger = logging.getLogger(__name__)


def get_all_pickles(kegg_path, reactome_path, wikipathways_path):
    """Return the file names found in each database folder.

    Every folder is listed with :func:`get_files_in_folder`; a warning is
    logged for each folder in which no files were found.

    :param kegg_path: folder holding the KEGG BEL files
    :param reactome_path: folder holding the Reactome BEL files
    :param wikipathways_path: folder holding the WikiPathways BEL files
    :return: three lists of file names, in the order KEGG, Reactome, WikiPathways
    """
    # Folder to inspect, paired with the warning emitted when it has no files.
    sources = (
        (kegg_path, 'No KEGG files found. Please create the BEL KEGG files'),
        (reactome_path, 'No Reactome files found. Please create the BEL Reactome files'),
        (wikipathways_path, 'No WikiPathways files found. Please create the BEL WikiPathways files'),
    )

    listings = []
    for folder, empty_message in sources:
        found = get_files_in_folder(folder)
        if not found:
            logger.warning(empty_message)
        listings.append(found)

    kegg_pickles, reactome_pickles, wp_pickles = listings
    return kegg_pickles, reactome_pickles, wp_pickles


def get_universe_graph(
        kegg_path: str, reactome_path: str, wikipathways_path: str,
        flatten: bool = True, normalize_names: bool = True) -> BELGraph:
    """Return the union of every pickled graph found in the three database folders.

    The folders are listed with :func:`get_all_pickles`. Each file whose name
    ends in ``.pickle`` is loaded with :func:`pybel.from_pickle`, optionally
    passed through ``flatten_complex_nodes`` and ``normalize_graph_names``,
    and the resulting graphs are merged with :func:`pybel.union`.

    :param kegg_path: folder holding the KEGG pickles
    :param reactome_path: folder holding the Reactome pickles
    :param wikipathways_path: folder holding the WikiPathways pickles
    :param flatten: call ``flatten_complex_nodes`` on every loaded graph
    :param normalize_names: call ``normalize_graph_names`` on every loaded graph
    :return: the merged graph
    """
    # NOTE(review): when no pickle is found, ``union`` receives an empty list;
    # how it reacts depends on the installed PyBEL version -- confirm.
    kegg_pickles, reactome_pickles, wp_pickles = get_all_pickles(kegg_path, reactome_path, wikipathways_path)

    # Keep each file name paired with the folder it was listed from. Looking the
    # name up in the per-database lists afterwards would send a name present in
    # several folders to the first matching folder every time.
    pickle_locations = [
        (folder, file_name)
        for folder, file_names in (
            (kegg_path, kegg_pickles),
            (reactome_path, reactome_pickles),
            (wikipathways_path, wp_pickles),
        )
        for file_name in file_names
        if file_name.endswith('.pickle')
    ]

    logger.info(f'A total of {len(pickle_locations)} will be merged into the universe')

    universe_list = []

    for folder, file_name in tqdm(pickle_locations, desc='Creating universe'):
        graph = from_pickle(os.path.join(folder, file_name))

        # Both helpers are called for their effect on ``graph``; their return
        # values are not used.
        if flatten:
            flatten_complex_nodes(graph)

        if normalize_names:
            normalize_graph_names(graph)

        universe_list.append(graph)

    return union(universe_list)


def get_files_in_folder(path: str) -> List[str]:
    """Return the names of the regular files in a given folder.

    Sub-directories are left out. The names are sorted because ``os.listdir``
    yields them in arbitrary order, which would make any processing that
    depends on the order differ between machines.

    :param path: folder path
    :return: sorted file names (not full paths) in the folder
    """
    return sorted(
        file
        for file in os.listdir(path)
        if os.path.isfile(os.path.join(path, file))
    )
2 changes: 1 addition & 1 deletion src/pathme/kegg/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

from pathme.kegg.convert_to_bel import get_bel_types
from pathme.kegg.kegg_xml_parser import import_xml_etree, get_xml_types
from pathme.wikipathways.utils import get_files_in_folder
from pathme.export_utils import get_files_in_folder
from pathme.constants import KEGG_FILES, KEGG_KGML_URL, KEGG_STATS_COLUMN_NAMES

__all__ = [
Expand Down

0 comments on commit 2ece010

Please sign in to comment.