Skip to content

Commit

Permalink
Merge pull request #20 from PathwayMerger/diffupy
Browse files Browse the repository at this point in the history
Diffupy
  • Loading branch information
ddomingof committed Apr 25, 2019
2 parents 67b543a + e00502d commit e743616
Show file tree
Hide file tree
Showing 2 changed files with 116 additions and 5 deletions.
118 changes: 115 additions & 3 deletions src/pathme/export_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,33 @@
import os
from typing import List

import networkx as nx
import pybel
from pybel import BELGraph, from_pickle
from pybel.constants import RELATION
from pybel.struct.utils import update_metadata
from tqdm import tqdm

from pathme.constants import KEGG, REACTOME, WIKIPATHWAYS
from pathme.constants import KEGG, REACTOME, WIKIPATHWAYS, PATHME_DIR
from pathme.normalize_names import normalize_graph_names
from pathme.pybel_utils import flatten_complex_nodes
from pybel import BELGraph, from_pickle, union

logger = logging.getLogger(__name__)


def set_resource(elements, database):
    """Record ``database`` in the ``'database'`` set of every element's data dictionary.

    :param elements: iterable of ``(element, data)`` pairs where ``data`` is a mutable dictionary
    :param database: value to add to each ``data['database']`` set
    """
    for _, attributes in elements:
        # Create the set on first sight of an element, then add to it either way.
        attributes.setdefault('database', set()).add(database)


def set_graph_resource(graph, database):
    """Tag the data of every node in ``graph`` with ``database`` via :func:`set_resource`.

    :param graph: graph whose ``nodes(data=True)`` yields ``(node, data)`` pairs
    :param database: value to add to each node's ``'database'`` set
    """
    node_view = graph.nodes(data=True)
    set_resource(node_view, database)
    # Edge data is left untagged; the equivalent call for edges is disabled:
    # set_resource(graph.edges(data=True), database)


def get_all_pickles(kegg_path, reactome_path, wikipathways_path):
"""Return a list with all pickle paths."""
kegg_pickles = get_files_in_folder(kegg_path)
Expand All @@ -36,6 +53,65 @@ def get_all_pickles(kegg_path, reactome_path, wikipathways_path):
return kegg_pickles, reactome_pickles, wp_pickles


def left_full_data_join(g, h) -> None:
    """Merge graph ``h`` into graph ``g`` in place, combining node ``database`` data.

    Wrapper around PyBEL's left_full_join to merge node data. Nodes of ``h``
    that are missing from ``g`` are added together with their data. For nodes
    present in both graphs, the ``'database'`` entries of ``h`` are merged into
    the ones stored in ``g``. Edges of ``h`` that ``g`` does not have yet are
    added, ``update_metadata(h, g)`` is called, and the warnings of ``h`` are
    appended to the warnings of ``g``.

    :param pybel.BELGraph g: A BEL graph (modified in place)
    :param pybel.BELGraph h: A BEL graph
    """
    for node, data in h.nodes(data=True):
        if node not in g:
            g.add_node(node, **data)
            continue

        if 'database' not in data:
            continue

        # Build a fresh set rather than calling ``.update()`` on the stored one:
        # the stored set may be shared with another graph (``add_node`` above
        # keeps the very set object held by ``h``, and callers may pass a
        # ``copy()`` of a graph as ``g``), so mutating it in place could
        # silently alter that other graph. Starting from an empty default also
        # keeps the annotation of ``h`` when the node in ``g`` has none yet.
        target_data = g.nodes[node]
        merged = set(target_data.get('database', ()))
        merged.update(data['database'])
        target_data['database'] = merged

    g.add_edges_from(
        (u, v, key, data)
        for u, v, key, data in h.edges(keys=True, data=True)
        if u not in g or v not in g[u] or key not in g[u][v]
    )

    update_metadata(h, g)

    g.warnings.extend(h.warnings)


def union_data(graphs, use_tqdm: bool = False):
    """Merge several BEL graphs into one using :func:`left_full_data_join`.

    Wrapper around PyBEL's union to instate left_full_data_join function.

    :param iter[BELGraph] graphs: An iterable over BEL graphs. Must contain at
        least one graph and can't be infinite.
    :param use_tqdm: If true, wrap the iteration in a tqdm progress bar
    :return: The given graph itself when exactly one graph is supplied;
        otherwise a copy of the first graph with all following graphs merged in
    :rtype: BELGraph
    :raises ValueError: If ``graphs`` is empty
    """
    it = iter(graphs)

    if use_tqdm:
        # A tqdm object is iterable but is not guaranteed to support ``next()``
        # directly, so turn it into a real iterator before pulling items.
        it = iter(tqdm(it, desc='taking union'))

    try:
        target = next(it)
    except StopIteration as e:
        raise ValueError('no graphs given') from e

    try:
        graph = next(it)
    except StopIteration:
        # Only one graph was given: hand it back untouched (and uncopied).
        return target

    # Copy before merging so the first input graph is not used as the accumulator.
    target = target.copy()
    left_full_data_join(target, graph)

    for graph in it:
        left_full_data_join(target, graph)

    return target


def get_universe_graph(
kegg_path: str,
reactome_path: str,
Expand All @@ -51,7 +127,7 @@ def get_universe_graph(
normalize_names=normalize_names
)
logger.info('Merging all into a hairball...')
return union(universe_graphs)
return union_data(universe_graphs)


def _iterate_universe_graphs(
Expand Down Expand Up @@ -85,6 +161,8 @@ def _iterate_universe_graphs(
if normalize_names:
normalize_graph_names(graph, KEGG)

set_graph_resource(graph, KEGG)

elif file in reactome_pickles:
graph = from_pickle(os.path.join(reactome_path, file), check_version=False)

Expand All @@ -94,6 +172,8 @@ def _iterate_universe_graphs(
if normalize_names:
normalize_graph_names(graph, REACTOME)

set_graph_resource(graph, REACTOME)

elif file in wp_pickles:
graph = from_pickle(os.path.join(wikipathways_path, file), check_version=False)

Expand All @@ -102,13 +182,45 @@ def _iterate_universe_graphs(

if normalize_names:
normalize_graph_names(graph, WIKIPATHWAYS)

set_graph_resource(graph, WIKIPATHWAYS)

else:
logger.warning(f'Unknown pickle file: {file}')
continue

yield graph


def _munge_node_attribute(node, attribute='name'):
"""Munge node attribute."""
if node.get(attribute) == None:
return str(node)
else:
return node.get(attribute)


def to_gml(graph: pybel.BELGraph, path: str = PATHME_DIR) -> None:
    """Export the graph to a GML file with :func:`networkx.write_gml`.

    A new ``networkx.MultiDiGraph`` is built in which every node is keyed by
    its ``name`` (or its string form when it has none) and carries
    ``namespace`` and ``function`` attributes. Every edge carries the
    relation as ``interaction`` and the stringified edge data as ``bel``, and
    is added with the stringified original edge key.

    :param graph: BEL graph to export
    :param path: destination handed to :func:`networkx.write_gml`
    """
    gml_graph = nx.MultiDiGraph()

    for node in graph:
        label = _munge_node_attribute(node, 'name')
        namespace = str(node.get('namespace'))
        gml_graph.add_node(label, namespace=namespace, function=node.get('function'))

    for source, target, key, edge_data in graph.edges(data=True, keys=True):
        gml_graph.add_edge(
            _munge_node_attribute(source),
            _munge_node_attribute(target),
            interaction=str(edge_data[RELATION]),
            bel=str(edge_data),
            key=str(key),
        )

    nx.write_gml(gml_graph, path)


def get_files_in_folder(path: str) -> List[str]:
"""Return the files in a given folder.
Expand Down
3 changes: 1 addition & 2 deletions src/pathme/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import pandas as pd
import rdflib

from pathme.constants import UNKNOWN, BEL_STATS_COLUMN_NAMES
from pathme.constants import UNKNOWN, BEL_STATS_COLUMN_NAMES, PATHME_DIR

import pybel
from pathme.export_utils import get_files_in_folder
Expand Down Expand Up @@ -63,7 +63,6 @@ def check_multiple(element, element_name):

return element


def parse_id_uri(uri):
"""Get the components of a given uri (with identifier at the last position).
Expand Down

0 comments on commit e743616

Please sign in to comment.