# PathMe to BEL

This notebook transforms PathMe networks into BEL.

Author: [Daniel Domingo-Fernández](https://github.com/ddomingof)

In [1]:
import os
import pybel
import collections

import itertools as itt
from tqdm import tqdm_notebook

from pathme.constants import REACTOME_BEL, KEGG_BEL, WIKIPATHWAYS_BEL, KEGG, UNIVERSE_DIR, WIKIPATHWAYS, REACTOME
from pathme.export_utils import get_files_in_folder

from bio2bel_kegg import Manager as KeggManager
from bio2bel_reactome import Manager as ReactomeManager
from bio2bel_wikipathways import Manager as WikiPathwaysManager

import pybel_tools

from pathme.pybel_utils import flatten_complex_nodes
from pathme.normalize_names import normalize_graph_names

In [2]:
def flatten(l):
    """Recursively flatten nested iterables into a single stream of items.

    Strings and bytes are iterable but are yielded whole, not split into characters.

    :param l: an iterable whose elements may themselves be iterables
    :return: a generator over the non-iterable (or str/bytes) leaf elements, in order
    """
    # ``collections.Iterable`` was removed in Python 3.10; the ABC lives in ``collections.abc``.
    from collections.abc import Iterable

    for el in l:
        if isinstance(el, Iterable) and not isinstance(el, (str, bytes)):
            yield from flatten(el)
        else:
            yield el

def get_all_children(pathway):
    """Traverse the Reactome hierarchy recursively below a given pathway.

    A pathway without children is returned as-is. Otherwise a list is returned with one
    entry per child, each entry being the result of this same function, so the result
    may be arbitrarily nested.
    """
    if pathway.children:
        descendants = []
        for child in pathway.children:
            descendants.append(get_all_children(child))
        return descendants

    return pathway

def _strip_suffix(text, suffix):
    """Return ``text`` without a trailing ``suffix``; unchanged if it does not end with it."""
    if suffix and text.endswith(suffix):
        return text[:-len(suffix)]
    return text


def _pathway_id_from_filename(file):
    """Recover the pathway identifier from a pickle file name such as ``<id>_unflatten.pickle``."""
    # ``str.strip(chars)`` removes a *set of characters* from both ends, not a suffix,
    # so it can eat leading/trailing characters of the identifier itself.
    pathway_id = _strip_suffix(file, '.pickle')
    for suffix in ('_unflatten', '_flatten'):
        pathway_id = _strip_suffix(pathway_id, suffix)
    return pathway_id


def export_to_bel(directory, manager, database, flat_nodes=False):
    """Export every pickled pathway graph in ``directory`` as a BEL file in ``UNIVERSE_DIR``.

    For each ``*.pickle`` file, the pathway is looked up in ``manager``. Reactome pathways
    that have children are exported as the union of their children's graphs (the parent's
    own pickle is not loaded in that case); any other pathway is exported from its own pickle.

    :param directory: folder containing the pickled graphs
    :param manager: object exposing ``get_pathway_by_id(pathway_id)``
    :param database: database name constant (compared against ``KEGG`` and ``REACTOME``)
    :param flat_nodes: if True, run ``flatten_complex_nodes`` on each graph before exporting
    :raises ValueError: if a pickle's pathway identifier is not found by ``manager``
    """
    pickles = list(get_files_in_folder(directory))
    # Set for constant-time membership tests when looking for children pickles
    available_pickles = set(pickles)

    for file in tqdm_notebook(pickles, desc='Loading pickles'):

        # Skip files that do not end in pickle
        if not file.endswith('.pickle'):
            continue

        pathway_id = _pathway_id_from_filename(file)

        # Fix pathway identifier notation for KEGG
        if database == KEGG:
            pathway_id = "path:{}".format(pathway_id)

        # Get the pathway row from the database
        pathway = manager.get_pathway_by_id(pathway_id)

        # Throw an error if the pathway is not in the database
        if not pathway:
            raise ValueError(f'Pathway not found: {pathway_id}')

        # Traverse the Reactome hierarchy and get all the children
        if database == REACTOME and pathway.children:

            children = list(flatten(get_all_children(pathway)))

            graphs_to_merge = []

            for child in children:
                child_file = f"{child.resource_id}.pickle"

                # Inform that the child could not be found in the given directory
                if child_file not in available_pickles:
                    print(f'Child not found: {child.resource_id}')
                    continue

                graphs_to_merge.append(
                    pybel.from_pickle(os.path.join(directory, child_file))
                )

            # Inform if there is not any graph to be merged
            if not graphs_to_merge:
                print(f'Not graphs to merge for {pathway_id}')
                continue

            graph = pybel.union(graphs_to_merge)

        else:
            graph = pybel.from_pickle(os.path.join(directory, file))

        # Option to flatten complexes/reactions and so on...
        # (must test the ``flat_nodes`` argument; the name ``flatten`` is the helper
        # function above and is always truthy)
        if flat_nodes:
            flatten_complex_nodes(graph)

        # Manual postprocessing of names to improve harmonization
        normalize_graph_names(graph, database)

        # "/" in a pathway name would be read as a path separator, so replace it
        pybel.to_bel_path(graph, os.path.join(UNIVERSE_DIR, f'{pathway.name.replace("/",".")}.bel'))
    

Initialize the Bio2BEL Managers

In [3]:
# One Bio2BEL manager per pathway database; export_to_bel() uses the manager it is
# given to look up pathways by identifier.
kegg_manager = KeggManager()
reactome_manager = ReactomeManager()
wikipathways_manager = WikiPathwaysManager()

Export each database to raw BEL

*Note: pass `flat_nodes=True` to `export_to_bel` to flatten complex nodes before the graph is exported.*

In [4]:
# Export the Reactome pickles found in REACTOME_BEL to BEL files, requesting flattened nodes.
export_to_bel(REACTOME_BEL, reactome_manager, REACTOME, flat_nodes=True)

HBox(children=(IntProgress(value=0, description='Loading pickles', max=2256, style=ProgressStyle(description_w…

Child not found: R-HSA-9634600
Child not found: R-HSA-9634638
Child not found: R-HSA-9634635
Child not found: R-HSA-9634638
Child not found: R-HSA-9634635
Child not found: R-HSA-434313
Child not found: R-HSA-9634600
Child not found: R-HSA-9033807
Child not found: R-HSA-9037629
Child not found: R-HSA-9037628
Child not found: R-HSA-9636003
Child not found: R-HSA-9636003
Child not found: R-HSA-9616334
Child not found: R-HSA-9629232
Child not found: R-HSA-9630222
Child not found: R-HSA-9630221
Child not found: R-HSA-434313
Child not found: R-HSA-9634638
Child not found: R-HSA-9634635
Child not found: R-HSA-9636003
Child not found: R-HSA-9634600
Child not found: R-HSA-434313
Child not found: R-HSA-9616334
Child not found: R-HSA-9629232
Child not found: R-HSA-9630222
Child not found: R-HSA-9630221
Child not found: R-HSA-9603798
Child not found: R-HSA-9609523
Child not found: R-HSA-9634600
Child not found: R-HSA-9033807
Child not found: R-HSA-9037629
Child not found: R-HSA-9037628
Child not f

In [5]:
# export_to_bel(KEGG_BEL, kegg_manager, KEGG, flat_nodes=True)

In [6]:
# export_to_bel(WIKIPATHWAYS_BEL, wikipathways_manager, WIKIPATHWAYS, flat_nodes=True)