# Export ComPath mappings to RDF

#### This notebook exports the curated ComPath pathway mappings (equivalentTo and isPartOf) between KEGG, Reactome and WikiPathways to RDF

##### Author: Daniel Domingo-Fernandez

In [1]:
import rdflib
from rdflib import Namespace

import pandas as pd

from bio2bel_kegg.manager import Manager as KeggManager
from bio2bel_wikipathways.manager import Manager as WikiPathwaysManager
from bio2bel_reactome.manager import Manager as ReactomeManager

infer_central_dogmatic_translations is deprecated. please migrate to enrich_proteins_with_rnas
infer_central_dogmatic_transcriptions is deprecated. please migrate to enrich_rnas_with_genes
infer_central_dogma is deprecated. please migrate to enrich_protein_and_rna_origins
collapse_by_central_dogma is deprecated. please migrate to collapse_to_genes
prune_central_dogma is deprecated. please migrate to prune_protein_rna_origins


In [2]:
# One Bio2BEL manager per pathway database; the parsing functions below use
# them to look pathways up by name.
kegg_manager, wikipathways_manager, reactome_manager = (
    KeggManager(),
    WikiPathwaysManager(),
    ReactomeManager(),
)

In [3]:
# Load the three curated mapping sheets from the ComPath curation repository.
# The first column of every sheet is used as the index. The sheets are read in
# the order listed and unpacked into one DataFrame per database pair.
kegg_wikipathways_df, kegg_reactome_df, wikipathways_reactome_df = (
    pd.read_excel(sheet_url, index_col=0)
    for sheet_url in (
        'https://github.com/ComPath/curation/raw/master/mappings/kegg_wikipathways.xlsx',
        'https://github.com/ComPath/curation/raw/master/mappings/kegg_reactome.xlsx',
        'https://github.com/ComPath/curation/raw/master/mappings/wikipathways_reactome.xlsx',
    )
)

In [4]:
def get_pathway_models(reference_manager, compared_manager, pathway_1_name, pathway_2_name):
    """Look up two pathways by name, each in its own manager.

    :param reference_manager: object exposing ``get_pathway_by_name``; queried with ``pathway_1_name``
    :param compared_manager: object exposing ``get_pathway_by_name``; queried with ``pathway_2_name``
    :param str pathway_1_name: name looked up in the reference manager
    :param str pathway_2_name: name looked up in the compared manager
    :rtype: tuple(Pathway, Pathway)
    :raises ValueError: if either lookup returns None (the first pathway is checked first)
    """
    # Both queries are issued before any validation takes place.
    lookups = (
        (pathway_1_name, reference_manager.get_pathway_by_name(pathway_1_name)),
        (pathway_2_name, compared_manager.get_pathway_by_name(pathway_2_name)),
    )

    for queried_name, model in lookups:
        if model is None:
            raise ValueError("Not Valid Pathway Name: {}".format(queried_name))

    (_, pathway_1), (_, pathway_2) = lookups
    return pathway_1, pathway_2


def remove_star_from_pathway_name(pathway_name):
    """Drop the ``*`` marker from a pathway name and trim surrounding whitespace.

    The star labels the reference pathway in isPartOf statements.

    :param str pathway_name: pathway name, possibly containing ``*``
    :rtype: str
    :returns: the name with every ``*`` removed and outer whitespace stripped
    """
    without_star = "".join(pathway_name.split("*"))
    return without_star.strip()

def get_pathways_from_statement(mapping_statement, mapping_type):
    """Split a mapping statement into the two pathway names around the relation.

    :param str mapping_statement: statement of the form ``<pathway> <mapping_type> <pathway>``
    :param str mapping_type: type of relationship used as the separator
    :rtype: tuple[str,str]
    :returns: the text before and after the first occurrence of ``mapping_type``, both stripped
    :raises IndexError: if ``mapping_type`` does not occur in ``mapping_statement``
    """
    pieces = mapping_statement.split(mapping_type)

    left_side = pieces[0]
    # Only the first two pieces are used; this lookup fails when the separator is absent.
    right_side = pieces[1]

    return left_side.strip(), right_side.strip()

def get_pathways_from_is_part_of_mapping(mapping_statement):
    """Return the two pathways of a hierarchical (isPartOf) mapping, starred one first.

    The statement is split around ``isPartOf``. If the left-hand name carries the
    ``*`` marker, the result is ``(left without star, right)``. Otherwise the
    star is removed from the right-hand name and the result is
    ``(right without star, left)``.

    NOTE(review): the returned order is (starred pathway, other pathway), not
    (part, whole). Whether the left-hand side of the statement was the part is
    therefore not recoverable from the return value -- confirm callers do not
    rely on that direction.

    :param str mapping_statement: statement of the form ``<pathway> isPartOf <pathway>``
    :rtype: tuple[str,str]
    """
    left, right = get_pathways_from_statement(mapping_statement, 'isPartOf')

    if "*" not in left:
        # The star sits on the right-hand side (or is absent), so the order is swapped.
        return remove_star_from_pathway_name(right), left

    return remove_star_from_pathway_name(left), right
    
    
def parse_equivalent_to(df, reference_manager, compared_manager, reference_name, compared_name):
    """Parse the column corresponding to equivalentTo mappings in the excel sheet.

    Every non-null cell of the ``'equivalentTo Mappings'`` column is split on
    newlines. Each non-blank line is parsed as
    ``<reference pathway> equivalentTo <compared pathway>`` and both pathways
    are resolved through their managers.

    :param pandas.DataFrame df: mapping sheet containing an ``'equivalentTo Mappings'`` column
    :param reference_manager: manager used to look up the left-hand pathway by name
    :param compared_manager: manager used to look up the right-hand pathway by name
    :param str reference_name: label stored next to the reference pathway in each result
    :param str compared_name: label stored next to the compared pathway in each result
    :returns: one tuple per statement: (reference pathway name, reference resource id,
        ``reference_name``, compared pathway name, compared resource id, ``compared_name``)
    :rtype: list[tuple[str,str,str,str,str,str]]
    :raises ValueError: from ``get_pathway_models`` when a pathway name cannot be resolved
    """
    equivalent_pathways = []

    for _, row in df.iterrows():

        equivalent_to_mappings = row['equivalentTo Mappings']

        if pd.isnull(equivalent_to_mappings):
            continue

        for mapping_statement in equivalent_to_mappings.split("\n"):

            # Skip blank and whitespace-only lines (e.g. a trailing newline in the cell);
            # they carry no statement and would otherwise fail while being split.
            if not mapping_statement.strip():
                continue

            reference_pathway, compared_pathway = get_pathways_from_statement(mapping_statement, "equivalentTo")

            pathway_1, pathway_2 = get_pathway_models(
                reference_manager, compared_manager, reference_pathway, compared_pathway
            )

            equivalent_pathways.append((
                pathway_1.name,
                pathway_1.resource_id,
                reference_name,
                pathway_2.name,
                pathway_2.resource_id,
                compared_name,
            ))

    return equivalent_pathways


def parse_is_part_of(df, reference_manager, compared_manager, reference_name, compared_name):
    """Parse the column corresponding to isPartOf mappings in the excel sheet.

    Every non-null cell of the ``'isPartOf Mappings'`` column is split on
    newlines. Each non-blank line is handed to
    ``get_pathways_from_is_part_of_mapping``; its first result is looked up in
    ``reference_manager`` and its second in ``compared_manager``.

    :param pandas.DataFrame df: mapping sheet containing an ``'isPartOf Mappings'`` column
    :param reference_manager: manager used to look up the first returned pathway by name
    :param compared_manager: manager used to look up the second returned pathway by name
    :param str reference_name: label stored next to the reference pathway in each result
    :param str compared_name: label stored next to the compared pathway in each result
    :returns: one tuple per statement: (reference pathway name, reference resource id,
        ``reference_name``, compared pathway name, compared resource id, ``compared_name``)
    :rtype: list[tuple[str,str,str,str,str,str]]
    :raises ValueError: from ``get_pathway_models`` when a pathway name cannot be resolved
    """
    is_part_of_pathways = []

    for _, row in df.iterrows():

        is_part_of_mappings = row['isPartOf Mappings']

        if pd.isnull(is_part_of_mappings):
            continue

        for mapping_statement in is_part_of_mappings.split('\n'):

            # Skip blank and whitespace-only lines (e.g. a trailing newline in the cell);
            # they carry no statement and would otherwise fail while being split.
            if not mapping_statement.strip():
                continue

            reference_pathway, compared_pathway = get_pathways_from_is_part_of_mapping(mapping_statement)

            pathway_1, pathway_2 = get_pathway_models(
                reference_manager, compared_manager, reference_pathway, compared_pathway
            )

            is_part_of_pathways.append((
                pathway_1.name,
                pathway_1.resource_id,
                reference_name,
                pathway_2.name,
                pathway_2.resource_id,
                compared_name,
            ))

    return is_part_of_pathways

In [5]:
# Arguments shared by both parsers for each pair of databases:
# (sheet, reference manager, compared manager, reference label, compared label)
kegg_reactome_args = (kegg_reactome_df, kegg_manager, reactome_manager, 'kegg', 'reactome')
kegg_wikipathways_args = (kegg_wikipathways_df, kegg_manager, wikipathways_manager, 'kegg', 'wikipathways')
wikipathways_reactome_args = (wikipathways_reactome_df, wikipathways_manager, reactome_manager, 'wikipathways', 'reactome')

# Hierarchical (isPartOf) mappings
kegg_reactome_hierarchical = parse_is_part_of(*kegg_reactome_args)
kegg_wikipathways_hierarchical = parse_is_part_of(*kegg_wikipathways_args)
wikipathways_reactome_hierarchical = parse_is_part_of(*wikipathways_reactome_args)

# Equivalent (equivalentTo) mappings
kegg_reactome_equivalents = parse_equivalent_to(*kegg_reactome_args)
kegg_wikipathways_equivalents = parse_equivalent_to(*kegg_wikipathways_args)
wikipathways_reactome_equivalents = parse_equivalent_to(*wikipathways_reactome_args)

# Create ComPath RDF

In [6]:
# URI prefix and matching rdflib namespace for each pathway database.
KEGG_PREFIX = 'http://identifiers.org/kegg.pathway/'
kegg = Namespace(KEGG_PREFIX)

REACTOME_PREFIX = 'http://identifiers.org/reactome/'
reactome = Namespace(REACTOME_PREFIX)

WIKIPATHWAYS_PREFIX = 'http://identifiers.org/wikipathways/'
wikipathways = Namespace(WIKIPATHWAYS_PREFIX)

# Namespace that holds the mapping predicates (isPartOf, equivalentTo).
COMPATH_PREFIX = 'http://compath.scai.fraunhofer.de/rdfs#'
compath = Namespace(COMPATH_PREFIX)

In [45]:
def match_resource(resource, pathway_id, kegg_namespace, reactome_namespace, wikipathways_namespace):
    """Return the term for ``pathway_id`` inside the namespace of its resource.

    The namespaces passed as arguments are the ones used; the function does not
    depend on module-level namespace objects.

    :param str resource: one of ``'kegg'``, ``'reactome'`` or ``'wikipathways'``
    :param str pathway_id: pathway identifier inside the resource
    :param kegg_namespace: namespace indexed with the KEGG identifier
    :param reactome_namespace: namespace indexed with the Reactome identifier
    :param wikipathways_namespace: namespace indexed with the WikiPathways identifier
    :returns: the result of indexing the matching namespace with the identifier
    :raises ValueError: if ``resource`` is not one of the three supported names
    """
    if resource == 'kegg':
        # Any 'path:' prefix is dropped before the identifier is appended to the namespace.
        return kegg_namespace[pathway_id.replace('path:', '')]

    if resource == 'reactome':
        return reactome_namespace[pathway_id]

    if resource == 'wikipathways':
        return wikipathways_namespace[pathway_id]

    raise ValueError('Invalid resource: {}'.format(resource))
        
        
def populate_graph(graph, mappings, mapping_type, kegg_namespace, reactome_namespace, wikipathways_namespace, compath):
    """Add one triple per mapping to ``graph``.

    For each mapping the first pathway becomes the subject of the triple, the
    second pathway the object, and ``compath[mapping_type]`` the predicate.

    :param graph: object exposing ``add``; receives a 3-tuple per mapping
    :param mappings: iterable of 6-tuples (name, id, resource, name, id, resource);
        the two names are ignored
    :param str mapping_type: key used to index ``compath`` for the predicate
    :param kegg_namespace: forwarded to ``match_resource``
    :param reactome_namespace: forwarded to ``match_resource``
    :param wikipathways_namespace: forwarded to ``match_resource``
    :param compath: namespace indexed with ``mapping_type``
    """
    namespaces = (kegg_namespace, reactome_namespace, wikipathways_namespace)

    for mapping in mappings:
        _, first_id, first_resource, _, second_id, second_resource = mapping

        triple_subject = match_resource(first_resource, first_id, *namespaces)
        triple_object = match_resource(second_resource, second_id, *namespaces)

        graph.add((triple_subject, compath[mapping_type], triple_object))

In [47]:
mappings_graph = rdflib.Graph()

# Register the prefixes used when the graph is serialized.
mappings_graph.namespace_manager.bind('kegg', kegg)
mappings_graph.namespace_manager.bind('reactome', reactome)
mappings_graph.namespace_manager.bind('wp', wikipathways)
mappings_graph.namespace_manager.bind('compath', compath)

# Every call passes the namespaces in the order the signature expects
# (kegg, reactome, wikipathways), whatever pair of databases the mappings
# cover; the resource label stored in each mapping selects the namespace.
for mapping_type, mappings in (
    ('isPartOf', kegg_reactome_hierarchical),
    ('isPartOf', kegg_wikipathways_hierarchical),
    ('isPartOf', wikipathways_reactome_hierarchical),
    ('equivalentTo', kegg_reactome_equivalents),
    ('equivalentTo', kegg_wikipathways_equivalents),
    ('equivalentTo', wikipathways_reactome_equivalents),
):
    populate_graph(mappings_graph, mappings, mapping_type, kegg, reactome, wikipathways, compath)

## Export Graphs to RDF

In [49]:
# Pin the serialization format: without it the content of the .rdf file depends
# on the default of the installed rdflib version.
mappings_graph.serialize(destination='compath_mappings.rdf', format='xml');