# Example 2: Metabolite workflow

This notebook provides insights on how to use the tool if you have list of metabolites.

# Import modules

In [None]:
import pandas as pd

from pyBiodatafuse import id_mapper
from pyBiodatafuse.analyzer.summarize import BioGraph
from pyBiodatafuse.annotators import molmedb, kegg, aopwikifrom pyBiodatafuse.graph import saver
from pyBiodatafuse.utils import combine_sources, create_or_append_to_metadata

# Load list of metabolites/chemicals/compounds

In [None]:
metabolites_of_interest = """100208
10040286
10041551
10025195
5291
6030
1172
1060
8571
697993
21831736
159603
445643
"""

metabolite_list = metabolites_of_interest.split("\n")
len(metabolite_list)

14

In [None]:
data_input = pd.DataFrame(metabolite_list, columns=["identifier"])
data_input.head()

### Entity resolution using BridgeDB

In [None]:
bridgedb_df, bridgdb_metadata = id_mapper.bridgedb_xref(
    identifiers=data_input,
    input_species="Human",
    input_datasource="PubChem Compound",
    output_datasource="All",
)
bridgedb_df.head()

### Transporter inhibited from MolMeDB

In [None]:
(
    molmedb_transporter_inhibited_df,
    molmedb_transporter_inhibited_metadata,
) = molmedb.get_compound_gene_inhibitor(bridgedb_df=bridgedb_df)
molmedb_transporter_inhibited_df.head()

  check_columns_against_constants(


Unnamed: 0,identifier,identifier.source,target,target.source,MolMeDB_transporter_inhibited
0,100208,PubChem-compound,OFDNQWIFNXBECV-UHFFFAOYSA-N,InChIKey,[{'MolMeDB_uniprot_trembl_id': 'Uniprot-TrEMBL...
1,10025195,PubChem-compound,LEJRLSZVESQKJK-UHFFFAOYSA-N,InChIKey,"[{'MolMeDB_uniprot_trembl_id': nan, 'MolMeDB_h..."
2,10040286,PubChem-compound,FYGREZKTJIXWIH-UHFFFAOYSA-N,InChIKey,"[{'MolMeDB_uniprot_trembl_id': nan, 'MolMeDB_h..."
3,10041551,PubChem-compound,OVVBIIBBRZVPAL-UHFFFAOYSA-N,InChIKey,[{'MolMeDB_uniprot_trembl_id': 'Uniprot-TrEMBL...
4,1060,PubChem-compound,LCTONWCANYUPML-UHFFFAOYSA-N,InChIKey,"[{'MolMeDB_uniprot_trembl_id': nan, 'MolMeDB_h..."


### AOPs from AOP-Wiki

In [None]:
(
    aopwiki_df,
    aopwiki_metadata,
) = aopwiki.get_aops(bridgedb_df=bridgedb_df)
aopwiki_df.head()

Querying AOP_Wiki_RDF for compounds: 100%|██████████| 1/1 [00:01<00:00,  1.48s/it]


Unnamed: 0,identifier,identifier.source,target,target.source,AOP_Wiki_RDF_compounds
0,100208,PubChem-compound,100208,PubChem Compound,"[{'aop': nan, 'aop_title': nan, 'MIE_title': n..."
1,10025195,PubChem-compound,10025195,PubChem Compound,"[{'aop': nan, 'aop_title': nan, 'MIE_title': n..."
2,10040286,PubChem-compound,10040286,PubChem Compound,"[{'aop': nan, 'aop_title': nan, 'MIE_title': n..."
3,10041551,PubChem-compound,10041551,PubChem Compound,"[{'aop': nan, 'aop_title': nan, 'MIE_title': n..."
4,1060,PubChem-compound,1060,PubChem Compound,"[{'aop': nan, 'aop_title': nan, 'MIE_title': n..."


### Pathways from KEGG

In [7]:
(
    kegg_df,
    kegg_metadata,
) = kegg.get_pathways(bridgedb_df=bridgedb_df)
kegg_df.head()

Unnamed: 0,identifier,identifier.source,target,target.source,KEGG_pathways
0,100208,PubChem-compound,C11280,KEGG Compound,"[{'pathway_id': nan, 'pathway_label': nan, 'pa..."
1,6030,PubChem-compound,C00105,KEGG Compound,"[{'pathway_id': 'path:hsa00240', 'pathway_labe..."
2,1172,PubChem-compound,C00105,KEGG Compound,"[{'pathway_id': 'path:hsa00240', 'pathway_labe..."
3,1060,PubChem-compound,C00022,KEGG Compound,"[{'pathway_id': 'path:hsa00010', 'pathway_labe..."
4,697993,PubChem-compound,C14437,KEGG Compound,"[{'pathway_id': nan, 'pathway_label': nan, 'pa..."


### AOPs from AOP-Wiki

In [None]:
(
    aopwiki_df,
    aopwiki_metadata,
) = aopwiki.get_aops(bridgedb_df=bridgdb_df)
aopwiki_df.head()

# Create the BDF graph

In [12]:
combined_df = combine_sources(
    bridgedb_df, df_list=[kegg_df, molmedb_transporter_inhibited_df, aopwiki_df]
)
combined_df.head()

Unnamed: 0,identifier,identifier.source,target,target.source,KEGG_pathways,MolMeDB_transporter_inhibited,AOP_Wiki_RDF_compounds
0,100208,PubChem-compound,100208,PubChem Compound,"[{'pathway_id': nan, 'pathway_label': nan, 'pa...",[{'MolMeDB_uniprot_trembl_id': 'Uniprot-TrEMBL...,"[{'aop': nan, 'aop_title': nan, 'MIE_title': n..."
1,10040286,PubChem-compound,10040286,PubChem Compound,,"[{'MolMeDB_uniprot_trembl_id': nan, 'MolMeDB_h...","[{'aop': nan, 'aop_title': nan, 'MIE_title': n..."
2,10041551,PubChem-compound,10041551,PubChem Compound,,[{'MolMeDB_uniprot_trembl_id': 'Uniprot-TrEMBL...,"[{'aop': nan, 'aop_title': nan, 'MIE_title': n..."
3,10025195,PubChem-compound,10025195,PubChem Compound,,"[{'MolMeDB_uniprot_trembl_id': nan, 'MolMeDB_h...","[{'aop': nan, 'aop_title': nan, 'MIE_title': n..."
4,5291,PubChem-compound,5291,PubChem Compound,,[{'MolMeDB_uniprot_trembl_id': 'Uniprot-TrEMBL...,"[{'aop': nan, 'aop_title': nan, 'MIE_title': n..."


In [9]:
combined_metadata = create_or_append_to_metadata(
    bridgdb_metadata, [kegg_metadata, molmedb_transporter_inhibited_metadata, aopwiki_metadata]
)

# Saving the graph

In [10]:
import os

DATA_DIR = "./data/metabolite_workflow"
os.makedirs(DATA_DIR, exist_ok=True)

In [11]:
pygraph = saver.save_graph(
    combined_df=combined_df,
    combined_metadata=combined_metadata,
    graph_name="metabolite_workflow",
    graph_dir=DATA_DIR,
)

Combined DataFrame saved in ./data/metabolite_workflow/metabolite_workflow_df.pkl
Metadata saved in ./data/metabolite_workflow/metabolite_workflow_metadata.pkl
Building graph: 0it [00:00, ?it/s]
Graph is built successfully
Graph saved in: 
 ./data/metabolite_workflow/metabolite_workflow_graph.pkl 
 ./data/metabolite_workflow/metabolite_workflow_graph.gml
Graph saved in ./data/metabolite_workflow/metabolite_workflow_graph.edgelist


In [None]:
print(pygraph)

# Graph statistics

In [None]:
graph_obj = BioGraph(graph=pygraph)
graph_obj.graph_summary

In [None]:
graph_obj.count_nodes_by_data_source(plot=True)

In [None]:
graph_obj.count_edge_by_data_source(plot=True)