# Example: Compounds for Generator

In [1]:
import os
import sys

project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
src_path = os.path.join(project_root, "src")

if src_path not in sys.path:
    sys.path.insert(0, src_path)

print("PYTHONPATH:", sys.path[0])

PYTHONPATH: c:\Users\are10\Documents\BAFSTU\code\GitHub\pyBiodatafuse\src


In [2]:
# Import modules
import pandas as pd

from pyBiodatafuse import id_mapper
from pyBiodatafuse.annotators import intact, kegg, molmedb, aopwiki
from pyBiodatafuse.graph import generator
from pyBiodatafuse.utils import combine_sources

# Load list of metabolites/chemicals/compounds

In [3]:
metabolites_of_interest = """100208
10040286
10041551
10025195
5291
6030
1172
1060
8571
697993
21831736
159603
445643
16865
"""

metabolite_list = metabolites_of_interest.split("\n")
len(metabolite_list)

15

In [4]:
data_input = pd.DataFrame(metabolite_list, columns=["identifier"])
data_input.head()

Unnamed: 0,identifier
0,100208
1,10040286
2,10041551
3,10025195
4,5291


### Entity resolution using BridgeDB

In [5]:
bridgedb_compound_df, bridgdb_metadata = id_mapper.bridgedb_xref(
    identifiers=data_input,
    input_species="Human",
    input_datasource="ChEBI",
    output_datasource="All",
)
bridgedb_compound_df.head(10)

Unnamed: 0,identifier,identifier.source,target,target.source
0,100208,ChEBI,100208,ChEBI
1,100208,ChEBI,CHEBI:100208,ChEBI
2,100208,ChEBI,Q27176235,Wikidata
3,100208,ChEBI,54631547,PubChem Compound
4,100208,ChEBI,OOHQDHOXZWVAAU-SEPYTNNBSA-N,InChIKey
5,5291,ChEBI,C01498,KEGG Compound
6,5291,ChEBI,CHEBI:5291,ChEBI
7,5291,ChEBI,DB11242,DrugBank
8,5291,ChEBI,5291,ChEBI
9,5291,ChEBI,9000-70-8,CAS


### Compound Interactions from IntAct

In [6]:
intact_compound_df, intact_compound_metadata = intact.get_compound_interactions(
    bridgedb_compound_df, interaction_type="both"
)
intact_compound_df.head(10)

Querying IntAct for compounds: 100%|██████████| 1/1 [00:08<00:00,  8.62s/it]


Unnamed: 0,identifier,identifier.source,target,target.source,IntAct_compound_interactions
0,100208,ChEBI,CHEBI:100208,ChEBI,"[{'interaction_id': nan, 'interactor_id_A': na..."
1,5291,ChEBI,CHEBI:5291,ChEBI,"[{'interaction_id': nan, 'interactor_id_A': na..."
2,6030,ChEBI,CHEBI:6030,ChEBI,"[{'interaction_id': nan, 'interactor_id_A': na..."
3,1060,ChEBI,CHEBI:1060,ChEBI,"[{'interaction_id': nan, 'interactor_id_A': na..."
4,159603,ChEBI,CHEBI:159603,ChEBI,"[{'interaction_id': nan, 'interactor_id_A': na..."
5,16865,ChEBI,CHEBI:16865,ChEBI,"[{'interaction_id': nan, 'interactor_id_A': na..."


## MolMeDB

In [7]:
molmedb_df, molmedb_metadata = molmedb.get_compound_gene_inhibitor(bridgedb_compound_df)
molmedb_df.head()

Unnamed: 0,identifier,identifier.source,target,target.source,MolMeDB_transporter_inhibited
0,100208,ChEBI,OOHQDHOXZWVAAU-SEPYTNNBSA-N,InChIKey,"[{'MolMeDB_uniprot_trembl_id': nan, 'MolMeDB_h..."
1,1060,ChEBI,UQIGQRSJIKIPKZ-VKHMYHEASA-N,InChIKey,"[{'MolMeDB_uniprot_trembl_id': nan, 'MolMeDB_h..."
2,159603,ChEBI,VJGQRELPQWNURN-JYJNAYRXSA-N,InChIKey,"[{'MolMeDB_uniprot_trembl_id': nan, 'MolMeDB_h..."
3,16865,ChEBI,BTCSSZJGUNDROE-UHFFFAOYSA-N,InChIKey,[{'MolMeDB_uniprot_trembl_id': 'Uniprot-TrEMBL...
4,6030,ChEBI,QRXWMOHMRWLFEY-UHFFFAOYSA-N,InChIKey,"[{'MolMeDB_uniprot_trembl_id': nan, 'MolMeDB_h..."


## KEGG

In [8]:
kegg_df, kegg_metadata = kegg.get_pathways(bridgedb_compound_df)
kegg_df.head()

Unnamed: 0,identifier,identifier.source,target,target.source,KEGG_pathways
0,5291,ChEBI,C01498,KEGG Compound,"[{'pathway_id': 'path:hsa05146', 'pathway_labe..."
1,6030,ChEBI,C07054,KEGG Compound,"[{'pathway_id': 'path:hsa00983', 'pathway_labe..."
2,1060,ChEBI,C03826,KEGG Compound,"[{'pathway_id': 'path:hsa00040', 'pathway_labe..."
3,16865,ChEBI,C00334,KEGG Compound,"[{'pathway_id': 'path:hsa00250', 'pathway_labe..."


In [9]:
aop_df, aop_metadata = aopwiki.get_aops_compound(bridgedb_compound_df)
aop_df.head()

Querying AOP_Wiki_RDF for compounds: 100%|██████████| 1/1 [00:01<00:00,  1.57s/it]


Unnamed: 0,identifier,identifier.source,target,target.source,AOP_Wiki_RDF_compounds
0,100208,ChEBI,54631547,PubChem Compound,"[{'aop': nan, 'aop_title': nan, 'MIE_title': n..."
1,1060,ChEBI,5280626,PubChem Compound,"[{'aop': nan, 'aop_title': nan, 'MIE_title': n..."
2,159603,ChEBI,145456550,PubChem Compound,"[{'aop': nan, 'aop_title': nan, 'MIE_title': n..."
3,16865,ChEBI,119,PubChem Compound,"[{'aop': nan, 'aop_title': nan, 'MIE_title': n..."
4,6030,ChEBI,3767,PubChem Compound,"[{'aop': '260', 'aop_title': 'CYP2E1 activatio..."


In [15]:
aop_df["AOP_Wiki_RDF_compounds"][1]

[{'aop': nan,
  'aop_title': nan,
  'MIE_title': nan,
  'MIE': nan,
  'KE_downstream': nan,
  'KE_downstream_title': nan,
  'KER': nan,
  'ao': nan,
  'ao_title': nan,
  'KE_upstream': nan,
  'KE_upstream_title': nan,
  'KE_upstream_organ': nan,
  'KE_downstream_organ': nan}]

In [10]:
combined_df = combine_sources(
    bridgedb_compound_df,
    [
        intact_compound_df, 
        molmedb_df,
        aop_df,
        kegg_df
    ],
)

In [11]:
combined_df.head(25)

Unnamed: 0,identifier,identifier.source,target,target.source,IntAct_compound_interactions,MolMeDB_transporter_inhibited,AOP_Wiki_RDF_compounds,KEGG_pathways
0,100208,ChEBI,54631547.0,PubChem Compound,"[{'interaction_id': nan, 'interactor_id_A': na...","[{'MolMeDB_uniprot_trembl_id': nan, 'MolMeDB_h...","[{'aop': nan, 'aop_title': nan, 'MIE_title': n...",
1,6030,ChEBI,3767.0,PubChem Compound,"[{'interaction_id': nan, 'interactor_id_A': na...","[{'MolMeDB_uniprot_trembl_id': nan, 'MolMeDB_h...","[{'aop': '260', 'aop_title': 'CYP2E1 activatio...","[{'pathway_id': 'path:hsa00983', 'pathway_labe..."
2,1060,ChEBI,5280626.0,PubChem Compound,"[{'interaction_id': nan, 'interactor_id_A': na...","[{'MolMeDB_uniprot_trembl_id': nan, 'MolMeDB_h...","[{'aop': nan, 'aop_title': nan, 'MIE_title': n...","[{'pathway_id': 'path:hsa00040', 'pathway_labe..."
3,159603,ChEBI,145456550.0,PubChem Compound,"[{'interaction_id': nan, 'interactor_id_A': na...","[{'MolMeDB_uniprot_trembl_id': nan, 'MolMeDB_h...","[{'aop': nan, 'aop_title': nan, 'MIE_title': n...",
4,16865,ChEBI,119.0,PubChem Compound,"[{'interaction_id': nan, 'interactor_id_A': na...",[{'MolMeDB_uniprot_trembl_id': 'Uniprot-TrEMBL...,"[{'aop': nan, 'aop_title': nan, 'MIE_title': n...","[{'pathway_id': 'path:hsa00250', 'pathway_labe..."
5,5291,,,,"[{'interaction_id': nan, 'interactor_id_A': na...",,,"[{'pathway_id': 'path:hsa05146', 'pathway_labe..."


In [12]:
pygraph = generator.build_networkx_graph(combined_df)

Building graph: 100%|██████████| 5/5 [00:00<00:00, 908.72it/s]


In [10]:
combined_df.shape

(2, 5)

In [13]:
from pyBiodatafuse.graph import cytoscape

cytoscape.load_graph(pygraph, network_name="Test network")

Applying default style...
Applying preferred layout
