# Example: Metabolite workflow

This notebook shows how to use pyBiodatafuse when your input is a list of metabolites.
Our current data sources include:
* MolMeDB
* AOP-Wiki RDF

In [1]:
# Import modules
import pandas as pd

from pyBiodatafuse import id_mapper
from pyBiodatafuse.annotators import aopwiki, molmedb, opentargets
from pyBiodatafuse.constants import MOLMEDB_COMPOUND_PROTEIN_COL, OPENTARGETS_DISEASE_COL
from pyBiodatafuse.utils import combine_sources

# Load list of metabolites/chemicals/compounds

In [2]:
# Metabolites of interest, one identifier per line.
# The active example is a PubChem Compound ID, matching the `input_datasource`
# used in the BridgeDb mapping cell; the ChEMBL example needs that argument changed.
metabolites_of_interest = """31304"""
# Alternative inputs:
# metabolites_of_interest = """CHEMBL1201583
# CHEMBL941
# """
# metabolites_of_interest = "5291"
# metabolites_of_interest = "10025195"

# splitlines() + strip/filter instead of split("\n"): a trailing newline or blank
# line (as in the multi-line example above) would otherwise add an empty identifier.
metabolite_list = [
    line.strip() for line in metabolites_of_interest.splitlines() if line.strip()
]
len(metabolite_list)

1

In [3]:
# Wrap the identifiers in a one-column frame; each entry of `metabolite_list`
# becomes a row under the "identifier" column.
data_input = pd.DataFrame({"identifier": metabolite_list})
data_input.head()

Unnamed: 0,identifier
0,31304


### Entity resolution using BridgeDB

In [4]:
# Cross-reference the input identifiers through BridgeDb.
# The call returns a pair that is unpacked into a mapping frame and its metadata.
xref_result = id_mapper.bridgedb_xref(
    identifiers=data_input,
    input_species="Human",
    # Data source of the input IDs; presumably "ChEMBL compound" for ChEMBL input.
    input_datasource="PubChem Compound",
    output_datasource="All",
)
bridgdb_df, bridgdb_metadata = xref_result
bridgdb_df.head()

Unnamed: 0,identifier,identifier.source,target,target.source
0,31304,PubChem-compound,29044,ChemSpider
1,31304,PubChem-compound,124-87-8,CAS
2,31304,PubChem-compound,31304,PubChem Compound
3,31304,PubChem-compound,CHEMBL506977,ChEMBL compound
4,31304,PubChem-compound,134126,ChEBI


### AOPs from AOP-Wiki RDF

In [5]:
# NOTE(review): leftover scratch call, kept commented out. It queries MolMeDB rather
# than AOP-Wiki, and the same call is made in the MolMeDB section further down.
# test = molmedb.get_compound_gene_inhibitor(bridgedb_df=bridgdb_df)
# test.head()

In [6]:
# Query AOP-Wiki for the compounds in the BridgeDb mapping, keyed on their
# PubChem Compound identifiers. The call returns a (frame, metadata) pair.
aop_result = aopwiki.get_aops(
    bridgedb_df=bridgdb_df,
    db="aopwiki",
    input_type="compound",
    input_identifier="PubChem Compound",
)
aopwiki_df, aopwiki_metadata = aop_result
aopwiki_df

Querying aopwiki for compound: 100%|██████████| 1/1 [00:00<00:00,  1.07it/s]


Unnamed: 0,identifier,identifier.source,target,target.source,aopwiki
0,31304,PubChem-compound,LEJRLSZVESQKJK-UHFFFAOYSA-N,InChIKey,"[{'uniprot_trembl_id': nan, 'hgnc_symbol': nan..."
2,10040286,PubChem-compound,FYGREZKTJIXWIH-UHFFFAOYSA-N,InChIKey,"[{'uniprot_trembl_id': nan, 'hgnc_symbol': nan..."
3,10041551,PubChem-compound,OVVBIIBBRZVPAL-UHFFFAOYSA-N,InChIKey,"[{'uniprot_trembl_id': 'P23975', 'hgnc_symbol'..."
4,5291,PubChem-compound,KTUFNOKKBVMGRW-UHFFFAOYSA-N,InChIKey,"[{'uniprot_trembl_id': 'O15244', 'hgnc_symbol'..."


### Transporters inhibited by the compounds (MolMeDB)

In [None]:
# Ask MolMeDB which proteins are inhibited by the mapped compounds.
# The call returns a pair: the annotated frame and the query metadata.
inhibitor_result = molmedb.get_compound_gene_inhibitor(bridgedb_df=bridgdb_df)
(
    molmedb_transporter_inhibited_df,
    molmedb_transporter_inhibited_metadata,
) = inhibitor_result
molmedb_transporter_inhibited_df.head()

In [7]:
# Pull out the MolMeDB annotation column, then show the entry stored under row 4.
# NOTE(review): the row key 4 is hard-coded - confirm it exists for the current input list.
inhibited_proteins = molmedb_transporter_inhibited_df[MOLMEDB_COMPOUND_PROTEIN_COL]
inhibited_proteins[4]

[{'uniprot_trembl_id': 'O15244',
  'hgnc_symbol': 'SLC22A2',
  'source_pmid': '23241029'},
 {'uniprot_trembl_id': 'Q9UNQ0',
  'hgnc_symbol': 'ABCG2',
  'source_pmid': '15155841, 18678495, 19932960'}]