# Example: Metabolite workflow

This notebook shows how to use the tool when you start from a list of metabolites.
Our current data sources include:
* MolMeDB

In [1]:
# Import modules
import pandas as pd

from pyBiodatafuse import id_mapper
from pyBiodatafuse.annotators import molmedb
from pyBiodatafuse.utils import combine_sources

# Load list of metabolites/chemicals/compounds

In [2]:
# Identifiers to look up, one per line. They are passed to BridgeDb as
# "PubChem Compound" identifiers in a later cell; replace with your own list.
metabolites_of_interest = """10041551
10025195
2153"""

# splitlines() handles any newline convention (\n, \r\n), and stripping plus
# dropping blanks keeps a trailing newline or stray whitespace in a pasted list
# from turning into bogus (empty or padded) identifiers.
metabolite_list = [
    line.strip() for line in metabolites_of_interest.splitlines() if line.strip()
]
len(metabolite_list)

3

In [3]:
# Wrap the identifiers in a one-column DataFrame; the column is named
# "identifier" and is what gets handed to the ID mapper in the next cell.
data_input = pd.DataFrame(data=metabolite_list, columns=["identifier"])

# Show the frame as a nested dict (column -> {index -> value}) for a compact view.
data_input.to_dict(orient="dict")

{'identifier': {0: '10041551', 1: '10025195', 2: '2153'}}

In [4]:
# Cross-reference the input identifiers via id_mapper.bridgedb_xref, treating
# them as "PubChem Compound" IDs and requesting all output datasources.
# Returns a (DataFrame, metadata) pair.
bridgdb_df, bridgdb_metadata = id_mapper.bridgedb_xref(
    identifiers=data_input,
    input_species="Human",
    input_datasource="PubChem Compound",
    output_datasource="All",
)

# Display only the rows whose mapped target is an InChIKey, as a
# column -> list-of-values dict.
bridgdb_df.loc[bridgdb_df["target.source"].eq("InChIKey")].to_dict(orient="list")

{'identifier': ['10041551', '10025195', '2153'],
 'identifier.source': ['PubChem-compound',
  'PubChem-compound',
  'PubChem-compound'],
 'target': ['OVVBIIBBRZVPAL-UHFFFAOYSA-N',
  'LEJRLSZVESQKJK-UHFFFAOYSA-N',
  'ZFXYFBGIUFBOJW-UHFFFAOYSA-N'],
 'target.source': ['InChIKey', 'InChIKey', 'InChIKey']}

In [5]:
# Annotate the mapped identifiers with the MolMeDB annotator. The call returns
# a (DataFrame, metadata) pair; the displayed frame carries a
# "transporter_inhibited" column holding a list of dicts per row.
(
    transporter_inhibited_df,
    transporter_inhibited_metadata,
) = molmedb.get_mol_gene_inhibitor(bridgedb_df=bridgdb_df)

transporter_inhibited_df

Unnamed: 0,identifier,identifier.source,target,target.source,transporter_inhibited
0,10025195,PubChem-compound,LEJRLSZVESQKJK-UHFFFAOYSA-N,InChIKey,"[{'uniprot_trembl_id': 'Q01959', 'hgcn_id': 'S..."
1,10041551,PubChem-compound,OVVBIIBBRZVPAL-UHFFFAOYSA-N,InChIKey,"[{'uniprot_trembl_id': 'P23975', 'hgcn_id': 'S..."
2,2153,PubChem-compound,ZFXYFBGIUFBOJW-UHFFFAOYSA-N,InChIKey,"[{'uniprot_trembl_id': nan, 'hgcn_id': nan, 's..."


In [6]:
# Inspect the full annotation list for the row labelled 0 (the table display
# truncates it). A single .loc lookup is used instead of the chained
# df[col][0] form: an integer key on a Series is ambiguous (label vs. position)
# and the positional fallback is deprecated in pandas.
transporter_inhibited_df.loc[0, "transporter_inhibited"]

[{'uniprot_trembl_id': 'Q01959',
  'hgcn_id': 'SLC6A3',
  'source_doi': 'doi:10.1021/jm980066t',
  'source_pmid': '9703474'}]