# Example: Metabolite workflow

This notebook demonstrates how to use pyBiodatafuse when you start from a list of metabolites.
Our current data sources include:
* MolMeDB
* OpenTargets

In [1]:
# Import modules
import pandas as pd

from pyBiodatafuse import id_mapper
from pyBiodatafuse.annotators import molmedb, opentargets
from pyBiodatafuse.constants import (
    MOLMEDB_INHIBITED_COL,
    OPENTARGETS_DISEASE_COL
)
from pyBiodatafuse.utils import combine_sources

# Load list of metabolites/chemicals/compounds

In [2]:
# PubChem compound IDs of the metabolites to annotate, one identifier per line.
# ChEMBL IDs (e.g. CHEMBL1201583, CHEMBL941) can be listed here instead, as long
# as the `input_datasource` of the BridgeDb mapping step is changed to match.
metabolites_of_interest = """100208
10040286
10041551
10025195
5291"""

# Split per line, trimming whitespace and dropping blank lines, so that a
# trailing newline in the block above never turns into an empty identifier.
metabolite_list = [
    line.strip() for line in metabolites_of_interest.splitlines() if line.strip()
]
len(metabolite_list)

5

In [3]:
# Wrap the identifiers in a one-column dataframe named "identifier", which is
# what gets passed to the BridgeDb mapping step as `identifiers`.
data_input = pd.DataFrame({"identifier": metabolite_list})
data_input.head()

Unnamed: 0,identifier
0,100208
1,10040286
2,10041551
3,10025195
4,5291


### Entity resolution using BridgeDb

In [4]:
# Map every input identifier to its cross-references in all output datasources.
# The call returns a pair: the mapping dataframe and its accompanying metadata.
(
    bridgdb_df,
    bridgdb_metadata,
) = id_mapper.bridgedb_xref(
    identifiers=data_input,
    input_species="Human",
    # Datasource of the input IDs; the alternative noted for ChEMBL input
    # is "ChEMBL compound".
    input_datasource="PubChem Compound",
    output_datasource="All",
)
bridgdb_df.head()

Unnamed: 0,identifier,identifier.source,target,target.source
0,100208,PubChem-compound,90560,ChemSpider
1,100208,PubChem-compound,100208,PubChem Compound
2,100208,PubChem-compound,HMDB0244377,HMDB
3,100208,PubChem-compound,OFDNQWIFNXBECV-UHFFFAOYSA-N,InChIKey
4,100208,PubChem-compound,C11280,KEGG Compound


### Diseases from OpenTargets

In [5]:
# Annotate the mapped compounds with their OpenTargets disease associations.
# The call returns a pair: the annotated dataframe and its metadata.
(
    opentargets_disease_df,
    opentargets_disease_metadata,
) = opentargets.get_compound_disease_interactions(bridgedb_df=bridgdb_df)
opentargets_disease_df.head()

Unnamed: 0,identifier,identifier.source,target,target.source,OpenTargets_diseases
0,100208,PubChem-compound,100208,PubChem Compound,"[{'disease_id': nan, 'disease_name': nan, 'the..."
1,10025195,PubChem-compound,10025195,PubChem Compound,"[{'disease_id': nan, 'disease_name': nan, 'the..."
2,10040286,PubChem-compound,10040286,PubChem Compound,"[{'disease_id': nan, 'disease_name': nan, 'the..."
3,10041551,PubChem-compound,10041551,PubChem Compound,"[{'disease_id': nan, 'disease_name': nan, 'the..."
4,5291,PubChem-compound,5291,PubChem Compound,"[{'disease_id': 'umls:C0009375', 'disease_name..."


In [6]:
# Show the full list of disease annotations for the row labelled 4 (compound
# 5291 in the output shown above), using one explicit .loc lookup instead of
# chained indexing.
opentargets_disease_df.loc[4, OPENTARGETS_DISEASE_COL]

[{'disease_id': 'umls:C0009375',
  'disease_name': 'colonic neoplasm',
  'therapeutic_areas': 'MONDO_0045024:cancer or benign tumor, EFO_0010282:gastrointestinal disease',
  'disease_xrefs': ['EFO_0004288',
   'NCI_C2953',
   'MESH_D003110',
   'UMLS_C0009375',
   'MONDO_0005401']},
 {'disease_id': 'umls:C0238033, umls:C0242787, umls:C0242788',
  'disease_name': 'male breast carcinoma',
  'therapeutic_areas': 'OTAR_0000017:reproductive system or breast disease, MONDO_0045024:cancer or benign tumor, EFO_0010285:integumentary system disease',
  'disease_xrefs': ['UMLS_C0238033',
   'DO_1614',
   'MESH_D018567',
   'EFO_0006861',
   'UMLS_C0242787',
   'MONDO_0005628',
   'NCI_C3862',
   'UMLS_C0242788']},
 {'disease_id': 'umls:C0007115',
  'disease_name': 'thyroid cancer',
  'therapeutic_areas': 'EFO_0001379:endocrine system disease, MONDO_0045024:cancer or benign tumor',
  'disease_xrefs': ['DO_1781', 'UMLS_C0007115', 'NCI_C7510']},
 {'disease_id': 'umls:C0036421',
  'disease_name': 'sy

### Transporters inhibited, from MolMeDB

In [7]:
# Annotate the mapped compounds with the MolMeDB transporter-inhibition records.
# The call returns a pair: the annotated dataframe and its metadata.
(
    molmedb_transporter_inhibited_df,
    molmedb_transporter_inhibited_metadata,
) = molmedb.get_compound_gene_inhibitor(bridgedb_df=bridgdb_df)
molmedb_transporter_inhibited_df.head()

Unnamed: 0,identifier,identifier.source,target,target.source,MolMeDB_transporter_inhibited
0,100208,PubChem-compound,OFDNQWIFNXBECV-UHFFFAOYSA-N,InChIKey,"[{'uniprot_trembl_id': 'P08183', 'hgnc_symbol'..."
1,10025195,PubChem-compound,LEJRLSZVESQKJK-UHFFFAOYSA-N,InChIKey,"[{'uniprot_trembl_id': 'Q01959', 'hgnc_symbol'..."
2,10040286,PubChem-compound,FYGREZKTJIXWIH-UHFFFAOYSA-N,InChIKey,"[{'uniprot_trembl_id': 'Q01959', 'hgnc_symbol'..."
3,10041551,PubChem-compound,OVVBIIBBRZVPAL-UHFFFAOYSA-N,InChIKey,"[{'uniprot_trembl_id': 'P23975', 'hgnc_symbol'..."
4,5291,PubChem-compound,KTUFNOKKBVMGRW-UHFFFAOYSA-N,InChIKey,"[{'uniprot_trembl_id': 'O15244', 'hgnc_symbol'..."


In [8]:
# Show the full list of inhibited-transporter records for the row labelled 4
# (compound 5291 in the output shown above), using one explicit .loc lookup
# instead of chained indexing.
molmedb_transporter_inhibited_df.loc[4, MOLMEDB_INHIBITED_COL]

[{'uniprot_trembl_id': 'O15244',
  'hgnc_symbol': 'SLC22A2',
  'source_pmid': '23241029'},
 {'uniprot_trembl_id': 'O15245',
  'hgnc_symbol': 'SLC22A1',
  'source_pmid': '23241029'},
 {'uniprot_trembl_id': 'O75751',
  'hgnc_symbol': 'SLC22A3',
  'source_pmid': '23241029'},
 {'uniprot_trembl_id': 'Q92887',
  'hgnc_symbol': 'ABCC2',
  'source_pmid': '23956101'},
 {'uniprot_trembl_id': 'Q9UNQ0',
  'hgnc_symbol': 'ABCG2',
  'source_pmid': '15155841, 18678495, 19932960'}]

### Combining all the results into a single dataframe

In [9]:
# Collect the per-source annotation tables, then merge them into one dataframe
# that carries both the OpenTargets and the MolMeDB annotation columns.
annotated_frames = [opentargets_disease_df, molmedb_transporter_inhibited_df]
combined_df = combine_sources(annotated_frames)
combined_df.head()

Unnamed: 0,identifier,identifier.source,target,target.source,OpenTargets_diseases,MolMeDB_transporter_inhibited
0,100208,PubChem-compound,100208,PubChem Compound,"[{'disease_id': nan, 'disease_name': nan, 'the...","[{'uniprot_trembl_id': 'P08183', 'hgnc_symbol'..."
1,10025195,PubChem-compound,10025195,PubChem Compound,"[{'disease_id': nan, 'disease_name': nan, 'the...","[{'uniprot_trembl_id': 'Q01959', 'hgnc_symbol'..."
2,10040286,PubChem-compound,10040286,PubChem Compound,"[{'disease_id': nan, 'disease_name': nan, 'the...","[{'uniprot_trembl_id': 'Q01959', 'hgnc_symbol'..."
3,10041551,PubChem-compound,10041551,PubChem Compound,"[{'disease_id': nan, 'disease_name': nan, 'the...","[{'uniprot_trembl_id': 'P23975', 'hgnc_symbol'..."
4,5291,PubChem-compound,5291,PubChem Compound,"[{'disease_id': 'umls:C0009375', 'disease_name...","[{'uniprot_trembl_id': 'O15244', 'hgnc_symbol'..."
