# Example 2: Metabolite workflow

This notebook demonstrates how to use pyBiodatafuse when your starting point is a list of metabolites.

# Import modules

In [1]:
import sys
import os

# Make the in-repo package importable without installing it: the parent of the
# current working directory is treated as the project root.
project_root = os.path.abspath(os.pardir)
src_path = os.path.join(project_root, "src")

# Prepend only when missing, so re-running this cell does not stack duplicates.
src_already_listed = src_path in sys.path
if not src_already_listed:
    sys.path.insert(0, src_path)

print("PYTHONPATH:", sys.path[0])

PYTHONPATH: c:\Users\are10\Documents\BAFSTU\code\pyBioDatafusemain\pyBiodatafuse\src


In [2]:
import pandas as pd

from pyBiodatafuse import id_mapper
from pyBiodatafuse.annotators import molmedb, opentargets
from pyBiodatafuse.graph import saver
from pyBiodatafuse.graph import cytoscape
from pyBiodatafuse.graph import generator
from pyBiodatafuse.utils import combine_sources, create_or_append_to_metadata

# Load list of metabolites/chemicals/compounds

In [3]:
# Identifiers of the compounds of interest, one per line. They are later
# submitted to BridgeDb with "PubChem Compound" as the input data source.
metabolites_of_interest = """100208
10040286
10041551
10025195
5291"""

# splitlines() copes with any newline convention; stripping and dropping blank
# lines keeps a trailing newline or stray whitespace in a pasted list from
# turning into bogus identifiers.
metabolite_list = [
    line.strip() for line in metabolites_of_interest.splitlines() if line.strip()
]
len(metabolite_list)

5

In [4]:
# Single-column frame of the input identifiers; this frame is what gets passed
# as `identifiers` to the BridgeDb mapping call.
data_input = pd.DataFrame({"identifier": metabolite_list})
data_input.head()

Unnamed: 0,identifier
0,100208
1,10040286
2,10041551
3,10025195
4,5291


### Entity resolution using BridgeDB

In [5]:
# Map each input identifier to its cross-references in every available data
# source. The call returns two values: the mapping table and its metadata.
bridgedb_result = id_mapper.bridgedb_xref(
    identifiers=data_input,
    input_species="Human",
    input_datasource="PubChem Compound",
    output_datasource="All",
)
bridgdb_df, bridgdb_metadata = bridgedb_result
bridgdb_df.head()

Unnamed: 0,identifier,identifier.source,target,target.source
0,100208,PubChem-compound,90560,ChemSpider
1,100208,PubChem-compound,100208,PubChem Compound
2,100208,PubChem-compound,HMDB0244377,HMDB
3,100208,PubChem-compound,OFDNQWIFNXBECV-UHFFFAOYSA-N,InChIKey
4,100208,PubChem-compound,C11280,KEGG Compound


### Transporters inhibited (from MolMeDB)

In [6]:
# Annotate the mapped compounds with MolMeDB transporter-inhibition data.
# The call returns the annotated table together with its metadata.
molmedb_result = molmedb.get_compound_gene_inhibitor(bridgedb_df=bridgdb_df)
molmedb_transporter_inhibited_df, molmedb_transporter_inhibited_metadata = molmedb_result
molmedb_transporter_inhibited_df.head()

Unnamed: 0,identifier,identifier.source,target,target.source,MolMeDB_transporter_inhibited
0,100208,PubChem-compound,OFDNQWIFNXBECV-UHFFFAOYSA-N,InChIKey,"[{'uniprot_trembl_id': 'P08183', 'hgnc_symbol'..."
1,10025195,PubChem-compound,LEJRLSZVESQKJK-UHFFFAOYSA-N,InChIKey,"[{'uniprot_trembl_id': nan, 'hgnc_symbol': nan..."
2,10040286,PubChem-compound,FYGREZKTJIXWIH-UHFFFAOYSA-N,InChIKey,"[{'uniprot_trembl_id': nan, 'hgnc_symbol': nan..."
3,10041551,PubChem-compound,OVVBIIBBRZVPAL-UHFFFAOYSA-N,InChIKey,"[{'uniprot_trembl_id': 'P23975', 'hgnc_symbol'..."
4,5291,PubChem-compound,KTUFNOKKBVMGRW-UHFFFAOYSA-N,InChIKey,"[{'uniprot_trembl_id': 'O15244', 'hgnc_symbol'..."


In [7]:
# Inspect the full (untruncated) annotation for the row labelled 0.
molmedb_transporter_inhibited_df.loc[0, "MolMeDB_transporter_inhibited"]

[{'uniprot_trembl_id': 'P08183',
  'hgnc_symbol': 'ABCB1',
  'source_pmid': '8621716'}]

### Disease from OpenTargets

In [8]:
# Annotate the mapped compounds with disease associations from OpenTargets.
# The call returns the annotated table together with its metadata.
opentargets_result = opentargets.get_compound_disease_interactions(bridgedb_df=bridgdb_df)
opentargets_df, opentargets_metadata = opentargets_result
opentargets_df.head()

  ) = opentargets.get_compound_disease_interactions(bridgedb_df=bridgdb_df)


Unnamed: 0,identifier,identifier.source,target,target.source,OpenTargets_diseases
0,100208,PubChem-compound,100208,PubChem Compound,"[{'disease_name': nan, 'therapeutic_areas': na..."
1,10025195,PubChem-compound,10025195,PubChem Compound,"[{'disease_name': nan, 'therapeutic_areas': na..."
2,10040286,PubChem-compound,10040286,PubChem Compound,"[{'disease_name': nan, 'therapeutic_areas': na..."
3,10041551,PubChem-compound,10041551,PubChem Compound,"[{'disease_name': nan, 'therapeutic_areas': na..."
4,5291,PubChem-compound,5291,PubChem Compound,"[{'disease_name': 'colonic neoplasm', 'therape..."


In [9]:
# Inspect the full disease list for the row labelled 4 (identifier 5291 in the
# table above), the only row shown with a non-empty disease annotation.
opentargets_df.loc[4, "OpenTargets_diseases"]

[{'disease_name': 'colonic neoplasm',
  'therapeutic_areas': 'EFO_0010282:gastrointestinal disease, MONDO_0045024:cancer or benign tumor',
  'HPO': '',
  'NCI': 'NCI_C2953',
  'OMIM': '',
  'MONDO': 'MONDO_0005401',
  'ORDO': '',
  'EFO': 'EFO_0004288',
  'DO': '',
  'MESH': 'MESH_D003110',
  'UMLS': 'UMLS_C0009375'},
 {'disease_name': 'male breast carcinoma',
  'therapeutic_areas': 'EFO_0010285:integumentary system disease, OTAR_0000017:reproductive system or breast disease, MONDO_0045024:cancer or benign tumor',
  'HPO': '',
  'NCI': 'NCI_C3862',
  'OMIM': '',
  'MONDO': 'MONDO_0005628',
  'ORDO': '',
  'EFO': 'EFO_0006861',
  'DO': 'DO_1614',
  'MESH': 'MESH_D018567',
  'UMLS': 'UMLS_C0242788'},
 {'disease_name': 'thyroid cancer',
  'therapeutic_areas': 'EFO_0001379:endocrine system disease, MONDO_0045024:cancer or benign tumor',
  'HPO': '',
  'NCI': 'NCI_C7510',
  'OMIM': '',
  'MONDO': '',
  'ORDO': '',
  'EFO': '',
  'DO': 'DO_1781',
  'MESH': '',
  'UMLS': 'UMLS_C0007115'},
 {'

# Combine data sources

In [10]:
# Merge the per-source annotation tables onto the BridgeDb mapping table.
annotation_frames = [opentargets_df, molmedb_transporter_inhibited_df]
combined_df = combine_sources(bridgdb_df, df_list=annotation_frames)
combined_df.head()

Unnamed: 0,identifier,identifier.source,target,target.source,OpenTargets_diseases,MolMeDB_transporter_inhibited
0,100208,PubChem-compound,100208,PubChem Compound,"[{'disease_name': nan, 'therapeutic_areas': na...","[{'uniprot_trembl_id': 'P08183', 'hgnc_symbol'..."
1,10040286,PubChem-compound,10040286,PubChem Compound,"[{'disease_name': nan, 'therapeutic_areas': na...","[{'uniprot_trembl_id': nan, 'hgnc_symbol': nan..."
2,10041551,PubChem-compound,10041551,PubChem Compound,"[{'disease_name': nan, 'therapeutic_areas': na...","[{'uniprot_trembl_id': 'P23975', 'hgnc_symbol'..."
3,10025195,PubChem-compound,10025195,PubChem Compound,"[{'disease_name': nan, 'therapeutic_areas': na...","[{'uniprot_trembl_id': nan, 'hgnc_symbol': nan..."
4,5291,PubChem-compound,5291,PubChem Compound,"[{'disease_name': 'colonic neoplasm', 'therape...","[{'uniprot_trembl_id': 'O15244', 'hgnc_symbol'..."


In [11]:
# Collect the metadata of every queried source next to the BridgeDb metadata,
# listed in the same order as the annotation tables combined above.
annotation_metadata = [opentargets_metadata, molmedb_transporter_inhibited_metadata]
combined_metadata = create_or_append_to_metadata(bridgdb_metadata, annotation_metadata)

# Saving the graph

In [12]:
# Output directory for this workflow, relative to the notebook's working
# directory. `os` is already imported in the first code cell, so it is not
# re-imported here.
DATA_DIR = "./data/metabolite_workflow"
os.makedirs(DATA_DIR, exist_ok=True)  # exist_ok makes the cell safe to re-run

In [13]:
# NOTE(review): the save step below is disabled, so nothing in the visible
# notebook writes into DATA_DIR and the `saver` import goes unused. Uncomment
# the call to persist the graph, or remove this cell if saving is not part of
# the example.
# pygraph = saver.save_graph(
#     combined_df=combined_df,
#     combined_metadata=combined_metadata,
#     graph_name="metabolite_workflow",
#     graph_dir=DATA_DIR,
# )

# Generate graph

In [14]:
# Build the graph from the combined annotation table. The printed summary
# further down shows the result is a networkx MultiDiGraph.
pygraph = generator.build_networkx_graph(combined_df)

Building graph: 100%|██████████| 5/5 [00:00<00:00, 502.85it/s]




In [16]:
# Print the graph's string form, which summarises its type and its node and
# edge counts.
print(pygraph)

MultiDiGraph with 109 nodes and 104 edges


In [17]:
# Load the graph into Cytoscape under the given network name; the output shows
# a default style and a layout being applied.
# NOTE(review): presumably requires a running Cytoscape session reachable from
# this kernel — confirm and document the prerequisite.
cytoscape.load_graph(pygraph, network_name="Test network")

Applying default style...
Applying preferred layout


In [None]:
# NOTE(review): `graph_obj` is not defined in any visible cell, so this raises
# NameError on a fresh kernel (Restart & Run All). The execution counter jumps
# from 14 to 16, so the cell that created it may have been deleted.
# Presumably it is a summary/analysis wrapper built from `pygraph` — TODO:
# restore the defining cell (and its import) before this call.
graph_obj.count_edge_by_data_source(plot=True)