# Example: Gene Workflow

This notebook provides insights on how to use the tool if you have list of genes.
Our current data sources include:
* DisGeNet
* MolMeDB
* OpenTargets
* WikiPathways
* WikiData
* STRING
* Bgee

In [1]:
# Import modules
import pandas as pd

from pyBiodatafuse import id_mapper
from pyBiodatafuse.annotators import disgenet, molmedb, pubchem  # Example with just three resource
from pyBiodatafuse.utils import combine_sources

## Load the input list and convert it to a dataframe

In [2]:
genes_of_interest = """AGRN
ALG14
ALG2
CHAT
CHD8
CHRNA1
CHRNB1
CHRND
CHRNE
CHRNG
COL13A1
COLQ
DOK7
SLC22A5
EGFR"""
gene_list = genes_of_interest.split("\n")
len(gene_list)

15

In [3]:
data_input = pd.DataFrame(gene_list, columns=["identifier"])
data_input.head()

Unnamed: 0,identifier
0,AGRN
1,ALG14
2,ALG2
3,CHAT
4,CHD8


## Entity resolution using BridgeDB

In [4]:
bridgdb_df, bridgdb_metadata = id_mapper.bridgedb_xref(
    identifiers=data_input,
    input_species="Human",
    input_datasource="HGNC",
    output_datasource="All",
)
bridgdb_df.head()

Unnamed: 0,identifier,identifier.source,target,target.source
0,AGRN,HGNC,GO:0046872,Gene Ontology
1,AGRN,HGNC,4053565,Affy
2,AGRN,HGNC,4053601,Affy
3,AGRN,HGNC,4053600,Affy
4,AGRN,HGNC,4053602,Affy


## Disease annotating using DisGeNet

In [5]:
api_key = "0209751bfa7b6a981a8f5fb5f062313067ecd36c"  # TODO: add your key
params = {"source": "CURATED", "format": "json"}  # only curated data
disgenet_result, disgenet_metadata = disgenet.get_gene_disease(
    bridgedb_df=bridgdb_df, api_key=api_key, params=params
)
disgenet_result.head()

  disgenet_result, disgenet_metadata = disgenet.get_gene_disease(


In [6]:
disgenet_result["DisGeNET"][0]

KeyError: 'DisGeNET'

## Inhibitors of trasporters encoded by genes annotation by MolMeDB

In [9]:
inhibitor_df, inhibitor_metadata = molmedb.get_gene_mol_inhibitor(bridgedb_df=bridgdb_df)
inhibitor_df

Unnamed: 0,identifier,identifier.source,target,target.source,transporter_inhibitor
0,AGRN,HGNC,A0A087X208,Uniprot-TrEMBL,"[{'label': nan, 'InChIKey': nan, 'SMILES': nan..."
1,ALG14,HGNC,Q96F25,Uniprot-TrEMBL,"[{'label': nan, 'InChIKey': nan, 'SMILES': nan..."
2,ALG2,HGNC,A0A024R184,Uniprot-TrEMBL,"[{'label': nan, 'InChIKey': nan, 'SMILES': nan..."
3,CHAT,HGNC,A0A1W2PP46,Uniprot-TrEMBL,"[{'label': nan, 'InChIKey': nan, 'SMILES': nan..."
4,CHD8,HGNC,A0A2R8Y4P3,Uniprot-TrEMBL,"[{'label': nan, 'InChIKey': nan, 'SMILES': nan..."
5,CHRNA1,HGNC,A0A1B0GV17,Uniprot-TrEMBL,"[{'label': nan, 'InChIKey': nan, 'SMILES': nan..."
6,CHRNB1,HGNC,I3L1T7,Uniprot-TrEMBL,"[{'label': nan, 'InChIKey': nan, 'SMILES': nan..."
7,CHRND,HGNC,B4DKT6,Uniprot-TrEMBL,"[{'label': nan, 'InChIKey': nan, 'SMILES': nan..."
8,CHRNE,HGNC,A0A3B3IRM1,Uniprot-TrEMBL,"[{'label': nan, 'InChIKey': nan, 'SMILES': nan..."
9,CHRNG,HGNC,A0A6F7YAP6,Uniprot-TrEMBL,"[{'label': nan, 'InChIKey': nan, 'SMILES': nan..."


In [10]:
inhibitor_df["transporter_inhibitor"][14]

[{'label': 'Cefepime',
  'InChIKey': 'HVFLCNVBZFFHBT-UHFFFAOYSA-O',
  'SMILES': 'CON=C(C(=O)NC1C(=O)N2C(C(=O)O)=C(C[N+]3(C)CCCC3)CSC12)c1csc(N)n1',
  'pubchem_compound_id': 2623,
  'molmedb_id': 'MM16967',
  'source_doi': 'doi:10.1074/jbc.275.3.1699',
  'source_pmid': 10636865,
  'chebi_id': nan,
  'drugbank_id': nan},
 {'label': 'Cephaloridine',
  'InChIKey': 'CZTQZXZIADLWOZ-UHFFFAOYSA-O',
  'SMILES': 'O=C(Cc1cccs1)NC1C(=O)N2C(C(=O)O)=C(C[n+]3ccccc3)CSC12',
  'pubchem_compound_id': 5773,
  'molmedb_id': 'MM00638',
  'source_doi': 'doi:10.1074/jbc.275.3.1699',
  'source_pmid': 10636865,
  'chebi_id': '3537',
  'drugbank_id': 'DB09008'}]

## Screening results of compounds on proteins encoded by genes annotation by PubChem

In [11]:
pubchem_assay_df, pubchem_assay_metadata = pubchem.get_protein_molecule_screened(bridgedb_df=bridgdb_df)
pubchem_assay_df.head()

Unnamed: 0,identifier,identifier.source,target,target.source,compounds_screened
0,AGRN,HGNC,A0A087X208,Uniprot-TrEMBL,"[{'assay_type': nan, 'outcome': nan, 'compound..."
1,ALG14,HGNC,Q96F25,Uniprot-TrEMBL,"[{'assay_type': nan, 'outcome': nan, 'compound..."
2,ALG2,HGNC,A0A024R184,Uniprot-TrEMBL,"[{'assay_type': nan, 'outcome': nan, 'compound..."
3,CHAT,HGNC,A0A1W2PP46,Uniprot-TrEMBL,"[{'assay_type': nan, 'outcome': nan, 'compound..."
4,CHD8,HGNC,A0A2R8Y4P3,Uniprot-TrEMBL,"[{'assay_type': nan, 'outcome': nan, 'compound..."


In [13]:
pubchem_assay_df['compounds_screened'][13]

[{'assay_type': 'AC50',
  'outcome': 'active',
  'compound_cid': 5211059,
  'compound_name': 'N-(3,4-difluorophenyl)-7-thia-9,11-diazatricyclo[6.4.0.02,6]dodeca-1(12),2(6),8,10-tetraen-12-amine',
  'SMILES': 'C1CC2=C(C1)SC3=NC=NC(=C23)NC4=CC(=C(C=C4)F)F',
  'InChI': 'InChI=1S/C15H11F2N3S/c16-10-5-4-8(6-11(10)17)20-14-13-9-2-1-3-12(9)21-15(13)19-7-18-14/h4-7H,1-3H2,(H,18,19,20)'},
 {'assay_type': 'AC50',
  'outcome': 'active',
  'compound_cid': 2303746,
  'compound_name': '4-Methyl-3-(2-phenylhydrazinyl)-1,5-benzodiazepin-2-one',
  'SMILES': 'CC1=C(C(=O)N=C2C=CC=CC2=N1)NNC3=CC=CC=C3',
  'InChI': 'InChI=1S/C16H14N4O/c1-11-15(20-19-12-7-3-2-4-8-12)16(21)18-14-10-6-5-9-13(14)17-11/h2-10,19H,1H3,(H,18,20,21)'},
 {'assay_type': 'AC50',
  'outcome': 'active',
  'compound_cid': 2322338,
  'compound_name': '7-Methyl-3-(phenylhydrazo)-1,5-benzodiazepin-2-one',
  'SMILES': 'CC1=CC2=NC=C(C(=O)N=C2C=C1)NNC3=CC=CC=C3',
  'InChI': 'InChI=1S/C16H14N4O/c1-11-7-8-13-14(9-11)17-10-15(16(21)18-13)20-19-12

## Combing all the results into single dataframe

In [14]:
combined_df = combine_sources([pubchem_assay_df, inhibitor_df])

In [15]:
combined_df.head(4)

Unnamed: 0,identifier,identifier.source,target,target.source,compounds_screened,transporter_inhibitor
0,AGRN,HGNC,A0A087X208,Uniprot-TrEMBL,"[{'assay_type': nan, 'outcome': nan, 'compound...","[{'label': nan, 'InChIKey': nan, 'SMILES': nan..."
1,ALG14,HGNC,Q96F25,Uniprot-TrEMBL,"[{'assay_type': nan, 'outcome': nan, 'compound...","[{'label': nan, 'InChIKey': nan, 'SMILES': nan..."
2,ALG2,HGNC,A0A024R184,Uniprot-TrEMBL,"[{'assay_type': nan, 'outcome': nan, 'compound...","[{'label': nan, 'InChIKey': nan, 'SMILES': nan..."
3,CHAT,HGNC,A0A1W2PP46,Uniprot-TrEMBL,"[{'assay_type': nan, 'outcome': nan, 'compound...","[{'label': nan, 'InChIKey': nan, 'SMILES': nan..."
