#### Import BTE modules

In [229]:
# pandas is referenced by its full name (pandas.concat) further down in this notebook
import pandas as pandas
# BioThings Explorer (BTE) entry points: query classes, graph display, and the Hint resolver
# NOTE(review): Predict and display_graph are not used in the visible part of the notebook
from biothings_explorer.query.predict import Predict
from biothings_explorer.query.visualize import display_graph
from biothings_explorer.user_query_dispatcher import FindConnection
from biothings_explorer.hint import Hint
import nest_asyncio
# Patch asyncio to tolerate nested event loops -- presumably required because the
# notebook kernel already runs a loop while BTE dispatches its API calls; TODO confirm
nest_asyncio.apply()
# NOTE(review): networkx / matplotlib are imported but not used in the visible cells
import networkx as nx
import matplotlib.pyplot as plt

%matplotlib inline
import warnings
# Silence every Python warning for the rest of the session (this also hides deprecation notices)
warnings.filterwarnings("ignore") 

# Shared Hint instance; ht.query(term) is used below to turn an identifier or name
# into the per-type entity representations that FindConnection takes as input
ht = Hint()

### Find representation of VAMP2 in BTE

In [2]:
# Resolve the Ensembl ID of VAMP2 (ENSG00000220205) and keep the first Gene-type match
vamp2_matches = ht.query("ENSG00000220205")
vamp2 = vamp2_matches["Gene"][0]
vamp2

{'NCBIGene': '6844',
 'name': 'vesicle associated membrane protein 2',
 'SYMBOL': 'VAMP2',
 'UMLS': 'C1421419',
 'HGNC': '12643',
 'UNIPROTKB': 'P63027',
 'ENSEMBL': 'ENSG00000220205',
 'primary': {'identifier': 'NCBIGene', 'cls': 'Gene', 'value': '6844'},
 'display': 'NCBIGene(6844) ENSEMBL(ENSG00000220205) HGNC(12643) UMLS(C1421419) UNIPROTKB(P63027) SYMBOL(VAMP2)',
 'type': 'Gene'}

### Biological Process related to VAMP2
Find all the **Biological Processes** directly connected to the gene **VAMP2** (no intermediate nodes, i.e. `intermediate_nodes=[]`).

In [3]:

# Direct Gene -> BiologicalProcess query for VAMP2; the empty list means no intermediate nodes
fc_BP = FindConnection(
    input_obj=vamp2,
    output_obj="BiologicalProcess",
    intermediate_nodes=[],
)

In [4]:
# Run the query; verbose=True prints the path-planning / execution log shown below
fc_BP.connect(verbose=True)


BTE will find paths that join 'VAMP2' and 'BiologicalProcess'. Paths will have 0 intermediate node.




==== Step #1: Query path planning ====

Because VAMP2 is of type 'Gene', BTE will query our meta-KG for APIs that can take 'Gene' as input and 'BiologicalProcess' as output

BTE found 3 apis:

API 1. cord_gene(1 API call)
API 2. mygene(1 API call)
API 3. semmed_gene(11 API calls)


==== Step #2: Query path execution ====
NOTE: API requests are dispatched in parallel, so the list of APIs below is ordered by query time.

API 3.6: https://biothings.ncats.io/semmedgene/query?fields=positively_regulates (POST -d q=C1421419&scopes=umls)
API 3.1: https://biothings.ncats.io/semmedgene/query?fields=related_to (POST -d q=C1421419&scopes=umls)
API 3.7: https://biothings.ncats.io/semmedgene/query?fields=negatively_regulates (POST -d q=C1421419&scopes=umls)
API 3.2: https://biothings.ncats.io/semmedgene/query?fields=negatively_regulated_by (POST -d q=C1421419&scopes=umls)
API 2.1: https://mygene

In [5]:
# Tabulate the VAMP2 -> BiologicalProcess results and show them
df_BP = fc_BP.display_table_view()
df_BP

Unnamed: 0,input,input_type,pred1,pred1_source,pred1_api,pred1_pubmed,output_type,output_name,output_id
0,VAMP2,Gene,affects,SEMMED,SEMMED Gene API,24356748206094252364107425762204,BiologicalProcess,C0015283,UMLS:C0015283
1,VAMP2,Gene,affects,SEMMED,SEMMED Gene API,2858828119116655,BiologicalProcess,C0025246,UMLS:C0025246
2,VAMP2,Gene,affects,SEMMED,SEMMED Gene API,28264432,BiologicalProcess,C0597304,UMLS:C0597304
3,VAMP2,Gene,affects,SEMMED,SEMMED Gene API,1577448122406549,BiologicalProcess,C1326474,UMLS:C1326474
4,VAMP2,Gene,affects,SEMMED,SEMMED Gene API,9341137,BiologicalProcess,C1326489,UMLS:C1326489
5,VAMP2,Gene,affects,SEMMED,SEMMED Gene API,17215881,BiologicalProcess,C1522290,UMLS:C1522290
6,VAMP2,Gene,disrupts,SEMMED,SEMMED Gene API,12597859,BiologicalProcess,C0014139,UMLS:C0014139
7,VAMP2,Gene,causes,SEMMED,SEMMED Gene API,27527271,BiologicalProcess,C1330957,UMLS:C1330957
8,VAMP2,Gene,related_to,Translator Text Mining Provider,CORD Gene API,,BiologicalProcess,LEARNING,GO:GO:0007612
9,VAMP2,Gene,related_to,Translator Text Mining Provider,CORD Gene API,,BiologicalProcess,AER,GO:GO:0070914


In [230]:
# Count how many result rows mention each Biological Process name
df_BP["output_name"].value_counts()

SYNAPTIC VESICLE EXOCYTOSIS                                   2
VESICLE FUSION                                                2
GOLGI TO PLASMA MEMBRANE PROTEIN TRANSPORT                    1
CALCIUM ION-DEPENDENT EXOCYTOSIS                              1
REGULATION OF VESICLE-MEDIATED TRANSPORT                      1
POST-GOLGI TRANSPORT                                          1
C1326489                                                      1
CHAPERONE ACTIVITY                                            1
REGULATION OF EXOCYTOSIS                                      1
MUCUS PRODUCTION                                              1
GLUTAMATE SECRETION                                           1
LONG-TERM POTENTIATION                                        1
POSITIVE REGULATION OF INTRACELLULAR PROTEIN TRANSPORT        1
PROTEIN SORTING ALONG SECRETORY PATHWAY                       1
REGULATION OF DELAYED RECTIFIER POTASSIUM CHANNEL ACTIVITY    1
EOSINOPHIL DEGRANULATION                

In [7]:
# Count result rows per Biological Process identifier; the index of this Series holds the IDs
bp_output_id = df_BP["output_id"].value_counts()
bp_output_id

GO:GO:0006906    2
GO:GO:0016079    2
GO:GO:0048488    1
GO:GO:0016192    1
UMLS:C1326489    1
GO:GO:0007269    1
GO:GO:0032869    1
GO:GO:0001171    1
UMLS:C1326474    1
GO:GO:0006892    1
UMLS:C0597304    1
GO:GO:0009749    1
GO:GO:0006887    1
GO:GO:0043308    1
GO:GO:0030073    1
GO:GO:0070254    1
GO:GO:0061024    1
GO:GO:0007612    1
GO:GO:0043312    1
GO:GO:0043001    1
GO:GO:1903593    1
GO:GO:1902259    1
GO:GO:0017157    1
GO:GO:0065003    1
GO:GO:0060627    1
GO:GO:0061025    1
UMLS:C0015283    1
GO:GO:0035493    1
GO:GO:0060291    1
GO:GO:0017156    1
GO:GO:0015031    1
GO:GO:0014047    1
GO:GO:0070914    1
UMLS:C1522290    1
UMLS:C1330957    1
GO:GO:0090316    1
GO:GO:0043320    1
UMLS:C0014139    1
UMLS:C0025246    1
Name: output_id, dtype: int64

###  Query Path: BP>Genes>ChemicalSubstances>Disease
We want to find all the **Genes** associated with each **BiologicalProcess** connected to VAMP2 (retrieved above). We also want to find the **ChemicalSubstances** connecting each gene to the **Diseases** of interest (epilepsy, autism, intellectual disability).

For loop: 
1. Genes connected to the Biological Process (**bp_output_id**) 
    - Representation of Biological Process 
    - Find Genes directly connected to Biological Process. No intermediate nodes. 
    - Filter: select only the Biological Processes that have "x" or fewer related genes (**threshold_gene**)
    
2. Find intermediate ChemicalSubstance connecting Genes and diseases of interest (**diseases_id**)
    - Representation of genes selected
    - Representation of Diseases (diseases_id)

In [8]:
def bp_to_cs(bp_output_id,
             threshold_gene,
             diseases_id,
             verbose=True):
    """Chain BiologicalProcess -> Gene -> ChemicalSubstance -> Disease queries.

    Steps:
      1. Take the identifiers in ``bp_output_id.index``, keep those that start
         with ``"GO:"`` and strip that leading prefix (``"GO:GO:0006906"``
         becomes ``"GO:0006906"``). Every other identifier is ignored.
      2. For each remaining identifier, resolve it through ``ht.query`` and
         run a direct (no intermediate node) BiologicalProcess -> Gene query.
         A result table is kept only if it has at most ``threshold_gene`` rows.
      3. For every row of the kept tables and every entry of ``diseases_id``,
         run a Gene -> ChemicalSubstance -> Disease query and prepend three
         columns describing the BiologicalProcess -> Gene edge.

    Relies on the notebook-level ``ht`` (Hint instance) and ``FindConnection``.

    Parameters
    ----------
    bp_output_id : pandas.Series
        Series whose *index* holds the BiologicalProcess identifiers
        (e.g. the result of ``df.output_id.value_counts()``).
    threshold_gene : int
        Maximum number of rows a BiologicalProcess -> Gene table may have
        for that BiologicalProcess to be kept.
    diseases_id : iterable of str
        Disease identifiers to connect each gene to.
    verbose : bool, optional
        Forwarded to ``FindConnection.connect``. Defaults to ``True``, which
        matches the previous hard-coded behaviour.

    Returns
    -------
    pandas.DataFrame
        Concatenation of all Gene -> ChemicalSubstance -> Disease tables, with
        the extra leading columns ``BiologicalProcess``, ``pred_BP_Gene`` and
        ``pred_source``. An empty DataFrame is returned when there is nothing
        to concatenate (previously this raised inside ``pandas.concat``).
    """

    def _first_hint(term, semantic_type):
        # First Hint match of the requested type, or None (with a notice) if
        # the term cannot be resolved; avoids aborting the whole run on one miss.
        try:
            return ht.query(term)[semantic_type][0]
        except (KeyError, IndexError):
            print("No '{}' representation found for '{}'; skipping.".format(semantic_type, term))
            return None

    # Clean ID results: keep GO identifiers only and drop the duplicated "GO:" prefix.
    # A prefix test is used (not a substring test) so that only real GO IDs are sliced.
    go_ids = [raw_id[3:]
              for raw_id in bp_output_id.index.tolist()
              if isinstance(raw_id, str) and raw_id.startswith("GO:")]

    # Genes connected to each Biological Process (direct connection, no intermediate nodes)
    bp_gene_tables = []
    for bp_id in go_ids:
        bp_repr = _first_hint(bp_id, "BiologicalProcess")
        if bp_repr is None:
            continue
        fc = FindConnection(input_obj=bp_repr,
                            output_obj="Gene",
                            intermediate_nodes=[])
        fc.connect(verbose=verbose)
        bp_gene_table = fc.display_table_view()

        # Filter BP based on threshold_gene (number of rows in the result table)
        if len(bp_gene_table) <= threshold_gene:
            bp_gene_tables.append(bp_gene_table)

    # pandas.concat raises on an empty list, so bail out early in that case
    if not bp_gene_tables:
        return pandas.DataFrame()

    # Concatenate tables BP-Genes
    df = pandas.concat(bp_gene_tables)
    if df.empty:
        # Only empty tables survived the filter: there are no genes to follow up on
        return pandas.DataFrame()

    # Chemical substances linking each retrieved gene to each disease
    # NOTE(review): the 'pred_source' column is filled from 'pred1_pubmed'; if the
    # providing source was intended, this should read df["pred1_source"] -- confirm.
    final_table = []
    for bp, pred_type, source, gene_id in zip(df["input"], df["pred1"],
                                              df["pred1_pubmed"], df["output_name"]):
        for disease in diseases_id:
            gene_repr = _first_hint(gene_id, "Gene")
            if gene_repr is None:
                continue
            disease_repr = _first_hint(disease, "Disease")
            if disease_repr is None:
                continue
            cs = FindConnection(input_obj=gene_repr,
                                output_obj=disease_repr,
                                intermediate_nodes="ChemicalSubstance")
            cs.connect(verbose=verbose)
            table_results_cs = cs.display_table_view()
            # Add columns describing the BiologicalProcess -> Gene edge
            table_results_cs.insert(0, 'BiologicalProcess', bp)    # Biological Process name
            table_results_cs.insert(1, 'pred_BP_Gene', pred_type)  # association type
            table_results_cs.insert(2, 'pred_source', source)      # see NOTE(review) above

            final_table.append(table_results_cs)

    # Concatenate tables Gene-CS-Disease (empty DataFrame if no query was run)
    if not final_table:
        return pandas.DataFrame()
    return pandas.concat(final_table)

#### Run for loop 

In [9]:
# Diseases of interest, given as MONDO identifiers
diseases_id = [
    "MONDO:0005027",  # epilepsy
    "MONDO:0005260",  # autism
    "MONDO:0001071",  # intellectual disability
]

# Run bp_to_cs, keeping only the Biological Processes with 10 or fewer associated genes
test = bp_to_cs(bp_output_id, threshold_gene=10, diseases_id=diseases_id)


BTE will find paths that join 'vesicle fusion' and 'Gene'. Paths will have 0 intermediate node.




==== Step #1: Query path planning ====

Because vesicle fusion is of type 'BiologicalProcess', BTE will query our meta-KG for APIs that can take 'BiologicalProcess' as input and 'Gene' as output

BTE found 1 apis:

API 1. cord_biological_process(1 API call)


==== Step #2: Query path execution ====
NOTE: API requests are dispatched in parallel, so the list of APIs below is ordered by query time.

API 1.1: https://biothings.ncats.io/cord_bp/query?fields=associated_with (POST -d q=GO:0006906&scopes=go)


==== Step #3: Output normalization ====

API 1.1 cord_biological_process: 16 hits

After id-to-object translation, BTE retrieved 16 unique objects.



In the #1 query, BTE found 16 unique Gene nodes

BTE will find paths that join 'synaptic vesicle exocytosis' and 'Gene'. Paths will have 0 intermediate node.




==== Step #1: Query path planning ====

Because synaptic vesicle exocytosis is


BTE will find paths that join 'response to glucose' and 'Gene'. Paths will have 0 intermediate node.




==== Step #1: Query path planning ====

Because response to glucose is of type 'BiologicalProcess', BTE will query our meta-KG for APIs that can take 'BiologicalProcess' as input and 'Gene' as output

BTE found 1 apis:

API 1. cord_biological_process(1 API call)


==== Step #2: Query path execution ====
NOTE: API requests are dispatched in parallel, so the list of APIs below is ordered by query time.

API 1.1: https://biothings.ncats.io/cord_bp/query?fields=associated_with (POST -d q=GO:0009749&scopes=go)


==== Step #3: Output normalization ====

API 1.1 cord_biological_process: 20 hits

After id-to-object translation, BTE retrieved 20 unique objects.



In the #1 query, BTE found 20 unique Gene nodes

BTE will find paths that join 'exocytosis' and 'Gene'. Paths will have 0 intermediate node.




==== Step #1: Query path planning ====

Because exocytosis is of type 'BiologicalProc


BTE will find paths that join 'Golgi to plasma membrane protein transport' and 'Gene'. Paths will have 0 intermediate node.




==== Step #1: Query path planning ====

Because Golgi to plasma membrane protein transport is of type 'BiologicalProcess', BTE will query our meta-KG for APIs that can take 'BiologicalProcess' as input and 'Gene' as output

BTE found 1 apis:

API 1. cord_biological_process(1 API call)


==== Step #2: Query path execution ====
NOTE: API requests are dispatched in parallel, so the list of APIs below is ordered by query time.

API 1.1: https://biothings.ncats.io/cord_bp/query?fields=associated_with (POST -d q=GO:0043001&scopes=go)


==== Step #3: Output normalization ====

API 1.1 cord_biological_process: No hits

After id-to-object translation, BTE retrieved 0 unique objects.




BTE will find paths that join 'regulation of histamine secretion by mast cell' and 'Gene'. Paths will have 0 intermediate node.




==== Step #1: Query path planning ====

Because regu


BTE will find paths that join 'long-term synaptic potentiation' and 'Gene'. Paths will have 0 intermediate node.




==== Step #1: Query path planning ====

Because long-term synaptic potentiation is of type 'BiologicalProcess', BTE will query our meta-KG for APIs that can take 'BiologicalProcess' as input and 'Gene' as output

BTE found 1 apis:

API 1. cord_biological_process(1 API call)


==== Step #2: Query path execution ====
NOTE: API requests are dispatched in parallel, so the list of APIs below is ordered by query time.

API 1.1: https://biothings.ncats.io/cord_bp/query?fields=associated_with (POST -d q=GO:0060291&scopes=go)


==== Step #3: Output normalization ====

API 1.1 cord_biological_process: No hits

After id-to-object translation, BTE retrieved 0 unique objects.




BTE will find paths that join 'calcium-ion regulated exocytosis' and 'Gene'. Paths will have 0 intermediate node.




==== Step #1: Query path planning ====

Because calcium-ion regulated exocytosis is of t

API 1.1: https://platform-api.opentargets.io/v3/platform/public/evidence/filter?target=ENSG00000102882&datasource=chembl&size=100&fields=drug
API 9.3: https://biothings.ncats.io/semmedgene/query?fields=affects (POST -d q=C1366765&scopes=umls)
API 9.1: https://biothings.ncats.io/semmedgene/query?fields=negatively_regulates (POST -d q=C1366765&scopes=umls)
API 9.10: https://biothings.ncats.io/semmedgene/query?fields=positively_regulates (POST -d q=C1366765&scopes=umls)
API 9.9: https://biothings.ncats.io/semmedgene/query?fields=disrupted_by (POST -d q=C1366765&scopes=umls)
API 9.2: https://biothings.ncats.io/semmedgene/query?fields=physically_interacts_with (POST -d q=C1366765&scopes=umls)
API 9.5: https://biothings.ncats.io/semmedgene/query?fields=related_to (POST -d q=C1366765&scopes=umls)
API 2.1: https://biothings.ncats.io/cord_gene/query?fields=associated_with (POST -d q=6877&scopes=hgnc)
API 9.8: https://biothings.ncats.io/semmedgene/query?fields=disrupts (POST -d q=C1366765&scopes

API 7.1: https://automat.renci.org/pharos/gene/chemical_substance/NCBIGene:5595
API 6.1: http://dgidb.genome.wustl.edu/api/v2/interactions.json?genes=MAPK3


==== Step #3: Output normalization ====

API 1.1 opentarget: 12 hits
API 9.1 semmed_gene: 14 hits
API 9.2 semmed_gene: 143 hits
API 9.3 semmed_gene: No hits
API 9.4 semmed_gene: No hits
API 9.5 semmed_gene: No hits
API 6.1 dgidb: No hits
API 5.1 scibite: No hits
API 9.6 semmed_gene: 147 hits
API 10.1 mychem: 7 hits
API 3.1 scigraph: No hits
API 2.1 cord_gene: 96 hits
API 9.7 semmed_gene: 99 hits
API 7.1 pharos: No hits
API 9.8 semmed_gene: No hits
API 8.1 chembio: No hits
API 10.2 mychem: No hits
API 9.9 semmed_gene: No hits
API 4.1 hmdb: No hits
API 10.3 mychem: 11 hits
API 9.10 semmed_gene: 34 hits

After id-to-object translation, BTE retrieved 434 unique objects.



==== Step #1: Query path planning ====

Because autism (disease) is of type 'Disease', BTE will query our meta-KG for APIs that can take 'Disease' as input and 'Che

API 4.1: https://automat.renci.org/hmdb/disease/chemical_substance/MONDO:0001071
API 3.1: https://automat.renci.org/cord19-scibite/disease/chemical_substance/MONDO:0001071
API 2.1: https://automat.renci.org/cord19-scigraph/disease/chemical_substance/MONDO:0001071
API 6.1: https://automat.renci.org/pharos/disease/chemical_substance/MONDO:0001071


==== Step #3: Output normalization ====

API 7.1 mychem: No hits
API 4.1 hmdb: No hits
API 5.1 semmed_disease: No hits
API 5.2 semmed_disease: No hits
API 7.2 mychem: No hits
API 5.3 semmed_disease: No hits
API 1.1 cord_disease: 10 hits
API 2.1 scigraph: No hits
API 5.4 semmed_disease: No hits
API 5.5 semmed_disease: No hits
API 3.1 scibite: No hits
API 5.6 semmed_disease: No hits
API 6.1 pharos: No hits
API 5.7 semmed_disease: No hits
API 5.8 semmed_disease: No hits
API 5.9 semmed_disease: No hits
API 5.10 semmed_disease: No hits
API 5.11 semmed_disease: No hits
API 5.12 semmed_disease: No hits
API 5.13 semmed_disease: No hits
API 5.14 semmed

API 8.1 mydisease: 155 hits
API 5.15 semmed_disease: No hits

After id-to-object translation, BTE retrieved 1318 unique objects.



BTE found 2 unique intermediate nodes connecting 'CPLX2' and 'epilepsy'

BTE will find paths that join 'CPLX2' and 'autism (disease)'. Paths will have 1 intermediate node.

Intermediate node #1 will have these type constraints: ChemicalSubstance



==== Step #1: Query path planning ====

Because CPLX2 is of type 'Gene', BTE will query our meta-KG for APIs that can take 'Gene' as input and 'ChemicalSubstance' as output

BTE found 10 apis:

API 1. opentarget(1 API call)
API 2. cord_gene(1 API call)
API 3. scigraph(1 API call)
API 4. scibite(1 API call)
API 5. hmdb(1 API call)
API 6. dgidb(1 API call)
API 7. pharos(1 API call)
API 8. chembio(1 API call)
API 9. semmed_gene(10 API calls)
API 10. mychem(3 API calls)


==== Step #2: Query path execution ====
NOTE: API requests are dispatched in parallel, so the list of APIs below is ordered by query time.

API 1.


After id-to-object translation, BTE retrieved 4 unique objects.



==== Step #1: Query path planning ====

Because intellectual disability is of type 'Disease', BTE will query our meta-KG for APIs that can take 'Disease' as input and 'ChemicalSubstance' as output

BTE found 8 apis:

API 1. cord_disease(1 API call)
API 2. scigraph(1 API call)
API 3. hmdb(1 API call)
API 4. scibite(1 API call)
API 5. semmed_disease(15 API calls)
API 6. pharos(1 API call)
API 7. mychem(2 API calls)
API 8. mydisease(1 API call)


==== Step #2: Query path execution ====
NOTE: API requests are dispatched in parallel, so the list of APIs below is ordered by query time.

API 5.2: https://biothings.ncats.io/semmed/query?fields=disrupted_by (POST -d q=C3714756&scopes=umls)
API 5.1: https://biothings.ncats.io/semmed/query?fields=physically_interacts_with (POST -d q=C3714756&scopes=umls)
API 8.1: https://mydisease.info/v1/query?fields=ctd.chemical_related_to_disease (POST -d q=D008607&scopes=mondo.xrefs.mesh, dis

API 5.8: https://biothings.ncats.io/semmed/query?fields=treated_by (POST -d q=C0014544&scopes=umls)
API 4.1: https://automat.renci.org/hmdb/disease/chemical_substance/MONDO:0005027
API 5.9: https://biothings.ncats.io/semmed/query?fields=related_to (POST -d q=C0014544&scopes=umls)
API 5.15: https://biothings.ncats.io/semmed/query?fields=positively_regulated_by (POST -d q=C0014544&scopes=umls)
API 5.12: https://biothings.ncats.io/semmed/query?fields=affected_by (POST -d q=C0014544&scopes=umls)
API 3.1: https://automat.renci.org/cord19-scibite/disease/chemical_substance/MONDO:0005027
API 5.11: https://biothings.ncats.io/semmed/query?fields=positively_regulates (POST -d q=C0014544&scopes=umls)
API 1.1: https://biothings.ncats.io/cord_disease/query?fields=associated_with (POST -d q=DOID:1826&scopes=doid)
API 6.1: https://automat.renci.org/pharos/disease/chemical_substance/MONDO:0005027
API 2.1: https://automat.renci.org/cord19-scigraph/disease/chemical_substance/MONDO:0005027


==== Step #3

API 2.1: https://biothings.ncats.io/cord_gene/query?fields=associated_with (POST -d q=21155&scopes=hgnc)
API 5.1: https://automat.renci.org/cord19-scibite/gene/chemical_substance/NCBIGene:29116
API 3.1: https://automat.renci.org/cord19-scigraph/gene/chemical_substance/NCBIGene:29116
API 8.1: https://automat.renci.org/chembio/gene/chemical_substance/NCBIGene:29116
API 4.1: https://automat.renci.org/hmdb/gene/chemical_substance/NCBIGene:29116
API 7.1: https://automat.renci.org/pharos/gene/chemical_substance/NCBIGene:29116
API 6.1: http://dgidb.genome.wustl.edu/api/v2/interactions.json?genes=MYLIP


==== Step #3: Output normalization ====

API 1.1 opentarget: No hits
API 9.1 semmed_gene: 4 hits
API 9.2 semmed_gene: 11 hits
API 9.3 semmed_gene: No hits
API 9.4 semmed_gene: No hits
API 9.5 semmed_gene: No hits
API 6.1 dgidb: No hits
API 5.1 scibite: No hits
API 9.6 semmed_gene: 6 hits
API 10.1 mychem: No hits
API 3.1 scigraph: No hits
API 2.1 cord_gene: 95 hits
API 9.7 semmed_gene: 4 hits
A

API 6.1 dgidb: No hits
API 5.1 scibite: No hits
API 9.6 semmed_gene: 6 hits
API 10.1 mychem: No hits
API 3.1 scigraph: No hits
API 2.1 cord_gene: 93 hits
API 9.7 semmed_gene: 3 hits
API 7.1 pharos: No hits
API 9.8 semmed_gene: No hits
API 8.1 chembio: No hits
API 10.2 mychem: No hits
API 9.9 semmed_gene: No hits
API 4.1 hmdb: No hits
API 10.3 mychem: No hits
API 9.10 semmed_gene: 1 hits

After id-to-object translation, BTE retrieved 112 unique objects.



==== Step #1: Query path planning ====

Because epilepsy is of type 'Disease', BTE will query our meta-KG for APIs that can take 'Disease' as input and 'ChemicalSubstance' as output

BTE found 8 apis:

API 1. cord_disease(1 API call)
API 2. scigraph(1 API call)
API 3. hmdb(1 API call)
API 4. scibite(1 API call)
API 5. semmed_disease(15 API calls)
API 6. pharos(1 API call)
API 7. mychem(2 API calls)
API 8. mydisease(1 API call)


==== Step #2: Query path execution ====
NOTE: API requests are dispatched in parallel, so the list of APIs 

API 6.1: https://mydisease.info/v1/query?fields=ctd.chemical_related_to_disease (POST -d q=D001321&scopes=mondo.xrefs.mesh, disgenet.xrefs.mesh)
API 1.1: https://biothings.ncats.io/cord_disease/query?fields=associated_with (POST -d q=DOID:12849&scopes=doid)
API 4.1: https://automat.renci.org/hmdb/disease/chemical_substance/MONDO:0005260
API 2.1: https://automat.renci.org/cord19-scigraph/disease/chemical_substance/MONDO:0005260
API 3.1: https://automat.renci.org/cord19-scibite/disease/chemical_substance/MONDO:0005260
API 5.1: https://automat.renci.org/pharos/disease/chemical_substance/MONDO:0005260


==== Step #3: Output normalization ====

API 4.1 hmdb: No hits
API 1.1 cord_disease: 43 hits
API 2.1 scigraph: No hits
API 3.1 scibite: No hits
API 5.1 pharos: No hits
API 6.1 mydisease: 105 hits

After id-to-object translation, BTE retrieved 139 unique objects.



BTE found 11 unique intermediate nodes connecting 'MLXIP' and 'autism (disease)'

BTE will find paths that join 'MLXIP' and 'in


After id-to-object translation, BTE retrieved 35 unique objects.



BTE found 2 unique intermediate nodes connecting 'MLXIP' and 'intellectual disability'

BTE will find paths that join 'KITLG' and 'epilepsy'. Paths will have 1 intermediate node.

Intermediate node #1 will have these type constraints: ChemicalSubstance



==== Step #1: Query path planning ====

Because KITLG is of type 'Gene', BTE will query our meta-KG for APIs that can take 'Gene' as input and 'ChemicalSubstance' as output

BTE found 10 apis:

API 1. opentarget(1 API call)
API 2. cord_gene(1 API call)
API 3. scigraph(1 API call)
API 4. scibite(1 API call)
API 5. hmdb(1 API call)
API 6. dgidb(1 API call)
API 7. pharos(1 API call)
API 8. chembio(1 API call)
API 9. semmed_gene(10 API calls)
API 10. mychem(3 API calls)


==== Step #2: Query path execution ====
NOTE: API requests are dispatched in parallel, so the list of APIs below is ordered by query time.

API 1.1: https://platform-api.opentargets.io/v3/platform/publi

API 1.1: https://platform-api.opentargets.io/v3/platform/public/evidence/filter?target=ENSG00000049130&datasource=chembl&size=100&fields=drug
API 9.2: https://biothings.ncats.io/semmedgene/query?fields=physically_interacts_with (POST -d q=C1366480&scopes=umls)
API 9.3: https://biothings.ncats.io/semmedgene/query?fields=affects (POST -d q=C1366480&scopes=umls)
API 9.4: https://biothings.ncats.io/semmedgene/query?fields=affected_by (POST -d q=C1366480&scopes=umls)
API 9.6: https://biothings.ncats.io/semmedgene/query?fields=positively_regulated_by (POST -d q=C1366480&scopes=umls)
API 9.8: https://biothings.ncats.io/semmedgene/query?fields=disrupts (POST -d q=C1366480&scopes=umls)
API 2.1: https://biothings.ncats.io/cord_gene/query?fields=associated_with (POST -d q=6343&scopes=hgnc)
API 9.7: https://biothings.ncats.io/semmedgene/query?fields=negatively_regulated_by (POST -d q=C1366480&scopes=umls)
API 9.1: https://biothings.ncats.io/semmedgene/query?fields=negatively_regulates (POST -d q=C

API 5.14: https://biothings.ncats.io/semmed/query?fields=negatively_regulated_by (POST -d q=C3714756&scopes=umls)
API 5.13: https://biothings.ncats.io/semmed/query?fields=derives_info (POST -d q=C3714756&scopes=umls)
API 1.1: https://biothings.ncats.io/cord_disease/query?fields=associated_with (POST -d q=DOID:1059&scopes=doid)
API 5.11: https://biothings.ncats.io/semmed/query?fields=positively_regulates (POST -d q=C3714756&scopes=umls)
API 5.5: https://biothings.ncats.io/semmed/query?fields=derives_from (POST -d q=C3714756&scopes=umls)
API 5.8: https://biothings.ncats.io/semmed/query?fields=treated_by (POST -d q=C3714756&scopes=umls)
API 5.1: https://biothings.ncats.io/semmed/query?fields=physically_interacts_with (POST -d q=C3714756&scopes=umls)
API 8.1: https://mydisease.info/v1/query?fields=ctd.chemical_related_to_disease (POST -d q=D008607&scopes=mondo.xrefs.mesh, disgenet.xrefs.mesh)
API 5.7: https://biothings.ncats.io/semmed/query?fields=negatively_regulates (POST -d q=C3714756&s

API 5.3: https://biothings.ncats.io/semmed/query?fields=coexists_with (POST -d q=C0014544&scopes=umls)
API 5.15: https://biothings.ncats.io/semmed/query?fields=positively_regulated_by (POST -d q=C0014544&scopes=umls)
API 5.1: https://biothings.ncats.io/semmed/query?fields=physically_interacts_with (POST -d q=C0014544&scopes=umls)
API 5.6: https://biothings.ncats.io/semmed/query?fields=caused_by (POST -d q=C0014544&scopes=umls)
API 5.10: https://biothings.ncats.io/semmed/query?fields=affects (POST -d q=C0014544&scopes=umls)
API 5.13: https://biothings.ncats.io/semmed/query?fields=derives_info (POST -d q=C0014544&scopes=umls)
API 3.1: https://automat.renci.org/cord19-scibite/disease/chemical_substance/MONDO:0005027
API 2.1: https://automat.renci.org/cord19-scigraph/disease/chemical_substance/MONDO:0005027
API 4.1: https://automat.renci.org/hmdb/disease/chemical_substance/MONDO:0005027
API 6.1: https://automat.renci.org/pharos/disease/chemical_substance/MONDO:0005027


==== Step #3: Outpu

API 5.1: https://automat.renci.org/cord19-scibite/gene/chemical_substance/NCBIGene:4155
API 3.1: https://automat.renci.org/cord19-scigraph/gene/chemical_substance/NCBIGene:4155
API 4.1: https://automat.renci.org/hmdb/gene/chemical_substance/NCBIGene:4155
API 8.1: https://automat.renci.org/chembio/gene/chemical_substance/NCBIGene:4155
API 7.1: https://automat.renci.org/pharos/gene/chemical_substance/NCBIGene:4155
API 6.1: http://dgidb.genome.wustl.edu/api/v2/interactions.json?genes=MBP


==== Step #3: Output normalization ====

API 1.1 opentarget: No hits
API 9.1 semmed_gene: 7 hits
API 9.2 semmed_gene: 51 hits
API 9.3 semmed_gene: No hits
API 9.4 semmed_gene: No hits
API 9.5 semmed_gene: No hits
API 6.1 dgidb: No hits
API 5.1 scibite: No hits
API 9.6 semmed_gene: 13 hits
API 10.1 mychem: No hits
API 3.1 scigraph: No hits
API 2.1 cord_gene: 40 hits
API 9.7 semmed_gene: 10 hits
API 7.1 pharos: No hits
API 9.8 semmed_gene: No hits
API 8.1 chembio: No hits
API 10.2 mychem: No hits
API 9.9 


After id-to-object translation, BTE retrieved 46 unique objects.



==== Step #1: Query path planning ====

Because epilepsy is of type 'Disease', BTE will query our meta-KG for APIs that can take 'Disease' as input and 'ChemicalSubstance' as output

BTE found 8 apis:

API 1. cord_disease(1 API call)
API 2. scigraph(1 API call)
API 3. hmdb(1 API call)
API 4. scibite(1 API call)
API 5. semmed_disease(15 API calls)
API 6. pharos(1 API call)
API 7. mychem(2 API calls)
API 8. mydisease(1 API call)


==== Step #2: Query path execution ====
NOTE: API requests are dispatched in parallel, so the list of APIs below is ordered by query time.

API 5.10: https://biothings.ncats.io/semmed/query?fields=affects (POST -d q=C0014544&scopes=umls)
API 8.1: https://mydisease.info/v1/query?fields=ctd.chemical_related_to_disease (POST -d q=D004827&scopes=mondo.xrefs.mesh, disgenet.xrefs.mesh)
API 5.1: https://biothings.ncats.io/semmed/query?fields=physically_interacts_with (POST -d q=C0014544&scopes=umls)


API 4.1: https://automat.renci.org/hmdb/disease/chemical_substance/MONDO:0005260
API 3.1: https://automat.renci.org/cord19-scibite/disease/chemical_substance/MONDO:0005260
API 2.1: https://automat.renci.org/cord19-scigraph/disease/chemical_substance/MONDO:0005260
API 5.1: https://automat.renci.org/pharos/disease/chemical_substance/MONDO:0005260


==== Step #3: Output normalization ====

API 4.1 hmdb: No hits
API 1.1 cord_disease: 43 hits
API 2.1 scigraph: No hits
API 3.1 scibite: No hits
API 5.1 pharos: No hits
API 6.1 mydisease: 105 hits

After id-to-object translation, BTE retrieved 139 unique objects.



BTE found 4 unique intermediate nodes connecting 'RNASE3' and 'autism (disease)'

BTE will find paths that join 'RNASE3' and 'intellectual disability'. Paths will have 1 intermediate node.

Intermediate node #1 will have these type constraints: ChemicalSubstance



==== Step #1: Query path planning ====

Because RNASE3 is of type 'Gene', BTE will query our meta-KG for APIs that can 


BTE found 10 apis:

API 1. opentarget(1 API call)
API 2. cord_gene(1 API call)
API 3. scigraph(1 API call)
API 4. scibite(1 API call)
API 5. hmdb(1 API call)
API 6. dgidb(1 API call)
API 7. pharos(1 API call)
API 8. chembio(1 API call)
API 9. semmed_gene(10 API calls)
API 10. mychem(3 API calls)


==== Step #2: Query path execution ====
NOTE: API requests are dispatched in parallel, so the list of APIs below is ordered by query time.

API 1.1: https://platform-api.opentargets.io/v3/platform/public/evidence/filter?target=ENSG00000186652&datasource=chembl&size=100&fields=drug
API 9.7: https://biothings.ncats.io/semmedgene/query?fields=negatively_regulated_by (POST -d q=C1418889&scopes=umls)
API 9.9: https://biothings.ncats.io/semmedgene/query?fields=disrupted_by (POST -d q=C1418889&scopes=umls)
API 9.6: https://biothings.ncats.io/semmedgene/query?fields=positively_regulated_by (POST -d q=C1418889&scopes=umls)
API 9.4: https://biothings.ncats.io/semmedgene/query?fields=affected_by (POST 

API 5.1: https://automat.renci.org/cord19-scibite/gene/chemical_substance/NCBIGene:5553
API 4.1: https://automat.renci.org/hmdb/gene/chemical_substance/NCBIGene:5553
API 3.1: https://automat.renci.org/cord19-scigraph/gene/chemical_substance/NCBIGene:5553
API 8.1: https://automat.renci.org/chembio/gene/chemical_substance/NCBIGene:5553
API 7.1: https://automat.renci.org/pharos/gene/chemical_substance/NCBIGene:5553
API 6.1: http://dgidb.genome.wustl.edu/api/v2/interactions.json?genes=PRG2


==== Step #3: Output normalization ====

API 1.1 opentarget: No hits
API 9.1 semmed_gene: No hits
API 9.2 semmed_gene: 7 hits
API 9.3 semmed_gene: No hits
API 9.4 semmed_gene: No hits
API 9.5 semmed_gene: No hits
API 6.1 dgidb: No hits
API 5.1 scibite: No hits
API 9.6 semmed_gene: No hits
API 10.1 mychem: 2 hits
API 3.1 scigraph: No hits
API 2.1 cord_gene: 31 hits
API 9.7 semmed_gene: No hits
API 7.1 pharos: No hits
API 9.8 semmed_gene: No hits
API 8.1 chembio: No hits
API 10.2 mychem: No hits
API 9.9 

API 4.1: https://automat.renci.org/hmdb/disease/chemical_substance/MONDO:0001071
API 2.1: https://automat.renci.org/cord19-scigraph/disease/chemical_substance/MONDO:0001071
API 3.1: https://automat.renci.org/cord19-scibite/disease/chemical_substance/MONDO:0001071
API 6.1: https://automat.renci.org/pharos/disease/chemical_substance/MONDO:0001071


==== Step #3: Output normalization ====

API 7.1 mychem: No hits
API 4.1 hmdb: No hits
API 5.1 semmed_disease: No hits
API 5.2 semmed_disease: No hits
API 7.2 mychem: No hits
API 5.3 semmed_disease: No hits
API 1.1 cord_disease: 10 hits
API 2.1 scigraph: No hits
API 5.4 semmed_disease: No hits
API 5.5 semmed_disease: No hits
API 3.1 scibite: No hits
API 5.6 semmed_disease: No hits
API 6.1 pharos: No hits
API 5.7 semmed_disease: No hits
API 5.8 semmed_disease: No hits
API 5.9 semmed_disease: No hits
API 5.10 semmed_disease: No hits
API 5.11 semmed_disease: No hits
API 5.12 semmed_disease: No hits
API 5.13 semmed_disease: No hits
API 5.14 semmed

API 8.1 mydisease: 155 hits
API 5.15 semmed_disease: No hits

After id-to-object translation, BTE retrieved 1318 unique objects.



BTE found 533 unique intermediate nodes connecting 'CA2' and 'epilepsy'

BTE will find paths that join 'CA2' and 'autism (disease)'. Paths will have 1 intermediate node.

Intermediate node #1 will have these type constraints: ChemicalSubstance



==== Step #1: Query path planning ====

Because CA2 is of type 'Gene', BTE will query our meta-KG for APIs that can take 'Gene' as input and 'ChemicalSubstance' as output

BTE found 10 apis:

API 1. opentarget(1 API call)
API 2. cord_gene(1 API call)
API 3. scigraph(1 API call)
API 4. scibite(1 API call)
API 5. hmdb(1 API call)
API 6. dgidb(1 API call)
API 7. pharos(1 API call)
API 8. chembio(1 API call)
API 9. semmed_gene(10 API calls)
API 10. mychem(3 API calls)


==== Step #2: Query path execution ====
NOTE: API requests are dispatched in parallel, so the list of APIs below is ordered by query time.

API 1.1: h

API 10.1 mychem: 99 hits
API 3.1 scigraph: No hits
API 2.1 cord_gene: 251 hits
API 9.7 semmed_gene: 1051 hits
API 7.1 pharos: No hits
API 9.8 semmed_gene: No hits
API 8.1 chembio: No hits
API 10.2 mychem: 2 hits
API 9.9 semmed_gene: No hits
API 4.1 hmdb: No hits
API 10.3 mychem: 64 hits
API 9.10 semmed_gene: 685 hits

After id-to-object translation, BTE retrieved 3019 unique objects.



==== Step #1: Query path planning ====

Because intellectual disability is of type 'Disease', BTE will query our meta-KG for APIs that can take 'Disease' as input and 'ChemicalSubstance' as output

BTE found 8 apis:

API 1. cord_disease(1 API call)
API 2. scigraph(1 API call)
API 3. hmdb(1 API call)
API 4. scibite(1 API call)
API 5. semmed_disease(15 API calls)
API 6. pharos(1 API call)
API 7. mychem(2 API calls)
API 8. mydisease(1 API call)


==== Step #2: Query path execution ====
NOTE: API requests are dispatched in parallel, so the list of APIs below is ordered by query time.

API 5.13: https://biot

In [10]:
# Show the `test` results table; pandas elides the middle rows in the rendered output
test

Unnamed: 0,BiologicalProcess,pred_BP_Gene,pred_source,input,input_type,pred1,pred1_source,pred1_api,pred1_pubmed,node1_type,node1_name,node1_id,pred2,pred2_source,pred2_api,pred2_pubmed,output_type,output_name,output_id
0,SYNAPTIC VESICLE EXOCYTOSIS,related_to,,MAPK3,Gene,physically_interacts_with,SEMMED,SEMMED Gene API,862658622370345,ChemicalSubstance,C0040549,UMLS:C0040549,treated_by,SEMMED,SEMMED Disease API,21737204,ChemicalSubstance,EPILEPSY,MONDO:MONDO:0005027
1,SYNAPTIC VESICLE EXOCYTOSIS,related_to,,MAPK3,Gene,physically_interacts_with,SEMMED,SEMMED Gene API,862658622370345,ChemicalSubstance,C0040549,UMLS:C0040549,caused_by,SEMMED,SEMMED Disease API,2084085022771692,ChemicalSubstance,EPILEPSY,MONDO:MONDO:0005027
2,SYNAPTIC VESICLE EXOCYTOSIS,related_to,,MAPK3,Gene,physically_interacts_with,SEMMED,SEMMED Gene API,862658622370345,ChemicalSubstance,C0040549,UMLS:C0040549,affected_by,SEMMED,SEMMED Disease API,9610917,ChemicalSubstance,EPILEPSY,MONDO:MONDO:0005027
3,SYNAPTIC VESICLE EXOCYTOSIS,related_to,,MAPK3,Gene,physically_interacts_with,SEMMED,SEMMED Gene API,19046339,ChemicalSubstance,C0243076,UMLS:C0243076,treated_by,SEMMED,SEMMED Disease API,"10511950,18350576,22788917,27434597,3359104,10...",ChemicalSubstance,EPILEPSY,MONDO:MONDO:0005027
4,SYNAPTIC VESICLE EXOCYTOSIS,related_to,,MAPK3,Gene,physically_interacts_with,SEMMED,SEMMED Gene API,19046339,ChemicalSubstance,C0243076,UMLS:C0243076,caused_by,SEMMED,SEMMED Disease API,12151117152505889789839,ChemicalSubstance,EPILEPSY,MONDO:MONDO:0005027
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23,GLUTAMATE SECRETION,related_to,,CA2,Gene,negatively_regulated_by,SEMMED,SEMMED Gene API,25508309578025,ChemicalSubstance,"5,5-DIPHENYL-IMIDAZOLIDINE-2,4-DIONE","name:5,5-DIPHENYL-IMIDAZOLIDINE-2,4-DIONE",related_to,ctd,mydisease.info API,10798072719976572945299212184,ChemicalSubstance,INTELLECTUAL DISABILITIES,MONDO:MONDO:0001071
24,GLUTAMATE SECRETION,related_to,,CA2,Gene,physically_interacts_with,SEMMED,SEMMED Gene API,8609904929149491459079182535,ChemicalSubstance,"BIPHENYLS, POLYCHLORINATED","name:BIPHENYLS, POLYCHLORINATED",related_to,ctd,mydisease.info API,27548254,ChemicalSubstance,INTELLECTUAL DISABILITIES,MONDO:MONDO:0001071
25,GLUTAMATE SECRETION,related_to,,CA2,Gene,positively_regulated_by,SEMMED,SEMMED Gene API,175736362765534811083102,ChemicalSubstance,"BIPHENYLS, POLYCHLORINATED","name:BIPHENYLS, POLYCHLORINATED",related_to,ctd,mydisease.info API,27548254,ChemicalSubstance,INTELLECTUAL DISABILITIES,MONDO:MONDO:0001071
26,GLUTAMATE SECRETION,related_to,,CA2,Gene,negatively_regulated_by,SEMMED,SEMMED Gene API,9182535,ChemicalSubstance,"BIPHENYLS, POLYCHLORINATED","name:BIPHENYLS, POLYCHLORINATED",related_to,ctd,mydisease.info API,27548254,ChemicalSubstance,INTELLECTUAL DISABILITIES,MONDO:MONDO:0001071


## Cytoscape 
Documentation https://dash.plotly.com/cytoscape

### Data wrangling for Cytoscape 

In [16]:
# Columns needed to describe each
# BiologicalProcess -> Gene -> ChemicalSubstance -> Disease path
columns_of_interest = [
    "BiologicalProcess", "pred_BP_Gene", "pred_source",
    "input", "pred1", "pred1_pubmed",
    "node1_name", "pred2", "pred2_pubmed",
    "output_name",
]

# Original column name -> descriptive name used in the rest of the notebook
column_renames = {
    "pred_source": "source_BP_Gene",
    "input": "Gene",
    "pred1": "pred_Gene_CS",
    "pred1_pubmed": "source_Gene_CS",
    "node1_name": "ChemicalSubstance",
    "pred2": "pred_CS_Dis",
    "pred2_pubmed": "source_CS_Dis",
    "output_name": "Disease",
}

# Select first, then rename, so the result is a new frame and `test` is left untouched
df_entities = test[columns_of_interest].rename(columns=column_renames)

In [17]:
# Preview the filtered and renamed table
df_entities

Unnamed: 0,BiologicalProcess,pred_BP_Gene,source_BP_Gene,Gene,pred_Gene_CS,source_Gene_CS,ChemicalSubstance,pred_CS_Dis,source_CS_Dis,Disease
0,SYNAPTIC VESICLE EXOCYTOSIS,related_to,,MAPK3,physically_interacts_with,862658622370345,C0040549,treated_by,21737204,EPILEPSY
1,SYNAPTIC VESICLE EXOCYTOSIS,related_to,,MAPK3,physically_interacts_with,862658622370345,C0040549,caused_by,2084085022771692,EPILEPSY
2,SYNAPTIC VESICLE EXOCYTOSIS,related_to,,MAPK3,physically_interacts_with,862658622370345,C0040549,affected_by,9610917,EPILEPSY
3,SYNAPTIC VESICLE EXOCYTOSIS,related_to,,MAPK3,physically_interacts_with,19046339,C0243076,treated_by,"10511950,18350576,22788917,27434597,3359104,10...",EPILEPSY
4,SYNAPTIC VESICLE EXOCYTOSIS,related_to,,MAPK3,physically_interacts_with,19046339,C0243076,caused_by,12151117152505889789839,EPILEPSY
...,...,...,...,...,...,...,...,...,...,...
23,GLUTAMATE SECRETION,related_to,,CA2,negatively_regulated_by,25508309578025,"5,5-DIPHENYL-IMIDAZOLIDINE-2,4-DIONE",related_to,10798072719976572945299212184,INTELLECTUAL DISABILITIES
24,GLUTAMATE SECRETION,related_to,,CA2,physically_interacts_with,8609904929149491459079182535,"BIPHENYLS, POLYCHLORINATED",related_to,27548254,INTELLECTUAL DISABILITIES
25,GLUTAMATE SECRETION,related_to,,CA2,positively_regulated_by,175736362765534811083102,"BIPHENYLS, POLYCHLORINATED",related_to,27548254,INTELLECTUAL DISABILITIES
26,GLUTAMATE SECRETION,related_to,,CA2,negatively_regulated_by,9182535,"BIPHENYLS, POLYCHLORINATED",related_to,27548254,INTELLECTUAL DISABILITIES


In [214]:
# Convert df_entities into a list of rows: one inner list per row,
# holding that row's values in column order
my_list_entities = df_entities.values.tolist()

In [215]:
# Preview only the first rows: the full list has one entry per path and
# would flood the notebook output if displayed in its entirety
my_list_entities[:5]

[['SYNAPTIC VESICLE EXOCYTOSIS',
  'related_to',
  None,
  'MAPK3',
  'physically_interacts_with',
  '8626586,22370345',
  'C0040549',
  'treated_by',
  '21737204',
  'EPILEPSY'],
 ['SYNAPTIC VESICLE EXOCYTOSIS',
  'related_to',
  None,
  'MAPK3',
  'physically_interacts_with',
  '8626586,22370345',
  'C0040549',
  'caused_by',
  '20840850,22771692',
  'EPILEPSY'],
 ['SYNAPTIC VESICLE EXOCYTOSIS',
  'related_to',
  None,
  'MAPK3',
  'physically_interacts_with',
  '8626586,22370345',
  'C0040549',
  'affected_by',
  '9610917',
  'EPILEPSY'],
 ['SYNAPTIC VESICLE EXOCYTOSIS',
  'related_to',
  None,
  'MAPK3',
  'physically_interacts_with',
  '19046339',
  'C0243076',
  'treated_by',
  '10511950,18350576,22788917,27434597,3359104,10668446,18221206,27820603,2848606,10514878,24900180,28128443,21686307,3320347,1386786,22035233,17561422,7796325,1686756,12871085',
  'EPILEPSY'],
 ['SYNAPTIC VESICLE EXOCYTOSIS',
  'related_to',
  None,
  'MAPK3',
  'physically_interacts_with',
  '19046339',
  

In [231]:
# Reshape every path row into three (source -> target) edges.
#
# Each row of my_list_entities holds, in order:
#   [BiologicalProcess, predicate, pubmed,
#    Gene,              predicate, pubmed,
#    ChemicalSubstance, predicate, pubmed,
#    Disease]
# so the four values starting at positions 0, 3 and 6 each describe one edge:
#   (source, association type, pubmed ids, target)
source = []
target = []
association_type = []
pred_pubmed = []

for row in my_list_entities:
    for start in (0, 3, 6):
        edge_source, edge_association, edge_pubmed, edge_target = row[start:start + 4]
        source.append(edge_source)
        target.append(edge_target)
        association_type.append(edge_association)
        pred_pubmed.append(edge_pubmed)

# Number of pubmed IDs supporting each edge. The IDs arrive as one
# comma-separated string; a missing value means no supporting reference.
# pandas.isna covers both None and NaN, so a NaN placeholder never reaches
# str.count (which would raise AttributeError on a float).
number_pred_pubmed = [
    0 if pandas.isna(ids) else ids.count(',') + 1
    for ids in pred_pubmed
]

# Create data frame with generated lists (one row per edge)
d = {'source': source,
     'target': target,
     "association_type": association_type,
     "pred_pubmed": pred_pubmed,
     "number_pred_pubmed": number_pred_pubmed}
df = pandas.DataFrame(data=d)


df

Unnamed: 0,source,target,association_type,pred_pubmed,number_pred_pubmed
0,SYNAPTIC VESICLE EXOCYTOSIS,MAPK3,related_to,,0
1,MAPK3,C0040549,physically_interacts_with,862658622370345,2
2,C0040549,EPILEPSY,treated_by,21737204,1
3,SYNAPTIC VESICLE EXOCYTOSIS,MAPK3,related_to,,0
4,MAPK3,C0040549,physically_interacts_with,862658622370345,2
...,...,...,...,...,...
12874,CA2,"BIPHENYLS, POLYCHLORINATED",negatively_regulated_by,9182535,1
12875,"BIPHENYLS, POLYCHLORINATED",INTELLECTUAL DISABILITIES,related_to,27548254,1
12876,GLUTAMATE SECRETION,CA2,related_to,,0
12877,CA2,METHYLBENZENE,negatively_regulated_by,12164548,1


In [232]:
# To test results in Cytoscape: keep only the first 500 connections (edges).
# NOTE: edges come in groups of three per path and 500 is not a multiple of 3,
# so the last path is cut off after its second edge.
df = df.iloc[:500]

### Cytoscape: Elements = [ ]
We need to create the **elements []** list of dictionaries for Cytoscape. This includes the dictionaries for the **"nodes"** and the **"edges"**.
#### This is an example of the structure required for the Cytoscape elements list: 
```
elements = [
#Nodes 
{ "data": {"id": "BP1", "name": "BP1"}, 
"classes": "BiologicalProcess" }, #This variable will later help us to manipulate each class independently 
{ "data": {"id": "Gene1", "name": "Gene1"}, 
"classes": "Gene" },
....

#Edges
{'data': {'source': "BP1", 
'target': "Gene1", 
'label': "related_to", 
"weight": 2}, #Number of pubmed_id 
"group": "edges" }

]
```

##### Cytoscape: Nodes. 
First, we create the dictionaries of nodes, one for each of our entities (BP, Gene, CSub, Disease). We iterate through the elements of the source and target columns in our new data frame.

In [222]:
# Unique source entities (np.unique returns them sorted) and, for each one,
# the row position of its first occurrence in df["source"]
import numpy as np
value_source, indx_source = np.unique(df["source"].values, return_index = True)

In [223]:
# Unique target entities (np.unique returns them sorted) and, for each one,
# the row position of its first occurrence in df["target"]
value_target, indx_target = np.unique(df["target"].values, return_index = True)

In [224]:
# Cytoscape node format:
#   {"data": {"id": <name>, "name": <name>}, "classes": <entity type>}
#
# df holds three consecutive edges per path
#   (BiologicalProcess -> Gene, Gene -> ChemicalSubstance, ChemicalSubstance -> Disease),
# so the remainder of a row position divided by 3 tells which entity type sits
# in the source column and which in the target column of that row.
SOURCE_CLASS_BY_REMAINDER = {0: "BiologicalProcess", 1: "Gene", 2: "ChemicalSubstance"}
TARGET_CLASS_BY_REMAINDER = {0: "Gene", 1: "ChemicalSubstance", 2: "Disease"}

my_nodes = []
seen_node_ids = set()

# Sources are processed first, so an entity that appears in both columns is
# classified by its role as a source.
for first_rows, names, class_by_remainder in (
    (indx_source, value_source, SOURCE_CLASS_BY_REMAINDER),
    (indx_target, value_target, TARGET_CLASS_BY_REMAINDER),
):
    for first_row, name in zip(first_rows, names):
        # Every edge endpoint needs exactly one node. Skip names already added
        # so node ids stay unique, but do add targets that never occur as a
        # source (all Diseases, and e.g. the last ChemicalSubstance when df has
        # been truncated in the middle of a path).
        if name in seen_node_ids:
            continue
        seen_node_ids.add(name)
        my_nodes.append({
            "data": {
                "id": name,
                "name": name
            },
            "classes": class_by_remainder[int(first_row) % 3]})

##### Cytoscape: Edges
Then we create the dictionaries of edges. Each dictionary includes the source and target elements. We also include the association type between the source and the target, and the number of PubMed IDs that support this association. 

In [226]:
# Build one Cytoscape edge per distinct row of df: source -> target, labelled
# with the association type and weighted by the number of supporting pubmed IDs.
#
# The loop variables are deliberately NOT called `source` / `target`, so the
# lists of the same name built during data wrangling are not overwritten.

my_edges = []
seen_edges = set()  # (source, target, association, weight) tuples already emitted
for edge_source, edge_target, asso, pred_num in zip(df["source"], df["target"],
                                                    df["association_type"], df["number_pred_pubmed"]):
    # Remove duplicates while keeping first-occurrence order
    edge_key = (edge_source, edge_target, asso, pred_num)
    if edge_key in seen_edges:
        continue
    seen_edges.add(edge_key)

    # NOTE(review): label and weight are wrapped in one-element lists, while the
    # markdown example of the elements structure shows scalars - confirm which
    # form the Cytoscape app expects before changing this.
    my_edges.append({
        'data': {'source': edge_source,  # Source
                 'target': edge_target,  # Target
                 'label': [asso],  # Association type
                 "weight": [pred_num]},  # Number of pred_pubmed ids
        "group": "edges"
    })

print(my_edges)

[{'data': {'source': 'SYNAPTIC VESICLE EXOCYTOSIS', 'target': 'MAPK3', 'label': ['related_to'], 'weight': [0]}, 'group': 'edges'}, {'data': {'source': 'MAPK3', 'target': 'C0040549', 'label': ['physically_interacts_with'], 'weight': [2]}, 'group': 'edges'}, {'data': {'source': 'C0040549', 'target': 'EPILEPSY', 'label': ['treated_by'], 'weight': [1]}, 'group': 'edges'}, {'data': {'source': 'C0040549', 'target': 'EPILEPSY', 'label': ['caused_by'], 'weight': [2]}, 'group': 'edges'}, {'data': {'source': 'C0040549', 'target': 'EPILEPSY', 'label': ['affected_by'], 'weight': [1]}, 'group': 'edges'}, {'data': {'source': 'MAPK3', 'target': 'C0243076', 'label': ['physically_interacts_with'], 'weight': [1]}, 'group': 'edges'}, {'data': {'source': 'C0243076', 'target': 'EPILEPSY', 'label': ['treated_by'], 'weight': [20]}, 'group': 'edges'}, {'data': {'source': 'C0243076', 'target': 'EPILEPSY', 'label': ['caused_by'], 'weight': [3]}, 'group': 'edges'}, {'data': {'source': 'C0243076', 'target': 'EPIL

##### Concatenate my_nodes and  my_edges lists
Finally, we concatenate both lists into one. 

In [228]:
# Cytoscape elements: all node dictionaries first, followed by all edge dictionaries
my_list_elements = [*my_nodes, *my_edges]

##### Save into a file and run it in PyCharm
We save the list into a JSON file and export it into PyCharm. 

In [40]:
# #Save list on file using json 
# import json
# with open("vamp2_500_w11.txt", "w") as fp:
#     json.dump(my_list_elements, fp)