# Example: Graph generation from a gene

This notebook shows how to use pyBiodatafuse to annotate a list of genes and generate a knowledge graph (KG) from the resulting data.

In [1]:
# Root of the local pyBiodatafuse checkout; adjust it to your own machine.
# A raw string keeps the backslashes of the Windows path literal: in a normal
# string "\B" and "\p" are invalid escape sequences and trigger a warning on
# recent Python versions.
new_path = r"E:\BioDataFuse\pyBiodatafuse"

import os

# Set the current working directory so the relative paths used below resolve
os.chdir(new_path)

current_dir = os.getcwd()
print("Current directory:", current_dir)

Current directory: E:\BioDataFuse\pyBiodatafuse


In [2]:
# Import modules
import pickle

import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
from dotenv import load_dotenv

from pyBiodatafuse import id_mapper
from pyBiodatafuse.annotators import (
    bgee,
    disgenet,
    minerva,
    molmedb,
    opentargets,
    pubchem,
    stringdb,
    wikipathways,
)
from pyBiodatafuse.constants import (
    BGEE,
    DISGENET,
    MINERVA,
    MOLMEDB_COMPOUND_PROTEIN_COL,
    MOLMEDB_PROTEIN_COMPOUND_COL,
    OPENTARGETS_COMPOUND_COL,
    OPENTARGETS_DISEASE_COL,
    OPENTARGETS_GO_COL,
    OPENTARGETS_LOCATION_COL,
    OPENTARGETS_REACTOME_COL,
    PUBCHEM_COMPOUND_ASSAYS_COL,
    STRING,
    WIKIPATHWAYS,
)
from pyBiodatafuse.graph import generator
from pyBiodatafuse.utils import combine_sources

### Load the input list and convert it to a dataframe

In [3]:
# Build the path from separate components so it resolves on every operating
# system (a literal backslash-separated path only works on Windows).
data_input = pd.read_csv(
    os.path.join(os.getcwd(), "examples", "usecases", "PCS", "PCS_gene_list.csv")
)
print("Number of genes:", len(data_input))
data_input.head()

Number of genes: 2023


Unnamed: 0,identifier
0,LOC729609
1,LOC105374060
2,DMP1
3,PNLIP
4,OR4N3P


### Entity resolution using BridgeDB

In [4]:
# Resolve the input identifiers with BridgeDb. The arguments declare them as
# human ("Human") HGNC identifiers and request every output data source ("All").
# The call returns two objects: the mapping dataframe and its metadata.
bridgdb_df, bridgdb_metadata = id_mapper.bridgedb_xref(
    identifiers=data_input,
    input_species="Human",
    input_datasource="HGNC",
    output_datasource="All",
)
bridgdb_df.head()

Unnamed: 0,identifier,identifier.source,target,target.source
0,LOC729609,HGNC,,
1,LOC105374060,HGNC,,
2,DMP1,HGNC,XP_011530008,RefSeq
3,DMP1,HGNC,2735121,Affy
4,DMP1,HGNC,2735120,Affy


### Disease annotation from DisGeNET


Add your DisGeNET API key to the main folder of the project:

**1)** Create a .env File and add DISGENET_API_KEY to it:

DISGENET_API_KEY="your-API-key-value"

**2)** Install *python-dotenv*:
```
pip install python-dotenv
```

In [5]:
# Load the variables defined in the .env file into the process environment
load_dotenv()
# Look up the DisGeNET key; the result is None when the variable is not set
disgenet_api_key = os.environ.get("DISGENET_API_KEY")

In [6]:
# To regenerate the cached annotation instead of loading it, uncomment:
# disgenet_df, disgenet_metadata = disgenet.get_gene_disease(
#     api_key=disgenet_api_key, bridgedb_df=bridgdb_df
# )
# disgenet_df.to_pickle(os.path.join(os.getcwd(), "examples", "usecases", "PCS", "disgenet_df.pkl"))
# with open(os.path.join(os.getcwd(), "examples", "usecases", "PCS", "disgenet_metadata.pkl"), "wb") as file:
#     pickle.dump(disgenet_metadata, file)

# Load the cached DisGeNET annotation. The path is built from separate
# components so it resolves on every operating system.
# NOTE: pickle.load can execute arbitrary code; only load files you created yourself.
with open(
    os.path.join(os.getcwd(), "examples", "usecases", "PCS", "disgenet_df.pkl"), "rb"
) as file:
    disgenet_df = pickle.load(file)
# with open(os.path.join(os.getcwd(), "examples", "usecases", "PCS", "disgenet_metadata.pkl"), 'rb') as file:
#     disgenet_metadata = pickle.load(file)

disgenet_df.head()

Unnamed: 0,identifier,identifier.source,target,target.source,DISGENET
0,A2ML1,HGNC,144568,NCBI Gene,"[{'HPO': '', 'NCI': 'NCI_C34854', 'OMIM': 'OMI..."
1,AAMDC,HGNC,28971,NCBI Gene,"[{'HPO': nan, 'NCI': nan, 'OMIM': nan, 'MONDO'..."
2,ABCA1,HGNC,19,NCBI Gene,"[{'HPO': '', 'NCI': 'NCI_C85182', 'OMIM': 'OMI..."
3,ABCB1,HGNC,5243,NCBI Gene,"[{'HPO': 'HPO_HP:0001250', 'NCI': 'NCI_C3020',..."
4,ABCC6P1,HGNC,653190,NCBI Gene,"[{'HPO': nan, 'NCI': nan, 'OMIM': nan, 'MONDO'..."


In [7]:
# Show the complete DisGeNET annotation stored for the row with index 0
disgenet_df[DISGENET][0]

[{'HPO': '',
  'NCI': 'NCI_C34854',
  'OMIM': 'OMIM_163950, OMIM_176876',
  'MONDO': 'MONDO_0018997',
  'ORDO': 'ORDO_648',
  'ICD10': '',
  'EFO': '',
  'DO': 'DO_0060254, DO_11983, DO_11725, DO_2962, DO_14681, DO_3490, DO_14796, DO_6683',
  'MESH': 'MESH_D009634',
  'UMLS': 'UMLS_C0028326',
  'ICD9CM': '',
  'disease_name': 'Noonan Syndrome',
  'disease_type': 'disease',
  'disease_umlscui': 'C0028326',
  'score': 0.7,
  'ei': 0.8333333333333334,
  'el': 'Disputed'},
 {'HPO': 'HPO_HP:0000388',
  'NCI': 'NCI_C34885',
  'OMIM': '',
  'MONDO': 'MONDO_0005441',
  'ORDO': '',
  'ICD10': 'ICD10_H66.9',
  'EFO': 'EFO_0004992',
  'DO': 'DO_10754',
  'MESH': 'MESH_D010033',
  'UMLS': 'UMLS_C0029882',
  'ICD9CM': 'ICD9CM_382.9',
  'disease_name': 'Otitis Media',
  'disease_type': 'disease',
  'disease_umlscui': 'C0029882',
  'score': 0.65,
  'ei': 1.0,
  'el': None},
 {'HPO': '',
  'NCI': 'NCI_C75459',
  'OMIM': 'OMIM_176876, OMIM_163950',
  'MONDO': 'MONDO_0008104, MONDO_0018997',
  'ORDO': '

### Compounds from OpenTargets

In [20]:
# To regenerate the cached annotation instead of loading it, uncomment:
# opentargets_drug_df, opentargets_drug_metadata = opentargets.get_gene_compound_interactions(
#     bridgedb_df=bridgdb_df
# )
# opentargets_drug_df.to_pickle(os.path.join(os.getcwd(), "examples", "usecases", "PCS", "opentargets_drug_df.pkl"))
# with open(os.path.join(os.getcwd(), "examples", "usecases", "PCS", "opentargets_drug_metadata.pkl"), "wb") as file:
#     pickle.dump(opentargets_drug_metadata, file)

# Load the cached compound annotation and its metadata. Both paths are built
# from separate components so they resolve on every operating system.
# NOTE: pickle.load can execute arbitrary code; only load files you created yourself.
with open(
    os.path.join(os.getcwd(), "examples", "usecases", "PCS", "opentargets_drug_df.pkl"), "rb"
) as file:
    opentargets_drug_df = pickle.load(file)
with open(
    os.path.join(os.getcwd(), "examples", "usecases", "PCS", "opentargets_drug_metadata.pkl"), "rb"
) as file:
    opentargets_drug_metadata = pickle.load(file)
opentargets_drug_df.head()

Unnamed: 0,identifier,identifier.source,target,target.source,OpenTargets_Compounds
0,A2ML1,HGNC,ENSG00000166535,Ensembl,"[{'chembl_id': nan, 'drugbank_id': nan, 'compo..."
1,AAMDC,HGNC,ENSG00000087884,Ensembl,"[{'chembl_id': nan, 'drugbank_id': nan, 'compo..."
2,ABCA1,HGNC,ENSG00000165029,Ensembl,"[{'chembl_id': 'CHEMBL608', 'drugbank_id': 'DB..."
3,ABCB1,HGNC,ENSG00000085563,Ensembl,"[{'chembl_id': 'CHEMBL1086218', 'drugbank_id':..."
4,ABCC13,HGNC,ENSG00000243064,Ensembl,"[{'chembl_id': nan, 'drugbank_id': nan, 'compo..."


In [21]:
# Show the compound annotation of the row with index 2 (ABCA1 in the preview above)
opentargets_drug_df[OPENTARGETS_COMPOUND_COL][2]

[{'chembl_id': 'CHEMBL608',
  'drugbank_id': 'DB01599',
  'compound_cid': '4912',
  'compound_name': 'PROBUCOL',
  'is_approved': True,
  'relation': 'activates',
  'adverse_effect_count': 5.0,
  'adverse_effect': [{'name': 'blood creatine phosphokinase increased'},
   {'name': 'fractured coccyx'},
   {'name': 'angina unstable'},
   {'name': 'long qt syndrome'},
   {'name': 'haemorrhage subcutaneous'}]}]

### Disease annotation from OpenTargets

In [None]:
# Retrieve the gene-disease annotation from OpenTargets for the mapped identifiers.
# The call returns the annotated dataframe and its metadata. `disease_df` is
# consumed later by the combine_sources cell, so this cell has to be run first.
# NOTE(review): unlike the neighbouring sections, no cached pickle is loaded here;
# presumably this queries the OpenTargets service directly - confirm.
disease_df, opentargets_disease_metadata = opentargets.get_gene_disease_interactions(
    bridgedb_df=bridgdb_df
)
disease_df.head()

In [None]:
# Show the OpenTargets disease annotation stored for the row with index 0
disease_df[OPENTARGETS_DISEASE_COL][0]

### Pathways from MINERVA

In [10]:
# To regenerate the cached annotation instead of loading it, uncomment:
# minerva_df, minerva_metadata = minerva.get_gene_minerva_pathways(
#     bridgdb_df, map_name="COVID19 Disease Map"
# )
# minerva_df.to_pickle(os.path.join(os.getcwd(), "examples", "usecases", "PCS", "minerva_df.pkl"))
# with open(os.path.join(os.getcwd(), "examples", "usecases", "PCS", "minerva_metadata.pkl"), "wb") as file:
#     pickle.dump(minerva_metadata, file)

# Load the cached MINERVA annotation and its metadata. Both paths are built
# from separate components so they resolve on every operating system.
# NOTE: pickle.load can execute arbitrary code; only load files you created yourself.
with open(
    os.path.join(os.getcwd(), "examples", "usecases", "PCS", "minerva_df.pkl"), "rb"
) as file:
    minerva_df = pickle.load(file)
with open(
    os.path.join(os.getcwd(), "examples", "usecases", "PCS", "minerva_metadata.pkl"), "rb"
) as file:
    minerva_metadata = pickle.load(file)
minerva_df.head()

Unnamed: 0,identifier,identifier.source,target,target.source,MINERVA
0,A2ML1,HGNC,ENSG00000166535,Ensembl,"[{'pathway_id': nan, 'pathway_label': nan, 'pa..."
1,AAMDC,HGNC,ENSG00000087884,Ensembl,"[{'pathway_id': nan, 'pathway_label': nan, 'pa..."
2,ABCA1,HGNC,ENSG00000165029,Ensembl,"[{'pathway_id': nan, 'pathway_label': nan, 'pa..."
3,ABCB1,HGNC,ENSG00000085563,Ensembl,"[{'pathway_id': nan, 'pathway_label': nan, 'pa..."
4,ABCC13,HGNC,ENSG00000243064,Ensembl,"[{'pathway_id': nan, 'pathway_label': nan, 'pa..."


In [11]:
# Show the MINERVA pathway annotation stored for the row with index 33
minerva_df[MINERVA][33]

[{'pathway_id': 953.0,
  'pathway_label': 'Kynurenine synthesis pathway',
  'pathway_gene_count': 45.0}]

### Pathways from WikiPathways

In [12]:
# To regenerate the cached annotation instead of loading it, uncomment:
# wikipathways_df, wikipathways_metadata = wikipathways.get_gene_wikipathways(bridgedb_df=bridgdb_df)
# wikipathways_df.to_pickle(os.path.join(os.getcwd(), "examples", "usecases", "PCS", "wikipathways_df.pkl"))
# with open(os.path.join(os.getcwd(), "examples", "usecases", "PCS", "wikipathways_metadata.pkl"), "wb") as file:
#     pickle.dump(wikipathways_metadata, file)

# Load the cached WikiPathways annotation and its metadata. Both paths are built
# from separate components so they resolve on every operating system.
# NOTE: pickle.load can execute arbitrary code; only load files you created yourself.
with open(
    os.path.join(os.getcwd(), "examples", "usecases", "PCS", "wikipathways_df.pkl"), "rb"
) as file:
    wikipathways_df = pickle.load(file)
with open(
    os.path.join(os.getcwd(), "examples", "usecases", "PCS", "wikipathways_metadata.pkl"), "rb"
) as file:
    wikipathways_metadata = pickle.load(file)
wikipathways_df.head()

Unnamed: 0,identifier,identifier.source,target,target.source,WikiPathways
0,A2ML1,HGNC,144568,NCBI Gene,"[{'pathway_id': nan, 'pathway_label': nan, 'pa..."
1,AAMDC,HGNC,28971,NCBI Gene,"[{'pathway_id': nan, 'pathway_label': nan, 'pa..."
2,ABCA1,HGNC,19,NCBI Gene,"[{'pathway_id': 'WP5470', 'pathway_label': 'Ef..."
3,ABCB1,HGNC,5243,NCBI Gene,"[{'pathway_id': 'WP299', 'pathway_label': 'Nuc..."
4,ABCC6P1,HGNC,653190,NCBI Gene,"[{'pathway_id': nan, 'pathway_label': nan, 'pa..."


In [13]:
# Show the WikiPathways annotation of the row with index 3 (ABCB1 in the preview above)
wikipathways_df[WIKIPATHWAYS][3]

[{'pathway_id': 'WP299',
  'pathway_label': 'Nuclear receptors in lipid metabolism and toxicity',
  'pathway_gene_count': 35.0},
 {'pathway_id': 'WP3672',
  'pathway_label': 'lncRNA-mediated mechanisms of therapeutic resistance',
  'pathway_gene_count': 7.0},
 {'pathway_id': 'WP3640',
  'pathway_label': 'Imatinib and chronic myeloid leukemia',
  'pathway_gene_count': 20.0},
 {'pathway_id': 'WP4673',
  'pathway_label': 'Male infertility',
  'pathway_gene_count': 145.0},
 {'pathway_id': 'WP2328',
  'pathway_label': 'Allograft rejection',
  'pathway_gene_count': 102.0},
 {'pathway_id': 'WP4917',
  'pathway_label': 'Proximal tubule transport',
  'pathway_gene_count': 57.0},
 {'pathway_id': 'WP2289',
  'pathway_label': 'Drug induction of bile acid pathway',
  'pathway_gene_count': 17.0},
 {'pathway_id': 'WP1604',
  'pathway_label': 'Codeine and morphine metabolism',
  'pathway_gene_count': 17.0},
 {'pathway_id': 'WP2877',
  'pathway_label': 'Vitamin D receptor pathway',
  'pathway_gene_coun

### Reactome pathways from OpenTargets

In [14]:
# To regenerate the cached annotation instead of loading it, uncomment:
# opentargets_reactome_df, opentargets_reactome_metadata = opentargets.get_gene_reactome_pathways(
#     bridgedb_df=bridgdb_df
# )
# opentargets_reactome_df.to_pickle(os.path.join(os.getcwd(), "examples", "usecases", "PCS", "opentargets_reactome_df.pkl"))
# with open(os.path.join(os.getcwd(), "examples", "usecases", "PCS", "opentargets_reactome_metadata.pkl"), "wb") as file:
#     pickle.dump(opentargets_reactome_metadata, file)

# Load the cached Reactome annotation and its metadata. Both paths are built
# from separate components so they resolve on every operating system.
# NOTE: pickle.load can execute arbitrary code; only load files you created yourself.
with open(
    os.path.join(os.getcwd(), "examples", "usecases", "PCS", "opentargets_reactome_df.pkl"), "rb"
) as file:
    opentargets_reactome_df = pickle.load(file)
with open(
    os.path.join(os.getcwd(), "examples", "usecases", "PCS", "opentargets_reactome_metadata.pkl"),
    "rb",
) as file:
    opentargets_reactome_metadata = pickle.load(file)

opentargets_reactome_df.head()

Unnamed: 0,identifier,identifier.source,target,target.source,OpenTargets_Reactome
0,A2ML1,HGNC,ENSG00000166535,Ensembl,"[{'pathway_label': nan, 'pathway_id': nan}]"
1,AAMDC,HGNC,ENSG00000087884,Ensembl,"[{'pathway_label': nan, 'pathway_id': nan}]"
2,ABCA1,HGNC,ENSG00000165029,Ensembl,[{'pathway_label': 'PPARA activates gene expre...
3,ABCB1,HGNC,ENSG00000085563,Ensembl,[{'pathway_label': 'Abacavir transmembrane tra...
4,ABCC13,HGNC,ENSG00000243064,Ensembl,"[{'pathway_label': nan, 'pathway_id': nan}]"


In [15]:
# Show the Reactome pathway annotation of the row with index 2 (ABCA1 in the preview above)
opentargets_reactome_df[OPENTARGETS_REACTOME_COL][2]

[{'pathway_label': 'PPARA activates gene expression',
  'pathway_id': 'R-HSA-1989781'},
 {'pathway_label': 'Defective ABCA1 causes TGD',
  'pathway_id': 'R-HSA-5682113'},
 {'pathway_label': 'NR1H3 & NR1H2 regulate gene expression linked to cholesterol transport and efflux',
  'pathway_id': 'R-HSA-9029569'},
 {'pathway_label': 'HDL assembly', 'pathway_id': 'R-HSA-8963896'}]

### Gene Ontology from OpenTargets

In [23]:
# To regenerate the cached annotation instead of loading it, uncomment:
# opentargets_go_df, opentargets_go_metadata = opentargets.get_gene_go_process(bridgedb_df=bridgdb_df)
# opentargets_go_df.to_pickle(os.path.join(os.getcwd(), "examples", "usecases", "PCS", "opentargets_go_df.pkl"))
# with open(os.path.join(os.getcwd(), "examples", "usecases", "PCS", "opentargets_go_metadata.pkl"), "wb") as file:
#     pickle.dump(opentargets_go_metadata, file)

# Load the cached Gene Ontology annotation and its metadata. Both paths are built
# from separate components so they resolve on every operating system.
# NOTE: pickle.load can execute arbitrary code; only load files you created yourself.
with open(
    os.path.join(os.getcwd(), "examples", "usecases", "PCS", "opentargets_go_df.pkl"), "rb"
) as file:
    opentargets_go_df = pickle.load(file)
with open(
    os.path.join(os.getcwd(), "examples", "usecases", "PCS", "opentargets_go_metadata.pkl"), "rb"
) as file:
    opentargets_go_metadata = pickle.load(file)
opentargets_go_df.head()

Unnamed: 0,identifier,identifier.source,target,target.source,OpenTargets_GO
0,A2ML1,HGNC,ENSG00000166535,Ensembl,"[{'go_id': 'GO:0052548', 'go_name': 'regulatio..."
1,AAMDC,HGNC,ENSG00000087884,Ensembl,"[{'go_id': 'GO:0005737', 'go_name': 'cytoplasm..."
2,ABCA1,HGNC,ENSG00000165029,Ensembl,"[{'go_id': 'GO:0005524', 'go_name': 'ATP bindi..."
3,ABCB1,HGNC,ENSG00000085563,Ensembl,"[{'go_id': 'GO:0008559', 'go_name': 'ABC-type ..."
4,ABCC13,HGNC,ENSG00000243064,Ensembl,"[{'go_id': nan, 'go_name': nan, 'go_type': nan}]"


In [24]:
# Show the Gene Ontology annotation of the row with index 0 (A2ML1 in the preview above)
opentargets_go_df[OPENTARGETS_GO_COL][0]

[{'go_id': 'GO:0052548',
  'go_name': 'regulation of endopeptidase activity',
  'go_type': 'P'},
 {'go_id': 'GO:0070062', 'go_name': 'extracellular exosome', 'go_type': 'C'},
 {'go_id': 'GO:0030414',
  'go_name': 'peptidase inhibitor activity',
  'go_type': 'F'},
 {'go_id': 'GO:0005615', 'go_name': 'extracellular space', 'go_type': 'C'},
 {'go_id': 'GO:0004867',
  'go_name': 'serine-type endopeptidase inhibitor activity',
  'go_type': 'F'}]

### Protein-Protein interactions from STRING

In [27]:
# To regenerate the cached annotation instead of loading it, uncomment:
# string_ppi_df, string_ppi_metadata = stringdb.get_ppi(bridgedb_df=bridgdb_df)
# string_ppi_df.to_pickle(os.path.join(os.getcwd(), "examples", "usecases", "PCS", "string_ppi_df.pkl"))
# with open(os.path.join(os.getcwd(), "examples", "usecases", "PCS", "string_ppi_metadata.pkl"), "wb") as file:
#     pickle.dump(string_ppi_metadata, file)

# Load the cached STRING interactions and their metadata. Both paths are built
# from separate components so they resolve on every operating system.
# NOTE: pickle.load can execute arbitrary code; only load files you created yourself.
with open(
    os.path.join(os.getcwd(), "examples", "usecases", "PCS", "string_ppi_df.pkl"), "rb"
) as file:
    string_ppi_df = pickle.load(file)
with open(
    os.path.join(os.getcwd(), "examples", "usecases", "PCS", "string_ppi_metadata.pkl"), "rb"
) as file:
    string_ppi_metadata = pickle.load(file)
string_ppi_df.head()

Unnamed: 0,identifier,identifier.source,target,target.source,StringDB
0,DMP1,HGNC,ENSG00000152592,Ensembl,"[{'stringdb_link_to': 'TNFRSF11B', 'Ensembl': ..."
1,PNLIP,HGNC,ENSG00000175535,Ensembl,"[{'stringdb_link_to': 'LIPE', 'Ensembl': 'ENSP..."
2,OR4N3P,HGNC,ENSG00000259435,Ensembl,"[{'stringdb_link_to': nan, 'Ensembl': nan, 'sc..."
3,SLC6A14,HGNC,ENSG00000268104,Ensembl,"[{'stringdb_link_to': 'SLC7A11', 'Ensembl': 'E..."
4,DEFB105A,HGNC,ENSG00000186562,Ensembl,"[{'stringdb_link_to': 'DEFB118', 'Ensembl': 'E..."


In [29]:
# Show the STRING interactions of the row with index 0 (DMP1 in the preview above)
string_ppi_df[STRING][0]

[{'stringdb_link_to': 'TNFRSF11B',
  'Ensembl': 'ENSP00000297350',
  'score': 0.409},
 {'stringdb_link_to': 'HSPA5', 'Ensembl': 'ENSP00000324173', 'score': 0.504},
 {'stringdb_link_to': 'GAPDH', 'Ensembl': 'ENSP00000380070', 'score': 0.449},
 {'stringdb_link_to': 'CD44', 'Ensembl': 'ENSP00000398632', 'score': 0.601},
 {'stringdb_link_to': 'ENPP1', 'Ensembl': 'ENSP00000498074', 'score': 0.625},
 {'stringdb_link_to': 'RUNX2', 'Ensembl': 'ENSP00000360493', 'score': 0.713}]

### Combining all the results into a single dataframe

In [30]:
# Merge every per-source annotation dataframe into one table.
# Only dataframes belong in this list: the STRING entry must be `string_ppi_df`
# (the interactions), not `string_ppi_metadata` (the metadata object loaded with it).
# `disease_df` comes from the OpenTargets disease cell, which has to be run first.
combined_df = combine_sources(
    [
        minerva_df,
        wikipathways_df,
        opentargets_reactome_df,
        opentargets_go_df,
        disgenet_df,
        disease_df,
        opentargets_drug_df,
        # inhibitor_df,
        string_ppi_df,
    ]
)

In [31]:
# Preview the first four rows of the combined annotation table
combined_df.head(4)

Unnamed: 0,identifier,identifier.source,target,target.source,Bgee,MINERVA,WikiPathways,OpenTargets_Reactome,OpenTargets_GO,OpenTargets_Location,OpenTargets_Diseases,OpenTargets_Compounds,MolMeDB_transporter_inhibitor,PubChem_Assays,StringDB
0,AHR,HGNC,ENSG00000106546,Ensembl,"[{'anatomical_entity_id': 'UBERON_0000178', 'a...","[{'pathway_id': 953.0, 'pathway_label': 'Kynur...","[{'pathway_id': 'WP5044', 'pathway_label': 'Ky...","[{'pathway_label': 'Endogenous sterols', 'path...","[{'go_id': 'GO:0005667', 'go_name': 'transcrip...","[{'location_id': 'SL-0086', 'location': 'Cytop...","[{'disease_id': 'umls:C0033860', 'disease_name...","[{'chembl_id': 'CHEMBL259571', 'drugbank_id': ...","[{'compound_name': nan, 'InChIKey': nan, 'SMIL...","[{'pubchem_assay_id': nan, 'assay_type': nan, ...","[{'stringdb_link_to': 'SCN4A', 'Ensembl': 'ENS..."
1,CHRNG,HGNC,ENSG00000196811,Ensembl,"[{'anatomical_entity_id': 'UBERON_0000178', 'a...","[{'pathway_id': nan, 'pathway_label': nan, 'pa...","[{'pathway_id': nan, 'pathway_label': nan, 'pa...",[{'pathway_label': 'Highly sodium permeable po...,"[{'go_id': 'GO:0015464', 'go_name': 'acetylcho...","[{'location_id': 'SL-0219', 'location': 'Posts...","[{'disease_id': 'umls:C0085631', 'disease_name...","[{'chembl_id': 'CHEMBL1200641', 'drugbank_id':...","[{'compound_name': nan, 'InChIKey': nan, 'SMIL...","[{'pubchem_assay_id': nan, 'assay_type': nan, ...","[{'stringdb_link_to': 'SCN4A', 'Ensembl': 'ENS..."
2,DMD,HGNC,ENSG00000198947,Ensembl,"[{'anatomical_entity_id': 'UBERON_0000178', 'a...","[{'pathway_id': nan, 'pathway_label': nan, 'pa...","[{'pathway_id': 'WP2858', 'pathway_label': 'Ec...",[{'pathway_label': 'Striated Muscle Contractio...,"[{'go_id': 'GO:0016010', 'go_name': 'dystrophi...","[{'location_id': 'SL-0039', 'location': 'Cell ...","[{'disease_id': 'umls:C0013264', 'disease_name...","[{'chembl_id': 'CHEMBL2108278', 'drugbank_id':...","[{'compound_name': nan, 'InChIKey': nan, 'SMIL...","[{'pubchem_assay_id': nan, 'assay_type': nan, ...",[]
3,HTR3A,HGNC,ENSG00000166736,Ensembl,"[{'anatomical_entity_id': 'UBERON_0000178', 'a...","[{'pathway_id': nan, 'pathway_label': nan, 'pa...","[{'pathway_id': 'WP706', 'pathway_label': 'Sud...",[{'pathway_label': 'Neurotransmitter receptors...,"[{'go_id': 'GO:1904602', 'go_name': 'serotonin...","[{'location_id': 'SL-0219', 'location': 'Posts...","[{'disease_id': 'EFO_0003843', 'disease_name':...","[{'chembl_id': 'CHEMBL56564', 'drugbank_id': '...","[{'compound_name': nan, 'InChIKey': nan, 'SMIL...","[{'pubchem_assay_id': nan, 'assay_type': nan, ...","[{'stringdb_link_to': 'DMD', 'Ensembl': 'ENSP0..."


In [32]:
# Report the (rows, columns) dimensions of the combined table
combined_df.shape

(50, 15)

### Exporting the database in pickle format

In [33]:
# Serialize the combined annotation table to "combined_df.pkl" in the working directory
with open("combined_df.pkl", mode="wb") as pickle_file:
    pickle.dump(combined_df, pickle_file)

## Creating a graph from the annotated dataframe

In [34]:
# Read the table exported in the previous step back in through the graph generator helper
fuse_df = generator.load_dataframe_from_pickle("combined_df.pkl")

In [35]:
# Preview the first rows of the dataframe the graph is built from
fuse_df.head()

Unnamed: 0,identifier,identifier.source,target,target.source,Bgee,MINERVA,WikiPathways,OpenTargets_Reactome,OpenTargets_GO,OpenTargets_Location,OpenTargets_Diseases,OpenTargets_Compounds,MolMeDB_transporter_inhibitor,PubChem_Assays,StringDB
0,AHR,HGNC,ENSG00000106546,Ensembl,"[{'anatomical_entity_id': 'UBERON_0000178', 'a...","[{'pathway_id': 953.0, 'pathway_label': 'Kynur...","[{'pathway_id': 'WP5044', 'pathway_label': 'Ky...","[{'pathway_label': 'Endogenous sterols', 'path...","[{'go_id': 'GO:0005667', 'go_name': 'transcrip...","[{'location_id': 'SL-0086', 'location': 'Cytop...","[{'disease_id': 'umls:C0033860', 'disease_name...","[{'chembl_id': 'CHEMBL259571', 'drugbank_id': ...","[{'compound_name': nan, 'InChIKey': nan, 'SMIL...","[{'pubchem_assay_id': nan, 'assay_type': nan, ...","[{'stringdb_link_to': 'SCN4A', 'Ensembl': 'ENS..."
1,CHRNG,HGNC,ENSG00000196811,Ensembl,"[{'anatomical_entity_id': 'UBERON_0000178', 'a...","[{'pathway_id': nan, 'pathway_label': nan, 'pa...","[{'pathway_id': nan, 'pathway_label': nan, 'pa...",[{'pathway_label': 'Highly sodium permeable po...,"[{'go_id': 'GO:0015464', 'go_name': 'acetylcho...","[{'location_id': 'SL-0219', 'location': 'Posts...","[{'disease_id': 'umls:C0085631', 'disease_name...","[{'chembl_id': 'CHEMBL1200641', 'drugbank_id':...","[{'compound_name': nan, 'InChIKey': nan, 'SMIL...","[{'pubchem_assay_id': nan, 'assay_type': nan, ...","[{'stringdb_link_to': 'SCN4A', 'Ensembl': 'ENS..."
2,DMD,HGNC,ENSG00000198947,Ensembl,"[{'anatomical_entity_id': 'UBERON_0000178', 'a...","[{'pathway_id': nan, 'pathway_label': nan, 'pa...","[{'pathway_id': 'WP2858', 'pathway_label': 'Ec...",[{'pathway_label': 'Striated Muscle Contractio...,"[{'go_id': 'GO:0016010', 'go_name': 'dystrophi...","[{'location_id': 'SL-0039', 'location': 'Cell ...","[{'disease_id': 'umls:C0013264', 'disease_name...","[{'chembl_id': 'CHEMBL2108278', 'drugbank_id':...","[{'compound_name': nan, 'InChIKey': nan, 'SMIL...","[{'pubchem_assay_id': nan, 'assay_type': nan, ...",[]
3,HTR3A,HGNC,ENSG00000166736,Ensembl,"[{'anatomical_entity_id': 'UBERON_0000178', 'a...","[{'pathway_id': nan, 'pathway_label': nan, 'pa...","[{'pathway_id': 'WP706', 'pathway_label': 'Sud...",[{'pathway_label': 'Neurotransmitter receptors...,"[{'go_id': 'GO:1904602', 'go_name': 'serotonin...","[{'location_id': 'SL-0219', 'location': 'Posts...","[{'disease_id': 'EFO_0003843', 'disease_name':...","[{'chembl_id': 'CHEMBL56564', 'drugbank_id': '...","[{'compound_name': nan, 'InChIKey': nan, 'SMIL...","[{'pubchem_assay_id': nan, 'assay_type': nan, ...","[{'stringdb_link_to': 'DMD', 'Ensembl': 'ENSP0..."
4,SCN4A,HGNC,ENSG00000007314,Ensembl,"[{'anatomical_entity_id': 'UBERON_0000178', 'a...","[{'pathway_id': nan, 'pathway_label': nan, 'pa...","[{'pathway_id': nan, 'pathway_label': nan, 'pa...",[{'pathway_label': 'Phase 0 - rapid depolarisa...,"[{'go_id': 'GO:0035725', 'go_name': 'sodium io...","[{'location_id': 'SL-0039', 'location': 'Cell ...","[{'disease_id': 'EFO_0000432', 'disease_name':...","[{'chembl_id': 'CHEMBL1077896', 'drugbank_id':...","[{'compound_name': '3-phenyl-1h-pyrazole', 'In...","[{'pubchem_assay_id': nan, 'assay_type': nan, ...",[]


In [36]:
# Build the graph object from the annotated dataframe
# (presumably a NetworkX graph, judging by the function name - confirm)
pygraph = generator.networkx_graph(fuse_df)

### Store the graph

In [37]:
# Serialize the generated graph to "networkx_graph.pkl" in the working directory
with open("networkx_graph.pkl", mode="wb") as graph_file:
    pickle.dump(pygraph, graph_file)

## Visualize the graph

In [38]:
# Optional static preview with networkx/matplotlib, left disabled here;
# uncomment the lines below to draw the graph in a circular layout.
# pos = nx.circular_layout(pygraph)

# plt.figure(3, figsize=(30, 30))
# nx.draw(pygraph, pos)
# plt.show()

In [None]:
# NOTE(review): this import sits in the last cell; consider moving it to the
# import cell at the top so all dependencies are visible in one place.
from pyBiodatafuse.graph import cytoscape, neo4j

# Write the graph to a GraphML file at the given output path
neo4j.save_graph_to_graphml(pygraph, output_path="graph_to-test.graphml")
# Send the graph to Cytoscape under the network name "test"
# (presumably requires a running Cytoscape instance - confirm)
cytoscape.load_graph(pygraph, network_name="test")