# Example: Graph generation from a gene

This notebook shows how to use pyBiodatafuse to annotate a list of genes from several data sources and to generate a knowledge graph (KG) from the combined annotations.

In [1]:
import os

# Root of the local pyBiodatafuse checkout; adjust this to your own machine.
# A raw string keeps the backslashes of the Windows path literal instead of
# having "\B" and "\p" parsed as (invalid) escape sequences.
new_path = r"E:\BioDataFuse\pyBiodatafuse"

# Set the current working directory
os.chdir(new_path)

# Report the directory the notebook is now running from
current_dir = os.getcwd()
print("Current directory:", current_dir)

Current directory: E:\BioDataFuse\pyBiodatafuse


In [2]:
# Import modules
import pickle

import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd

from pyBiodatafuse import id_mapper
from pyBiodatafuse.annotators import (
    bgee,
    disgenet,
    minerva,
    molmedb,
    opentargets,
    pubchem,
    stringdb,
    wikipathways,
)
from pyBiodatafuse.constants import (
    BGEE,
    DISGENET,
    MINERVA,
    MOLMEDB_INHIBITED_COL,
    MOLMEDB_INHIBITOR_COL,
    OPENTARGETS_COMPOUND_COL,
    OPENTARGETS_DISEASE_COL,
    OPENTARGETS_GO_COL,
    OPENTARGETS_LOCATION_COL,
    OPENTARGETS_REACTOME_COL,
    STRING,
    WIKIPATHWAYS,
    PUBCHEM_Assays_COL,
)
from pyBiodatafuse.graph import generator
from pyBiodatafuse.utils import combine_sources

### Load the input list and convert it to a dataframe

In [3]:
# Alternative, larger input list kept for reference; uncomment to use it instead.
# NOTE(review): "AAGRN" below looks like a typo for the gene symbol "AGRN" - confirm before enabling.
# genes_of_interest = """AAGRN
# ALG14
# ALG2
# CHAT
# CHD8
# CHRNA1
# CHRNB1
# CHRND
# CHRNE
# CHRNG
# COL13A1
# COLQ
# DOK7
# DPAGT1
# GFPT1
# GMPPB
# LAMA5
# LAMB2
# LRP4
# MUSK
# MYO9A
# PLEC
# PREPL
# PURA
# RAPSN
# RPH3A
# SCN4A
# SLC18A3
# SLC25A1
# SLC5A7
# SNAP25
# SYT2
# TOR1AIP1
# UNC13A
# VAMP1"""
genes_of_interest = """CHRNG
DMD
AHR
SCN4A
SLC25A1"""
# genes_of_interest = "DMD"

# One gene symbol per line. splitlines() handles "\n" and "\r\n" alike, and
# stripping / dropping blanks keeps stray whitespace or a trailing newline
# from turning into bogus identifiers.
gene_list = [gene.strip() for gene in genes_of_interest.splitlines() if gene.strip()]
len(gene_list)

5

In [4]:
# Wrap the gene symbols in a single-column dataframe named "identifier".
data_input = pd.DataFrame({"identifier": gene_list})
data_input.head()

Unnamed: 0,identifier
0,CHRNG
1,DMD
2,AHR
3,SCN4A
4,SLC25A1


### Entity resolution using BridgeDB

In [5]:
# Map the HGNC gene symbols in `data_input` to identifiers from other
# databases through BridgeDb. The call returns a (dataframe, metadata) pair;
# the dataframe has one row per (identifier, target) mapping.
# output_datasource="All" presumably requests every available target source - TODO confirm.
# NOTE: the names are spelled `bridgdb_*` (without the "e"); later cells rely on this spelling.
bridgdb_df, bridgdb_metadata = id_mapper.bridgedb_xref(
    identifiers=data_input,
    input_species="Human",
    input_datasource="HGNC",
    output_datasource="All",
)
bridgdb_df.head()

Unnamed: 0,identifier,identifier.source,target,target.source
0,CHRNG,HGNC,GO:0042391,Gene Ontology
1,CHRNG,HGNC,A_23_P5718,Agilent
2,CHRNG,HGNC,GO:0016021,Gene Ontology
3,CHRNG,HGNC,GO:0016020,Gene Ontology
4,CHRNG,HGNC,GO:0006936,Gene Ontology


### Gene expression from Bgee

In [6]:
# Annotate the mapped genes with Bgee expression data. Returns the annotated
# dataframe (annotations are stored as a list of dicts per gene) and a metadata object.
bgee_df, bgee_metadata = bgee.get_gene_expression(bridgedb_df=bridgdb_df)
bgee_df.head()

Unnamed: 0,identifier,identifier.source,target,target.source,Bgee
0,AHR,HGNC,ENSG00000106546,Ensembl,"[{'anatomical_entity_id': 'UBERON_0000178', 'a..."
1,CHRNG,HGNC,ENSG00000196811,Ensembl,"[{'anatomical_entity_id': 'UBERON_0000178', 'a..."
2,DMD,HGNC,ENSG00000198947,Ensembl,"[{'anatomical_entity_id': 'UBERON_0000178', 'a..."
3,SCN4A,HGNC,ENSG00000007314,Ensembl,"[{'anatomical_entity_id': 'UBERON_0000178', 'a..."
4,SLC25A1,HGNC,ENSG00000100075,Ensembl,"[{'anatomical_entity_id': 'UBERON_0000178', 'a..."


In [7]:
# Inspect the complete list of Bgee records for the row labelled 0 (AHR).
bgee_df.loc[0, BGEE]

[{'anatomical_entity_id': 'UBERON_0000178',
  'anatomical_entity_name': 'blood',
  'developmental_stage_id': 'HsapDv_0000268',
  'developmental_stage_name': '15-19 year-old (human)',
  'expression_level': 88.1783,
  'confidence_level_id': 'CIO_0000029',
  'confidence_level_name': 'high confidence level'},
 {'anatomical_entity_id': 'UBERON_0000178',
  'anatomical_entity_name': 'blood',
  'developmental_stage_id': 'HsapDv_0000268',
  'developmental_stage_name': '15-19 year-old (human)',
  'expression_level': 88.1783,
  'confidence_level_id': 'CIO_0000029',
  'confidence_level_name': 'high confidence level'},
 {'anatomical_entity_id': 'UBERON_0000178',
  'anatomical_entity_name': 'blood',
  'developmental_stage_id': 'UBERON_0018241',
  'developmental_stage_name': 'prime adult stage',
  'expression_level': 81.3048,
  'confidence_level_id': 'CIO_0000029',
  'confidence_level_name': 'high confidence level'},
 {'anatomical_entity_id': 'UBERON_0000178',
  'anatomical_entity_name': 'blood',
  '

### Disease annotation from DisGeNET

In [8]:
# Retrieve gene-disease associations from DisGeNET for the mapped genes.
# Returns the annotated dataframe and a metadata object.
disgenet_df, disgenet_metadata = disgenet.get_gene_disease(bridgedb_df=bridgdb_df)
disgenet_df.head()

Unnamed: 0,identifier,identifier.source,target,target.source,DisGeNET
0,AHR,HGNC,196,NCBI Gene,"[{'disease_id': 'umls:C0007102', 'disease_name..."
1,CHRNG,HGNC,1146,NCBI Gene,"[{'disease_id': 'umls:C0026034', 'disease_name..."
2,DMD,HGNC,1756,NCBI Gene,"[{'disease_id': 'umls:C0018801', 'disease_name..."
3,SCN4A,HGNC,6329,NCBI Gene,"[{'disease_id': 'umls:C0151468', 'disease_name..."
4,SLC25A1,HGNC,6576,NCBI Gene,"[{'disease_id': 'umls:C0013080', 'disease_name..."


In [9]:
# Inspect every DisGeNET disease record attached to the row labelled 0 (AHR).
disgenet_df.loc[0, DISGENET]

[{'disease_id': 'umls:C0007102',
  'disease_name': 'Malignant tumor of colon',
  'score': 0.08,
  'source': 'BEFREE'},
 {'disease_id': 'umls:C0007103',
  'disease_name': 'Malignant neoplasm of endometrium',
  'score': 0.05,
  'source': 'BEFREE'},
 {'disease_id': 'umls:C0009319',
  'disease_name': 'Colitis',
  'score': 0.4,
  'source': 'BEFREE'},
 {'disease_id': 'umls:C0010346',
  'disease_name': 'Crohn Disease',
  'score': 0.14,
  'source': 'BEFREE'},
 {'disease_id': 'umls:C0017601',
  'disease_name': 'Glaucoma',
  'score': 0.11,
  'source': 'HPO'},
 {'disease_id': 'umls:C0017636',
  'disease_name': 'Glioblastoma',
  'score': 0.08,
  'source': 'BEFREE'},
 {'disease_id': 'umls:C0017638',
  'disease_name': 'Glioma',
  'score': 0.05,
  'source': 'BEFREE'},
 {'disease_id': 'umls:C0019196',
  'disease_name': 'Hepatitis C',
  'score': 0.04,
  'source': 'BEFREE'},
 {'disease_id': 'umls:C0027430',
  'disease_name': 'Nasal Polyps',
  'score': 0.02,
  'source': 'BEFREE'},
 {'disease_id': 'umls:C

### Disease annotation from OpenTargets

In [10]:
# Retrieve gene-disease associations from OpenTargets for the mapped genes.
# Returns the annotated dataframe and a metadata object.
disease_df, opentargets_disease_metadata = opentargets.get_gene_disease_associations(
    bridgedb_df=bridgdb_df
)
disease_df.head()

Unnamed: 0,identifier,identifier.source,target,target.source,OpenTargets_Diseases
0,AHR,HGNC,ENSG00000106546,Ensembl,"[{'disease_id': 'umls:C0033860', 'disease_name..."
1,CHRNG,HGNC,ENSG00000196811,Ensembl,"[{'disease_id': 'umls:C0085631', 'disease_name..."
2,DMD,HGNC,ENSG00000198947,Ensembl,"[{'disease_id': 'umls:C0013264', 'disease_name..."
3,SCN4A,HGNC,ENSG00000007314,Ensembl,"[{'disease_id': 'umls:C0037011', 'disease_name..."
4,SLC25A1,HGNC,ENSG00000100075,Ensembl,"[{'disease_id': nan, 'disease_name': nan, 'the..."


In [11]:
# Inspect the OpenTargets disease records for the row labelled 0 (AHR).
disease_df.loc[0, OPENTARGETS_DISEASE_COL]

[{'disease_id': 'umls:C0033860',
  'disease_name': 'psoriasis',
  'therapeutic_areas': 'OTAR_0000018:genetic, familial or congenital disease, EFO_0000540:immune system disease, EFO_0010285:integumentary system disease'},
 {'disease_id': 'EFO_0000274',
  'disease_name': 'atopic eczema',
  'therapeutic_areas': 'OTAR_0000018:genetic, familial or congenital disease, EFO_0000540:immune system disease, EFO_0010285:integumentary system disease'}]

### Pathways from MINERVA

In [12]:
# Look up the mapped genes in the MINERVA project named "COVID19 Disease Map".
# Returns the annotated dataframe and a metadata object.
# The metadata is kept under its own name so that it is not overwritten by the
# OpenTargets Reactome cell, which assigns `opentargets_process_metadata`.
minerva_df, minerva_metadata = minerva.get_gene_minerva_pathways(
    bridgdb_df, map_name="COVID19 Disease Map"
)
minerva_df.head()

Unnamed: 0,identifier,identifier.source,target,target.source,MINERVA
0,AHR,HGNC,196,NCBI Gene,"[{'pathway_id': 953.0, 'pathway_label': 'Kynur..."
1,CHRNG,HGNC,1146,NCBI Gene,"[{'pathway_id': nan, 'pathway_label': nan, 'pa..."
2,DMD,HGNC,1756,NCBI Gene,"[{'pathway_id': nan, 'pathway_label': nan, 'pa..."
3,SCN4A,HGNC,6329,NCBI Gene,"[{'pathway_id': nan, 'pathway_label': nan, 'pa..."
4,SLC25A1,HGNC,6576,NCBI Gene,"[{'pathway_id': nan, 'pathway_label': nan, 'pa..."


In [13]:
# Inspect the MINERVA pathway records for the row labelled 0 (AHR).
minerva_df.loc[0, MINERVA]

[{'pathway_id': 953.0,
  'pathway_label': 'Kynurenine synthesis pathway',
  'pathway_gene_count': 45.0}]

### Pathways from WikiPathways

In [15]:
# Retrieve the WikiPathways pathways that contain each mapped gene.
# Returns the annotated dataframe and a metadata object.
wikipathways_df, wp_metadata = wikipathways.get_gene_wikipathways(bridgedb_df=bridgdb_df)
wikipathways_df.head()

Unnamed: 0,identifier,identifier.source,target,target.source,WikiPathways
0,AHR,HGNC,196,NCBI Gene,"[{'pathway_id': 'WP3869', 'pathway_label': 'Ca..."
1,CHRNG,HGNC,1146,NCBI Gene,"[{'pathway_id': nan, 'pathway_label': nan, 'pa..."
2,DMD,HGNC,1756,NCBI Gene,"[{'pathway_id': 'WP2858', 'pathway_label': 'Ec..."
3,SCN4A,HGNC,6329,NCBI Gene,"[{'pathway_id': nan, 'pathway_label': nan, 'pa..."
4,SLC25A1,HGNC,6576,NCBI Gene,"[{'pathway_id': 'WP4657', 'pathway_label': '22..."


In [16]:
# Inspect the WikiPathways records for the row labelled 0 (AHR).
wikipathways_df.loc[0, WIKIPATHWAYS]

[{'pathway_id': 'WP3869',
  'pathway_label': 'Cannabinoid receptor signaling',
  'pathway_gene_count': 31.0},
 {'pathway_id': 'WP4673',
  'pathway_label': 'Male infertility',
  'pathway_gene_count': 145.0},
 {'pathway_id': 'WP5115',
  'pathway_label': 'Network map of SARS-CoV-2 signaling pathway',
  'pathway_gene_count': 251.0},
 {'pathway_id': 'WP236',
  'pathway_label': 'Adipogenesis',
  'pathway_gene_count': 131.0},
 {'pathway_id': 'WP2586',
  'pathway_label': 'Aryl hydrocarbon receptor pathway',
  'pathway_gene_count': 44.0},
 {'pathway_id': 'WP2873',
  'pathway_label': 'Aryl hydrocarbon receptor pathway',
  'pathway_gene_count': 46.0},
 {'pathway_id': 'WP5088',
  'pathway_label': 'Prostaglandin signaling',
  'pathway_gene_count': 31.0},
 {'pathway_id': 'WP3893',
  'pathway_label': 'Development and heterogeneity of the ILC family',
  'pathway_gene_count': 32.0},
 {'pathway_id': 'WP465',
  'pathway_label': 'Tryptophan metabolism',
  'pathway_gene_count': 32.0},
 {'pathway_id': 'WP51

### Reactome pathways from OpenTargets

In [17]:
# Retrieve Reactome pathways for the mapped genes through OpenTargets.
# Returns the annotated dataframe and a metadata object.
reactome_process_df, opentargets_process_metadata = opentargets.get_gene_reactome_pathways(
    bridgedb_df=bridgdb_df
)
reactome_process_df.head()

Unnamed: 0,identifier,identifier.source,target,target.source,OpenTargets_Reactome
0,AHR,HGNC,ENSG00000106546,Ensembl,"[{'pathway_label': 'Endogenous sterols', 'path..."
1,CHRNG,HGNC,ENSG00000196811,Ensembl,[{'pathway_label': 'Highly sodium permeable po...
2,DMD,HGNC,ENSG00000198947,Ensembl,[{'pathway_label': 'Striated Muscle Contractio...
3,SCN4A,HGNC,ENSG00000007314,Ensembl,[{'pathway_label': 'Phase 0 - rapid depolarisa...
4,SLC25A1,HGNC,ENSG00000100075,Ensembl,"[{'pathway_label': 'Gluconeogenesis', 'pathway..."


In [18]:
# Inspect the Reactome pathway records for the row labelled 0 (AHR).
reactome_process_df.loc[0, OPENTARGETS_REACTOME_COL]

[{'pathway_label': 'Endogenous sterols', 'pathway_id': 'R-HSA-211976'},
 {'pathway_label': 'PPARA activates gene expression',
  'pathway_id': 'R-HSA-1989781'},
 {'pathway_label': 'Phase I - Functionalization of compounds',
  'pathway_id': 'R-HSA-211945'},
 {'pathway_label': 'Xenobiotics', 'pathway_id': 'R-HSA-211981'},
 {'pathway_label': 'Aryl hydrocarbon receptor signalling',
  'pathway_id': 'R-HSA-8937144'}]

### Gene Ontology from OpenTargets

In [19]:
# Retrieve Gene Ontology annotations for the mapped genes through OpenTargets.
# Returns the annotated dataframe and a metadata object.
go_process_df, opentargets_go_metadata = opentargets.get_gene_go_process(bridgedb_df=bridgdb_df)
go_process_df.head()

Unnamed: 0,identifier,identifier.source,target,target.source,OpenTargets_GO
0,AHR,HGNC,ENSG00000106546,Ensembl,"[{'go_id': 'GO:0005667', 'go_name': 'transcrip..."
1,CHRNG,HGNC,ENSG00000196811,Ensembl,"[{'go_id': 'GO:0015464', 'go_name': 'acetylcho..."
2,DMD,HGNC,ENSG00000198947,Ensembl,"[{'go_id': 'GO:0016010', 'go_name': 'dystrophi..."
3,SCN4A,HGNC,ENSG00000007314,Ensembl,"[{'go_id': 'GO:0019228', 'go_name': 'neuronal ..."
4,SLC25A1,HGNC,ENSG00000100075,Ensembl,"[{'go_id': 'GO:0006843', 'go_name': 'mitochond..."


In [20]:
# Inspect the Gene Ontology records for the row labelled 0 (AHR).
go_process_df.loc[0, OPENTARGETS_GO_COL]

[{'go_id': 'GO:0005667', 'go_name': 'transcription regulator complex'},
 {'go_id': 'GO:0004879', 'go_name': 'nuclear receptor activity'},
 {'go_id': 'GO:0005634', 'go_name': 'nucleus'},
 {'go_id': 'GO:0046982', 'go_name': 'protein heterodimerization activity'},
 {'go_id': 'GO:0009410', 'go_name': 'response to xenobiotic stimulus'},
 {'go_id': 'GO:0000976',
  'go_name': 'transcription cis-regulatory region binding'},
 {'go_id': 'GO:0007049', 'go_name': 'cell cycle'},
 {'go_id': 'GO:0005829', 'go_name': 'cytosol'},
 {'go_id': 'GO:0030888', 'go_name': 'regulation of B cell proliferation'},
 {'go_id': 'GO:0009636', 'go_name': 'response to toxic substance'},
 {'go_id': 'GO:0051879', 'go_name': 'Hsp90 protein binding'},
 {'go_id': 'GO:0001094',
  'go_name': 'TFIID-class transcription factor complex binding'},
 {'go_id': 'GO:0001568', 'go_name': 'blood vessel development'},
 {'go_id': 'GO:0071320', 'go_name': 'cellular response to cAMP'},
 {'go_id': 'GO:0000987',
  'go_name': 'cis-regulatory 

### Gene location annotation from OpenTargets

In [21]:
# Retrieve subcellular location annotations for the mapped genes through OpenTargets.
# Returns the annotated dataframe and a metadata object.
loc_df, opentargets_loc_metadata = opentargets.get_gene_location(bridgedb_df=bridgdb_df)
loc_df.head()

Unnamed: 0,identifier,identifier.source,target,target.source,OpenTargets_Location
0,AHR,HGNC,ENSG00000106546,Ensembl,"[{'location_id': 'SL-0086', 'location': 'Cytop..."
1,CHRNG,HGNC,ENSG00000196811,Ensembl,"[{'location_id': 'SL-0219', 'location': 'Posts..."
2,DMD,HGNC,ENSG00000198947,Ensembl,"[{'location_id': 'SL-0039', 'location': 'Cell ..."
3,SCN4A,HGNC,ENSG00000007314,Ensembl,"[{'location_id': 'SL-0039', 'location': 'Cell ..."
4,SLC25A1,HGNC,ENSG00000100075,Ensembl,"[{'location_id': 'SL-0168', 'location': 'Mitoc..."


In [22]:
# Inspect the location records for the row labelled 0 (AHR).
loc_df.loc[0, OPENTARGETS_LOCATION_COL]

[{'location_id': 'SL-0086',
  'location': 'Cytoplasm',
  'subcellular_location': 'Cellular component'},
 {'location_id': 'SL-0191',
  'location': 'Nucleus',
  'subcellular_location': 'Cellular component'},
 {'location_id': 'SL-0091',
  'location': 'Cytosol',
  'subcellular_location': 'Cytosol'},
 {'location_id': 'SL-0190',
  'location': 'Nucleoplasm',
  'subcellular_location': 'Nucleoplasm'}]

### Compounds from OpenTargets

In [23]:
# Retrieve compounds that interact with the mapped genes through OpenTargets.
# Returns the annotated dataframe and a metadata object.
drug_df, opentargets_drug_metadata = opentargets.get_gene_compound_interactions(
    bridgedb_df=bridgdb_df
)
drug_df.head()

Unnamed: 0,identifier,identifier.source,target,target.source,OpenTargets_Compounds
0,AHR,HGNC,ENSG00000106546,Ensembl,"[{'chembl_id': 'CHEMBL259571', 'compound_name'..."
1,CHRNG,HGNC,ENSG00000196811,Ensembl,"[{'chembl_id': 'CHEMBL1200641', 'compound_name..."
2,DMD,HGNC,ENSG00000198947,Ensembl,"[{'chembl_id': 'CHEMBL2108278', 'compound_name..."
3,SCN4A,HGNC,ENSG00000007314,Ensembl,"[{'chembl_id': 'CHEMBL1098', 'compound_name': ..."
4,SLC25A1,HGNC,ENSG00000100075,Ensembl,"[{'chembl_id': nan, 'compound_name': nan, 'is_..."


In [24]:
# Inspect the compound records for the row labelled 0 (AHR).
# NOTE(review): the captured output repeats the same TAPINAROF record many
# times; the records do not appear to be de-duplicated upstream - verify.
drug_df.loc[0, OPENTARGETS_COMPOUND_COL]

[{'chembl_id': 'CHEMBL259571',
  'compound_name': 'TAPINAROF',
  'is_approved': True,
  'relation': 'activates'},
 {'chembl_id': 'CHEMBL259571',
  'compound_name': 'TAPINAROF',
  'is_approved': True,
  'relation': 'activates'},
 {'chembl_id': 'CHEMBL259571',
  'compound_name': 'TAPINAROF',
  'is_approved': True,
  'relation': 'activates'},
 {'chembl_id': 'CHEMBL259571',
  'compound_name': 'TAPINAROF',
  'is_approved': True,
  'relation': 'activates'},
 {'chembl_id': 'CHEMBL259571',
  'compound_name': 'TAPINAROF',
  'is_approved': True,
  'relation': 'activates'},
 {'chembl_id': 'CHEMBL259571',
  'compound_name': 'TAPINAROF',
  'is_approved': True,
  'relation': 'activates'},
 {'chembl_id': 'CHEMBL259571',
  'compound_name': 'TAPINAROF',
  'is_approved': True,
  'relation': 'activates'},
 {'chembl_id': 'CHEMBL259571',
  'compound_name': 'TAPINAROF',
  'is_approved': True,
  'relation': 'activates'},
 {'chembl_id': 'CHEMBL259571',
  'compound_name': 'TAPINAROF',
  'is_approved': True,
  

### Transporter inhibitors from MolMeDB

In [25]:
# Retrieve transporter inhibitors for the mapped genes from MolMeDB.
# Returns the annotated dataframe and a metadata object.
inhibitor_df, inhibitor_metadata = molmedb.get_gene_compound_inhibitor(bridgedb_df=bridgdb_df)
inhibitor_df.head()

Unnamed: 0,identifier,identifier.source,target,target.source,MolMeDB_transporter_inhibitor
0,AHR,HGNC,A0A024R9Z8,Uniprot-TrEMBL,"[{'compound_name': nan, 'InChIKey': nan, 'SMIL..."
1,CHRNG,HGNC,A0A6F7YAP6,Uniprot-TrEMBL,"[{'compound_name': nan, 'InChIKey': nan, 'SMIL..."
2,DMD,HGNC,A0A075B6G3,Uniprot-TrEMBL,"[{'compound_name': nan, 'InChIKey': nan, 'SMIL..."
3,SCN4A,HGNC,P35499,Uniprot-TrEMBL,"[{'compound_name': '3-phenyl-1h-pyrazole', 'In..."
4,SLC25A1,HGNC,B4DP62,Uniprot-TrEMBL,"[{'compound_name': nan, 'InChIKey': nan, 'SMIL..."


In [26]:
# Inspect the MolMeDB inhibitor records for the row labelled 3 (SCN4A),
# the only gene in the preview above with non-empty results.
inhibitor_df.loc[3, MOLMEDB_INHIBITOR_COL]

[{'compound_name': '3-phenyl-1h-pyrazole',
  'InChIKey': 'OEDUIFSDODUDRK-UHFFFAOYSA-N',
  'SMILES': 'c1ccc(-c2ccn[nH]2)cc1',
  'compound_cid': 17155,
  'molmedb_id': 'MM467646',
  'source_doi': 'doi:10.1021/jm030498q',
  'source_pmid': 14998340,
  'chebi_id': nan,
  'pdb_ligand_id': nan,
  'drugbank_id': nan},
 {'compound_name': '5-(4-phenoxyphenyl)-1h-pyrazole',
  'InChIKey': 'SGQSMRQYEPPNIA-UHFFFAOYSA-N',
  'SMILES': 'c1ccc(Oc2ccc(-c3ccn[nH]3)cc2)cc1',
  'compound_cid': 2806138,
  'molmedb_id': 'MM467662',
  'source_doi': 'doi:10.1021/jm030498q',
  'source_pmid': 14998340,
  'chebi_id': nan,
  'pdb_ligand_id': nan,
  'drugbank_id': nan},
 {'compound_name': 'MM29412',
  'InChIKey': 'VNGNMTZTIGDQCD-UHFFFAOYSA-N',
  'SMILES': 'CCC1NC(=O)CNC(=O)C(CC(C)C)NC(=O)C(NC(=O)C(N)CC(=O)O)CSSCC2NC(=O)C3CSSCC(C(=O)NC(CCCCN)C(=O)NC(Cc4ccc(O)cc4)C(=O)NC(C(=O)NC(Cc4ccccc4)C(N)=O)C(C)C)NC(=O)C(Cc4c[nH]c5ccccc45)NC(=O)C(CCCCN)NC(=O)C(Cc4cnc[nH]4)NC(=O)C(C(C)O)NC(=O)C(CCCNC(=N)N)NC(=O)C(CO)NC(=O)C(CSSCC(

### Screening results of compounds on the proteins encoded by the genes, from PubChem

In [27]:
# Retrieve PubChem assay results for molecules screened against the proteins
# of the mapped genes. Returns the annotated dataframe and a metadata object.
# Unlike most other sources, this result has several rows per gene
# (one per UniProt target), as the preview below shows.
pubchem_assay_df, pubchem_assay_metadata = pubchem.get_protein_molecule_screened(
    bridgedb_df=bridgdb_df
)
pubchem_assay_df.head()

Unnamed: 0,identifier,identifier.source,target,target.source,PubChem_Assays
0,CHRNG,HGNC,P07510,Uniprot-TrEMBL,
1,CHRNG,HGNC,A0A6F7YAP6,Uniprot-TrEMBL,
2,DMD,HGNC,A0A5H1ZRR9,Uniprot-TrEMBL,
3,DMD,HGNC,A0A5H1ZRQ8,Uniprot-TrEMBL,
4,DMD,HGNC,A0A5H1ZRQ1,Uniprot-TrEMBL,


In [29]:
# Inspect the PubChem assay value stored for the row labelled 10.
pubchem_assay_df.loc[10, PUBCHEM_Assays_COL]

### Protein-Protein interactions from STRING

In [30]:
# Retrieve protein-protein interactions among the mapped genes from STRING.
# Returns the annotated dataframe and a metadata object.
# Note that the rows come back in input order (CHRNG, DMD, AHR, ...),
# not alphabetically as in the other annotator outputs.
ppi_df, ppi_metadata = stringdb.get_ppi(bridgedb_df=bridgdb_df)
ppi_df.head()

Unnamed: 0,identifier,identifier.source,target,target.source,StringDB
0,CHRNG,HGNC,ENSG00000196811,Ensembl,"[{'stringdb_link_to': 'SCN4A', 'Ensembl': 'ENS..."
1,DMD,HGNC,ENSG00000198947,Ensembl,"[{'stringdb_link_to': 'SCN4A', 'Ensembl': 'ENS..."
2,AHR,HGNC,ENSG00000106546,Ensembl,[]
3,SCN4A,HGNC,ENSG00000007314,Ensembl,"[{'stringdb_link_to': 'DMD', 'Ensembl': 'ENSP0..."
4,SLC25A1,HGNC,ENSG00000100075,Ensembl,[]


In [31]:
# Inspect the STRING interaction records for the row labelled 0 (CHRNG).
ppi_df.loc[0, STRING]

[{'stringdb_link_to': 'SCN4A', 'Ensembl': 'ENSP00000396320', 'score': 0.454}]

### Combining all the results into a single dataframe

In [32]:
# Merge the per-source annotation dataframes into one wide dataframe with one
# annotation column per source.
# NOTE(review): the captured outputs suggest the sources are aligned by row
# position rather than by identifier - e.g. AHR has an empty StringDB list in
# `ppi_df` but a SCN4A link in the combined preview, and the result has 48 rows
# for 5 input genes. Verify how combine_sources joins its inputs (and whether
# inputs must share the same row order / one row per gene) before trusting
# the combined annotations.
combined_df = combine_sources(
    [
        bgee_df,
        disgenet_df,
        minerva_df,
        wikipathways_df,
        reactome_process_df,
        go_process_df,
        loc_df,
        disease_df,
        drug_df,
        inhibitor_df,
        pubchem_assay_df,
        ppi_df,
    ]
)

In [33]:
# Preview the first four rows of the combined dataframe.
combined_df.head(4)

Unnamed: 0,identifier,identifier.source,target,target.source,Bgee,DisGeNET,MINERVA,WikiPathways,OpenTargets_Reactome,OpenTargets_GO,OpenTargets_Location,OpenTargets_Diseases,OpenTargets_Compounds,MolMeDB_transporter_inhibitor,PubChem_Assays,StringDB
0,AHR,HGNC,ENSG00000106546,Ensembl,"[{'anatomical_entity_id': 'UBERON_0000178', 'a...","[{'disease_id': 'umls:C0007102', 'disease_name...","[{'pathway_id': 953.0, 'pathway_label': 'Kynur...","[{'pathway_id': 'WP3869', 'pathway_label': 'Ca...","[{'pathway_label': 'Endogenous sterols', 'path...","[{'go_id': 'GO:0005667', 'go_name': 'transcrip...","[{'location_id': 'SL-0086', 'location': 'Cytop...","[{'disease_id': 'umls:C0033860', 'disease_name...","[{'chembl_id': 'CHEMBL259571', 'compound_name'...","[{'compound_name': nan, 'InChIKey': nan, 'SMIL...",,"[{'stringdb_link_to': 'SCN4A', 'Ensembl': 'ENS..."
1,CHRNG,HGNC,ENSG00000196811,Ensembl,"[{'anatomical_entity_id': 'UBERON_0000178', 'a...","[{'disease_id': 'umls:C0026034', 'disease_name...","[{'pathway_id': nan, 'pathway_label': nan, 'pa...","[{'pathway_id': nan, 'pathway_label': nan, 'pa...",[{'pathway_label': 'Highly sodium permeable po...,"[{'go_id': 'GO:0015464', 'go_name': 'acetylcho...","[{'location_id': 'SL-0219', 'location': 'Posts...","[{'disease_id': 'umls:C0085631', 'disease_name...","[{'chembl_id': 'CHEMBL1200641', 'compound_name...","[{'compound_name': nan, 'InChIKey': nan, 'SMIL...",,"[{'stringdb_link_to': 'SCN4A', 'Ensembl': 'ENS..."
2,DMD,HGNC,ENSG00000198947,Ensembl,"[{'anatomical_entity_id': 'UBERON_0000178', 'a...","[{'disease_id': 'umls:C0018801', 'disease_name...","[{'pathway_id': nan, 'pathway_label': nan, 'pa...","[{'pathway_id': 'WP2858', 'pathway_label': 'Ec...",[{'pathway_label': 'Striated Muscle Contractio...,"[{'go_id': 'GO:0016010', 'go_name': 'dystrophi...","[{'location_id': 'SL-0039', 'location': 'Cell ...","[{'disease_id': 'umls:C0013264', 'disease_name...","[{'chembl_id': 'CHEMBL2108278', 'compound_name...","[{'compound_name': nan, 'InChIKey': nan, 'SMIL...",,[]
3,SCN4A,HGNC,ENSG00000007314,Ensembl,"[{'anatomical_entity_id': 'UBERON_0000178', 'a...","[{'disease_id': 'umls:C0151468', 'disease_name...","[{'pathway_id': nan, 'pathway_label': nan, 'pa...","[{'pathway_id': nan, 'pathway_label': nan, 'pa...",[{'pathway_label': 'Phase 0 - rapid depolarisa...,"[{'go_id': 'GO:0019228', 'go_name': 'neuronal ...","[{'location_id': 'SL-0039', 'location': 'Cell ...","[{'disease_id': 'umls:C0037011', 'disease_name...","[{'chembl_id': 'CHEMBL1098', 'compound_name': ...","[{'compound_name': '3-phenyl-1h-pyrazole', 'In...",,"[{'stringdb_link_to': 'DMD', 'Ensembl': 'ENSP0..."


In [34]:
# (rows, columns) of the combined dataframe.
combined_df.shape

(48, 16)

### Exporting the combined dataframe in pickle format

In [35]:
# Serialize the combined dataframe to "combined_df.pkl" in the current working directory.
with open("combined_df.pkl", "wb") as out:
    pickle.dump(combined_df, out)

## Creating a graph from the annotated dataframe

In [36]:
# Read the annotated dataframe back from the pickle file "combined_df.pkl".
# Pickle files can execute arbitrary code when loaded; only load files you created yourself.
fuse_df = generator.load_dataframe_from_pickle("combined_df.pkl")

In [None]:
# Build the graph from the annotated dataframe. The result is later drawn with
# networkx, so it is presumably a networkx graph object - confirm the exact type.
pygraph = generator.networkx_graph(fuse_df)

### Store the graph

In [None]:
# Serialize the graph object to "networkx_graph.pkl" in the current working directory.
with open("networkx_graph.pkl", "wb") as out:
    pickle.dump(pygraph, out)

## Visualize the graph

In [None]:
# Open a large canvas (figure number 3, 30 x 30 inches), lay the nodes out
# on a circle and draw the graph onto it.
plt.figure(3, figsize=(30, 30))
pos = nx.circular_layout(pygraph)
nx.draw(pygraph, pos=pos)
plt.show()

In [None]:
# Graph exporters. NOTE(review): this import sits outside the top import cell;
# presumably it is deferred because the exporters need extra setup - confirm,
# otherwise move it to the import cell.
from pyBiodatafuse.graph import cytoscape, neo4j

# Write the graph to "graph_to-test.graphml" for import into Neo4j.
neo4j.save_graph_to_neo4j_graphml(pygraph, output_path="graph_to-test.graphml")
# Load the graph into Cytoscape as a network named "test".
# Presumably requires a running Cytoscape instance - TODO confirm.
cytoscape.load_graph_into_cytoscape(pygraph, network_name="test")