In [1]:
#Import libraries
import json
import pandas as pd
from pyvis.network import Network

In [2]:
# Define function to Extract subject-predicate-object values 
def sub_pre_obj (my_dir):
    df_list = []
    with open (my_dir) as f:
            data = json.load(f)
    for rec in data["message"]["knowledge_graph"]["edges"].values():
        #Empty data frame 
        column_names = ["subject", "predicate", "object", "original_object_name", 'original_subject_name', 'biolink:original_predicate']
        df = pd.DataFrame(columns = column_names)
        for att in rec['attributes']:
            if att['attribute_type_id'] in ["original_object_name", 'original_subject_name', 'biolink:original_predicate']:
                df[att['attribute_type_id']] = [att['value']]
        df["subject"]=rec["subject"], 
        df["predicate"]=rec["predicate"], 
        df["object"]=rec ["object"]
        df_list.append(df)
    df_all = pd.concat(df_list)
    return (df_all)

### Pulmonary hypertension - Phenotypic Features - Iloprost

In [3]:
#Import file 
my_dir= "../results/Disease_SmallMolecule_PhenotypicFeature_/MONDO:0015924_Disease_SmallMolecule_PhenotypicFeature_CHEMBL494"

In [4]:
df_all = sub_pre_obj(my_dir)
set_obj = df_all[0:5] #Select sobe subjects 
set_obj

Unnamed: 0,subject,predicate,object,original_object_name,original_subject_name,biolink:original_predicate
0,MONDO:0015924,biolink:related_to,HP:0002105,"Hypertension, Pulmonary",Hemoptysis,CAUSES
0,MONDO:0015924,biolink:related_to,HP:0012378,Fatigue,"Hypertension, Pulmonary",COEXISTS_WITH
0,MONDO:0015924,biolink:related_to,HP:0100749,Chest Pain,"Hypertension, Pulmonary",CAUSES
0,MONDO:0015924,biolink:related_to,HP:0002094,"Hypertension, Pulmonary",Dyspnea,DIAGNOSES
0,MONDO:0015924,biolink:related_to,HP:0025406,"Hypertension, Pulmonary",Asthenia,DIAGNOSES


In [5]:
#Select subjects that has object of interest. That way we can connect them 
set_sub = df_all[df_all["subject"].isin(["HP:0002105","HP:0012378" , "HP:0100749","HP:0002094",  "HP:0025406"])]
set_sub

Unnamed: 0,subject,predicate,object,original_object_name,original_subject_name,biolink:original_predicate
0,HP:0002105,biolink:adverse_event_caused_by,PUBCHEM.COMPOUND:5311181,,,
0,HP:0100749,biolink:adverse_event_caused_by,PUBCHEM.COMPOUND:5311181,,,
0,HP:0002094,biolink:adverse_event_caused_by,PUBCHEM.COMPOUND:5311181,,,
0,HP:0012378,biolink:adverse_event_caused_by,PUBCHEM.COMPOUND:5311181,,,
0,HP:0025406,biolink:adverse_event_caused_by,PUBCHEM.COMPOUND:5311181,,,


In [6]:
relations = pd.concat([set_obj, set_sub])
relations

Unnamed: 0,subject,predicate,object,original_object_name,original_subject_name,biolink:original_predicate
0,MONDO:0015924,biolink:related_to,HP:0002105,"Hypertension, Pulmonary",Hemoptysis,CAUSES
0,MONDO:0015924,biolink:related_to,HP:0012378,Fatigue,"Hypertension, Pulmonary",COEXISTS_WITH
0,MONDO:0015924,biolink:related_to,HP:0100749,Chest Pain,"Hypertension, Pulmonary",CAUSES
0,MONDO:0015924,biolink:related_to,HP:0002094,"Hypertension, Pulmonary",Dyspnea,DIAGNOSES
0,MONDO:0015924,biolink:related_to,HP:0025406,"Hypertension, Pulmonary",Asthenia,DIAGNOSES
0,HP:0002105,biolink:adverse_event_caused_by,PUBCHEM.COMPOUND:5311181,,,
0,HP:0100749,biolink:adverse_event_caused_by,PUBCHEM.COMPOUND:5311181,,,
0,HP:0002094,biolink:adverse_event_caused_by,PUBCHEM.COMPOUND:5311181,,,
0,HP:0012378,biolink:adverse_event_caused_by,PUBCHEM.COMPOUND:5311181,,,
0,HP:0025406,biolink:adverse_event_caused_by,PUBCHEM.COMPOUND:5311181,,,


In [7]:
#Networkx visualization 
got_net = Network(height='750px', width='100%', bgcolor='#222222', font_color='black')

# set the physics layout of the network

sources = relations['subject']
targets =relations["object"]
weights = relations["predicate"]

edge_data = zip(sources, targets, weights)

for e in edge_data:
    src = e[0]
    dst = e[1]
    w = e[2]

    got_net.add_node(src, src, title=src)
    got_net.add_node(dst, dst, title=dst)
    got_net.add_edge(src, dst, value=w)

neighbor_map = got_net.get_adj_list()

# add neighbor data to node hover data
for node in got_net.nodes:
    node['title'] += ' Neighbors:<br>' + '<br>'.join(neighbor_map[node['id']])
    node['value'] = len(neighbor_map[node['id']])

got_net.show("MONDO:0015924-pheno.html")

### Pulmonary hypertension - Genes - Iloprost

In [8]:
my_dir_gene_com= "../results/Disease_SmallMolecule_Gene_//MONDO:0015924_Disease_SmallMolecule_Gene_CHEMBL494"

In [9]:
df_all = sub_pre_obj(my_dir_gene_com)
set_obj = df_all[0:4] #Select sobe subjects 
set_obj

Unnamed: 0,subject,predicate,object,original_object_name,original_subject_name,biolink:original_predicate
0,MONDO:0015924,biolink:related_to,NCBIGene:6347,"Hypertension, Pulmonary",CCL2 gene,ASSOCIATED_WITH
0,MONDO:0015924,biolink:related_to,NCBIGene:3717,"Hypertension, Pulmonary",JAK2 gene,ASSOCIATED_WITH
0,MONDO:0015924,biolink:related_to,NCBIGene:284,"Hypertension, Pulmonary",ANGPT1 gene,ASSOCIATED_WITH
0,MONDO:0015924,biolink:related_to,NCBIGene:3689,"Hypertension, Pulmonary",ITGB2 gene,ASSOCIATED_WITH


In [10]:
set_sub = df_all[df_all["subject"].isin(["NCBIGene:6347", "NCBIGene:3717", "NCBIGene:284", "NCBIGene:3689"])]
set_sub

Unnamed: 0,subject,predicate,object,original_object_name,original_subject_name,biolink:original_predicate
0,NCBIGene:284,biolink:entity_positively_regulated_by_entity,PUBCHEM.COMPOUND:5311181,,,
0,NCBIGene:3717,biolink:entity_negatively_regulated_by_entity,PUBCHEM.COMPOUND:5311181,,,
0,NCBIGene:3689,biolink:entity_negatively_regulated_by_entity,PUBCHEM.COMPOUND:5311181,,,
0,NCBIGene:6347,biolink:entity_negatively_regulated_by_entity,PUBCHEM.COMPOUND:5311181,,,


In [11]:
relations = pd.concat([set_obj, set_sub])
relations

Unnamed: 0,subject,predicate,object,original_object_name,original_subject_name,biolink:original_predicate
0,MONDO:0015924,biolink:related_to,NCBIGene:6347,"Hypertension, Pulmonary",CCL2 gene,ASSOCIATED_WITH
0,MONDO:0015924,biolink:related_to,NCBIGene:3717,"Hypertension, Pulmonary",JAK2 gene,ASSOCIATED_WITH
0,MONDO:0015924,biolink:related_to,NCBIGene:284,"Hypertension, Pulmonary",ANGPT1 gene,ASSOCIATED_WITH
0,MONDO:0015924,biolink:related_to,NCBIGene:3689,"Hypertension, Pulmonary",ITGB2 gene,ASSOCIATED_WITH
0,NCBIGene:284,biolink:entity_positively_regulated_by_entity,PUBCHEM.COMPOUND:5311181,,,
0,NCBIGene:3717,biolink:entity_negatively_regulated_by_entity,PUBCHEM.COMPOUND:5311181,,,
0,NCBIGene:3689,biolink:entity_negatively_regulated_by_entity,PUBCHEM.COMPOUND:5311181,,,
0,NCBIGene:6347,biolink:entity_negatively_regulated_by_entity,PUBCHEM.COMPOUND:5311181,,,


In [12]:
got_net = Network(height='750px', width='100%', bgcolor='#222222', font_color='black')

# set the physics layout of the network

sources = relations['subject']
targets = relations["object"]
weights = relations["predicate"]

edge_data = zip(sources, targets, weights)

for e in edge_data:
    src = e[0]
    dst = e[1]
    w = e[2]

    got_net.add_node(src, src, title=src)
    got_net.add_node(dst, dst, title=dst)
    got_net.add_edge(src, dst, value=w)

neighbor_map = got_net.get_adj_list()

# add neighbor data to node hover data
for node in got_net.nodes:
    node['title'] += ' Neighbors:<br>' + '<br>'.join(neighbor_map[node['id']])
    node['value'] = len(neighbor_map[node['id']])

got_net.show("MONDO:0015924-gene.html")

### Pulmonary hypertension - Genetic variants - Iloprost

In [13]:
my_dir_var= "../results/Disease_SmallMolecule_SequenceVariant_Gene_/MONDO:0015924_Disease_SmallMolecule_SequenceVariant_Gene_CHEMBL494"

In [14]:
df_all = sub_pre_obj(my_dir_var)
df_all

Unnamed: 0,subject,predicate,object,original_object_name,original_subject_name,biolink:original_predicate
0,MONDO:0015924,biolink:related_to,DBSNP:rs779721443,,,
0,DBSNP:rs779721443,biolink:is_sequence_variant_of,NCBIGene:4090,,,
0,NCBIGene:4090,biolink:entity_negatively_regulated_by_entity,PUBCHEM.COMPOUND:5311181,,,


In [15]:
got_net = Network(height='750px', width='100%', bgcolor='#222222', font_color='black')

# set the physics layout of the network

sources = df_all['subject']
targets =df_all["object"]
weights = df_all["predicate"]

edge_data = zip(sources, targets, weights)

for e in edge_data:
    src = e[0]
    dst = e[1]
    w = e[2]

    got_net.add_node(src, src, title=src)
    got_net.add_node(dst, dst, title=dst)
    got_net.add_edge(src, dst, title= w)

neighbor_map = got_net.get_adj_list()

# add neighbor data to node hover data
for node in got_net.nodes:
    node['title'] += ' Neighbors:<br>' + '<br>'.join(neighbor_map[node['id']])
    node['value'] = len(neighbor_map[node['id']])

got_net.show("MONDO:0015924_genetic_variants.html")

### Pulmonary hypertension - Phenotypic Features - Genes- Iloprost

In [19]:
my_dir_pheno_gene= "../results/Disease_SmallMolecule_PhenotypicFeature_Gene_/MONDO:0015924_Disease_SmallMolecule_PhenotypicFeature_Gene_CHEMBL494"

In [23]:
df_all = sub_pre_obj(my_dir_pheno_gene)
df_all[0:25]

Unnamed: 0,subject,predicate,object,original_object_name,original_subject_name,biolink:original_predicate
0,MONDO:0015924,biolink:related_to,HP:0002172,Pulmonary arterial hypertension,Imbalance,MANIFESTATION_OF
0,MONDO:0015924,biolink:related_to,HP:0002375,"Hypertension, Pulmonary",Hypokinesia,CAUSES
0,MONDO:0015924,biolink:related_to,HP:0030148,"Hypertension, Pulmonary",Heart murmur,ASSOCIATED_WITH
0,MONDO:0015924,biolink:related_to,HP:0004326,"Hypertension, Pulmonary",Cachexia,ASSOCIATED_WITH
0,MONDO:0015924,biolink:related_to,HP:0002105,"Hypertension, Pulmonary",Hemoptysis,CAUSES
0,MONDO:0015924,biolink:related_to,HP:0012378,Fatigue,"Hypertension, Pulmonary",COEXISTS_WITH
0,MONDO:0015924,biolink:related_to,MONDO:0005466,"Hypertension, Pulmonary",Hypersomnolence,ASSOCIATED_WITH
0,MONDO:0015924,biolink:related_to,HP:0100749,Chest Pain,"Hypertension, Pulmonary",CAUSES
0,MONDO:0015924,biolink:related_to,UMLS:C0278134,"Hypertension, Pulmonary",Lack of sensation,COEXISTS_WITH
0,MONDO:0015924,biolink:related_to,HP:0025143,"Hypertension, Pulmonary",Chills,ASSOCIATED_WITH


In [25]:
got_net = Network(height='750px', width='100%', bgcolor='#222222', font_color='black')

# set the physics layout of the network

sources = df_all['subject']
targets =df_all["object"]
weights = df_all["predicate"]

edge_data = zip(sources, targets, weights)

for e in edge_data:
    src = e[0]
    dst = e[1]
    w = e[2]

    got_net.add_node(src, src, title=src)
    got_net.add_node(dst, dst, title=dst)
    got_net.add_edge(src, dst, value=w)

neighbor_map = got_net.get_adj_list()

# add neighbor data to node hover data
for node in got_net.nodes:
    node['title'] += ' Neighbors:<br>' + '<br>'.join(neighbor_map[node['id']])
    node['value'] = len(neighbor_map[node['id']])

got_net.show("MONDO:0015924-pheno-gene.html")

### Pulmonary hypertension - Biological Process - Genes- Iloprost

In [26]:
my_dir_bp= "../results/Disease_SmallMolecule_BiologicalProcessOrActivity_/MONDO:0015924_Disease_SmallMolecule_BiologicalProcessOrActivity_CHEMBL494"

In [27]:
df_all = sub_pre_obj(my_dir_bp)
df_all

Unnamed: 0,subject,predicate,object,original_object_name,original_subject_name,biolink:original_predicate
0,MONDO:0015924,biolink:correlated_with,GO:0051344,,,
0,MONDO:0015924,biolink:correlated_with,GO:0042311,,,
0,GO:0042311,biolink:correlated_with,PUBCHEM.COMPOUND:5311181,,,
0,GO:0051344,biolink:correlated_with,PUBCHEM.COMPOUND:5311181,,,


In [28]:
got_net = Network(height='750px', width='100%', bgcolor='#222222', font_color='black')

# set the physics layout of the network

sources = df_all['subject']
targets =df_all["object"]
weights = df_all["predicate"]

edge_data = zip(sources, targets, weights)

for e in edge_data:
    src = e[0]
    dst = e[1]
    w = e[2]

    got_net.add_node(src, src, title=src)
    got_net.add_node(dst, dst, title=dst)
    got_net.add_edge(src, dst, value=w)

neighbor_map = got_net.get_adj_list()

# add neighbor data to node hover data
for node in got_net.nodes:
    node['title'] += ' Neighbors:<br>' + '<br>'.join(neighbor_map[node['id']])
    node['value'] = len(neighbor_map[node['id']])

got_net.show("MONDO:0015924_biological_process.html")