In [1]:
import sys, os, io, json
from neo4j import GraphDatabase
from py2neo import Graph
from pathlib import Path
from pandas import DataFrame
import pandas as pd
import networkx as nx

# Both clients below connect to the same Bolt endpoint on localhost.
BOLT_URI = "bolt://localhost:7687"

# py2neo handle; the query helpers in this notebook call graph.run(...).
graph = Graph(BOLT_URI)

# Official neo4j driver for the same endpoint, created with auth disabled.
driver = GraphDatabase.driver(BOLT_URI, auth=None)

## For a set of chemicals, what are the associated genes and disease associations?




In [2]:
def chemical_to_diseases(chem_id: str) -> DataFrame:
    """Find diseases linked to a chemical through the genes it affects.

    Traverses
    chemical -[biolink:affects]-> gene <-[ro:causally_related_to]- disease
    starting from the identifier node whose ``id`` property equals ``chem_id``.

    Note: other cells of this notebook define a function with this same name;
    whichever definition was executed last is the one in effect.

    Args:
        chem_id: Identifier of the chemical, e.g. ``'chebi:6801'``.

    Returns:
        A DataFrame with one row per matched path, holding the first
        ``grebi:name`` of the chemical, gene and disease plus the
        ``grebi:datasources`` of both relationships. The frame is empty when
        the query returns no rows.
    """
    # The identifier node is written `id:Id` (variable:label), as in the other
    # queries of this notebook. A bare `(Id {...})` would declare a variable
    # called `Id` with no label, so the match would not be limited to Id nodes.
    query = """
MATCH (id:Id { id: $id })<-[:id]-(chemical:GraphNode)-[r1:`biolink:affects`]->(affected:`biolink:Gene`)<-[r2:`ro:causally_related_to`]-(disease:GraphNode)
RETURN chemical.`grebi:name`[0] as chemical, affected.`grebi:name`[0] as affected_entity, disease.`grebi:name`[0] as disease, r1.`grebi:datasources` as affected_source, r2.`grebi:datasources` as cause_source
"""
    records = graph.run(query, { 'id': chem_id }).data()
    return DataFrame(records)

# Identifiers of the chemicals to query.
chemical_ids = (
    'chebi:6801',    # Metformin
    'chebi:132447',  # PFOS
    'chebi:91275',   # BBN
    'chebi:5063',    # Fipronil
)

# One query per chemical, stacked into a single frame with a fresh index.
df = pd.concat(
    [chemical_to_diseases(chem_id) for chem_id in chemical_ids],
    ignore_index=True,
)

print(df.head(5).to_markdown())

# Pass the path so pandas opens the file itself. A text handle opened with
# plain open(..., 'w') uses the platform's default encoding and newline
# translation, which pandas advises against for CSV output.
df.to_csv('chemicals_and_diseases.csv', index=False)


|    | chemical   | affected_entity   | disease                                            | affected_source        | cause_source   |
|---:|:-----------|:------------------|:---------------------------------------------------|:-----------------------|:---------------|
|  0 | Metformin  | CTD Gene:2335 FN1 | spondylometaphyseal dysplasia corner fracture type | ['Robokop.TextMining'] | ['UberGraph']  |
|  1 | Metformin  | CTD Gene:2335 FN1 | spondylometaphyseal dysplasia corner fracture type | ['Robokop.CAM']        | ['UberGraph']  |
|  2 | Metformin  | CTD Gene:2335 FN1 | plasma fibronectin deficiency                      | ['Robokop.TextMining'] | ['UberGraph']  |
|  3 | Metformin  | CTD Gene:2335 FN1 | plasma fibronectin deficiency                      | ['Robokop.CAM']        | ['UberGraph']  |
|  4 | Metformin  | CTD Gene:2335 FN1 | glomerulopathy with fibronectin deposits 2         | ['Robokop.TextMining'] | ['UberGraph']  |


## For a set of chemicals, what are the associated genes and disease associations for these genes from OpenTargets?

In [3]:
def chemical_to_diseases(chem_id: str, min_score: float = 0.9) -> DataFrame:
    """Find diseases linked to the genes a chemical affects via scored evidence.

    Traverses
    chemical -[biolink:affects]-> affected <-[otar:targetId]- evidence
    -[otar:diseaseId]-> disease
    and keeps only evidence whose first ``otar:score`` value, converted to a
    float, is at least ``min_score``. The notebook heading attributes these
    evidence nodes to OpenTargets.

    Note: other cells of this notebook define a function with this same name;
    whichever definition was executed last is the one in effect.

    Args:
        chem_id: Identifier of the chemical, e.g. ``'chebi:6801'``.
        min_score: Inclusive lower bound on the evidence score. Defaults to
            0.9, the cutoff that was previously hard-coded in the query.

    Returns:
        A DataFrame of distinct (chemical, affected_entity, disease, score,
        affected_source) rows. The frame is empty when the query returns no
        rows.
    """
    # The cutoff is sent as a query parameter rather than formatted into the
    # Cypher text, so the statement itself stays constant.
    query = """
MATCH (id:Id { id: $id })<-[:id]-(chemical:GraphNode)-[r1:`biolink:affects`]->(affected)<-[:`otar:targetId`]-(evidence)-[:`otar:diseaseId`]->(disease)
USING INDEX id:Id(id)
WHERE toFloat(evidence.`otar:score`[0]) >= $min_score
RETURN DISTINCT chemical.`grebi:name`[0] as chemical, affected.`grebi:name`[0] as affected_entity, disease.`grebi:name`[0] as disease, evidence.`otar:score`[0] AS score, r1.`grebi:datasources` as affected_source
"""
    params = { 'id': chem_id, 'min_score': float(min_score) }
    return DataFrame(graph.run(query, params).data())

# Identifiers of the chemicals to query.
chemical_ids = (
    'chebi:6801',    # Metformin
    'chebi:132447',  # PFOS
    'chebi:91275',   # BBN
    'chebi:5063',    # Fipronil
)

# One query per chemical, stacked into a single frame with a fresh index.
df = pd.concat(
    [chemical_to_diseases(chem_id) for chem_id in chemical_ids],
    ignore_index=True,
)

print(df.head(5).to_markdown())

# Pass the path so pandas opens the file itself. A text handle opened with
# plain open(..., 'w') uses the platform's default encoding and newline
# translation, which pandas advises against for CSV output.
df.to_csv('chemicals_and_diseases_otar.csv', index=False)

|    | chemical   | affected_entity     | disease                                                    |   score | affected_source        |
|---:|:-----------|:--------------------|:-----------------------------------------------------------|--------:|:-----------------------|
|  0 | Metformin  | CTD Gene:59067 IL21 | Rheumatoid Arthritis                                       |       1 | ['Robokop.TextMining'] |
|  1 | Metformin  | CTD Gene:59067 IL21 | Bovine Neoplasm                                            |       1 | ['Robokop.TextMining'] |
|  2 | Metformin  | CTD Gene:59067 IL21 | Autosomal recessive early-onset inflammatory bowel disease |       1 | ['Robokop.TextMining'] |
|  3 | Metformin  | CTD Gene:59067 IL21 | Dermatomyositis                                            |       1 | ['Robokop.TextMining'] |
|  4 | Metformin  | CTD Gene:59067 IL21 | B-cell chronic lymphocytic leukemia                        |       1 | ['Robokop.TextMining'] |


### As above but one row per disease

In [4]:
def chemical_to_diseases(chem_id: str, min_score: float = 0.9) -> DataFrame:
    """Summarise, per disease, the genes a chemical affects via scored evidence.

    Traverses
    chemical -[biolink:affects]-> gene <-[otar:targetId]- evidence
    -[otar:diseaseId]-> disease
    keeping only evidence whose first ``otar:score`` value, converted to a
    float, is at least ``min_score``. Rows are then aggregated by the returned
    chemical name and disease name.

    Note: other cells of this notebook define a function with this same name;
    whichever definition was executed last is the one in effect.

    Args:
        chem_id: Identifier of the chemical, e.g. ``'chebi:6801'``.
        min_score: Inclusive lower bound on the evidence score. Defaults to
            0.9, the cutoff that was previously hard-coded in the query.

    Returns:
        A DataFrame with columns ``chemical``, ``disease``, ``genes`` (distinct
        gene names), ``num_genes`` (count of distinct gene nodes),
        ``avg_score`` (mean of the qualifying evidence scores) and
        ``chem_to_gene_sources`` (distinct datasource lists of the
        chemical-to-gene relationship). The frame is empty when the query
        returns no rows.
    """
    # The cutoff is sent as a query parameter rather than formatted into the
    # Cypher text, so the statement itself stays constant.
    query = """
MATCH (id:Id { id: $id })<-[:id]-(chemical:GraphNode)-[r1:`biolink:affects`]->(gene)<-[:`otar:targetId`]-(evidence)-[:`otar:diseaseId`]->(disease)
USING INDEX id:Id(id)
WHERE toFloat(evidence.`otar:score`[0]) >= $min_score
RETURN DISTINCT chemical.`grebi:name`[0] as chemical, disease.`grebi:name`[0] as disease, collect(DISTINCT gene.`grebi:name`[0]) as genes, count(DISTINCT gene) AS num_genes, avg(toFloat(evidence.`otar:score`[0])) AS avg_score, collect(DISTINCT r1.`grebi:datasources`) as chem_to_gene_sources
"""
    params = { 'id': chem_id, 'min_score': float(min_score) }
    return DataFrame(graph.run(query, params).data())

# Identifiers of the chemicals to query.
chemical_ids = (
    'chebi:6801',    # Metformin
    'chebi:132447',  # PFOS
    'chebi:91275',   # BBN
    'chebi:5063',    # Fipronil
)

# One query per chemical, stacked into a single frame with a fresh index.
df = pd.concat(
    [chemical_to_diseases(chem_id) for chem_id in chemical_ids],
    ignore_index=True,
)

print(df.head(5).to_markdown())

# Pass the path so pandas opens the file itself. A text handle opened with
# plain open(..., 'w') uses the platform's default encoding and newline
# translation, which pandas advises against for CSV output.
df.to_csv('chemicals_and_diseases_otar_by_disease.csv', index=False)

|    | chemical   | disease                                                    | genes                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  