In [1]:
import os
import requests
import ipycytoscape
from py2neo import Graph
import seaborn as sns
import random
from dotenv import load_dotenv
import pandas as pd
from IPython.display import display, Markdown
import json

In [2]:
load_dotenv()
graph = Graph(os.getenv('NEO4j_URL'), auth=(os.getenv('NEO4J_USER'), os.getenv('NEO4J_PASSWORD')))

## Cypher Helper

In [3]:
style = [{
            "selector": 'node',
            "style": {
            'background-color': 'data(color)',
            'border-color': 'data(borderColor)',
            'border-width': 'data(borderWidth)',
            'label': 'data(label)',
            "text-valign": "center",
            "text-halign": "center",
            'width': "50",
            'height': "50",
            }
        },
        {
            "selector": 'edge',
            "style": {
            'curve-style': 'straight',
            'line-color': 'data(lineColor)',
            'width': '3',
            'label': 'data(relation)',
            "text-rotation": "autorotate",
            "text-margin-x": "0px",
            "text-margin-y": "0px",
            'font-size': '12px',
            'target-arrow-shape': "data(directed)",
            'target-endpoint': 'outside-to-node',
            'source-endpoint': 'outside-to-node',
            'target-arrow-color': 'data(lineColor)',
            }
        },
        {
            "selector": 'node.highlight',
            "style": {
                'border-color': 'gray',
                'border-width': '2px',
                'font-weight': 'bold',
                'font-size': '18px',
                'width': "90",
                'height': "90",
            }
        },
        {
            "selector": 'node.focused',
            "style": {
                'border-color': 'gray',
                'border-width': '2px',
                'font-weight': 'bold',
                'font-size': '18px',
                'width': "90",
                'height': "90",
            }
        },
        {
            "selector": 'edge.focusedColored',
            "style": {
                'line-color': '#F8333C',
                'width': '6'
            }
        },
        {
            "selector": 'node.semitransp',
            "style":{ 'opacity': '0.5' }
        },
        {
            "selector": 'node.focusedSemitransp',
            "style":{ 'opacity': '0.5' }
        },
        {
            "selector": 'edge.colored',
            "style": {
                'line-color': '#F8333C',
                'target-arrow-color': '#F8333C',
                'width': '6'
            }
        },
        {
            "selector": 'edge.semitransp',
            "style":{ 'opacity': '0.5' }
        },
        {
            "selector": 'edge.focusedSemitransp',
            "style":{ 'opacity': '0.5' }
        }]

In [4]:
palette = sns.color_palette().as_hex()
def cypher(query):
    if query == "": return
    results = graph.run(query).data()
    nodes = {}
    edges = []
    colors = {}
    relations = {}
    for i in results:
        for vals in i.values():
            for node in vals.nodes:
                label = str(node.labels)
                if label not in colors:
                    colors[label] = palette[len(colors) % len(sns.color_palette())]
                color = colors[label]
                n = {"kind": label, "color": color}
                for k,v in node.items():
                    n[k] = v
                if 'GTEXEXP' in n:
                    n["label"] = n['GTEXEXP']
                nodes[n["id"]] = n
            for relation in vals.relationships:
                r = {
                    "kind": "relation",
                    "source": relation.nodes[0]["id"],
                    "target": relation.nodes[1]["id"]
                    }
                for k,v in relation.items():
                    r[k] = v
                r["relation"] = r["relation"].replace("_"," ")
                edges.append(r)
                if r["relation"] not in relations:
                    relations[r["relation"]] = pd.DataFrame("-", index=[], columns=["name", "relation", "SAB", "evidence"])
    for edge in edges:
        start = nodes[edge["source"]]
        end = nodes[edge["target"]]
        relation = edge["relation"]
        if relation not in colors:
            colors[relation] = palette[len(colors) % len(sns.color_palette())]
        color = colors[relation]
    cytoscapeobj = ipycytoscape.CytoscapeWidget()
    cytoscapeobj.graph.add_graph_from_json({
        "nodes": list(nodes.values()),
        "edges": edges
    }) 
    cytoscapeobj.set_style(style)
    display(cytoscapeobj)


### Use Case 1
Find all CF data points that are linked to evidence related to <gene> in <tissue> and report the CF datasets where those data points are found.

In [30]:
gene_symbol = "FABP4 gene"
tissue = "breast epithelium"
limit = 5
query = '''
MATCH p=(a:`Gene or Genome` {label: "%s"})-[r1:expresses]-(b:GTEXEXP)-[r2:expresses]-(c:Tissue {label: "%s"}) 
ORDER BY r1.evidence DESC 
RETURN p
LIMIT %d
'''%(gene_symbol, tissue, limit)
table = cypher(query)

ClientError: [Statement.SyntaxError] Invalid input 'ORDER': expected
  ","
  "CALL"
  "CREATE"
  "DELETE"
  "DETACH"
  "FOREACH"
  "LOAD"
  "MATCH"
  "MERGE"
  "OPTIONAL"
  "REMOVE"
  "RETURN"
  "SET"
  "UNION"
  "UNWIND"
  "USE"
  "USING"
  "WHERE"
  "WITH"
  <EOF> (line 3, column 1 (offset: 135))
"ORDER BY r1.evidence DESC"
 ^

In [31]:
query = '''
MATCH p=(a:`Gene or Genome` {label: "%s"})-[r1:expresses]-(b:GTEXEXP)-[r2:expresses]-(c:Tissue {label: "%s"}) 
RETURN p
ORDER BY r1.evidence DESC 
LIMIT %d
'''%(gene_symbol, tissue, limit)
print(query)


MATCH p=(a:`Gene or Genome` {label: "FABP4 gene"})-[r1:expresses]-(b:GTEXEXP)-[r2:expresses]-(c:Tissue {label: "breast epithelium"}) 
ORDER BY r1.evidence DESC 
RETURN p
LIMIT 5



### Use Case 2
Find all genes that are highly expressed in the GTEx <tissue> dataset and may be perturbed by a specific compound based on data from the LINCS L1000 dataset and known drug targets found in data curated by IDG.


In [12]:
gene_symbol = "CES1 gene"
limit = 5
query = '''
MATCH p=(a:`Gene, Protein, or Genome` {label: "%s"})-[r1:`negatively regulates`]-(b:Drug) 
    RETURN p
    ORDER BY r1.evidence DESC 
    LIMIT %d
UNION
MATCH p=(a:`Gene, Protein, or Genome` {label: "%s"})-[r1:`positively regulates`]-(b:Drug) 
    RETURN p 
    ORDER BY r1.evidence DESC 
    LIMIT %d
UNION
MATCH p=(a:`Gene, Protein, or Genome` {label: "%s"})-[r1:`bioactivity`]-(b:Drug) 
    RETURN p 
    ORDER BY r1.evidence DESC 
    LIMIT %d
UNION
    MATCH p=(a:`Gene, Protein, or Genome` {label: "%s"})-[r1:expresses]-(b:GTEXEXP)-[r2:expresses]-(c:Tissue) 
    RETURN p 
    ORDER BY r1.evidence DESC 
    LIMIT %d
'''%(gene_symbol, limit, gene_symbol, limit, gene_symbol, limit, gene_symbol, limit)
table = cypher(query)

CytoscapeWidget(cytoscape_layout={'name': 'cola'}, cytoscape_style=[{'selector': 'node', 'style': {'background…

In [None]:
'''MATCH q=(t:Tissue {label: "Subcutaneous Fat"})-[r1:expresses]-(b:GTEXEXP)-[r2:expresses]-(g:`Gene, Protein, or Genome`) 
WITH q, g
LIMIT 25
CALL {
    WITH q, g
        MATCH p=(g)-[r1:`bioactivity`]-(a:Drug)
            RETURN p
        LIMIT 10
    UNION
    WITH q, g
        MATCH p=(g)-[r1:`positively regulates`]-(a:Drug)
            RETURN p
            ORDER BY r1.evidence DESC 
        LIMIT 10
    UNION
        WITH q, g
        MATCH p=(g)-[r1:`negatively regulates`]-(a:Drug)
            RETURN p
            ORDER BY r1.evidence ASC 
        LIMIT 10
}
WITH COLLECT([p,q]) as tmp
UNWIND tmp as p
RETURN p'''

In [9]:
print(query)


MATCH p=(a:`Gene or Genome` {label: "CES1 gene"})-[r1:`negatively regulates`]-(b:Drug) 
    RETURN p
    ORDER BY r1.evidence DESC 
    LIMIT 5
UNION
MATCH p=(a:`Gene or Genome` {label: "CES1 gene"})-[r1:`positively regulates`]-(b:Drug) 
    RETURN p 
    ORDER BY r1.evidence DESC 
    LIMIT 5
UNION
MATCH p=(a:`Gene or Genome` {label: "CES1 gene"})-[r1:`bioactivity`]-(b:Drug) 
    RETURN p 
    ORDER BY r1.evidence DESC 
    LIMIT 5
UNION
MATCH p=(a:`Gene or Genome` {label: "CES1 gene"})-[r1:expresses]-(b:GTEXEXP)-[r2:expresses]-(c:Tissue) 
RETURN p 
ORDER BY r1.evidence DESC 
LIMIT 5



### Use Case 4
For a specific drug transporter or drug processing enzyme, find the tissue where these transporters and enzymes are highly expressed (GTEx), and the drugs that may induce or suppress the expression of these genes (LINCS).


In [19]:
gene_symbol = "CES1 gene"
limit = 5
query = '''
MATCH p=(a:`Gene or Genome` {label: "%s"})-[r1:`negatively regulates`]-(b:Drug) 
    RETURN p
    ORDER BY r1.evidence DESC 
    LIMIT %d
UNION
MATCH p=(a:`Gene or Genome` {label: "%s"})-[r1:`positively regulates`]-(b:Drug) 
    RETURN p 
    ORDER BY r1.evidence DESC 
    LIMIT %d
UNION
MATCH p=(a:`Gene or Genome` {label: "%s"})-[r1:expresses]-(b:GTEXEXP)-[r2:expresses]-(c:Tissue) 
RETURN p 
ORDER BY r1.evidence DESC 
LIMIT %d
'''%(gene_symbol, limit, gene_symbol, limit, gene_symbol, limit)
table = cypher(query)

CytoscapeWidget(cytoscape_layout={'name': 'cola'}, cytoscape_style=[{'selector': 'node', 'style': {'background…

## MW

In [None]:
'''
MATCH p=(d:Disease {label: 'glutathione synthetase deficiency'})-[:`correlated with condition`]-(m)
WITH p, d, m
LIMIT 25
CALL {
    WITH p, d, m
    MATCH q=(a:Tissue)-[:produces]-(m)
    RETURN q
    LIMIT 5
}
WITH COLLECT([p,q]) as tmp
UNWIND tmp as p
RETURN p
'''