In [2]:
import sys, os, io, json
from neo4j import GraphDatabase
from py2neo import Graph
from pathlib import Path
from pandas import DataFrame
import pandas as pd
import networkx as nx

# Both clients point at the same Neo4j instance on localhost, reached over Bolt.
NEO4J_BOLT_URI = "bolt://localhost:7687"

# py2neo client: used for the queries whose rows are loaded into DataFrames.
graph = Graph(NEO4J_BOLT_URI)
# Official neo4j driver (no authentication): used by query_to_nx to open sessions.
driver = GraphDatabase.driver(NEO4J_BOLT_URI, auth=None)



## Get the neighbourhood of Fipronil as GraphML

In [27]:
# Export the one-hop neighbourhood (either edge direction) of the GraphNode whose
# Id is chebi:5063 as GraphML. The inner Cypher statement is passed to the APOC
# procedure as the $query parameter; with a null file name and stream:true the
# GraphML text is read back from the `data` column of the result.
df = DataFrame(graph.run("""
WITH $query AS query
CALL apoc.export.graphml.query(query, null, {stream:true, useTypes:true, storeNodeIds:true, readLabels:true})
YIELD file, source, format, nodes, relationships, properties, time, rows, batchSize, batches, done, data
RETURN data;
""", {'query': """
MATCH (other)-[r]-(fipronil:GraphNode)-[:id]->(id:Id { id: \"chebi:5063\" })
RETURN *
"""}).data())

# Fail with a clear message instead of an opaque KeyError on df['data'] when the
# export yields no rows.
if df.empty:
    raise ValueError("GraphML export returned no rows; nothing to write to fipronil.graphml")

# GraphML is XML, which parsers read as UTF-8 unless told otherwise, so write it
# as UTF-8 explicitly rather than relying on the platform's default encoding.
with open('fipronil.graphml', 'w', encoding='utf-8') as f:
    f.write(df['data'][0])



## Loading into networkx and writing Cytoscape JSON

In [14]:

## Helper function to run a Cypher query over a GrEBI graph and return a NetworkX graph from the results
##
def query_to_nx(query):
    """Run a Cypher query and collect the returned graph in a NetworkX MultiDiGraph.

    Nodes labelled ``Id`` are left out, as is every relationship that starts or
    ends at such a node. Each remaining node is keyed by its ``grebi:nodeId``
    property and carries its properties plus a ``grebi:type`` attribute holding
    the list of its labels. Each remaining relationship becomes an edge keyed by
    its ``edge_id`` property, carrying its properties plus a ``grebi:type``
    attribute holding the relationship type. A property that is itself named
    ``grebi:type`` takes precedence over the generated attribute.

    Args:
        query: Cypher statement, executed through the module-level ``driver``.

    Returns:
        nx.MultiDiGraph built from the nodes and relationships in the result.

    Raises:
        KeyError: if a kept node lacks ``grebi:nodeId`` or a kept relationship
            lacks ``edge_id``.
    """
    # Open the session as a context manager so it is closed even when the query
    # fails; previously every call left a session open.
    with driver.session() as session:
        # The result has to be turned into a graph while the session is open.
        result_graph = session.run(query).graph()

    G = nx.MultiDiGraph()

    # NOTE(review): the underscore-prefixed attributes below are driver internals,
    # kept as-is to preserve behaviour; confirm they exist in the installed version.
    for node in list(result_graph._nodes.values()):
        if 'Id' not in node._labels:
            G.add_nodes_from([(
                node._properties['grebi:nodeId'],
                dict({"grebi:type": list(node._labels)}, **node._properties),
            )])

    for rel in list(result_graph._relationships.values()):
        if 'Id' not in rel.start_node._labels and 'Id' not in rel.end_node._labels:
            # 4-tuple form: (source, target, edge key, attribute dict).
            G.add_edges_from([(
                rel.start_node._properties['grebi:nodeId'],
                rel.end_node._properties['grebi:nodeId'],
                rel._properties['edge_id'],
                dict({"grebi:type": rel.type}, **rel._properties),
            )])

    return G

def nx_to_cytoscape(G):
    """Convert ``G`` to Cytoscape JSON data with ``grebi:nodeId`` as each node's id.

    For every node element, the ``id`` value produced by ``nx.cytoscape_data`` is
    kept under ``ids``, and the ``grebi:nodeId`` entry is moved into ``id``.

    Args:
        G: NetworkX graph whose nodes all carry a ``grebi:nodeId`` attribute.

    Returns:
        The dict returned by ``nx.cytoscape_data``, with node data rewritten.

    Raises:
        KeyError: if a node has no ``grebi:nodeId`` entry.
    """
    cydata = nx.cytoscape_data(G)
    for element in cydata['elements']['nodes']:
        data = element['data']
        data['ids'] = data['id']
        # pop() reads the value and removes the now-redundant key in one step.
        data['id'] = data.pop('grebi:nodeId')
    return cydata


## Extracting subgraphs

In [17]:
# Subgraph reachable from the fipronil node (Id chebi:5063) with maxLevel 2.
fipronil_subgraph_query = """
MATCH (fipronil:GraphNode)-[:id]->(id:Id { id: \"chebi:5063\" })
CALL apoc.path.subgraphAll(fipronil, {maxLevel:2})
YIELD nodes, relationships
RETURN *
"""
G = query_to_nx(fipronil_subgraph_query)

# Save the subgraph as Cytoscape JSON.
with open('fipronil_subgraph_2.cyjs.json', 'w') as f:
    subgraph_cydata = nx_to_cytoscape(G)
    json.dump(subgraph_cydata, f, indent=2)



KeyboardInterrupt: 

## What are the chains of properties connected to fipronil with a distance of 1 or 2 edges?

In [23]:
# One MATCH per combination of edge directions (in/out for the first hop, in/out
# for the second). Each branch skips "id" edges and counts the matching paths per
# (edge1, type1, edge2, type2) combination; the branches are combined with UNION.
two_hop_counts_query = """
MATCH p = (id:Id { id: $id })<-[:id]-(n:GraphNode)<-[ra]-(n1)<-[rb]-(n2)
WHERE type(ra) <> "id" AND type(rb) <> "id"
RETURN "in_in" AS direction, type(ra) as edge1, n1.`grebi:displayType` AS type1, type(rb) as edge2, n2.`grebi:displayType` as type2, count(p) as num
UNION
MATCH p = (id:Id { id: $id })<-[:id]-(n:GraphNode)<-[ra]-(n1)-[rb]->(n2)
WHERE type(ra) <> "id" AND type(rb) <> "id"
RETURN "in_out" AS direction, type(ra) as edge1, n1.`grebi:displayType` AS type1, type(rb) as edge2, n2.`grebi:displayType` as type2, count(p) as num
UNION
MATCH p = (id:Id { id: $id })<-[:id]-(n:GraphNode)-[ra]->(n1)-[rb]->(n2)
WHERE type(ra) <> "id" AND type(rb) <> "id"
RETURN "out_out" AS direction, type(ra) as edge1, n1.`grebi:displayType` AS type1, type(rb) as edge2, n2.`grebi:displayType` as type2, count(p) as num
UNION
MATCH p = (id:Id { id: $id })<-[:id]-(n:GraphNode)-[ra]->(n1)<-[rb]-(n2)
WHERE type(ra) <> "id" AND type(rb) <> "id"
RETURN "out_in" AS direction, type(ra) as edge1, n1.`grebi:displayType` AS type1, type(rb) as edge2, n2.`grebi:displayType` as type2, count(p) as num
"""
two_hop_counts_params = {'id': 'chebi:5063'}

two_hop_rows = graph.run(two_hop_counts_query, two_hop_counts_params).data()
df = DataFrame(two_hop_rows)

# Persist the counts so the following cells can start from the CSV.
df.to_csv("fipronil_aggr_counts.csv", index=False)



In [40]:
# Reload the saved counts with every column read as a string.
df = pd.read_csv("fipronil_aggr_counts.csv", dtype=str)

# Remove double-quote characters from the edge and type columns.
for column in ('edge1', 'edge2', 'type1', 'type2'):
    df[column] = df[column].str.replace("\"", "")

def path(row):
    """Render one aggregated row as a Cypher-like two-hop pattern rooted at ``(R)``.

    Args:
        row: mapping with ``direction``, ``edge1``, ``type1``, ``edge2`` and
            ``type2`` entries; missing (NA) edge/type values are shown as ``?``.

    Returns:
        The pattern string for the row's direction (``out_out``, ``in_in``,
        ``out_in`` or ``in_out``), or ``None`` for any other direction.
    """
    edge1, type1, edge2, type2 = (
        "?" if pd.isna(row[key]) else row[key]
        for key in ('edge1', 'type1', 'edge2', 'type2')
    )

    # Arrow layout for each supported combination of hop directions.
    templates = {
        'out_out': '(R)-[{e1}]->({t1})-[{e2}]->({t2})',
        'in_in': '(R)<-[{e1}]-({t1})<-[{e2}]-({t2})',
        'out_in': '(R)-[{e1}]->({t1})<-[{e2}]-({t2})',
        'in_out': '(R)<-[{e1}]-({t1})-[{e2}]->({t2})',
    }
    template = templates.get(row['direction'])
    if template is None:
        return None
    return template.format(e1=edge1, t1=type1, e2=edge2, t2=type2)
    
# Render every row as a pattern string, store it in a new 'path' column, and
# overwrite the CSV with the enriched table.
rendered_paths = df.apply(path, axis=1)
df['path'] = rendered_paths

df.to_csv("fipronil_aggr_counts.csv", index=False)


In [34]:

df = pd.read_csv("fipronil_aggr_counts.csv", dtype=str)

# Scale each row's count linearly into [min_size, max_size] relative to the
# largest count in the file (computed before any rows are filtered out).
min_size = 30
max_size = 500
counts = df['num'].astype(float)
df['size'] = min_size + (counts / counts.max()) * (max_size - min_size)

# Drop every row whose first or second edge mentions one of these edge types.
for edge in ['ols:hierarchicalParent', 'ols:relatedFrom', 'ols:relatedTo']:
    for column in ('edge1', 'edge2'):
        df = df[~df[column].str.contains(edge, na=False)]

# direction -> (first hop points away from root, second hop points away from the middle node)
hop_points_outward = {
    'out_out': (True, True),
    'in_in': (False, False),
    'out_in': (True, False),
    'in_out': (False, True),
}

G = nx.DiGraph()
G.add_node("root", label="root", type='root')

for _, row in df.iterrows():
    edge1, type1, edge2, type2 = (
        "?" if pd.isna(row[column]) else row[column].replace("\"", "")
        for column in ('edge1', 'type1', 'edge2', 'type2')
    )

    hops = hop_points_outward.get(row['direction'])
    if hops is None:
        # Rows with any other direction contribute nothing to the graph.
        continue
    first_out, second_out = hops

    # First hop: root <-> middle node. The node id spells out the pattern so far.
    if first_out:
        middle_node_id = f'(R)-[{edge1}]->({type1})'
        first_hop = ("root", middle_node_id)
    else:
        middle_node_id = f'(R)<-[{edge1}]-({type1})'
        first_hop = (middle_node_id, "root")
    G.add_node(middle_node_id, label=type1)
    G.add_edge(*first_hop, label=edge1)

    # Second hop: middle node <-> end node. Only end nodes carry count and size.
    if second_out:
        end_node_id = f'{middle_node_id}-[{edge2}]->({type2})'
        second_hop = (middle_node_id, end_node_id)
    else:
        end_node_id = f'{middle_node_id}<-[{edge2}]-({type2})'
        second_hop = (end_node_id, middle_node_id)
    G.add_node(end_node_id, label=type2, num=int(row['num']), size=row['size'])
    G.add_edge(*second_hop, label=edge2)

# Write the pattern tree as Cytoscape JSON; default=str stringifies any value
# the json module cannot serialise natively.
with open('fipronil_aggr_counts.cyjs.json', 'w') as f:
    json.dump(nx.cytoscape_data(G, name="label"), f, indent=2, default=str)
