In [1]:
# @name debugger.py
# @description debugger for the graph-hypothesis generation library using 2 genes
# @author 
# @date

In [2]:
# to do:
#        * check transitions: in >> out 
#        * queries should be only node1 -> node2
#        * summary: should handle the case where no hypothesis is found and not produce output files in that case.
#          The filename should be a function argument, consistent with the file naming of the hypothesis section

In [3]:
#import time
#import monarch, graph, neo4jlib, hypothesis, summary

---
# Monarch module

In [4]:
import monarch

In [5]:
%%time
# Build the Monarch network from the two seed genes (source, target).
# NOTE: in the recorded run this cell took ~44 min of wall time (only ~42 s CPU).
# Seeds are HGNC identifiers; the commented-out line below is an alternative
# seed pair written with NCBIGene identifiers.
seedList = ['HGNC:17646','HGNC:633']
#seedList = ['NCBIGene:55768','NCBIGene:358']

# get 1st layer: neighbours of the seed genes
neighbourList = monarch.get_neighbours_list(seedList)

# get ortho-phenotype data for the seed genes
orthophenoList = monarch.get_orthopheno_list(seedList)
print('neighbors: ',len(neighbourList),'orthopheno: ',len(orthophenoList))

# get monarch edges
# sum(..., []) concatenates the three lists into one flat list
# (seeds first, then neighbours, then ortho-phenotype entries).
geneList = sum([seedList,neighbourList,orthophenoList], [])
print('genelist: ',len(geneList))
# `network` is reused by the graph-building cell further down.
network = monarch.expand_edges(geneList)
print('network: ',len(network))

# save network nodes (the second argument is the output file base name;
# the recorded output shows a version/date suffix and .csv being appended)
monarch.print_nodes(geneList, 'monarch_nodes_debugging')

# save network edges (same file-naming behaviour as above)
monarch.print_network(network, 'monarch_edges_debugging')


The function "get_neighbours_list()" is running, please keep calm and have some coffee...

The function "get_orthopheno_list()" is running, please keep calm and have some coffee...
neighbors:  370 orthopheno:  102
genelist:  474

The function "expand_edges()" is running, please keep calm and have some coffee...
network:  3325

File '/home/nuria/ngly1-graph/graph-creator-lib/plan/monarch/monarch_nodes_debugging_v2018-05-07.csv' saved.

File '/home/nuria/ngly1-graph/graph-creator-lib/plan/monarch/monarch_edges_debugging_v2018-05-07.csv' saved.
CPU times: user 40.5 s, sys: 1.88 s, total: 42.4 s
Wall time: 44min 40s


---
# Graph module

In [6]:
import graph

In [7]:
%%time
# Build the graph from the Monarch network produced by the Monarch cell.
# graph.build() is given a list containing that single network.
grapho = graph.build([network])
# Extract the edge and node collections; both are reused by later cells.
edges = graph.edges(grapho)
nodes = graph.nodes(grapho)
# Save both collections to file. The base names match the ones used in the
# Monarch section, but the recorded output shows these files being written
# under the graph/ directory rather than monarch/.
graph.print_graph(edges,'monarch_edges_debugging')
graph.print_graph(nodes,'monarch_nodes_debugging')
# Report sizes as a quick sanity check (recorded run: 3325 edges, 444 nodes).
print('edges: ', len(edges))
print('nodes: ', len(nodes))


The function "build()" is running, please keep calm and have some coffee...

File '/home/nuria/ngly1-graph/graph-creator-lib/plan/graph/monarch_edges_debugging_v2018-05-07.csv' saved.

File '/home/nuria/ngly1-graph/graph-creator-lib/plan/graph/monarch_nodes_debugging_v2018-05-07.csv' saved.
edges:  3325
nodes:  444
CPU times: user 80 ms, sys: 0 ns, total: 80 ms
Wall time: 78.9 ms


---
# Neo4j module

In [8]:
import neo4jlib

In [9]:
# Keep references to the in-memory edges/nodes returned by the graph module:
# the next cell rebinds `edges` and `nodes` to data loaded from file, and the
# "run all the pipeline at once" cells below need the original objects.
edges_pipe = edges
nodes_pipe = nodes

In [10]:
%%time
# Import into the graph interface (currently Neo4j).
## Load the edges and nodes tables for Neo4j from previously saved files.
# NOTE(review): these paths carry the date suffix v2018-05-03, whereas the graph
# files saved in this recorded run are suffixed v2018-05-07, and the recorded
# statement count here (3302) differs from the in-memory edge count (3325).
# This looks like a stale export -- confirm which input files are intended.
# NOTE: this rebinds `edges`/`nodes`, replacing the objects returned by
# graph.edges()/graph.nodes(); the previous cell keeps them as
# edges_pipe/nodes_pipe.
edges = neo4jlib.get_dataframe_from_file('./graph/monarch_edges_debugging_v2018-05-03')
nodes = neo4jlib.get_dataframe_from_file('./graph/monarch_nodes_debugging_v2018-05-03')
# Convert the loaded tables into Neo4j statements (edges) and concepts (nodes).
statements = neo4jlib.get_statements(edges)
concepts = neo4jlib.get_concepts(nodes)
print('statements: ', len(statements))
print('concepts: ',len(concepts))

## import the graph into neo4j
# save files into neo4j import dir
# NOTE(review): absolute, machine-specific path to the Neo4j installation;
# consider making it configurable.
neo4j_path = '/home/nuria/ngly1-graph/graph-creator-lib/plan/neo4j-community-3.0.3'
# The recorded output shows the files being written under
# <neo4j_path>/import/ngly1/ as ngly1_statements.csv and ngly1_concepts.csv.
neo4jlib.save_neo4j_files(statements, neo4j_path, file_type = 'statements')
neo4jlib.save_neo4j_files(concepts, neo4j_path, file_type = 'concepts')

# import graph into neo4j (the recorded output reports the server running afterwards)
neo4jlib.do_import(neo4j_path)

statements:  3302
concepts:  444

File '/home/nuria/ngly1-graph/graph-creator-lib/plan/neo4j-community-3.0.3/import/ngly1/ngly1_statements.csv' saved.

File '/home/nuria/ngly1-graph/graph-creator-lib/plan/neo4j-community-3.0.3/import/ngly1/ngly1_concepts.csv' saved.

The graph is imported into the server. The server is running.

CPU times: user 64 ms, sys: 28 ms, total: 92 ms
Wall time: 3.99 s


#### Add functionality to run all the pipeline at once
- convert the edges and nodes objects, which are lists of dictionaries, to DataFrames via a `_function` in the module

In [11]:
import pandas as pd

def get_dataframe(object):
    """Build a pandas DataFrame from a list of dictionaries.

    Parameters
    ----------
    object : list of dict
        The records to convert. The value is handed to ``pd.DataFrame``
        unchanged.

    Returns
    -------
    pandas.DataFrame
        The DataFrame constructed from ``object``.

    Raises
    ------
    ValueError
        Propagated as-is when ``pd.DataFrame`` rejects the input.
    """
    # The parameter name shadows the builtin `object`; it is kept so that
    # existing keyword callers keep working.
    return pd.DataFrame(object)

In [12]:
# Convert the in-memory edges/nodes (saved earlier as edges_pipe/nodes_pipe)
# straight into DataFrames, without going through files on disk.
edges_df = get_dataframe(edges_pipe)
nodes_df = get_dataframe(nodes_pipe)

In [13]:
# Same Neo4j import steps as the file-based cell above, but fed from the
# in-memory DataFrames. In the recorded run this yields 3325 statements,
# which matches the edge count reported by the graph module.
statements = neo4jlib.get_statements(edges_df)
concepts = neo4jlib.get_concepts(nodes_df)
print('statements: ', len(statements))
print('concepts: ',len(concepts))

## import the graph into neo4j
# save files into neo4j import dir
# NOTE(review): absolute, machine-specific path to the Neo4j installation;
# consider making it configurable.
neo4j_path = '/home/nuria/ngly1-graph/graph-creator-lib/plan/neo4j-community-3.0.3'
# The recorded output shows these calls writing the same
# <neo4j_path>/import/ngly1/ files as the file-based import cell.
neo4jlib.save_neo4j_files(statements, neo4j_path, file_type = 'statements')
neo4jlib.save_neo4j_files(concepts, neo4j_path, file_type = 'concepts')

# import graph into neo4j (the recorded output reports the server running afterwards)
neo4jlib.do_import(neo4j_path)

statements:  3325
concepts:  444

File '/home/nuria/ngly1-graph/graph-creator-lib/plan/neo4j-community-3.0.3/import/ngly1/ngly1_statements.csv' saved.

File '/home/nuria/ngly1-graph/graph-creator-lib/plan/neo4j-community-3.0.3/import/ngly1/ngly1_concepts.csv' saved.

The graph is imported into the server. The server is running.



---
# Hypothesis module
- The "service unavailable" error arises because the graph database is not yet ready to be queried. Wait from a few seconds up to about 1 minute before running the hypothesis generation part.

In [14]:
import hypothesis

In [20]:
%%time
# Query the ortho-phenotype paths between the two seed genes.
seed = [
    'HGNC:17646',  # NGLY1 human gene
    'HGNC:633',    # AQP1 human gene
]
# Run the query under the name 'ngly1_aqp1', using port '7687'.
hypothesis.query(seed, queryname='ngly1_aqp1', port='7687')
# Alternative run kept for reference: the same genes as NCBIGene identifiers,
# queried on port '7688'.
#seed = list([
#        'NCBIGene:55768',  # NGLY1 human gene
#        'NCBIGene:358'  # AQP1 human gene
#])
#hypothesis.query(seed,queryname='ngly1_aqp1_7688',port='7688')

CPU times: user 52 ms, sys: 12 ms, total: 64 ms
Wall time: 1.52 s



Hypothesis generator has finished. 2 QUERIES completed.


---
# Summary module

In [16]:
import summary

In [21]:
%%time
# Summarize the results of the hypothesis query.
# data without paths
# NOTE(review): the input file name is dated v2018-05-03, while the summary
# files written by this cell in the recorded run are dated v2018-05-07 --
# confirm this is the intended hypothesis output and not a stale one.
data = summary.path_load('./hypothesis/query_ngly1_aqp1_pwdl50_phdl20_paths_v2018-05-03')
# data with paths
#data = summary.path_load('./hypothesis/query_ngly1_aqp1_open_query_only_topology_paths_v2018-03-15')

# Parse every query in the loaded data for summarization.
data_parsed = list()
#funcs = [summary.metapaths, summary.nodes, summary.node_types, summary.edges, summary.edge_types]
for query in data:
    query_parsed = summary.query_parser(query)
    #metapath(query_parsed)
    #map(lambda x: x(query_parsed), funcs)
    data_parsed.append(query_parsed)
# Run each summary over the full list of parsed queries. The recorded output
# shows these calls writing CSV files under summaries/, named by source and
# target gene.
summary.metapaths(data_parsed)
summary.nodes(data_parsed)
summary.node_types(data_parsed)
summary.edges(data_parsed)
summary.edge_types(data_parsed)
#for query in data_parsed:
#    map(lambda x: x(query), funcs)


File '/home/nuria/ngly1-graph/graph-creator-lib/plan/summaries/monarch_orthopeno_network_query_source:HGNC:17646_target:HGNC:633_summary_metapaths_v2018-05-07.csv' saved.

File '/home/nuria/ngly1-graph/graph-creator-lib/plan/summaries/monarch_orthopeno_network_query_source:HGNC:17646_target:HGNC:633_summary_entities_in_metapaths_v2018-05-07.csv' saved.

File '/home/nuria/ngly1-graph/graph-creator-lib/plan/summaries/monarch_orthopeno_network_query_source:HGNC:633_target:HGNC:17646_summary_metapaths_v2018-05-07.csv' saved.

File '/home/nuria/ngly1-graph/graph-creator-lib/plan/summaries/monarch_orthopeno_network_query_source:HGNC:633_target:HGNC:17646_summary_entities_in_metapaths_v2018-05-07.csv' saved.

File '/home/nuria/ngly1-graph/graph-creator-lib/plan/summaries/monarch_orthopeno_network_query_source:HGNC:17646_target:HGNC:633_summary_nodes_v2018-05-07.csv' saved.

File '/home/nuria/ngly1-graph/graph-creator-lib/plan/summaries/monarch_orthopeno_network_query_source:HGNC:633_target:H

---
# Hypothesis generation modules: debug open_query()
## hypothesis module

In [22]:
%%time
# Run the open query between the two seed genes.
seed = [
    'HGNC:17646',  # NGLY1 human gene
    'HGNC:633',    # AQP1 human gene
]
# Run the open query under the name 'open_ngly1_aqp1', using port '7687'.
hypothesis.open_query(seed, queryname='open_ngly1_aqp1', port='7687')

CPU times: user 220 ms, sys: 20 ms, total: 240 ms
Wall time: 669 ms



Hypothesis generator has finished. 2 QUERIES completed.


---
## summary module

In [23]:
%%time
# Summarize the results of the open query.
# data without paths
#data = summary.path_load('./hypothesis/query_ngly1_aqp1_pwdl50_phdl20_paths_v2018-05-03')
# data with paths
# NOTE(review): the input file name is dated v2018-05-03, while the summary
# files written by this cell in the recorded run are dated v2018-05-07 --
# confirm this is the intended open_query output and not a stale one.
data = summary.path_load('./hypothesis/query_open_ngly1_aqp1_paths_v2018-05-03')

# Parse every query in the loaded data for summarization.
data_parsed = list()
#funcs = [summary.metapaths, summary.nodes, summary.node_types, summary.edges, summary.edge_types]
for query in data:
    query_parsed = summary.query_parser(query)
    #metapath(query_parsed)
    #map(lambda x: x(query_parsed), funcs)
    data_parsed.append(query_parsed)
# Run each summary over the full list of parsed queries.
# NOTE(review): the recorded output file names are identical to those written
# by the earlier summary cell, so this run appears to overwrite them -- see
# the to-do about passing the filename as a function argument.
summary.metapaths(data_parsed)
summary.nodes(data_parsed)
summary.node_types(data_parsed)
summary.edges(data_parsed)
summary.edge_types(data_parsed)
#for query in data_parsed:
#    map(lambda x: x(query), funcs)


File '/home/nuria/ngly1-graph/graph-creator-lib/plan/summaries/monarch_orthopeno_network_query_source:HGNC:17646_target:HGNC:633_summary_metapaths_v2018-05-07.csv' saved.

File '/home/nuria/ngly1-graph/graph-creator-lib/plan/summaries/monarch_orthopeno_network_query_source:HGNC:17646_target:HGNC:633_summary_entities_in_metapaths_v2018-05-07.csv' saved.

File '/home/nuria/ngly1-graph/graph-creator-lib/plan/summaries/monarch_orthopeno_network_query_source:HGNC:633_target:HGNC:17646_summary_metapaths_v2018-05-07.csv' saved.

File '/home/nuria/ngly1-graph/graph-creator-lib/plan/summaries/monarch_orthopeno_network_query_source:HGNC:633_target:HGNC:17646_summary_entities_in_metapaths_v2018-05-07.csv' saved.

File '/home/nuria/ngly1-graph/graph-creator-lib/plan/summaries/monarch_orthopeno_network_query_source:HGNC:17646_target:HGNC:633_summary_nodes_v2018-05-07.csv' saved.

File '/home/nuria/ngly1-graph/graph-creator-lib/plan/summaries/monarch_orthopeno_network_query_source:HGNC:633_target:H