Skip to content

Commit

Permalink
Configured tests for psql2neo4j
Browse files Browse the repository at this point in the history
  • Loading branch information
philloooo committed Mar 24, 2015
1 parent f5aa097 commit ff585f2
Show file tree
Hide file tree
Showing 6 changed files with 130 additions and 34 deletions.
1 change: 1 addition & 0 deletions .gitignore
Expand Up @@ -61,3 +61,4 @@ target/

# IDEs
.idea/
*.swp
1 change: 1 addition & 0 deletions bin/csv_to_neo.sh
@@ -1,3 +1,4 @@
#!/bin/bash
while getopts “:s:d:b:” OPTION
do
case $OPTION in
Expand Down
4 changes: 2 additions & 2 deletions bin/setup_neo4j.py
Expand Up @@ -20,8 +20,8 @@ def download_and_extract(url):

def start_neo4j(url):
dir_path = p_dir.match(url).group(2)
binary = os.path.join(dir_path, 'bin', 'neo4j')
call([binary, 'start'])
conf = os.path.join(dir_path, 'conf', 'neo4j.properties')
call(['sed','-i','s/^#allow/allow/g', conf])

if __name__ == '__main__':

Expand Down
26 changes: 16 additions & 10 deletions psqlgraph/psqlgraph2neo4j.py
@@ -1,9 +1,9 @@
from __future__ import print_function
from datetime import datetime
import psqlgraph
import progressbar
import json
import os
import gdcdatamodel
class PsqlGraph2Neo4j(object):
def __init__(self):
self.psqlgraphDriver = None
Expand Down Expand Up @@ -33,13 +33,10 @@ def convert_node(self, node):
self.try_parse_doc(node.properties)


def create_node_files(self,data_dir):
def create_node_files(self,data_dir,node_properties):
with self.psqlgraphDriver.session_scope():
count = 0
with open(os.path.join(
gdcdatamodel.schema_src_dir,'node_properties.avsc'),'r') as f:
schema = json.load(f)
for node in schema:
for node in node_properties:
label = '_'.join(node['name'].split('_')[0:-1])
f=open(os.path.join(data_dir,'nodes'+str(count)+'.csv'),'w')
count+=1
Expand Down Expand Up @@ -86,7 +83,7 @@ def node_to_csv(self,id,node):
pdb.set_trace()


def export_to_csv(self,data_dir,silent=False):
def export_to_csv(self,data_dir,node_properties,silent=False):
node_ids=dict()
if not silent:
i = 0
Expand All @@ -96,7 +93,7 @@ def export_to_csv(self,data_dir,silent=False):

edge_file = open(os.path.join(data_dir,'rels.csv'),'w')
print('start\tend\ttype\t',file=edge_file)
self.create_node_files(data_dir)
self.create_node_files(data_dir,node_properties)
nodes = self.psqlgraphDriver.get_nodes()
id_count=0
for node in nodes:
Expand Down Expand Up @@ -149,10 +146,19 @@ def update_pbar(self, pbar, i):
return i+1


def export(self, data_dir, silent=False):
def export(self, data_dir,node_properties, silent=False):
'''
create csv files that will later be parsed by batch
importer from psqlgraph.
data_dir: directory to store csv
node_properties: dictionary that should have the same structure
as node_properties.avsc in gdcdatamodel.
'''


if not self.psqlgraphDriver:
raise Exception(
'No psqlgraph driver. Please call .connect_to_psql()')

self.export_to_csv(data_dir,silent=silent)
self.export_to_csv(data_dir,node_properties,silent=silent)
1 change: 1 addition & 0 deletions setup.py
Expand Up @@ -10,4 +10,5 @@
'progressbar',
'avro==1.7.7',
'xlocal==0.5'
]
)
131 changes: 109 additions & 22 deletions test/test_psqlgraph2neoj4.py
Expand Up @@ -2,28 +2,52 @@
import unittest
import logging
from psqlgraph import psqlgraph2neo4j
import py2neo
import os
from py2neo.packages.httpstream import http
http.socket_timeout = 9999
from psqlgraph.edge import PsqlEdge
import uuid

import shutil
import subprocess
import pdb
import time
# Settings handed to the connect_to_psql() / connect_to_neo4j() calls below.
host = 'localhost'
user = 'test'
password = 'test'
database = 'automated_test'

# Module-level converter; setUp() stores this same object on each test
# instance. The connect_to_* calls run when this module is imported.
driver = psqlgraph2neo4j.PsqlGraph2Neo4j()
driver.connect_to_psql(host, user, password, database)
driver.connect_to_neo4j(host)

# Configure the root logger at INFO level for the test run.
logging.basicConfig(level=logging.INFO)


class Test_psql2neo(unittest.TestCase):
@classmethod
def setUpClass(cls):
    """Run ``bin/csv_to_neo.sh setup`` once before the tests in this class.

    The script is located relative to the absolute path of this file, so
    the call does not depend on the current working directory. The command
    is passed as an argument list rather than a shell string, so a path
    containing spaces or shell metacharacters reaches ``bash`` intact.
    """
    here = os.path.abspath(__file__)
    root_dir = os.path.dirname(os.path.dirname(here))
    script = os.path.join(root_dir, 'bin/csv_to_neo.sh')

    # The exit status is not checked.
    subprocess.call(['bash', script, 'setup'])


@classmethod
def tearDownClass(cls):
    """Remove the ``batch_importer`` directory two levels above this file.

    The path is built from the absolute location of this file so it does
    not depend on the current working directory. The directory is only
    removed when it exists: if it was never created, an unconditional
    ``rmtree`` would raise here and bury the failure that caused it.
    """
    here = os.path.abspath(__file__)
    root_dir = os.path.dirname(os.path.dirname(here))
    importer_dir = os.path.join(root_dir, 'batch_importer')
    if os.path.isdir(importer_dir):
        shutil.rmtree(importer_dir)

def setUp(self):
    """Reset per-test state and delete the directory from get_data_dir().

    Stores the logger and the module-level ``driver`` on the instance,
    clears the cached path attributes, copies the two driver handles off
    the shared driver, then removes the data directory tree.
    """
    self.logger = logging.getLogger(__name__)
    self.driver = driver

    # Cached paths; the get_*() helpers fill these in on first use.
    for cached_path in ('data_dir', 'root_dir', 'csv_dir',
                        'neo4j_script', 'batch_script'):
        setattr(self, cached_path, None)

    shared = self.driver
    self.psqlDriver = shared.psqlgraphDriver
    self.neo4jDriver = shared.neo4jDriver

    stale_store = self.get_data_dir()
    shutil.rmtree(stale_store)

def tearDown(self):
self._clear_tables()
Expand All @@ -37,20 +61,51 @@ def _clear_tables(self):
conn.execute('delete from nodes')
conn.close()

# clear neo4j
self.neo4jDriver.cypher.execute(
"""MATCH (n:test)
OPTIONAL MATCH (n:test)-[r]-()
DELETE n,r
"""
)
self.neo4jDriver.cypher.execute(
"""MATCH (n:test)
OPTIONAL MATCH (n:test2)-[r]-()
DELETE n,r
"""
)


# Return 'neo4j-community-2.1.6/data/graph.db' joined onto
# dirname(dirname(__file__)), caching the result on self.data_dir and
# creating the directory with os.makedirs() when it does not exist.
# NOTE(review): indentation is lost in this view, so it is unclear whether the
# os.path.exists() check is nested under the cache check (runs only when the
# path is first computed) or runs on every call -- confirm against the file.
def get_data_dir(self):
if not self.data_dir:
# Local alias that keeps the two-level parent lookup short.
dirname = os.path.dirname
self.data_dir = os.path.join(dirname(dirname(__file__)),'neo4j-community-2.1.6/data/graph.db')
if not os.path.exists(self.data_dir):
os.makedirs(self.data_dir)
return self.data_dir

def get_batch_script(self):
    """Return the path of ``bin/csv_to_neo.sh`` under get_root_dir().

    The path is built on the first call and cached on
    ``self.batch_script``; later calls return the cached value.
    """
    if self.batch_script:
        return self.batch_script
    script_path = os.path.join(self.get_root_dir(), 'bin/csv_to_neo.sh')
    self.batch_script = script_path
    return self.batch_script

def get_root_dir(self):
    """Return the parent of the directory that contains this file.

    The value is computed once and cached on ``self.root_dir``.

    ``__file__`` is made absolute before taking the two ``dirname`` steps.
    With a relative ``__file__`` the result can collapse to ``''``, and any
    path joined onto it then resolves against the current working
    directory, which is only right when the tests are started from that
    parent directory.
    """
    if not self.root_dir:
        here = os.path.abspath(__file__)
        self.root_dir = os.path.dirname(os.path.dirname(here))
    return self.root_dir

# Return the 'csv' directory under get_root_dir(), caching the path on
# self.csv_dir and creating the directory with os.makedirs() when it does
# not exist.
# NOTE(review): indentation is lost in this view, so it is unclear whether the
# os.path.exists() check is nested under the cache check (runs only when the
# path is first computed) or runs on every call -- confirm against the file.
def get_csv_dir(self):
if not self.csv_dir:
self.csv_dir = os.path.join(self.get_root_dir(),'csv')
if not os.path.exists(self.csv_dir):
os.makedirs(self.csv_dir)
return self.csv_dir

def get_neo4j_script(self):
    """Return the path of ``neo4j-community-2.1.6/bin/neo4j`` under get_root_dir().

    The path is built on the first call and cached on
    ``self.neo4j_script``; later calls return the cached value.
    """
    if self.neo4j_script:
        return self.neo4j_script
    root = self.get_root_dir()
    self.neo4j_script = os.path.join(root, 'neo4j-community-2.1.6/bin/neo4j')
    return self.neo4j_script

def batch_import(self):
    """Run the CSV-to-Neo4j conversion script, then restart Neo4j.

    Invokes ``bin/csv_to_neo.sh -s <csv dir> -d <data dir> -b batch_importer
    convert`` and afterwards calls the Neo4j control script with ``stop``
    and ``start-no-wait``. Exit statuses are not checked.

    Commands are passed as argument lists instead of shell strings so that
    paths containing spaces or shell metacharacters are not re-split.
    """
    data_dir = self.get_data_dir()
    csv_dir = self.get_csv_dir()
    script = self.get_batch_script()
    neo4j = self.get_neo4j_script()

    # One pre-formatted string prints the same text whether ``print`` is the
    # Python 2 statement or the Python 3 function.
    print('%s %s %s' % (script, csv_dir, data_dir))

    subprocess.call(['bash', script, '-s', csv_dir, '-d', data_dir,
                     '-b', 'batch_importer', 'convert'])
    subprocess.call([neo4j, 'stop'])
    subprocess.call([neo4j, 'start-no-wait'])
    # Fixed pause before returning; presumably gives the server time to come
    # up after start-no-wait -- TODO(review): replace with a readiness poll.
    time.sleep(10)

def test_neo_single_node(self):
self._clear_tables()
node_id = str(uuid.uuid4())
Expand All @@ -60,7 +115,13 @@ def test_neo_single_node(self):
'id': node_id, 'time': self.driver.datetime2ms_epoch(timestamp)
}
self.psqlDriver.node_merge(node_id, label='test', properties=props)
self.driver.export()
node_properties = [ {'name':'test_properties',
'fields':[{'name':'time','type':'long'}]} ]

self.driver.export(self.get_csv_dir(),node_properties)
self.batch_import()

self.neo4jDriver=py2neo.Graph()
nodes = self.neo4jDriver.cypher.execute('match (n:test) return n')
self.assertEqual(len(nodes), 1)
self.assertEqual(nodes[0].n.properties, test_props)
Expand All @@ -78,16 +139,22 @@ def test_neo_many_node(self):
})
self.psqlDriver.node_merge(node_id, label='test', properties=props)

self.driver.export()
node_properties = [ {'name':'test_properties',
'fields':[{'name':'time','type':'long'}]} ]
self.driver.export(self.get_csv_dir(),node_properties)

self.batch_import()

self.neo4jDriver=py2neo.Graph()
nodes = self.neo4jDriver.cypher.execute('match (n:test) return n')
self.assertEqual(len(nodes), count)

node_props = [n.n.properties for n in nodes]
for prop in test_props:
self.assertTrue(prop in node_props)
for node_prop in node_props:
self.assertTrue(node_prop in test_props)


def test_neo_single_path(self):
self._clear_tables()
src_id = str(uuid.uuid4())
Expand All @@ -96,8 +163,15 @@ def test_neo_single_path(self):
self.psqlDriver.node_merge(node_id=dst_id, label='test')
self.psqlDriver.edge_insert(
PsqlEdge(src_id=src_id, dst_id=dst_id, label='test'))
self.driver.export()
node_properties = [ {'name':'test_properties',
'fields':[]} ]

self.driver.export(self.get_csv_dir(),node_properties)
self.batch_import()


self.neo4jDriver=py2neo.Graph()
self.neo4jDriver=py2neo.Graph()
nodes = self.neo4jDriver.cypher.execute('match (n:test) return n')
self.assertEqual(len(nodes), 2)
nodes = self.neo4jDriver.cypher.execute("""
Expand Down Expand Up @@ -135,7 +209,14 @@ def test_neo_star_topology(self):
self.psqlDriver.node_merge(node_id=dst_id, label='test')
self.psqlDriver.edge_insert(PsqlEdge(
src_id=src_id, dst_id=dst_id, label='test'))
self.driver.export()
node_properties = [ {'name':'test_properties',
'fields':[]} ]

self.driver.export(self.get_csv_dir(),node_properties)
self.batch_import()


self.neo4jDriver=py2neo.Graph()
nodes = self.neo4jDriver.cypher.execute(
'match (n)-[r]-(m) where n.id = "{src_id}" return n'.format(
src_id=src_id)
Expand All @@ -162,7 +243,13 @@ def test_neo_tree_topology(self):
node_id = str(uuid.uuid4())
self.psqlDriver.node_merge(node_id=node_id, label='test')
self._create_subtree(node_id, 0)
self.driver.export()
node_properties = [ {'name':'test_properties',
'fields':[]} ]

self.driver.export(self.get_csv_dir(),node_properties)
self.batch_import()




if __name__ == '__main__':
Expand Down

0 comments on commit ff585f2

Please sign in to comment.