Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
# redisgraph-bulk-loader
A Python utility for building RedisGraph databases from CSV inputs.

## Requirements
The bulk loader utility requires a Python 3 interpreter.

A Redis server with the [RedisGraph](https://github.com/RedisLabsModules/RedisGraph) module must be running. Installation instructions may be found at:
https://oss.redislabs.com/redisgraph/

## Installation
The bulk loader script's dependencies can be installed using pip:
```
pip install --user -r requirements.txt
```

A Redis server with the [RedisGraph](https://github.com/RedisLabsModules/RedisGraph) module must be running. Installation instructions may be found at:
https://oss.redislabs.com/redisgraph/

## Usage
bulk_insert.py GRAPHNAME [OPTIONS]

Expand Down
15 changes: 9 additions & 6 deletions bulk_insert.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import csv
import os
import io
import sys
import struct
from timeit import default_timer as timer
import redis
import click
from backports import csv

# Global variables
CONFIGS = None # thresholds for batching Redis queries
Expand Down Expand Up @@ -99,17 +99,17 @@ def report_completion(self, runtime):
class EntityFile(object):
def __init__(self, filename):
# The label or relation type string is the basename of the file
self.entity_str = os.path.splitext(os.path.basename(filename))[0].encode('utf-8')
self.entity_str = os.path.splitext(os.path.basename(filename))[0].encode()
# Input file handling
self.infile = io.open(filename, 'rt', encoding='utf-8')
self.infile = io.open(filename, 'rt')
# Initialize CSV reader that ignores leading whitespace in each field
# and does not modify input quote characters
self.reader = csv.reader(self.infile, skipinitialspace=True, quoting=csv.QUOTE_NONE)

self.prop_offset = 0 # Starting index of properties in row
self.prop_count = 0 # Number of properties per entity

self.packed_header = ""
self.packed_header = b''
self.binary_entities = []
self.binary_size = 0 # size of binary token
self.count_entities() # number of entities/row in file.
Expand Down Expand Up @@ -143,7 +143,7 @@ def pack_header(self, header):
fmt = "=%dsI" % (len(self.entity_str) + 1) # Unaligned native, entity_string, count of properties
args = [self.entity_str, prop_count]
for p in header[self.prop_offset:]:
prop = p.encode('utf-8')
prop = p.encode()
fmt += "%ds" % (len(prop) + 1) # encode string with a null terminator
args.append(prop)
return struct.pack(fmt, *args)
Expand Down Expand Up @@ -291,8 +291,8 @@ def prop_to_binary(prop_str):
return struct.pack(format_str + '?', Type.BOOL, True)

# If we've reached this point, the property is a string
encoded_str = str.encode(prop_str) # struct.pack requires bytes objects as arguments
# Encoding len+1 adds a null terminator to the string
encoded_str = prop_str.encode('utf-8')
format_str += "%ds" % (len(encoded_str) + 1)
return struct.pack(format_str, Type.STRING, encoded_str)

Expand Down Expand Up @@ -334,6 +334,9 @@ def bulk_insert(graph, host, port, password, nodes, relations, max_token_count,
global TOP_NODE_ID
global QUERY_BUF

if sys.version_info[0] < 3:
raise Exception("Python 3 is required for the RedisGraph bulk loader.")

TOP_NODE_ID = 0 # reset global ID variable (in case we are calling bulk_insert from unit tests)
CONFIGS = Configs(max_token_count, max_buffer_size, max_token_size)

Expand Down
1 change: 0 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
redis==2.10.6
click>=6.7
backports.csv