README.md (1 change: 1 addition, 0 deletions)
@@ -26,6 +26,7 @@ bulk_insert.py GRAPHNAME [OPTIONS]
| -t | --max-token-count INT | max number of tokens sent in each Redis query (default 1024) |
| -b | --max-buffer-size INT | max batch size (MBs) of each Redis query (default 4096) |
| -c | --max-token-size INT | max size (MBs) of each token sent to Redis (default 500) |
+| -q | --quote-minimal | enable smart quoting for items within the CSV |


The only required arguments are the name to give the newly-created graph (which can appear anywhere) and at least one node CSV file.
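For illustration only (this example is not part of the diff, and it assumes the node CSV is supplied with the --nodes option documented elsewhere in the README), the new flag is simply appended to a normal invocation:

python bulk_insert.py GRAPHNAME --nodes example_nodes.csv --quote-minimal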
bulk_insert.py (19 changes: 13 additions, 6 deletions)
@@ -8,10 +8,10 @@
import click

# Global variables
-CONFIGS = None # thresholds for batching Redis queries
-NODE_DICT = {} # global node dictionary
-TOP_NODE_ID = 0 # next ID to assign to a node
-QUERY_BUF = None # Buffer for query being constructed
+CONFIGS = None # thresholds for batching Redis queries
+NODE_DICT = {} # global node dictionary
+TOP_NODE_ID = 0 # next ID to assign to a node
+QUERY_BUF = None # Buffer for query being constructed

# Custom error class for invalid inputs
class CSVError(Exception):
@@ -104,7 +104,7 @@ def __init__(self, filename):
        self.infile = io.open(filename, 'rt')
        # Initialize CSV reader that ignores leading whitespace in each field
        # and does not modify input quote characters
-        self.reader = csv.reader(self.infile, skipinitialspace=True, quoting=csv.QUOTE_NONE)
+        self.reader = csv.reader(self.infile, skipinitialspace=True, quoting=QUOTING)

        self.prop_offset = 0 # Starting index of properties in row
        self.prop_count = 0 # Number of properties per entity
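For context on the line changed above (an illustrative sketch, not part of the diff): csv.QUOTE_NONE treats quote characters as ordinary data, while csv.QUOTE_MINIMAL respects quoted fields so they may contain the delimiter.

import csv
import io

# Illustration of the two quoting modes; not part of this PR.
row = 'id,"last, first",age\n'

# QUOTE_NONE: quotes are kept verbatim and the embedded comma splits the field
print(list(csv.reader(io.StringIO(row), skipinitialspace=True, quoting=csv.QUOTE_NONE)))
# -> [['id', '"last', 'first"', 'age']]

# QUOTE_MINIMAL: the quoted field is read as one value and the quotes are stripped
print(list(csv.reader(io.StringIO(row), skipinitialspace=True, quoting=csv.QUOTE_MINIMAL)))
# -> [['id', 'last, first', 'age']]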
@@ -328,16 +328,23 @@ def process_entity_csvs(cls, csvs):
@click.option('--max-token-count', '-c', default=1024, help='max number of processed CSVs to send per query (default 1024)')
@click.option('--max-buffer-size', '-b', default=2048, help='max buffer size in megabytes (default 2048)')
@click.option('--max-token-size', '-t', default=500, help='max size of each token in megabytes (default 500, max 512)')
+@click.option('--quote-minimal/--no-quote-minimal', '-q/-d', default=False, help='only quote those fields which contain special characters such as delimiter, quotechar or any of the characters in lineterminator')
Contributor: I don't think the -d alternate is strictly necessary, but if you think it is helpful then I have no objection!

Contributor Author: just kind of helpful for me as I'm lazy

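For readers unfamiliar with click's paired-flag syntax discussed in this thread, here is a standalone sketch (not part of the diff) showing how '--quote-minimal/--no-quote-minimal', '-q/-d' declares a single boolean option with an on switch and an off switch:

import click

# Minimal standalone demo of the on/off flag syntax used by the new option.
@click.command()
@click.option('--quote-minimal/--no-quote-minimal', '-q/-d', default=False,
              help='-q (or --quote-minimal) enables minimal quoting; -d (or --no-quote-minimal) disables it')
def demo(quote_minimal):
    click.echo('QUOTE_MINIMAL' if quote_minimal else 'QUOTE_NONE')

if __name__ == '__main__':
    demo()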
-def bulk_insert(graph, host, port, password, nodes, relations, max_token_count, max_buffer_size, max_token_size):
+def bulk_insert(graph, host, port, password, nodes, relations, max_token_count, max_buffer_size, max_token_size, quote_minimal):
    global CONFIGS
    global NODE_DICT
    global TOP_NODE_ID
    global QUERY_BUF
+    global QUOTING

    if sys.version_info[0] < 3:
        raise Exception("Python 3 is required for the RedisGraph bulk loader.")

+    if quote_minimal:
+        QUOTING=csv.QUOTE_MINIMAL
+    else:
+        QUOTING=csv.QUOTE_NONE
+
    TOP_NODE_ID = 0 # reset global ID variable (in case we are calling bulk_insert from unit tests)
    CONFIGS = Configs(max_token_count, max_buffer_size, max_token_size)
