From 1b1d6e1e3721fefaf8d55131c6f3185c32711192 Mon Sep 17 00:00:00 2001 From: renewooller Date: Wed, 21 Aug 2019 12:10:36 +1000 Subject: [PATCH 1/7] allows any csv quote format --- bulk_insert.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/bulk_insert.py b/bulk_insert.py index 80657a8..afaccee 100644 --- a/bulk_insert.py +++ b/bulk_insert.py @@ -336,12 +336,12 @@ def process_entity_csvs(cls, csvs, separator): @click.option('--max-token-count', '-c', default=1024, help='max number of processed CSVs to send per query (default 1024)') @click.option('--max-buffer-size', '-b', default=2048, help='max buffer size in megabytes (default 2048)') @click.option('--max-token-size', '-t', default=500, help='max size of each token in megabytes (default 500, max 512)') -@click.option('--quote-minimal/--no-quote-minimal', '-q/-d', default=False, help='only quote those fields which contain special characters such as delimiter, quotechar or any of the characters in lineterminator') +@click.option('--quote', '-q', default=3, help='the quoting format used in the CSV file. 
QUOTE_MINIMAL=0,QUOTE_ALL=1,QUOTE_NONNUMERIC=2,QUOTE_NONE=3') @click.option('--skip-invalid-nodes', '-s', default=False, is_flag=True, help='ignore nodes that use previously defined IDs') @click.option('--skip-invalid-edges', '-e', default=False, is_flag=True, help='ignore invalid edges, print an error message and continue loading (True), or stop loading after an edge loading failure (False)') -def bulk_insert(graph, host, port, password, nodes, relations, separator, max_token_count, max_buffer_size, max_token_size, quote_minimal, skip_invalid_nodes, skip_invalid_edges): +def bulk_insert(graph, host, port, password, nodes, relations, separator, max_token_count, max_buffer_size, max_token_size, quote, skip_invalid_nodes, skip_invalid_edges): global CONFIGS global NODE_DICT global TOP_NODE_ID @@ -350,11 +350,8 @@ def bulk_insert(graph, host, port, password, nodes, relations, separator, max_to if sys.version_info[0] < 3: raise Exception("Python 3 is required for the RedisGraph bulk loader.") - - if quote_minimal: - QUOTING=csv.QUOTE_MINIMAL - else: - QUOTING=csv.QUOTE_NONE + + QUOTING=int(quote) TOP_NODE_ID = 0 # reset global ID variable (in case we are calling bulk_insert from unit tests) CONFIGS = Configs(max_token_count, max_buffer_size, max_token_size, skip_invalid_nodes, skip_invalid_edges) From e89eae89f96987931dc732bd73ef31479ea0f1cb Mon Sep 17 00:00:00 2001 From: renewooller Date: Wed, 21 Aug 2019 14:04:37 +1000 Subject: [PATCH 2/7] updates bulk loader to be able to set types explicitly --- .gitignore | 1 + README.md | 9 +++++++ bulk_insert.py | 66 ++++++++++++++++++++++++++++++--------------- example2/Robots.csv | 5 ++++ 4 files changed, 59 insertions(+), 22 deletions(-) create mode 100644 .gitignore create mode 100644 example2/Robots.csv diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..722d5e7 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.vscode diff --git a/README.md b/README.md index 2ee40b5..298dda3 100644 --- a/README.md +++ 
b/README.md @@ -27,6 +27,7 @@ bulk_insert.py GRAPHNAME [OPTIONS] | -b | --max-buffer-size INT | max batch size (MBs) of each Redis query (default 4096) | | -c | --max-token-size INT | max size (MBs) of each token sent to Redis (default 500) | | -q | --quote-minimal | enable smart quoting for items within the CSV | +| -f | --field-types | JSON object that sets an explicit type for each field, format {