From d87af8555cfd9889ea3c8aa363b70460909c105a Mon Sep 17 00:00:00 2001
From: Jeffrey Lovitz
Date: Tue, 18 Dec 2018 16:17:30 -0500
Subject: [PATCH] Require a Python 3 interpreter, better Unicode support

---
 README.md        |  9 ++++++---
 bulk_insert.py   | 15 +++++++++------
 requirements.txt |  1 -
 3 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/README.md b/README.md
index bd50b5d..73f0659 100644
--- a/README.md
+++ b/README.md
@@ -1,15 +1,18 @@
 # redisgraph-bulk-loader
 A Python utility for building RedisGraph databases from CSV inputs
 
+## Requirements
+The bulk loader utility requires a Python 3 interpreter.
+
+A Redis server with the [RedisGraph](https://github.com/RedisLabsModules/RedisGraph) module must be running. Installation instructions may be found at:
+https://oss.redislabs.com/redisgraph/
+
 ## Installation
 The bulk loader script's dependencies can be resolved using pip:
 ```
 pip install --user -r requirements.txt
 ```
 
-A Redis server with the [RedisGraph](https://github.com/RedisLabsModules/RedisGraph) module must be running. Installation instructions may be found at:
-https://oss.redislabs.com/redisgraph/
-
 ## Usage
 bulk_insert.py GRAPHNAME [OPTIONS]
 
diff --git a/bulk_insert.py b/bulk_insert.py
index f6e5743..61ff79a 100644
--- a/bulk_insert.py
+++ b/bulk_insert.py
@@ -1,11 +1,11 @@
 import csv
 import os
 import io
+import sys
 import struct
 from timeit import default_timer as timer
 import redis
 import click
-from backports import csv
 
 # Global variables
 CONFIGS = None # thresholds for batching Redis queries
@@ -99,9 +99,9 @@ def report_completion(self, runtime):
 class EntityFile(object):
     def __init__(self, filename):
         # The label or relation type string is the basename of the file
-        self.entity_str = os.path.splitext(os.path.basename(filename))[0].encode('utf-8')
+        self.entity_str = os.path.splitext(os.path.basename(filename))[0].encode()
         # Input file handling
-        self.infile = io.open(filename, 'rt', encoding='utf-8')
+        self.infile = io.open(filename, 'rt')
         # Initialize CSV reader that ignores leading whitespace in each field
         # and does not modify input quote characters
         self.reader = csv.reader(self.infile, skipinitialspace=True, quoting=csv.QUOTE_NONE)
@@ -109,7 +109,7 @@ def __init__(self, filename):
         self.prop_offset = 0 # Starting index of properties in row
         self.prop_count = 0 # Number of properties per entity
 
-        self.packed_header = ""
+        self.packed_header = b''
         self.binary_entities = []
         self.binary_size = 0 # size of binary token
         self.count_entities() # number of entities/row in file.
@@ -143,7 +143,7 @@ def pack_header(self, header):
         fmt = "=%dsI" % (len(self.entity_str) + 1) # Unaligned native, entity_string, count of properties
         args = [self.entity_str, prop_count]
         for p in header[self.prop_offset:]:
-            prop = p.encode('utf-8')
+            prop = p.encode()
             fmt += "%ds" % (len(prop) + 1) # encode string with a null terminator
             args.append(prop)
         return struct.pack(fmt, *args)
@@ -291,8 +291,8 @@ def prop_to_binary(prop_str):
         return struct.pack(format_str + '?', Type.BOOL, True)
 
     # If we've reached this point, the property is a string
+    encoded_str = str.encode(prop_str) # struct.pack requires bytes objects as arguments
     # Encoding len+1 adds a null terminator to the string
-    encoded_str = prop_str.encode('utf-8')
     format_str += "%ds" % (len(encoded_str) + 1)
     return struct.pack(format_str, Type.STRING, encoded_str)
 
@@ -334,6 +334,9 @@ def bulk_insert(graph, host, port, password, nodes, relations, max_token_count,
     global TOP_NODE_ID
     global QUERY_BUF
 
+    if sys.version_info[0] < 3:
+        raise Exception("Python 3 is required for the RedisGraph bulk loader.")
+
     TOP_NODE_ID = 0 # reset global ID variable (in case we are calling bulk_insert from unit tests)
 
     CONFIGS = Configs(max_token_count, max_buffer_size, max_token_size)
diff --git a/requirements.txt b/requirements.txt
index 7672fba..5772c06 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,2 @@
 redis==2.10.6
 click>=6.7
-backports.csv