Support Python 2.7

chatopera · Oct 20, 2017 · 4563cc6
1 parent e48be1a
commit 4563cc6
Show file tree

Hide file tree

Showing 4 changed files with 12 additions and 9 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,6 +1,7 @@
 .vscode/
 *.swp
 *.swo
+*.pyc
 **/__pycache__
 nohup.out
 
@@ -18,4 +19,4 @@ config.ini
 
 # serve
 /deepqa2/serve/db.sqlite3
-/deepqa2/serve/dump.rdb
+/deepqa2/serve/dump.rdb
diff --git a/deepqa2/config.py b/deepqa2/config.py
@@ -106,8 +106,9 @@ def __init__(self):
         '''
         Define Dataset
         '''
+	print("dataset", self.ini['data']['dataset'])
         if not os.path.exists(self.ini['data']['dataset']):
-            raise 'Corpus Data not exists.'
+            raise Exception('Corpus Data not exists.')
         print('Start to load corpus ... %s' % self.ini['data']['dataset'])
         self.dataset_pkl_path = self.ini['data']['dataset']
         with open(self.dataset_pkl_path, 'rb') as handle:

diff --git a/deepqa2/dataset/cornelldata.py b/deepqa2/dataset/cornelldata.py
@@ -59,7 +59,7 @@ def loadLines(self, fileName, fields):
         """
         lines = {}
 
-        with open(fileName, 'r', encoding='iso-8859-1') as f:  # TODO: Solve Iso encoding pb !
+        with open(fileName, 'r') as f:  # TODO: Solve Iso encoding pb !
             for line in f:
                 values = line.split(" +++$+++ ")
 
@@ -82,7 +82,7 @@ def loadConversations(self, fileName, fields):
         """
         conversations = []
 
-        with open(fileName, 'r', encoding='iso-8859-1') as f:  # TODO: Solve Iso encoding pb !
+        with open(fileName, 'r') as f:  # TODO: Solve Iso encoding pb !
             for line in f:
                 values = line.split(" +++$+++ ")
 

diff --git a/deepqa2/dataset/textdata.py b/deepqa2/dataset/textdata.py
@@ -268,7 +268,7 @@ def saveDataset(self, dirName):
 
         with open(dataset_pkl_path + '.json', 'w') as fp:
             # Save in json format for fast view
-            json.dump(data, fp)
+            json.dump(data, fp, ensure_ascii=False, encoding='utf8')
 
     def loadDataset(self, dirName):
         """Load samples from file
@@ -319,10 +319,11 @@ def extractConversation(self, conversation):
         for i in range(len(conversation["lines"]) - 1):
             inputLine = conversation["lines"][i]
             targetLine = conversation["lines"][i + 1]
-
-            inputWords = self.extractText(inputLine["text"])
-            targetWords = self.extractText(targetLine["text"], True)
-
+	    try:
+		inputWords = self.extractText(inputLine["text"])
+		targetWords = self.extractText(targetLine["text"], True)
+	    except:
+		continue
             # Filter wrong samples (if one of the list is empty)
             if inputWords and targetWords:
                 self.trainingSamples.append([inputWords, targetWords])