Skip to content
Permalink
Browse files

Support Python 2.7

  • Loading branch information...
Samurais committed Oct 20, 2017
1 parent e48be1a commit 4563cc6bd3df731096e448c7e987094c96ea963a
Showing with 12 additions and 9 deletions.
  1. +2 −1 .gitignore
  2. +2 −1 deepqa2/config.py
  3. +2 −2 deepqa2/dataset/cornelldata.py
  4. +6 −5 deepqa2/dataset/textdata.py
@@ -1,6 +1,7 @@
.vscode/
*.swp
*.swo
*.pyc
**/__pycache__
nohup.out

@@ -18,4 +19,4 @@ config.ini

# serve
/deepqa2/serve/db.sqlite3
/deepqa2/serve/dump.rdb
/deepqa2/serve/dump.rdb
@@ -106,8 +106,9 @@ def __init__(self):
'''
Define Dataset
'''
print("dataset", self.ini['data']['dataset'])
if not os.path.exists(self.ini['data']['dataset']):
raise 'Corpus Data not exists.'
raise Exception('Corpus Data not exists.')
print('Start to load corpus ... %s' % self.ini['data']['dataset'])
self.dataset_pkl_path = self.ini['data']['dataset']
with open(self.dataset_pkl_path, 'rb') as handle:
@@ -59,7 +59,7 @@ def loadLines(self, fileName, fields):
"""
lines = {}

with open(fileName, 'r', encoding='iso-8859-1') as f: # TODO: Solve Iso encoding pb !
with open(fileName, 'r') as f: # TODO: Solve Iso encoding pb !
for line in f:
values = line.split(" +++$+++ ")

@@ -82,7 +82,7 @@ def loadConversations(self, fileName, fields):
"""
conversations = []

with open(fileName, 'r', encoding='iso-8859-1') as f: # TODO: Solve Iso encoding pb !
with open(fileName, 'r') as f: # TODO: Solve Iso encoding pb !
for line in f:
values = line.split(" +++$+++ ")

@@ -268,7 +268,7 @@ def saveDataset(self, dirName):

with open(dataset_pkl_path + '.json', 'w') as fp:
# Also save in JSON format for quick inspection
json.dump(data, fp)
json.dump(data, fp, ensure_ascii=False, encoding='utf8')

def loadDataset(self, dirName):
"""Load samples from file
@@ -319,10 +319,11 @@ def extractConversation(self, conversation):
for i in range(len(conversation["lines"]) - 1):
inputLine = conversation["lines"][i]
targetLine = conversation["lines"][i + 1]

inputWords = self.extractText(inputLine["text"])
targetWords = self.extractText(targetLine["text"], True)

try:
inputWords = self.extractText(inputLine["text"])
targetWords = self.extractText(targetLine["text"], True)
except:
continue
# Filter out invalid samples (if either list is empty)
if inputWords and targetWords:
self.trainingSamples.append([inputWords, targetWords])

0 comments on commit 4563cc6

Please sign in to comment.
You can’t perform that action at this time.