Skip to content

Commit

Permalink
support py2.7
Browse files (browse the repository at this point in the history)
  • Loading branch information
hailiang-wang committed Oct 20, 2017
1 parent e48be1a commit 4563cc6
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 9 deletions.
3 changes: 2 additions & 1 deletion .gitignore
@@ -1,6 +1,7 @@
.vscode/
*.swp
*.swo
*.pyc
**/__pycache__
nohup.out

Expand All @@ -18,4 +19,4 @@ config.ini

# serve
/deepqa2/serve/db.sqlite3
/deepqa2/serve/dump.rdb
/deepqa2/serve/dump.rdb
3 changes: 2 additions & 1 deletion deepqa2/config.py
Expand Up @@ -106,8 +106,9 @@ def __init__(self):
'''
Define Dataset
'''
print("dataset", self.ini['data']['dataset'])
if not os.path.exists(self.ini['data']['dataset']):
raise 'Corpus Data not exists.'
raise Exception('Corpus Data not exists.')
print('Start to load corpus ... %s' % self.ini['data']['dataset'])
self.dataset_pkl_path = self.ini['data']['dataset']
with open(self.dataset_pkl_path, 'rb') as handle:
Expand Down
4 changes: 2 additions & 2 deletions deepqa2/dataset/cornelldata.py
Expand Up @@ -59,7 +59,7 @@ def loadLines(self, fileName, fields):
"""
lines = {}

with open(fileName, 'r', encoding='iso-8859-1') as f: # TODO: Solve Iso encoding pb !
with open(fileName, 'r') as f: # TODO: Solve Iso encoding pb !
for line in f:
values = line.split(" +++$+++ ")

Expand All @@ -82,7 +82,7 @@ def loadConversations(self, fileName, fields):
"""
conversations = []

with open(fileName, 'r', encoding='iso-8859-1') as f: # TODO: Solve Iso encoding pb !
with open(fileName, 'r') as f: # TODO: Solve Iso encoding pb !
for line in f:
values = line.split(" +++$+++ ")

Expand Down
11 changes: 6 additions & 5 deletions deepqa2/dataset/textdata.py
Expand Up @@ -268,7 +268,7 @@ def saveDataset(self, dirName):

with open(dataset_pkl_path + '.json', 'w') as fp:
# Save in json format for fast view
json.dump(data, fp)
json.dump(data, fp, ensure_ascii=False, encoding='utf8')

def loadDataset(self, dirName):
"""Load samples from file
Expand Down Expand Up @@ -319,10 +319,11 @@ def extractConversation(self, conversation):
for i in range(len(conversation["lines"]) - 1):
inputLine = conversation["lines"][i]
targetLine = conversation["lines"][i + 1]

inputWords = self.extractText(inputLine["text"])
targetWords = self.extractText(targetLine["text"], True)

try:
inputWords = self.extractText(inputLine["text"])
targetWords = self.extractText(targetLine["text"], True)
except:
continue
# Filter wrong samples (if one of the list is empty)
if inputWords and targetWords:
self.trainingSamples.append([inputWords, targetWords])
Expand Down

0 comments on commit 4563cc6

Please sign in to comment.