-
Notifications
You must be signed in to change notification settings - Fork 1
/
answer.py
74 lines (54 loc) · 1.98 KB
/
answer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#!/usr/bin/python
### Imports ###
import nltk
from nltk.corpus import brown
import re
import sys
import os
import regexp_answer
import csim_answer
import pickle
###############
# Validate the command line up front: exactly two arguments are required,
# the article file followed by the question file.
if len(sys.argv) != 3:
    # Report the script under the name it was actually invoked with rather
    # than a hard-coded (and outdated) file name, and send the usage error to
    # stderr so it does not mix with answers written to stdout.
    script_name = os.path.basename(sys.argv[0])
    sys.stderr.write(
        "Please call %s as follows: python %s <article.txt> <question.txt>!\n"
        % (script_name, script_name)
    )
    # sys.exit is always available; the bare exit() helper is only injected
    # by the site module.
    sys.exit(1)
### Globals ###
# Shape/suffix heuristics for tagging a single token, as (regex, tag) pairs.
# The list is handed to nltk.RegexpTagger unchanged; the catch-all '.*' rule
# is kept last so it only acts as the default.
# NOTE(review): the '.' in the cardinal-number pattern is unescaped, so it
# matches any single character between the digit groups (e.g. "3,000" or
# "1/2"), not just a decimal point -- confirm this is intended.
_REGEXP_TAG_RULES = [
    (r'^-?[0-9]+(.[0-9]+)?$', 'CD'),    # cardinal numbers
    (r'(The|the|A|a|An|an)$', 'AT'),    # articles
    (r'.*able$', 'JJ'),                 # adjectives
    (r'.*ness$', 'NN'),                 # nouns formed from adjectives
    (r'.*ly$', 'RB'),                   # adverbs
    (r'.*s$', 'NNS'),                   # plural nouns
    (r'.*ing$', 'VBG'),                 # gerunds
    (r'.*ed$', 'VBD'),                  # past tense verbs
    (r'.*', 'NN'),                      # nouns (default)
]
regexp_tagger = nltk.RegexpTagger(_REGEXP_TAG_RULES)
def _load_pickled_tagger(path):
    """Return the object unpickled from the file at *path*.

    The file is opened in binary mode (pickle data is bytes, and text mode
    can corrupt it on platforms that translate line endings) and is closed
    as soon as loading finishes.

    Raises IOError/OSError if the file cannot be opened, and whatever
    pickle.load raises if the contents are not a valid pickle.
    """
    # SECURITY: pickle.load can execute arbitrary code embedded in the file;
    # only load tagger files that were produced by a trusted source.
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Pre-trained n-gram taggers, read from files in the current working directory.
unigram_tagger = _load_pickled_tagger("unigram_tagger.bin")
bigram_tagger = _load_pickled_tagger("bigram_tagger.bin")
trigram_tagger = _load_pickled_tagger("trigram_tagger.bin")
###############
def main():
    """Answer every question in the question file against the article.

    sys.argv[1] is the path of the article text and sys.argv[2] the path of
    a file holding one question per line. Each question line (including its
    trailing newline, as returned by readlines) is passed together with the
    open article file to csim_answer.csim_answer.

    Only the csim answerer is wired in. The sentence/word tokenising, the
    trigram POS tagging and the regexp/table answerers that earlier versions
    of this function sketched out are not run.
    """
    # Read all questions first and release the handle immediately.
    with open(sys.argv[2], "r") as question_file:
        questions = question_file.readlines()

    # Keep the article open only for the duration of the loop.
    with open(sys.argv[1], "r") as article:
        for question in questions:
            # The same file object is reused for every question; rewind it so
            # an answerer that reads the article to the end does not leave an
            # exhausted (empty) stream for the next question.
            article.seek(0)
            csim_answer.csim_answer(question, article)
### If running as a console script ###
# Only start answering when executed directly, not when imported as a module.
if __name__ == "__main__":
    main()