# PBA_FinalProject - QAS.py
# Question answering over news.txt / questions.txt using spaCy named entities.
import re
import en_core_web_sm
def remove_stopwords(text, stopwords) :
    """Collect the distinct non-stopword tokens found in a list of strings.

    Args:
        text: iterable of strings; each one is tokenised with the
            module-level ``nlp`` pipeline.
        stopwords: container of lower-case words to drop.

    Returns:
        List of token texts in first-seen order, keeping the casing of the
        first occurrence. Duplicates are detected case-insensitively.
    """
    words = []
    # Lower-cased forms already emitted. The duplicate check has to compare
    # like with like: testing a lower-cased token against the original-case
    # entries of ``words`` would never match a capitalised word.
    seen = set()
    for s in text:
        for w in nlp(s):
            lowered = w.text.lower()
            if lowered in seen or lowered in stopwords:
                continue
            seen.add(lowered)
            words.append(w.text)
    return words
def find_index(news, question) :
    """Return the index of the sentence that shares the most words with the question.

    The question is split on single spaces; a word counts as a hit when its
    lower-cased form occurs anywhere (substring match) in the lower-cased
    sentence. Ties go to the earliest sentence, and 0 is returned when no
    sentence scores above zero.
    """
    terms = question.split(' ')
    best_pos = 0
    best_hits = 0
    for pos, sentence in enumerate(news):
        hits = sum(1 for term in terms if term.lower() in sentence.lower())
        # Strictly greater, so the first sentence reaching a score keeps it.
        if hits > best_hits:
            best_hits = hits
            best_pos = pos
    return best_pos
def clean_news(news) :
    """Normalise raw news text and split it into sentence-like pieces.

    Newlines and apostrophes are deleted outright (not replaced by a space),
    commas become a single space, and the result is split on every '.'.
    The returned list therefore ends with an empty string when the text
    ends with a full stop.
    """
    substitutions = (('\n', ''), ('\'', ''), (',', ' '))
    for old, new in substitutions:
        news = news.replace(old, new)
    return news.split('.')
def clean_question(question) :
    """Strip the interrogative from a question and map it to an entity label.

    Every '?' is removed, then the leading word (followed by whitespace) is
    cut off and looked up case-insensitively:
    who -> person, where -> gpe, when -> date, how -> money.

    Args:
        question: the raw question string.

    Returns:
        A ``(remaining_text, ner)`` tuple. ``ner`` is ``[[label]]`` for a
        recognised interrogative and ``[]`` otherwise. When the text does
        not start with a word followed by whitespace (blank line, single
        word, leading space) it is returned unchanged with an empty ``ner``
        instead of raising ``IndexError``.
    """
    question = question.replace('?','')
    # re.match anchors at the start of the string, like the '^' it replaces.
    leading = re.match(r'(\w+)\s+', question)
    if leading is None:
        return question, []
    q_word = leading.group(1).lower()
    question = question[leading.end():]
    labels = {
        'who': 'person',
        'where': 'gpe',
        'when': 'date',
        'how': 'money',
    }
    ner = [[labels[q_word]]] if q_word in labels else []
    return question, ner
def find_answer(sentence, question, q_word):
    """Pick the entity tokens in a sentence that best answer a question.

    The sentence is tagged with ``find_ner``. Every token that contains a
    question word (case-insensitive substring match) acts as an anchor.
    From each anchor the tokens are scanned forward to the end and then
    backward to the start, looking at tokens whose entity type contains the
    wanted label:

    * the first such token becomes the answer;
    * a later one is joined onto the answer when its neighbour's text equals
      the text of the most recently accepted token and its own text is not
      already part of the answer (appended on the forward scan, prepended
      on the backward scan);
    * otherwise it replaces the answer when it lies closer to the current
      anchor than the recorded distance.

    The answer, distance and position carry over from one anchor to the next.

    Args:
        sentence: the sentence text to search.
        question: list of question words.
        q_word: sequence whose first item is the wanted entity label
            (e.g. ``['person']``); compared case-insensitively.

    Returns:
        The answer text, or '' when no token qualifies.
    """
    # NOTE(review): the nesting below follows the most plausible reading of
    # the original, whose indentation was lost -- confirm against a clean copy.
    ner = find_ner(sentence)
    last = len(ner) - 1
    answer = ''
    count = 0       # distance between the anchor and the accepted token
    found = False
    index = 0       # position of the most recently accepted token

    def is_wanted(pos):
        return q_word[0].lower() in ner[pos][1].lower()

    for word in question:
        for j in range(len(ner)):
            if word.lower() not in ner[j][0].lower():
                continue

            # Forward scan: anchor -> end of sentence.
            for k in range(j, len(ner)):
                if not is_wanted(k):
                    continue
                if not found:
                    answer = ner[k][0]
                    count = abs(j - k)
                    index = k
                    found = True
                # k > 0 keeps ner[k - 1] from wrapping round to the last token.
                elif (k > 0
                        and ner[index][0].lower() == ner[k - 1][0].lower()
                        and ner[k][0] not in answer):
                    answer = " ".join((answer, ner[k][0]))
                    index = k
                elif abs(j - k) < count:
                    answer = ner[k][0]
                    count = abs(j - k)
                    index = k

            # Backward scan: anchor -> start of sentence.
            for k in range(j, -1, -1):
                if not is_wanted(k):
                    continue
                if not found:
                    answer = ner[k][0]
                    count = abs(j - k)
                    index = k
                    found = True
                # k < last keeps ner[k + 1] in range; without it an anchor on
                # the final token raised IndexError.
                elif (k < last
                        and ner[index][0].lower() == ner[k + 1][0].lower()
                        and ner[k][0] not in answer):
                    answer = " ".join((ner[k][0], answer))
                    index = k
                elif abs(j - k) < count:
                    answer = ner[k][0]
                    count = abs(j - k)
                    index = k
    return answer
def find_ner(sentence):
    """Tokenise a sentence and pair every token with its entity type.

    Args:
        sentence: the text to analyse.

    Returns:
        A list of ``[token_text, entity_type]`` lists, one per token, taken
        from each token's ``text`` and ``ent_type_`` attributes.
    """
    # Loading the model is expensive and this function runs once per
    # question, so the pipeline is loaded on first use and kept on the
    # function object for later calls.
    model = getattr(find_ner, "_model", None)
    if model is None:
        model = en_core_web_sm.load()
        find_ner._model = model
    return [[t.text, t.ent_type_] for t in model(sentence)]
# Script body: read the news text and the questions, then print an answer
# for every question.
with open("news.txt","r") as file :
    news = file.read()
with open("questions.txt","r") as file :
    questions = file.read()
questions = questions.split('\n')

# Module-level pipeline; remove_stopwords() reads this global name.
nlp = en_core_web_sm.load()
news = clean_news(news)
stopwords = nlp.Defaults.stop_words
words = remove_stopwords(news,stopwords)

for i, b in enumerate(questions) :
    # Blank lines (e.g. from a trailing newline in the file) are not
    # questions; skip them but keep numbering by line position.
    if not b.strip():
        continue
    question, q_word = clean_question(b)
    q = question.split(' ')
    print(i+1, b)
    found_index = find_index(news, question)
    # q_word is empty for an unsupported interrogative; indexing it would
    # raise IndexError, so print an empty answer for those questions.
    if q_word:
        answer = find_answer(news[found_index], q, q_word[0])
    else:
        answer = ''
    print(answer, '\n')