/
firetestv1_05.py
112 lines (109 loc) · 3.58 KB
/
firetestv1_05.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#import finder_beng as finder
import finder
from collections import defaultdict
import beng_search as beng
def main():
lno=1
word_tot=0
corr=0
init_corr=0
fin=open("./BanglaEnglish_FIRE2013_AnnotatedDev.txt",'r')
sent=fin.readline()
while(sent):
words=[]
lang=[]
sent=sent.split()
for elem in sent:
elem=elem.split('\\')
lang.append(elem[1][0])
words.append(elem[0])
type_map = defaultdict(str)
type_count = defaultdict(int)
word_count=0
for word in words:
word=word.strip(" ")
word_count+=1
type_word=finder.find(word)
type_map[word]=type_word
type_count[type_word]+=1
print(str(word)+"(Detect:"+str(type_map[word])+")")
if((type_map[word]=="Bengali word" and lang[words.index(word)]=='B') or (type_map[word]=="English word" and lang[words.index(word)]=='E')):
init_corr+=1
'''
for word in words:
print (word+": "+type_map[word])
print("Type count of English: "+str(type_count["English word"]))
print("Type count of Bengali: "+str(type_count["Bengali word"]))
'''
if((type_count["English word"])>(type_count["Bengali word"])):
default="e"
else:
default="b"
print(str(lno)+default)
lno+=1
for i in range(len(words)):
word_count=0
type_count["English word"]=0
type_count["Bengali word"]=0
if(default=="e"):
if(beng.beng_word(words[i])==1 and type_map[words[i]]=="English word"):
if(i>1 and i<(len(words)-2)):
word_count=4
for j in range(i-2,min((len(words)-1),(i+3))):
if(j!=i):
type_count[type_map[words[j]]]+=1
if(type_count["Bengali word"]>type_count["English word"]):
type_map[words[i]]="Bengali word"
elif (i<=1):
word_count=min(i+2,len(words)-1)
for j in range(min((len(words)-1),(i+3))):
if(j!=i):
type_count[type_map[words[j]]]+=1
if(type_count["Bengali word"]>type_count["English word"]):
type_map[words[i]]="Bengali word"
elif(i>=(len(words)-2)):
word_count= (len(words)-i)+1
for j in range(i-2,len(words)):
if(j!=i):
type_count[type_map[words[j]]]+=1
if(type_count["Bengali word"]>type_count["English word"]):
type_map[words[i]]="Bengali word"
elif(default=="b"):
if(beng.beng_word(words[i])==1 and type_map[words[i]]=="English word"):
if(i>1 and i<(len(words)-2)):
word_count=4
for j in range(i-2,min((len(words)-1),(i+3))):
if(j!=i):
type_count[type_map[words[j]]]+=1
if(type_count["Bengali word"]>=type_count["English word"]):
type_map[words[i]]="Bengali word"
elif (i<=1):
word_count=min(i+2,len(words)-1)
for j in range(min((len(words)-1),(i+3))):
if(j!=i):
type_count[type_map[words[j]]]+=1
if(type_count["Bengali word"]>=type_count["English word"]):
type_map[words[i]]="Bengali word"
elif(i>=(len(words)-2)):
word_count= (len(words)-i)+1
for j in range(i-2,len(words)):
if(j!=i):
type_count[type_map[words[j]]]+=1
if(type_count["Bengali word"]>=type_count["English word"]):
type_map[words[i]]="Bengali word"
if(type_map[words[i]]=="Bengali word"):
det="B"
elif(type_map[words[i]]=="English word"):
det="E"
else:
det="N"
print(str(words[i])+"(Orig:"+str(lang[i])+" Detect:"+str(det)+")")
if((type_map[words[i]]=="Bengali word" and lang[i]=='B') or (type_map[words[i]]=="English word" and lang[i]=='E')):
corr+=1
word_tot+=1
#for word in words:
# print (word+": "+type_map[word])
sent=fin.readline()
print("The uncorrected accuracy is: "+str(init_corr*100/word_tot)+"%")
print("The accuracy is: "+str(corr*100/word_tot)+"%")
main()