Skip to content

Commit 488bc1b

Browse files
authored
Merge pull request #2254 from iBug/misc-fix
Detection accuracy patch
2 parents eb3055c + 98800aa commit 488bc1b

File tree

1 file changed

+6
-3
lines changed

1 file changed

+6
-3
lines changed

findspam.py

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,7 @@
27 27
LEVEN_DOMAIN_DISTANCE = 3
28 28
SIMILAR_THRESHOLD = 0.95
29 29
SIMILAR_ANSWER_THRESHOLD = 0.7
30-
BODY_TITLE_SIMILAR_THRESHOLD = 0.99 # That's enough
30+
BODY_TITLE_SIMILAR_THRESHOLD = 0.90
31 31
CHARACTER_USE_RATIO = 0.42
32 32
REPEATED_CHARACTER_RATIO = 0.20
33 33
EXCEPTION_RE = r"^Domain (.*) didn't .*!$"
@@ -731,9 +731,12 @@ def mostly_dots(s, *args):
731 731

732 732
body = strip_urls_and_tags(body)
733 733

734-
dot_count = len(regex.findall(r"\.", body))
734+
dot_count = body.count(".")
735+
s = strip_urls_and_tags(s)
736+
if not s:
737+
return False, ""
735 738

736-
if dot_count / float(len(s)) >= 0.4:
739+
if dot_count / len(s) >= 0.4:
737 740
return True, u"Post contains {} dots out of {} characters".format(dot_count, len(s))
738 741
else:
739 742
return False, ""

0 commit comments

Comments
 (0)