Skip to content

Commit

Permalink
Remove all-caps (answer)
Browse files Browse the repository at this point in the history
Effectiveness statistics for this reason can be found here: https://metasmoke.erwaysoftware.com/reason/74. We can revert this if we want, but it seems that few of the reported posts are actually worthy of an abusive flag, so I don't really see a reason to continue catching these posts.
  • Loading branch information
csnardi committed Oct 29, 2016
1 parent 3e0e841 commit 9052997
Showing 1 changed file with 0 additions and 13 deletions.
13 changes: 0 additions & 13 deletions findspam.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,6 @@
import phonenumbers


def all_caps_text(s, site):
s = regex.sub("<[^>]*>", "", s) # remove HTML tags
s = regex.sub("&\w+;", "", s) # remove HTML entities
if len(s) <= 150 and regex.compile(ur"SQL|\b(ERROR|PHP|QUERY|ANDROID|CASE|SELECT|HAVING|COUNT|GROUP|ORDER BY|INNER|OUTER)\b").search(s):
return False, "" # common words in non-spam all-caps titles
if len(s) >= 15 and regex.compile(ur"^(?=.*\p{upper})\P{lower}*$", regex.UNICODE).search(s):
return True, "All in caps"
return False, ""


def has_repeated_words(s, site):
words = regex.split(r"[\s.,;!/\()\[\]+_-]", s)
words = [word for word in words if word != ""]
Expand Down Expand Up @@ -493,9 +483,6 @@ class FindSpam:
# Offensive title: titles are more sensitive
{'regex': ur"(?i)\bfuck|(?<!brain)fuck(ers?|ing)?\b", 'all': True, 'sites': [], 'reason': "offensive {} detected", 'title': True, 'body': False, 'username': False, 'stripcodeblocks': True, 'body_summary': False,
'max_rep': 101, 'max_score': 5},
# All-caps text
{'method': all_caps_text, 'all': True, 'sites': ["pt.stackoverflow.com", "ru.stackoverflow.com", "es.stackoverflow.com", "ja.stackoverflow.com", "rus.stackexchange.com"],
'reason': "all-caps {}", 'title': False, 'body': True, 'questions': False, 'username': False, 'stripcodeblocks': True, 'body_summary': False, 'max_rep': 1, 'max_score': 0},
# No whitespace, punctuation, or formatting in a post
{'regex': ur"(?i)^<p>[a-z]+</p>\s*$", 'all': True, 'sites': ["codegolf.stackexchange.com", "puzzling.stackexchange.com"],
'reason': "no whitespace in {}", 'title': False, 'body': True, 'username': False, 'stripcodeblocks': False, 'body_summary': False, 'max_rep': 1, 'max_score': 0},
Expand Down

0 comments on commit 9052997

Please sign in to comment.