Skip to content

Commit

Permalink
detect more non-linked URLs
Browse files Browse the repository at this point in the history
  • Loading branch information
normalhuman committed Jul 1, 2016
1 parent 329c7d5 commit 47e7591
Showing 1 changed file with 4 additions and 1 deletion.
5 changes: 4 additions & 1 deletion findspam.py
Expand Up @@ -555,8 +555,11 @@ class FindSpam:
{'regex': ur'(?is)^.{0,75}(thank you|Thank(s| you) for sharing|dear forum members).{0,200}<a href.{0,200}$', 'all': True,
'sites': [], 'reason': 'bad keyword with a link in {}', 'title': False, 'body': True, 'username': False, 'stripcodeblocks': False, 'body_summary': False, 'questions': False, 'max_rep': 1, 'max_score': 0},
# non-linked .tk site at the end of an answer
{'regex': ur'(?is)\w{3}\.tk(?:</strong>)?\s*</p>\s*$', 'all': True,
{'regex': ur'(?is)\w{3}\.tk(?:</strong>)?\W*</p>\s*$', 'all': True,
'sites': [], 'reason': 'pattern-matching website in {}', 'title': False, 'body': True, 'username': False, 'stripcodeblocks': False, 'body_summary': False, 'questions': False, 'max_rep': 1, 'max_score': 0},
# non-linked .com/.net/.co.uk site at the end of an answer
{'regex': ur'(?is)\w{6}\.(com|net|co\.uk)(?:</strong>)?\W*</p>\s*$', 'all': True,
'sites': [], 'reason': 'link at end of {}', 'title': False, 'body': True, 'username': False, 'stripcodeblocks': False, 'body_summary': False, 'questions': False, 'max_rep': 1, 'max_score': 0},
# Shortened URL near the end of a question
{'regex': ur"(?is)://(goo\.gl|bit\.ly|tinyurl\.com|fb\.me|cl\.ly|t\.co|is\.gd|j\.mp|tr\.im|ow\.ly|wp\.me|alturl\.com|tiny\.cc|9nl\.me|post\.ly|dyo\.gs|bfy\.tw|amzn\.to)/.{0,200}$", 'all': True, 'sites': ["superuser.com", "askubuntu.com"], 'reason': "shortened URL in {}", 'title': False, 'body': True, 'username': False, 'stripcodeblocks': True, 'body_summary': False, 'answers': False, 'max_rep': 1, 'max_score': 0},
# Shortened URL in a short answer
Expand Down

0 comments on commit 47e7591

Please sign in to comment.