Skip to content

Commit

Permalink
Merge pull request #974 from NobodyNada/master --autopull
Browse files Browse the repository at this point in the history
Add a "Pattern-matching email" reason
  • Loading branch information
angussidney committed Jul 22, 2017
2 parents 6f77f48 + 9d3da2e commit 9ecfc2f
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 3 deletions.
3 changes: 0 additions & 3 deletions bad_keywords.txt
Expand Up @@ -120,7 +120,6 @@ service proposal essay
enetdocumentation
okaygoods
(love|miracle).*spell ?casters?
\w*(spell(home)?|temple|classes)\w*@
black magic specialist
great\Wspell\Wcaster
viagra
Expand Down Expand Up @@ -214,8 +213,6 @@ soleil\Wglo
Clariderm\WCream
ATM hackers?
hack\Wtool
\w*hacker\w*@
\w*hack\w*@
professional\Whacker
gain\Wxt(reme)?
evermax
Expand Down
15 changes: 15 additions & 0 deletions findspam.py
Expand Up @@ -251,6 +251,18 @@ def keyword_email(s, site, *args): # a keyword and an email in the same post
return False, ""


# Compiled once at import time instead of on every call. pattern_email() only ever searches
# lowercased text, so the character classes use a-z; the range A-z would additionally admit
# the ASCII punctuation that sits between 'Z' and 'a' ('[', '\', ']', '^' and '`').
_PATTERN_EMAIL_REGEX = regex.compile(
    r"(?<![=#/])\b[a-z0-9_.%+-]*"
    r"(dr|loan|hack|financ|fund|spell|temple|herbal|spiritual|atm|heal|priest|classes)"
    r"[a-z0-9_.%+-]*"
    r"@(?!(example|domain|site|foo|\dx)\.[a-z]{2,4})[a-z0-9_.%+-]+\.[a-z]{2,4}\b"
)


# noinspection PyUnusedLocal,PyMissingTypeHints
def pattern_email(s, site, *args):
    """Detect an email address whose local part contains a spam-associated keyword.

    The text is lowercased and searched for the first address that:

    * is not immediately preceded by '=', '#' or '/' (presumably to skip addresses
      embedded in URLs or query strings -- NOTE(review): confirm intent);
    * has one of the keywords (dr, loan, hack, financ, fund, spell, temple, herbal,
      spiritual, atm, heal, priest, classes) somewhere in the part before the '@';
    * has a domain that is not a placeholder such as example.com, domain.com,
      site.com, foo.com or <digit>x.<tld>.

    :param s: text to scan
    :param site: unused; accepted so the signature matches the other detection methods
    :param args: unused extra positional arguments
    :return: (True, u"Pattern-matching email <address>") for the first match, with the
             address lowercased; (False, "") when nothing matches
    """
    match = _PATTERN_EMAIL_REGEX.search(s.lower())
    if match:
        return True, u"Pattern-matching email {}".format(match.group(0))
    return False, ""


# noinspection PyUnusedLocal,PyMissingTypeHints
def keyword_link(s, site, *args): # thanking keyword and a link in the same short answer
if len(s) > 400:
Expand Down Expand Up @@ -944,6 +956,9 @@ class FindSpam:
# Combination of keyword and email in questions and answers, for all sites
{'method': keyword_email, 'all': True, 'sites': [], 'reason': "bad keyword with email in {}", 'title': True,
'body': True, 'username': False, 'stripcodeblocks': True, 'body_summary': False, 'max_rep': 1, 'max_score': 0},
# Spammy-looking email in questions and answers, for all sites
{'method': pattern_email, 'all': True, 'sites': [], 'reason': "pattern-matching email in {}", 'title': True,
'body': True, 'username': False, 'stripcodeblocks': True, 'body_summary': False, 'max_rep': 1, 'max_score': 0},
# QQ/ICQ/Whatsapp... numbers, for all sites
{'regex': r'(?i)(?<![a-z0-9])Q{1,2}(?:(?:[vw]|[^a-z0-9])\D{0,8})?\d{5}[.-]?\d{4,5}(?!["\d])|'
r'\bICQ[ :]{0,5}\d{9}\b|\bwh?atsapp?[ :]{0,5}\d{10}', 'all': True, 'sites': [],
Expand Down

0 comments on commit 9ecfc2f

Please sign in to comment.