Skip to content

Commit

Permalink
updated keywords-with-email
Browse files: browse the repository at this point in the history
  • Loading branch information
normalhuman committed May 12, 2016
1 parent 83e84f4 commit e56801f
Showing 1 changed file with 1 addition and 1 deletion.
2 changes: 1 addition & 1 deletion findspam.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ def has_health(s, site): # flexible detection of health spam in titles
def keyword_email(s, site): # a keyword and an email in the same post
if regex.compile("<pre>|<code>").search(s) and site == "stackoverflow.com": # Avoid false positives on SO
return False, ""
keyword = regex.compile(ur"(?i)\b(training|we (will )?(offer|develop|provide)|sell|money|payment|guarantee|catalog|rent|crack|opportunity|candidate|loan|lover|husband|illuminati|brotherhood|(join|reach|contact) (me|us)|spell(caster)?|doctor|hack(er|ing)?|spying|passport|visa|seaman|scam|pics|vampire|bless(ed)?|atm|miracle|testimony|kidney|hospital|wetting)s?\b| Dr\.? ").search(s)
keyword = regex.compile(ur"(?i)\b(training|we (will )?(offer|develop|provide)|sell|order|invest(or|ing|ment)|money|payment|quality|legit|interest(ed)?|guarantee|catalog|rent|crack|expert|opportunity|fundraising|campaign|career|employment|candidate|resume|loan|lover|husband|illuminati|brotherhood|(join|reach) (me|us)|contact|job|spell(caster)?|doctor|hack(er|ing)?|spying|passport|visa|seaman|scam|pics|vampire|bless(ed)?|atm|miracle|testimony|kidney|hospital|wetting)s?\b| Dr\.? ").search(s)
if keyword:
email = regex.compile(ur"(?<![=#/])\b[A-z0-9_.%+-]+@(?!(example|domain|site|foo|\dx)\.[A-z]{2,4})[A-z0-9_.%+-]+\.[A-z]{2,4}\b").search(s)
if email:
Expand Down

0 comments on commit e56801f

Please sign in to comment.