Skip to content

Commit

Permalink
Give Hangul characters their own reason
Browse files: browse the repository at this point in the history
  • Loading branch information
thomas-daniels committed Jun 2, 2015
1 parent a267c45 commit e953cdc
Showing 1 changed file with 3 additions and 1 deletion.
4 changes: 3 additions & 1 deletion findspam.py
Expand Up @@ -29,7 +29,7 @@ class FindSpam:
"slumber pm", "1-844-400-7325", "male enhancement", "bestcollegechina",
"bbwdesire"]
bad_keywords_nwb = [u"ಌ", "babyliss", "garcinia", "acai ?berry", # "nwb" == "no word boundary"
"aging ?cream", u"오피", "b ?a ?m ?((w ?o ?w)|(w ?a ?r))",
"aging ?cream", "b ?a ?m ?((w ?o ?w)|(w ?a ?r))",
"abam26"]
blacklisted_websites = ["online ?kelas", "careyourhealths", "wowtoes",
"ipubsoft", "orabank", "powerigfaustralia",
Expand Down Expand Up @@ -117,6 +117,8 @@ class FindSpam:
'sites': [], 'reason': "Bad keyword in {}", 'title': True, 'body': True, 'username': True, 'stripcodeblocks': False, 'body_summary': True},
{'regex': u"(?i)\\b(baba|nike)\\b|\\p{Script=Hangul}", 'all': True,
'sites': [], 'reason': "Bad keyword in {}", 'title': True, 'body': True, 'username': False, 'stripcodeblocks': False, 'body_summary': True},
{'regex': u"(?i)\\p{Script=Hangul}", 'all': True,
'sites': [], 'reason': "Korean character in {}", 'title': True, 'body': True, 'username': False, 'stripcodeblocks': False, 'body_summary': True},
{'regex': u"(?i)(>>>>|===>|==>>>)(?s).*http", 'all': True,
'sites': [], 'reason': "Bad keyword in {}", 'title': True, 'body': True, 'username': True, 'stripcodeblocks': True, 'body_summary': False},
{'regex': u"<blockquote>[^\/]*<blockquote>", 'all': True,
Expand Down

0 comments on commit e953cdc

Please sign in to comment.