Skip to content

Commit

Permalink
Merge pull request #1 from honnza/master
Browse files Browse the repository at this point in the history
fixed a regex - baba(ji?) matches babaj and babaji; completed testsite
  • Loading branch information
Manishearth committed Feb 4, 2014
2 parents b7591ec + 50d42d1 commit da3b1bb
Showing 1 changed file with 20 additions and 1 deletion.
21 changes: 20 additions & 1 deletion findspam.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,28 @@


class FindSpam:
# Spam-detection rule table. Each rule is a dict with four keys:
#   'regex'  - pattern searched for in a post title
#   'all'    - True: the rule runs on every site except those in 'sites';
#              False: the rule runs only on the sites in 'sites'
#   'sites'  - site hostnames, interpreted according to 'all'
#   'reason' - message reported when the pattern is found
rules = [
    {
        'regex': "\\b(baba(ji)?|vashikaran|fashion|here is|porn)\\b",
        'all': True,
        'sites': [],
        'reason': "Bad keyword detected",
    },
    {
        'regex': "\\+\\d{10}|\\+?\\d{2}\\s?\\d{8}",
        'all': True,
        'sites': ["patents.stackexchange.com"],
        'reason': "Phone number detected",
    },
    {
        'regex': "\\b(asshole|crap|fag|fuck|idiot|shit|whore)s?\\b",
        'all': True,
        'sites': [],
        'reason': "Offensive title detected",
    },
    {
        'regex': "^[^a-z]*$",
        'all': True,
        'sites': [],
        'reason': "All-caps title",
    },
]

@staticmethod
def testpost(title, site):
result = [];
for rule in rules:
if rule['all'] != (site in rule['sites']):
if re.compile(rule['regex']).search(title):
result.append(rule['reason'])
return result

@staticmethod
def testtitle(title):
regexes=["\\b(baba(ji?)|vashikaran|fashion|here is|porn)\\b","\\+\\d{10}","\\+?\\d{2}\\s?\\d{8}","\\b(asshole|crap|fag|fuck|idiot|shit|whore)s?\\b"]
regexes=["\\b(baba(ji)?|vashikaran|fashion|here is|porn)\\b","\\+\\d{10}","\\+?\\d{2}\\s?\\d{8}","\\b(asshole|crap|fag|fuck|idiot|shit|whore)s?\\b"]
result = []
p = [not not re.compile(s).search(title) for s in regexes]
if 'vashikaran' in title or 'baba' in title or True in p:
Expand Down

0 comments on commit da3b1bb

Please sign in to comment.