Skip to content

Commit

Permalink
Merge branch 'master' of github.com:Charcoal-SE/SmokeDetector
Browse files Browse the repository at this point in the history
  • Loading branch information
ArtOfCode- committed Jun 9, 2017
2 parents be18612 + 631a544 commit fc5225e
Show file tree
Hide file tree
Showing 8 changed files with 78 additions and 9 deletions.
2 changes: 2 additions & 0 deletions bad_keywords.txt
Original file line number Diff line number Diff line change
Expand Up @@ -520,3 +520,5 @@ active\W?plus
truvitaliti
dermalab
expandom
zylix\Wplus
Mevaqesh .*lo chelek .*olam haba
8 changes: 8 additions & 0 deletions blacklisted_websites.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1161,3 +1161,11 @@ maleenhancementshop\.info
github\.com/Appuploader
lish\.ir
realcoloncleansingworks\.com
vplak\.com
alluc\.ee
installthetech\.com
ithemesforests\.com
parsonscollegemuseum\.wordpress\.com
gratuitxblcodes\.com
selfcarinsurance\.com
agilechamps\.com
12 changes: 8 additions & 4 deletions chatcommands.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,7 @@ def command_blacklist_help(*args, **kwargs):


# noinspection PyIncorrectDocstring,PyUnusedLocal
def command_blacklist_website(message_parts, ev_user_name, ev_room, ev_user_id, wrap2, *args, **kwargs):
def command_blacklist_website(message_parts, ev_user_name, ev_room, ev_user_id, wrap2, message_id, *args, **kwargs):
"""
Adds a string to the website blacklist and commits/pushes to GitHub
:param message_parts:
Expand All @@ -315,13 +315,14 @@ def command_blacklist_website(message_parts, ev_user_name, ev_room, ev_user_id,
item_to_blacklist=website_pattern,
username=ev_user_name,
chat_profile_link=chat_user_profile_link,
message_id=message_id,
code_permissions=datahandling.is_code_privileged(ev_room, ev_user_id, wrap2)
)
return Response(command_status=result[0], message=result[1])


# noinspection PyIncorrectDocstring,PyUnusedLocal
def command_blacklist_keyword(message_parts, ev_user_name, ev_room, ev_user_id, wrap2, *args, **kwargs):
def command_blacklist_keyword(message_parts, ev_user_name, ev_room, ev_user_id, wrap2, message_id, *args, **kwargs):
"""
Adds a string to the keyword blacklist and commits/pushes to GitHub
:param message_parts:
Expand All @@ -343,13 +344,14 @@ def command_blacklist_keyword(message_parts, ev_user_name, ev_room, ev_user_id,
item_to_blacklist=keyword_pattern,
username=ev_user_name,
chat_profile_link=chat_user_profile_link,
message_id=message_id,
code_permissions=datahandling.is_code_privileged(ev_room, ev_user_id, wrap2)
)
return Response(command_status=result[0], message=result[1])


# noinspection PyIncorrectDocstring,PyUnusedLocal
def command_watch_keyword(message_parts, ev_user_name, ev_room, ev_user_id, wrap2, *args, **kwargs):
def command_watch_keyword(message_parts, ev_user_name, ev_room, ev_user_id, wrap2, message_id, *args, **kwargs):
"""
Adds a string to the watched keywords list and commits/pushes to GitHub
:param message_parts:
Expand All @@ -373,13 +375,14 @@ def command_watch_keyword(message_parts, ev_user_name, ev_room, ev_user_id, wrap
item_to_blacklist=watchlist_entry,
username=ev_user_name,
chat_profile_link=chat_user_profile_link,
message_id=message_id,
code_permissions=datahandling.is_code_privileged(ev_room, ev_user_id, wrap2)
)
return Response(command_status=result[0], message=result[1])


# noinspection PyIncorrectDocstring,PyUnusedLocal
def command_blacklist_username(message_parts, ev_user_name, ev_room, ev_user_id, wrap2, *args, **kwargs):
def command_blacklist_username(message_parts, ev_user_name, ev_room, ev_user_id, wrap2, message_id, *args, **kwargs):
"""
Adds a string to the username blacklist and commits/pushes to GitHub
:param message_parts:
Expand All @@ -401,6 +404,7 @@ def command_blacklist_username(message_parts, ev_user_name, ev_room, ev_user_id,
item_to_blacklist=username_pattern,
username=ev_user_name,
chat_profile_link=chat_user_profile_link,
message_id=message_id,
code_permissions=datahandling.is_code_privileged(ev_room, ev_user_id, wrap2)
)
return Response(command_status=result[0], message=result[1])
Expand Down
31 changes: 30 additions & 1 deletion findspam.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@
from urllib.parse import urlparse
from helpers import all_matches_unique, log
from itertools import chain
from collections import Counter

# Similarity cut-offs; their consumers are outside this view.
# NOTE(review): presumably used by the username/answer similarity checks - confirm.
SIMILAR_THRESHOLD = 0.95
SIMILAR_ANSWER_THRESHOLD = 0.7
# character_utilization_ratio() flags a post when its single most frequent
# character makes up more than this fraction of all characters.
CHARACTER_USE_RATIO = 0.42
# Matches messages of the form "Domain <x> didn't ...!" and captures <x>.
EXCEPTION_RE = r"^Domain (.*) didn't .*!$"
# Pre-compiled form of EXCEPTION_RE so it is not recompiled on each use.
RE_COMPILE = regex.compile(EXCEPTION_RE)
COMMON_MALFORMED_PROTOCOLS = [
Expand Down Expand Up @@ -343,6 +345,26 @@ def username_similar_website(s, site, *args):
return False, ""


# noinspection PyUnusedLocal,PyMissingTypeHints,PyTypeChecker
def character_utilization_ratio(s, site, *args):
    """
    Flag text in which a single character makes up an unusually large share.

    :param s: The text to inspect
    :param site: Unused; accepted so the signature matches the other checks
    :param args: Unused extra positional arguments
    :return: A tuple ``(matched, why)``. ``matched`` is True when the most
             frequent character accounts for more than CHARACTER_USE_RATIO of
             all characters in ``s``; ``why`` then names that character.
             Otherwise ``(False, "")``.
    """
    if not s:
        # Nothing to measure, and this keeps the division below well-defined.
        return False, ""

    # most_common(1) yields the single (character, count) pair with the
    # highest count, replacing the hand-written scan over every entry.
    highest_char, highest_count = Counter(s).most_common(1)[0]
    highest_ratio = highest_count / float(len(s))

    if highest_ratio > CHARACTER_USE_RATIO:
        # Fill in the placeholder so the reason actually names the character.
        return True, "The `{}` character appears in a high percentage of the post".format(highest_char)
    return False, ""


# noinspection PyMissingTypeHints
def perform_similarity_checks(post, name):
"""
Expand Down Expand Up @@ -988,7 +1010,14 @@ class FindSpam:
{'method': similar_answer, 'all': True, 'sites': ["codegolf.stackexchange.com"],
'reason': "answer similar to existing answer on post", 'whole_post': True,
'title': False, 'body': False, 'username': False, 'stripcodeblocks': False,
'max_rep': 50, 'max_score': 0}
'max_rep': 50, 'max_score': 0},

# A single character is utilized in a high percentage of the post
{'method': character_utilization_ratio, 'all': False, 'sites': ["judaism.stackexchange.com"],
'reason': "single character over used in post",
'title': False, 'body': True, 'username': False, 'stripcodeblocks': False, 'body_summary': True,
'max_rep': 20, 'max_score': 0}

]

@staticmethod
Expand Down
15 changes: 13 additions & 2 deletions gitmanager.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def add_to_blacklist(cls, **kwargs):
blacklist = kwargs.get("blacklist", "")
item_to_blacklist = kwargs.get("item_to_blacklist", "")
username = kwargs.get("username", "")
message_id = kwargs.get("message_id", "<unknown>")
chat_profile_link = kwargs.get("chat_profile_link", "http://chat.stackexchange.com/users")
code_permissions = kwargs.get("code_permissions", False)

Expand Down Expand Up @@ -137,7 +138,9 @@ def add_to_blacklist(cls, **kwargs):
git.add('watched_keywords.txt')

git.commit("--author='SmokeDetector <smokey@erwaysoftware.com>'",
"-m", u"Auto {0} of {1} by {2} --autopull".format(op, item, username))
"-m", u"Auto {0} of {1} by {2} --autopull".format(op, item, username),
"-m", u"Request: https://chat.stackexchange.com/transcript/message/{0}#{0}".format(message_id))
commit_sha = git('rev-parse', 'HEAD')

if code_permissions:
git.checkout("master")
Expand Down Expand Up @@ -193,7 +196,15 @@ def add_to_blacklist(cls, **kwargs):
finally:
cls.gitmanager_lock.release()

return (True, "{0}ed {1}".format(op.title(), item))
return (
True,
"{0}ed {1}: [`{2}`](https://github.com/Charcoal-SE/SmokeDetector/commit/{3})".format(
op.title(),
item,
commit_sha[:7],
commit_sha
)
)

@staticmethod
def current_git_status():
Expand Down
3 changes: 3 additions & 0 deletions globalvars.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ class GlobalVars:
"158742", # Rob
"207356", # 4castle
"133031", # Mithrandir
"215671", # Locutus of Borg (Mithrandir's Sock)
"169713", # Mego
"126657", # Cerbrus
"10145", # Thomas Ward
Expand All @@ -170,6 +171,8 @@ class GlobalVars:
"64521", # CalvT
"165474", # Hyper Neutrino
"169252", # Cai
"155243", # Nisse Engström
"69330", # Sconibulus
],
meta_tavern_room_id: [
"315433", # Normal Human
Expand Down
2 changes: 1 addition & 1 deletion test/test_spamhandling.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
('Mostly Non-latin', '冰冰冰test冰冰冰冰冰冰冰冰冰冰冰冰 test 冰冰冰冰', '', '', True),
('Pattern Matching product name - 2 words', """<p>vxl male enhancement</p>""", '', '', True),
('Pattern Matching product name - 3 words', """<p>Extends Monster Male Enhancement And Male Penile Enhancement</p>""", '', '', True),
('A Title', """<p>E x t e n d s M o n s t e r Male E n h a n c e m e n t And M a l e P e n i l e E n h a n c e m e n t</p>""", '', 'judaism.stackexchange.com', True),
])
def test_check_if_spam(title, body, username, site, match):
# We can't check blacklists/whitelists in tests, so these are set to their default values
Expand Down
14 changes: 13 additions & 1 deletion watched_keywords.txt
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,6 @@
1496382349 Glorfindel garagegymplanner\.com
1496385668 tripleee salefakepassport\.cc
1496385791 tripleee claudiug\.com
1496386146 tripleee ithemesforests\.com
1496388295 tripleee 0?808\W?281\W?8685
1496388407 tripleee mcafee-support-number\.uk
1496392531 tripleee lumiskin
Expand All @@ -195,3 +194,16 @@
1496406334 tripleee errorcodespro\.com
1496468161 Mego paragyte
1496600401 Mithrandir bestessayspapers\.com
1496651009 Mithrandir rajatsingla\.in
1496651211 Mithrandir magnesiumking\.com
1496655122 Mithrandir davita\.in
1496665122 Mithrandir PromoCode2017\.com
1496665766 Mithrandir unlockninja\.com
1496737080 Mithrandir canadacrusher\.com
1496737567 Glorfindel ultrafine\Wmill
1496815157 Glorfindel website99\.net
1496914850 Glorfindel bellasvish\.com
1496915146 Glorfindel saiproperties\.com
1496921326 Glorfindel alphonsacollege\.com
1496927422 Mithrandir surejob\.in
1496949638 Ferrybig hyderabadfairy

0 comments on commit fc5225e

Please sign in to comment.