Skip to content

Commit

Permalink
Let rooms turn off offensive title mask
Browse files Browse the repository at this point in the history
  • Loading branch information
ArtOfCode- committed Oct 29, 2019
1 parent 93ed0c5 commit 627fbc6
Show file tree
Hide file tree
Showing 2 changed files with 119 additions and 55 deletions.
28 changes: 28 additions & 0 deletions rooms.yml
Expand Up @@ -7,6 +7,7 @@ stackexchange.com:
- metasmoke
- all
- experimental
- no-offensive-mask

privileges:
- 2848444 # Santa Claus
Expand Down Expand Up @@ -152,6 +153,7 @@ stackexchange.com:

msg_types:
- debug
- offensive-mask

privileges:
inherit:
Expand All @@ -163,31 +165,37 @@ stackexchange.com:
watcher: true
msg_types:
- site-askubuntu.com
- offensive-mask

2165: # CRUDE
commands: false
msg_types:
- site-math.stackexchange.com
- offensive-mask

95: # English Language & Usage chat
commands: false
msg_types:
- site-english.stackexchange.com
- offensive-mask

388: # Parenting chat
commands: false
msg_types:
- site-parenting.stackexchange.com
- offensive-mask

8089: # mempool
commands: false
msg_types:
- site-bitcoin.stackexchange.com
- offensive-mask

468: # V'dibarta Bam
commands: false
msg_types:
- site-judaism.stackexchange.com
- offensive-mask

35068: # Money.SE - Smoke Detector
commands: false
Expand All @@ -196,58 +204,69 @@ stackexchange.com:
- no-all-caps title
- no-all-caps body
- no-all-caps answer
- offensive-mask

34620: # Whisper
commands: false
msg_types:
- site-ethereum.stackexchange.com
- offensive-mask

22462: # Russian.SO General Room
commands: false
msg_types:
- site-ru.stackoverflow.com
- offensive-mask

11: # Role-Playing Games General Chat
commands: false
msg_types:
- site-rpg.stackexchange.com
- offensive-mask

24938: # Language Overflow
commands: false
msg_types:
- site-ell.stackexchange.com
- offensive-mask

38932: # The Studio
commands: false
msg_types:
- site-crafts.stackexchange.com
- offensive-mask

56223: # The Spam Blot
commands: false
msg_types:
- site-graphicdesign.stackexchange.com
- offensive-mask

59281: # SFF Cleanup room
commands: false
msg_types:
- site-scifi.stackexchange.com
- offensive-mask

58631: # The Restaurant at the End of the Universe
commands: true
watcher: true
msg_types:
- site-scifi.stackexchange.com
- no-phone number detected in title
- offensive-mask

61165: # The Awkward Silence
commands: false
msg_types:
- site-interpersonal.stackexchange.com
- offensive-mask

511:
commands: false
msg_types:
- site-japanese.stackexchange.com
- offensive-mask

84778: # The Terminal
commands: true
Expand All @@ -270,22 +289,26 @@ stackexchange.com:
- site-ja.stackoverflow.com
- site-es.stackoverflow.com
- delay
- offensive-mask

27: # Stack Apps
commands: false
watcher: false
msg_types:
- site-stackapps.com
- offensive-mask

96491: # Jeff's U&L smokedetector room
commands: true
msg_types:
- site-unix.stackexchange.com
- offensive-mask

46061: # La Tertulia
commands: false
msg_types:
- site-spanish.stackexchange.com
- offensive-mask

meta.stackexchange.com:
89: # Tavern on the Meta
Expand All @@ -309,6 +332,7 @@ meta.stackexchange.com:
- no-offensive body detected
- no-no whitespace in body
- no-no whitespace in answer
- offensive-mask

privileges:
inherit:
Expand Down Expand Up @@ -353,6 +377,7 @@ meta.stackexchange.com:
msg_types:
- all
- experimental
- offensive-mask

privileges:
- 6109921 # quartata
Expand All @@ -373,6 +398,7 @@ stackoverflow.com:
msg_types:
- site-stackoverflow.com
- no-allspam
- no-offensive-mask

privileges:
- 270038 # Floern
Expand Down Expand Up @@ -468,6 +494,7 @@ stackoverflow.com:

msg_types:
- site-stackoverflow.com
- offensive-mask

privileges:
- 270038 # Floern
Expand Down Expand Up @@ -506,6 +533,7 @@ stackoverflow.com:

msg_types:
- site-stackoverflow.com
- offensive-mask

privileges:
- 270038 # Floern
Expand Down
146 changes: 91 additions & 55 deletions spamhandling.py
Expand Up @@ -105,10 +105,8 @@ def check_if_spam_json(json_data):

# noinspection PyBroadException,PyProtectedMember
def handle_spam(post, reasons, why):
post_url = parsing.to_protocol_relative(parsing.url_to_shortlink(post.post_url))
poster_url = parsing.to_protocol_relative(parsing.user_url_to_shortlink(post.user_url))
shortened_site = post.post_site.replace("stackexchange.com", "SE") # site.stackexchange.com -> site.SE
datahandling.append_to_latest_questions(post.post_site, post.post_id, post.title if not post.is_answer else "")

if len(reasons) == 1 and ("all-caps title" in reasons or
"repeating characters in title" in reasons or
"repeating characters in body" in reasons or
Expand All @@ -117,72 +115,34 @@ def handle_spam(post, reasons, why):
"repeating words in body" in reasons or
"repeating words in answer" in reasons):
datahandling.add_auto_ignored_post((post.post_id, post.post_site, datetime.now()))

if why is not None and why != "":
datahandling.add_why(post.post_site, post.post_id, why)

if post.is_answer and post.post_id is not None and post.post_id is not "":
datahandling.add_post_site_id_link((post.post_id, post.post_site, "answer"), post.parent.post_id)
if GlobalVars.reason_weights or GlobalVars.metasmoke_key:
reason_weight = sum_weight(reasons)
if reason_weight >= 1000:
reason_weight_s = " (**{}**)".format(reason_weight)
else:
reason_weight_s = " ({})".format(reason_weight)
else: # No reason weight if neither cache nor MS
reason_weight_s = ""
try:
# If the post is an answer type post, the 'title' is going to be blank, so when posting the
# message contents we need to set the post title to the *parent* title, so the message in the
# chat is properly constructed with parent title instead. This will make things 'print'
# in a proper way in chat messages.
sanitized_title = parsing.sanitize_title(post.title if not post.is_answer else post.parent.title)
sanitized_title = escape_format(sanitized_title).strip()

# Remove title if it is potentially offensive
if 'offensive title detected' not in reasons:
message_title = sanitized_title
else:
message_title = '(Potentially offensive title -- see MS for details)'

prefix = u"[ [SmokeDetector](//git.io/vyDZv) ]"
if GlobalVars.metasmoke_key:
prefix_ms = u"[ [SmokeDetector](//git.io/vyDZv) | [MS]({}) ]".format(
to_metasmoke_link(post_url, protocol=False))
else:
prefix_ms = prefix

# We'll insert reason list later
edited = '' if not post.edited else ' \u270F\uFE0F'
try:
post_url = parsing.to_protocol_relative(parsing.url_to_shortlink(post.post_url))
poster_url = parsing.to_protocol_relative(parsing.user_url_to_shortlink(post.user_url))
if not post.user_name.strip() or (not poster_url or poster_url.strip() == ""):
s = " {{}}{}: [{}]({}){} by a deleted user on `{}`".format(
reason_weight_s, message_title, post_url, edited, shortened_site)
username = ""
else:
username = post.user_name.strip()
escaped_username = escape_format(parsing.escape_markdown(username))
s = " {{}}{}: [{}]({}){} by [{}]({}) on `{}`".format(
reason_weight_s, message_title, post_url, edited, escaped_username, poster_url, shortened_site)

Tasks.do(metasmoke.Metasmoke.send_stats_on_post,
post.title_ignore_type, post_url, reasons, post.body, username,
post.user_link, why, post.owner_rep, post.post_score,
post.up_vote_count, post.down_vote_count)

log('debug', GlobalVars.parser.unescape(s).encode('ascii', errors='replace'))
GlobalVars.deletion_watcher.subscribe(post_url)

reason = message = None
for reason_count in range(5, 0, -1): # Try 5 reasons and all the way down to 1
reason = ", ".join(reasons[:reason_count])
if len(reasons) > reason_count:
reason += ", +{} more".format(len(reasons) - reason_count)
reason = reason.capitalize()
message = prefix_ms + s.format(reason) # Insert reason list
if len(message) <= 500:
break # Problem solved, stop attempting
offensive_mask = 'offensive title detected' in reasons
message = build_message(post, reasons)
if offensive_mask:
post.title = "(potentially offensive title -- see MS for details)"
clean_message = build_message(post, reasons)

s = s.format(reason) # Later code needs this variable
if len(message) > 500:
message = (prefix_ms + s)[:500] # Truncate directly and keep MS link
log('debug', GlobalVars.parser.unescape(message).encode('ascii', errors='replace'))
GlobalVars.deletion_watcher.subscribe(post_url)

without_roles = tuple(["no-" + reason for reason in reasons]) + ("site-no-" + post.post_site,)

Expand All @@ -191,7 +151,83 @@ def handle_spam(post, reasons, why):
chatcommunicate.tell_rooms(message, ("experimental",),
without_roles, notify_site=post.post_site, report_data=(post_url, poster_url))
else:
chatcommunicate.tell_rooms(message, ("all", "site-" + post.post_site),
without_roles, notify_site=post.post_site, report_data=(post_url, poster_url))
if offensive_mask:
chatcommunicate.tell_rooms(message, ("all", "site-" + post.post_site),
without_roles + ("offensive-mask"), notify_site=post.post_site,
report_data=(post_url, poster_url))
chatcommunicate.tell_rooms(clean_message, ("all", "site-" + post.post_site),
without_roles + ("no-offensive-mask"), notify_site=post.post_site,
report_data=(post_url, poster_url))
else:
chatcommunicate.tell_rooms(message, ("all", "site-" + post.post_site),
without_roles, notify_site=post.post_site,
report_data=(post_url, poster_url))
except Exception as e:
excepthook.uncaught_exception(*sys.exc_info())


def build_message(post, reasons):
    """
    Build the chat report line for a detected post.

    The report has the shape
    ``<prefix> <Reasons>[ (<weight>)]: [<title>](<post url>) by <user> on <site>``
    and is capped at 500 characters.

    :param post: the detected post. This function reads post_url, user_url, post_site, title, is_answer,
                 parent.title (answers only), edited and user_name from it.
    :param reasons: list of detection reason strings; as many as fit (up to five) are listed in the report.
    :return: the formatted report string, never longer than 500 characters.
    """
    # Length cap applied to the finished report.
    max_length = 500

    # This is the main report format. Username and user link are deliberately not separated as with title and post
    # link, because we may want to use "by a deleted user" rather than a username+link.
    # {{reasons}} is doubled so that it survives the first .format() call as a literal "{reasons}" placeholder,
    # which is filled in by a second .format() call once we know how many reasons fit.
    # {reason_weight} carries its own leading space and parentheses so that it can vanish completely when no
    # weight is available, instead of leaving an empty " ()" in the report.
    # NOTE(review): the report format this replaces wrapped the site name in backticks and put the edited marker
    # after the link rather than inside the link text -- confirm both changes are intentional.
    message_format = "{prefix_ms} {{reasons}}{reason_weight}: [{title}]({post_url}) by {user} on {site}"

    # Post URL, user URL, and site details are all easy - just data from the post object, transformed a bit
    # via the parsing helpers.
    post_url = parsing.to_protocol_relative(parsing.url_to_shortlink(post.post_url))
    poster_url = parsing.to_protocol_relative(parsing.user_url_to_shortlink(post.user_url))
    shortened_site = post.post_site.replace("stackexchange.com", "SE")  # site.stackexchange.com -> site.SE

    # Message prefix. There's always a link to SmokeDetector; if we have a metasmoke key, there's also a link to the
    # post's MS record. If we *don't* have a MS key, it's a fair assumption that the post won't be in metasmoke as
    # we didn't have a key to create a record for it.
    prefix = u"[ [SmokeDetector](//git.io/vyDZv) ]"
    if GlobalVars.metasmoke_key:
        prefix = u"[ [SmokeDetector](//git.io/vyDZv) | [MS]({}) ]".format(
            to_metasmoke_link(post_url, protocol=False))

    # If we have reason weights cached (GlobalVars.reason_weights) we can calculate total weight for this report;
    # likewise, if we have a MS key, we can fetch the weights and then calculate. If we have neither, tough luck:
    # the weight (and its parentheses) is left out of the report entirely.
    if GlobalVars.reason_weights or GlobalVars.metasmoke_key:
        total_weight = sum_weight(reasons)
        if total_weight >= 1000:
            reason_weight = " (**{}**)".format(total_weight)  # Bold for very heavy reports
        else:
            reason_weight = " ({})".format(total_weight)
    else:
        reason_weight = ""

    # If the post is an answer, it doesn't have a title, so we use the question's title instead. Either way, we
    # make sure it's escaped. We also add the edited indicator here.
    # NOTE(review): escape_format presumably doubles braces so the title survives the second .format() call
    # below -- confirm against its definition.
    sanitized_title = parsing.sanitize_title(post.title if not post.is_answer else post.parent.title)
    sanitized_title = escape_format(sanitized_title).strip()
    if post.edited:
        sanitized_title += ' \u270F\uFE0F'

    # If we have user details available, we'll linkify the username. If we don't, we call it a deleted user.
    if not post.user_name.strip() or not poster_url or not poster_url.strip():
        user = "a deleted user"
    else:
        username = post.user_name.strip()
        escaped_username = escape_format(parsing.escape_markdown(username))
        user = "[{}]({})".format(escaped_username, poster_url)

    # Build the main body of the message. The next step is to insert the reason list while keeping the message
    # under the length cap.
    template = message_format.format(prefix_ms=prefix, reason_weight=reason_weight, title=sanitized_title,
                                     post_url=post_url, user=user, site=shortened_site)

    # Try listing 5 reasons, then 4, ... down to 1, stopping at the first version that fits. The loop always runs
    # at least once, so `message` always ends up with the reasons substituted -- the raw template (with its
    # "{reasons}" placeholder and still-doubled braces) is never what gets returned.
    message = template
    for reason_count in range(5, 0, -1):
        reason = ", ".join(reasons[:reason_count])
        if len(reasons) > reason_count:
            reason += ", +{} more".format(len(reasons) - reason_count)
        message = template.format(reasons=reason.capitalize())
        if len(message) <= max_length:
            break

    # If the message is still too long with a single reason listed, we're out of options, so just cut the end of
    # the message off. The prefix (and its MS link) is at the front, so it survives. Slicing is a no-op when the
    # message already fits.
    return message[:max_length]

0 comments on commit 627fbc6

Please sign in to comment.