Skip to content

Commit

Permalink
Improve escaping methods.
Browse files Browse the repository at this point in the history
  • Loading branch information
thomas-daniels committed Jan 25, 2015
1 parent 8f6ce53 commit d33252d
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 8 deletions.
7 changes: 5 additions & 2 deletions parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,5 +74,8 @@ def fetch_title_from_msg_content(content):
return None


def fetch_unescaped_title_from_encoded(title_encoded):
    r"""Escape special characters in an encoded title, then unescape and strip it.

    Each ``_``, ``*``, ``\``, backtick, ``[`` and ``]`` in *title_encoded* is
    first prefixed with a backslash. The result is then passed through
    ``GlobalVars.parser.unescape`` (presumably HTML-entity decoding -- confirm
    against the definition of ``GlobalVars.parser``) and finally stripped of
    leading and trailing whitespace.

    Because the backslash-escaping happens before the unescape step, any
    special character that only appears as a result of unescaping is left
    without a backslash.
    """
    special_chars = r"([_*\\`\[\]])"
    backslashed = re.sub(special_chars, r"\\\1", title_encoded)
    decoded = GlobalVars.parser.unescape(backslashed)
    return decoded.strip()
def unescape_title(title_escaped):
    """Return *title_escaped* run through the shared parser's unescape, stripped.

    The text is passed to ``GlobalVars.parser.unescape`` (presumably
    HTML-entity decoding -- confirm against the definition of
    ``GlobalVars.parser``) and leading/trailing whitespace is then removed.
    """
    decoded = GlobalVars.parser.unescape(title_escaped)
    return decoded.strip()

def escape_special_chars_in_title(title_unescaped):
    r"""Prefix selected punctuation in a title with a backslash.

    Every occurrence of ``_``, ``*``, ``\``, a backtick, ``[`` or ``]`` in
    *title_unescaped* gets a single backslash inserted in front of it; all
    other characters are returned unchanged.
    """
    # One capture group around the character class so the replacement
    # template can re-emit the matched character after the backslash.
    special_chars = re.compile(r"([_*\\`\[\]])")
    return special_chars.sub(r"\\\1", title_unescaped)
11 changes: 5 additions & 6 deletions spamhandling.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import time
from findspam import FindSpam
from datahandling import *
from parsing import get_user_from_url, fetch_unescaped_title_from_encoded
from parsing import get_user_from_url, unescape_title, escape_special_chars_in_title
from bayesianfuncs import *


Expand Down Expand Up @@ -31,7 +31,7 @@ def check_if_spam_json(data):
return False, None # owner's account doesn't exist anymore, no need to post it in chat:
# http://chat.stackexchange.com/transcript/message/18380776#18380776
title = d["titleEncodedFancy"]
title = fetch_unescaped_title_from_encoded(title)
title = unescape_title(title)
poster = d["ownerDisplayName"]
url = d["url"]
post_id = str(d["id"])
Expand Down Expand Up @@ -60,7 +60,7 @@ def handle_spam(title, poster, site, post_url, poster_url, post_id, reasons, is_
except Exception as e:
print e
try:
title.replace(']', '\]')
title = escape_special_chars_in_title(title)
s = "[ [SmokeDetector](https://github.com/Charcoal-SE/SmokeDetector) ] %s: [%s](%s) by [%s](%s) on `%s`" % \
(reason, title.strip(), post_url, poster.strip(), poster_url, site)
print GlobalVars.parser.unescape(s).encode('ascii',errors='replace')
Expand All @@ -79,13 +79,12 @@ def handle_spam(title, poster, site, post_url, poster_url, post_id, reasons, is_
# Extract the post fields from a JSON payload and forward them to handle_spam.
#
# data:   a JSON string whose "data" member is itself a JSON-encoded string
#         (it is decoded twice below); the inner object supplies the title,
#         owner name, site host, post URL, owner URL and post id.
# reason: passed through to handle_spam unchanged.
#
# Any exception raised while decoding or handling is swallowed by the bare
# except, which only prints "NOP".
#
# NOTE(review): this span appears to interleave the pre-change and
# post-change lines of a diff hunk, so `title` is assigned twice and
# handle_spam is called twice as written -- confirm which lines are current.
def handle_spam_json(data, reason):
try:
# The outer document wraps the real payload as a JSON string under "data".
d=json.loads(json.loads(data)["data"])
# NOTE(review): looks like the pre-change assignment; it is overwritten on the next line.
title = d["titleEncodedFancy"]
title = unescape_title(d["titleEncodedFancy"])
poster = d["ownerDisplayName"]
site = d["siteBaseHostAddress"]
url = d["url"]
poster_url = d["ownerUrl"]
# The id is converted to a string before being handed on.
post_id = str(d["id"])
# NOTE(review): the next two lines look like the pre-change path
# (fetch_unescaped_title_from_encoded + handle_spam) -- confirm.
title_to_post = fetch_unescaped_title_from_encoded(title)
handle_spam(title_to_post, poster, site, url, poster_url, post_id, reason, False)
handle_spam(title, poster, site, url, poster_url, post_id, reason, False)
except:
print "NOP"

0 comments on commit d33252d

Please sign in to comment.