Skip to content

Commit

Permalink
Use protocol-relative links in reports
Browse files (browse the repository at this point in the history)
  • Loading branch information
thomas-daniels committed Jul 6, 2015
1 parent 2cd5a6e commit 4eddbac
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 11 deletions.
23 changes: 16 additions & 7 deletions parsing.py
Expand Up @@ -4,7 +4,7 @@


def get_user_from_url(url):
m = regex.compile(r"https?://([\w.]+)/users/(\d+)/.+/?").search(url)
m = regex.compile(r"(?:https?:)?//([\w.]+)/users/(\d+)/.+/?").search(url)
if m is None:
return None
try:
Expand All @@ -16,7 +16,7 @@ def get_user_from_url(url):


def fetch_post_url_from_msg_content(content):
search_regex = r"^\[ \[SmokeDetector\]\(https:\/\/github.com\/Charcoal-SE\/SmokeDetector\) \] [\w\s,-]+: \[.+]\((http:\/\/[\w.]+\/questions\/\d+\/.+|http:\/\/[\w.]+\/[qa]\/\d+)\) by \[?.*\]?\(?(?:.*)\)? on `[\w.]+`$"
search_regex = r"^\[ \[SmokeDetector\]\(https:\/\/github.com\/Charcoal-SE\/SmokeDetector\) \] [\w\s,-]+: \[.+]\(((?:http:)?\/\/[\w.]+\/questions\/\d+\/.+|(?:http:)?\/\/[\w.]+\/[qa]\/\d+)\) by \[?.*\]?\(?(?:.*)\)? on `[\w.]+`$"
m = regex.compile(search_regex).search(content)
if m is None:
return None
Expand All @@ -35,10 +35,10 @@ def fetch_post_id_and_site_from_url(url):
search_regex = ""
if regex.compile(post_type_regex).search(url):
post_type = "answer"
search_regex = r"^https?:\/\/([\w.]+)/questions/\d+/.+/(\d+)#\d+$"
search_regex = r"^(?:https?:)?\/\/([\w.]+)/questions/\d+/.+/(\d+)#\d+$"
else:
post_type = "question"
search_regex = r"^https?:\/\/([\w.]+)/questions/(\d+)(?:/.+)?$"
search_regex = r"^(?:https?:)?\/\/([\w.]+)/questions/(\d+)(?:/.+)?$"
found = regex.compile(search_regex).search(url)
if found is not None:
try:
Expand All @@ -47,7 +47,7 @@ def fetch_post_id_and_site_from_url(url):
return (post_id, post_site, post_type)
except:
return None
search_regex = r"^https?:\/\/([\w.]+)/(q|a)/(\d+)(?:/\d+)?"
search_regex = r"^(?:https?:)?\/\/([\w.]+)/(q|a)/(\d+)(?:/\d+)?"
found = regex.compile(search_regex).search(url)
if found is None:
return None
Expand All @@ -66,7 +66,7 @@ def fetch_post_id_and_site_from_msg_content(content):


def fetch_owner_url_from_msg_content(content):
search_regex = r"^\[ \[SmokeDetector\]\(https:\/\/github.com\/Charcoal-SE\/SmokeDetector\) \] [\w\s,-]+: \[.+]\((?:http:\/\/[\w.]+\/questions\/\d+\/.+|http:\/\/[\w.]+\/[qa]\/\d+)\) by \[.+\]\((.+)\) on `[\w.]+`$"
search_regex = r"^\[ \[SmokeDetector\]\(https:\/\/github.com\/Charcoal-SE\/SmokeDetector\) \] [\w\s,-]+: \[.+]\((?:(?:http:)?\/\/[\w.]+\/questions\/\d+\/.+|(?:http:)?\/\/[\w.]+\/[qa]\/\d+)\) by \[.+\]\((.+)\) on `[\w.]+`$"
m = regex.compile(search_regex).search(content)
if m is None:
return None
Expand All @@ -78,7 +78,7 @@ def fetch_owner_url_from_msg_content(content):


def fetch_title_from_msg_content(content):
search_regex = r"^\[ \[SmokeDetector\]\(https:\/\/github.com\/Charcoal-SE\/SmokeDetector\) \] [\w\s,-]+: \[(.+)]\((?:http:\/\/[\w.]+\/questions\/\d+\/.+|http:\/\/[\w.]+\/[qa]\/\d+)\) by \[?.*\]?\(?.*\)? on `[\w.]+`$"
search_regex = r"^\[ \[SmokeDetector\]\(https:\/\/github.com\/Charcoal-SE\/SmokeDetector\) \] [\w\s,-]+: \[(.+)]\((?:(?:http:)?\/\/[\w.]+\/questions\/\d+\/.+|(?:http:)?\/\/[\w.]+\/[qa]\/\d+)\) by \[?.*\]?\(?.*\)? on `[\w.]+`$"
m = regex.compile(search_regex).search(content)
if m is None:
return None
Expand Down Expand Up @@ -133,6 +133,15 @@ def url_to_shortlink(url):
return "http://%s/a/%s" % (id_and_site[1], id_and_site[0])


def to_protocol_relative(url):
    """Return *url* with a leading ``http:`` or ``https:`` scheme removed.

    The ``//`` that follows the scheme is kept, so the result is a
    protocol-relative link (for example ``//stackexchange.com``).  A URL
    that starts with neither ``http://`` nor ``https://`` is returned
    unchanged.
    """
    for scheme in ("http:", "https:"):
        if url.startswith(scheme + "//"):
            # Strip only the scheme itself; the "//" prefix stays in place.
            return url[len(scheme):]
    return url


def preprocess_shortcut_command(cmd):
cmd = regex.sub(r"(\d)\s+", r"\1", cmd)
parts = cmd.split(" ")
Expand Down
5 changes: 3 additions & 2 deletions spamhandling.py
Expand Up @@ -6,7 +6,7 @@
is_whitelisted_user, has_already_been_posted, is_false_positive, \
is_auto_ignored_post, is_ignored_post, append_to_latest_questions
from parsing import get_user_from_url, unescape_title,\
escape_special_chars_in_title
escape_special_chars_in_title, to_protocol_relative
from bayesianfuncs import bayesian_score
from globalvars import GlobalVars
from datetime import datetime
Expand Down Expand Up @@ -68,7 +68,8 @@ def check_if_spam_json(data):


def handle_spam(title, poster, site, post_url, poster_url, post_id, reasons, is_answer):
post_url = url_to_shortlink(post_url)
post_url = to_protocol_relative(url_to_shortlink(post_url))
poster_url = to_protocol_relative(poster_url)
reasons = list(set(reasons))
reasons.sort()
reason = ", ".join(reasons).capitalize()
Expand Down
3 changes: 2 additions & 1 deletion test/data_test_parsing.txt
Expand Up @@ -7,4 +7,5 @@
[ [SmokeDetector](https://github.com/Charcoal-SE/SmokeDetector) ] All-caps title: [TEST TEST TEST ]]])))](http://stackoverflow.com/q/0) by [TEST TEST](http://stackoverflow.com/users/0/test-test) on `stackoverflow.com`
[ [SmokeDetector](https://github.com/Charcoal-SE/SmokeDetector) ] Offensive answer detected: [TEST TEST TEST 2]]])))](http://stackoverflow.com/a/42) by [TEST TEST 2](http://stackoverflow.com/users/0/test-test) on `stackoverflow.com`
[ [SmokeDetector](https://github.com/Charcoal-SE/SmokeDetector) ] Repeating characters in body: [Why I can't insert data in a model from a custom controller?](http://stackoverflow.com/q/27954020) by [user3754535](http://stackoverflow.com/users/3754535/user3754535) on `stackoverflow.com`
[ [SmokeDetector](https://github.com/Charcoal-SE/SmokeDetector) ] Repeating characters in body: [Why I can't insert data in a model from a custom controller?](http://stackoverflow.com/q/27954020) by a deleted user on `stackoverflow.com`
[ [SmokeDetector](https://github.com/Charcoal-SE/SmokeDetector) ] Repeating characters in body: [Why I can't insert data in a model from a custom controller?](http://stackoverflow.com/q/27954020) by a deleted user on `stackoverflow.com`
[ [SmokeDetector](https://github.com/Charcoal-SE/SmokeDetector) ] Repeating characters in body: [Why I can't insert data in a model from a custom controller?](//stackoverflow.com/q/27954020) by [user3754535](//stackoverflow.com/users/3754535/user3754535) on `stackoverflow.com`
9 changes: 8 additions & 1 deletion test/test_parsing.py
Expand Up @@ -19,6 +19,7 @@
('http://codegolf.stackexchange.com/users/9275/programfox', get_user_from_url, ('9275', 'codegolf.stackexchange.com')),
('http://stackoverflow.com/users/1/jeff-atwood', get_user_from_url, ('1', 'stackoverflow.com')),
('http://mathoverflow.net/users/66/ben-webster', get_user_from_url, ('66', 'mathoverflow.net')),
('//stackoverflow.com/users/1/jeff-atwood', get_user_from_url, ('1', 'stackoverflow.com')),
('!!/addblu http://stackoverflow.com/users/0/test', get_user_from_list_command, ('0', 'stackoverflow.com')),
('!!/rmblu http://stackoverflow.com/users/0/test', get_user_from_list_command, ('0', 'stackoverflow.com')),
('!!/addwlu http://stackoverflow.com/users/0/test', get_user_from_list_command, ('0', 'stackoverflow.com')),
Expand All @@ -33,6 +34,9 @@
('http://writers.stackexchange.com/questions/1/%2f%2f', url_to_shortlink, 'http://writers.stackexchange.com/q/1'),
('http://writers.stackexchange.com/questions/1/%2f%2f/2#2', url_to_shortlink, 'http://writers.stackexchange.com/a/2'),
('http://mathoverflow.net/q/1', url_to_shortlink, 'http://mathoverflow.net/q/1'),
('http://stackexchange.com', to_protocol_relative, '//stackexchange.com'),
('https://stackexchange.com', to_protocol_relative, '//stackexchange.com'),
('//stackexchange.com', to_protocol_relative, '//stackexchange.com'),
('sd 2tpu', preprocess_shortcut_command, 'sd tpu tpu'),
('sd - 3tpu fp', preprocess_shortcut_command, 'sd - tpu tpu tpu fp'),
('sd 3- 2fp', preprocess_shortcut_command, 'sd - - - fp fp'),
Expand Down Expand Up @@ -72,7 +76,10 @@
(test_data_inputs[8], fetch_title_from_msg_content, "Why I can't insert data in a model from a custom controller?"),
(test_data_inputs[9], fetch_post_id_and_site_from_msg_content, ('27954020', 'stackoverflow.com', 'question')),
(test_data_inputs[9], fetch_owner_url_from_msg_content, None),
(test_data_inputs[9], fetch_title_from_msg_content, "Why I can't insert data in a model from a custom controller?")
(test_data_inputs[9], fetch_title_from_msg_content, "Why I can't insert data in a model from a custom controller?"),
(test_data_inputs[10], fetch_post_id_and_site_from_msg_content, ('27954020', 'stackoverflow.com', 'question')),
(test_data_inputs[10], fetch_owner_url_from_msg_content, '//stackoverflow.com/users/3754535/user3754535'),
(test_data_inputs[10], fetch_title_from_msg_content, "Why I can't insert data in a model from a custom controller?")
])
def test_parsing(input_data, parse_method, expected):
assert parse_method(input_data.strip()) == expected

0 comments on commit 4eddbac

Please sign in to comment.