Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
ArtOfCode committed Mar 26, 2017
2 parents bd68fd6 + 02a7186 commit 477c3e8
Show file tree
Hide file tree
Showing 7 changed files with 43 additions and 31 deletions.
1 change: 1 addition & 0 deletions blacklisted_websites.txt
Expand Up @@ -916,3 +916,4 @@ www\.pokemonomegarubyandalphasapphiredownload\.com
alphagenixsweden\.com
click2trial\.com
examfreeresult\.in
fiverr\.com
41 changes: 22 additions & 19 deletions bodyfetcher.py
Expand Up @@ -316,27 +316,30 @@ def make_api_call_for_site(self, site):
handle_spam(post=post_,
reasons=reason,
why=why)
except:
pass
except Exception as e:
log('error', "Exception in handle_spam:", e)

try:
for answer in post["answers"]:
num_scanned += 1
answer["IsAnswer"] = True # Necessary for Post object
answer["title"] = "" # Necessary for proper Post object creation
answer["site"] = site # Necessary for proper Post object creation
answer_ = Post(api_response=answer, parent=post_)

is_spam, reason, why = check_if_spam(answer_)
if is_spam:
try:
handle_spam(answer_,
reasons=reason,
why=why)
except:
pass
except:
pass
if "answers" not in post:
pass
else:
for answer in post["answers"]:
num_scanned += 1
answer["IsAnswer"] = True # Necessary for Post object
answer["title"] = post_.title # Necessary for proper Post object creation
answer["site"] = site # Necessary for proper Post object creation
answer_ = Post(api_response=answer, parent=post_)

is_spam, reason, why = check_if_spam(answer_)
if is_spam:
try:
handle_spam(answer_,
reasons=reason,
why=why)
except Exception as e:
log('error', "Exception in handle_spam:", e)
except Exception as e:
log('error', "Exception handling answers:", e)

end_time = time.time()
GlobalVars.posts_scan_stats_lock.acquire()
Expand Down
4 changes: 2 additions & 2 deletions chatcommands.py
Expand Up @@ -30,14 +30,14 @@


# noinspection PyMissingTypeHints
def check_permissions(function):
def check_permissions(function_):
# noinspection PyMissingTypeHints
def run_command(ev_room, ev_user_id, wrap2, *args, **kwargs):
if datahandling.is_privileged(ev_room, ev_user_id, wrap2):
kwargs['ev_room'] = ev_room
kwargs['ev_user_id'] = ev_user_id
kwargs['wrap2'] = wrap2
return function(*args, **kwargs)
return function_(*args, **kwargs)
else:
return Response(command_status=False,
message="You are not a privileged user. Please see [the privileges wiki page](" +
Expand Down
7 changes: 6 additions & 1 deletion classes/Post.py
Expand Up @@ -12,6 +12,7 @@ class Post:
_post_id = ""
_post_score = 0
_post_site = ""
_post_url = ""
_title = ""
_user_name = ""
_user_url = ""
Expand Down Expand Up @@ -195,9 +196,13 @@ def post_score(self):
def post_site(self):
return unicode(self._post_site)

# noinspection PyBroadException
@property
def post_url(self):
return unicode(self._post_url)
try:
return unicode(self._post_url)
except:
return "NoLink"

@property
def title(self):
Expand Down
13 changes: 7 additions & 6 deletions findspam.py
Expand Up @@ -10,6 +10,7 @@
from helpers import all_matches_unique, log

SIMILAR_THRESHOLD = 0.95
SIMILAR_ANSWER_THRESHOLD = 0.7
EXCEPTION_RE = r"^Domain (.*) didn't .*!$"
RE_COMPILE = regex.compile(EXCEPTION_RE)
COMMON_MALFORMED_PROTOCOLS = [
Expand All @@ -30,7 +31,7 @@
r"""*[a-z\u00a1-\uffff0-9]+)*(?:\.(?:[a-z\u00a1-\uffff]{2,})))(?::\d{2,5})?(?:/\S*)?""", regex.UNICODE)


# noinspection PyUnusedLocal,PyMissingTypeHints
# noinspection PyUnusedLocal,PyMissingTypeHints,PyTypeChecker
def has_repeated_words(s, site, *args):
words = regex.split(r"[\s.,;!/\()\[\]+_-]", s)
words = [word for word in words if word != ""]
Expand Down Expand Up @@ -83,7 +84,7 @@ def link_at_end(s, site, *args): # link at end of question, on selected sites
return False, ""


# noinspection PyUnusedLocal,PyMissingTypeHints
# noinspection PyUnusedLocal,PyMissingTypeHints,PyTypeChecker
def non_english_link(s, site, *args): # non-english link in short answer
if len(s) < 600:
links = regex.compile(ur'nofollow(?: noreferrer)?">([^<]*)(?=</a>)', regex.UNICODE).findall(s)
Expand All @@ -96,7 +97,7 @@ def non_english_link(s, site, *args): # non-english link in short answer
return False, ""


# noinspection PyUnusedLocal,PyMissingTypeHints
# noinspection PyUnusedLocal,PyMissingTypeHints,PyTypeChecker
def mostly_non_latin(s, site, *args): # majority of post is in non-Latin, non-Cyrillic characters
word_chars = regex.sub(r'(?u)[\W0-9]|http\S*', "", s)
non_latin_chars = regex.sub(r"(?u)\p{script=Latin}|\p{script=Cyrillic}", "", word_chars)
Expand Down Expand Up @@ -190,7 +191,7 @@ def pattern_product_name(s, site, *args):
"Junivive", "Apexatropin", "Gain", "Allure", "Nuvella", "Trimgenix", "Satin", "Prodroxatone",
"Elite", "Force", "Exceptional", "Enhance(ment)?", "Nitro", "Max", "Boost", "E?xtreme", "Grow",
"Deep", "Male", "Pro", "Advanced", "Monster", "Divine", "Royale", "Angele", "Trinity", "Andro",
"Pure", "Skin", "Sea", "Muscle", "Ascend", "Youth",
"Pure", "Skin", "Sea", "Muscle", "Ascend", "Youth", "Hyper(tone)?",
"Serum", "Supplement", "Fuel", "Cream"]
if site != "math.stackexchange.com" and site != "mathoverflow.net":
keywords += ["E?X[tl\d]?", "Alpha", "Prime", "Formula"]
Expand Down Expand Up @@ -301,7 +302,7 @@ def has_eltima(s, site, *args):
return False, ""


# noinspection PyUnusedLocal,PyMissingTypeHints
# noinspection PyUnusedLocal,PyMissingTypeHints,PyTypeChecker
def username_similar_website(s, site, *args):
username = args[0]
sim_result = perform_similarity_checks(s, username)
Expand Down Expand Up @@ -397,7 +398,7 @@ def similar_answer(post):
sanitized_answer = strip_urls_and_tags(other_answer.body)
ratio = similar_ratio(sanitized_body, sanitized_answer)

if ratio >= SIMILAR_THRESHOLD:
if ratio >= SIMILAR_ANSWER_THRESHOLD:
return False, False, True, \
u"Answer similar to answer {}, ratio {}".format(other_answer.post_id, ratio)

Expand Down
4 changes: 3 additions & 1 deletion globalvars.py
Expand Up @@ -122,7 +122,9 @@ class GlobalVars:
"248139", # FelixSFD
"156721", # D-side
"167070", # quartata
"172450" # Hovercraft Full Of Eels
"172450", # Hovercraft Full Of Eels
"56200", # Eric Leschinski
"211021" # Henders
],
meta_tavern_room_id: [
"315433", # Normal Human
Expand Down
4 changes: 2 additions & 2 deletions spamhandling.py
Expand Up @@ -86,8 +86,8 @@ def handle_spam(post, reasons, why):
datahandling.add_auto_ignored_post((post.post_id, post.post_site, datetime.now()))
if why is not None and why != "":
datahandling.add_why(post.post_site, post.post_id, why)
if post.is_answer and post.question_id is not None:
datahandling.add_post_site_id_link((post.post_id, post.post_site, "answer"), post.question_id)
if post.is_answer and post.post_id is not None and post.post_id is not "":
datahandling.add_post_site_id_link((post.post_id, post.post_site, "answer"), post.post_id)
try:
post.title = parsing.escape_special_chars_in_title(post.title)
sanitized_title = regex.sub('(https?://|\n)', '', post.title)
Expand Down

0 comments on commit 477c3e8

Please sign in to comment.