Merge branch 'master' of https://github.com/Charcoal-SE/SmokeDetector

Charcoal-SE · Mar 26, 2017 · 477c3e8
2 parents bd68fd6 + 02a7186
commit 477c3e8
Show file tree

Hide file tree

Showing 7 changed files with 43 additions and 31 deletions.
diff --git a/blacklisted_websites.txt b/blacklisted_websites.txt
@@ -916,3 +916,4 @@ www\.pokemonomegarubyandalphasapphiredownload\.com
 alphagenixsweden\.com
 click2trial\.com
 examfreeresult\.in
+fiverr\.com
diff --git a/bodyfetcher.py b/bodyfetcher.py
@@ -316,27 +316,30 @@ def make_api_call_for_site(self, site):
                     handle_spam(post=post_,
                                 reasons=reason,
                                 why=why)
-                except:
-                    pass
+                except Exception as e:
+                    log('error', "Exception in handle_spam:", e)
 
             try:
-                for answer in post["answers"]:
-                    num_scanned += 1
-                    answer["IsAnswer"] = True  # Necesssary for Post object
-                    answer["title"] = ""  # Necessary for proper Post object creation
-                    answer["site"] = site  # Necessary for proper Post object creation
-                    answer_ = Post(api_response=answer, parent=post_)
-
-                    is_spam, reason, why = check_if_spam(answer_)
-                    if is_spam:
-                        try:
-                            handle_spam(answer_,
-                                        reasons=reason,
-                                        why=why)
-                        except:
-                            pass
-            except:
-                pass
+                if "answers" not in post:
+                    pass
+                else:
+                    for answer in post["answers"]:
+                        num_scanned += 1
+                        answer["IsAnswer"] = True  # Necesssary for Post object
+                        answer["title"] = post_.title  # Necessary for proper Post object creation
+                        answer["site"] = site  # Necessary for proper Post object creation
+                        answer_ = Post(api_response=answer, parent=post_)
+
+                        is_spam, reason, why = check_if_spam(answer_)
+                        if is_spam:
+                            try:
+                                handle_spam(answer_,
+                                            reasons=reason,
+                                            why=why)
+                            except Exception as e:
+                                log('error', "Exception in handle_spam:", e)
+            except Exception as e:
+                log('error', "Exception handling answers:", e)
 
         end_time = time.time()
         GlobalVars.posts_scan_stats_lock.acquire()

diff --git a/chatcommands.py b/chatcommands.py
@@ -30,14 +30,14 @@
 
 
 # noinspection PyMissingTypeHints
-def check_permissions(function):
+def check_permissions(function_):
     # noinspection PyMissingTypeHints
     def run_command(ev_room, ev_user_id, wrap2, *args, **kwargs):
         if datahandling.is_privileged(ev_room, ev_user_id, wrap2):
             kwargs['ev_room'] = ev_room
             kwargs['ev_user_id'] = ev_user_id
             kwargs['wrap2'] = wrap2
-            return function(*args, **kwargs)
+            return function_(*args, **kwargs)
         else:
             return Response(command_status=False,
                             message="You are not a privileged user. Please see [the privileges wiki page](" +

diff --git a/classes/Post.py b/classes/Post.py
@@ -12,6 +12,7 @@ class Post:
     _post_id = ""
     _post_score = 0
     _post_site = ""
+    _post_url = ""
     _title = ""
     _user_name = ""
     _user_url = ""
@@ -195,9 +196,13 @@ def post_score(self):
     def post_site(self):
         return unicode(self._post_site)
 
+    # noinspection PyBroadException
     @property
     def post_url(self):
-        return unicode(self._post_url)
+        try:
+            return unicode(self._post_url)
+        except:
+            return "NoLink"
 
     @property
     def title(self):

diff --git a/findspam.py b/findspam.py
@@ -10,6 +10,7 @@
 from helpers import all_matches_unique, log
 
 SIMILAR_THRESHOLD = 0.95
+SIMILAR_ANSWER_THRESHOLD = 0.7
 EXCEPTION_RE = r"^Domain (.*) didn't .*!$"
 RE_COMPILE = regex.compile(EXCEPTION_RE)
 COMMON_MALFORMED_PROTOCOLS = [
@@ -30,7 +31,7 @@
     r"""*[a-z\u00a1-\uffff0-9]+)*(?:\.(?:[a-z\u00a1-\uffff]{2,})))(?::\d{2,5})?(?:/\S*)?""", regex.UNICODE)
 
 
-# noinspection PyUnusedLocal,PyMissingTypeHints
+# noinspection PyUnusedLocal,PyMissingTypeHints,PyTypeChecker
 def has_repeated_words(s, site, *args):
     words = regex.split(r"[\s.,;!/\()\[\]+_-]", s)
     words = [word for word in words if word != ""]
@@ -83,7 +84,7 @@ def link_at_end(s, site, *args):   # link at end of question, on selected sites
     return False, ""
 
 
-# noinspection PyUnusedLocal,PyMissingTypeHints
+# noinspection PyUnusedLocal,PyMissingTypeHints,PyTypeChecker
 def non_english_link(s, site, *args):   # non-english link in short answer
     if len(s) < 600:
         links = regex.compile(ur'nofollow(?: noreferrer)?">([^<]*)(?=</a>)', regex.UNICODE).findall(s)
@@ -96,7 +97,7 @@ def non_english_link(s, site, *args):   # non-english link in short answer
     return False, ""
 
 
-# noinspection PyUnusedLocal,PyMissingTypeHints
+# noinspection PyUnusedLocal,PyMissingTypeHints,PyTypeChecker
 def mostly_non_latin(s, site, *args):   # majority of post is in non-Latin, non-Cyrillic characters
     word_chars = regex.sub(r'(?u)[\W0-9]|http\S*', "", s)
     non_latin_chars = regex.sub(r"(?u)\p{script=Latin}|\p{script=Cyrillic}", "", word_chars)
@@ -190,7 +191,7 @@ def pattern_product_name(s, site, *args):
                 "Junivive", "Apexatropin", "Gain", "Allure", "Nuvella", "Trimgenix", "Satin", "Prodroxatone",
                 "Elite", "Force", "Exceptional", "Enhance(ment)?", "Nitro", "Max", "Boost", "E?xtreme", "Grow",
                 "Deep", "Male", "Pro", "Advanced", "Monster", "Divine", "Royale", "Angele", "Trinity", "Andro",
-                "Pure", "Skin", "Sea", "Muscle", "Ascend", "Youth",
+                "Pure", "Skin", "Sea", "Muscle", "Ascend", "Youth", "Hyper(tone)?",
                 "Serum", "Supplement", "Fuel", "Cream"]
     if site != "math.stackexchange.com" and site != "mathoverflow.net":
         keywords += ["E?X[tl\d]?", "Alpha", "Prime", "Formula"]
@@ -301,7 +302,7 @@ def has_eltima(s, site, *args):
     return False, ""
 
 
-# noinspection PyUnusedLocal,PyMissingTypeHints
+# noinspection PyUnusedLocal,PyMissingTypeHints,PyTypeChecker
 def username_similar_website(s, site, *args):
     username = args[0]
     sim_result = perform_similarity_checks(s, username)
@@ -397,7 +398,7 @@ def similar_answer(post):
             sanitized_answer = strip_urls_and_tags(other_answer.body)
             ratio = similar_ratio(sanitized_body, sanitized_answer)
 
-            if ratio >= SIMILAR_THRESHOLD:
+            if ratio >= SIMILAR_ANSWER_THRESHOLD:
                 return False, False, True, \
                     u"Answer similar to answer {}, ratio {}".format(other_answer.post_id, ratio)
 

diff --git a/globalvars.py b/globalvars.py
@@ -122,7 +122,9 @@ class GlobalVars:
             "248139",   # FelixSFD
             "156721",   # D-side
             "167070",   # quartata
-            "172450"    # Hovercraft Full Of Eels
+            "172450",   # Hovercraft Full Of Eels
+            "56200",    # Eric Leschinski
+            "211021"    # Henders
         ],
         meta_tavern_room_id: [
             "315433",   # Normal Human

diff --git a/spamhandling.py b/spamhandling.py
@@ -86,8 +86,8 @@ def handle_spam(post, reasons, why):
         datahandling.add_auto_ignored_post((post.post_id, post.post_site, datetime.now()))
     if why is not None and why != "":
         datahandling.add_why(post.post_site, post.post_id, why)
-    if post.is_answer and post.question_id is not None:
-        datahandling.add_post_site_id_link((post.post_id, post.post_site, "answer"), post.question_id)
+    if post.is_answer and post.post_id is not None and post.post_id is not "":
+        datahandling.add_post_site_id_link((post.post_id, post.post_site, "answer"), post.post_id)
     try:
         post.title = parsing.escape_special_chars_in_title(post.title)
         sanitized_title = regex.sub('(https?://|\n)', '', post.title)