Skip to content

Commit

Permalink
Merge branch 'master' into auto-blacklist-1520892688.7315655
Browse files Browse the repository at this point in the history
  • Loading branch information
tripleee committed Mar 13, 2018
2 parents 85effc2 + 098bbad commit 8a602b5
Show file tree
Hide file tree
Showing 9 changed files with 158 additions and 96 deletions.
1 change: 1 addition & 0 deletions bad_keywords.txt
Expand Up @@ -1177,3 +1177,4 @@ d\W?bal
mega\W?boost\W?perform\W?xl
seo\W*services
vidhigra
nuallura
2 changes: 2 additions & 0 deletions blacklisted_websites.txt
Expand Up @@ -1779,3 +1779,5 @@ healthcareorder\.com
healthsuppfacts\.com
adviksoft\.com
skincare4your\.com
goo\.gl/7J79so
3peartechnologies\.com
20 changes: 16 additions & 4 deletions chatcommands.py
Expand Up @@ -361,10 +361,16 @@ def blame2(msg, x):
for i, char in enumerate(reversed(x)):
user += (len(base)**i) * base[char]

unlucky_victim = msg._client.get_user(user)
return "It's [{}](https://chat.{}/users/{})'s fault.".format(unlucky_victim.name,
msg._client.host,
unlucky_victim.id)
try:
unlucky_victim = msg._client.get_user(user)
return "It's [{}](https://chat.{}/users/{})'s fault.".format(unlucky_victim.name,
msg._client.host,
unlucky_victim.id)
except HTTPError:
unlucky_victim = msg.owner
return "It's [{}](https://chat.{}/users/{})'s fault.".format(unlucky_victim.name,
msg._client.host,
unlucky_victim.id)


# noinspection PyIncorrectDocstring
Expand Down Expand Up @@ -664,6 +670,12 @@ def inqueue(url):
return "Not in queue."


@command()
def listening():
    """Chat command: show which posts are currently watched for deletion.

    Returns the raw repr of the deletion watcher's subscription map —
    presumably a debugging aid; the human-readable summary is left
    commented out below. No arguments, unprivileged.
    """
    # return "{} post(s) currently monitored for deletion.".format(len(GlobalVars.deletion_watcher.posts))
    return repr(GlobalVars.deletion_watcher.posts)


# noinspection PyIncorrectDocstring,PyProtectedMember
@command(str, whole_msg=True, privileged=True, arity=(0, 1))
def stappit(msg, location_search):
Expand Down
22 changes: 14 additions & 8 deletions chatcommunicate.py
Expand Up @@ -14,10 +14,10 @@
import yaml

import datahandling
from deletionwatcher import DeletionWatcher
from excepthook import log_exception
from globalvars import GlobalVars
from parsing import fetch_post_url_from_msg_content, fetch_owner_url_from_msg_content
from parsing import fetch_post_id_and_site_from_url, fetch_post_url_from_msg_content, fetch_owner_url_from_msg_content
from tasks import Tasks

LastMessages = collections.namedtuple("LastMessages", ["messages", "reports"])

Expand Down Expand Up @@ -169,9 +169,9 @@ def send_messages():
_last_messages.reports.popitem(last=False)

if room.deletion_watcher:
threading.Thread(name="deletion watcher",
target=DeletionWatcher.check_if_report_was_deleted,
args=(report_data[0], room.room._client.get_message(message_id))).start()
callback = room.room._client.get_message(message_id).delete

GlobalVars.deletion_watcher.subscribe(report_data[0], callback=callback, timeout=120)

_pickle_run.set()

Expand Down Expand Up @@ -262,9 +262,15 @@ def tell_rooms(msg, has, hasnt, notify_site="", report_data=None):

if room.block_time < timestamp and _global_block < timestamp:
if report_data and "delay" in _room_roles and room_id in _room_roles["delay"]:
threading.Thread(name="delayed post",
target=DeletionWatcher.post_message_if_not_deleted,
args=(msg_pings, room, report_data)).start()
def callback():
post = fetch_post_id_and_site_from_url(report_data[0])[0:2]

if not datahandling.is_false_positive(post) and not datahandling.is_ignored_post(post):
_msg_queue.put(room, msg_pings, report_data)

task = Tasks.later(callback, after=300)

GlobalVars.deletion_watcher.subscribe(report_data[0], callback=task.cancel)
else:
_msg_queue.put((room, msg_pings, report_data))

Expand Down
176 changes: 106 additions & 70 deletions deletionwatcher.py
@@ -1,7 +1,10 @@
# coding=utf-8
import json
import os.path
import pickle
import requests
import time
import threading
# noinspection PyPackageRequirements
import websocket
# noinspection PyPackageRequirements
Expand All @@ -11,86 +14,119 @@
import metasmoke
from globalvars import GlobalVars
import datahandling
from parsing import fetch_post_id_and_site_from_url
from helpers import log
from parsing import fetch_post_id_and_site_from_url, to_protocol_relative
from tasks import Tasks


# noinspection PyClassHasNoInit,PyBroadException,PyMethodParameters
class DeletionWatcher:
@classmethod
def update_site_id_list(self):
soup = BeautifulSoup(requests.get("https://meta.stackexchange.com/topbar/site-switcher/site-list").text,
"html.parser")
site_id_dict = {}
for site in soup.findAll("a", attrs={"data-id": True}):
site_name = urlparse(site["href"]).netloc
site_id = site["data-id"]
site_id_dict[site_name] = site_id
GlobalVars.site_id_dict = site_id_dict
def __init__(self):
DeletionWatcher.update_site_id_list()

self.socket = websocket.create_connection("wss://qa.sockets.stackexchange.com/")
self.posts = {}

if os.path.exists("deletionIDs.p"):
with open("deletionIDs.p", "rb") as fh:
for post in DeletionWatcher._check_batch(pickle.load(fh)):
self.subscribe(post, pickle=False)

self._save()

threading.Thread(name="deletion watcher", target=self._start, daemon=True).start()

def _start(self):
while True:
msg = self.socket.recv()

if msg:
msg = json.loads(msg)
action = msg["action"]

if action == "hb":
self.socket.send("hb")
else:
data = json.loads(msg["data"])

if data["a"] == "post-deleted":
try:
post_id, _, post_type, post_url, callbacks = self.posts[action]
del self.posts[action]

if not post_type == "answer" or ("aId" in data and str(data["aId"]) == post_id):
self.socket.send("-" + action)
Tasks.do(metasmoke.Metasmoke.send_deletion_stats_for_post, post_url, True)

for callback, max_time in callbacks:
if not max_time or time.time() < max_time:
callback()
except KeyError:
pass

def subscribe(self, post_url, callback=None, pickle=True, timeout=None):
post_id, post_site, post_type = fetch_post_id_and_site_from_url(post_url)

if post_site not in GlobalVars.site_id_dict:
log("warning", "unknown site {} when subscribing to {}".format(post_site, post_url))
return

@classmethod
def check_websocket_for_deletion(self, post_site_id, post_url, timeout):
time_to_check = time.time() + timeout
post_id = post_site_id[0]
post_type = post_site_id[2]
if post_type == "answer":
question_id = str(datahandling.get_post_site_id_link(post_site_id))
question_id = datahandling.get_post_site_id_link((post_id, post_site, post_type))

if question_id is None:
return
else:
question_id = post_id
post_site = post_site_id[1]
if post_site not in GlobalVars.site_id_dict:
return

site_id = GlobalVars.site_id_dict[post_site]
action = "{}-question-{}".format(site_id, question_id)
max_time = (time.time() + timeout) if timeout else None

if action in self.posts and callback:
_, _, _, _, callbacks = self.posts[action]
callbacks.append((callback, max_time))
else:
self.posts[action] = (post_id, post_site, post_type, post_url, [(callback, max_time)] if callback else [])
self.socket.send(action)

if pickle:
Tasks.do(self._save)

def _save(self):
    """Persist the ids of all watched posts to deletionIDs.p, grouped by site.

    Only the post id and site are stored; entries are re-validated and
    re-subscribed from this pickle on the next start-up.
    """
    pickle_output = {}

    # self.posts values are (post_id, post_site, post_type, post_url, callbacks);
    # the type, url and callbacks are runtime-only and are not persisted.
    for post_id, post_site, _, _, _ in self.posts.values():
        if post_site not in pickle_output:
            pickle_output[post_site] = [post_id]
        else:
            pickle_output[post_site].append(post_id)

    with open("deletionIDs.p", "wb") as pickle_file:
        pickle.dump(pickle_output, pickle_file)

@staticmethod
def _check_batch(saved):
for site, posts in saved.items():
ids = ";".join([post_id for post_id in posts if not DeletionWatcher._ignore((post_id, site))])
uri = "https://api.stackexchange.com/2.2/posts/{}?site={}&key=IAkbitmze4B8KpacUfLqkw((".format(ids, site)

ws = websocket.create_connection("wss://qa.sockets.stackexchange.com/")
ws.send(site_id + "-question-" + question_id)

while time.time() < time_to_check:
ws.settimeout(time_to_check - time.time())
try:
a = ws.recv()
except websocket.WebSocketTimeoutException:
Tasks.do(metasmoke.Metasmoke.send_deletion_stats_for_post, post_url, False)
return False
if a is not None and a != "":
try:
action = json.loads(a)["action"]
if action == "hb":
ws.send("hb")
continue
else:
d = json.loads(json.loads(a)["data"])
except:
continue
if d["a"] == "post-deleted" and str(d["qId"]) == question_id:
if (post_type == "answer" and "aId" in d and str(d["aId"]) == post_id) or post_type == "question":
Tasks.do(metasmoke.Metasmoke.send_deletion_stats_for_post, post_url, True)
return True

Tasks.do(metasmoke.Metasmoke.send_deletion_stats_for_post, post_url, False)
return False

@classmethod
def check_if_report_was_deleted(self, post_url, message):
post_site_id = fetch_post_id_and_site_from_url(post_url)
was_report_deleted = self.check_websocket_for_deletion(post_site_id, post_url, 1200)

if was_report_deleted:
try:
message.delete()
except:
pass

@classmethod
def post_message_if_not_deleted(self, message_text, room, report_data):
post_url = report_data[0]

post_site_id = fetch_post_id_and_site_from_url(post_url)
was_report_deleted = self.check_websocket_for_deletion(post_site_id, post_url, 300)

if not was_report_deleted and not datahandling.is_false_positive(post_site_id[0:2]) and not \
datahandling.is_ignored_post(post_site_id[0:2]):

chatcommunicate._msg_queue.put((room, message_text, report_data))
for post in requests.get(uri).json()["items"]:
if time.time() - post["creation_date"] < 7200:
yield to_protocol_relative(post["link"]).replace("/q/", "/questions/")

@staticmethod
def _ignore(post_site_id):
    # True when the post is already resolved as a false positive, or has been
    # (auto-)ignored — such posts need not be monitored for deletion.
    # post_site_id is a (post_id, site) pair, as passed by _check_batch.
    return datahandling.is_false_positive(post_site_id) or datahandling.is_ignored_post(post_site_id) or \
        datahandling.is_auto_ignored_post(post_site_id)

@staticmethod
def update_site_id_list():
    """Refresh GlobalVars.site_id_dict from the Stack Exchange site switcher.

    Scrapes the meta.stackexchange.com top-bar site list and builds a
    mapping of site hostname -> numeric site id (the "data-id" attribute),
    which is later used to build websocket subscription actions.
    """
    soup = BeautifulSoup(requests.get("https://meta.stackexchange.com/topbar/site-switcher/site-list").text,
                         "html.parser")
    site_id_dict = {}
    for site in soup.findAll("a", attrs={"data-id": True}):
        site_name = urlparse(site["href"]).netloc
        site_id = site["data-id"]
        site_id_dict[site_name] = site_id
    GlobalVars.site_id_dict = site_id_dict
8 changes: 2 additions & 6 deletions spamhandling.py
Expand Up @@ -25,12 +25,6 @@ def should_whitelist_prevent_alert(user_url, reasons):
return len(reasons_comparison) == 0


# noinspection PyMissingTypeHints
def should_reasons_prevent_tavern_posting(reasons):
reasons_comparison = [r for r in set(reasons) if r not in GlobalVars.non_tavern_reasons]
return len(reasons_comparison) == 0


# noinspection PyMissingTypeHints
def check_if_spam(post):
# if not post.body:
Expand Down Expand Up @@ -148,6 +142,8 @@ def handle_spam(post, reasons, why):
else:
chatcommunicate.tell_rooms(message, ("all", "site-" + post.post_site),
without_roles, notify_site=post.post_site, report_data=(post_url, poster_url))

GlobalVars.deletion_watcher.subscribe(post_url)
except:
exc_type, exc_obj, exc_tb = sys.exc_info()
excepthook.uncaught_exception(exc_type, exc_obj, exc_tb)
12 changes: 9 additions & 3 deletions tasks.py
Expand Up @@ -16,14 +16,18 @@ def _run(cls):

@classmethod
def do(cls, func, *args, **kwargs):
    """Schedule func(*args, **kwargs) to run on the task loop as soon as possible.

    Returns the asyncio Handle so callers can cancel the pending call.
    """
    # NOTE: the source shown had the call duplicated (a merged-diff artifact);
    # schedule exactly once and keep the handle-returning form.
    handle = cls.loop.call_soon(lambda: func(*args, **kwargs))
    # Wake the loop thread so the freshly queued callback runs promptly.
    cls.loop._write_to_self()

    return handle

@classmethod
def later(cls, func, *args, after=None, **kwargs):
    """Schedule func(*args, **kwargs) to run `after` seconds from now.

    Returns the asyncio TimerHandle so callers can cancel the delayed call.
    """
    # NOTE: the source shown had the call duplicated (a merged-diff artifact);
    # schedule exactly once and keep the handle-returning form.
    handle = cls.loop.call_later(after, lambda: func(*args, **kwargs))
    # Wake the loop thread so the new timer is taken into account immediately.
    cls.loop._write_to_self()

    return handle

@classmethod
def periodic(cls, func, *args, interval=None, **kwargs):
@asyncio.coroutine
Expand All @@ -32,8 +36,10 @@ def f():
yield from asyncio.sleep(interval)
func(*args, **kwargs)

cls.loop.create_task(f())
handle = cls.loop.create_task(f())
cls.loop._write_to_self()

return handle


threading.Thread(name="tasks", target=Tasks._run, daemon=True).start()
10 changes: 7 additions & 3 deletions watched_keywords.txt
Expand Up @@ -2474,7 +2474,6 @@
1519739799 Federico dentoaviation\.com
1519745836 Glorfindel ind99info\.com
1519786348 WELZ fix\W?card\W?tech
1519796009 tripleee 3peartechnologies\.com
1519802723 tripleee asetsafety\.ac\.in
1519806308 doppelgreener softsolution\.al
1519808512 Glorfindel hot166\.com
Expand Down Expand Up @@ -2584,7 +2583,6 @@
1520772093 WELZ noavarangermi\.ir
1520773183 WELZ sourcesara\.com
1520795181 Glorfindel meshkalla
1520822770 iBug nuallura
1520830900 K.Dᴀᴠɪs chinasunlead\.net
1520834947 micsthepick ajk\Wtorus
1520837032 tripleee theitsol\.com
Expand All @@ -2599,4 +2597,10 @@
1520861335 tripleee ^Nastia Mykoliuk$
1520861497 tripleee ora-error\.com
1520882503 Glorfindel howtogethelpinwindows10\.co
1520892688 K.Dᴀᴠɪs low(er|est)?\Wprices?
1520892688 K.Dᴀᴠɪs low(?:er|est)?\Wprices?
1520907077 WELZ plasticbidet\.com
1520907152 WELZ aquatownbidet\.com
1520917730 tripleee lts\W?secure
1520918729 tripleee (?:7\W*)?914\W*611\W*04\W*44
1520919103 tripleee golden\W?farms
1520919209 tripleee rose\W?diamond
3 changes: 1 addition & 2 deletions ws.py
Expand Up @@ -73,6 +73,7 @@

# We need an instance of bodyfetcher before load_files() is called
GlobalVars.bodyfetcher = BodyFetcher()
GlobalVars.deletion_watcher = DeletionWatcher()

load_files()
filter_auto_ignored_posts()
Expand Down Expand Up @@ -142,8 +143,6 @@ def restart_automatically():
log('info', GlobalVars.location)
log('info', GlobalVars.metasmoke_host)

DeletionWatcher.update_site_id_list()

ws = websocket.create_connection("wss://qa.sockets.stackexchange.com/")
ws.send("155-questions-active")

Expand Down

0 comments on commit 8a602b5

Please sign in to comment.