-
Notifications
You must be signed in to change notification settings - Fork 0
/
rcworker.py
228 lines (202 loc) · 9.9 KB
/
rcworker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
# -*- coding: UTF-8 -*-
from datetime import timezone,datetime
import json
import os
import shutil
import tempfile
import threading
import traceback
import uuid
import logging
from logging.config import fileConfig
import pywikibot
from pywikibot.data.api import APIError
from pywikibot.throttle import Throttle
import pwb_wrappers
from database_stuff import store_image, get_next_month
from image_corruption_utils import image_is_corrupt, notify_user, allow_bots
from config import REDIS_KEY
from PIL import UnidentifiedImageError
from redis import Redis
from Image import ImageObj
from EUtils import EDayCount, EJobType
import pickle
# Module-level logger read by run_worker(). It is only assigned a real
# logger inside the ``__main__`` block at the bottom of this file, so it
# remains None when the module is imported rather than run as a script.
logger = None
def retry_apierror(f):
    """Call ``f`` until it succeeds, retrying on ``APIError`` up to 8 times.

    Adapted from
    https://github.com/toolforge/embeddeddata/blob/5ecd31417a4c3c5d1be9c2a58f55a1665d9c767f/worker.py#L238

    :param f: zero-argument callable to invoke; its return value is discarded.
    :raises APIError: the error from the final attempt, once every attempt
        has failed. Any other exception raised by ``f`` propagates
        immediately without a retry.
    """
    max_attempts = 8
    last_error = None
    for i in range(max_attempts):
        try:
            f()
        except APIError as error:
            # Keep our own reference: the ``as`` name is unbound as soon as
            # the handler finishes.
            last_error = error
            pywikibot.warning(
                'Failed API request on attempt %d' % i)
        else:
            return
    # A bare ``raise`` at this point has no active exception and would fail
    # with RuntimeError, hiding the real cause; re-raise the APIError instead.
    raise last_error
def _download_revision(site, file_page, revision, path, attempts=8):
    """Download ``revision`` of ``file_page`` to ``path``, retrying on failure.

    :param site: site object whose ``throttle`` is invoked after each failed
        attempt.
    :param file_page: file page providing ``download(path, revision=...)``.
    :param revision: the file revision to fetch.
    :param path: local filesystem path the download is written to.
    :param attempts: maximum number of download attempts.
    :return: True as soon as one attempt reports success, False once every
        attempt has failed.
    """
    for attempt in range(attempts):
        try:
            # download() returns the download success result (True or False)
            success = file_page.download(path, revision=revision)
        except Exception as e:
            pywikibot.exception(e)
            success = False
        if success:
            return True  # no point continuing to try once we have the file
        pywikibot.warning(
            'Possibly corrupted download on attempt %d' % attempt)
        site.throttle(write=True)
    return False


def run_worker():
    """Consume queued changes from Redis and check each file for corruption.

    Loops forever: blocks on the ``REDIS_KEY`` list, unpickles each entry,
    downloads the referenced file revision into a temporary directory and
    passes the local copy to ``image_is_corrupt``. Files reported corrupt are
    handed to ``handle_result``; all others are recorded via ``store_image``.
    Errors while processing a single file are printed and the loop moves on.
    The temporary directory is removed when the function exits, which in
    practice only happens through an exception.
    """
    tmpdir = None  # stays None if mkdtemp() itself fails
    try:
        tmpdir = tempfile.mkdtemp()
        site = pywikibot.Site(user="TheSandBot")
        site._throttle = Throttle(site, multiplydelay=False)

        # Multi-workers are enough to cause problems, no need for internal
        # locking to cause even more problems
        site.lock_page = lambda *args, **kwargs: None  # noop
        site.unlock_page = lambda *args, **kwargs: None  # noop

        redis = Redis(host="localhost")

        while True:
            _, picklemsg = redis.blpop(REDIS_KEY)
            # SECURITY NOTE(review): pickle.loads can execute arbitrary code
            # from its input. This is only acceptable while the Redis queue
            # is written exclusively by trusted producers.
            change = pickle.loads(picklemsg)  # Need to unpickle and build object once more - T99
            logger.info(change.title)
            file_page = pywikibot.FilePage(site, change.title)

            if not allow_bots(file_page.text, "TheSandBot"):
                logger.critical("Not to edit " + file_page.title())
                continue

            if not file_page.exists():
                # pywikibot.warning() returns None, so emit the message to
                # each sink separately instead of logging its return value.
                message = 'File page does not exist ' + change.title
                pywikibot.warning(message)
                logger.debug(message)
                continue

            # T125
            if file_page.isRedirectPage():
                message = 'File page is redirect ' + change.title
                pywikibot.warning(message)
                logger.debug(message)
                continue

            revision = change.getRevision(file_page)
            if revision is None:
                logger.debug("Revision none")
                continue

            pywikibot.output('Working on: %s at %s' % (change.title, revision.timestamp))

            # Unique file name per change inside the worker's temp directory
            path = os.path.join(tmpdir, str(uuid.uuid1()))

            try:
                # Download image
                if not _download_revision(site, file_page, revision, path):
                    pywikibot.warning('FIXME: Download attempt exhausted')
                    pywikibot.warning('FIXME: Download of ' + str(file_page.title() + ' failed. Aborting...'))
                    continue  # move on to the next file

                try:
                    corrupt_result = image_is_corrupt(path)
                except UnidentifiedImageError:
                    logger.debug(change.title + " ::: is not an image (or at very least not currently supported by PIL)")
                    os.remove(path)  # file not an image
                    store_image(change.title, False, img_hash=change.hash, not_image=True)  # store in database
                    # This is a constantly running loop, so rather than
                    # raising just move on to the next file.
                    continue

                if corrupt_result:
                    handle_result(site, file_page, change, logger)
                else:  # image not corrupt
                    store_image(file_page.title(), False, img_hash=change.hash)  # store in database
                    logger.info(file_page.title() + " :Not corrupt. Stored")
            except Exception:
                # Keep the worker alive: report the failure and carry on
                # with the next queued change.
                traceback.print_exc()
            finally:
                # Runs on every path out of the try, including ``continue``
                if os.path.exists(path):
                    os.remove(path)

        # The loop above has no ``break``; this line is not expected to run.
        pywikibot.output("Exit - THIS SHOULD NOT HAPPEN")
    finally:
        # Guard against mkdtemp() having failed; rmtree(None) would raise a
        # TypeError and mask the original error.
        if tmpdir is not None:
            shutil.rmtree(tmpdir)
def handle_result(site, file_page, change, logger):
    """Tag a corrupt file, record it in the database and notify the user.

    :param site: site object passed through to ``notify_user``.
    :param file_page: the file page that was identified as corrupt.
    :param change: change object; its ``hash`` attribute is stored alongside
        the database record.
    :param logger: logger used for the progress and failure messages.
    """
    tag_page(file_page)
    store_image(file_page.title(), True, img_hash=change.hash, day_count=7)  # store in database
    logger.info("Saved page and logged in database")

    # Notify the user that the file needs updating
    try:  # TODO: Add record to database about successful notification?
        notify_user(site, file_page, EDayCount.DAYS_7, EJobType.MONITOR, minor=False)
    except Exception:
        # Notification is best-effort, so failures are logged and not raised.
        # ``except Exception`` (not a bare ``except``) lets KeyboardInterrupt
        # and SystemExit still stop the worker; exc_info keeps the cause.
        logger.error(
            "ERROR: Could not notify user about " + str(file_page.title()) + " being corrupt.",
            exc_info=True)
def tag_page(file_page):
    """Tag ``file_page`` with the ``TSB image identified corrupt`` template.

    The template text carries the current UTC date (``%m/%d/%Y``) followed by
    ``day``/``month``/``year`` parameters taken from the ``/``-separated
    string form of ``get_next_month(7)``. The actual page edit is delegated
    to ``pwb_wrappers.tag_page``; storing the result in the database is left
    to the caller.

    :param file_page: page handed to ``pwb_wrappers.tag_page``.
    """
    deadline_parts = str(get_next_month(7)).split('/')
    tagged_on = datetime.now(timezone.utc).strftime("%m/%d/%Y")

    # Pieces are joined in the same order the template expects:
    # date, then day (index 1), month (index 0), year (index 2).
    template = "".join((
        "{{TSB image identified corrupt|",
        tagged_on,
        "|day=",
        deadline_parts[1],
        "|month=",
        deadline_parts[0],
        "|year=",
        deadline_parts[2],
        "}}",
    ))
    message = "Image detected as corrupt, tagging."
    pwb_wrappers.tag_page(file_page, template, message)
def main():
    """Script entry point: process pywikibot's arguments, then run the worker.

    ``run_worker`` loops indefinitely, so this call normally returns only
    when an exception propagates out of it.
    """
    # Let pywikibot handle the command line before any site is created
    pywikibot.handle_args()
    run_worker()
if __name__ == "__main__":
    # Configure logging before entering the guarded section so that the
    # KeyboardInterrupt handler below can never run while ``logger`` is
    # still unset.
    fileConfig('logging_config.ini')
    logger = logging.getLogger(__name__)
    try:
        main()
    except KeyboardInterrupt:
        # Ctrl-C is the expected way to stop the worker; log and exit quietly
        logger.critical("Worker shutdown")
    finally:
        pywikibot.stopme()