-
Notifications
You must be signed in to change notification settings - Fork 3
/
dyknotifier.py
370 lines (314 loc) · 15 KB
/
dyknotifier.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
"""
A module implementing a bot to notify editors when articles they create or
expand are nominated for DYK by someone else.
"""
import argparse
import ConfigParser
from datetime import datetime, timedelta
import functools
import json
import operator
import os.path
# pylint: disable=import-error
import pywikibot
import pywikibot.pagegenerators as pagegenerators
import re
import sys
import time
import traceback
from bs4 import BeautifulSoup
from clint.textui import prompt
# Parsed bot configuration. Stays None until read_config() assigns a
# RawConfigParser to it; other functions read settings from its
# "dyknotifier" section.
CONFIG = None
# Searched against nomination wikitext; a match means the nomination is
# skipped (self-nominations and pages categorized as failed or passed).
# re.I makes the whole pattern case-insensitive.
BAD_TEXT = re.compile(r"(Self(-|\s)nominated|Category:((f|F)ailed|(p|P)assed) DYK)",
                      re.I)
def main():
    """Run one complete pass of the bot, from setup through notification."""
    started_at = datetime.utcnow().isoformat()
    print("Starting dyknotifier at " + started_at)

    # CONFIG has to be loaded before anything else tries to read it.
    read_config()
    verify_data_present()
    args = parse_args()

    wiki = pywikibot.Site("en", "wikipedia")
    wiki.login()

    candidates = get_people_to_notify(wiki)
    notify_people(prune_list_of_people(candidates), args, wiki)
def read_config():
    """Load the bot's settings file into the module-level CONFIG."""
    global CONFIG
    config_path = "/data/project/apersonbot/bot/dyknotifier/config.txt"
    CONFIG = ConfigParser.RawConfigParser()
    CONFIG.read(config_path)
def verify_data_present():
    """Exit with status 1 unless the already-notified file exists."""
    data_path = CONFIG.get("dyknotifier", "ALREADY_NOTIFIED_FILE")
    if os.path.isfile(data_path):
        return
    print("Couldn't locate %s" % data_path)
    sys.exit(1)
def parse_args():
    """Build the command-line parser and return the parsed sys.argv."""
    parser = argparse.ArgumentParser(
        prog="DYKNotifier",
        description="Notify editors of their DYK noms.")
    # Each entry is (flag names, keyword settings) for one option.
    options = (
        (("-i", "--interactive"),
         dict(action="store_true", help="Confirm before each edit.")),
        (("-c", "--count"),
         dict(type=int, help="Notify at most n people.")),
    )
    for flags, settings in options:
        parser.add_argument(*flags, **settings)
    return parser.parse_args()
def get_people_to_notify(wiki):
    """
    Scan the pending-nominations category and collect who to notify.

    Returns a dict mapping each username to the list of nomination page
    titles that user should be told about.
    """
    category = pywikibot.Category(wiki, "Category:Pending DYK nominations")
    print("Getting nominations from " + category.title() + "...")

    people_to_notify = {}
    nom_pages = pagegenerators.CategorizedPageGenerator(category, content=True)
    for nom_page in nom_pages:
        wikitext = nom_page.get()
        if BAD_TEXT.search(wikitext):
            # Matches BAD_TEXT (self-nominated, or already passed/failed).
            continue
        found = get_who_to_nominate(wikitext, nom_page.title())
        for username, nom_title in found.items():
            people_to_notify.setdefault(username, []).append(nom_title)

    print("Found {} people to notify.".format(len(people_to_notify)))
    return people_to_notify
# pylint: disable=too-many-branches
# pylint: disable=too-many-locals
def prune_list_of_people(people_to_notify):
    """
    Remove people who shouldn't be notified from the list.

    people_to_notify maps usernames to lists of nomination page titles.
    The caller's dict is not modified; a new dict is returned. A user is
    dropped when the username is empty, when their talk page doesn't
    exist, or when no nominations remain after removing those already
    recorded in the already-notified file and those whose title already
    appears in the text of the user's talk page.
    """
    def print_people_left(what_was_removed):
        """Log progress after a pruning step (a no-op, for performance)."""
        # pylint: disable=unused-argument
        pass

    def talk_pages_for(usernames):
        """Return a page generator over the given users' talk pages."""
        titles = ["User talk:" + name for name in usernames]
        return pagegenerators.PagesFromTitlesGenerator(titles)

    def drop_empty(people):
        """Return a copy of people without users whose nom list is empty."""
        return {k: v for k, v in people.items() if v}

    # Prune empty entries
    people_to_notify = {k: v for k, v in people_to_notify.items() if k}
    print_people_left("empty entries")

    # Prune talk pages that don't exist. Only pages that really are
    # missing may be removed; without the exists() check every user
    # would be deleted here.
    usernames_without_talk_pages = [
        page.title(withNamespace=False)
        for page in talk_pages_for(people_to_notify.keys())
        if not page.exists()]
    for each_username in usernames_without_talk_pages:
        # pop() rather than del: the title reported by the page may have
        # been normalized and so not match the original key exactly.
        people_to_notify.pop(each_username, None)

    # Prune people I've already notified
    data_path = CONFIG.get("dyknotifier", "ALREADY_NOTIFIED_FILE")
    with open(data_path) as already_notified_file:
        try:
            already_notified_data = json.load(already_notified_file)
        except ValueError as error:
            # An undecodable (e.g. empty) file counts as "nobody yet";
            # any other decoding problem is a real error.
            if str(error) != "No JSON object could be decoded":
                raise
            already_notified_data = {}

    # Since the outer dict in the file is keyed on month string,
    # smush all the values together to get a dict keyed on username
    already_notified = {}
    for month_dict in already_notified_data.values():
        for month_username, month_items in month_dict.items():
            already_notified.setdefault(month_username, []).extend(month_items)

    # The file stores nomination names without the template prefix, so
    # add it back before comparing against full nomination titles.
    nom_prefix = CONFIG.get("dyknotifier", "NOMINATION_TEMPLATE")
    for username, prior_nominations in already_notified.items():
        if username not in people_to_notify:
            continue
        prior_titles = set(nom_prefix + x for x in prior_nominations)
        people_to_notify[username] = [nom
                                      for nom in people_to_notify[username]
                                      if nom not in prior_titles]
    people_to_notify = drop_empty(people_to_notify)
    print_people_left("already-notified people")

    # Prune user talk pages that link to this nom.
    for user_talk_page in talk_pages_for(people_to_notify.keys()):
        if not user_talk_page.exists() or user_talk_page.isRedirectPage():
            continue
        # Sanity check: the page title must map back to a known user.
        username = user_talk_page.title(withNamespace=False)
        if username not in people_to_notify:
            continue
        # Fetch the page text once rather than once per nomination.
        talk_text = user_talk_page.get()
        people_to_notify[username] = [nom
                                      for nom in people_to_notify[username]
                                      if nom not in talk_text]
    people_to_notify = drop_empty(people_to_notify)
    print_people_left("linked people")

    return people_to_notify
# Disabling pylint because breaking stuff out into
# methods would spill too much into global scope
# pylint: disable=too-many-branches
# pylint: disable=too-many-statements
def notify_people(people_to_notify, args, wiki):
    """
    Add a message to people who ought to be notified about their DYK noms.

    people_to_notify maps usernames to lists of nomination page titles.
    args is the namespace returned by parse_args(); its `interactive` and
    `count` attributes are read. wiki is the site the talk pages are on.

    Everyone successfully notified (and, in interactive mode, everyone the
    operator marks as already notified) is merged into the already-notified
    file before this function returns or exits. Calls sys.exit(0) when the
    count limit is reached or the operator asks to quit. Returns None.
    """
    # Check if there's anybody to notify
    if not people_to_notify:
        print("Nobody to notify.")
        return

    # Nomination titles carry this prefix; it is sliced off by length.
    nom_prefix_length = len("Template:Did you know nominations/")

    people_notified = dict()

    def write_notified_people_to_file():
        """Update the file of notified people with people_notified."""
        data_path = CONFIG.get("dyknotifier", "ALREADY_NOTIFIED_FILE")
        this_month = datetime.now().strftime("%B %Y")
        with open(data_path) as already_notified_file:
            try:
                already_notified = json.load(already_notified_file)
            except ValueError as error:
                if str(error) != "No JSON object could be decoded":
                    raise
                already_notified = {}  # eh, we'll be writing to it anyway

        # Merge this run's notifications into this month's record.
        already_notified_this_month = already_notified.get(this_month, {})
        usernames = set(already_notified_this_month) | set(people_notified)
        for username in usernames:
            already_notified_this_month[username] = list(
                set(already_notified_this_month.get(username, [])) |
                set(people_notified.get(username, [])))
        already_notified[this_month] = already_notified_this_month

        # Remove all data from more than a year ago
        a_year_ago = datetime.today() - timedelta(365)
        already_notified = {
            month: people for month, people in already_notified.items()
            if datetime.strptime(month, "%B %Y") > a_year_ago}

        # Open for writing only once the new contents are fully built, so
        # a failure above cannot leave the file truncated.
        with open(data_path, "w") as already_notified_file:
            json.dump(already_notified, already_notified_file)

        print("Wrote %d people for %d nominations this month." %
              (len(already_notified_this_month),
               sum(len(noms)
                   for noms in already_notified_this_month.values())))

    remaining = len(people_to_notify)
    for person, nom_names in people_to_notify.items():
        remaining -= 1  # people still to be processed after this one

        if args.count:
            # Counts nominations recorded so far in people_notified.
            edits_made = sum(len(noms) for noms in people_notified.values())
            if edits_made >= args.count:
                print("%d notified; exiting." % edits_made)
                write_notified_people_to_file()
                sys.exit(0)

        # Remove namespaces from the nom names.
        nom_names = [name[nom_prefix_length:] for name in nom_names]

        # Format nom names into a string
        nom_names_string = ", ".join(name.encode("utf-8")
                                     for name in nom_names)

        if args.interactive:
            print("About to notify {} for {}.".format(person.encode("utf-8"),
                                                      nom_names_string))
            choice = raw_input("What (s[kip], c[ontinue], q[uit])? ")
            # Slice instead of indexing so an empty answer doesn't raise;
            # anything other than s/q continues with the notification.
            answer = choice[:1]
            if answer == "s":
                if prompt.yn("Because I've already notified them?"):
                    people_notified[person] = people_notified.get(
                        person, []) + nom_names
                print("Skipping " + person.encode("utf-8") + ".")
                continue
            elif answer == "q":
                print("Stop requested; exiting.")
                write_notified_people_to_file()
                sys.exit(0)

        talkpage = pywikibot.Page(wiki, title="User talk:" + person)
        try:
            summary = CONFIG.get("dyknotifier", "SUMMARY").format(
                nom_names_string)
            talkpage.save(appendtext=generate_message(nom_names, wiki),
                          comment=summary)
            print("Success! Notified %s because of %s. (%d left)" %
                  (person.encode("utf-8"), nom_names_string, remaining))
            people_notified[person] = people_notified.get(person,
                                                          []) + nom_names
        except pywikibot.Error as error:
            # Encode the name like the other prints do, so a non-ASCII
            # username can't raise from inside this handler.
            print("Couldn't notify {} because of {} - result: {}".format(
                person.encode("utf-8"), nom_names_string, str(error)))
        except (KeyboardInterrupt, SystemExit):
            # Record what has been done so far before propagating.
            write_notified_people_to_file()
            raise
        except UnicodeEncodeError as error2:
            traceback.print_exc()
            print("Unicode encoding error notifiying {} about {}: {}".format(
                person.encode("utf-8"), nom_names_string, str(error2)))

    write_notified_people_to_file()
def get_who_to_nominate(wikitext, title):
    """
    Work out who should be notified about a single DYK nomination.

    Given the wikitext of a DYK nom and its title, return a dict mapping
    each username to notify to `title`. An empty dict is returned when the
    page looks like a redirect, has no <small> tag or no closing </small>,
    doesn't have exactly one "Nominated by" line, or names no users.
    """
    # MediaWiki accepts the redirect keyword in any letter case.
    if "#REDIRECT" in wikitext.upper():
        print(title + " is a redirect.")
        return {}

    if "<small>" not in wikitext:
        print("<small> not found in " + title)
        return {}

    soup = BeautifulSoup(wikitext, "lxml")
    small_tags = [unicode(x.string) for x in soup.find_all("small")]

    # The line reading "Created by... Nominated by..." must be unambiguous.
    nom_lines = [text for text in small_tags if u"Nominated by" in text]
    if len(nom_lines) != 1:
        print(u"Small tags for " + title + u": " + unicode(small_tags))
        return {}

    # Every user whose talk page is linked to within the <small> tags
    # is assumed to have contributed. Looking for piped links to user
    # talk pages.
    usernames = usernames_from_text_with_sigs(nom_lines[0])

    # If there aren't any usernames, WTF and exit
    if not usernames:
        print("WTF, no usernames for " + title.encode("utf-8"))
        return {}

    # The last one is the nominator, who already knows about the
    # nomination, so drop every occurrence of them.
    nominator = usernames[-1]
    usernames = [user for user in usernames if user != nominator]

    # Everything after the first closing tag is treated as discussion.
    closing_tag = "</small>"
    closing_at = wikitext.find(closing_tag)
    if closing_at == -1:
        # Without a closing tag the discussion can't be located, so
        # nobody is notified rather than slicing from a bogus offset.
        print("</small> not found in " + title)
        return {}
    discussion_text = wikitext[closing_at + len(closing_tag):]

    # Removing people who have contributed to the discussion
    discussion = set(usernames_from_text_with_sigs(discussion_text))
    return {user: title for user in usernames if user not in discussion}
def usernames_from_text_with_sigs(wikitext):
    """
    Return the users whose talk pages are linked to in the wikitext.

    Each "User talk:" that is followed by a pipe is treated as a piped
    link, and the text between the two is taken as the username. Names
    are returned in order of appearance, duplicates included. Occurrences
    that aren't piped links (no later pipe, or a bracket or newline before
    the pipe) are skipped.
    """
    usernames = []
    for match in re.finditer(r"User talk:", wikitext):
        name_start = match.end()
        pipe_at = wikitext.find("|", name_start)
        if pipe_at == -1:
            # No pipe anywhere after this link, so it isn't piped.
            continue
        candidate = wikitext[name_start:pipe_at]
        # A bracket or newline means the pipe belongs to something later
        # in the text, not to this link.
        if any(char in candidate for char in "[]\n"):
            continue
        usernames.append(candidate)
    return usernames
def generate_message(nom_names, wiki):
    """
    Build the wikitext to append to a user's talk page for these noms.

    nom_names holds nomination subpage names (without the template
    prefix). One name produces a single DYKNom substitution; any other
    number produces one substitution wrapping a bulleted list.
    """
    single_tpl = u"\n\n{{{{subst:DYKNom|{0}|passive=yes}}}}"
    single_flagged_tpl = u"\n\n{{{{subst:DYKNom||passive=yes|section={0}}}}}"
    multi_tpl = u"\n\n{{{{subst:DYKNom|{0}|passive=yes|multiple=yes}}}}"
    bullet_tpl = u"* {0} ([[Template:Did you know nominations/{1}|discussion]])"

    def is_flagged(subpage_name):
        """True if the name has a comma and no page by that title exists."""
        # Names without a comma are never looked up on the wiki.
        if "," in subpage_name:
            return not pywikibot.Page(wiki, title=subpage_name).exists()
        return False

    flagged_names = [(name, is_flagged(name)) for name in nom_names]

    if len(flagged_names) == 1:
        only_name, only_flagged = flagged_names[0]
        if only_flagged:
            return single_flagged_tpl.format(only_name)
        return single_tpl.format(only_name)

    bullets = []
    for name, flagged in flagged_names:
        if flagged:
            label = "Multiple articles"
        else:
            label = "[[" + name + "]]"
        bullets.append(bullet_tpl.format(label, name))
    return multi_tpl.format("\n".join(bullets))
# Run the bot only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()