-
Notifications
You must be signed in to change notification settings - Fork 0
/
blogimporter.py
68 lines (51 loc) · 4.31 KB
/
blogimporter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
'''Blog-Importer.py
This script handles tedious setup tasks for the Blog section of the Wikipedia Signpost.
It can be run both as a library, returning the post via blogimporter.main(), and as a command line script.
Note that running this script as a library requires that pywikibot be installed.
Running it from the command line further requires that pywikibot be configured.
Note that the "blogimporter.py" script in signpostlab is a copy of this one.'''
import signpostlib
import requests
import argparse
# Seconds to wait for the blog server before giving up; without a timeout
# requests can block indefinitely on an unresponsive host.
_REQUEST_TIMEOUT_SECONDS = 30

# Markers delimiting the post body inside the blog's page HTML.
_ENTRY_OPEN_MARKER = '<div class="entry">'
_ENTRY_END_MARKER = '<div class="socials">'

# Wikitext emitted before and after the converted post body.
_ARTICLE_HEADER = '''<noinclude>{{Signpost draft}}
{{Wikipedia:Signpost/Template:Signpost-header|||}}</noinclude>
<div style="padding-left:50px; padding-right:50px;">
{{Wikipedia:Signpost/Template:Signpost-article-start|{{{1|Your title}}}|By ?| {{subst:#time:j F Y|{{subst:Wikipedia:Wikipedia Signpost/Issue|4}}}}}}
</div>
{{Wikipedia:Wikipedia Signpost/Templates/WM Blog}}
<div style="width:46em; line-height:1.6em; font-size:1em; font-family:Helvetica Neue, Helvetica, Arial, sans-serif; padding-left:5em;" class="plainlinks">'''
_ARTICLE_FOOTER = '''</div>
<noinclude>{{Wikipedia:Signpost/Template:Signpost-article-comments-end||2015-04-22|2015-05-06}}
</noinclude>'''


def _extract_entry_html(html):
    '''Return the post body cut out of a full blog page.

    The body is the text after the first '<div class="entry">' marker, up to
    the following '<div class="socials">' marker, minus the last '</div>'
    found in that span (when there is one).

    Raises ValueError if either marker is missing from the page.
    '''
    start = html.find(_ENTRY_OPEN_MARKER)
    if start == -1:
        raise ValueError('Could not find the ' + _ENTRY_OPEN_MARKER + ' marker in the fetched page; is it really a Wikimedia Blog post?')
    entry = html[start + len(_ENTRY_OPEN_MARKER):]
    end = entry.find(_ENTRY_END_MARKER)
    if end == -1:
        raise ValueError('Could not find the ' + _ENTRY_END_MARKER + ' marker after the post body in the fetched page.')
    entry = entry[:end]
    # rfind() returns -1 when there is no closing tag; slicing with -1 would
    # silently drop the final character, so only trim when a tag was found.
    last_close = entry.rfind('</div>')
    if last_close != -1:
        entry = entry[:last_close]
    return entry


def main(page, target=None):
    '''Fetch a Wikimedia Blog post and return it packaged as Signpost wikitext.

    page   -- URL of the post; must contain 'blog.wikimedia.org'.
    target -- wiki page the result is destined for. It is only validated
              here (this function does not write anything): it must contain
              'User:Resident Mario/' or 'Wikipedia:Wikipedia Signpost/'.
              When omitted, it is resolved at call time to
              signpostlib.getNextSignpostPublicationString() + '/Blog'.

    Returns the wikitext string for the Signpost draft page.

    Raises IOError if page or target is invalid or the HTTP request fails
    (requests' exceptions derive from IOError), and ValueError if the
    fetched page does not contain the expected post markers.
    '''
    # Check to make sure that the page argument is filled in with valid input, and return an error if it is not.
    if not page or 'blog.wikimedia.org' not in page:
        raise IOError("This script requires a valid --page parameter. Otherwise the script doesn't know what Blog content to intake! For example try: python Blog_Importer.py --page 'https://blog.wikimedia.org/2015/07/16/third-transparency-report-released/'")
    # Resolve the default here rather than in the signature: a default
    # expression runs once, when the function is defined, which would call
    # into signpostlib at import time and go stale in a long-lived process.
    if target is None:
        target = signpostlib.getNextSignpostPublicationString() + '/Blog'
    if 'User:Resident Mario/' not in target and 'Wikipedia:Wikipedia Signpost/' not in target:
        raise IOError("A target page was provided but did not conform to legal targets for this script. Please direct your target at a subpage of User:Resident Mario or of Wikipedia:Wikipedia Signpost. To just set it to the next Signpost issue, don't provide this argument at all.")
    # Fetch the blog page, failing loudly on HTTP errors instead of trying
    # to parse an error page.
    response = requests.get(page, timeout=_REQUEST_TIMEOUT_SECONDS)
    response.raise_for_status()
    # Core the data to the post itself.
    post = _extract_entry_html(response.text)
    # Convert the blog's HTML to native wikicode (via the RESTBase API, per
    # the original author's note; the helper lives in signpostlib).
    post = signpostlib.htmlToWikitext(post)
    # Package the post for inclusion in the Signpost.
    return _ARTICLE_HEADER + post + _ARTICLE_FOOTER
# Command-line entry point: everything below runs only when this file is executed directly.
if __name__ == '__main__':
    cli = argparse.ArgumentParser()
    cli.add_argument('-p', '--page', help="The webpage from which this script will attempt to intake content. Must be a page in the Wikimedia Blog domain. This is a required argument.")
    cli.add_argument('-t', '--target', help="The target page to which the output of this script will be written. Must be a subpage of the Wikipedia Signpost or in the userspace this script's writer, Resident Mario.")
    options = cli.parse_args()

    # Refuse to continue without a source URL on the Wikimedia Blog domain.
    if not (options.page and 'blog.wikimedia.org' in options.page):
        raise IOError("This script requires a valid --page parameter. Otherwise the script doesn't know what Blog content to intake! For example try: python Blog_Importer.py --page 'https://blog.wikimedia.org/2015/07/16/third-transparency-report-released/'")

    # An explicit target must sit in one of the permitted namespaces; when no
    # target is given, fall back to the next Signpost issue's Blog subpage.
    destination = options.target
    if destination:
        if not ('User:Resident Mario/' in destination or 'Wikipedia:Wikipedia Signpost/' in destination):
            raise IOError("A target page was provided but did not conform to legal targets for this script. Please direct your target at a subpage of User:Resident Mario or of Wikipedia:Wikipedia Signpost. To just set it to the next Signpost issue, don't provide this argument at all.")
    else:
        destination = signpostlib.getNextSignpostPublicationString() + '/Blog'

    # Build the wikitext and save it to the destination page.
    wikitext = main(options.page, destination)
    signpostlib.saveContentToPage(wikitext, destination, 'Importing basic Blog repost via the [https://github.com/ResidentMario/Blog_Importer Blog_Importer] script.')