Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
3dc0f77
commit 8aad83f
Showing
5 changed files
with
249 additions
and
29 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,198 @@ | ||
#!/usr/bin/env python | ||
|
||
#*************************************************************************** | ||
#* * | ||
#* Copyright (c) 2017 Yorik van Havre <yorik@uncreated.net> * | ||
#* * | ||
#* This program is free software; you can redistribute it and/or modify * | ||
#* it under the terms of the GNU Lesser General Public License (LGPL) * | ||
#* as published by the Free Software Foundation; either version 2 of * | ||
#* the License, or (at your option) any later version. * | ||
#* for detail see the LICENCE text file. * | ||
#* * | ||
#* This program is distributed in the hope that it will be useful, * | ||
#* but WITHOUT ANY WARRANTY; without even the implied warranty of * | ||
#* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * | ||
#* GNU Library General Public License for more details. * | ||
#* * | ||
#* You should have received a copy of the GNU Library General Public * | ||
#* License along with this program; if not, write to the Free Software * | ||
#* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 * | ||
#* USA * | ||
#* * | ||
#*************************************************************************** | ||
|
||
__title__="update.py" | ||
__author__ = "Yorik van Havre <yorik@uncreated.net>" | ||
__url__ = "http://www.freecadweb.org" | ||
|
||
""" | ||
This script needs to be ran after the wiki has been fully downloaded. It has three usages: | ||
1) If no revisions.txt file is found, it parses the contents of the wikifiles.txt file | ||
and, for each entry, it retrieves a corresponding revision ID, and creates a revisions.txt file | ||
2) If a revisions.txt file exists but no update.txt file exists, it crawls through all entries of | ||
wikifiles.txt, and for each one, compares the current revision with the one stored in revisions.txt. | ||
An update.txt file is created with all pages that have different revision IDs | ||
3) If update.txt exists, each entry of it will be scanned again for new links and all the needed | ||
files downloaded. Revision.txt and wikifiles.txt get also updated. | ||
""" | ||
|
||
import sys, os, re, tempfile, getopt | ||
from urllib2 import urlopen, HTTPError | ||
|
||
# CONFIGURATION ################################################# | ||
|
||
URL = "https://www.freecadweb.org/wiki" #default URL if no URL is passed | ||
GETTRANSLATIONS = False # Set true if you want to get the translations too. | ||
MAXFAIL = 3 # max number of retries if download fails | ||
VERBOSE = True # to display what's going on. Otherwise, runs totally silent. | ||
|
||
# END CONFIGURATION ############################################## | ||
|
||
wikiindex = "/index.php?title=" | ||
|
||
def update(pagename=None): | ||
|
||
if not os.path.exists("revisions.txt"): # case 1) | ||
if not os.path.exists("wikifiles.txt"): | ||
print "No wikifiles.txt found. Aborting" | ||
sys.exit() | ||
pages = [] | ||
f = open("wikifiles.txt","r") | ||
if VERBOSE: print "Reading existing list..." | ||
for l in f.readlines(): | ||
if l.strip() != "": | ||
if not "/wiki/" in l: | ||
if VERBOSE: print "Adding ",l.strip() | ||
pages.append(l.strip()) | ||
f.close() | ||
if VERBOSE: print "Added ",str(len(pages))," entries" | ||
i = 1 | ||
revs = [] | ||
for page in pages: | ||
rev = getRevision(page) | ||
if VERBOSE: print str(i)," revision: ",rev | ||
revs.append(page+":"+rev) | ||
i += 1 | ||
writeList(revs,"revisions.txt") | ||
print "All done. Successfully written revisions.txt with ",len(revs)," entries." | ||
|
||
elif os.path.exists("revisions.txt") and (not os.path.exists("updates.txt")): # case 2) | ||
f = open("revisions.txt","r") | ||
if VERBOSE: print "Reading revisions list..." | ||
revisions = {} | ||
for l in f.readlines(): | ||
if l.strip() != "": | ||
r = l.strip().split(":") | ||
p = ":".join(r[:-1]) | ||
if VERBOSE: print "Adding ",p | ||
revisions[p] = r[1] | ||
f.close() | ||
if VERBOSE: print "Added ",str(len(revisions.keys()))," entries" | ||
updates = [] | ||
i = 1 | ||
for page in revisions.keys(): | ||
rev = getRevision(page) | ||
if rev != revisions[page]: | ||
if VERBOSE: print str(i),page," has a new revision: ",rev | ||
updates.append(page) | ||
else: | ||
if VERBOSE: print str(i),page," is up to date " | ||
i += 1 | ||
if updates: | ||
writeList(updates,"updates.txt") | ||
print "All done. Successfully written updates.txt with ",len(updates)," entries." | ||
else: | ||
print "Everything up to date. Nothing to be done." | ||
|
||
elif os.path.exists("revisions.txt") and os.path.exists("updates.txt"): # case 3) | ||
if not os.path.exists("wikifiles.txt"): | ||
print "No wikifiles.txt found. Aborting" | ||
sys.exit() | ||
wikifiles = [] | ||
f = open("wikifiles.txt","r") | ||
if VERBOSE: print "Reading wikifiles list..." | ||
for l in f.readlines(): | ||
if l.strip() != "": | ||
wikifiles.append(l.strip()) | ||
f.close() | ||
if VERBOSE: print "Read ",str(len(wikifiles))," entries" | ||
f = open("revisions.txt","r") | ||
if VERBOSE: print "Reading revisions list..." | ||
revisions = {} | ||
for l in f.readlines(): | ||
if l.strip() != "": | ||
r = l.strip().split(":") | ||
p = ":".join(r[:-1]) | ||
revisions[p] = r[1] | ||
f.close() | ||
todo = [] | ||
f = open("updates.txt","r") | ||
if VERBOSE: print "Reading updates list..." | ||
for l in f.readlines(): | ||
if l.strip() != "": | ||
todo.append(l.strip()) | ||
f.close() | ||
if VERBOSE: print str(len(todo))," pages to scan..." | ||
import buildwikiindex | ||
buildwikiindex.WRITETHROUGH = False | ||
buildwikiindex.VERBOSE = VERBOSE | ||
updates = [] | ||
for t in todo: | ||
if VERBOSE: print "Scanning ",t | ||
updates.extend(buildwikiindex.crawl(t)) | ||
updates = [u for u in updates if not u in wikifiles] | ||
if VERBOSE: print str(len(updates))," files to download..." | ||
import downloadwiki | ||
i = 1 | ||
for u in updates: | ||
if VERBOSE: print i, ": Fetching ", u | ||
downloadwiki.get(u) | ||
if not "/wiki/" in u: | ||
rev = getRevision(u) | ||
revisions[u] = rev | ||
if not u in wikifiles: | ||
wikifiles.append(u) | ||
i += 1 | ||
if VERBOSE: print "Updating wikifiles and revisions..." | ||
writeList(wikifiles,"wikifiles.txt") | ||
updatedrevs = [] | ||
for k in revisions.keys(): | ||
updatedrevs.append(k+":"+revisions[k]) | ||
writeList(updatedrevs,"revisions.txt") | ||
os.remove("updates.txt") | ||
if VERBOSE: print "All done!" | ||
|
||
def getRevision(page): | ||
html = fetchPage(page) | ||
revs = re.findall("wgCurRevisionId\"\:(.*?),",html) | ||
if len(revs) == 1: | ||
return revs[0] | ||
print 'Error: unable to get revision ID of ' + page | ||
sys.exit() | ||
|
||
def fetchPage(page): | ||
"retrieves given page from the wiki" | ||
print "fetching: ",page | ||
failcount = 0 | ||
while failcount < MAXFAIL: | ||
try: | ||
html = (urlopen(URL + wikiindex + page).read()) | ||
return html | ||
except HTTPError: | ||
failcount += 1 | ||
print 'Error: unable to fetch page ' + page | ||
sys.exit() | ||
|
||
def writeList(pages,filename): | ||
f = open(filename,"wb") | ||
for p in pages: | ||
f.write(p+"\n") | ||
f.close() | ||
if VERBOSE: print "written ",filename | ||
|
||
if __name__ == "__main__": | ||
update(sys.argv[1:]) |