Skip to content

Commit

Permalink
More cleaning in offline doc scripts
Browse files: browse the repository at this point in the history
  • Loading branch information
yorikvanhavre committed Apr 8, 2014
1 parent 162b820 commit dc0bbee
Showing 1 changed file with 6 additions and 6 deletions.
12 changes: 6 additions & 6 deletions src/Tools/offlinedoc/downloadwiki.py
@@ -183,15 +183,15 @@ def getimagelinks(html):
def cleanhtml(html):
"cleans given html code from dirty script stuff"
html = html.replace('\n','Wlinebreak') # removing linebreaks for regex processing
html = re.compile('(.*)<div[^>]+column-content+[^>]+>').sub('',html) # stripping before content
html = re.compile('<div[^>]+column-one+[^>]+>.*').sub('',html) # stripping after content
html = re.compile('(.*)<div id=\"content+[^>]+>').sub('',html) # stripping before content
html = re.compile('<div id="mw-head+[^>]+>.*').sub('',html) # stripping after content
html = re.compile('<!--[^>]+-->').sub('',html) # removing comment tags
html = re.compile('<script[^>]*>.*?</script>').sub('',html) # removing script tags
html = re.compile('<!--\[if[^>]*>.*?endif\]-->').sub('',html) # removing IE tags
html = re.compile('<div id="jump-to-nav"[^>]*>.*?</div>').sub('',html) # removing nav div
html = re.compile('<h3 id="siteSub"[^>]*>.*?</h3>').sub('',html) # removing print subtitle
html = re.compile('Retrieved from').sub('Online version:',html) # changing online title
html = re.compile('<div id="mw-normal-catlinks[^>]>.*?</div>').sub('',html) # removing catlinks
html = re.compile('<div id="mw-normal-catlinks.*?</div>').sub('',html) # removing catlinks
html = re.compile('<div class="NavHead.*?</div>').sub('',html) # removing nav stuff
html = re.compile('<div class="NavContent.*?</div>').sub('',html) # removing nav stuff
html = re.compile('<div class="NavEnd.*?</div>').sub('',html) # removing nav stuff
@@ -278,12 +278,12 @@ def webroot(url):

def output(html,page):
"encapsulates raw html code into nice html body"
title = page.replace("_"," ")
header = "<html><head>"
header += "<title>"
header += page
header += "</title>"
header += "<title>" + title + "</title>"
header += "<link type='text/css' href='wiki.css' rel='stylesheet'>"
header += "</head><body>"
header += "<h1>" + title + "</h1>"
footer = "</body></html>"
html = header+html+footer
filename = local(page.replace("/","-"))
Expand Down

0 comments on commit dc0bbee

Please sign in to comment.