Skip to content

Commit

Permalink
Specify character encoding in HTML HEAD
Browse files Browse the repository at this point in the history
This adds an explicit declaration of the UTF-8 character encoding
to the Qt-generated HTML dump. Since we have just explicitly requested
UTF-8 conversion from QString, this declaration should be correct;
cf. http://www.w3.org/International/O-charset

This patch fixes incorrect characters (e.g., A-hat for tab) in, e.g.,
Firefox's default rendering of exported HTML.

The same fix is applied to both HTML and XHTML export, even though
Firefox and WebKit appear to assume UTF-8 for XHTML even without
an explicit declaration.
  • Loading branch information
markvoorhies committed Oct 13, 2010
1 parent 1024e5d commit f467f96
Showing 1 changed file with 28 additions and 1 deletion.
29 changes: 28 additions & 1 deletion IPython/frontend/qt/console/console_widget.py
Expand Up @@ -549,9 +549,11 @@ def export_html(self, parent = None, inline = False):
# N.B. this is overly restrictive, but Qt's output is
# predictable...
img_re = re.compile(r'<img src="(?P<name>[\d]+)" />')
html = self.fix_html_encoding(
str(self._control.toHtml().toUtf8()))
f.write(img_re.sub(
lambda x: self.image_tag(x, path = path, format = "png"),
str(self._control.toHtml().toUtf8())))
html))
finally:
f.close()
return filename
Expand All @@ -578,6 +580,8 @@ def export_xhtml(self, parent = None):
assert(offset > -1)
html = ('<html xmlns="http://www.w3.org/1999/xhtml">\n'+
html[offset+6:])
# And now declare UTF-8 encoding
html = self.fix_html_encoding(html)
f.write(img_re.sub(
lambda x: self.image_tag(x, path = None, format = "svg"),
html))
Expand All @@ -586,6 +590,29 @@ def export_xhtml(self, parent = None):
return filename
return None

def fix_html_encoding(self, html):
    """ Return html string, with a UTF-8 declaration added to <head>.

    Assumes that html is Qt generated and has already been UTF-8 encoded
    and coerced to a python string.

    This patching is needed for proper rendering of some characters
    (e.g., indented commands) when viewing exported HTML on a local
    system (i.e., without seeing an encoding declaration in an HTTP
    header).
    Cf. http://www.w3.org/International/O-charset for details.

    Parameters
    ----------
    html : string
        The (X)HTML document to patch.

    Returns
    -------
    The patched document.  If no literal "<head>" tag is found, or if
    the declaration written by this method already directly follows the
    tag, the given object is returned unmodified.
    """
    head_tag = "<head>"
    insertion = ('\n<meta http-equiv="Content-Type" '
                 'content="text/html; charset=utf-8" />\n')

    offset = html.find(head_tag)
    if offset == -1:
        # No head element to patch; hand the input back untouched.
        return html

    insert_at = offset + len(head_tag)
    if html.startswith(insertion, insert_at):
        # Already patched by an earlier call; adding the declaration a
        # second time would only duplicate the <meta> element.
        return html

    return html[:insert_at] + insertion + html[insert_at:]

def image_tag(self, match, path = None, format = "png"):
""" Return (X)HTML mark-up for the image-tag given by match.
Expand Down

0 comments on commit f467f96

Please sign in to comment.