Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP

Loading…

Enhancements for Py3K support #3

Closed
scottkmaxwell wants to merge 3 commits

1 participant

@scottkmaxwell

I am working on getting supervisord to run on Python 3. I started with meld3 and made the minimum changes required to allow it to run on both Py2 and Py3. The oldest version of Python I have on my Mac is 2.5, but I believe that all changes should at least run on 2.4. They may also work on 2.3, but I don't know enough about the differences between 2.3 and 2.4 to say for sure. I made simple inline changes rather than adding a dependency on the six compatibility library.

Interestingly, getting meld3 to run on Py3 exposed a segfault in the latest Python 3.3.0 when processing XML. I have verified this with the pre-built version on my Mac and a source-built version on Linux.

Anyway, I hope you will consider accepting my changes. It was a bit more work than I had expected due to changes in the XML library.

Best,

Scott Maxwell

scottkmaxwell added some commits
@scottkmaxwell scottkmaxwell Enhance to run on Python 3
Replace mimetools with a simple parse.
Fix imports
Add aliases for basestring, unicode and unichr
Add an encode() helper and replace str.encode(encoding) with encode(str, encoding) so that encoding is a no-op on Py3
Create stub for _encode_entity if we don't have it
Always return False for is_not_ascii on Py3
Wrap keys() and values() in list() for Py3
Enhance __setitem__ and __delitem__ to handle slices since Py3 doesn't use __setslice__ and __delslice__
Change MeldTreeBuilder from a function to a class so we can provide the comment method for Py3
Change all raise statements to use the call form ValueError(msg) rather than the Py2-only form ValueError, msg
Replace backticks with repr()
d7c4ead
@scottkmaxwell scottkmaxwell Use PY3 variable to do correct compatibility layer 93b6efe
@scottkmaxwell scottkmaxwell Fix setup.py to remove Py3 exclusion bed37d3
@mnaberez mnaberez closed this in 66fd6e0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Commits on Oct 12, 2012
  1. @scottkmaxwell

    Enhance to run on Python 3

    scottkmaxwell authored
    Replace mimetools with a simple parse.
    Fix imports
    Add aliases for basestring, unicode and unichr
    Add an encode() helper and replace str.encode(encoding) with encode(str, encoding) so that encoding is a no-op on Py3
    Create stub for _encode_entity if we don't have it
    Always return False for is_not_ascii on Py3
    Wrap keys() and values() in list() for Py3
    Enhance __setitem__ and __delitem__ to handle slices since Py3 doesn't use __setslice__ and __delslice__
    Change MeldTreeBuilder from a function to a class so we can provide the comment method for Py3
    Change all raise statements to use the call form ValueError(msg) rather than the Py2-only form ValueError, msg
    Replace backticks with repr()
Commits on Oct 13, 2012
  1. @scottkmaxwell
  2. @scottkmaxwell
This page is out of date. Refresh to see the latest.
View
1  .gitignore
@@ -16,3 +16,4 @@ meld3/logfile.dat
meld3/logfile_zpt.dat
nosetests.xml
tmp/
+.idea/
View
14 meld3/__init__.py
@@ -1,5 +1,11 @@
# make these easier to import
-from meld3 import parse_xml
-from meld3 import parse_html
-from meld3 import parse_xmlstring
-from meld3 import parse_htmlstring
+try:
+ from .meld3 import parse_xml
+ from .meld3 import parse_html
+ from .meld3 import parse_xmlstring
+ from .meld3 import parse_htmlstring
+except ImportError:
+ from meld3 import parse_xml
+ from meld3 import parse_html
+ from meld3 import parse_xmlstring
+ from meld3 import parse_htmlstring
View
5 meld3/example.py
@@ -30,7 +30,10 @@
"""
from meld3 import parse_xmlstring
from meld3 import parse_htmlstring
-from StringIO import StringIO
+try:
+ from StringIO import StringIO
+except ImportError:
+ from io import StringIO
import sys
root = parse_xmlstring(xml)
View
149 meld3/meld3.py
@@ -1,10 +1,24 @@
-import htmlentitydefs
import os
import re
-import types
-import mimetools
-import string
-from StringIO import StringIO
+import sys
+PY3 = sys.version>'3'
+if PY3:
+ import html.entities as htmlentitydefs
+ from io import StringIO
+ long = int
+ basestring = str
+ unichr = chr
+ class unicode(str):
+ def __init__(self, string, encoding, errors):
+ str.__init__(self, string)
+ def encode(text, encoding):
+ return text
+else:
+ import htmlentitydefs
+ #noinspection PyUnresolvedReferences
+ from StringIO import StringIO
+ def encode(text, encoding):
+ return text.encode(encoding)
try:
from elementtree.ElementTree import TreeBuilder
@@ -14,6 +28,7 @@
from elementtree.ElementTree import QName
from elementtree.ElementTree import _raise_serialization_error
from elementtree.ElementTree import _namespace_map
+ from elementtree.ElementTree import _encode_entity
from elementtree.ElementTree import fixtag
from elementtree.ElementTree import parse as et_parse
from elementtree.ElementTree import ElementPath
@@ -25,6 +40,11 @@
from xml.etree.ElementTree import QName
from xml.etree.ElementTree import _raise_serialization_error
from xml.etree.ElementTree import _namespace_map
+ from xml.etree.ElementTree import XMLParser
+ try:
+ from xml.etree.ElementTree import _encode_entity
+ except ImportError:
+ def _encode_entity(s): return s
from xml.etree.ElementTree import parse as et_parse
from xml.etree.ElementTree import ElementPath
@@ -36,7 +56,7 @@ def fixtag(tag, namespaces):
# tag and namespace declaration, if any
if isinstance(tag, QName):
tag = tag.text
- namespace_uri, tag = string.split(tag[1:], "}", 1)
+ namespace_uri, tag = tag[1:].split("}", 1)
prefix = namespaces.get(namespace_uri)
if prefix is None:
prefix = _namespace_map.get(namespace_uri)
@@ -54,10 +74,14 @@ def fixtag(tag, namespaces):
# HTMLTreeBuilder does not exist in python 2.5 standard elementtree
-from HTMLParser import HTMLParser
+try:
+ #noinspection PyUnresolvedReferences
+ from HTMLParser import HTMLParser
+except ImportError:
+ from html.parser import HTMLParser
AUTOCLOSE = "p", "li", "tr", "th", "td", "head", "body"
IGNOREEND = "img", "hr", "meta", "link", "br"
-is_not_ascii = re.compile(eval(r'u"[\u0080-\uffff]"')).search
+is_not_ascii = lambda s: False if PY3 else re.compile(eval(r'u"[\u0080-\uffff]"')).search
# replace element factory
def Replace(text, structure=False):
@@ -231,10 +255,10 @@ def set(self, key, value):
self.attrib[key] = value
def keys(self):
- return self.attrib.keys()
+ return list(self.attrib.keys())
def items(self):
- return self.attrib.items()
+ return list(self.attrib.items())
def getiterator(self, *ignored_args, **ignored_kw):
# we ignore any tag= passed in to us, because it's too painful
@@ -244,8 +268,12 @@ def getiterator(self, *ignored_args, **ignored_kw):
# overrides to support parent pointers and factories
def __setitem__(self, index, element):
+ if isinstance(index, slice):
+ for e in element:
+ e.parent = self
+ else:
+ element.parent = self
self._children[index] = element
- element.parent = self
def __setslice__(self, start, stop, elements):
for element in elements:
@@ -261,8 +289,11 @@ def insert(self, index, element):
element.parent = self
def __delitem__(self, index):
- ob = self._children[index]
- ob.parent = None
+ if isinstance(index, slice):
+ for ob in self._children[index]:
+ ob.parent = None
+ else:
+ self._children[index].parent = None
del self._children[index]
def __delslice__(self, start, stop):
@@ -282,7 +313,7 @@ def makeelement(self, tag, attrib):
def __mod__(self, other):
""" Fill in the text values of meld nodes in tree; only
- support dictionarylike operand (sequence operand doesn't seem
+ support dictionary-like operand (sequence operand doesn't seem
to make sense here)"""
return self.fillmelds(**other)
@@ -539,10 +570,10 @@ def attributes(self, **kw):
""" Set attributes on this node. """
for k, v in kw.items():
# prevent this from getting to the parser if possible
- if not isinstance(k, types.StringTypes):
- raise ValueError, 'do not set non-stringtype as key: %s' % k
- if not isinstance(v, types.StringTypes):
- raise ValueError, 'do not set non-stringtype as val: %s' % v
+ if not isinstance(k, basestring):
+ raise ValueError('do not set non-stringtype as key: %s' % k)
+ if not isinstance(v, basestring):
+ raise ValueError('do not set non-stringtype as val: %s' % v)
self.attrib[k] = kw[k]
# output methods
@@ -560,7 +591,7 @@ def write_xmlstring(self, encoding=None, doctype=None, fragment=False,
def write_xml(self, file, encoding=None, doctype=None,
fragment=False, declaration=True, pipeline=False):
- """ Write XML to 'file' (which can be a filename or filelike object)
+ """ Write XML to 'file' (which can be a filename or file-like object)
encoding - encoding string (if None, 'utf-8' encoding is assumed)
Must be a recognizable Python encoding type.
@@ -608,7 +639,7 @@ def write_htmlstring(self, encoding=None, doctype=doctype.html,
def write_html(self, file, encoding=None, doctype=doctype.html,
fragment=False):
- """ Write HTML to 'file' (which can be a filename or filelike object)
+ """ Write HTML to 'file' (which can be a filename or file-like object)
encoding - encoding string (if None, 'utf-8' encoding is assumed).
Unlike XML output, this is not used in a declaration,
@@ -645,7 +676,7 @@ def write_xhtmlstring(self, encoding=None, doctype=doctype.xhtml,
def write_xhtml(self, file, encoding=None, doctype=doctype.xhtml,
fragment=False, declaration=False, pipeline=False):
- """ Write XHTML to 'file' (which can be a filename or filelike object)
+ """ Write XHTML to 'file' (which can be a filename or file-like object)
encoding - encoding string (if None, 'utf-8' encoding is assumed)
Must be a recognizable Python encoding type.
@@ -752,8 +783,15 @@ def lineage(self):
return L
-def MeldTreeBuilder():
- return TreeBuilder(element_factory=_MeldElementInterface)
+class MeldTreeBuilder(TreeBuilder):
+ def __init__(self):
+ TreeBuilder.__init__(self, element_factory=_MeldElementInterface)
+ def comment(self, data):
+ self.start(Comment, {})
+ self.data(data)
+ self.end(Comment)
+ def doctype(self, name, pubid, system):
+ pass
class MeldParser(XMLTreeBuilder):
@@ -766,7 +804,8 @@ class MeldParser(XMLTreeBuilder):
def __init__(self, html=0, target=None):
XMLTreeBuilder.__init__(self, html, target)
# assumes ElementTree 1.2.X
- self._parser.CommentHandler = self.handle_comment
+ if not PY3:
+ self._parser.CommentHandler = self.handle_comment
self.meldids = {}
def handle_comment(self, data):
@@ -782,7 +821,7 @@ def _start(self, tag, attrib_in):
if '{' + key == _MELD_ID:
meldid = attrib_in[key]
if self.meldids.get(meldid):
- raise ValueError, ('Repeated meld id "%s" in source' %
+ raise ValueError('Repeated meld id "%s" in source' %
meldid)
self.meldids[meldid] = 1
return XMLTreeBuilder._start(self, tag, attrib_in)
@@ -798,7 +837,7 @@ def _start_list(self, tag, attrib_in):
if _MELD_ID == attrib:
meldid = attrib_in[i+1]
if self.meldids.get(meldid):
- raise ValueError, ('Repeated meld id "%s" in source' %
+ raise ValueError('Repeated meld id "%s" in source' %
meldid)
self.meldids[meldid] = 1
return XMLTreeBuilder._start_list(self, tag, attrib_in)
@@ -837,11 +876,7 @@ def handle_starttag(self, tag, attrs):
elif k == "content":
content = v
if http_equiv == "content-type" and content:
- # use mimetools to parse the http header
- header = mimetools.Message(
- StringIO("%s: %s\n\n" % (http_equiv, content))
- )
- encoding = header.getparam("charset")
+ encoding = dict(v.strip().partition('=')[0::2] for v in content.split(';')).get('charset')
if encoding:
self.encoding = encoding
if tag in AUTOCLOSE:
@@ -854,7 +889,7 @@ def handle_starttag(self, tag, attrs):
if k == _MELD_SHORT_ID:
k = _MELD_ID
if self.meldids.get(v):
- raise ValueError, ('Repeated meld id "%s" in source' %
+ raise ValueError('Repeated meld id "%s" in source' %
v)
self.meldids[v] = 1
else:
@@ -920,7 +955,7 @@ def do_parse(source, parser):
return root
def parse_xml(source):
- """ Parse source (a filelike object) into an element tree. If
+ """ Parse source (a file-like object) into an element tree. If
html is true, use a parser that can resolve somewhat ambiguous
HTML into XHTML. Otherwise use a 'normal' parser only."""
builder = MeldTreeBuilder()
@@ -945,7 +980,7 @@ def parse_htmlstring(text, encoding=None):
def _both_case(mapping):
# Add equivalent upper-case keys to mapping.
- lc_keys = mapping.keys()
+ lc_keys = list(mapping.keys())
for k in lc_keys:
mapping[k.upper()] = mapping[k]
@@ -979,17 +1014,17 @@ def _write_html(write, node, encoding, namespaces, depth=-1, maxdepth=None):
if not node.structure:
if cdata_needs_escaping(text):
text = _escape_cdata(text)
- write(text.encode(encoding))
+ write(encode(text,encoding))
elif tag is Comment:
if cdata_needs_escaping(text):
text = _escape_cdata(text)
- write('<!-- ' + text + ' -->'.encode(encoding))
+ write(encode('<!-- ' + text + ' -->', encoding))
elif tag is ProcessingInstruction:
if cdata_needs_escaping(text):
text = _escape_cdata(text)
- write('<!-- ' + text + ' -->'.encode(encoding))
+ write(encode('<!-- ' + text + ' -->', encoding))
else:
xmlns_items = [] # new namespaces in this scope
@@ -1004,13 +1039,13 @@ def _write_html(write, node, encoding, namespaces, depth=-1, maxdepth=None):
except TypeError:
_raise_serialization_error(tag)
- to_write += "<%s" % tag.encode(encoding)
+ to_write += "<%s" % encode(tag,encoding)
attrib = node.attrib
if attrib is not None:
if len(attrib) > 1:
- attrib_keys = attrib.keys()
+ attrib_keys = list(attrib.keys())
attrib_keys.sort()
else:
attrib_keys = attrib
@@ -1021,7 +1056,7 @@ def _write_html(write, node, encoding, namespaces, depth=-1, maxdepth=None):
except TypeError:
_raise_serialization_error(k)
if k in _HTMLATTRS_BOOLEAN:
- to_write += ' ' + k.encode(encoding)
+ to_write += ' ' + encode(k,encoding)
else:
v = attrib[k]
to_write += " %s=\"%s\"" % (k, v)
@@ -1033,11 +1068,11 @@ def _write_html(write, node, encoding, namespaces, depth=-1, maxdepth=None):
if text is not None and text:
if tag in _HTMLTAGS_NOESCAPE:
- to_write += text.encode(encoding)
+ to_write += encode(text,encoding)
elif cdata_needs_escaping(text):
to_write += _escape_cdata(text)
else:
- to_write += text.encode(encoding)
+ to_write += encode(text,encoding)
write(to_write)
@@ -1054,13 +1089,13 @@ def _write_html(write, node, encoding, namespaces, depth=-1, maxdepth=None):
_write_html(write, child, encoding, namespaces, depth, maxdepth)
if text or node._children or tag not in _HTMLTAGS_UNBALANCED:
- write("</" + tag.encode(encoding) + ">")
+ write("</" + encode(tag,encoding) + ">")
if tail:
if cdata_needs_escaping(tail):
write(_escape_cdata(tail))
else:
- write(tail.encode(encoding))
+ write(encode(tail,encoding))
def _write_html_no_encoding(write, node, namespaces):
""" Append HTML to string without any particular unicode encoding.
@@ -1111,7 +1146,7 @@ def _write_html_no_encoding(write, node, namespaces):
if attrib is not None:
if len(attrib) > 1:
- attrib_keys = attrib.keys()
+ attrib_keys = list(attrib.keys())
attrib_keys.sort()
else:
@@ -1167,7 +1202,7 @@ def _write_xml(write, node, encoding, namespaces, pipeline, xhtml=False):
elif tag is Replace:
if node.structure:
# this may produce invalid xml
- write(node.text.encode(encoding))
+ write(encode(node.text,encoding))
else:
write(_escape_cdata(node.text, encoding))
else:
@@ -1175,7 +1210,7 @@ def _write_xml(write, node, encoding, namespaces, pipeline, xhtml=False):
if tag[:_XHTML_PREFIX_LEN] == _XHTML_PREFIX:
tag = tag[_XHTML_PREFIX_LEN:]
if node.attrib:
- items = node.attrib.items()
+ items = list(node.attrib.items())
else:
items = [] # must always be sortable.
xmlns_items = [] # new namespaces in this scope
@@ -1186,7 +1221,7 @@ def _write_xml(write, node, encoding, namespaces, pipeline, xhtml=False):
xmlns_items.append(xmlns)
except TypeError:
_raise_serialization_error(tag)
- write("<" + tag.encode(encoding))
+ write("<" + encode(tag,encoding))
if items or xmlns_items:
items.sort() # lexical order
for k, v in items:
@@ -1203,10 +1238,10 @@ def _write_xml(write, node, encoding, namespaces, pipeline, xhtml=False):
continue
except TypeError:
_raise_serialization_error(k)
- write(" %s=\"%s\"" % (k.encode(encoding),
+ write(" %s=\"%s\"" % (encode(k,encoding),
_escape_attrib(v, encoding)))
for k, v in xmlns_items:
- write(" %s=\"%s\"" % (k.encode(encoding),
+ write(" %s=\"%s\"" % (encode(k,encoding),
_escape_attrib(v, encoding)))
if node.text or node._children:
write(">")
@@ -1214,7 +1249,7 @@ def _write_xml(write, node, encoding, namespaces, pipeline, xhtml=False):
write(_escape_cdata(node.text, encoding))
for n in node._children:
_write_xml(write, n, encoding, namespaces, pipeline, xhtml)
- write("</" + tag.encode(encoding) + ">")
+ write("</" + encode(tag,encoding) + ">")
else:
write(" />")
for k, v in xmlns_items:
@@ -1231,7 +1266,7 @@ def _escape_cdata(text, encoding=None):
try:
if encoding:
try:
- text = text.encode(encoding)
+ text = encode(text,encoding)
except UnicodeError:
return _encode_entity(text)
text = nonentity_re.sub('&amp;', text)
@@ -1245,7 +1280,7 @@ def _escape_attrib(text, encoding=None):
try:
if encoding:
try:
- text = text.encode(encoding)
+ text = encode(text,encoding)
except UnicodeError:
return _encode_entity(text)
# don't requote properly-quoted entities
@@ -1281,13 +1316,13 @@ def _write_doctype(write, doctype):
try:
name, pubid, system = doctype
except (ValueError, TypeError):
- raise ValueError, ("doctype must be supplied as a 3-tuple in the form "
+ raise ValueError("doctype must be supplied as a 3-tuple in the form "
"(name, pubid, system) e.g. '%s'" % doctype.xhtml)
write('<!DOCTYPE %s PUBLIC "%s" "%s">\n' % (name, pubid, system))
xml_decl_re = re.compile(r'<\?xml .*?\?>')
begin_tag_re = re.compile(r'<[^/?!]?\w+')
-'<!DOCTYPE %s PUBLIC "%s" "%s">' % doctype.html
+#'<!DOCTYPE %s PUBLIC "%s" "%s">' % doctype.html
def insert_doctype(data, doctype=doctype.xhtml):
# jam an html doctype declaration into 'data' if it
@@ -1368,7 +1403,7 @@ def melditerator(element, meldid=None, _MELD_ID=_MELD_ID):
def search(name):
if not "." in name:
- raise ValueError("unloadable datatype name: " + `name`)
+ raise ValueError("unloadable datatype name: " + repr(name))
components = name.split('.')
start = components[0]
g = globals()
@@ -1409,7 +1444,7 @@ def sample_mutator(root):
mutator = sys.argv[2]
except IndexError:
mutator = None
- import timeit
+# import timeit
root = parse_html(open(filename, 'r'))
io = StringIO()
if mutator:
View
14 meld3/test_meld3.py
@@ -1,7 +1,9 @@
import unittest
-from StringIO import StringIO
+try:
+ from StringIO import StringIO
+except ImportError:
+ from io import StringIO
import re
-import sys
_SIMPLE_XML = r"""<?xml version="1.0"?>
<root xmlns:meld="http://www.plope.com/software/meld3">
@@ -691,10 +693,10 @@ def test_clone(self):
self.assertEqual(div[0][0][0].text, div2[0][0][0].text)
self.assertEqual(div[0][0][0].tail, div2[0][0][0].tail)
- self.failIfEqual(id(div), id(div2))
- self.failIfEqual(id(div[0]), id(div2[0]))
- self.failIfEqual(id(div[0][0]), id(div2[0][0]))
- self.failIfEqual(id(div[0][0][0]), id(div2[0][0][0]))
+ self.assertNotEqual(id(div), id(div2))
+ self.assertNotEqual(id(div[0]), id(div2[0]))
+ self.assertNotEqual(id(div[0][0]), id(div2[0][0]))
+ self.assertNotEqual(id(div[0][0][0]), id(div2[0][0][0]))
def test_deparent_noparent(self):
div = self._makeOne('div', {})
View
22 meld3/testclone.py
@@ -1,6 +1,7 @@
import meld3
import time
import timeit
+import sys
parent = meld3._MeldElementInterface('parent', {})
clonable = meld3._MeldElementInterface('root', {})
@@ -26,15 +27,18 @@ def dotimeit(timer, name):
best = min(result)
usec = best * 1e6 / number
msec = usec / 1000
- print "%s best of %d: %.*g msec per loop" % (name, repeat, 8, msec)
-
-t = timeit.Timer("meld3.chelper.clone(clonable, parent)",
- "from __main__ import meld3, clonable, parent")
-dotimeit(t, "C DF")
-
-t = timeit.Timer("meld3.chelper.bfclone(clonable, parent)",
- "from __main__ import meld3, clonable, parent")
-dotimeit(t, "C BF")
+ sys.stdout.write("%s best of %d: %.*g msec per loop\n" % (name, repeat, 8, msec))
+
+try:
+ t = timeit.Timer("meld3.chelper.clone(clonable, parent)",
+ "from __main__ import meld3, clonable, parent")
+ dotimeit(t, "C DF")
+
+ t = timeit.Timer("meld3.chelper.bfclone(clonable, parent)",
+ "from __main__ import meld3, clonable, parent")
+ dotimeit(t, "C BF")
+except AttributeError:
+ pass
t = timeit.Timer("meld3.pyhelper.clone(clonable, parent)",
"from __main__ import meld3, clonable, parent")
View
7 setup.py
@@ -2,10 +2,9 @@
import os
import sys
-if sys.version_info[:2] < (2, 3) or sys.version_info[0] > 2:
- msg = ("meld3 requires Python 2.3 or later but does not work on any "
- "version of Python 3. You are using version %s. Please "
- "install using a supported version." % sys.version)
+if sys.version_info[:2] < (2, 3):
+ msg = ("meld3 requires Python 2.3 or later. You are using version %s. "
+ "Please install using a supported version." % sys.version)
sys.stderr.write(msg)
sys.exit(1)
Something went wrong with that request. Please try again.