Skip to content

Commit

Permalink
Update supported Python versions and zope.interface and zope.schema.
Browse files Browse the repository at this point in the history
This exposed a bug in unicode normalization.

Fixes #23, fixes #24, fixes #25 and fixes #26.
  • Loading branch information
jamadden committed Mar 27, 2020
1 parent e5570af commit 7f6abb0
Show file tree
Hide file tree
Showing 10 changed files with 210 additions and 121 deletions.
21 changes: 18 additions & 3 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,31 @@
Changes
=========

1.2.2 (unreleased)
1.3.0 (unreleased)
==================

- Nothing changed yet.
- Add support for Python 3.8.

- Depend on zope.interface 5.0.

- Update the datrie dependency. See https://github.com/NextThought/nti.contentfragments/issues/24

- Make ``IUnicodeContentFragment`` extend
``zope.interface.common.collections.ISequence`` instead of the
semi-deprecated ``zope.interface.common.sequence.IReadSequence``.

- Replace custom interfaces ``IString``, ``IUnicode`` and ``IBytes``
with aliases for ``INativeString``, ``ITextString`` and
``IByteString`` from ``zope.interface.common.builtins``. These
custom aliases are now deprecated. See https://github.com/NextThought/nti.contentfragments/issues/23.

- Fix unicode normalization breaking schema fields with zope.schema
6.0. See https://github.com/NextThought/nti.contentfragments/issues/26

1.2.1 (2019-11-07)
==================

- Remove a word from the censored word list. See issue #22.
- Remove a word from the censored word list. See issue https://github.com/NextThought/nti.contentfragments/issues/22.


1.2.0 (2018-10-15)
Expand Down
9 changes: 4 additions & 5 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@
# import os
# import sys
# sys.path.insert(0, os.path.abspath('.'))

# Look up the installed ``nti.contentfragments`` distribution so that the
# ``version`` and ``release`` settings in this file can be derived from its
# metadata.
import pkg_resources
rqmt = pkg_resources.require('nti.contentfragments')[0]
# -- General configuration ------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
Expand Down Expand Up @@ -65,11 +66,9 @@
# built documents.
#
# The short X.Y version.
with open('../version.txt') as f:
release = f.read().strip()
version = u'1.0'
version = '%s.%s' % tuple(map(int, rqmt.version.split('.')[:2]))
# The full version, including alpha/beta/rc tags.
release = u'1.0'
release = rqmt.version

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
Expand Down
32 changes: 16 additions & 16 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def _read(fname):
with codecs.open(fname, encoding='utf-8') as f:
return f.read()

version = _read('version.txt').strip()
version = '1.3.0.dev0'

setup(
name='nti.contentfragments',
Expand All @@ -28,7 +28,7 @@ def _read(fname):
author_email='jason@nextthought.com',
description="NTI ContentFragments",
url="https://github.com/NextThought/nti.contentfragments",
long_description=_read('README.rst'),
long_description=_read('README.rst') + '\n\n' + _read('CHANGES.rst'),
license='Apache',
keywords='Content fragments semantic typing interfaces classes sanitize censor',
classifiers=[
Expand All @@ -40,6 +40,7 @@ def _read(fname):
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: Implementation :: CPython',
'Programming Language :: Python :: Implementation :: PyPy',
],
Expand All @@ -58,20 +59,13 @@ def _read(fname):
# xml.etree.ElementTree, even on PyPy.
'lxml >= 4.2.5',
'repoze.lru >= 0.6',
'zope.component >= 4.5.0',
'zope.component >= 4.6.1',
'zope.event >= 4.4.0',
'zope.interface >= 4.5.0',
'zope.mimetype >= 2.3.2',
'zope.security >= 4.3.0',
'zope.interface >= 5.0.0',
'zope.mimetype >= 2.4.0',
'zope.security >= 5.1.1',
'zope.cachedescriptors >= 4.3.1',
'nti.schema >= 1.12.0',
],
extras_require={
'test': TESTS_REQUIRE,
'docs': [
'repoze.sphinx.autointerface',
'sphinx_rtd_theme',
],
'nti.schema >= 1.14.0',
# html5lib > 0.99999999 installs datrie if appropriate for the platform
# with its own [datrie] extra. But we do not explicitly depend
# on that version to help avoid conflicts, and older versions of
Expand All @@ -80,8 +74,14 @@ def _read(fname):

# datrie 0.7.1 does not build on CPython 3.7. See
# https://github.com/pytries/datrie/issues/52
":platform_python_implementation == 'CPython' and python_version < '3.7'": [
"datrie"

"datrie >= 0.8.2 ; platform_python_implementation == 'CPython'",
],
extras_require={
'test': TESTS_REQUIRE,
'docs': [
'repoze.sphinx.autointerface',
'sphinx_rtd_theme',
],

},
Expand Down
50 changes: 25 additions & 25 deletions src/nti/contentfragments/interfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@

from __future__ import print_function, absolute_import, division
__docformat__ = "restructuredtext en"

import sys
logger = __import__('logging').getLogger(__name__)
# pylint:disable=inherit-non-class,too-many-ancestors,no-self-argument,abstract-method
# pylint:disable=useless-object-inheritance
PY2 = str is bytes
PYPY = hasattr(sys, 'pypy_version_info')
PYPY2 = PY2 and PYPY
if PY2: # pragma: no cover
import copy_reg # pylint:disable=import-error
text_type = unicode # pylint:disable=undefined-variable
Expand All @@ -21,15 +23,14 @@
from zope import component
from zope import interface

from zope.interface.common import sequence
from zope.interface.common.collections import ISequence
from zope.interface.common.builtins import INativeString
from zope.interface.common.builtins import IByteString
from zope.interface.common.builtins import ITextString

from zope.contenttype import add_files as zc_add_files

try:
from zope.mimetype import types as mime_types
except ImportError: # pragma: no cover
# They moved this in zope.mimetype 2.0 (python 3 compat?)
from zope.mimetype import mtypes as mime_types
from zope.mimetype import mtypes as mime_types
mime_types.setup() # register interface classes and utilities if not already

resource_filename = __import__('pkg_resources').resource_filename
Expand All @@ -49,22 +50,10 @@ def _setup():
zc_add_files([mime_map_file])
_setup()


class IString(interface.Interface):
"""Marker interface for native strings."""


class IUnicode(interface.Interface):
"""Marker interface for unicode strings."""


class IBytes(interface.Interface):
"""Marker interface for byte strings."""


interface.classImplements(str, IString)
interface.classImplements(bytes, IBytes)
interface.classImplements(text_type, IUnicode)
# Backwards-compatibility (BWC) aliases. Each name now simply refers to the
# corresponding interface imported from ``zope.interface.common.builtins``.
# These aliases are deprecated and will be removed in the future.
IString = INativeString
IUnicode = ITextString
IBytes = IByteString


class IContentFragment(interface.Interface):
Expand All @@ -73,17 +62,28 @@ class IContentFragment(interface.Interface):
be in.
"""


class IUnicodeContentFragment(IContentFragment, sequence.IReadSequence):
class IUnicodeContentFragment(IContentFragment, ISequence):
"""
Content represented as a unicode string.
Although it is simplest to subclass :class:`unicode`, that is not required.
At a minimum, what is required are the `__getitem__` method (and others
declared by ``ISequence``), plus the `encode` method.
.. versionchanged:: 1.3.0
Extend ``zope.interface.common.collections.ISequence`` instead of the semi-deprecated
``zope.interface.common.sequence.IReadSequence``. The exception is PyPy2, where
``ISequence`` is not extended because it cannot validate against unicode objects.
"""
# TODO: extend IUnicode?

# On PyPy2, rebuild the interface's bases without ``ISequence``, keeping every
# other base in its original order. The interface's docstring gives the reason:
# ``ISequence`` cannot validate against unicode objects on that platform.
if PYPY2: # pragma: no cover
IUnicodeContentFragment.__bases__ = tuple(
x
for x in IUnicodeContentFragment.__bases__
if x is not ISequence
)


@interface.implementer(IUnicodeContentFragment)
class UnicodeContentFragment(text_type):
Expand Down
64 changes: 45 additions & 19 deletions src/nti/contentfragments/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,10 @@
# pylint: disable=too-many-ancestors
# pylint:disable=useless-object-inheritance

import unicodedata

from zope.interface import implementer

from .interfaces import IContentFragment
from .interfaces import HTMLContentFragment as HTMLContentFragmentType
from .interfaces import IHTMLContentFragment
from .interfaces import LatexContentFragment
Expand All @@ -42,33 +43,57 @@
from nti.schema.field import ValidText as Text
from nti.schema.field import ValidTextLine as TextLine

def _massage_kwargs(self, kwargs):

assert self._iface.isOrExtends(IUnicodeContentFragment), self._iface
assert self._iface.implementedBy(self._impl), self._impl

# We're imported too early for ZCA to be configured and we can't automatically
# adapt.
if 'default' in kwargs and not self._iface.providedBy(kwargs['default']):
kwargs['default'] = self._impl(kwargs['default'])
if 'default' not in kwargs and 'defaultFactory' not in kwargs and not kwargs.get('min_length'): # 0/None
kwargs['defaultFactory'] = self._impl
return kwargs

# Mixin for content-fragment schema fields. Subclasses configure it through
# the ``_iface``, ``_iface_upper_bound`` and ``_impl`` class attributes; the
# mixin then supplies the field's schema and defaults at construction time and
# implements ``fromUnicode`` by normalizing the text and adapting it to the
# schema.
class _FromUnicodeMixin(object):

# Set the interface to use as self.schema. This will be implemented by
# objects returned from ``fromUnicode``. However...
_iface = None
# If the adapter registered to produce _iface may produce some
# interface less restrictive than that (e.g., _iface is HTML, but
# we can produce plain text)
# set this to become self.schema.
_iface_upper_bound = None
# This is the class used to copy defaults.
_impl = lambda *args: None

# Initialize the next class in the MRO, passing the schema interface as the
# first positional argument, followed by the caller's positional arguments
# and the adjusted keyword arguments.
def __init__(self, *args, **kwargs):
super(_FromUnicodeMixin, self).__init__(self._iface,
*args,
**_massage_kwargs(self, kwargs))
super(_FromUnicodeMixin, self).__init__(
self._iface_upper_bound or self._iface, # Becomes self.schema.
*args,
**self.__massage_kwargs(kwargs))

# Adjust the constructor keyword arguments in place and return them:
# * a ``default`` that does not provide ``_iface`` is wrapped in ``_impl``;
# * when neither ``default`` nor ``defaultFactory`` is given and
#   ``min_length`` is missing or falsy, ``_impl`` becomes the
#   ``defaultFactory``;
# * ``unicode_normalization`` is forced to ``None``.
# The double-underscore name is mangled to this class by Python, so
# subclasses cannot accidentally override it.
# NOTE(review): the checks below are ``assert`` statements, so they are
# skipped when Python runs with ``-O``.
def __massage_kwargs(self, kwargs):

assert self._iface.isOrExtends(IUnicodeContentFragment), self._iface
assert self._iface.implementedBy(self._impl), self._impl

# We're imported too early for ZCA to be configured and we can't automatically
# adapt.
if 'default' in kwargs and not self._iface.providedBy(kwargs['default']):
kwargs['default'] = self._impl(kwargs['default'])
if 'default' not in kwargs and 'defaultFactory' not in kwargs and not kwargs.get('min_length'): # 0/None
kwargs['defaultFactory'] = self._impl
# Disable unicode normalization at this level; we need to handle it
# to properly deal with our content fragment subclasses.
assert 'unicode_normalization' not in kwargs
kwargs['unicode_normalization'] = None
return kwargs

def fromUnicode(self, value):
"""
We implement :class:`.IFromUnicode` by adapting the given object
to our text schema.
This happens *after* unicode normalization.
"""
return super(_FromUnicodeMixin, self).fromUnicode(self.schema(value))
# unicodedata.normalize does not preserve the class of the
# object it's given (it goes back to text_type; always under PyPy, only if
# changes are needed under CPython). So we must handle normalization ourselves
# before converting to the schema.
# The normalization form is read from the class rather than the instance;
# presumably the instance-level setting is the ``None`` passed to the
# constructor -- TODO confirm against the base field class.
value = unicodedata.normalize(self.__class__.unicode_normalization, value)
value = self.schema(value)
result = super(_FromUnicodeMixin, self).fromUnicode(value)
return result


@implementer(ITextUnicodeContentFragmentField)
Expand Down Expand Up @@ -156,6 +181,8 @@ class SanitizedHTMLContentFragment(HTMLContentFragment):
"""
A :class:`Text` type that also requires the object implement
an interface descending from :class:`.ISanitizedHTMLContentFragment`.
Note that the default adapter for this can actually produce
``IPlainTextContentFragment`` if there is no HTML present in the input.
Pass the keyword arguments for :class:`zope.schema.Text` to the constructor; the ``schema``
argument for :class:`~zope.schema.Object` is already handled.
Expand All @@ -168,7 +195,6 @@ class SanitizedHTMLContentFragment(HTMLContentFragment):
_iface = ISanitizedHTMLContentFragment
_impl = SanitizedHTMLContentFragmentType


@implementer(IPlainTextField)
class PlainText(TextUnicodeContentFragment):
"""
Expand Down
43 changes: 43 additions & 0 deletions src/nti/contentfragments/tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
# pylint:disable=useless-object-inheritance

from hamcrest import assert_that
from hamcrest import is_

from nti.testing.layers import ZopeComponentLayer
from nti.testing.layers import ConfiguringLayerMixin
Expand Down Expand Up @@ -52,6 +53,48 @@ def _getTargetClass(self):
# Hook that must be overridden: return the interface that instances created
# by ``_makeOne`` are expected to provide. This base version always raises
# ``NotImplementedError``.
def _getTargetInterface(self):
raise NotImplementedError()

# Hook applied to the expected (already normalized) text before it is
# compared with the result of ``fromUnicode``. Returns ``val`` unchanged by
# default.
def _transform_normalized_for_comparison(self, val):
return val

# Hook applied to raw input text before it is handed to ``fromUnicode``.
# Returns ``raw`` unchanged by default.
def _transform_raw_for_fromUnicode(self, raw):
return raw

# A freshly made instance must verifiably provide the target interface.
def test_implements_interface(self):
inst = self._makeOne()
assert_that(inst, verifiably_provides(self._getTargetInterface()))

# The object returned by ``fromUnicode`` for simple ASCII text must
# verifiably provide the instance's ``schema``.
def test_fromUnicode_implements_schema(self):
inst = self._makeOne()
assert_that(
inst.fromUnicode(
self._transform_raw_for_fromUnicode(u'abc')),
verifiably_provides(inst.schema))

# ``fromUnicode`` must return NFC-normalized text that still verifiably
# provides the instance's ``schema``.
def test_fromUnicode_normalizes(self):
import unicodedata
inst = self._makeOne()
# Decomposed input: each of A, O and U is followed by a combining diaeresis.
raw = b'A\xcc\x88O\xcc\x88U\xcc\x88'.decode('utf-8')
normalized = unicodedata.normalize('NFC', raw)
# Sanity-check the fixture: the raw text really is in decomposed form...
self.assertEqual(
[unicodedata.name(c) for c in raw],
[
'LATIN CAPITAL LETTER A',
'COMBINING DIAERESIS',
'LATIN CAPITAL LETTER O',
'COMBINING DIAERESIS',
'LATIN CAPITAL LETTER U',
'COMBINING DIAERESIS',
]
)
# ...and NFC composes each pair into a single precomposed character.
assert_that(
[unicodedata.name(c) for c in normalized],
is_([
'LATIN CAPITAL LETTER A WITH DIAERESIS',
'LATIN CAPITAL LETTER O WITH DIAERESIS',
'LATIN CAPITAL LETTER U WITH DIAERESIS',
])
)

# The actual check: the result equals the normalized text (after the
# comparison hook) and provides the schema.
fromUnicode = inst.fromUnicode(self._transform_raw_for_fromUnicode(raw))
self.assertEqual(fromUnicode, self._transform_normalized_for_comparison(normalized))
assert_that(fromUnicode, verifiably_provides(inst.schema))

0 comments on commit 7f6abb0

Please sign in to comment.