diff --git a/cf_units/__init__.py b/cf_units/__init__.py index 0f86af93..6b57f4cb 100644 --- a/cf_units/__init__.py +++ b/cf_units/__init__.py @@ -841,15 +841,15 @@ def __init__(self, unit, calendar=None): unit = _NO_UNIT_STRING else: category = _CATEGORY_UDUNIT - if six.PY2: - str_unit = unit.encode(sys.getdefaultencoding(), 'replace') + if six.PY2 and not isinstance(unit, six.text_type): + str_unit = unit.encode('utf8') else: str_unit = unit try: ut_unit = _ud.parse(_ud_system, unit.encode('utf8'), encoding) except _ud.UdunitsError as e: self._propogate_error( - 'Failed to parse unit "%s"' % str_unit, e) + u'Failed to parse unit "{}"'.format(str_unit), e) if _OP_SINCE in unit.lower(): if calendar is None: calendar_ = CALENDAR_GREGORIAN @@ -884,7 +884,10 @@ def _propogate_error(self, msg, ud_err): """ error_msg = ': "%s"' % ud_err.error_msg() if ud_err.errnum != 0 else '' - raise ValueError('[%s] %s%s' % (ud_err.status_msg(), msg, error_msg)) + msg = '[%s] %s%s' % (ud_err.status_msg(), msg, error_msg) + if six.PY2: + msg = msg.encode('utf8') + raise ValueError(msg) # NOTE: # "__getstate__" and "__setstate__" functions are defined here to @@ -1462,10 +1465,13 @@ def __str__(self): 'miles/hour' """ - r = self.origin or self.symbol - if six.PY2 and sys.getdefaultencoding() == 'ascii': - r = r.encode('ascii', 'replace') - return r + return self.origin or self.symbol + + if six.PY2: + __unicode__ = __str__ + + def __str__(self): + return unicode(self).encode('utf8') def __repr__(self): """ @@ -1482,12 +1488,11 @@ def __repr__(self): "Unit('meters')" """ - if self.calendar is None: - result = "%s('%s')" % (self.__class__.__name__, self) + result = "{}('{}')".format(self.__class__.__name__, self) else: - result = "%s('%s', calendar='%s')" % (self.__class__.__name__, - self, self.calendar) + result = "{}('{}', calendar='{}')".format( + self.__class__.__name__, self, self.calendar) return result def _offset_common(self, offset): diff --git a/cf_units/tests/test_unit.py b/cf_units/tests/test_unit.py index a8070542..4f1c4a48 100644 --- a/cf_units/tests/test_unit.py +++ b/cf_units/tests/test_unit.py @@ -79,18 +79,12 @@ def test_calendar_w_unicode(self): calendar = unit.CALENDAR_365_DAY u = Unit(u'hours\xb2 hours-1 since epoch', calendar=calendar) self.assertEqual(u.calendar, calendar) - if six.PY2: - # Python 2 str MUST return an ascii string, yet the input - # was a unicode. We therefore return the ASCII encoded form. - expected = 'hours? hours-1 since 1970-01-01 00:00:00' - else: - expected = 'hours\xb2 hours-1 since 1970-01-01 00:00:00' - self.assertEqual(str(u), expected) + expected = u'hours² hours-1 since 1970-01-01 00:00:00' + self.assertEqual(u.origin, expected) - @unittest.skipIf(six.PY2, "Unicode literals in str aren't a thing") def test_unicode_valid(self): # Some unicode characters are allowed. - u = Unit('m²') + u = Unit(u'm²') assert u.symbol == 'm2' def test_py2k_unicode(self): @@ -99,9 +93,17 @@ def test_py2k_unicode(self): def test_unicode_invalid(self): # Not all unicode characters are allowed. - msg = '[UT_UNKNOWN] Failed to parse unit "ø"' - with self.assertRaises(ValueError, msg=msg): - Unit('ø') + + if six.PY2: + msg = u'[UT_UNKNOWN] Failed to parse unit "ø"' + # NOTE: assertRaisesRegex doesn't work with unicode chars in py2 :( + with self.assertRaises(ValueError) as e: + Unit(u'ø') + self.assertIn(msg, e.exception.message.decode('utf8')) + else: + msg = r'Failed to parse unit \"ø\"' + with six.assertRaisesRegex(self, ValueError, msg): + Unit('ø') class Test_modulus(unittest.TestCase):