diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst index 23f34a3de0f05c..4dcb13ddfd6e69 100644 --- a/Doc/library/functions.rst +++ b/Doc/library/functions.rst @@ -788,6 +788,14 @@ section. The long type is described in :ref:`typesnumeric`. + .. versionchanged:: 2.7.18.6 + :class:`long` string inputs and string representations can be limited to + help avoid denial of service attacks. A :exc:`ValueError` is raised when + the limit is exceeded while converting a string *x* to a :class:`long` or + when converting a :class:`long` into a string would exceed the limit. + See the :ref:`integer string conversion length limitation + <int_max_str_digits>` documentation. + .. function:: map(function, iterable, ...) diff --git a/Doc/library/json.rst b/Doc/library/json.rst index 87dc0541dc7f1c..29867ff33201b7 100644 --- a/Doc/library/json.rst +++ b/Doc/library/json.rst @@ -14,6 +14,11 @@ is a lightweight data interchange format inspired by `JavaScript `_ object literal syntax (although it is not a strict subset of JavaScript [#rfc-errata]_ ). +.. warning:: + Be cautious when parsing JSON data from untrusted sources. A malicious + JSON string may cause the decoder to consume considerable CPU and memory + resources. Limiting the size of data to be parsed is recommended. + :mod:`json` exposes an API familiar to users of the standard library :mod:`marshal` and :mod:`pickle` modules. @@ -249,6 +254,12 @@ Basic Usage be used to use another datatype or parser for JSON integers (e.g. :class:`float`). + .. versionchanged:: 2.7.18.6 + The default *parse_int* of :func:`int` now limits the maximum length of + the integer string via the interpreter's :ref:`integer string + conversion length limitation <int_max_str_digits>` to help avoid denial + of service attacks. + *parse_constant*, if specified, will be called with one of the following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. 
This can be used to raise an exception if invalid JSON numbers diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index b4fe19a5f0818a..91b560693f9e42 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -521,6 +521,13 @@ class`. float also has the following additional methods. .. versionadded:: 2.6 + .. note:: + + The values returned by ``as_integer_ratio()`` can be huge. Attempts + to render such integers into decimal strings may bump into the + :ref:`integer string conversion length limitation + <int_max_str_digits>`. + .. method:: float.is_integer() Return ``True`` if the float instance is finite with integral @@ -3190,6 +3197,167 @@ The following attributes are only supported by :term:`new-style class`\ es. [] +.. _int_max_str_digits: + +Integer string conversion length limitation +=========================================== + +CPython has a global limit for converting between :class:`long` and :class:`str` +or :class:`unicode` to mitigate denial of service attacks. This limit *only* applies +to decimal or other non-power-of-two number bases. Hexadecimal, octal, and binary +conversions are unlimited. The limit can be configured. + +The :class:`long` type in CPython is an arbitrary length number stored in binary +form (commonly known as a "bignum"). There exists no algorithm that can convert +a string to a binary integer or a binary integer to a string in linear time, +*unless* the base is a power of 2. Even the best known algorithms for base 10 +have sub-quadratic complexity. Converting a large value such as ``long('1' * +500000)`` can take over a second on a fast CPU. + +Limiting conversion size offers a practical way to avoid `CVE-2020-10735 +<https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-10735>`_. + +The limit is applied to the number of digit characters in the input or output +string when a non-linear conversion algorithm would be involved. A trailing *L* +and the sign are not counted towards the limit. + +When an operation would exceed the limit, a :exc:`ValueError` is raised: + +.. 
doctest:: + + >>> import sys + >>> sys.set_int_max_str_digits(4300) # Illustrative, this is the default. + >>> _ = long('2' * 5432) + Traceback (most recent call last): + ... + ValueError: Exceeds the limit (4300) for integer string conversion: value has 5432 digits + >>> i = long('2' * 4300) + >>> len(str(i)) + 4300 + >>> i_squared = i*i + >>> len(str(i_squared)) + Traceback (most recent call last): + ... + ValueError: Exceeds the limit (4300) for integer string conversion: value has 8599 digits + >>> len(hex(i_squared)) + 7145 + >>> assert long(hex(i_squared), base=16) == i*i # Hexadecimal is unlimited. + +The default limit is 4300 digits as provided in +:data:`sys.long_info.default_max_str_digits <sys.long_info>`. +The lowest limit that can be configured is 640 digits as provided in +:data:`sys.long_info.str_digits_check_threshold <sys.long_info>`. + +Verification: + +.. doctest:: + + >>> import sys + >>> assert sys.long_info.default_max_str_digits == 4300, sys.long_info + >>> assert sys.long_info.str_digits_check_threshold == 640, sys.long_info + >>> msg = ('%0106x' % long('578966293710682886880994035146873798396722250538762761564' + ... '9252925514383915483333812743580549779436104706260696366600' + ... '571186405732')).decode('hex') + ... + +.. versionadded:: 2.7.18.6 + + +Affected APIs +------------- + +Because int automatically converts to long if the value is larger than +:data:`sys.maxint` this limitation applies to potentially slow conversions +between any of :class:`int` or :class:`long` and :class:`str` or :class:`unicode`: + +* ``int(string)`` with default base 10. +* ``int(string, base)`` for all bases that are not a power of 2. +* ``long(string)`` with default base 10. +* ``long(string, base)`` for all bases that are not a power of 2. +* ``int(unicode)`` with default base 10. +* ``int(unicode, base)`` for all bases that are not a power of 2. +* ``long(unicode)`` with default base 10. +* ``long(unicode, base)`` for all bases that are not a power of 2. +* ``str(long)``. +* ``repr(long)``. 
+* ``unicode(long)``. +* any other string conversion to base 10, for example ``"{}".format(long)``. + +The limitations do not apply to functions with a linear algorithm: + +* ``long(string, base)`` with base 2, 4, 8, 16, or 32. +* :func:`hex`, :func:`oct`, :func:`bin`. +* :ref:`formatspec` for hex, octal, and binary numbers. +* :class:`str` to :class:`float`. +* :class:`str` to :class:`decimal.Decimal`. + +Configuring the limit +--------------------- + +Before Python starts up you can use an environment variable to configure the limit: + +* :envvar:`PYTHONINTMAXSTRDIGITS`, e.g. + ``PYTHONINTMAXSTRDIGITS=640 python`` to set the limit to 640 or + ``PYTHONINTMAXSTRDIGITS=0 python`` to disable the limitation. +* :data:`sys.flags.int_max_str_digits` contains the value of + :envvar:`PYTHONINTMAXSTRDIGITS`. A value of *-1* indicates that none was set, + thus a value of :data:`sys.long_info.default_max_str_digits` was used during + initialization. + +From code, you can inspect the current limit and set a new one using these +:mod:`sys` APIs: + +* :func:`sys.get_int_max_str_digits` and :func:`sys.set_int_max_str_digits` are + a getter and setter for the interpreter-wide limit. + +Information about the default and minimum can be found in :attr:`sys.long_info`: + +* :data:`sys.long_info.default_max_str_digits <sys.long_info>` is the compiled-in + default limit. +* :data:`sys.long_info.str_digits_check_threshold <sys.long_info>` is the lowest + accepted value for the limit (other than 0 which disables it). + +.. versionadded:: 2.7.18.6 + +.. caution:: + + Setting a low limit *can* lead to problems. While rare, code exists that + contains integer constants in decimal in their source that exceed the + minimum threshold. 
A consequence of setting the limit is that Python source + code containing decimal integer literals longer than the limit will + encounter an error during parsing, usually at startup time or import time or + even at installation time - anytime an up to date ``.pyc`` does not already + exist for the code. A workaround for source that contains such large + constants is to convert them to ``0x`` hexadecimal form as it has no limit. + + Test your application thoroughly if you use a low limit. Ensure your tests + run with the limit set early via the environment so that it applies during + startup and even during any installation step that may invoke Python to + precompile ``.py`` sources to ``.pyc`` files. + +Recommended configuration +------------------------- + +The default :data:`sys.long_info.default_max_str_digits` is expected to be +reasonable for most applications. If your application requires a different +limit, set it from your main entry point using Python version agnostic code as +these APIs were ported from the original fix in version 3.12. + +Example:: + + >>> import sys + >>> if hasattr(sys, "set_int_max_str_digits"): + ... upper_bound = 68000 + ... lower_bound = 4004 + ... current_limit = sys.get_int_max_str_digits() + ... if current_limit == 0 or current_limit > upper_bound: + ... sys.set_int_max_str_digits(upper_bound) + ... elif current_limit < lower_bound: + ... sys.set_int_max_str_digits(lower_bound) + +If you need to disable it entirely, set it to ``0``. + + .. rubric:: Footnotes .. [1] Additional information on these special methods may be found in the Python diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index 339625ad38021f..3f3b2d393d9c78 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -431,6 +431,14 @@ always available. an argument to :func:`getrefcount`. +.. function:: get_int_max_str_digits() + + Returns the current value for the :ref:`integer string conversion length + limitation `. 
See also :func:`set_int_max_str_digits`. + + .. versionadded:: 2.7.18.6 + + .. function:: getrecursionlimit() Return the current value of the recursion limit, the maximum depth of the Python @@ -603,19 +611,30 @@ always available. .. tabularcolumns:: |l|L| - +-------------------------+----------------------------------------------+ - | Attribute | Explanation | - +=========================+==============================================+ - | :const:`bits_per_digit` | number of bits held in each digit. Python | - | | integers are stored internally in base | - | | ``2**long_info.bits_per_digit`` | - +-------------------------+----------------------------------------------+ - | :const:`sizeof_digit` | size in bytes of the C type used to | - | | represent a digit | - +-------------------------+----------------------------------------------+ + +----------------------------------------+-----------------------------------------------+ + | Attribute | Explanation | + +========================================+===============================================+ + | :const:`bits_per_digit` | number of bits held in each digit. Python | + | | integers are stored internally in base | + | | ``2**long_info.bits_per_digit`` | + +----------------------------------------+-----------------------------------------------+ + | :const:`sizeof_digit` | size in bytes of the C type used to | + | | represent a digit | + +----------------------------------------+-----------------------------------------------+ + | :const:`default_max_str_digits` | default value for | + | | :func:`sys.get_int_max_str_digits` when it | + | | is not otherwise explicitly configured. | + +----------------------------------------+-----------------------------------------------+ + | :const:`str_digits_check_threshold` | minimum non-zero value for | + | | :func:`sys.set_int_max_str_digits`, | + | | :envvar:`PYTHONINTMAXSTRDIGITS`. 
| + +----------------------------------------+-----------------------------------------------+ .. versionadded:: 2.7 + .. versionchanged:: 2.7.18.6 + Added ``default_max_str_digits`` and ``str_digits_check_threshold``. + .. data:: last_type last_value @@ -848,6 +867,15 @@ always available. .. versionadded:: 2.2 +.. function:: set_int_max_str_digits(n) + + Set the :ref:`integer string conversion length limitation + <int_max_str_digits>` used by this interpreter. See also + :func:`get_int_max_str_digits`. + + .. versionadded:: 2.7.18.6 + + .. function:: setprofile(profilefunc) .. index:: diff --git a/Doc/library/test.rst b/Doc/library/test.rst index 9d78c90f55d54d..1b149673d4168c 100644 --- a/Doc/library/test.rst +++ b/Doc/library/test.rst @@ -443,6 +443,16 @@ The :mod:`test.support` module defines the following functions: .. versionadded:: 2.7 +.. function:: adjust_int_max_str_digits(max_digits) + + This function returns a context manager that will change the global + :func:`sys.set_int_max_str_digits` setting for the duration of the + context to allow execution of test code that needs a different limit + on the number of digits when converting between an integer and string. + + .. versionadded:: 2.7.18.6 + + The :mod:`test.support` module defines the following classes: .. class:: TransientResource(exc[, **kwargs]) diff --git a/Doc/library/xmlrpclib.rst b/Doc/library/xmlrpclib.rst index e818c3de6ab1a6..9f8db27c7724e9 100644 --- a/Doc/library/xmlrpclib.rst +++ b/Doc/library/xmlrpclib.rst @@ -557,6 +557,12 @@ Convenience Functions .. versionchanged:: 2.5 The *use_datetime* flag was added. + .. versionchanged:: 2.7.18.6 + Decoding an integer value now limits the maximum length of + the integer string via the interpreter's :ref:`integer string + conversion length limitation <int_max_str_digits>` to help avoid denial + of service attacks. + .. 
_xmlrpc-client-example: diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst index c27ec4f3e634a5..a55c64ee607158 100644 --- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst @@ -638,6 +638,15 @@ conflict. .. versionadded:: 2.7.12 + +.. envvar:: PYTHONINTMAXSTRDIGITS + + If this variable is set to an integer, it is used to configure the + interpreter's global :ref:`integer string conversion length limitation + `. + + .. versionadded:: 2.7.18.6 + Debug-mode variables ~~~~~~~~~~~~~~~~~~~~ diff --git a/Doc/whatsnew/2.7.rst b/Doc/whatsnew/2.7.rst index 992658e8bab83e..b2c96361afde0e 100644 --- a/Doc/whatsnew/2.7.rst +++ b/Doc/whatsnew/2.7.rst @@ -884,6 +884,18 @@ Some smaller changes made to the core Python language are: now only cleared if no one else is holding a reference to the dictionary (:issue:`7140`). +* Converting between :class:`int` or :class:`long` and :class:`str` or + :class:`unicode` in bases other than 2 (binary), 4, 8 (octal), 16 + (hexadecimal), or 32 such as base 10 (decimal) now raises a + :exc:`ValueError` if the number of digits in string form is above a + limit to avoid potential denial of service attacks due to the + algorithmic complexity. This is a mitigation for `CVE-2020-10735 + `_. + This limit can be configured or disabled by environment variable or + :mod:`sys` APIs. See the :ref:`integer string conversion length + limitation ` documentation. The default limit + is 4300 digits in string form. + .. ====================================================================== .. 
_new-27-interpreter: diff --git a/Include/longobject.h b/Include/longobject.h index 4e33ff2bb17ffd..ffda440e819cfb 100644 --- a/Include/longobject.h +++ b/Include/longobject.h @@ -129,6 +129,32 @@ PyAPI_FUNC(PyObject *) _PyLong_FormatAdvanced(PyObject *obj, char *format_spec, Py_ssize_t format_spec_len); +#define _MAX_STR_DIGITS_ERROR_FMT "Exceeds the limit (%d) for integer string conversion: value has %zd digits" +/* + * Default long base conversion size limitation: Denial of Service prevention. + * + * Chosen such that this isn't wildly slow on modern hardware + * 4300 decimal digits fits a ~14284 bit number. + */ +#define _PY_LONG_DEFAULT_MAX_STR_DIGITS 4300 +/* + * Threshold for max digits check. For performance reasons long() and + * long.__str__() don't checks values that are smaller than this + * threshold. Acts as a guaranteed minimum size limit for bignums that + * applications can expect from CPython. + * + * "640 digits should be enough for anyone." - gps + * fits a ~2126 bit decimal number. + */ +#define _PY_LONG_MAX_STR_DIGITS_THRESHOLD 640 + +#if ((_PY_LONG_DEFAULT_MAX_STR_DIGITS != 0) && \ + (_PY_LONG_DEFAULT_MAX_STR_DIGITS < _PY_LONG_MAX_STR_DIGITS_THRESHOLD)) +# error "_PY_LONG_DEFAULT_MAX_STR_DIGITS smaller than threshold." +#endif + +int Py_LongMaxStrDigits; + #ifdef __cplusplus } #endif diff --git a/Include/pydebug.h b/Include/pydebug.h index 0f45960f90f511..d655e3e219ee84 100644 --- a/Include/pydebug.h +++ b/Include/pydebug.h @@ -27,6 +27,7 @@ PyAPI_DATA(int) _Py_QnewFlag; /* Warn about 3.x issues */ PyAPI_DATA(int) Py_Py3kWarningFlag; PyAPI_DATA(int) Py_HashRandomizationFlag; +PyAPI_DATA(int) Py_LongMaxStrDigitsFlag; /* this is a wrapper around getenv() that pays attention to Py_IgnoreEnvironmentFlag. 
It should be used for getting variables like diff --git a/Include/pystate.h b/Include/pystate.h index f2cfc30208f5ef..c347759c5ff5ad 100644 --- a/Include/pystate.h +++ b/Include/pystate.h @@ -34,6 +34,8 @@ typedef struct _is { int tscdump; #endif + int long_max_str_digits; + } PyInterpreterState; diff --git a/Include/pythonrun.h b/Include/pythonrun.h index f0f4e382e5ec83..7950d94a085288 100644 --- a/Include/pythonrun.h +++ b/Include/pythonrun.h @@ -128,6 +128,7 @@ PyAPI_FUNC(int) _PyLong_Init(void); PyAPI_FUNC(void) _PyFloat_Init(void); PyAPI_FUNC(int) PyByteArray_Init(void); PyAPI_FUNC(void) _PyRandom_Init(void); +PyAPI_FUNC(void) _PyLongMaxStrDigits_Init(void); /* Various internal finalizers */ PyAPI_FUNC(void) _PyExc_Fini(void); diff --git a/Lib/json/tests/test_decode.py b/Lib/json/tests/test_decode.py index 0014546b8284b3..701aaf1672dfb5 100644 --- a/Lib/json/tests/test_decode.py +++ b/Lib/json/tests/test_decode.py @@ -2,6 +2,7 @@ from StringIO import StringIO from collections import OrderedDict from json.tests import PyTest, CTest +from test import test_support class TestDecode(object): @@ -65,5 +66,12 @@ def test_negative_index(self): self.assertRaises(ValueError, d.raw_decode, 'a'*42, -50000) self.assertRaises(ValueError, d.raw_decode, u'a'*42, -50000) + def test_limit_int(self): + maxdigits = 5000 + with test_support.adjust_int_max_str_digits(maxdigits): + self.loads('1' * maxdigits) + with self.assertRaises(ValueError): + self.loads('1' * (maxdigits + 1)) + class TestPyDecode(TestDecode, PyTest): pass class TestCDecode(TestDecode, CTest): pass diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index ccc11c1b4b0a81..00b066d3ff1ca7 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -45,7 +45,7 @@ "check_impl_detail", "get_attribute", "py3k_bytes", "import_fresh_module", "threading_cleanup", "reap_children", "strip_python_stderr", "IPV6_ENABLED", "run_with_tz", - "SuppressCrashReport"] + 
"SuppressCrashReport", "adjust_int_max_str_digits"] class Error(Exception): """Base class for regression test exceptions.""" @@ -2175,3 +2175,14 @@ def save(self): def restore(self): for signum, handler in self.handlers.items(): self.signal.signal(signum, handler) + + +@contextlib.contextmanager +def adjust_int_max_str_digits(max_digits): + """Temporarily change the integer string conversion length limit.""" + current = sys.get_int_max_str_digits() + try: + sys.set_int_max_str_digits(max_digits) + yield + finally: + sys.set_int_max_str_digits(current) diff --git a/Lib/test/test_ast.py b/Lib/test/test_ast.py index 3cfe6188ac1342..b9a657ac690461 100644 --- a/Lib/test/test_ast.py +++ b/Lib/test/test_ast.py @@ -492,6 +492,13 @@ def test_literal_eval_issue4907(self): self.assertEqual(ast.literal_eval('1.5 - 2j'), 1.5 - 2j) self.assertRaises(ValueError, ast.literal_eval, '2 + (3 + 4j)') + def test_literal_eval_str_int_limit(self): + with test_support.adjust_int_max_str_digits(4000): + ast.literal_eval('3'*4000) + with self.assertRaises(ValueError) as err_ctx: + ast.literal_eval('3'*4001) + self.assertIn('Exceeds the limit ', str(err_ctx.exception)) + def test_main(): with test_support.check_py3k_warnings(("backquote not supported", diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py index 5396838f3d26be..74d0b8763a7b5e 100644 --- a/Lib/test/test_builtin.py +++ b/Lib/test/test_builtin.py @@ -63,6 +63,15 @@ def __iter__(self): class BuiltinTest(unittest.TestCase): + def setUp(self): + super(BuiltinTest, self).setUp() + self._previous_int_limit = sys.get_int_max_str_digits() + sys.set_int_max_str_digits(7000) + + def tearDown(self): + sys.set_int_max_str_digits(self._previous_int_limit) + super(BuiltinTest, self).tearDown() + def test_import(self): __import__('sys') __import__('time') diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py index 12f26d9e4506f1..0e31e60a3dbd06 100644 --- a/Lib/test/test_cmd_line.py +++ b/Lib/test/test_cmd_line.py @@ 
-134,6 +134,30 @@ def test_unknown_options(self): self.assertEqual(err.splitlines().count(b'Unknown option: -a'), 1) self.assertEqual(b'', out) + def test_int_max_str_digits(self): + code = 'import sys; print sys.flags.int_max_str_digits, sys.get_int_max_str_digits()' + + assert_python_failure('-c', code, PYTHONINTMAXSTRDIGITS='foo') + assert_python_failure('-c', code, PYTHONINTMAXSTRDIGITS='100') + assert_python_failure('-c', code, PYTHONINTMAXSTRDIGITS='-1') + + def parse(res): + return tuple(int(r) for r in res[1].strip().split()) + + res = assert_python_ok('-c', code) + self.assertEqual(parse(res), (-1, sys.get_int_max_str_digits())) + res = assert_python_ok('-c', code, PYTHONINTMAXSTRDIGITS='0') + self.assertEqual(parse(res), (0, 0)) + res = assert_python_ok('-c', code, PYTHONINTMAXSTRDIGITS='4000') + self.assertEqual(parse(res), (4000, 4000)) + res = assert_python_ok('-c', code, PYTHONINTMAXSTRDIGITS='100000') + self.assertEqual(parse(res), (100000, 100000)) + + res = assert_python_ok('-E', '-c', code, PYTHONINTMAXSTRDIGITS='0') + self.assertEqual(parse(res), (-1, sys.get_int_max_str_digits())) + res = assert_python_ok('-E', '-c', code, PYTHONINTMAXSTRDIGITS='4000') + self.assertEqual(parse(res), (-1, sys.get_int_max_str_digits())) + def test_main(): test.test_support.run_unittest(CmdLineTest) diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py index 15a00f3cf7fcfe..3a2e84ee52075e 100644 --- a/Lib/test/test_compile.py +++ b/Lib/test/test_compile.py @@ -291,6 +291,17 @@ def test_literals_with_leading_zeroes(self): self.assertEqual(eval("01000000000000000000000.0"), 1000000000000000000000.0) + def test_int_literals_too_long(self): + n = 3000 + bign = '3'*n + source = 'a = 1\nb = 2\nc = {bign}\nd = 4'.format(bign=bign) + with test_support.adjust_int_max_str_digits(n): + compile(source, '', 'exec') + with test_support.adjust_int_max_str_digits(n-1): + with self.assertRaises(ValueError) as err_ctx: + compile(source, '', 'exec') + 
self.assertIn('Exceeds the limit ', str(err_ctx.exception)) + def test_unary_minus(self): # Verify treatment of unary minus on negative numbers SF bug #660455 if sys.maxint == 2147483647: diff --git a/Lib/test/test_int.py b/Lib/test/test_int.py index ea5c0e3f48d5bb..0de7f0c25f077a 100644 --- a/Lib/test/test_int.py +++ b/Lib/test/test_int.py @@ -495,8 +495,142 @@ def __trunc__(self): self.assertIs(type(n), IntSubclass) +class IntStrDigitLimitsTests(unittest.TestCase): + + int_class = int # Override this in subclasses to reuse the suite. + + def setUp(self): + super(IntStrDigitLimitsTests, self).setUp() + self._previous_limit = sys.get_int_max_str_digits() + sys.set_int_max_str_digits(2048) + + def tearDown(self): + sys.set_int_max_str_digits(self._previous_limit) + super(IntStrDigitLimitsTests, self).tearDown() + + def stringify(self, i, will_error=False): + if not will_error: + str(i) + # repr will also create a string, but append 'L' if i was long + repr(i) + unicode(i) + return + with self.assertRaises(ValueError): + str(i) + with self.assertRaises(ValueError): + repr(i) + with self.assertRaises(ValueError): + unicode(i) + + def test_disabled_limit(self): + self.assertGreater(sys.get_int_max_str_digits(), 0) + self.assertLess(sys.get_int_max_str_digits(), 20000) + with test_support.adjust_int_max_str_digits(0): + self.assertEqual(sys.get_int_max_str_digits(), 0) + i = self.int_class('1' * 20000) + self.stringify(i) + self.assertGreater(sys.get_int_max_str_digits(), 0) + + def test_max_str_digits_edge_cases(self): + """Ignore the +/- sign 'L' and space padding.""" + int_class = self.int_class + maxdigits = sys.get_int_max_str_digits() + + int_class('1' * maxdigits) + int_class(' ' + '1' * maxdigits) + int_class('1' * maxdigits + ' ') + int_class('+' + '1' * maxdigits) + int_class('-' + '1' * maxdigits) + self.assertEqual(len(str(10 ** (maxdigits - 1))), maxdigits) + + int_class(u'1' * maxdigits) + int_class(u' ' + u'1' * maxdigits) + int_class(u'1' * maxdigits + 
u' ') + int_class(u'+' + u'1' * maxdigits) + int_class(u'-' + u'1' * maxdigits) + self.assertEqual(len(unicode(10 ** (maxdigits - 1))), maxdigits) + + def check(self, i, base=None): + with self.assertRaises(ValueError): + if base is None: + self.int_class(i) + else: + self.int_class(i, base) + + def test_max_str_digits(self): + maxdigits = sys.get_int_max_str_digits() + + self.check('1' * (maxdigits + 1)) + self.check(' ' + '1' * (maxdigits + 1)) + self.check('1' * (maxdigits + 1) + ' ') + self.check('+' + '1' * (maxdigits + 1)) + self.check('-' + '1' * (maxdigits + 1)) + self.check('1' * (maxdigits + 1)) + + self.check(u'1' * (maxdigits + 1)) + self.check(u' ' + u'1' * (maxdigits + 1)) + self.check(u'1' * (maxdigits + 1) + u' ') + self.check(u'+' + u'1' * (maxdigits + 1)) + self.check(u'-' + u'1' * (maxdigits + 1)) + self.check(u'1' * (maxdigits + 1)) + + i = 10 ** maxdigits + self.stringify(i, will_error=True) + + def test_changed_limit(self): + int_class = self.int_class + newmax = 900 + self.assertLess(sys.long_info.str_digits_check_threshold, newmax) + with test_support.adjust_int_max_str_digits(newmax): + i = 10 ** (newmax - 1) + self.stringify(i) + int_class('1' * newmax) + + i = 10 ** newmax + self.stringify(i, will_error=True) + self.check('1' * (newmax + 1)) + + def test_power_of_two_bases_unlimited(self): + """The limit does not apply to power of 2 bases.""" + maxdigits = sys.get_int_max_str_digits() + + for base in (2, 4, 8, 16, 32): + self.int_class('1' * (maxdigits + 1), base) + assert maxdigits < 100000 + self.int_class('1' * 100000, base) + + def test_sign_not_counted(self): + int_class = self.int_class + maxdigits = sys.get_int_max_str_digits() + s = '5' * maxdigits + i = int_class(s) + pos_i = int_class('+{s}'.format(s=s)) + assert i == pos_i + neg_i = int_class('-{s}'.format(s=s)) + assert -pos_i == neg_i + self.stringify(pos_i) + self.stringify(neg_i) + + def _other_base_helper(self, base): + int_class = self.int_class + maxdigits = 
sys.get_int_max_str_digits() + s = '2' * maxdigits + i = int_class(s, base) + if base > 10: + self.stringify(i, will_error=True) + elif base < 10: + self.stringify(i) + with self.assertRaises(ValueError) as err: + int_class('{s}1'.format(s=s), base) + + def test_int_from_other_bases(self): + self._other_base_helper(base=3) + self._other_base_helper(base=36) + + def test_main(): run_unittest(IntTestCases) + run_unittest(IntStrDigitLimitsTests) if __name__ == "__main__": test_main() diff --git a/Lib/test/test_long.py b/Lib/test/test_long.py index 6788cedd809dec..bb43f821508543 100644 --- a/Lib/test/test_long.py +++ b/Lib/test/test_long.py @@ -941,8 +941,37 @@ def test_bit_length(self): self.assertEqual((-a-1).bit_length(), i+1) +class LongStrDigitLimitsTests(test_int.IntStrDigitLimitsTests): + int_class = long + + def test_l_not_counted(self): + int_class = self.int_class + maxdigits = sys.get_int_max_str_digits() + + s = '5' * maxdigits + i = int_class(s) + long_i = int_class('{s}L'.format(s=s)) + assert i == long_i + self.stringify(long_i) + + def test_max_l_str_digits(self): + int_class = self.int_class + maxdigits = sys.get_int_max_str_digits() + + int_class('1' * maxdigits + 'L') + int_class(u'1' * maxdigits + u'L') + + self.check('1' * (maxdigits + 1) + 'L') + self.check(u'1' * (maxdigits + 1) + u'L') + +class LongSubclassStrDigitLimitsTests(LongStrDigitLimitsTests): + int_class = LongSubclass + + def test_main(): test_support.run_unittest(LongTest) + test_support.run_unittest(LongStrDigitLimitsTests) + test_support.run_unittest(LongSubclassStrDigitLimitsTests) if __name__ == "__main__": test_main() diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 9342716272a716..d3d8d9ca2f8606 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -389,13 +389,19 @@ def test_attributes(self): self.assertIsInstance(sys.executable, basestring) self.assertEqual(len(sys.float_info), 11) self.assertEqual(sys.float_info.radix, 2) - 
self.assertEqual(len(sys.long_info), 2) + self.assertEqual(len(sys.long_info), 4) self.assertTrue(sys.long_info.bits_per_digit % 5 == 0) self.assertTrue(sys.long_info.sizeof_digit >= 1) + self.assertGreaterEqual(sys.long_info.default_max_str_digits, 500) + self.assertGreaterEqual(sys.long_info.str_digits_check_threshold, 100) + self.assertGreater(sys.long_info.default_max_str_digits, + sys.long_info.str_digits_check_threshold) self.assertEqual(type(sys.long_info.bits_per_digit), int) self.assertEqual(type(sys.long_info.sizeof_digit), int) self.assertIsInstance(sys.hexversion, int) self.assertIsInstance(sys.maxint, int) + self.assertIsInstance(sys.long_info.default_max_str_digits, long) + self.assertIsInstance(sys.long_info.str_digits_check_threshold, long) if test.test_support.have_unicode: self.assertIsInstance(sys.maxunicode, int) self.assertIsInstance(sys.platform, basestring) @@ -436,7 +442,8 @@ def test_sys_flags(self): attrs = ("debug", "py3k_warning", "division_warning", "division_new", "inspect", "interactive", "optimize", "dont_write_bytecode", "no_site", "ignore_environment", "tabcheck", "verbose", - "unicode", "bytes_warning", "hash_randomization") + "unicode", "bytes_warning", "hash_randomization", + "int_max_str_digits") for attr in attrs: self.assertTrue(hasattr(sys.flags, attr), attr) self.assertEqual(type(getattr(sys.flags, attr)), int, attr) diff --git a/Lib/test/test_xmlrpc.py b/Lib/test/test_xmlrpc.py index 90ccb30716ff88..3ca0e3623b1006 100644 --- a/Lib/test/test_xmlrpc.py +++ b/Lib/test/test_xmlrpc.py @@ -222,6 +222,14 @@ def test_loads_unsupported(self): '') self.assertRaises(ResponseError, xmlrpclib.loads, data) + def test_limit_int(self): + data = '{i}' + maxdigits = 5000 + with test_support.adjust_int_max_str_digits(maxdigits): + i = '1' * (maxdigits + 1) + with self.assertRaises(ValueError): + xmlrpclib.loads(data.format(i=i)) + class HelperTestCase(unittest.TestCase): def test_escape(self): diff --git a/Modules/main.c b/Modules/main.c 
index a6edf822d039cd..733bb39d6b01de 100644 --- a/Modules/main.c +++ b/Modules/main.c @@ -115,6 +115,10 @@ PYTHONHASHSEED: if this variable is set to 'random', the effect is the same\n\ as specifying the -R option: a random value is used to seed the hashes of\n\ str, bytes and datetime objects. It can also be set to an integer\n\ in the range [0,4294967295] to get hash values with a predictable seed.\n\ +PYTHONINTMAXSTRDIGITS: limits the maximum digit characters in an integer value\n\ + when converting from a string and when converting an integer back to a str.\n\ + A value of 0 disables the limit. Conversions to or from bases 2, 4, 8,\n\ + 16, and 32 are never limited.\n\ "; @@ -482,6 +486,14 @@ Py_Main(int argc, char **argv) free(buf); } + /* The variable is only tested for existence here; _PyLongMaxStrDigits_Init + will check its value further. */ + if (!Py_LongMaxStrDigitsFlag && + (p = Py_GETENV("PYTHONINTMAXSTRDIGITS")) && *p != '\0') + Py_LongMaxStrDigitsFlag = 1; + + _PyLongMaxStrDigits_Init(); + if (command == NULL && module == NULL && _PyOS_optind < argc && strcmp(argv[_PyOS_optind], "-") != 0) { diff --git a/Objects/longobject.c b/Objects/longobject.c index c05f67c36c72c8..eceb6290757f1e 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -1338,7 +1338,7 @@ long_to_decimal_string(PyObject *aa, int addL) { PyLongObject *scratch, *a; PyObject *str; - Py_ssize_t size, strlen, size_a, i, j; + Py_ssize_t size, digitlen, strlen, size_a, i, j; digit *pout, *pin, rem, tenpow; char *p; int negative; @@ -1401,13 +1401,23 @@ long_to_decimal_string(PyObject *aa, int addL) pout[size++] = 0; /* calculate exact length of output string, and allocate */ - strlen = (addL != 0) + negative + - 1 + (size - 1) * _PyLong_DECIMAL_SHIFT; + digitlen = 1 + (size - 1) * _PyLong_DECIMAL_SHIFT; tenpow = 10; rem = pout[size-1]; while (rem >= tenpow) { tenpow *= 10; - strlen++; + digitlen++; + } + strlen = (addL != 0) + negative + digitlen; + if (digitlen > 
_PY_LONG_MAX_STR_DIGITS_THRESHOLD) { + PyInterpreterState *interp = PyThreadState_GET()->interp; + int max_str_digits = interp->long_max_str_digits; + if ((max_str_digits > 0) && (digitlen > max_str_digits)) { + Py_DECREF(scratch); + PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT, + max_str_digits, digitlen); + return NULL; + } } str = PyString_FromStringAndSize(NULL, strlen); if (str == NULL) { @@ -1761,6 +1771,7 @@ PyLong_FromString(char *str, char **pend, int base) start = str; if ((base & (base - 1)) == 0) + /* binary bases are not limited by long_max_str_digits */ z = long_from_binary_base(&str, base); else { /*** @@ -1885,6 +1896,17 @@ digit beyond the first. while (_PyLong_DigitValue[Py_CHARMASK(*scan)] < base) ++scan; + /* Limit the size to avoid excessive computation attacks. */ + if ((scan - str) > _PY_LONG_MAX_STR_DIGITS_THRESHOLD) { + PyInterpreterState *interp = PyThreadState_GET()->interp; + int max_str_digits = interp->long_max_str_digits; + if ((max_str_digits > 0) && ((scan - str) > max_str_digits)) { + PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT, + max_str_digits, (scan - str)); + return NULL; + } + } + /* Create a long object that can contain the largest possible * integer with this base and length. Note that there's no * need to initialize z->ob_digit -- no slot is read up before @@ -4367,6 +4389,8 @@ internal representation of integers. 
The attributes are read only."); static PyStructSequence_Field long_info_fields[] = { {"bits_per_digit", "size of a digit in bits"}, {"sizeof_digit", "size in bytes of the C type used to represent a digit"}, + {"default_max_str_digits", "maximum string conversion digits limitation"}, + {"str_digits_check_threshold", "minimum positive value for long_max_str_digits"}, {NULL, NULL} }; @@ -4374,7 +4398,7 @@ static PyStructSequence_Desc long_info_desc = { "sys.long_info", /* name */ long_info__doc__, /* doc */ long_info_fields, /* fields */ - 2 /* number of fields */ + 4 /* number of fields */ }; PyObject * @@ -4389,6 +4413,10 @@ PyLong_GetInfo(void) PyInt_FromLong(PyLong_SHIFT)); PyStructSequence_SET_ITEM(long_info, field++, PyInt_FromLong(sizeof(digit))); + PyStructSequence_SET_ITEM(long_info, field++, + PyLong_FromLong(_PY_LONG_DEFAULT_MAX_STR_DIGITS)); + PyStructSequence_SET_ITEM(long_info, field++, + PyLong_FromLong(_PY_LONG_MAX_STR_DIGITS_THRESHOLD)); if (PyErr_Occurred()) { Py_CLEAR(long_info); return NULL; @@ -4399,8 +4427,45 @@ PyLong_GetInfo(void) int _PyLong_Init(void) { + PyInterpreterState *interp = PyThreadState_GET()->interp; /* initialize long_info */ if (Long_InfoType.tp_name == 0) PyStructSequence_InitType(&Long_InfoType, &long_info_desc); + interp->long_max_str_digits = Py_LongMaxStrDigits; + if (interp->long_max_str_digits == -1) { + interp->long_max_str_digits = _PY_LONG_DEFAULT_MAX_STR_DIGITS; + } return 1; } + + +void +_PyLongMaxStrDigits_Init(void) +{ + char *env; + const char *endptr; + long maxdigits; + + + if (Py_LongMaxStrDigits >= 0 || + !Py_LongMaxStrDigitsFlag) + return; + + env = Py_GETENV("PYTHONINTMAXSTRDIGITS"); + if (env && *env != '\0') { + errno = 0; + maxdigits = strtol(env, (char **)&endptr, 10); + if (*endptr != '\0' || errno == ERANGE || maxdigits < INT_MIN || maxdigits > INT_MAX || + !((maxdigits == 0) || (maxdigits >= _PY_LONG_MAX_STR_DIGITS_THRESHOLD))) { +#define STRINGIFY(VAL) _STRINGIFY(VAL) +#define _STRINGIFY(VAL) #VAL + 
Py_FatalError( + "PYTHONINTMAXSTRDIGITS: invalid limit; must be >= " + STRINGIFY(_PY_LONG_MAX_STR_DIGITS_THRESHOLD) + " or 0 for unlimited."); +#undef _STRINGIFY +#undef STRINGIFY + } + Py_LongMaxStrDigits = (int)maxdigits; + } +} diff --git a/Python/pystate.c b/Python/pystate.c index f33f18202360f5..3a4f0a9db32b2a 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -84,6 +84,7 @@ PyInterpreterState_New(void) #ifdef WITH_TSC interp->tscdump = 0; #endif + interp->long_max_str_digits = -1; HEAD_LOCK(); interp->next = interp_head; diff --git a/Python/pythonrun.c b/Python/pythonrun.c index abdfb146ae51f1..22ca16fe47daea 100644 --- a/Python/pythonrun.c +++ b/Python/pythonrun.c @@ -82,6 +82,8 @@ int Py_IgnoreEnvironmentFlag; /* e.g. PYTHONPATH, PYTHONHOME */ int _Py_QnewFlag = 0; int Py_NoUserSiteDirectory = 0; /* for -s and site.py */ int Py_HashRandomizationFlag = 0; /* for -R and PYTHONHASHSEED */ +int Py_LongMaxStrDigitsFlag = 0; /* for PYTHONINTMAXSTRDIGITS */ +int Py_LongMaxStrDigits = -1; /* for longobject.c */ /* Hack to force loading of object files */ @@ -197,8 +199,12 @@ Py_InitializeEx(int install_sigs) check its value further. */ if ((p = Py_GETENV("PYTHONHASHSEED")) && *p != '\0') Py_HashRandomizationFlag = add_flag(Py_HashRandomizationFlag, p); - _PyRandom_Init(); + /* The variable is only tested for existence here; _PyLongMaxStrDigits_Init + will check its value further. 
*/ + if ((p = Py_GETENV("PYTHONINTMAXSTRDIGITS")) && *p != '\0') + Py_LongMaxStrDigitsFlag = add_flag(Py_LongMaxStrDigitsFlag, p); + _PyLongMaxStrDigits_Init(); interp = PyInterpreterState_New(); if (interp == NULL) diff --git a/Python/sysmodule.c b/Python/sysmodule.c index fdb7af2f5f6764..fde6a8448eeec7 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -668,6 +668,46 @@ The flag constants are defined in the ctypes and DLFCN modules."); #endif /* HAVE_DLOPEN */ +static PyObject * +sys_set_int_max_str_digits(PyObject *self, PyObject *args) +{ + int new_val; + PyThreadState *tstate = PyThreadState_GET(); + if (!PyArg_ParseTuple(args, "i:set_int_max_str_digits", &new_val)) + return NULL; + if (!tstate) + return NULL; + if ((!new_val) || new_val >= (_PY_LONG_MAX_STR_DIGITS_THRESHOLD)) { + tstate->interp->long_max_str_digits = new_val; + } else { + PyErr_Format( + PyExc_ValueError, "maxdigits must be 0 or larger than %d", + _PY_LONG_MAX_STR_DIGITS_THRESHOLD); + return NULL; + } + Py_INCREF(Py_None); + return Py_None; +} + +PyDoc_STRVAR(set_int_max_str_digits_doc, +"set_int_max_str_digits(maxdigits) -> None\n\ +\n\ +Set the maximum string digits limit for non-binary int<->str conversions."); + +static PyObject * +sys_get_int_max_str_digits(PyObject *self, PyObject *args) +{ + PyThreadState *tstate = PyThreadState_GET(); + if (!tstate) + return NULL; + return PyInt_FromLong(tstate->interp->long_max_str_digits); +} + +PyDoc_STRVAR(get_int_max_str_digits_doc, +"get_int_max_str_digits() -> int\n\ +\n\ +Return the maximum string digits limit for non-binary int<->str conversions."); + #ifdef USE_MALLOPT /* Link with -lmalloc (or -lmpc) on an SGI */ #include @@ -937,6 +977,8 @@ static PyMethodDef sys_methods[] = { {"getdlopenflags", (PyCFunction)sys_getdlopenflags, METH_NOARGS, getdlopenflags_doc}, #endif + {"get_int_max_str_digits", (PyCFunction)sys_get_int_max_str_digits, + METH_NOARGS, get_int_max_str_digits_doc}, #ifdef COUNT_ALLOCS {"getcounts", 
(PyCFunction)sys_getcounts, METH_NOARGS}, #endif @@ -978,6 +1020,8 @@ static PyMethodDef sys_methods[] = { {"setdlopenflags", sys_setdlopenflags, METH_VARARGS, setdlopenflags_doc}, #endif + {"set_int_max_str_digits", (PyCFunction)sys_set_int_max_str_digits, + METH_VARARGS, set_int_max_str_digits_doc}, {"setprofile", sys_setprofile, METH_O, setprofile_doc}, {"getprofile", sys_getprofile, METH_NOARGS, getprofile_doc}, {"setrecursionlimit", sys_setrecursionlimit, METH_VARARGS, @@ -1139,6 +1183,7 @@ exc_info() -- return thread-safe information about the current exception\n\ exc_clear() -- clear the exception state for the current thread\n\ exit() -- exit the interpreter by raising SystemExit\n\ getdlopenflags() -- returns flags to be used for dlopen() calls\n\ +get_int_max_str_digits() -- returns the length limit for non-binary int<->str conversions\n\ getprofile() -- get the global profiling function\n\ getrefcount() -- return the reference count for an object (plus one :-)\n\ getrecursionlimit() -- return the max recursion depth for the interpreter\n\ @@ -1146,6 +1191,7 @@ getsizeof() -- return the size of an object in bytes\n\ gettrace() -- get the global debug tracing function\n\ setcheckinterval() -- control how often the interpreter checks for events\n\ setdlopenflags() -- set the flags to be used for dlopen() calls\n\ +set_int_max_str_digits() -- set the length limit for non-binary int<->str conversions\n\ setprofile() -- set the global profiling function\n\ setrecursionlimit() -- set the max recursion depth for the interpreter\n\ settrace() -- set the global debug tracing function\n\ @@ -1227,6 +1273,7 @@ static PyStructSequence_Field flags_fields[] = { /* {"skip_first", "-x"}, */ {"bytes_warning", "-b"}, {"hash_randomization", "-R"}, + {"int_max_str_digits", "PYTHONINTMAXSTRDIGITS"}, {0} }; @@ -1235,9 +1282,9 @@ static PyStructSequence_Desc flags_desc = { flags__doc__, /* doc */ flags_fields, /* fields */ #ifdef RISCOS - 17 + 18 #else - 16 + 17 #endif }; @@ -1275,6 +1322,7 @@ 
make_flags(void) /* SetFlag(skipfirstline); */ SetFlag(Py_BytesWarningFlag); SetFlag(Py_HashRandomizationFlag); + SetFlag(Py_LongMaxStrDigits); #undef SetFlag if (PyErr_Occurred()) {