TechPenguineer · pull · May 27, 2025 · May 27, 2025 · May 27, 2025 · May 27, 2025
diff --git a/Doc/library/asyncio-stream.rst b/Doc/library/asyncio-stream.rst
@@ -171,13 +171,17 @@ and work with streams:
 .. function:: start_unix_server(client_connected_cb, path=None, \
                  *, limit=None, sock=None, backlog=100, ssl=None, \
                  ssl_handshake_timeout=None, \
-                 ssl_shutdown_timeout=None, start_serving=True)
+                 ssl_shutdown_timeout=None, start_serving=True, cleanup_socket=True)
    :async:
 
    Start a Unix socket server.
 
    Similar to :func:`start_server` but works with Unix sockets.
 
+   If *cleanup_socket* is true then the Unix socket will automatically
+   be removed from the filesystem when the server is closed, unless the
+   socket has been replaced after the server has been created.
+
    See also the documentation of :meth:`loop.create_unix_server`.
 
    .. note::
@@ -198,6 +202,9 @@ and work with streams:
    .. versionchanged:: 3.11
       Added the *ssl_shutdown_timeout* parameter.
 
+   .. versionchanged:: 3.13
+      Added the *cleanup_socket* parameter.
+
 
 StreamReader
 ============

diff --git a/Doc/library/zlib.rst b/Doc/library/zlib.rst
@@ -44,6 +44,20 @@ The available exception and functions in this module are:
    .. versionchanged:: 3.0
       The result is always unsigned.
 
+.. function:: adler32_combine(adler1, adler2, len2, /)
+
+   Combine two Adler-32 checksums into one.
+
+   Given the Adler-32 checksum *adler1* of a sequence ``A`` and the
+   Adler-32 checksum *adler2* of a sequence ``B`` of length *len2*,
+   return the Adler-32 checksum of ``A`` and ``B`` concatenated.
+
+   This function is typically useful to combine Adler-32 checksums
+   that were concurrently computed. To compute checksums sequentially, use
+   :func:`adler32` with the running checksum as the ``value`` argument.
+
+   .. versionadded:: next
+
 .. function:: compress(data, /, level=-1, wbits=MAX_WBITS)
 
    Compresses the bytes in *data*, returning a bytes object containing compressed data.
@@ -136,6 +150,20 @@ The available exception and functions in this module are:
    .. versionchanged:: 3.0
       The result is always unsigned.
 
+.. function:: crc32_combine(crc1, crc2, len2, /)
+
+   Combine two CRC-32 checksums into one.
+
+   Given the CRC-32 checksum *crc1* of a sequence ``A`` and the
+   CRC-32 checksum *crc2* of a sequence ``B`` of length *len2*,
+   return the CRC-32 checksum of ``A`` and ``B`` concatenated.
+
+   This function is typically useful to combine CRC-32 checksums
+   that were concurrently computed. To compute checksums sequentially, use
+   :func:`crc32` with the running checksum as the ``value`` argument.
+
+   .. versionadded:: next
+
 .. function:: decompress(data, /, wbits=MAX_WBITS, bufsize=DEF_BUF_SIZE)
 
    Decompresses the bytes in *data*, returning a bytes object containing the

diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst
@@ -97,6 +97,16 @@ ssl
   (Contributed by Will Childs-Klein in :gh:`133624`.)
 
 
+zlib
+----
+
+* Allow combining two Adler-32 checksums via :func:`~zlib.adler32_combine`.
+  (Contributed by Callum Attryde and Bénédikt Tran in :gh:`134635`.)
+
+* Allow combining two CRC-32 checksums via :func:`~zlib.crc32_combine`.
+  (Contributed by Bénédikt Tran in :gh:`134635`.)
+
+
 .. Add improved modules above alphabetically, not here at the end.
 
 Optimizations

diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
@@ -1,6 +1,7 @@
 import codecs
 import contextlib
 import copy
+import importlib
 import io
 import pickle
 import os
@@ -3111,9 +3112,9 @@ def test_aliases(self):
     def test_alias_modules_exist(self):
         encodings_dir = os.path.dirname(encodings.__file__)
         for value in encodings.aliases.aliases.values():
-            codec_file = os.path.join(encodings_dir, value + ".py")
-            self.assertTrue(os.path.isfile(codec_file),
-                            "Codec file not found: " + codec_file)
+            codec_mod = f"encodings.{value}"
+            self.assertIsNotNone(importlib.util.find_spec(codec_mod),
+                                 f"Codec module not found: {codec_mod}")
 
     def test_quopri_stateless(self):
         # Should encode with quotetabs=True

diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
@@ -3241,39 +3241,40 @@ def test_exact_flag(self):
 
 
 class StringPrefixTest(unittest.TestCase):
-    def test_prefixes(self):
-        # Get the list of defined string prefixes.  I don't see an
-        # obvious documented way of doing this, but probably the best
-        # thing is to split apart tokenize.StringPrefix.
-
-        # Make sure StringPrefix begins and ends in parens.
-        self.assertEqual(tokenize.StringPrefix[0], '(')
-        self.assertEqual(tokenize.StringPrefix[-1], ')')
-
-        # Then split apart everything else by '|'.
-        defined_prefixes = set(tokenize.StringPrefix[1:-1].split('|'))
-
-        # Now compute the actual string prefixes, by exec-ing all
-        # valid prefix combinations, followed by an empty string.
-
-        # Try all prefix lengths until we find a length that has zero
-        # valid prefixes.  This will miss the case where for example
-        # there are no valid 3 character prefixes, but there are valid
-        # 4 character prefixes.  That seems extremely unlikely.
-
-        # Note that the empty prefix is being included, because length
-        # starts at 0.  That's expected, since StringPrefix includes
-        # the empty prefix.
+    @staticmethod
+    def determine_valid_prefixes():
+        # Try all lengths until we find a length that has zero valid
+        # prefixes.  This will miss the case where for example there
+        # are no valid 3 character prefixes, but there are valid 4
+        # character prefixes.  That seems unlikely.
+
+        single_char_valid_prefixes = set()
+
+        # Find all of the single character string prefixes.  Only record
+        # the lowercase version; combinations of upper and lower case
+        # are dealt with later.  Uppercase letters are tried as well,
+        # just in case some uppercase-only prefix is ever added.
+        for letter in itertools.chain(string.ascii_lowercase, string.ascii_uppercase):
+            try:
+                eval(f'{letter}""')
+                single_char_valid_prefixes.add(letter.lower())
+            except SyntaxError:
+                pass
 
+        # This logic assumes that all combinations of valid prefixes only use
+        # the characters that are valid single character prefixes.  That seems
+        # like a valid assumption, but if it ever changes this will need
+        # adjusting.
         valid_prefixes = set()
         for length in itertools.count():
             num_at_this_length = 0
             for prefix in (
-                "".join(l) for l in list(itertools.combinations(string.ascii_lowercase, length))
+                "".join(l)
+                for l in itertools.combinations(single_char_valid_prefixes, length)
             ):
                 for t in itertools.permutations(prefix):
                     for u in itertools.product(*[(c, c.upper()) for c in t]):
-                        p = ''.join(u)
+                        p = "".join(u)
                         if p == "not":
                             # 'not' can never be a string prefix,
                             # because it's a valid expression: not ""
@@ -3289,9 +3290,26 @@ def test_prefixes(self):
                         except SyntaxError:
                             pass
             if num_at_this_length == 0:
-                break
+                return valid_prefixes
+
+
+    def test_prefixes(self):
+        # Get the list of defined string prefixes.  I don't see an
+        # obvious documented way of doing this, but probably the best
+        # thing is to split apart tokenize.StringPrefix.
+
+        # Make sure StringPrefix begins and ends in parens.  We're
+        # assuming it's of the form "(a|b|ab)", if a, b, and ab are
+        # valid string prefixes.
+        self.assertEqual(tokenize.StringPrefix[0], '(')
+        self.assertEqual(tokenize.StringPrefix[-1], ')')
+
+        # Then split apart everything else by '|'.
+        defined_prefixes = set(tokenize.StringPrefix[1:-1].split('|'))
 
-        self.assertEqual(defined_prefixes, valid_prefixes)
+        # Now compute the actual allowed string prefixes and compare
+        # to what is defined in the tokenize module.
+        self.assertEqual(defined_prefixes, self.determine_valid_prefixes())
 
 
 if __name__ == "__main__":

diff --git a/Lib/test/test_zlib.py b/Lib/test/test_zlib.py
@@ -119,6 +119,114 @@ def test_same_as_binascii_crc32(self):
         self.assertEqual(binascii.crc32(b'spam'), zlib.crc32(b'spam'))
 
 
+class ChecksumCombineMixin:
+    """Mixin class for testing checksum combination."""
+
+    N = 1000
+    default_iv: int
+
+    def parse_iv(self, iv):
+        """Parse an IV value.
+
+        - The default IV is returned if *iv* is None.
+        - A random IV is returned if *iv* is -1.
+        - Otherwise, *iv* is returned as is.
+        """
+        if iv is None:
+            return self.default_iv
+        if iv == -1:
+            return random.randint(1, 0x80000000)
+        return iv
+
+    def checksum(self, data, init=None):
+        """Compute the checksum of data with a given initial value.
+
+        The *init* value is parsed by ``parse_iv``.
+        """
+        iv = self.parse_iv(init)
+        return self._checksum(data, iv)
+
+    def _checksum(self, data, init):
+        raise NotImplementedError
+
+    def combine(self, a, b, blen):
+        """Combine two checksums together."""
+        raise NotImplementedError
+
+    def get_random_data(self, data_len, *, iv=None):
+        """Get a triplet (data, iv, checksum)."""
+        data = random.randbytes(data_len)
+        init = self.parse_iv(iv)
+        checksum = self.checksum(data, init)
+        return data, init, checksum
+
+    def test_combine_empty(self):
+        for _ in range(self.N):
+            a, iv, checksum = self.get_random_data(32, iv=-1)
+            res = self.combine(iv, self.checksum(a), len(a))
+            self.assertEqual(res, checksum)
+
+    def test_combine_no_iv(self):
+        for _ in range(self.N):
+            a, _, chk_a = self.get_random_data(32)
+            b, _, chk_b = self.get_random_data(64)
+            res = self.combine(chk_a, chk_b, len(b))
+            self.assertEqual(res, self.checksum(a + b))
+
+    def test_combine_no_iv_invalid_length(self):
+        a, _, chk_a = self.get_random_data(32)
+        b, _, chk_b = self.get_random_data(64)
+        checksum = self.checksum(a + b)
+        for invalid_len in [1, len(a), 48, len(b) + 1, 191]:
+            invalid_res = self.combine(chk_a, chk_b, invalid_len)
+            self.assertNotEqual(invalid_res, checksum)
+
+        self.assertRaises(TypeError, self.combine, 0, 0, "len")
+
+    def test_combine_with_iv(self):
+        for _ in range(self.N):
+            a, iv_a, chk_a_with_iv = self.get_random_data(32, iv=-1)
+            chk_a_no_iv = self.checksum(a)
+            b, iv_b, chk_b_with_iv = self.get_random_data(64, iv=-1)
+            chk_b_no_iv = self.checksum(b)
+
+            # We can represent c = COMBINE(CHK(a, iv_a), CHK(b, iv_b)) as:
+            #
+            #   c = CHK(CHK(b'', iv_a) + CHK(a) + CHK(b'', iv_b) + CHK(b))
+            #     = COMBINE(
+            #           COMBINE(CHK(b'', iv_a), CHK(a)),
+            #           COMBINE(CHK(b'', iv_b), CHK(b)),
+            #       )
+            #     = COMBINE(COMBINE(iv_a, CHK(a)), COMBINE(iv_b, CHK(b)))
+            tmp0 = self.combine(iv_a, chk_a_no_iv, len(a))
+            tmp1 = self.combine(iv_b, chk_b_no_iv, len(b))
+            expected = self.combine(tmp0, tmp1, len(b))
+            checksum = self.combine(chk_a_with_iv, chk_b_with_iv, len(b))
+            self.assertEqual(checksum, expected)
+
+
+class CRC32CombineTestCase(ChecksumCombineMixin, unittest.TestCase):
+
+    default_iv = 0
+
+    def _checksum(self, data, init):
+        return zlib.crc32(data, init)
+
+    def combine(self, a, b, blen):
+        return zlib.crc32_combine(a, b, blen)
+
+
+class Adler32CombineTestCase(ChecksumCombineMixin, unittest.TestCase):
+
+    default_iv = 1
+
+    def _checksum(self, data, init):
+        return zlib.adler32(data, init)
+
+    def combine(self, a, b, blen):
+        return zlib.adler32_combine(a, b, blen)
+
+
 # Issue #10276 - check that inputs >=4 GiB are handled correctly.
 class ChecksumBigBufferTestCase(unittest.TestCase):
 

diff --git a/Misc/NEWS.d/next/Library/2025-05-24-13-10-35.gh-issue-134210.0IuMY2.rst b/Misc/NEWS.d/next/Library/2025-05-24-13-10-35.gh-issue-134210.0IuMY2.rst
@@ -0,0 +1,2 @@
+:func:`curses.window.getch` now correctly handles signals. Patch by Bénédikt
+Tran.
diff --git a/Misc/NEWS.d/next/Library/2025-05-25-13-46-37.gh-issue-134635.ZlPrlX.rst b/Misc/NEWS.d/next/Library/2025-05-25-13-46-37.gh-issue-134635.ZlPrlX.rst
@@ -0,0 +1,3 @@
+:mod:`zlib`: Allow combining Adler-32 and CRC-32 checksums via
+:func:`~zlib.adler32_combine` and :func:`~zlib.crc32_combine`. Patch by
+Callum Attryde and Bénédikt Tran.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		:func:`curses.window.getch` now correctly handles signals. Patch by Bénédikt
		Tran.