Skip to content

Commit 185111d

Browse files
committed
Update base64.py and test_base64.py from CPython v3.11.2
1 parent 439b44e commit 185111d

File tree

2 files changed

+195
-73
lines changed

2 files changed

+195
-73
lines changed

Lib/base64.py

+73 -61
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
#! /usr/bin/python3.6
1+
#! /usr/bin/env python3
22

33
"""Base16, Base32, Base64 (RFC 3548), Base85 and Ascii85 data encodings"""
44

@@ -16,7 +16,7 @@
1616
'encode', 'decode', 'encodebytes', 'decodebytes',
1717
# Generalized interface for other encodings
1818
'b64encode', 'b64decode', 'b32encode', 'b32decode',
19-
'b16encode', 'b16decode',
19+
'b32hexencode', 'b32hexdecode', 'b16encode', 'b16decode',
2020
# Base85 and Ascii85 encodings
2121
'b85encode', 'b85decode', 'a85encode', 'a85decode',
2222
# Standard Base64 encoding
@@ -76,15 +76,16 @@ def b64decode(s, altchars=None, validate=False):
7676
normal base-64 alphabet nor the alternative alphabet are discarded prior
7777
to the padding check. If validate is True, these non-alphabet characters
7878
in the input result in a binascii.Error.
79+
For more information about the strict base64 check, see:
80+
81+
https://docs.python.org/3.11/library/binascii.html#binascii.a2b_base64
7982
"""
8083
s = _bytes_from_decode_data(s)
8184
if altchars is not None:
8285
altchars = _bytes_from_decode_data(altchars)
8386
assert len(altchars) == 2, repr(altchars)
8487
s = s.translate(bytes.maketrans(altchars, b'+/'))
85-
if validate and not re.match(b'^[A-Za-z0-9+/]*={0,2}$', s):
86-
raise binascii.Error('Non-base64 digit found')
87-
return binascii.a2b_base64(s)
88+
return binascii.a2b_base64(s, strict_mode=validate)
8889

8990

9091
def standard_b64encode(s):
@@ -135,19 +136,40 @@ def urlsafe_b64decode(s):
135136

136137

137138
# Base32 encoding/decoding must be done in Python
139+
_B32_ENCODE_DOCSTRING = '''
140+
Encode the bytes-like objects using {encoding} and return a bytes object.
141+
'''
142+
_B32_DECODE_DOCSTRING = '''
143+
Decode the {encoding} encoded bytes-like object or ASCII string s.
144+
145+
Optional casefold is a flag specifying whether a lowercase alphabet is
146+
acceptable as input. For security purposes, the default is False.
147+
{extra_args}
148+
The result is returned as a bytes object. A binascii.Error is raised if
149+
the input is incorrectly padded or if there are non-alphabet
150+
characters present in the input.
151+
'''
152+
_B32_DECODE_MAP01_DOCSTRING = '''
153+
RFC 3548 allows for optional mapping of the digit 0 (zero) to the
154+
letter O (oh), and for optional mapping of the digit 1 (one) to
155+
either the letter I (eye) or letter L (el). The optional argument
156+
map01 when not None, specifies which letter the digit 1 should be
157+
mapped to (when map01 is not None, the digit 0 is always mapped to
158+
the letter O). For security purposes the default is None, so that
159+
0 and 1 are not allowed in the input.
160+
'''
138161
_b32alphabet = b'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'
139-
_b32tab2 = None
140-
_b32rev = None
162+
_b32hexalphabet = b'0123456789ABCDEFGHIJKLMNOPQRSTUV'
163+
_b32tab2 = {}
164+
_b32rev = {}
141165

142-
def b32encode(s):
143-
"""Encode the bytes-like object s using Base32 and return a bytes object.
144-
"""
166+
def _b32encode(alphabet, s):
145167
global _b32tab2
146168
# Delay the initialization of the table to not waste memory
147169
# if the function is never called
148-
if _b32tab2 is None:
149-
b32tab = [bytes((i,)) for i in _b32alphabet]
150-
_b32tab2 = [a + b for a in b32tab for b in b32tab]
170+
if alphabet not in _b32tab2:
171+
b32tab = [bytes((i,)) for i in alphabet]
172+
_b32tab2[alphabet] = [a + b for a in b32tab for b in b32tab]
151173
b32tab = None
152174

153175
if not isinstance(s, bytes_types):
@@ -158,9 +180,9 @@ def b32encode(s):
158180
s = s + b'\0' * (5 - leftover) # Don't use += !
159181
encoded = bytearray()
160182
from_bytes = int.from_bytes
161-
b32tab2 = _b32tab2
183+
b32tab2 = _b32tab2[alphabet]
162184
for i in range(0, len(s), 5):
163-
c = from_bytes(s[i: i + 5], 'big')
185+
c = from_bytes(s[i: i + 5]) # big endian
164186
encoded += (b32tab2[c >> 30] + # bits 1 - 10
165187
b32tab2[(c >> 20) & 0x3ff] + # bits 11 - 20
166188
b32tab2[(c >> 10) & 0x3ff] + # bits 21 - 30
@@ -177,29 +199,12 @@ def b32encode(s):
177199
encoded[-1:] = b'='
178200
return bytes(encoded)
179201

180-
def b32decode(s, casefold=False, map01=None):
181-
"""Decode the Base32 encoded bytes-like object or ASCII string s.
182-
183-
Optional casefold is a flag specifying whether a lowercase alphabet is
184-
acceptable as input. For security purposes, the default is False.
185-
186-
RFC 3548 allows for optional mapping of the digit 0 (zero) to the
187-
letter O (oh), and for optional mapping of the digit 1 (one) to
188-
either the letter I (eye) or letter L (el). The optional argument
189-
map01 when not None, specifies which letter the digit 1 should be
190-
mapped to (when map01 is not None, the digit 0 is always mapped to
191-
the letter O). For security purposes the default is None, so that
192-
0 and 1 are not allowed in the input.
193-
194-
The result is returned as a bytes object. A binascii.Error is raised if
195-
the input is incorrectly padded or if there are non-alphabet
196-
characters present in the input.
197-
"""
202+
def _b32decode(alphabet, s, casefold=False, map01=None):
198203
global _b32rev
199204
# Delay the initialization of the table to not waste memory
200205
# if the function is never called
201-
if _b32rev is None:
202-
_b32rev = {v: k for k, v in enumerate(_b32alphabet)}
206+
if alphabet not in _b32rev:
207+
_b32rev[alphabet] = {v: k for k, v in enumerate(alphabet)}
203208
s = _bytes_from_decode_data(s)
204209
if len(s) % 8:
205210
raise binascii.Error('Incorrect padding')
@@ -220,7 +225,7 @@ def b32decode(s, casefold=False, map01=None):
220225
padchars = l - len(s)
221226
# Now decode the full quanta
222227
decoded = bytearray()
223-
b32rev = _b32rev
228+
b32rev = _b32rev[alphabet]
224229
for i in range(0, len(s), 8):
225230
quanta = s[i: i + 8]
226231
acc = 0
@@ -229,18 +234,38 @@ def b32decode(s, casefold=False, map01=None):
229234
acc = (acc << 5) + b32rev[c]
230235
except KeyError:
231236
raise binascii.Error('Non-base32 digit found') from None
232-
decoded += acc.to_bytes(5, 'big')
237+
decoded += acc.to_bytes(5) # big endian
233238
# Process the last, partial quanta
234239
if l % 8 or padchars not in {0, 1, 3, 4, 6}:
235240
raise binascii.Error('Incorrect padding')
236241
if padchars and decoded:
237242
acc <<= 5 * padchars
238-
last = acc.to_bytes(5, 'big')
243+
last = acc.to_bytes(5) # big endian
239244
leftover = (43 - 5 * padchars) // 8 # 1: 4, 3: 3, 4: 2, 6: 1
240245
decoded[-5:] = last[:leftover]
241246
return bytes(decoded)
242247

243248

249+
def b32encode(s):
250+
return _b32encode(_b32alphabet, s)
251+
b32encode.__doc__ = _B32_ENCODE_DOCSTRING.format(encoding='base32')
252+
253+
def b32decode(s, casefold=False, map01=None):
254+
return _b32decode(_b32alphabet, s, casefold, map01)
255+
b32decode.__doc__ = _B32_DECODE_DOCSTRING.format(encoding='base32',
256+
extra_args=_B32_DECODE_MAP01_DOCSTRING)
257+
258+
def b32hexencode(s):
259+
return _b32encode(_b32hexalphabet, s)
260+
b32hexencode.__doc__ = _B32_ENCODE_DOCSTRING.format(encoding='base32hex')
261+
262+
def b32hexdecode(s, casefold=False):
263+
# base32hex does not have the 01 mapping
264+
return _b32decode(_b32hexalphabet, s, casefold)
265+
b32hexdecode.__doc__ = _B32_DECODE_DOCSTRING.format(encoding='base32hex',
266+
extra_args='')
267+
268+
244269
# RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns
245270
# lowercase. The RFC also recommends against accepting input case
246271
# insensitively.
@@ -320,7 +345,7 @@ def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):
320345
global _a85chars, _a85chars2
321346
# Delay the initialization of tables to not waste memory
322347
# if the function is never called
323-
if _a85chars is None:
348+
if _a85chars2 is None:
324349
_a85chars = [bytes((i,)) for i in range(33, 118)]
325350
_a85chars2 = [(a + b) for a in _a85chars for b in _a85chars]
326351

@@ -428,7 +453,7 @@ def b85encode(b, pad=False):
428453
global _b85chars, _b85chars2
429454
# Delay the initialization of tables to not waste memory
430455
# if the function is never called
431-
if _b85chars is None:
456+
if _b85chars2 is None:
432457
_b85chars = [bytes((i,)) for i in _b85alphabet]
433458
_b85chars2 = [(a + b) for a in _b85chars for b in _b85chars]
434459
return _85encode(b, _b85chars, _b85chars2, pad)
@@ -531,49 +556,36 @@ def encodebytes(s):
531556
pieces.append(binascii.b2a_base64(chunk))
532557
return b"".join(pieces)
533558

534-
def encodestring(s):
535-
"""Legacy alias of encodebytes()."""
536-
import warnings
537-
warnings.warn("encodestring() is a deprecated alias since 3.1, "
538-
"use encodebytes()",
539-
DeprecationWarning, 2)
540-
return encodebytes(s)
541-
542559

543560
def decodebytes(s):
544561
"""Decode a bytestring of base-64 data into a bytes object."""
545562
_input_type_check(s)
546563
return binascii.a2b_base64(s)
547564

548-
def decodestring(s):
549-
"""Legacy alias of decodebytes()."""
550-
import warnings
551-
warnings.warn("decodestring() is a deprecated alias since Python 3.1, "
552-
"use decodebytes()",
553-
DeprecationWarning, 2)
554-
return decodebytes(s)
555-
556565

557566
# Usable as a script...
558567
def main():
559568
"""Small main program"""
560569
import sys, getopt
570+
usage = """usage: %s [-h|-d|-e|-u|-t] [file|-]
571+
-h: print this help message and exit
572+
-d, -u: decode
573+
-e: encode (default)
574+
-t: encode and decode string 'Aladdin:open sesame'"""%sys.argv[0]
561575
try:
562-
opts, args = getopt.getopt(sys.argv[1:], 'deut')
576+
opts, args = getopt.getopt(sys.argv[1:], 'hdeut')
563577
except getopt.error as msg:
564578
sys.stdout = sys.stderr
565579
print(msg)
566-
print("""usage: %s [-d|-e|-u|-t] [file|-]
567-
-d, -u: decode
568-
-e: encode (default)
569-
-t: encode and decode string 'Aladdin:open sesame'"""%sys.argv[0])
580+
print(usage)
570581
sys.exit(2)
571582
func = encode
572583
for o, a in opts:
573584
if o == '-e': func = encode
574585
if o == '-d': func = decode
575586
if o == '-u': func = decode
576587
if o == '-t': test(); return
588+
if o == '-h': print(usage); return
577589
if args and args[0] != '-':
578590
with open(args[0], 'rb') as f:
579591
func(f, sys.stdout.buffer)

0 commit comments

Comments
 (0)