From 3cd3292d5b477319e21435cf0933d05a422febbc Mon Sep 17 00:00:00 2001 From: andreea-popescu-reef <160024917+andreea-popescu-reef@users.noreply.github.com> Date: Fri, 15 Mar 2024 23:26:58 +0800 Subject: [PATCH] Escape Control Characters (#480) * fix * fixes * tqdm * add to v2 --- b2sdk/_v3/__init__.py | 1 + b2sdk/progress.py | 6 +- b2sdk/raw_api.py | 19 +----- b2sdk/sync/action.py | 9 +-- b2sdk/utils/escape.py | 58 +++++++++++++++++++ b2sdk/v2/__init__.py | 3 +- b2sdk/v2/raw_api.py | 3 + .../+escape_control_characters.added.md | 1 + test/unit/utils/test_escape.py | 32 ++++++++++ 9 files changed, 108 insertions(+), 24 deletions(-) create mode 100644 b2sdk/utils/escape.py create mode 100644 changelog.d/+escape_control_characters.added.md create mode 100644 test/unit/utils/test_escape.py diff --git a/b2sdk/_v3/__init__.py b/b2sdk/_v3/__init__.py index 1817efe72..697ecbdef 100644 --- a/b2sdk/_v3/__init__.py +++ b/b2sdk/_v3/__init__.py @@ -257,6 +257,7 @@ ) from b2sdk.session import B2Session from b2sdk.utils.thread_pool import ThreadPoolMixin +from b2sdk.utils.escape import unprintable_to_hex, escape_control_chars, substitute_control_chars # filter from b2sdk.filter import FilterType, Filter diff --git a/b2sdk/progress.py b/b2sdk/progress.py index 1db55b839..504b1f3cd 100644 --- a/b2sdk/progress.py +++ b/b2sdk/progress.py @@ -12,6 +12,8 @@ import time from abc import ABCMeta, abstractmethod +from .utils.escape import escape_control_chars + try: from tqdm import tqdm # displays a nice progress bar except ImportError: @@ -112,7 +114,7 @@ def __init__(self, *args, **kwargs): def set_total_bytes(self, total_byte_count: int) -> None: if self.tqdm is None: self.tqdm = tqdm( - desc=self.description, + desc=escape_control_chars(self.description), total=total_byte_count, unit='B', unit_scale=True, @@ -159,7 +161,7 @@ def bytes_completed(self, byte_count: int) -> None: elapsed = now - self.last_time if 3 <= elapsed and self.total != 0: if not self.any_printed: - 
print(self.description) + print(escape_control_chars(self.description)) print(' %d%%' % int(100.0 * byte_count / self.total)) self.last_time = now self.any_printed = True diff --git a/b2sdk/raw_api.py b/b2sdk/raw_api.py index abbd0fe1c..403415573 100644 --- a/b2sdk/raw_api.py +++ b/b2sdk/raw_api.py @@ -10,12 +10,12 @@ from __future__ import annotations import base64 -import re from abc import ABCMeta, abstractmethod from enum import Enum, unique from logging import getLogger from typing import Any +from .utils.escape import unprintable_to_hex from .utils.typing import JSON try: @@ -873,21 +873,6 @@ def update_file_legal_hold( except AccessDenied: raise RetentionWriteError() - def unprintable_to_hex(self, string): - """ - Replace unprintable chars in string with a hex representation. - - :param string: an arbitrary string, possibly with unprintable characters. - :return: the string, with unprintable characters changed to hex (e.g., "\x07") - - """ - unprintables_pattern = re.compile(r'[\x00-\x1f]') - - def hexify(match): - return fr'\x{ord(match.group()):02x}' - - return unprintables_pattern.sub(hexify, string) - def check_b2_filename(self, filename): """ Raise an appropriate exception with details if the filename is unusable. @@ -906,7 +891,7 @@ def check_b2_filename(self, filename): lowest_unicode_value = ord(min(filename)) if lowest_unicode_value < 32: message = "Filename \"{}\" contains code {} (hex {:02x}), less than 32.".format( - self.unprintable_to_hex(filename), lowest_unicode_value, lowest_unicode_value + unprintable_to_hex(filename), lowest_unicode_value, lowest_unicode_value ) raise UnusableFileName(message) # No DEL for you. 
diff --git a/b2sdk/sync/action.py b/b2sdk/sync/action.py index ab123bd4b..306640351 100644 --- a/b2sdk/sync/action.py +++ b/b2sdk/sync/action.py @@ -22,6 +22,7 @@ from ..sync.report import ProgressReport, SyncReport from ..transfer.outbound.outbound_source import OutboundTransferSource from ..transfer.outbound.upload_source import UploadSourceLocalFile +from ..utils.escape import escape_control_chars from .encryption_provider import AbstractSyncEncryptionSettingsProvider from .report import SyncFileReporter @@ -179,7 +180,7 @@ def do_report(self, bucket: Bucket, reporter: ProgressReport) -> None: :param bucket: a Bucket object :param reporter: a place to report errors """ - reporter.print_completion('upload ' + self.relative_name) + reporter.print_completion(f'upload {escape_control_chars(self.relative_name)}') def __str__(self) -> str: return f'b2_upload({self.local_full_path}, {self.b2_file_name}, {self.mod_time_millis})' @@ -255,7 +256,7 @@ def do_report(self, bucket: Bucket, reporter: SyncReport): :param reporter: a place to report errors """ reporter.update_transfer(1, 0) - reporter.print_completion('hide ' + self.relative_name) + reporter.print_completion(f'hide {escape_control_chars(self.relative_name)}') def __str__(self) -> str: return f'b2_hide({self.b2_file_name})' @@ -478,7 +479,7 @@ def do_report(self, bucket: Bucket, reporter: SyncReport): :param reporter: a place to report errors """ reporter.update_transfer(1, 0) - reporter.print_completion('delete ' + self.relative_name + ' ' + self.note) + reporter.print_completion(f"delete {escape_control_chars(self.relative_name)} {self.note}") def __str__(self) -> str: return f'b2_delete({self.b2_file_name}, {self.file_id}, {self.note})' @@ -519,7 +520,7 @@ def do_report(self, bucket: Bucket, reporter: SyncReport): :param reporter: a place to report errors """ reporter.update_transfer(1, 0) - reporter.print_completion('delete ' + self.relative_name) + reporter.print_completion(f'delete 
######################################################################
#
# File: b2sdk/utils/escape.py
#
# Copyright 2023 Backblaze Inc. All Rights Reserved.
#
# License https://www.backblaze.com/using_b2_code.html
#
######################################################################

import re
import shlex

# Characters treated as unprintable: the C0 controls \x00-\x08 and \x0e-\x1f,
# DEL (\x7f) and the C1 range \x80-\x9f.  The whitespace controls \x09-\x0d
# (tab, newline, vertical tab, form feed, carriage return) are deliberately
# left alone.
UNPRINTABLE_PATTERN = re.compile(r'[\x00-\x08\x0e-\x1f\x7f-\x9f]')


def _hexify(match):
    r"""Return the ``\xNN`` textual escape for the single character in *match*."""
    return fr'\x{ord(match.group()):02x}'


def unprintable_to_hex(s):
    r"""
    Replace unprintable chars in string with a hex representation.

    :param s: an arbitrary string, possibly with unprintable characters.
    :return: the string, with unprintable characters changed to hex (e.g., "\x07").
             Falsy input (``None`` or ``''``) is returned unchanged, so an empty
             string stays a string instead of turning into ``None``.
    """
    if not s:
        return s
    return UNPRINTABLE_PATTERN.sub(_hexify, s)


def escape_control_chars(s):
    r"""
    Replace unprintable chars in string with a hex representation AND shell quote the string.

    :param s: an arbitrary string, possibly with unprintable characters.
    :return: the shell-quoted string, with unprintable characters changed to hex
             (e.g., "\x07").  Falsy input (``None`` or ``''``) is returned
             unchanged, so printing the result never shows ``None`` for ``''``.
    """
    if not s:
        return s
    return shlex.quote(unprintable_to_hex(s))


def substitute_control_chars(s):
    """
    Replace unprintable chars in string with the � unicode char.

    :param s: an arbitrary string, possibly with unprintable characters.
    :return: tuple of the string with � replacements made and a boolean indicating
             whether any chars were replaced.  Falsy input (``None`` or ``''``)
             yields ``(s, False)``, matching the ``None`` tolerance of the other
             helpers in this module.
    """
    if not s:
        return (s, False)
    # subn() substitutes and counts in a single pass over the string.
    substituted, replaced_count = UNPRINTABLE_PATTERN.subn('�', s)
    return (substituted, replaced_count > 0)
from b2sdk.utils.escape import escape_control_chars, substitute_control_chars, unprintable_to_hex


def test_unprintable_to_hex():
    """Check all three escape helpers against one shared table of inputs."""
    # Helpers in the order their expected values appear in each table row.
    helpers = (unprintable_to_hex, escape_control_chars, substitute_control_chars)

    # Row layout: (raw input, hex-escaped, hex-escaped and shell-quoted,
    #              (substituted text, whether anything was substituted))
    table = (
        (' abc-z', ' abc-z', "' abc-z'", (' abc-z', False)),
        ('a\x7fb', 'a\\x7fb', "'a\\x7fb'", ('a�b', True)),
        ('a\x00b a\x9fb ', 'a\\x00b a\\x9fb ', "'a\\x00b a\\x9fb '", ('a�b a�b ', True)),
        ('a\x7fb\nc', 'a\\x7fb\nc', "'a\\x7fb\nc'", ('a�b\nc', True)),
        ('\x9bT\x9bEtest', '\\x9bT\\x9bEtest', "'\\x9bT\\x9bEtest'", ('�T�Etest', True)),
        (
            '\x1b[32mC\x1b[33mC\x1b[34mI',
            '\\x1b[32mC\\x1b[33mC\\x1b[34mI',
            "'\\x1b[32mC\\x1b[33mC\\x1b[34mI'",
            ('�[32mC�[33mC�[34mI', True),
        ),
    )

    for raw, *expected_results in table:
        for helper, expected in zip(helpers, expected_results):
            assert helper(raw) == expected