diff --git a/charset_normalizer/api.py b/charset_normalizer/api.py
index 6c7e8983..9dbf4201 100644
--- a/charset_normalizer/api.py
+++ b/charset_normalizer/api.py
@@ -175,7 +175,6 @@ def from_bytes(
         prioritized_encodings.append("utf_8")
 
     for encoding_iana in prioritized_encodings + IANA_SUPPORTED:
-
         if cp_isolation and encoding_iana not in cp_isolation:
             continue
 
@@ -318,7 +317,9 @@
                         bom_or_sig_available and strip_sig_or_bom is False
                     ):
                         break
-            except UnicodeDecodeError as e:  # Lazy str loading may have missed something there
+            except (
+                UnicodeDecodeError
+            ) as e:  # Lazy str loading may have missed something there
                 logger.log(
                     TRACE,
                     "LazyStr Loading: After MD chunk decode, code page %s does not fit given bytes sequence at ALL. %s",
diff --git a/charset_normalizer/cd.py b/charset_normalizer/cd.py
index ae2813fb..6e56fe84 100644
--- a/charset_normalizer/cd.py
+++ b/charset_normalizer/cd.py
@@ -140,7 +140,6 @@ def alphabet_languages(
     source_have_accents = any(is_accentuated(character) for character in characters)
 
     for language, language_characters in FREQUENCIES.items():
-
         target_have_accents, target_pure_latin = get_target_features(language)
 
         if ignore_non_latin and target_pure_latin is False:
diff --git a/charset_normalizer/cli/normalizer.py b/charset_normalizer/cli/normalizer.py
index ad26b4d0..f4bcbaac 100644
--- a/charset_normalizer/cli/normalizer.py
+++ b/charset_normalizer/cli/normalizer.py
@@ -147,7 +147,6 @@ def cli_detect(argv: Optional[List[str]] = None) -> int:
     x_ = []
 
     for my_file in args.files:
-
         matches = from_fp(my_file, threshold=args.threshold, explain=args.verbose)
 
         best_guess = matches.best()
@@ -222,7 +221,6 @@
             )
 
         if args.normalize is True:
-
             if best_guess.encoding.startswith("utf") is True:
                 print(
                     '"{}" file does not need to be normalized, as it already came from unicode.'.format(
diff --git a/charset_normalizer/utils.py b/charset_normalizer/utils.py
index e3536267..76eafc64 100644
--- a/charset_normalizer/utils.py
+++ b/charset_normalizer/utils.py
@@ -311,7 +311,6 @@ def range_scan(decoded_sequence: str) -> List[str]:
 
 
 def cp_similarity(iana_name_a: str, iana_name_b: str) -> float:
-
     if is_multi_byte_encoding(iana_name_a) or is_multi_byte_encoding(iana_name_b):
         return 0.0
 
@@ -351,7 +350,6 @@ def set_logging_handler(
     level: int = logging.INFO,
     format_string: str = "%(asctime)s | %(levelname)s | %(message)s",
 ) -> None:
-
     logger = logging.getLogger(name)
     logger.setLevel(level)
 
@@ -371,7 +369,6 @@ def cut_sequence_chunks(
     is_multi_byte_decoder: bool,
     decoded_payload: Optional[str] = None,
 ) -> Generator[str, None, None]:
-
     if decoded_payload and is_multi_byte_decoder is False:
         for i in offsets:
             chunk = decoded_payload[i : i + chunk_size]
@@ -397,7 +394,6 @@
         # multi-byte bad cutting detector and adjustment
         # not the cleanest way to perform that fix but clever enough for now.
         if is_multi_byte_decoder and i > 0:
-
             chunk_partial_size_chk: int = min(chunk_size, 16)
 
             if (
diff --git a/dev-requirements.txt b/dev-requirements.txt
index e3cdf4a3..503a1620 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -6,7 +6,7 @@ pytest-cov==4.0.0
 build==0.10.0
 wheel==0.38.4
 
-black==22.12.0
+black==23.1.0
 mypy==1.0.1
 Flask==2.2.3
 pytest==7.2.1