Skip to content

Commit

Permalink
🎨 reformat files
Browse files (browse the repository at this point in the history)
  • Loading branch information
Ousret committed Sep 24, 2021
1 parent dd56776 commit 59d22e0
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 9 deletions.
4 changes: 3 additions & 1 deletion charset_normalizer/cd.py
Expand Up @@ -113,7 +113,9 @@ def alphabet_languages(characters: List[str]) -> List[str]:
for language, language_characters in FREQUENCIES.items():
character_count = len(language_characters) # type: int

character_match_count = len([c for c in language_characters if c in characters]) # type: int
character_match_count = len(
[c for c in language_characters if c in characters]
) # type: int

if character_match_count / character_count >= 0.2:
languages.append(language)
Expand Down
2 changes: 1 addition & 1 deletion charset_normalizer/constant.py
Expand Up @@ -493,4 +493,4 @@
KO_NAMES = {"johab", "cp949", "euc_kr"} # type: Set[str]
ZH_NAMES = {"big5", "cp950", "big5hkscs", "hz"} # type: Set[str]

NOT_PRINTABLE_PATTERN = re_compile(r"[0-9\W\n\r\t]+")
NOT_PRINTABLE_PATTERN = re_compile(r"[0-9\W\n\r\t]+")
21 changes: 16 additions & 5 deletions charset_normalizer/md.py
Expand Up @@ -70,7 +70,10 @@ def eligible(self, character: str) -> bool:
def feed(self, character: str) -> None:
self._character_count += 1

if character != self._last_printable_char and character not in COMMON_SAFE_ASCII_CHARACTERS:
if (
character != self._last_printable_char
and character not in COMMON_SAFE_ASCII_CHARACTERS
):
if is_punctuation(character):
self._punctuation_count += 1
elif (
Expand Down Expand Up @@ -461,8 +464,14 @@ def is_suspiciously_successive_range(
return False

# Japanese Exception
range_a_jp_chars, range_b_jp_chars = unicode_range_a in ("Hiragana", "Katakana"), \
unicode_range_b in ("Hiragana", "Katakana")
range_a_jp_chars, range_b_jp_chars = (
unicode_range_a
in (
"Hiragana",
"Katakana",
),
unicode_range_b in ("Hiragana", "Katakana"),
)
if range_a_jp_chars or range_b_jp_chars:
if "CJK" in unicode_range_a or "CJK" in unicode_range_b:
return False
Expand Down Expand Up @@ -494,9 +503,11 @@ def mess_ratio(
) -> float:
"""
Compute a mess ratio for a decoded byte sequence. Reaching the maximum threshold stops the computation early.
"""
"""

detectors = [md_class() for md_class in MessDetectorPlugin.__subclasses__()] # type: List[MessDetectorPlugin]
detectors = [
md_class() for md_class in MessDetectorPlugin.__subclasses__()
] # type: List[MessDetectorPlugin]

length = len(decoded_sequence) # type: int

Expand Down
6 changes: 4 additions & 2 deletions charset_normalizer/models.py
Expand Up @@ -4,7 +4,7 @@
from hashlib import sha256
from json import dumps
from re import sub
from typing import Any, Dict, Iterator, List, Optional, Set, Tuple, Union
from typing import Any, Dict, Iterator, List, Optional, Tuple, Union

from .constant import NOT_PRINTABLE_PATTERN, TOO_BIG_SEQUENCE
from .md import mess_ratio
Expand Down Expand Up @@ -226,7 +226,9 @@ def alphabets(self) -> List[str]:
if self._unicode_ranges is not None:
return self._unicode_ranges
# list detected ranges
detected_ranges = [unicode_range(char) for char in str(self)] # type: List[Optional[str]]
detected_ranges = [
unicode_range(char) for char in str(self)
] # type: List[Optional[str]]
# filter and sort
self._unicode_ranges = sorted([r for r in detected_ranges if r]) # type: ignore
return self._unicode_ranges
Expand Down

0 comments on commit 59d22e0

Please sign in to comment.