Skip to content

Commit

Permalink
Merge pull request #23 from Ousret/develop
Browse files Browse the repository at this point in the history
Release 2.0.5
  • Loading branch information
Ousret committed Apr 19, 2020
2 parents a966162 + 9959d43 commit 51c3168
Show file tree
Hide file tree
Showing 7 changed files with 97 additions and 52 deletions.
4 changes: 2 additions & 2 deletions README.md
Expand Up @@ -49,7 +49,7 @@ charset = headers['Content-Type'].split(';')[-1].split('=')[-1].replace('"', '')
* A backwards-compatible syntax using bracket style.
* Capability to alter headers using simple, human-readable operator notation `+` and `-`.
* Flexibility if headers are from IMAP4 or HTTP, use as you need with one library.
* Ability to parse any object and extract recognized headers from it.
* Ability to parse any object and extract recognized headers from it; it also supports UTF-8 encoded headers.
* Fully type-annotated.
* Provide great auto-completion in Python interpreter or any capable IDE.
* Absolutely no dependencies.
Expand All @@ -60,7 +60,7 @@ Plus all the features that you would expect from handling headers...
* Properties syntax for headers and attribute in header.
* Supports headers and attributes OneToOne, OneToMany and ManySquashedIntoOne.
* Capable of parsing `bytes`, `fp`, `str`, `dict`, `email.Message`, `requests.Response` and `httpx._models.Response`.
* Automatically unquote value of an attribute when retrieving it.
* Automatically unquote and unfold the value of an attribute when retrieving it.
* Case insensitive with header name and attribute key.
* Character `-` is treated as equal to `_`, in addition to the above feature.
* Any syntax you like, we like.
Expand Down
17 changes: 7 additions & 10 deletions kiss_headers/api.py
@@ -1,13 +1,14 @@
from email.message import Message
from email.parser import BytesHeaderParser, HeaderParser
from io import BytesIO, IOBase
from email.parser import HeaderParser
from io import RawIOBase
from typing import Any, Iterable, List, Mapping, Optional, Tuple

from kiss_headers.models import Header, Headers
from kiss_headers.structures import CaseInsensitiveDict
from kiss_headers.utils import (
decode_partials,
extract_class_name,
extract_encoded_headers,
header_content_split,
header_name_to_class,
is_legal_header_name,
Expand All @@ -26,15 +27,11 @@ def parse_it(raw_headers: Any) -> Headers:

if isinstance(raw_headers, str):
headers = HeaderParser().parsestr(raw_headers, headersonly=True).items()
elif isinstance(raw_headers, bytes) or isinstance(raw_headers, IOBase):
headers = (
BytesHeaderParser()
.parse(
BytesIO(raw_headers) if isinstance(raw_headers, bytes) else raw_headers, # type: ignore
headersonly=True,
)
.items()
elif isinstance(raw_headers, bytes) or isinstance(raw_headers, RawIOBase):
decoded, not_decoded = extract_encoded_headers(
raw_headers if isinstance(raw_headers, bytes) else raw_headers.read() or b""
)
return parse_it(decoded)
elif isinstance(raw_headers, Mapping) or isinstance(raw_headers, Message):
headers = raw_headers.items()
else:
Expand Down
67 changes: 33 additions & 34 deletions kiss_headers/models.py
Expand Up @@ -15,12 +15,14 @@

from kiss_headers.structures import CaseInsensitiveDict
from kiss_headers.utils import (
extract_comments,
header_content_split,
header_name_to_class,
header_strip,
is_legal_header_name,
normalize_str,
prettify_header_name,
unfold,
unpack_protected_keyword,
unquote,
)
Expand Down Expand Up @@ -66,10 +68,9 @@ def __init__(self, name: str, content: str):
]

self._not_valued_attrs: List[str] = list()
self._valued_attrs: MutableMapping[str, Union[str, List[str]]] = dict()
self._valued_attrs_normalized: MutableMapping[
self._valued_attrs: MutableMapping[
str, Union[str, List[str]]
] = dict()
] = CaseInsensitiveDict()

for member in self._members:
if member == "":
Expand All @@ -91,9 +92,6 @@ def __init__(self, name: str, content: str):
else:
self._valued_attrs[key].append(value) # type: ignore

self._valued_attrs_normalized[normalize_str(key)] = self._valued_attrs[
key
]
continue

self._not_valued_attrs.append(unquote(member))
Expand Down Expand Up @@ -133,6 +131,11 @@ def content(self) -> str:

return self._content

@property
def comments(self) -> List[str]:
    """
    List every comment found in this header's content.
    The raw content is handed over to extract_comments, which does the actual extraction.
    """
    raw_content = self.content
    return extract_comments(raw_content)

def __lt__(self, other: object) -> bool:
"""
This method is only implemented to make sorted work with Header.
Expand Down Expand Up @@ -245,7 +248,7 @@ def __isub__(self, other: str) -> "Header":

other = normalize_str(other)

if other in self._valued_attrs_normalized:
if other in self._valued_attrs:
del self[other]

if other in self._not_valued_attrs:
Expand Down Expand Up @@ -286,7 +289,6 @@ def __setattr__(self, key: str, value: str) -> None:
"_content",
"_members",
"_not_valued_attrs",
"_valued_attrs_normalized",
"_valued_attrs",
}:
return super().__setattr__(key, value)
Expand All @@ -300,15 +302,13 @@ def __setitem__(self, key: str, value: str) -> None:
Set an attribute bracket syntax like. This will erase previously set attribute named after the key.
Any value that are not a str are casted to str.
"""
key_normalized = normalize_str(key)

if key in self:
del self[key]
if not isinstance(value, str):
value = str(value)

self._valued_attrs[key] = value
self._valued_attrs_normalized[key_normalized] = self._valued_attrs[key]

self._content += '{semi_colon_r}{key}="{value}"'.format(
key=key,
Expand All @@ -326,22 +326,14 @@ def __delitem__(self, key: str) -> None:
>>> str(headers.content_type)
'text/html'
"""
key_normalized = normalize_str(key)

if key_normalized not in self._valued_attrs_normalized:
if key not in self._valued_attrs:
raise KeyError(
"'{item}' attribute is not defined within '{header}' header.".format(
item=key, header=self.name
)
)

del self._valued_attrs_normalized[key]
not_normalized_keys = self._valued_attrs.keys()

for key_ in not_normalized_keys:
if normalize_str(key_) == key_normalized:
del self._valued_attrs[key_]
break
del self._valued_attrs[key]

for elem in findall(
r"{key_name}=.*?(?=[;\n])".format(key_name=escape(key)),
Expand All @@ -362,7 +354,7 @@ def __delattr__(self, item: str) -> None:
"""
item = normalize_str(item)

if item not in self._valued_attrs_normalized:
if item not in self._valued_attrs:
raise AttributeError(
"'{item}' attribute is not defined within '{header}' header.".format(
item=item, header=self.name
Expand Down Expand Up @@ -430,7 +422,7 @@ def __dir__(self) -> Iterable[str]:
Provide a better auto-completion when using python interpreter. We are feeding __dir__ so Python can be aware
of what properties are callable. In other word, more precise auto-completion when not using IDE.
"""
return list(super().__dir__()) + list(self._valued_attrs_normalized.keys())
return list(super().__dir__()) + list(self._valued_attrs.keys())

@property
def attrs(self) -> List[str]:
Expand All @@ -450,6 +442,15 @@ def has(self, attr: str) -> bool:
def get(self, attr: str) -> Optional[Union[str, List[str]]]:
"""
Retrieve associated value of an attribute.
>>> header = Header("Content-Type", "application/json; charset=UTF-8; format=flowed")
>>> header.charset
'UTF-8'
>>> header.ChArSeT
'UTF-8'
>>> header.FORMAT
'flowed'
>>> header.format
'flowed'
"""
if attr not in self._valued_attrs:
return None
Expand All @@ -471,16 +472,17 @@ def has_many(self, name: str) -> bool:

return isinstance(r, list) and len(r) > 1

def __getitem__(self, item: Union[str]) -> Union[str, List[str]]:
def __getitem__(self, item: Union[str, int]) -> Union[str, List[str]]:
"""
This method will allow you to retrieve attribute value using the bracket syntax, list-like.
This method will allow you to retrieve attribute value using the bracket syntax, list-like or dict-like.
"""
normalized_item = normalize_str(item)
if isinstance(item, int):
return (
self._members[item] if not OUTPUT_LOCK_TYPE else [self._members[item]]
)

if item in self._valued_attrs:
value = self._valued_attrs[item]
elif normalized_item in self._valued_attrs_normalized:
value = self._valued_attrs_normalized[normalized_item]
else:
raise KeyError(
"'{item}' attribute is not defined within '{header}' header.".format(
Expand All @@ -492,9 +494,9 @@ def __getitem__(self, item: Union[str]) -> Union[str, List[str]]:
value = [value]

return (
unquote(value)
unfold(unquote(value))
if not isinstance(value, list)
else [unquote(v) for v in value]
else [unfold(unquote(v)) for v in value]
)

def __getattr__(self, item: str) -> Union[str, List[str]]:
Expand All @@ -504,10 +506,7 @@ def __getattr__(self, item: str) -> Union[str, List[str]]:
"""
item = unpack_protected_keyword(item)

if (
item not in self._valued_attrs
and normalize_str(item) not in self._valued_attrs_normalized
):
if item not in self._valued_attrs:
raise AttributeError(
"'{item}' attribute is not defined within '{header}' header.".format(
item=item, header=self.name
Expand All @@ -525,7 +524,7 @@ def __contains__(self, item: str) -> bool:
item = normalize_str(item)
for attr in self.attrs:
target = normalize_str(attr)
if item == target or item in target.split(" "):
if item == target or item in header_content_split(target, " "):
return True
return False

Expand Down
8 changes: 5 additions & 3 deletions kiss_headers/structures.py
Expand Up @@ -2,6 +2,8 @@
from collections.abc import Mapping, MutableMapping
from typing import Any, Iterator, Optional, Tuple

from kiss_headers.utils import normalize_str


"""
Disclaimer : CaseInsensitiveDict has been borrowed from `psf/requests`.
Expand Down Expand Up @@ -45,13 +47,13 @@ def __init__(self, data: Optional[Mapping] = None, **kwargs: Any):
def __setitem__(self, key: str, value: Any) -> None:
# Use the lowercased key for lookups, but store the actual
# key alongside the value.
self._store[key.lower().replace("-", "_")] = (key, value)
self._store[normalize_str(key)] = (key, value)

def __getitem__(self, key: str) -> Any:
return self._store[key.lower().replace("-", "_")][1]
return self._store[normalize_str(key)][1]

def __delitem__(self, key: str) -> None:
del self._store[key.lower().replace("-", "_")]
del self._store[normalize_str(key)]

def __iter__(self) -> Iterator[Tuple[str, Any]]:
return (casedkey for casedkey, mappedvalue in self._store.values())
Expand Down
45 changes: 43 additions & 2 deletions kiss_headers/utils.py
Expand Up @@ -85,8 +85,8 @@ def header_content_split(string: str, delimiter: str) -> List[str]:
>>> header_content_split("text/html; charset=UTF-8", ";")
['text/html', 'charset=UTF-8']
"""
if len(delimiter) != 1 or delimiter not in {";", ","}:
raise ValueError("Delimiter should be either semi-colon or a coma.")
if len(delimiter) != 1 or delimiter not in {";", ",", " "}:
raise ValueError("Delimiter should be either semi-colon, a coma or a space.")

in_double_quote: bool = False
in_parenthesis: bool = False
Expand Down Expand Up @@ -372,3 +372,44 @@ def is_legal_header_name(name: str) -> bool:
name != ""
and search(r"[^\x00-\x7F]|[:;(),<>=@?\[\]\r\n\t &{}\\]", name) is None
)


def extract_comments(content: str) -> List[str]:
    """
    Collect every non-empty chunk of text enclosed between an opening and the next closing parenthesis.
    The surrounding parenthesis are not part of the returned items, order of appearance is preserved.
    >>> extract_comments("Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:50.0) Gecko/20100101 Firefox/50.0 (hello) llll (abc)")
    ['Macintosh; Intel Mac OS X 10.9; rv:50.0', 'hello', 'abc']
    >>> extract_comments("text/html; charset=UTF-8")
    []
    """
    # One capturing group: findall yields the group (inner text) rather than the whole match.
    between_parenthesis = r"\(([^)]+)\)"
    comments: List[str] = findall(between_parenthesis, content)

    return comments


def unfold(content: str) -> str:
"""Some header content may have folded content (LF + 9 spaces or LF + 7 spaces) in it, making your job at reading them a little more difficult.
This function undo the folding in given content.
>>> unfold("eqHS2AQD+hfNNlTiLej73CiBUGVQifX4watAaxUkdjGeH578i7n3Wwcdw2nLz+U0bH\\n ehSe/2QytZGWM5CewwNdumT1IVGzjFs+cRgfK0V6JlEIOoV3bRXxnjenWFfWdVNXtw8s")
'eqHS2AQD+hfNNlTiLej73CiBUGVQifX4watAaxUkdjGeH578i7n3Wwcdw2nLz+U0bHehSe/2QytZGWM5CewwNdumT1IVGzjFs+cRgfK0V6JlEIOoV3bRXxnjenWFfWdVNXtw8s'
"""
return content.replace("\n" + (9 * " "), "").replace("\n" + (7 * " "), " ")


def extract_encoded_headers(payload: bytes) -> Tuple[str, bytes]:
    """This function purpose is to extract lines that can be decoded using utf-8.
    Lines are consumed from the beginning of the payload until an empty line or a line that is
    not valid utf-8 is met. Each consumed line is decoded and terminated with CRLF.

    :param payload: Raw bytes, usually headers optionally followed by something else (e.g. a body).
    :return: A tuple made of the decoded lines as a str and the lines that were not consumed,
             joined back with CRLF. The second item is empty when every line was consumed.

    >>> extract_encoded_headers("Host: developer.mozilla.org\\r\\nX-Hello-World: 死の漢字\\r\\n\\r\\n".encode("utf-8"))
    ('Host: developer.mozilla.org\\r\\nX-Hello-World: 死の漢字\\r\\n', b'')
    >>> extract_encoded_headers("Host: developer.mozilla.org\\r\\nX-Hello-World: 死の漢字\\r\\n\\r\\nThat IS totally random.".encode("utf-8"))
    ('Host: developer.mozilla.org\\r\\nX-Hello-World: 死の漢字\\r\\n', b'\\r\\nThat IS totally random.')
    >>> extract_encoded_headers(b"Host: developer.mozilla.org\\r\\nAccept: */*")
    ('Host: developer.mozilla.org\\r\\nAccept: */*\\r\\n', b'')
    """
    lines: List[bytes] = payload.splitlines()
    decoded_lines: List[str] = []

    # Index of the first line that was NOT consumed. Defaulting to len(lines) guarantees that a
    # payload fully decoded (no empty line, no undecodable line) leaves nothing behind, instead of
    # handing back its last line a second time.
    stopped_at: int = len(lines)

    for index, line in enumerate(lines):
        if line == b"":
            # Empty line: end of the headers, it stays in the remaining part alongside what follows.
            stopped_at = index
            break

        try:
            decoded_lines.append(line.decode("utf-8"))
        except UnicodeDecodeError:
            # The faulty line and everything after it are returned untouched.
            stopped_at = index
            break

    return (
        "".join(decoded_line + "\r\n" for decoded_line in decoded_lines),
        b"\r\n".join(lines[stopped_at:]),
    )
2 changes: 1 addition & 1 deletion kiss_headers/version.py
Expand Up @@ -2,5 +2,5 @@
Expose version
"""

__version__ = "2.0.4"
__version__ = "2.0.5"
VERSION = __version__.split(".")
6 changes: 6 additions & 0 deletions tests/test_headers_from_string.py
Expand Up @@ -54,6 +54,12 @@ def test_decode_partials(self):
decode_partials([("Subject", "=?iso-8859-1?q?p=F6stal?=")]),
)

def test_bytes_headers(self):

self.assertEqual(
MyKissHeadersFromStringTest.headers, parse_it(RAW_HEADERS.encode("utf-8"))
)

def test_two_headers_eq(self):

self.assertEqual(MyKissHeadersFromStringTest.headers, parse_it(RAW_HEADERS))
Expand Down

0 comments on commit 51c3168

Please sign in to comment.