From 2a11d6812f703e01150c2aedbca7ec2a6590f254 Mon Sep 17 00:00:00 2001
From: Bert JW Regeer
Date: Mon, 23 Dec 2019 14:36:41 +0100
Subject: [PATCH] Add file with RFC7230 definitions/ABNF -> regex

Currently it is used for header name/fields only, but this should get
expanded as necessary. Other stuff from other parts of waitress may move
here too to create a sort utility library of useful RFC standards based
functions.
---
 waitress/rfc7230.py | 44 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)
 create mode 100644 waitress/rfc7230.py

diff --git a/waitress/rfc7230.py b/waitress/rfc7230.py
new file mode 100644
index 00000000..a9f047c8
--- /dev/null
+++ b/waitress/rfc7230.py
@@ -0,0 +1,44 @@
+"""
+This contains a bunch of RFC7230 definitions and regular expressions that are
+needed to properly parse HTTP messages.
+"""
+
+import re
+
+from .compat import tobytes
+
+WS = "[ \t]"
+OWS = WS + "{0,}?"
+RWS = WS + "{1,}?"
+BWS = OWS
+
+# RFC 7230 Section 3.2.6 "Field Value Components":
+# tchar          = "!" / "#" / "$" / "%" / "&" / "'" / "*"
+#                / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
+#                / DIGIT / ALPHA
+# obs-text       = %x80-FF
+TCHAR = r"[!#$%&'*+\-.^_`|~0-9A-Za-z]"
+OBS_TEXT = r"\x80-\xff"
+
+TOKEN = TCHAR + "{1,}"
+
+# RFC 5234 Appendix B.1 "Core Rules":
+# VCHAR          = %x21-7E
+#                ; visible (printing) characters
+VCHAR = r"\x21-\x7e"
+
+# header-field   = field-name ":" OWS field-value OWS
+# field-name     = token
+# field-value    = *( field-content / obs-fold )
+# field-content  = field-vchar [ 1*( SP / HTAB ) field-vchar ]
+# field-vchar    = VCHAR / obs-text
+
+FIELD_VCHAR = "[" + VCHAR + OBS_TEXT + "]"
+FIELD_CONTENT = FIELD_VCHAR + "(" + RWS + FIELD_VCHAR + "){0,}"
+FIELD_VALUE = "(" + FIELD_CONTENT + "){0,}"
+
+HEADER_FIELD = re.compile(
+    tobytes(
+        "^(?P<name>" + TOKEN + "):" + OWS + "(?P<value>" + FIELD_VALUE + ")" + OWS + "$"
+    )
+)