Merge pull request from GHSA-m5ff-3wj3-8ph4

HTTP header-field stricter validation
Pylons · Dec 24, 2019 · 11d9e13 · 11d9e13
2 parents f11093a + a046a76
commit 11d9e13
Show file tree

Hide file tree

Showing 6 changed files with 194 additions and 25 deletions.
diff --git a/CHANGES.txt b/CHANGES.txt
@@ -1,3 +1,17 @@
+1.4.1 (2019-12-??)
+------------------
+
+Security Fixes
+~~~~~~~~~~~~~~
+
+- Waitress did not properly validate that the HTTP headers it received were
+  properly formed, thereby potentially allowing a front-end server to treat a
+  request different from Waitress. This could lead to HTTP request
+  smuggling/splitting.
+
+  Please see the security advisory for more information:
+  https://github.com/Pylons/waitress/security/advisories/GHSA-m5ff-3wj3-8ph4
+
 1.4.0 (2019-12-20)
 ------------------
 
@@ -36,6 +50,11 @@ Security Fixes
   For more information I can highly recommend the blog post by ZeddYu Lu
   https://blog.zeddyu.info/2019/12/08/HTTP-Smuggling-en/
 
+  Please see the security advisory for more information:
+  https://github.com/Pylons/waitress/security/advisories/GHSA-pg36-wpm5-g57p
+
+  CVE-ID: CVE-2019-16785
+
 - Waitress used to treat LF the same as CRLF in ``Transfer-Encoding: chunked``
   requests, while the maintainer doesn't believe this could lead to a security
   issue, this is no longer supported and all chunks are now validated to be
@@ -61,6 +80,11 @@ Security Fixes
   ``Transfer-Encoding: chunked`` instead of ``Transfer-Encoding: identity,
   chunked``.
 
+  PLease see the security advisory for more information: 
+  https://github.com/Pylons/waitress/security/advisories/GHSA-g2xc-35jw-c63p
+
+  CVE-ID: CVE-2019-16786
+
 - While validating the ``Transfer-Encoding`` header, Waitress now properly
   handles line-folded ``Transfer-Encoding`` headers or those that contain
   multiple comma seperated values. This closes a potential issue where a
@@ -75,3 +99,6 @@ Security Fixes
   for a potential request to be split and treated as two requests by HTTP
   pipelining support in Waitress. If Waitress is now unable to parse the
   Content-Length header, a 400 Bad Request is sent back to the client.
+
+  Please see the security advisory for more information:
+  https://github.com/Pylons/waitress/security/advisories/GHSA-4ppp-gpcr-7qf6
diff --git a/setup.py b/setup.py
@@ -34,7 +34,7 @@
 
 setup(
     name="waitress",
-    version="1.4.0",
+    version="1.4.1",
     author="Zope Foundation and Contributors",
     author_email="zope-dev@zope.org",
     maintainer="Pylons Project",

diff --git a/waitress/parser.py b/waitress/parser.py
@@ -29,6 +29,7 @@
     ServerNotImplemented,
     find_double_newline,
 )
+from .rfc7230 import HEADER_FIELD
 
 
 class ParsingError(Exception):
@@ -38,7 +39,6 @@ class ParsingError(Exception):
 class TransferEncodingNotImplemented(Exception):
     pass
 
-
 class HTTPRequestParser(object):
     """A structure that collects the HTTP request.
 
@@ -208,26 +208,27 @@ def parse_header(self, header_plus):
 
         headers = self.headers
         for line in lines:
-            index = line.find(b":")
-            if index > 0:
-                key = line[:index]
-
-                if key != key.strip():
-                    raise ParsingError("Invalid whitespace after field-name")
-
-                if b"_" in key:
-                    continue
-                value = line[index + 1 :].strip()
-                key1 = tostr(key.upper().replace(b"-", b"_"))
-                # If a header already exists, we append subsequent values
-                # seperated by a comma. Applications already need to handle
-                # the comma seperated values, as HTTP front ends might do
-                # the concatenation for you (behavior specified in RFC2616).
-                try:
-                    headers[key1] += tostr(b", " + value)
-                except KeyError:
-                    headers[key1] = tostr(value)
-            # else there's garbage in the headers?
+            header = HEADER_FIELD.match(line)
+
+            if not header:
+                raise ParsingError("Invalid header")
+
+            key, value = header.group('name', 'value')
+
+            if b"_" in key:
+                # TODO(xistence): Should we drop this request instead?
+                continue
+
+            value = value.strip()
+            key1 = tostr(key.upper().replace(b"-", b"_"))
+            # If a header already exists, we append subsequent values
+            # seperated by a comma. Applications already need to handle
+            # the comma seperated values, as HTTP front ends might do
+            # the concatenation for you (behavior specified in RFC2616).
+            try:
+                headers[key1] += tostr(b", " + value)
+            except KeyError:
+                headers[key1] = tostr(value)
 
         # command, uri, version will be bytes
         command, uri, version = crack_first_line(first_line)
@@ -352,6 +353,9 @@ def get_header_lines(header):
     r = []
     lines = header.split(b"\r\n")
     for line in lines:
+        if not line:
+            continue
+
         if b"\r" in line or b"\n" in line:
             raise ParsingError('Bare CR or LF found in header line "%s"' % tostr(line))
 

diff --git a/waitress/rfc7230.py b/waitress/rfc7230.py
@@ -0,0 +1,44 @@
+"""
+This contains a bunch of RFC7230 definitions and regular expressions that are
+needed to properly parse HTTP messages.
+"""
+
+import re
+
+from .compat import tobytes
+
+WS = "[ \t]"
+OWS = WS + "{0,}?"
+RWS = WS + "{1,}?"
+BWS = OWS
+
+# RFC 7230 Section 3.2.6 "Field Value Components":
+# tchar          = "!" / "#" / "$" / "%" / "&" / "'" / "*"
+#                / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
+#                / DIGIT / ALPHA
+# obs-text      = %x80-FF
+TCHAR = r"[!#$%&'*+\-.^_`|~0-9A-Za-z]"
+OBS_TEXT = r"\x80-\xff"
+
+TOKEN = TCHAR + "{1,}"
+
+# RFC 5234 Appendix B.1 "Core Rules":
+# VCHAR         =  %x21-7E
+#                  ; visible (printing) characters
+VCHAR = r"\x21-\x7e"
+
+# header-field   = field-name ":" OWS field-value OWS
+# field-name     = token
+# field-value    = *( field-content / obs-fold )
+# field-content  = field-vchar [ 1*( SP / HTAB ) field-vchar ]
+# field-vchar    = VCHAR / obs-text
+
+FIELD_VCHAR = "[" + VCHAR + OBS_TEXT + "]"
+FIELD_CONTENT = FIELD_VCHAR + "(" + RWS + FIELD_VCHAR + "){0,}"
+FIELD_VALUE = "(" + FIELD_CONTENT + "){0,}"
+
+HEADER_FIELD = re.compile(
+    tobytes(
+        "^(?P<name>" + TOKEN + "):" + OWS + "(?P<value>" + FIELD_VALUE + ")" + OWS + "$"
+    )
+)
diff --git a/waitress/tests/test_parser.py b/waitress/tests/test_parser.py
@@ -308,10 +308,93 @@ def test_parse_header_invalid_whitespace(self):
         try:
             self.parser.parse_header(data)
         except ParsingError as e:
-            self.assertIn("Invalid whitespace after field-name", e.args[0])
+            self.assertIn("Invalid header", e.args[0])
         else:  # pragma: nocover
             self.assertTrue(False)
 
+    def test_parse_header_invalid_whitespace_vtab(self):
+        from waitress.parser import ParsingError
+
+        data = b"GET /foobar HTTP/1.1\r\nfoo:\x0bbar\r\n"
+        try:
+            self.parser.parse_header(data)
+        except ParsingError as e:
+            self.assertIn("Invalid header", e.args[0])
+        else:  # pragma: nocover
+            self.assertTrue(False)
+
+    def test_parse_header_invalid_no_colon(self):
+        from waitress.parser import ParsingError
+
+        data = b"GET /foobar HTTP/1.1\r\nfoo: bar\r\nnotvalid\r\n"
+        try:
+            self.parser.parse_header(data)
+        except ParsingError as e:
+            self.assertIn("Invalid header", e.args[0])
+        else:  # pragma: nocover
+            self.assertTrue(False)
+
+    def test_parse_header_invalid_folding_spacing(self):
+        from waitress.parser import ParsingError
+
+        data = b"GET /foobar HTTP/1.1\r\nfoo: bar\r\n\t\x0bbaz\r\n"
+        try:
+            self.parser.parse_header(data)
+        except ParsingError as e:
+            self.assertIn("Invalid header", e.args[0])
+        else:  # pragma: nocover
+            self.assertTrue(False)
+
+    def test_parse_header_invalid_chars(self):
+        from waitress.parser import ParsingError
+
+        data = b"GET /foobar HTTP/1.1\r\nfoo: bar\r\n\foo: \x0bbaz\r\n"
+        try:
+            self.parser.parse_header(data)
+        except ParsingError as e:
+            self.assertIn("Invalid header", e.args[0])
+        else:  # pragma: nocover
+            self.assertTrue(False)
+
+    def test_parse_header_empty(self):
+        from waitress.parser import ParsingError
+
+        data = b"GET /foobar HTTP/1.1\r\nfoo: bar\r\nempty:\r\n"
+        self.parser.parse_header(data)
+
+        self.assertIn("EMPTY", self.parser.headers)
+        self.assertIn("FOO", self.parser.headers)
+        self.assertEqual(self.parser.headers["EMPTY"], "")
+        self.assertEqual(self.parser.headers["FOO"], "bar")
+
+    def test_parse_header_multiple_values(self):
+        from waitress.parser import ParsingError
+
+        data = b"GET /foobar HTTP/1.1\r\nfoo: bar, whatever, more, please, yes\r\n"
+        self.parser.parse_header(data)
+
+        self.assertIn("FOO", self.parser.headers)
+        self.assertEqual(self.parser.headers["FOO"], "bar, whatever, more, please, yes")
+
+    def test_parse_header_multiple_values_header_folded(self):
+        from waitress.parser import ParsingError
+
+        data = b"GET /foobar HTTP/1.1\r\nfoo: bar, whatever,\r\n more, please, yes\r\n"
+        self.parser.parse_header(data)
+
+        self.assertIn("FOO", self.parser.headers)
+        self.assertEqual(self.parser.headers["FOO"], "bar, whatever, more, please, yes")
+
+    def test_parse_header_multiple_values_header_folded_multiple(self):
+        from waitress.parser import ParsingError
+
+        data = b"GET /foobar HTTP/1.1\r\nfoo: bar, whatever,\r\n more\r\nfoo: please, yes\r\n"
+        self.parser.parse_header(data)
+
+        self.assertIn("FOO", self.parser.headers)
+        self.assertEqual(self.parser.headers["FOO"], "bar, whatever, more, please, yes")
+
+
 
 class Test_split_uri(unittest.TestCase):
     def _callFUT(self, uri):

diff --git a/waitress/utilities.py b/waitress/utilities.py
@@ -22,6 +22,8 @@
 import stat
 import time
 
+from .rfc7230 import OBS_TEXT, VCHAR
+
 logger = logging.getLogger("waitress")
 queue_logger = logging.getLogger("waitress.queue")
 
@@ -63,6 +65,7 @@ def group(s):
 long_day_reg = group(join(long_days, "|"))
 
 daymap = {}
+
 for i in range(7):
     daymap[short_days[i]] = i
     daymap[long_days[i]] = i
@@ -85,6 +88,7 @@ def group(s):
 ]
 
 monmap = {}
+
 for i in range(12):
     monmap[months[i]] = i + 1
 
@@ -113,6 +117,7 @@ def group(s):
 
 def unpack_rfc822(m):
     g = m.group
+
     return (
         int(g(4)),  # year
         monmap[g(3)],  # month
@@ -142,8 +147,10 @@ def unpack_rfc822(m):
 def unpack_rfc850(m):
     g = m.group
     yr = g(4)
+
     if len(yr) == 2:
         yr = "19" + yr
+
     return (
         int(yr),  # year
         monmap[g(3)],  # month
@@ -180,6 +187,7 @@ def unpack_rfc850(m):
 
 def build_http_date(when):
     year, month, day, hh, mm, ss, wd, y, z = time.gmtime(when)
+
     return "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
         weekdayname[wd],
         day,
@@ -194,28 +202,31 @@ def build_http_date(when):
 def parse_http_date(d):
     d = d.lower()
     m = rfc850_reg.match(d)
+
     if m and m.end() == len(d):
         retval = int(calendar.timegm(unpack_rfc850(m)))
     else:
         m = rfc822_reg.match(d)
+
         if m and m.end() == len(d):
             retval = int(calendar.timegm(unpack_rfc822(m)))
         else:
             return 0
+
     return retval
 
 
 # RFC 5234 Appendix B.1 "Core Rules":
 # VCHAR         =  %x21-7E
 #                  ; visible (printing) characters
-vchar_re = "\x21-\x7e"
+vchar_re = VCHAR
 
 # RFC 7230 Section 3.2.6 "Field Value Components":
 # quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE
 # qdtext        = HTAB / SP /%x21 / %x23-5B / %x5D-7E / obs-text
 # obs-text      = %x80-FF
 # quoted-pair   = "\" ( HTAB / SP / VCHAR / obs-text )
-obs_text_re = "\x80-\xff"
+obs_text_re = OBS_TEXT
 
 # The '\\' between \x5b and \x5d is needed to escape \x5d (']')
 qdtext_re = "[\t \x21\x23-\x5b\\\x5d-\x7e" + obs_text_re + "]"