From 2f09fe26b94ede913b151cb1bb3bcef543e113a2 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google]" Date: Wed, 15 Jun 2022 20:10:15 +0000 Subject: [PATCH 1/7] gh-87389: Fix an open redirection vulnerability in http.server. Fix an open redirection vulnerability in the `http.server` module when an URI path starts with `//`. Vulnerability discovered, and initial fix proposed, by Hamza Avvan. Test authored and secondary mitigation by Gregory P. Smith [Google]. --- Lib/http/server.py | 14 +++++++++-- Lib/test/test_httpservers.py | 24 +++++++++++++++++-- ...2-06-15-20-09-23.gh-issue-87389.QVaC3f.rst | 3 +++ 3 files changed, 37 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Security/2022-06-15-20-09-23.gh-issue-87389.QVaC3f.rst diff --git a/Lib/http/server.py b/Lib/http/server.py index d115dfc162bb14..751a9936ac625f 100644 --- a/Lib/http/server.py +++ b/Lib/http/server.py @@ -329,6 +329,13 @@ def parse_request(self): return False self.command, self.path = command, path + # gh-87389: The purpose of replacing '//' with '/' is to protect against + # open redirect attacks module which could be triggered if the path + # starts with '//' because web clients treat //path as an absolute url + # without scheme (similar to http://path) rather than a relative path. + if self.path.startswith('//'): + self.path = '/' + self.path.lstrip('/') # Reduce to a single / + # Examine the headers and look for a Connection directive. try: self.headers = http.client.parse_headers(self.rfile, @@ -682,8 +689,11 @@ def send_head(self): if not parts.path.endswith('/'): # redirect browser - doing basically what apache does self.send_response(HTTPStatus.MOVED_PERMANENTLY) - new_parts = (parts[0], parts[1], parts[2] + '/', - parts[3], parts[4]) + # scheme[0] and netloc[1] are intentionally blanked out as we + # are only processing a path. They could allow injection into + # Location header if self.path wound up containing + # more than it was supposed to. See gh-87389. + new_parts = ('', '', parts[2] + '/', parts[3], parts[4]) new_url = urllib.parse.urlunsplit(new_parts) self.send_header("Location", new_url) self.send_header("Content-Length", "0") diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py index 1f041aa121f974..4fc2d6a3145f7a 100644 --- a/Lib/test/test_httpservers.py +++ b/Lib/test/test_httpservers.py @@ -334,7 +334,7 @@ class request_handler(NoLogRequestHandler, SimpleHTTPRequestHandler): pass def setUp(self): - BaseTestCase.setUp(self) + super().setUp() self.cwd = os.getcwd() basetempdir = tempfile.gettempdir() os.chdir(basetempdir) @@ -362,7 +362,7 @@ def tearDown(self): except: pass finally: - BaseTestCase.tearDown(self) + super().tearDown() def check_status_and_reason(self, response, status, data=None): def close_conn(): @@ -418,6 +418,26 @@ def test_undecodable_filename(self): self.check_status_and_reason(response, HTTPStatus.OK, data=os_helper.TESTFN_UNDECODABLE) + def test_get_dir_redirect_location_domain_injection_bug(self): + """Ensure //evil.co/..%2f../../X does not put //evil.co/ in Location. + + //domain/ in a Location header is a redirect to a new domain name. + https://github.com/python/cpython/issues/87389 + + This checks that a path resolving to a directory on our server cannot + resolve into a redirect to another server telling it that the + directory in question exists on the Referrer server. + """ + os.mkdir(os.path.join(self.tempdir, 'existing_directory')) + attack_url = f'//python.org/..%2f..%2f..%2f..%2f..%2f../%0a%0d/../{self.tempdir_name}/existing_directory' + response = self.request(attack_url) + self.check_status_and_reason(response, HTTPStatus.MOVED_PERMANENTLY) + location = response.getheader('Location') + self.assertFalse(location.startswith('//'), msg=location) + self.assertEqual(location, attack_url[1:] + '/', + msg='Expected Location: to start with a single / and ' + 'end with a / as this is a directory redirect.') + def test_get(self): #constructs the path relative to the root directory of the HTTPServer response = self.request(self.base_url + '/test') diff --git a/Misc/NEWS.d/next/Security/2022-06-15-20-09-23.gh-issue-87389.QVaC3f.rst b/Misc/NEWS.d/next/Security/2022-06-15-20-09-23.gh-issue-87389.QVaC3f.rst new file mode 100644 index 00000000000000..029d437190deb5 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2022-06-15-20-09-23.gh-issue-87389.QVaC3f.rst @@ -0,0 +1,3 @@ +:mod:`http.server`: Fix an open redirection vulnerability in the HTTP server +when an URI path starts with ``//``. Vulnerability discovered, and initial +fix proposed, by Hamza Avvan. From 3b38f5299c311745e2a5768989f7ab67b5d50a25 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google]" Date: Wed, 15 Jun 2022 20:46:01 +0000 Subject: [PATCH 2/7] A more defensive assert within the test. --- Lib/test/test_httpservers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py index 4fc2d6a3145f7a..f632a80e0ca572 100644 --- a/Lib/test/test_httpservers.py +++ b/Lib/test/test_httpservers.py @@ -434,8 +434,8 @@ def test_get_dir_redirect_location_domain_injection_bug(self): self.check_status_and_reason(response, HTTPStatus.MOVED_PERMANENTLY) location = response.getheader('Location') self.assertFalse(location.startswith('//'), msg=location) - self.assertEqual(location, attack_url[1:] + '/', - msg='Expected Location: to start with a single / and ' + self.assertEqual(location, f'/{attack_url.lstrip("/")}/', + msg='Expected Location header to start with a single / and ' 'end with a / as this is a directory redirect.') def test_get(self): From 19a5bf685c7a208e76720bc3826b3ebb68daaf21 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google]" Date: Wed, 15 Jun 2022 20:50:18 +0000 Subject: [PATCH 3/7] Fix wording in some comments. --- Lib/http/server.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Lib/http/server.py b/Lib/http/server.py index 751a9936ac625f..b7a4c986b3cba8 100644 --- a/Lib/http/server.py +++ b/Lib/http/server.py @@ -329,10 +329,10 @@ def parse_request(self): return False self.command, self.path = command, path - # gh-87389: The purpose of replacing '//' with '/' is to protect against - # open redirect attacks module which could be triggered if the path - # starts with '//' because web clients treat //path as an absolute url - # without scheme (similar to http://path) rather than a relative path. + # gh-87389: The purpose of replacing '//' with '/' is to protect + # against open redirect attacks possibly triggered if the path starts + # with '//' because http clients treat //path as an absolute URI + # without scheme (similar to http://path) rather than a path. if self.path.startswith('//'): self.path = '/' + self.path.lstrip('/') # Reduce to a single / @@ -691,7 +691,7 @@ def send_head(self): self.send_response(HTTPStatus.MOVED_PERMANENTLY) # scheme[0] and netloc[1] are intentionally blanked out as we # are only processing a path. They could allow injection into - # Location header if self.path wound up containing + # the Location header if self.path wound up containing # more than it was supposed to. See gh-87389. new_parts = ('', '', parts[2] + '/', parts[3], parts[4]) new_url = urllib.parse.urlunsplit(new_parts) From e16d38d15e64ec76510c74e7a3a8e1c1dc6eb72a Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google]" Date: Wed, 15 Jun 2022 20:53:49 +0000 Subject: [PATCH 4/7] Add an explanatory comment in the test. --- Lib/test/test_httpservers.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py index f632a80e0ca572..718fe71e09c5c5 100644 --- a/Lib/test/test_httpservers.py +++ b/Lib/test/test_httpservers.py @@ -429,6 +429,8 @@ def test_get_dir_redirect_location_domain_injection_bug(self): directory in question exists on the Referrer server. """ os.mkdir(os.path.join(self.tempdir, 'existing_directory')) + # Canonicalizes to /tmp/tempdir_name/existing_directory which does + # exist and is a dir, triggering the 301 redirect and former bug. attack_url = f'//python.org/..%2f..%2f..%2f..%2f..%2f../%0a%0d/../{self.tempdir_name}/existing_directory' response = self.request(attack_url) self.check_status_and_reason(response, HTTPStatus.MOVED_PERMANENTLY) From 25a3a1c4188fb8f50bb7dd07c9ccdb7c9c8a654f Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google]" Date: Thu, 16 Jun 2022 18:54:31 +0000 Subject: [PATCH 5/7] Address vstinner comments on the test. make the base urls, attack urls, and expected_location more clear in the test. Adds an additional test for a triple-slash path to ensure we're not only treating double slashes as special. --- Lib/test/test_httpservers.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py index 718fe71e09c5c5..4818f7223d1b76 100644 --- a/Lib/test/test_httpservers.py +++ b/Lib/test/test_httpservers.py @@ -421,25 +421,32 @@ def test_undecodable_filename(self): def test_get_dir_redirect_location_domain_injection_bug(self): """Ensure //evil.co/..%2f../../X does not put //evil.co/ in Location. - //domain/ in a Location header is a redirect to a new domain name. + //netloc/ in a Location header is a redirect to a new host. https://github.com/python/cpython/issues/87389 This checks that a path resolving to a directory on our server cannot - resolve into a redirect to another server telling it that the - directory in question exists on the Referrer server. + resolve into a redirect to another server. """ os.mkdir(os.path.join(self.tempdir, 'existing_directory')) + url = f'/python.org/..%2f..%2f..%2f..%2f..%2f../%0a%0d/../{self.tempdir_name}/existing_directory' # Canonicalizes to /tmp/tempdir_name/existing_directory which does # exist and is a dir, triggering the 301 redirect and former bug. - attack_url = f'//python.org/..%2f..%2f..%2f..%2f..%2f../%0a%0d/../{self.tempdir_name}/existing_directory' + attack_url = f'/{url}' # //python.org... multi-slash prefix, no trailing slash + expected_location = f'{url}/' # /python.org.../ single slash single prefix, trailing slash + response = self.request(attack_url) self.check_status_and_reason(response, HTTPStatus.MOVED_PERMANENTLY) location = response.getheader('Location') self.assertFalse(location.startswith('//'), msg=location) - self.assertEqual(location, f'/{attack_url.lstrip("/")}/', + self.assertEqual(location, expected_location, msg='Expected Location header to start with a single / and ' 'end with a / as this is a directory redirect.') + attack3_url = f'//{url}' # ///python.org... triple-slash prefix, no trailing slash + response = self.request(attack3_url) + self.check_status_and_reason(response, HTTPStatus.MOVED_PERMANENTLY) + self.assertEqual(response.getheader('Location'), expected_location) + def test_get(self): #constructs the path relative to the root directory of the HTTPServer response = self.request(self.base_url + '/test') From 7c9464a2ef986352273f305f36c25f99dbb09622 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google]" Date: Thu, 16 Jun 2022 20:26:02 +0000 Subject: [PATCH 6/7] Add a test for absolute Request-URI and fixup to allow it. Remove the send_head scheme & netloc neutering that I had added. Unnecessary in the primary issue's case and actually causes a problem in this other potential case. It's unclear how often any clients ever send an absolute-URI to a http server instead of a relative-URI but as it works today we shouldn't break that. --- Lib/http/server.py | 7 ++----- Lib/test/test_httpservers.py | 11 +++++++++++ 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/Lib/http/server.py b/Lib/http/server.py index b7a4c986b3cba8..f2aeb65942020a 100644 --- a/Lib/http/server.py +++ b/Lib/http/server.py @@ -689,11 +689,8 @@ def send_head(self): if not parts.path.endswith('/'): # redirect browser - doing basically what apache does self.send_response(HTTPStatus.MOVED_PERMANENTLY) - # scheme[0] and netloc[1] are intentionally blanked out as we - # are only processing a path. They could allow injection into - # the Location header if self.path wound up containing - # more than it was supposed to. See gh-87389. - new_parts = ('', '', parts[2] + '/', parts[3], parts[4]) + new_parts = (parts[0], parts[1], parts[2] + '/', + parts[3], parts[4]) new_url = urllib.parse.urlunsplit(new_parts) self.send_header("Location", new_url) self.send_header("Content-Length", "0") diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py index 4818f7223d1b76..df3d81a5239fc5 100644 --- a/Lib/test/test_httpservers.py +++ b/Lib/test/test_httpservers.py @@ -447,6 +447,17 @@ def test_get_dir_redirect_location_domain_injection_bug(self): self.check_status_and_reason(response, HTTPStatus.MOVED_PERMANENTLY) self.assertEqual(response.getheader('Location'), expected_location) + # If the second word in the http request (Request-URI for the http + # method) is a full URI, we don't worry about it, as that'll be parsed + # and reassembled as a full URI within BaseHTTPRequestHandler.send_head + # so no errant scheme-less //netloc//evil.co/ domain mixup can happen. + attack_scheme_netloc_2slash_url = f'https://pypi.org/{url}' + expected_scheme_netloc_location = f'{attack_scheme_netloc_2slash_url}/' + response = self.request(attack_scheme_netloc_2slash_url) + self.check_status_and_reason(response, HTTPStatus.MOVED_PERMANENTLY) + location = response.getheader('Location') + self.assertEqual(location, expected_scheme_netloc_location) + def test_get(self): #constructs the path relative to the root directory of the HTTPServer response = self.request(self.base_url + '/test') From 8563d4a74d7ee0abca7dc3910c9a2dac4e08ce65 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google]" Date: Thu, 16 Jun 2022 20:34:29 +0000 Subject: [PATCH 7/7] Further improve test cases. Adds a test for the non-attack as a starting all-ok check. Makes the absolute scheme://netloc request-uri case more lenient as all we care about there is that the scheme://netloc makes it into the Location: header in that scenario. --- Lib/test/test_httpservers.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py index df3d81a5239fc5..a937258069ed89 100644 --- a/Lib/test/test_httpservers.py +++ b/Lib/test/test_httpservers.py @@ -429,11 +429,16 @@ def test_get_dir_redirect_location_domain_injection_bug(self): """ os.mkdir(os.path.join(self.tempdir, 'existing_directory')) url = f'/python.org/..%2f..%2f..%2f..%2f..%2f../%0a%0d/../{self.tempdir_name}/existing_directory' - # Canonicalizes to /tmp/tempdir_name/existing_directory which does - # exist and is a dir, triggering the 301 redirect and former bug. - attack_url = f'/{url}' # //python.org... multi-slash prefix, no trailing slash expected_location = f'{url}/' # /python.org.../ single slash single prefix, trailing slash + # Canonicalizes to /tmp/tempdir_name/existing_directory which does + # exist and is a dir, triggering the 301 redirect logic. + response = self.request(url) + self.check_status_and_reason(response, HTTPStatus.MOVED_PERMANENTLY) + location = response.getheader('Location') + self.assertEqual(location, expected_location, msg='non-attack failed!') + # //python.org... multi-slash prefix, no trailing slash + attack_url = f'/{url}' response = self.request(attack_url) self.check_status_and_reason(response, HTTPStatus.MOVED_PERMANENTLY) location = response.getheader('Location') @@ -442,7 +447,8 @@ def test_get_dir_redirect_location_domain_injection_bug(self): msg='Expected Location header to start with a single / and ' 'end with a / as this is a directory redirect.') - attack3_url = f'//{url}' # ///python.org... triple-slash prefix, no trailing slash + # ///python.org... triple-slash prefix, no trailing slash + attack3_url = f'//{url}' response = self.request(attack3_url) self.check_status_and_reason(response, HTTPStatus.MOVED_PERMANENTLY) self.assertEqual(response.getheader('Location'), expected_location) @@ -456,7 +462,10 @@ def test_get_dir_redirect_location_domain_injection_bug(self): response = self.request(attack_scheme_netloc_2slash_url) self.check_status_and_reason(response, HTTPStatus.MOVED_PERMANENTLY) location = response.getheader('Location') - self.assertEqual(location, expected_scheme_netloc_location) + # We're just ensuring that the scheme and domain make it through, if + # there are or aren't multiple slashes at the start of the path that + # follows that isn't important in this Location: header. + self.assertTrue(location.startswith('https://pypi.org/'), msg=location) def test_get(self): #constructs the path relative to the root directory of the HTTPServer