Skip to content

Commit

Permalink
Merge pull request from GHSA-x456-3ccm-m6j4
Browse files Browse the repository at this point in the history
* Add suggested mitigation against malicious HTML form file input. Test cases and docs not updated.

* Make tests pass. Add test specifically for this vulnerability. Fix the `Form.new_control()` method so that it correctly sets the input value if the value is a file object.

* Add better documentation about the vulnerability remediation. Wording fix.

* tests: Add separate test to check for CVE-2023-34457.

* Revert "tests: Add separate test to check for CVE-2023-34457."

This reverts commit bd4c6d92a8a803499a447b511bc46b1ba00841d0.

* Misc cleanup

* No need to raise the error on forms with an enctype that do not
  upload files (e.g. everything besides multipart).

* Make sure that open file inputs are converted to their basename
  regardless of what kind of form it is to avoid leaking local file
  paths.

* Throw error at time when the file input is set incorrectly, rather
  than at submission time.

* Factor complex repeated logic into their own functions (i.e. for
  the multipart file input check and invalid file value check).

* Fix failing tests.

* Avoid excessive whitespace in exception message.

* Reorder 'and' operands so the slightly less expensive check comes first.

---------

Co-authored-by: Dan Hemberger <daniel.hemberger@gmail.com>
  • Loading branch information
e-c-d and hemberger committed Jul 4, 2023
1 parent b9c8a0c commit d57c4a2
Show file tree
Hide file tree
Showing 5 changed files with 96 additions and 23 deletions.
17 changes: 10 additions & 7 deletions mechanicalsoup/browser.py
@@ -1,3 +1,4 @@
import io
import os
import tempfile
import urllib
Expand All @@ -10,7 +11,7 @@

from .__version__ import __title__, __version__
from .form import Form
from .utils import LinkNotFoundError
from .utils import LinkNotFoundError, is_multipart_file_upload


class Browser:
Expand Down Expand Up @@ -228,18 +229,20 @@ def get_request_kwargs(cls, form, url=None, **kwargs):

# If the enctype is not multipart, the filename is put in
# the form as a text input and the file is not sent.
if tag.get("type", "").lower() == "file" and multipart:
filepath = value
if filepath != "" and isinstance(filepath, str):
content = open(filepath, "rb")
if is_multipart_file_upload(form, tag):
if isinstance(value, io.IOBase):
content = value
filename = os.path.basename(getattr(value, "name", ""))
else:
content = ""
filename = os.path.basename(filepath)
# If value is the empty string, we still pass it
filename = os.path.basename(value)
# If content is the empty string, we still pass it
# for consistency with browsers (see
# https://github.com/MechanicalSoup/MechanicalSoup/issues/250).
files[name] = (filename, content)
else:
if isinstance(value, io.IOBase):
value = os.path.basename(getattr(value, "name", ""))
data.append((name, value))

elif tag.name == "button":
Expand Down
23 changes: 20 additions & 3 deletions mechanicalsoup/form.py
@@ -1,9 +1,10 @@
import copy
import io
import warnings

from bs4 import BeautifulSoup

from .utils import LinkNotFoundError
from .utils import LinkNotFoundError, is_multipart_file_upload


class InvalidFormMethod(LinkNotFoundError):
Expand Down Expand Up @@ -68,6 +69,7 @@ def set_input(self, data):
i = self.form.find("input", {"name": name})
if not i:
raise InvalidFormMethod("No input field named " + name)
self._assert_valid_file_upload(i, value)
i["value"] = value

def uncheck_all(self, name):
Expand Down Expand Up @@ -261,12 +263,12 @@ def set(self, name, value, force=False):
form.set("eula-checkbox", True)
Example: uploading a file through a ``<input type="file"
name="tagname">`` field (provide the path to the local file,
name="tagname">`` field (provide an open file object,
and its content will be uploaded):
.. code-block:: python
form.set("tagname", path_to_local_file)
form.set("tagname", open(path_to_local_file, "rb"))
"""
for func in ("checkbox", "radio", "input", "textarea", "select"):
Expand Down Expand Up @@ -300,6 +302,7 @@ def new_control(self, type, name, value, **kwargs):
control['value'] = value
for k, v in kwargs.items():
control[k] = v
self._assert_valid_file_upload(control, value)
self.form.append(control)
return control

Expand Down Expand Up @@ -383,3 +386,17 @@ def print_summary(self):
if subtag.string:
subtag.string = subtag.string.strip()
print(input_copy)

def _assert_valid_file_upload(self, tag, value):
"""Raise an exception if a multipart file input is not an open file."""
if (
is_multipart_file_upload(self.form, tag) and
not isinstance(value, io.IOBase)
):
raise ValueError(
"From v1.3.0 onwards, you must pass an open file object "
'directly, e.g. `form["name"] = open("/path/to/file", "rb")`. '
"This change is to remediate a security vulnerability where "
"a malicious web server could read arbitrary files from the "
"client (CVE-2023-34457)."
)
7 changes: 7 additions & 0 deletions mechanicalsoup/utils.py
Expand Up @@ -14,3 +14,10 @@ class LinkNotFoundError(Exception):
StatefulBrowser).
"""
pass


def is_multipart_file_upload(form, tag):
return (
form.get("enctype", "") == "multipart/form-data" and
tag.get("type", "").lower() == "file"
)
5 changes: 3 additions & 2 deletions tests/test_browser.py
Expand Up @@ -164,8 +164,9 @@ def test_enctype_and_file_submit(httpbin, enctype, submit_file, file_field):
else:
# Encoding doesn't allow sending the content, we expect
# the filename as a normal text field.
expected_content = pic_path.encode()
form.find("input", {"name": "pic"})["value"] = pic_path
expected_content = os.path.basename(pic_path.encode())
tag = form.find("input", {"name": "pic"})
tag["value"] = open(pic_path, "rb")

browser = mechanicalsoup.Browser()
response = browser._request(form)
Expand Down
67 changes: 56 additions & 11 deletions tests/test_stateful_browser.py
Expand Up @@ -391,32 +391,77 @@ def test_form_multiple():

def test_upload_file(httpbin):
browser = mechanicalsoup.StatefulBrowser()
browser.open(httpbin + "/forms/post")
url = httpbin + "/post"
file_input_form = f"""
<form method="post" action="{url}" enctype="multipart/form-data">
<input type="file" name="first" />
</form>
"""

# Create two temporary files to upload
def make_file(content):
path = tempfile.mkstemp()[1]
with open(path, "w") as fd:
fd.write(content)
return path
path1, path2 = (make_file(content) for content in
("first file content", "second file content"))
path1 = make_file("first file content")
path2 = make_file("second file content")

# The form doesn't have a type=file field, but the target action
# does show it => add the fields ourselves, and add enctype too.
value1 = open(path1, "rb")
value2 = open(path2, "rb")

browser.open_fake_page(file_input_form)
browser.select_form()
browser._StatefulBrowser__state.form.form[
"enctype"] = "multipart/form-data"
browser.new_control("file", "first", path1)
browser.new_control("file", "second", "")
browser["second"] = path2
browser.form.print_summary()

# Test filling an existing input and creating a new input
browser["first"] = value1
browser.new_control("file", "second", value2)

response = browser.submit_selected()
files = response.json()["files"]
assert files["first"] == "first file content"
assert files["second"] == "second file content"


def test_upload_file_with_malicious_default(httpbin):
"""Check for CVE-2023-34457 by setting the form input value directly to a
file that the user does not explicitly consent to upload, as a malicious
server might do.
"""
browser = mechanicalsoup.StatefulBrowser()
sensitive_path = tempfile.mkstemp()[1]
with open(sensitive_path, "w") as fd:
fd.write("Some sensitive information")
url = httpbin + "/post"
malicious_html = f"""
<form method="post" action="{url}" enctype="multipart/form-data">
<input type="file" name="malicious" value="{sensitive_path}" />
</form>
"""
browser.open_fake_page(malicious_html)
browser.select_form()
response = browser.submit_selected()
assert response.json()["files"] == {"malicious": ""}


def test_upload_file_raise_on_string_input():
"""Check for use of the file upload API that was modified to remediate
CVE-2023-34457. Users must now open files manually to upload them.
"""
browser = mechanicalsoup.StatefulBrowser()
file_input_form = """
<form enctype="multipart/form-data">
<input type="file" name="upload" />
</form>
"""
browser.open_fake_page(file_input_form)
browser.select_form()
with pytest.raises(ValueError, match="CVE-2023-34457"):
browser["upload"] = "/path/to/file"
with pytest.raises(ValueError, match="CVE-2023-34457"):
browser.new_control("file", "upload2", "/path/to/file")


def test_with():
"""Test that __enter__/__exit__ properly create/close the browser."""
with mechanicalsoup.StatefulBrowser() as browser:
Expand Down

0 comments on commit d57c4a2

Please sign in to comment.