Skip to content

Commit 3dce676

Browse files
trflynn89 authored and gmta committed
LibWeb: Extract some CORS and MIME Fetch helpers to their own files
An upcoming commit will migrate the contents of Headers.h/cpp to LibHTTP for use outside of LibWeb. These CORS and MIME helpers depend on other LibWeb facilities, however, so they cannot be moved.
1 parent 0fd80a8 commit 3dce676

30 files changed

+382
-299
lines changed

Libraries/LibWeb/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -379,8 +379,10 @@ set(SOURCES
379379
Fetch/Infrastructure/FetchTimingInfo.cpp
380380
Fetch/Infrastructure/HTTP.cpp
381381
Fetch/Infrastructure/HTTP/Bodies.cpp
382+
Fetch/Infrastructure/HTTP/CORS.cpp
382383
Fetch/Infrastructure/HTTP/Headers.cpp
383384
Fetch/Infrastructure/HTTP/Methods.cpp
385+
Fetch/Infrastructure/HTTP/MIME.cpp
384386
Fetch/Infrastructure/HTTP/Requests.cpp
385387
Fetch/Infrastructure/HTTP/Responses.cpp
386388
Fetch/Infrastructure/HTTP/Statuses.cpp

Libraries/LibWeb/CSS/CSSImportRule.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include <LibWeb/CSS/StyleComputer.h>
1919
#include <LibWeb/DOM/Document.h>
2020
#include <LibWeb/DOMURL/DOMURL.h>
21+
#include <LibWeb/Fetch/Infrastructure/HTTP/MIME.h>
2122
#include <LibWeb/Fetch/Infrastructure/HTTP/Responses.h>
2223
#include <LibWeb/HTML/Window.h>
2324

@@ -153,7 +154,7 @@ void CSSImportRule::fetch()
153154
// 4. Let importedStylesheet be the result of parsing byteStream given parsedUrl.
154155
// FIXME: Tidy up our parsing API. For now, do the decoding here.
155156
Optional<String> mime_type_charset;
156-
if (auto extracted_mime_type = response->header_list()->extract_mime_type(); extracted_mime_type.has_value()) {
157+
if (auto extracted_mime_type = Fetch::Infrastructure::extract_mime_type(response->header_list()); extracted_mime_type.has_value()) {
157158
if (auto charset = extracted_mime_type->parameters().get("charset"sv); charset.has_value())
158159
mime_type_charset = charset.value();
159160
}

Libraries/LibWeb/CSS/StyleComputer.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@
8383
#include <LibWeb/DOM/Element.h>
8484
#include <LibWeb/DOM/ShadowRoot.h>
8585
#include <LibWeb/Fetch/Infrastructure/FetchController.h>
86+
#include <LibWeb/Fetch/Infrastructure/HTTP/MIME.h>
8687
#include <LibWeb/Fetch/Response.h>
8788
#include <LibWeb/HTML/HTMLBRElement.h>
8889
#include <LibWeb/HTML/HTMLHtmlElement.h>
@@ -310,7 +311,7 @@ void FontLoader::font_did_load_or_fail(RefPtr<Gfx::Typeface const> typeface)
310311
ErrorOr<NonnullRefPtr<Gfx::Typeface const>> FontLoader::try_load_font(Fetch::Infrastructure::Response const& response, ByteBuffer const& bytes)
311312
{
312313
// FIXME: This could maybe use the format() provided in @font-face as well, since often the mime type is just application/octet-stream and we have to try every format
313-
auto mime_type = response.header_list()->extract_mime_type();
314+
auto mime_type = Fetch::Infrastructure::extract_mime_type(response.header_list());
314315
if (!mime_type.has_value() || !mime_type->is_font()) {
315316
mime_type = MimeSniff::Resource::sniff(bytes, MimeSniff::SniffingConfiguration { .sniffing_context = MimeSniff::SniffingContext::Font });
316317
}

Libraries/LibWeb/DOM/DocumentLoading.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include <LibTextCodec/Decoder.h>
1313
#include <LibWeb/DOM/Document.h>
1414
#include <LibWeb/DOM/DocumentLoading.h>
15+
#include <LibWeb/Fetch/Infrastructure/HTTP/MIME.h>
1516
#include <LibWeb/HTML/HTMLHeadElement.h>
1617
#include <LibWeb/HTML/Navigable.h>
1718
#include <LibWeb/HTML/NavigationParams.h>
@@ -93,8 +94,8 @@ static WebIDL::ExceptionOr<GC::Ref<DOM::Document>> load_html_document(HTML::Navi
9394
// causes a load event to be fired.
9495
else {
9596
// FIXME: Parse as we receive the document data, instead of waiting for the whole document to be fetched first.
96-
auto process_body = GC::create_function(document->heap(), [document, signal_to_continue_session_history_processing, url = navigation_params.response->url().value(), mime_type = navigation_params.response->header_list()->extract_mime_type()](ByteBuffer data) {
97-
Platform::EventLoopPlugin::the().deferred_invoke(GC::create_function(document->heap(), [signal_to_continue_session_history_processing, document = document, data = move(data), url = url, mime_type] {
97+
auto process_body = GC::create_function(document->heap(), [document, signal_to_continue_session_history_processing, url = navigation_params.response->url().value(), mime_type = Fetch::Infrastructure::extract_mime_type(navigation_params.response->header_list())](ByteBuffer data) mutable {
98+
Platform::EventLoopPlugin::the().deferred_invoke(GC::create_function(document->heap(), [signal_to_continue_session_history_processing, document = document, data = move(data), url = url, mime_type = move(mime_type)] {
9899
// NB: If document is part of a session history entry's traversal, resolve the signal_to_continue_session_history_processing.
99100
signal_to_continue_session_history_processing->resolve({});
100101
auto parser = HTML::HTMLParser::create_with_uncertain_encoding(document, data, mime_type);
@@ -362,7 +363,7 @@ static WebIDL::ExceptionOr<GC::Ref<DOM::Document>> load_media_document(HTML::Nav
362363
auto& realm = document->realm();
363364
navigation_params.response->body()->fully_read(
364365
realm,
365-
GC::create_function(document->heap(), [document, signal_to_continue_session_history_processing](ByteBuffer) {
366+
GC::create_function(document->heap(), [document, signal_to_continue_session_history_processing](ByteBuffer) {
366367
// NB: If document is part of session history traversal, resolve the signal_to_continue_session_history_processing.
367368
signal_to_continue_session_history_processing->resolve({});
368369
HTML::HTMLParser::the_end(document); }),
@@ -418,7 +419,7 @@ GC::Ptr<DOM::Document> load_document(HTML::NavigationParams const& navigation_pa
418419
// NB: Use Core::Promise to signal SessionHistoryTraversalQueue that it can continue to execute next entry.
419420

420421
// 1. Let type be the computed type of navigationParams's response.
421-
auto supplied_type = navigation_params.response->header_list()->extract_mime_type();
422+
auto supplied_type = Fetch::Infrastructure::extract_mime_type(navigation_params.response->header_list());
422423
auto type = MimeSniff::Resource::sniff(
423424
navigation_params.response->body()->source().visit(
424425
[](Empty) { return ReadonlyBytes {}; },

Libraries/LibWeb/Fetch/Fetching/Fetching.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,9 @@
3232
#include <LibWeb/Fetch/Infrastructure/FetchParams.h>
3333
#include <LibWeb/Fetch/Infrastructure/FetchRecord.h>
3434
#include <LibWeb/Fetch/Infrastructure/FetchTimingInfo.h>
35+
#include <LibWeb/Fetch/Infrastructure/HTTP/CORS.h>
3536
#include <LibWeb/Fetch/Infrastructure/HTTP/Headers.h>
37+
#include <LibWeb/Fetch/Infrastructure/HTTP/MIME.h>
3638
#include <LibWeb/Fetch/Infrastructure/HTTP/Methods.h>
3739
#include <LibWeb/Fetch/Infrastructure/HTTP/Requests.h>
3840
#include <LibWeb/Fetch/Infrastructure/HTTP/Responses.h>
@@ -713,7 +715,7 @@ void fetch_response_handover(JS::Realm& realm, Infrastructure::FetchParams const
713715
response_status = response.status();
714716

715717
// 2. Let mimeType be the result of extracting a MIME type from response’s header list.
716-
auto mime_type = response.header_list()->extract_mime_type();
718+
auto mime_type = Infrastructure::extract_mime_type(response.header_list());
717719

718720
// 3. If mimeType is non-null, then set bodyInfo’s content type to the result of minimizing a supported MIME type given mimeType.
719721
if (mime_type.has_value())

Libraries/LibWeb/Fetch/Headers.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include <LibWeb/Bindings/HeadersPrototype.h>
1111
#include <LibWeb/Bindings/Intrinsics.h>
1212
#include <LibWeb/Fetch/Headers.h>
13+
#include <LibWeb/Fetch/Infrastructure/HTTP/CORS.h>
1314

1415
namespace Web::Fetch {
1516

Libraries/LibWeb/Fetch/Infrastructure/HTTP/CORS.cpp

Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
/*
2+
* Copyright (c) 2022-2023, Linus Groh <linusg@serenityos.org>
3+
* Copyright (c) 2022, Kenneth Myhra <kennethmyhra@serenityos.org>
4+
* Copyright (c) 2022, Luke Wilde <lukew@serenityos.org>
5+
*
6+
* SPDX-License-Identifier: BSD-2-Clause
7+
*/
8+
9+
#include <AK/Checked.h>
10+
#include <LibHTTP/Header.h>
11+
#include <LibTextCodec/Decoder.h>
12+
#include <LibWeb/Fetch/Infrastructure/HTTP/CORS.h>
13+
#include <LibWeb/Fetch/Infrastructure/HTTP/Headers.h>
14+
#include <LibWeb/MimeSniff/MimeType.h>
15+
16+
namespace Web::Fetch::Infrastructure {
17+
18+
// https://fetch.spec.whatwg.org/#cors-safelisted-request-header
19+
bool is_cors_safelisted_request_header(Header const& header)
20+
{
21+
auto const& [name, value] = header;
22+
23+
// 1. If value’s length is greater than 128, then return false.
24+
if (value.length() > 128)
25+
return false;
26+
27+
// 2. Byte-lowercase name and switch on the result:
28+
// `accept`
29+
if (name.equals_ignoring_ascii_case("accept"sv)) {
30+
// If value contains a CORS-unsafe request-header byte, then return false.
31+
if (any_of(value, is_cors_unsafe_request_header_byte))
32+
return false;
33+
}
34+
// `accept-language`
35+
// `content-language`
36+
else if (name.is_one_of_ignoring_ascii_case("accept-language"sv, "content-language"sv)) {
37+
// If value contains a byte that is not in the range 0x30 (0) to 0x39 (9), inclusive, is not in the range 0x41 (A) to 0x5A (Z), inclusive, is not in the range 0x61 (a) to 0x7A (z), inclusive, and is not 0x20 (SP), 0x2A (*), 0x2C (,), 0x2D (-), 0x2E (.), 0x3B (;), or 0x3D (=), then return false.
38+
if (any_of(value, [](auto byte) {
39+
return !(is_ascii_digit(byte) || is_ascii_alpha(byte) || " *,-.;="sv.contains(byte));
40+
}))
41+
return false;
42+
}
43+
// `content-type`
44+
else if (name.equals_ignoring_ascii_case("content-type"sv)) {
45+
// 1. If value contains a CORS-unsafe request-header byte, then return false.
46+
if (any_of(value, is_cors_unsafe_request_header_byte))
47+
return false;
48+
49+
// 2. Let mimeType be the result of parsing the result of isomorphic decoding value.
50+
auto decoded = TextCodec::isomorphic_decode(value);
51+
auto mime_type = MimeSniff::MimeType::parse(decoded);
52+
53+
// 3. If mimeType is failure, then return false.
54+
if (!mime_type.has_value())
55+
return false;
56+
57+
// 4. If mimeType’s essence is not "application/x-www-form-urlencoded", "multipart/form-data", or "text/plain", then return false.
58+
if (!mime_type->essence().is_one_of("application/x-www-form-urlencoded"sv, "multipart/form-data"sv, "text/plain"sv))
59+
return false;
60+
}
61+
// `range`
62+
else if (name.equals_ignoring_ascii_case("range"sv)) {
63+
// 1. Let rangeValue be the result of parsing a single range header value given value and false.
64+
auto range_value = parse_single_range_header_value(value, false);
65+
66+
// 2. If rangeValue is failure, then return false.
67+
if (!range_value.has_value())
68+
return false;
69+
70+
// 3. If rangeValue[0] is null, then return false.
71+
// NOTE: As web browsers have historically not emitted ranges such as `bytes=-500` this algorithm does not safelist them.
72+
if (!range_value->start.has_value())
73+
return false;
74+
}
75+
// Otherwise
76+
else {
77+
// Return false.
78+
return false;
79+
}
80+
81+
// 3. Return true.
82+
return true;
83+
}
84+
85+
// https://fetch.spec.whatwg.org/#cors-unsafe-request-header-byte
86+
bool is_cors_unsafe_request_header_byte(u8 byte)
87+
{
88+
// A CORS-unsafe request-header byte is a byte byte for which one of the following is true:
89+
// - byte is less than 0x20 and is not 0x09 HT
90+
// - byte is 0x22 ("), 0x28 (left parenthesis), 0x29 (right parenthesis), 0x3A (:), 0x3C (<), 0x3E (>), 0x3F (?), 0x40 (@), 0x5B ([), 0x5C (\), 0x5D (]), 0x7B ({), 0x7D (}), or 0x7F DEL.
91+
return (byte < 0x20 && byte != 0x09)
92+
|| (Array { 0x22, 0x28, 0x29, 0x3A, 0x3C, 0x3E, 0x3F, 0x40, 0x5B, 0x5C, 0x5D, 0x7B, 0x7D, 0x7F }.contains_slow(byte));
93+
}
94+
95+
// https://fetch.spec.whatwg.org/#cors-unsafe-request-header-names
96+
Vector<ByteString> get_cors_unsafe_header_names(HeaderList const& headers)
97+
{
98+
// 1. Let unsafeNames be a new list.
99+
Vector<ByteString> unsafe_names;
100+
101+
// 2. Let potentiallyUnsafeNames be a new list.
102+
Vector<ByteString> potentially_unsafe_names;
103+
104+
// 3. Let safelistValueSize be 0.
105+
Checked<size_t> safelist_value_size = 0;
106+
107+
// 4. For each header of headers:
108+
for (auto const& header : headers) {
109+
// 1. If header is not a CORS-safelisted request-header, then append header’s name to unsafeNames.
110+
if (!is_cors_safelisted_request_header(header)) {
111+
unsafe_names.append(header.name);
112+
}
113+
// 2. Otherwise, append header’s name to potentiallyUnsafeNames and increase safelistValueSize by header’s
114+
// value’s length.
115+
else {
116+
potentially_unsafe_names.append(header.name);
117+
safelist_value_size += header.value.length();
118+
}
119+
}
120+
121+
// 5. If safelistValueSize is greater than 1024, then for each name of potentiallyUnsafeNames, append name to
122+
// unsafeNames.
123+
if (safelist_value_size.has_overflow() || safelist_value_size.value() > 1024)
124+
unsafe_names.extend(move(potentially_unsafe_names));
125+
126+
// 6. Return the result of convert header names to a sorted-lowercase set with unsafeNames.
127+
return convert_header_names_to_a_sorted_lowercase_set(unsafe_names.span());
128+
}
129+
130+
// https://fetch.spec.whatwg.org/#cors-non-wildcard-request-header-name
131+
bool is_cors_non_wildcard_request_header_name(StringView header_name)
132+
{
133+
// A CORS non-wildcard request-header name is a header name that is a byte-case-insensitive match for `Authorization`.
134+
return header_name.equals_ignoring_ascii_case("Authorization"sv);
135+
}
136+
137+
// https://fetch.spec.whatwg.org/#privileged-no-cors-request-header-name
138+
bool is_privileged_no_cors_request_header_name(StringView header_name)
139+
{
140+
// A privileged no-CORS request-header name is a header name that is a byte-case-insensitive match for one of
141+
// - `Range`.
142+
return header_name.equals_ignoring_ascii_case("Range"sv);
143+
}
144+
145+
// https://fetch.spec.whatwg.org/#cors-safelisted-response-header-name
146+
bool is_cors_safelisted_response_header_name(StringView header_name, ReadonlySpan<StringView> list)
147+
{
148+
// A CORS-safelisted response-header name, given a list of header names list, is a header name that is a byte-case-insensitive match for one of
149+
// - `Cache-Control`
150+
// - `Content-Language`
151+
// - `Content-Length`
152+
// - `Content-Type`
153+
// - `Expires`
154+
// - `Last-Modified`
155+
// - `Pragma`
156+
// - Any item in list that is not a forbidden response-header name.
157+
return header_name.is_one_of_ignoring_ascii_case(
158+
"Cache-Control"sv,
159+
"Content-Language"sv,
160+
"Content-Length"sv,
161+
"Content-Type"sv,
162+
"Expires"sv,
163+
"Last-Modified"sv,
164+
"Pragma"sv)
165+
|| any_of(list, [&](auto list_header_name) {
166+
return header_name.equals_ignoring_ascii_case(list_header_name)
167+
&& !is_forbidden_response_header_name(list_header_name);
168+
});
169+
}
170+
171+
// https://fetch.spec.whatwg.org/#no-cors-safelisted-request-header-name
172+
bool is_no_cors_safelisted_request_header_name(StringView header_name)
173+
{
174+
// A no-CORS-safelisted request-header name is a header name that is a byte-case-insensitive match for one of
175+
// - `Accept`
176+
// - `Accept-Language`
177+
// - `Content-Language`
178+
// - `Content-Type`
179+
return header_name.is_one_of_ignoring_ascii_case(
180+
"Accept"sv,
181+
"Accept-Language"sv,
182+
"Content-Language"sv,
183+
"Content-Type"sv);
184+
}
185+
186+
// https://fetch.spec.whatwg.org/#no-cors-safelisted-request-header
187+
bool is_no_cors_safelisted_request_header(Header const& header)
188+
{
189+
// 1. If name is not a no-CORS-safelisted request-header name, then return false.
190+
if (!is_no_cors_safelisted_request_header_name(header.name))
191+
return false;
192+
193+
// 2. Return whether (name, value) is a CORS-safelisted request-header.
194+
return is_cors_safelisted_request_header(header);
195+
}
196+
197+
}

Libraries/LibWeb/Fetch/Infrastructure/HTTP/CORS.h

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
/*
2+
* Copyright (c) 2022-2023, Linus Groh <linusg@serenityos.org>
3+
*
4+
* SPDX-License-Identifier: BSD-2-Clause
5+
*/
6+
7+
#pragma once
8+
9+
#include <AK/ByteString.h>
10+
#include <AK/StringView.h>
11+
#include <AK/Vector.h>
12+
#include <LibWeb/Forward.h>
13+
14+
namespace Web::Fetch::Infrastructure {
15+
16+
[[nodiscard]] bool is_cors_safelisted_request_header(Header const&);
17+
[[nodiscard]] bool is_cors_unsafe_request_header_byte(u8);
18+
[[nodiscard]] Vector<ByteString> get_cors_unsafe_header_names(HeaderList const&);
19+
[[nodiscard]] bool is_cors_non_wildcard_request_header_name(StringView);
20+
[[nodiscard]] bool is_privileged_no_cors_request_header_name(StringView);
21+
[[nodiscard]] bool is_cors_safelisted_response_header_name(StringView, ReadonlySpan<StringView>);
22+
[[nodiscard]] bool is_no_cors_safelisted_request_header_name(StringView);
23+
[[nodiscard]] bool is_no_cors_safelisted_request_header(Header const&);
24+
25+
}

0 commit comments

Comments
 (0)