LibJS: Have JS::Lexer take a JS::SourceCode as input

awesomekling · awesomekling · commit 0dacc94edd67 · 2025-11-09T12:14:03.000+01:00
This moves the responsibility of setting up a SourceCode object to the
users of JS::Lexer.

This means Lexer and Parser are free to use string views into the
SourceCode internally while working.

It also means Lexer no longer has to handle anything other than
UTF-16 (or ASCII) input, since callers now convert their text to
UTF-16 when they create the SourceCode. The unit test for parsing
various invalid UTF-8 sequences is therefore deleted here.
diff --git a/Libraries/LibJS/Lexer.cpp b/Libraries/LibJS/Lexer.cpp
@@ -192,9 +192,9 @@ static constexpr TokenType parse_three_char_token(Utf16View const& view)
     }
 }
 
-static consteval Array<TokenType, 256> make_single_char_tokens_array()
+static consteval AK::Array<TokenType, 256> make_single_char_tokens_array()
 {
-    Array<TokenType, 256> array;
+    AK::Array<TokenType, 256> array;
     array.fill(TokenType::Invalid);
     array['&'] = TokenType::Ampersand;
     array['*'] = TokenType::Asterisk;
@@ -225,33 +225,9 @@ static consteval Array<TokenType, 256> make_single_char_tokens_array()
 
 static constexpr auto s_single_char_tokens = make_single_char_tokens_array();
 
-static Utf16String create_utf16_string_from_possibly_invalid_utf8_string(StringView source)
-{
-    Utf8View utf8_source { source };
-    if (utf8_source.validate()) [[likely]]
-        return Utf16String::from_utf8_without_validation(source);
-
-    StringBuilder builder(StringBuilder::Mode::UTF16);
-
-    for (auto code_point : utf8_source) {
-        builder.append_code_point(code_point);
-        if (code_point == AK::UnicodeUtils::REPLACEMENT_CODE_POINT)
-            break;
-    }
-
-    return builder.to_utf16_string();
-}
-
-Lexer::Lexer(StringView source, StringView filename, size_t line_number, size_t line_column)
-    : Lexer(create_utf16_string_from_possibly_invalid_utf8_string(source), filename, line_number, line_column)
-{
-    // FIXME: Remove this API once all callers are ported to UTF-16.
-}
-
-Lexer::Lexer(Utf16String source, StringView filename, size_t line_number, size_t line_column)
+Lexer::Lexer(NonnullRefPtr<SourceCode const> source, size_t line_number, size_t line_column)
     : m_source(move(source))
     , m_current_token(TokenType::Eof, {}, {}, {}, 0, 0, 0)
-    , m_filename(String::from_utf8(filename).release_value_but_fixme_should_propagate_errors())
     , m_line_number(line_number)
     , m_line_column(line_column)
 {
@@ -304,16 +280,16 @@ Lexer::Lexer(Utf16String source, StringView filename, size_t line_number, size_t
 void Lexer::consume()
 {
     auto did_reach_eof = [this] {
-        if (m_position < m_source.length_in_code_units())
+        if (m_position < m_source->code().length_in_code_units())
             return false;
         m_eof = true;
         m_current_code_unit = '\0';
-        m_position = m_source.length_in_code_units() + 1;
+        m_position = m_source->code().length_in_code_units() + 1;
         m_line_column++;
         return true;
     };
 
-    if (m_position > m_source.length_in_code_units())
+    if (m_position > m_source->code().length_in_code_units())
         return;
 
     if (did_reach_eof())
@@ -339,7 +315,7 @@ void Lexer::consume()
         // and column - don't do it again. From https://tc39.es/ecma262/#sec-line-terminators:
         //   The sequence <CR><LF> is commonly used as a line terminator.
         //   It should be considered a single SourceCharacter for the purpose of reporting line numbers.
-        auto second_char_of_crlf = m_position > 1 && m_source.code_unit_at(m_position - 2) == '\r' && m_current_code_unit == '\n';
+        auto second_char_of_crlf = m_position > 1 && m_source->code().code_unit_at(m_position - 2) == '\r' && m_current_code_unit == '\n';
 
         if (!second_char_of_crlf) {
             m_line_number++;
@@ -349,8 +325,8 @@ void Lexer::consume()
             dbgln_if(LEXER_DEBUG, "Previous was CR, this is LF - not incrementing line number again.");
         }
     } else {
-        if (AK::UnicodeUtils::is_utf16_high_surrogate(m_current_code_unit) && m_position < m_source.length_in_code_units()) {
-            if (AK::UnicodeUtils::is_utf16_low_surrogate(m_source.code_unit_at(m_position))) {
+        if (AK::UnicodeUtils::is_utf16_high_surrogate(m_current_code_unit) && m_position < m_source->code().length_in_code_units()) {
+            if (AK::UnicodeUtils::is_utf16_low_surrogate(m_source->code().code_unit_at(m_position))) {
                 ++m_position;
 
                 if (did_reach_eof())
@@ -361,7 +337,7 @@ void Lexer::consume()
         ++m_line_column;
     }
 
-    m_current_code_unit = m_source.code_unit_at(m_position++);
+    m_current_code_unit = m_source->code().code_unit_at(m_position++);
 }
 
 bool Lexer::consume_decimal_number()
@@ -436,40 +412,40 @@ bool Lexer::consume_binary_number()
 template<typename Callback>
 bool Lexer::match_numeric_literal_separator_followed_by(Callback callback) const
 {
-    if (m_position >= m_source.length_in_code_units())
+    if (m_position >= m_source->code().length_in_code_units())
         return false;
     return m_current_code_unit == '_'
-        && callback(m_source.code_unit_at(m_position));
+        && callback(m_source->code().code_unit_at(m_position));
 }
 
 bool Lexer::match(char16_t a, char16_t b) const
 {
-    if (m_position >= m_source.length_in_code_units())
+    if (m_position >= m_source->code().length_in_code_units())
         return false;
 
     return m_current_code_unit == a
-        && m_source.code_unit_at(m_position) == b;
+        && m_source->code().code_unit_at(m_position) == b;
 }
 
 bool Lexer::match(char16_t a, char16_t b, char16_t c) const
 {
-    if (m_position + 1 >= m_source.length_in_code_units())
+    if (m_position + 1 >= m_source->code().length_in_code_units())
         return false;
 
     return m_current_code_unit == a
-        && m_source.code_unit_at(m_position) == b
-        && m_source.code_unit_at(m_position + 1) == c;
+        && m_source->code().code_unit_at(m_position) == b
+        && m_source->code().code_unit_at(m_position + 1) == c;
 }
 
 bool Lexer::match(char16_t a, char16_t b, char16_t c, char16_t d) const
 {
-    if (m_position + 2 >= m_source.length_in_code_units())
+    if (m_position + 2 >= m_source->code().length_in_code_units())
         return false;
 
     return m_current_code_unit == a
-        && m_source.code_unit_at(m_position) == b
-        && m_source.code_unit_at(m_position + 1) == c
-        && m_source.code_unit_at(m_position + 2) == d;
+        && m_source->code().code_unit_at(m_position) == b
+        && m_source->code().code_unit_at(m_position + 1) == c
+        && m_source->code().code_unit_at(m_position + 2) == d;
 }
 
 bool Lexer::is_eof() const
@@ -493,7 +469,7 @@ ALWAYS_INLINE u32 Lexer::current_code_point() const
     if (m_position == 0)
         return AK::UnicodeUtils::REPLACEMENT_CODE_POINT;
 
-    auto substring = m_source.substring_view(m_position - 1);
+    auto substring = m_source->code().substring_view(m_position - 1);
     if (substring.is_empty())
         return AK::UnicodeUtils::REPLACEMENT_CODE_POINT;
 
@@ -615,7 +591,7 @@ bool Lexer::is_block_comment_end() const
 
 bool Lexer::is_numeric_literal_start() const
 {
-    return is_ascii_digit(m_current_code_unit) || (m_current_code_unit == '.' && m_position < m_source.length_in_code_units() && is_ascii_digit(m_source.code_unit_at(m_position)));
+    return is_ascii_digit(m_current_code_unit) || (m_current_code_unit == '.' && m_position < m_source->code().length_in_code_units() && is_ascii_digit(m_source->code().code_unit_at(m_position)));
 }
 
 bool Lexer::slash_means_division() const
@@ -861,7 +837,7 @@ Token const& Lexer::next()
         while (m_current_code_unit != stop_char && m_current_code_unit != '\r' && m_current_code_unit != '\n' && !is_eof()) {
             if (m_current_code_unit == '\\') {
                 consume();
-                if (m_current_code_unit == '\r' && m_position < m_source.length_in_code_units() && m_source.code_unit_at(m_position) == '\n') {
+                if (m_current_code_unit == '\r' && m_position < m_source->code().length_in_code_units() && m_source->code().code_unit_at(m_position) == '\n') {
                     consume();
                 }
             }
@@ -896,8 +872,8 @@ Token const& Lexer::next()
             consume();
         }
 
-        if (!found_token && m_position + 1 < m_source.length_in_code_units()) {
-            auto three_chars_view = m_source.substring_view(m_position - 1, 3);
+        if (!found_token && m_position + 1 < m_source->code().length_in_code_units()) {
+            auto three_chars_view = m_source->code().substring_view(m_position - 1, 3);
             if (auto type = parse_three_char_token(three_chars_view); type != TokenType::Invalid) {
                 found_token = true;
                 token_type = type;
@@ -907,11 +883,11 @@ Token const& Lexer::next()
             }
         }
 
-        if (!found_token && m_position < m_source.length_in_code_units()) {
-            auto two_chars_view = m_source.substring_view(m_position - 1, 2);
+        if (!found_token && m_position < m_source->code().length_in_code_units()) {
+            auto two_chars_view = m_source->code().substring_view(m_position - 1, 2);
             if (auto type = parse_two_char_token(two_chars_view); type != TokenType::Invalid) {
                 // OptionalChainingPunctuator :: ?. [lookahead ∉ DecimalDigit]
-                if (!(type == TokenType::QuestionMarkPeriod && m_position + 1 < m_source.length_in_code_units() && is_ascii_digit(m_source.code_unit_at(m_position + 1)))) {
+                if (!(type == TokenType::QuestionMarkPeriod && m_position + 1 < m_source->code().length_in_code_units() && is_ascii_digit(m_source->code().code_unit_at(m_position + 1)))) {
                     found_token = true;
                     token_type = type;
                     consume();
@@ -945,8 +921,8 @@ Token const& Lexer::next()
     m_current_token = Token(
         token_type,
         token_message,
-        m_source.substring_view(trivia_start - 1, value_start - trivia_start),
-        m_source.substring_view(value_start - 1, m_position - value_start),
+        m_source->code().substring_view(trivia_start - 1, value_start - trivia_start),
+        m_source->code().substring_view(value_start - 1, m_position - value_start),
         value_start_line_number,
         value_start_column_number,
         value_start - 1);
@@ -976,7 +952,7 @@ Token const& Lexer::force_slash_as_regex()
     size_t value_start = m_position - 1;
 
     if (has_equals) {
-        VERIFY(m_source.code_unit_at(value_start - 1) == '=');
+        VERIFY(m_source->code().code_unit_at(value_start - 1) == '=');
         --value_start;
         --m_position;
         m_current_code_unit = '=';
@@ -988,7 +964,7 @@ Token const& Lexer::force_slash_as_regex()
         token_type,
         Token::Message::None,
         m_current_token.trivia(),
-        m_source.substring_view(value_start - 1, m_position - value_start),
+        m_source->code().substring_view(value_start - 1, m_position - value_start),
         m_current_token.line_number(),
         m_current_token.line_column(),
         value_start - 1);
diff --git a/Libraries/LibJS/Lexer.h b/Libraries/LibJS/Lexer.h
@@ -1,32 +1,33 @@
 /*
  * Copyright (c) 2020, Stephan Unverwerth <s.unverwerth@serenityos.org>
+ * Copyright (c) 2020-2025, Andreas Kling <andreas@ladybird.org>
  *
  * SPDX-License-Identifier: BSD-2-Clause
  */
 
 #pragma once
 
 #include <AK/HashMap.h>
-#include <AK/StringView.h>
 #include <AK/Utf16String.h>
 #include <LibJS/Export.h>
+#include <LibJS/SourceCode.h>
 #include <LibJS/Token.h>
 
 namespace JS {
 
 class JS_API Lexer {
 public:
-    explicit Lexer(StringView source, StringView filename = "(unknown)"sv, size_t line_number = 1, size_t line_column = 0);
-    explicit Lexer(Utf16String source, StringView filename = "(unknown)"sv, size_t line_number = 1, size_t line_column = 0);
+    explicit Lexer(NonnullRefPtr<SourceCode const>, size_t line_number = 1, size_t line_column = 0);
 
     // These both advance the lexer and return a reference to the current token.
     Token const& next();
     Token const& force_slash_as_regex();
 
     [[nodiscard]] Token const& current_token() const { return m_current_token; }
 
-    Utf16String const& source() const { return m_source; }
-    String const& filename() const { return m_filename; }
+    SourceCode const& source_code() const { return m_source; }
+    Utf16String const& source() const { return m_source->code(); }
+    String const& filename() const { return m_source->filename(); }
 
     void disallow_html_comments() { m_allow_html_comments = false; }
 
@@ -59,15 +60,14 @@ class JS_API Lexer {
 
     TokenType consume_regex_literal();
 
-    Utf16String m_source;
+    NonnullRefPtr<SourceCode const> m_source;
     size_t m_position { 0 };
     Token m_current_token;
     char16_t m_current_code_unit { 0 };
     bool m_eof { false };
     bool m_regex_is_in_character_class { false };
     bool m_allow_html_comments { true };
 
-    String m_filename;
     size_t m_line_number { 1 };
     size_t m_line_column { 0 };
 
diff --git a/Libraries/LibJS/Parser.cpp b/Libraries/LibJS/Parser.cpp
@@ -688,7 +688,7 @@ Parser::ParserState::ParserState(Lexer l, Program::Type program_type)
 }
 
 Parser::Parser(Lexer lexer, Program::Type program_type, Optional<EvalInitialState> initial_state_for_eval)
-    : m_source_code(SourceCode::create(lexer.filename(), lexer.source()))
+    : m_source_code(lexer.source_code())
     , m_state(move(lexer), program_type)
     , m_program_type(program_type)
 {
@@ -2596,7 +2596,7 @@ RefPtr<BindingPattern const> Parser::synthesize_binding_pattern(Expression const
     auto source_end_offset = expression.source_range().end.offset;
     auto source = m_state.lexer.source().substring_view(source_start_offset, source_end_offset - source_start_offset);
 
-    Lexer lexer { Utf16String::from_utf16(source), m_state.lexer.filename(), expression.source_range().start.line, expression.source_range().start.column };
+    Lexer lexer(SourceCode::create(m_state.lexer.filename(), Utf16String::from_utf16(source)), expression.source_range().start.line, expression.source_range().start.column);
     Parser parser { lexer };
 
     parser.m_state.current_scope_pusher = m_state.current_scope_pusher;
@@ -5233,7 +5233,7 @@ Parser Parser::parse_function_body_from_string(ByteString const& body_string, u1
 {
     RefPtr<FunctionBody const> function_body;
 
-    auto body_parser = Parser { Lexer { body_string } };
+    auto body_parser = Parser(Lexer(SourceCode::create({}, Utf16String::from_utf8(body_string))));
     {
         // Set up some parser state to accept things like return await, and yield in the plain function body.
         body_parser.m_state.in_function_context = true;
diff --git a/Libraries/LibJS/Runtime/AbstractOperations.cpp b/Libraries/LibJS/Runtime/AbstractOperations.cpp
@@ -623,7 +623,7 @@ ThrowCompletionOr<Value> perform_eval(VM& vm, Value x, CallerMode strict_caller,
         .in_class_field_initializer = in_class_field_initializer,
     };
 
-    Parser parser { Lexer { code_string->utf8_string_view() }, Program::Type::Script, move(initial_state) };
+    Parser parser(Lexer(SourceCode::create({}, code_string->utf16_string())), Program::Type::Script, move(initial_state));
     auto program = parser.parse_program(strict_caller == CallerMode::Strict);
 
     //     b. If script is a List of errors, throw a SyntaxError exception.
diff --git a/Libraries/LibJS/Runtime/FunctionConstructor.cpp b/Libraries/LibJS/Runtime/FunctionConstructor.cpp
@@ -156,7 +156,7 @@ ThrowCompletionOr<GC::Ref<ECMAScriptFunctionObject>> FunctionConstructor::create
 
     // 17. Let parameters be ParseText(P, parameterSym).
     i32 function_length = 0;
-    auto parameters_parser = Parser { Lexer { parameters_string } };
+    auto parameters_parser = Parser(Lexer(SourceCode::create({}, Utf16String::from_utf8(parameters_string))));
     auto parameters = parameters_parser.parse_formal_parameters(function_length, parse_options);
 
     // 18. If parameters is a List of errors, throw a SyntaxError exception.
@@ -179,7 +179,7 @@ ThrowCompletionOr<GC::Ref<ECMAScriptFunctionObject>> FunctionConstructor::create
     // 22. NOTE: If this step is reached, sourceText must have the syntax of exprSym (although the reverse implication does not hold). The purpose of the next two steps is to enforce any Early Error rules which apply to exprSym directly.
 
     // 23. Let expr be ParseText(sourceText, exprSym).
-    auto source_parser = Parser { Lexer { source_text } };
+    auto source_parser = Parser(Lexer(SourceCode::create({}, Utf16String::from_utf8(source_text))));
     // This doesn't need any parse_options, it determines those & the function type based on the tokens that were found.
     auto expr = source_parser.parse_function_node<FunctionExpression>();
 
diff --git a/Libraries/LibJS/Runtime/ShadowRealm.cpp b/Libraries/LibJS/Runtime/ShadowRealm.cpp
@@ -123,7 +123,7 @@ ThrowCompletionOr<Value> perform_shadow_realm_eval(VM& vm, Value source, Realm&
     // 2. Perform the following substeps in an implementation-defined order, possibly interleaving parsing and error detection:
 
     // a. Let script be ParseText(StringToCodePoints(sourceText), Script).
-    auto parser = Parser(Lexer(source_text->utf8_string_view()), Program::Type::Script, Parser::EvalInitialState {});
+    auto parser = Parser(Lexer(SourceCode::create({}, source_text->utf16_string())), Program::Type::Script, Parser::EvalInitialState {});
     auto program = parser.parse_program();
 
     // b. If script is a List of errors, throw a SyntaxError exception.
diff --git a/Libraries/LibJS/Script.cpp b/Libraries/LibJS/Script.cpp
@@ -18,7 +18,7 @@ GC_DEFINE_ALLOCATOR(Script);
 Result<GC::Ref<Script>, Vector<ParserError>> Script::parse(StringView source_text, Realm& realm, StringView filename, HostDefined* host_defined, size_t line_number_offset)
 {
     // 1. Let script be ParseText(sourceText, Script).
-    auto parser = Parser(Lexer(source_text, filename, line_number_offset));
+    auto parser = Parser(Lexer(SourceCode::create(String::from_utf8(filename).release_value_but_fixme_should_propagate_errors(), Utf16String::from_utf8(source_text)), line_number_offset));
     auto script = parser.parse_program();
 
     // 2. If script is a List of errors, return body.
diff --git a/Libraries/LibJS/SourceTextModule.cpp b/Libraries/LibJS/SourceTextModule.cpp
@@ -132,7 +132,7 @@ void SourceTextModule::visit_edges(Cell::Visitor& visitor)
 Result<GC::Ref<SourceTextModule>, Vector<ParserError>> SourceTextModule::parse(StringView source_text, Realm& realm, StringView filename, Script::HostDefined* host_defined)
 {
     // 1. Let body be ParseText(sourceText, Module).
-    auto parser = Parser(Lexer(source_text, filename), Program::Type::Module);
+    auto parser = Parser(Lexer(SourceCode::create(String::from_utf8(filename).release_value_but_fixme_should_propagate_errors(), Utf16String::from_utf8(source_text))), Program::Type::Module);
     auto body = parser.parse_program();
 
     // 2. If body is a List of errors, return body.
diff --git a/Libraries/LibJS/SyntaxHighlighter.cpp b/Libraries/LibJS/SyntaxHighlighter.cpp
@@ -52,7 +52,7 @@ void SyntaxHighlighter::rehighlight(Palette const& palette)
 {
     auto text = m_client->get_text();
 
-    Lexer lexer(text);
+    Lexer lexer(SourceCode::create({}, Utf16String::from_utf8(text)));
 
     Vector<Syntax::TextDocumentSpan> spans;
     Vector<Syntax::TextDocumentFoldingRegion> folding_regions;
diff --git a/Libraries/LibWeb/DOM/EventTarget.cpp b/Libraries/LibWeb/DOM/EventTarget.cpp
@@ -444,7 +444,7 @@ WebIDL::CallbackType* EventTarget::get_current_value_of_event_handler(FlyString
 
         auto source_text = builder.to_byte_string();
 
-        auto parser = JS::Parser(JS::Lexer(source_text));
+        auto parser = JS::Parser(JS::Lexer(JS::SourceCode::create({}, Utf16String::from_utf8(source_text))));
 
         // FIXME: This should only be parsing the `body` instead of `source_text` and therefore use `JS::FunctionBody` instead of `JS::FunctionExpression`.
         //        However, JS::ECMAScriptFunctionObject::create wants parameters and length and JS::FunctionBody does not inherit JS::FunctionNode.
diff --git a/Libraries/LibWeb/WebDriver/ExecuteScript.cpp b/Libraries/LibWeb/WebDriver/ExecuteScript.cpp
@@ -55,7 +55,8 @@ static JS::ThrowCompletionOr<JS::Value> execute_a_function_body(HTML::BrowsingCo
         }})~~~",
         body);
 
-    auto parser = JS::Parser { JS::Lexer { source_text } };
+    auto parser = JS::Parser(JS::Lexer(JS::SourceCode::create({}, Utf16String::from_utf8(source_text))));
+    ;
     auto function_expression = parser.parse_function_node<JS::FunctionExpression>();
 
     // 4. If body is not parsable as a FunctionBody or if parsing detects an early error, return Completion { [[Type]]: normal, [[Value]]: null, [[Target]]: empty }.
diff --git a/Tests/LibJS/CMakeLists.txt b/Tests/LibJS/CMakeLists.txt
@@ -1,4 +1,3 @@
-ladybird_test(test-invalid-unicode-js.cpp LibJS LIBS LibJS LibUnicode)
 ladybird_test(test-value-js.cpp LibJS LIBS LibJS LibUnicode)
 
 ladybird_testjs_test(test-js.cpp test-js LIBS LibGC)
diff --git a/Tests/LibJS/test-invalid-unicode-js.cpp b/Tests/LibJS/test-invalid-unicode-js.cpp
diff --git a/Tests/LibJS/test-js.cpp b/Tests/LibJS/test-js.cpp
diff --git a/Utilities/js.cpp b/Utilities/js.cpp
diff --git a/Utilities/test262-runner.cpp b/Utilities/test262-runner.cpp

Original file line number	Diff line number	Diff line change
`@@ -688,7 +688,7 @@ Parser::ParserState::ParserState(Lexer l, Program::Type program_type)`
`688`	`688`	`}`
`689`	`689`
`690`	`690`	`Parser::Parser(Lexer lexer, Program::Type program_type, Optional<EvalInitialState> initial_state_for_eval)`
`691`		`- : m_source_code(SourceCode::create(lexer.filename(), lexer.source()))`
	`691`	`+ : m_source_code(lexer.source_code())`
`692`	`692`	`, m_state(move(lexer), program_type)`
`693`	`693`	`, m_program_type(program_type)`
`694`	`694`	`{`
`@@ -2596,7 +2596,7 @@ RefPtr<BindingPattern const> Parser::synthesize_binding_pattern(Expression const`
`2596`	`2596`	`auto source_end_offset = expression.source_range().end.offset;`
`2597`	`2597`	`auto source = m_state.lexer.source().substring_view(source_start_offset, source_end_offset - source_start_offset);`
`2598`	`2598`
`2599`		`- Lexer lexer { Utf16String::from_utf16(source), m_state.lexer.filename(), expression.source_range().start.line, expression.source_range().start.column };`
	`2599`	`+ Lexer lexer(SourceCode::create(m_state.lexer.filename(), Utf16String::from_utf16(source)), expression.source_range().start.line, expression.source_range().start.column);`
`2600`	`2600`	`Parser parser { lexer };`
`2601`	`2601`
`2602`	`2602`	`parser.m_state.current_scope_pusher = m_state.current_scope_pusher;`
`@@ -5233,7 +5233,7 @@ Parser Parser::parse_function_body_from_string(ByteString const& body_string, u1`
`5233`	`5233`	`{`
`5234`	`5234`	`RefPtr<FunctionBody const> function_body;`
`5235`	`5235`
`5236`		`- auto body_parser = Parser { Lexer { body_string } };`
	`5236`	`+ auto body_parser = Parser(Lexer(SourceCode::create({}, Utf16String::from_utf8(body_string))));`
`5237`	`5237`	`{`
`5238`	`5238`	`// Set up some parser state to accept things like return await, and yield in the plain function body.`
`5239`	`5239`	`body_parser.m_state.in_function_context = true;`
Original file line number	Diff line number	Diff line change
`@@ -18,7 +18,7 @@ GC_DEFINE_ALLOCATOR(Script);`
`18`	`18`	`Result<GC::Ref<Script>, Vector<ParserError>> Script::parse(StringView source_text, Realm& realm, StringView filename, HostDefined* host_defined, size_t line_number_offset)`
`19`	`19`	`{`
`20`	`20`	`// 1. Let script be ParseText(sourceText, Script).`
`21`		`- auto parser = Parser(Lexer(source_text, filename, line_number_offset));`
	`21`	`+ auto parser = Parser(Lexer(SourceCode::create(String::from_utf8(filename).release_value_but_fixme_should_propagate_errors(), Utf16String::from_utf8(source_text)), line_number_offset));`
`22`	`22`	`auto script = parser.parse_program();`
`23`	`23`
`24`	`24`	`// 2. If script is a List of errors, return body.`
Original file line number	Diff line number	Diff line change
`@@ -132,7 +132,7 @@ void SourceTextModule::visit_edges(Cell::Visitor& visitor)`
`132`	`132`	`Result<GC::Ref<SourceTextModule>, Vector<ParserError>> SourceTextModule::parse(StringView source_text, Realm& realm, StringView filename, Script::HostDefined* host_defined)`
`133`	`133`	`{`
`134`	`134`	`// 1. Let body be ParseText(sourceText, Module).`
`135`		`- auto parser = Parser(Lexer(source_text, filename), Program::Type::Module);`
	`135`	`+ auto parser = Parser(Lexer(SourceCode::create(String::from_utf8(filename).release_value_but_fixme_should_propagate_errors(), Utf16String::from_utf8(source_text))), Program::Type::Module);`
`136`	`136`	`auto body = parser.parse_program();`
`137`	`137`
`138`	`138`	`// 2. If body is a List of errors, return body.`
Original file line number	Diff line number	Diff line change
`@@ -52,7 +52,7 @@ void SyntaxHighlighter::rehighlight(Palette const& palette)`
`52`	`52`	`{`
`53`	`53`	`auto text = m_client->get_text();`
`54`	`54`
`55`		`- Lexer lexer(text);`
	`55`	`+ Lexer lexer(SourceCode::create({}, Utf16String::from_utf8(text)));`
`56`	`56`
`57`	`57`	`Vector<Syntax::TextDocumentSpan> spans;`
`58`	`58`	`Vector<Syntax::TextDocumentFoldingRegion> folding_regions;`