@@ -192,9 +192,9 @@ static constexpr TokenType parse_three_char_token(Utf16View const& view)
192192 }
193193}
194194
195- static consteval Array<TokenType, 256 > make_single_char_tokens_array ()
195+ static consteval AK:: Array<TokenType, 256 > make_single_char_tokens_array ()
196196{
197- Array<TokenType, 256 > array;
197+ AK:: Array<TokenType, 256 > array;
198198 array.fill (TokenType::Invalid);
199199 array[' &' ] = TokenType::Ampersand;
200200 array[' *' ] = TokenType::Asterisk;
@@ -225,33 +225,9 @@ static consteval Array<TokenType, 256> make_single_char_tokens_array()
225225
226226static constexpr auto s_single_char_tokens = make_single_char_tokens_array();
227227
228- static Utf16String create_utf16_string_from_possibly_invalid_utf8_string (StringView source)
229- {
230- Utf8View utf8_source { source };
231- if (utf8_source.validate ()) [[likely]]
232- return Utf16String::from_utf8_without_validation (source);
233-
234- StringBuilder builder (StringBuilder::Mode::UTF16);
235-
236- for (auto code_point : utf8_source) {
237- builder.append_code_point (code_point);
238- if (code_point == AK::UnicodeUtils::REPLACEMENT_CODE_POINT)
239- break ;
240- }
241-
242- return builder.to_utf16_string ();
243- }
244-
245- Lexer::Lexer (StringView source, StringView filename, size_t line_number, size_t line_column)
246- : Lexer(create_utf16_string_from_possibly_invalid_utf8_string(source), filename, line_number, line_column)
247- {
248- // FIXME: Remove this API once all callers are ported to UTF-16.
249- }
250-
251- Lexer::Lexer (Utf16String source, StringView filename, size_t line_number, size_t line_column)
228+ Lexer::Lexer (NonnullRefPtr<SourceCode const > source, size_t line_number, size_t line_column)
252229 : m_source(move(source))
253230 , m_current_token(TokenType::Eof, {}, {}, {}, 0 , 0 , 0 )
254- , m_filename(String::from_utf8(filename).release_value_but_fixme_should_propagate_errors())
255231 , m_line_number(line_number)
256232 , m_line_column(line_column)
257233{
@@ -304,16 +280,16 @@ Lexer::Lexer(Utf16String source, StringView filename, size_t line_number, size_t
304280void Lexer::consume ()
305281{
306282 auto did_reach_eof = [this ] {
307- if (m_position < m_source.length_in_code_units ())
283+ if (m_position < m_source-> code () .length_in_code_units ())
308284 return false ;
309285 m_eof = true ;
310286 m_current_code_unit = ' \0 ' ;
311- m_position = m_source.length_in_code_units () + 1 ;
287+ m_position = m_source-> code () .length_in_code_units () + 1 ;
312288 m_line_column++;
313289 return true ;
314290 };
315291
316- if (m_position > m_source.length_in_code_units ())
292+ if (m_position > m_source-> code () .length_in_code_units ())
317293 return ;
318294
319295 if (did_reach_eof ())
@@ -339,7 +315,7 @@ void Lexer::consume()
339315 // and column - don't do it again. From https://tc39.es/ecma262/#sec-line-terminators:
340316 // The sequence <CR><LF> is commonly used as a line terminator.
341317 // It should be considered a single SourceCharacter for the purpose of reporting line numbers.
342- auto second_char_of_crlf = m_position > 1 && m_source.code_unit_at (m_position - 2 ) == ' \r ' && m_current_code_unit == ' \n ' ;
318+ auto second_char_of_crlf = m_position > 1 && m_source-> code () .code_unit_at (m_position - 2 ) == ' \r ' && m_current_code_unit == ' \n ' ;
343319
344320 if (!second_char_of_crlf) {
345321 m_line_number++;
@@ -349,8 +325,8 @@ void Lexer::consume()
349325 dbgln_if (LEXER_DEBUG, " Previous was CR, this is LF - not incrementing line number again." );
350326 }
351327 } else {
352- if (AK::UnicodeUtils::is_utf16_high_surrogate (m_current_code_unit) && m_position < m_source.length_in_code_units ()) {
353- if (AK::UnicodeUtils::is_utf16_low_surrogate (m_source.code_unit_at (m_position))) {
328+ if (AK::UnicodeUtils::is_utf16_high_surrogate (m_current_code_unit) && m_position < m_source-> code () .length_in_code_units ()) {
329+ if (AK::UnicodeUtils::is_utf16_low_surrogate (m_source-> code () .code_unit_at (m_position))) {
354330 ++m_position;
355331
356332 if (did_reach_eof ())
@@ -361,7 +337,7 @@ void Lexer::consume()
361337 ++m_line_column;
362338 }
363339
364- m_current_code_unit = m_source.code_unit_at (m_position++);
340+ m_current_code_unit = m_source-> code () .code_unit_at (m_position++);
365341}
366342
367343bool Lexer::consume_decimal_number ()
@@ -436,40 +412,40 @@ bool Lexer::consume_binary_number()
436412template <typename Callback>
437413bool Lexer::match_numeric_literal_separator_followed_by (Callback callback) const
438414{
439- if (m_position >= m_source.length_in_code_units ())
415+ if (m_position >= m_source-> code () .length_in_code_units ())
440416 return false ;
441417 return m_current_code_unit == ' _'
442- && callback (m_source.code_unit_at (m_position));
418+ && callback (m_source-> code () .code_unit_at (m_position));
443419}
444420
445421bool Lexer::match (char16_t a, char16_t b) const
446422{
447- if (m_position >= m_source.length_in_code_units ())
423+ if (m_position >= m_source-> code () .length_in_code_units ())
448424 return false ;
449425
450426 return m_current_code_unit == a
451- && m_source.code_unit_at (m_position) == b;
427+ && m_source-> code () .code_unit_at (m_position) == b;
452428}
453429
454430bool Lexer::match (char16_t a, char16_t b, char16_t c) const
455431{
456- if (m_position + 1 >= m_source.length_in_code_units ())
432+ if (m_position + 1 >= m_source-> code () .length_in_code_units ())
457433 return false ;
458434
459435 return m_current_code_unit == a
460- && m_source.code_unit_at (m_position) == b
461- && m_source.code_unit_at (m_position + 1 ) == c;
436+ && m_source-> code () .code_unit_at (m_position) == b
437+ && m_source-> code () .code_unit_at (m_position + 1 ) == c;
462438}
463439
464440bool Lexer::match (char16_t a, char16_t b, char16_t c, char16_t d) const
465441{
466- if (m_position + 2 >= m_source.length_in_code_units ())
442+ if (m_position + 2 >= m_source-> code () .length_in_code_units ())
467443 return false ;
468444
469445 return m_current_code_unit == a
470- && m_source.code_unit_at (m_position) == b
471- && m_source.code_unit_at (m_position + 1 ) == c
472- && m_source.code_unit_at (m_position + 2 ) == d;
446+ && m_source-> code () .code_unit_at (m_position) == b
447+ && m_source-> code () .code_unit_at (m_position + 1 ) == c
448+ && m_source-> code () .code_unit_at (m_position + 2 ) == d;
473449}
474450
475451bool Lexer::is_eof () const
@@ -493,7 +469,7 @@ ALWAYS_INLINE u32 Lexer::current_code_point() const
493469 if (m_position == 0 )
494470 return AK::UnicodeUtils::REPLACEMENT_CODE_POINT;
495471
496- auto substring = m_source.substring_view (m_position - 1 );
472+ auto substring = m_source-> code () .substring_view (m_position - 1 );
497473 if (substring.is_empty ())
498474 return AK::UnicodeUtils::REPLACEMENT_CODE_POINT;
499475
@@ -615,7 +591,7 @@ bool Lexer::is_block_comment_end() const
615591
616592bool Lexer::is_numeric_literal_start () const
617593{
618- return is_ascii_digit (m_current_code_unit) || (m_current_code_unit == ' .' && m_position < m_source.length_in_code_units () && is_ascii_digit (m_source.code_unit_at (m_position)));
594+ return is_ascii_digit (m_current_code_unit) || (m_current_code_unit == ' .' && m_position < m_source-> code () .length_in_code_units () && is_ascii_digit (m_source-> code () .code_unit_at (m_position)));
619595}
620596
621597bool Lexer::slash_means_division () const
@@ -861,7 +837,7 @@ Token const& Lexer::next()
861837 while (m_current_code_unit != stop_char && m_current_code_unit != ' \r ' && m_current_code_unit != ' \n ' && !is_eof ()) {
862838 if (m_current_code_unit == ' \\ ' ) {
863839 consume ();
864- if (m_current_code_unit == ' \r ' && m_position < m_source.length_in_code_units () && m_source.code_unit_at (m_position) == ' \n ' ) {
840+ if (m_current_code_unit == ' \r ' && m_position < m_source-> code () .length_in_code_units () && m_source-> code () .code_unit_at (m_position) == ' \n ' ) {
865841 consume ();
866842 }
867843 }
@@ -896,8 +872,8 @@ Token const& Lexer::next()
896872 consume ();
897873 }
898874
899- if (!found_token && m_position + 1 < m_source.length_in_code_units ()) {
900- auto three_chars_view = m_source.substring_view (m_position - 1 , 3 );
875+ if (!found_token && m_position + 1 < m_source-> code () .length_in_code_units ()) {
876+ auto three_chars_view = m_source-> code () .substring_view (m_position - 1 , 3 );
901877 if (auto type = parse_three_char_token (three_chars_view); type != TokenType::Invalid) {
902878 found_token = true ;
903879 token_type = type;
@@ -907,11 +883,11 @@ Token const& Lexer::next()
907883 }
908884 }
909885
910- if (!found_token && m_position < m_source.length_in_code_units ()) {
911- auto two_chars_view = m_source.substring_view (m_position - 1 , 2 );
886+ if (!found_token && m_position < m_source-> code () .length_in_code_units ()) {
887+ auto two_chars_view = m_source-> code () .substring_view (m_position - 1 , 2 );
912888 if (auto type = parse_two_char_token (two_chars_view); type != TokenType::Invalid) {
913889 // OptionalChainingPunctuator :: ?. [lookahead ∉ DecimalDigit]
914- if (!(type == TokenType::QuestionMarkPeriod && m_position + 1 < m_source.length_in_code_units () && is_ascii_digit (m_source.code_unit_at (m_position + 1 )))) {
890+ if (!(type == TokenType::QuestionMarkPeriod && m_position + 1 < m_source-> code () .length_in_code_units () && is_ascii_digit (m_source-> code () .code_unit_at (m_position + 1 )))) {
915891 found_token = true ;
916892 token_type = type;
917893 consume ();
@@ -945,8 +921,8 @@ Token const& Lexer::next()
945921 m_current_token = Token (
946922 token_type,
947923 token_message,
948- m_source.substring_view (trivia_start - 1 , value_start - trivia_start),
949- m_source.substring_view (value_start - 1 , m_position - value_start),
924+ m_source-> code () .substring_view (trivia_start - 1 , value_start - trivia_start),
925+ m_source-> code () .substring_view (value_start - 1 , m_position - value_start),
950926 value_start_line_number,
951927 value_start_column_number,
952928 value_start - 1 );
@@ -976,7 +952,7 @@ Token const& Lexer::force_slash_as_regex()
976952 size_t value_start = m_position - 1 ;
977953
978954 if (has_equals) {
979- VERIFY (m_source.code_unit_at (value_start - 1 ) == ' =' );
955+ VERIFY (m_source-> code () .code_unit_at (value_start - 1 ) == ' =' );
980956 --value_start;
981957 --m_position;
982958 m_current_code_unit = ' =' ;
@@ -988,7 +964,7 @@ Token const& Lexer::force_slash_as_regex()
988964 token_type,
989965 Token::Message::None,
990966 m_current_token.trivia (),
991- m_source.substring_view (value_start - 1 , m_position - value_start),
967+ m_source-> code () .substring_view (value_start - 1 , m_position - value_start),
992968 m_current_token.line_number (),
993969 m_current_token.line_column (),
994970 value_start - 1 );
0 commit comments