Skip to content

Commit 201803f

Browse files
committed
LibJS: Cache length-in-code-units in SourceCode
This avoids some bit twiddling whenever accessing the length in code units in the lexer.
1 parent 7c7a035 commit 201803f

File tree

3 files changed

+16
-13
lines changed

3 files changed

+16
-13
lines changed

Libraries/LibJS/Lexer.cpp

Lines changed: 13 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -280,16 +280,16 @@ Lexer::Lexer(NonnullRefPtr<SourceCode const> source_code, size_t line_number, si
280280
void Lexer::consume()
281281
{
282282
auto did_reach_eof = [this] {
283-
if (m_position < source().length_in_code_units())
283+
if (m_position < source_code().length_in_code_units())
284284
return false;
285285
m_eof = true;
286286
m_current_code_unit = '\0';
287-
m_position = source().length_in_code_units() + 1;
287+
m_position = source_code().length_in_code_units() + 1;
288288
m_line_column++;
289289
return true;
290290
};
291291

292-
if (m_position > source().length_in_code_units())
292+
if (m_position > source_code().length_in_code_units())
293293
return;
294294

295295
if (did_reach_eof())
@@ -325,7 +325,7 @@ void Lexer::consume()
325325
dbgln_if(LEXER_DEBUG, "Previous was CR, this is LF - not incrementing line number again.");
326326
}
327327
} else {
328-
if (AK::UnicodeUtils::is_utf16_high_surrogate(m_current_code_unit) && m_position < source().length_in_code_units()) {
328+
if (AK::UnicodeUtils::is_utf16_high_surrogate(m_current_code_unit) && m_position < source_code().length_in_code_units()) {
329329
if (AK::UnicodeUtils::is_utf16_low_surrogate(source().code_unit_at(m_position))) {
330330
++m_position;
331331

@@ -412,15 +412,15 @@ bool Lexer::consume_binary_number()
412412
template<typename Callback>
413413
bool Lexer::match_numeric_literal_separator_followed_by(Callback callback) const
414414
{
415-
if (m_position >= source().length_in_code_units())
415+
if (m_position >= source_code().length_in_code_units())
416416
return false;
417417
return m_current_code_unit == '_'
418418
&& callback(source().code_unit_at(m_position));
419419
}
420420

421421
bool Lexer::match(char16_t a, char16_t b) const
422422
{
423-
if (m_position >= source().length_in_code_units())
423+
if (m_position >= source_code().length_in_code_units())
424424
return false;
425425

426426
return m_current_code_unit == a
@@ -429,7 +429,7 @@ bool Lexer::match(char16_t a, char16_t b) const
429429

430430
bool Lexer::match(char16_t a, char16_t b, char16_t c) const
431431
{
432-
if (m_position + 1 >= source().length_in_code_units())
432+
if (m_position + 1 >= source_code().length_in_code_units())
433433
return false;
434434

435435
return m_current_code_unit == a
@@ -439,7 +439,7 @@ bool Lexer::match(char16_t a, char16_t b, char16_t c) const
439439

440440
bool Lexer::match(char16_t a, char16_t b, char16_t c, char16_t d) const
441441
{
442-
if (m_position + 2 >= source().length_in_code_units())
442+
if (m_position + 2 >= source_code().length_in_code_units())
443443
return false;
444444

445445
return m_current_code_unit == a
@@ -591,7 +591,7 @@ bool Lexer::is_block_comment_end() const
591591

592592
bool Lexer::is_numeric_literal_start() const
593593
{
594-
return is_ascii_digit(m_current_code_unit) || (m_current_code_unit == '.' && m_position < source().length_in_code_units() && is_ascii_digit(source().code_unit_at(m_position)));
594+
return is_ascii_digit(m_current_code_unit) || (m_current_code_unit == '.' && m_position < source_code().length_in_code_units() && is_ascii_digit(source().code_unit_at(m_position)));
595595
}
596596

597597
bool Lexer::slash_means_division() const
@@ -837,7 +837,7 @@ Token const& Lexer::next()
837837
while (m_current_code_unit != stop_char && m_current_code_unit != '\r' && m_current_code_unit != '\n' && !is_eof()) {
838838
if (m_current_code_unit == '\\') {
839839
consume();
840-
if (m_current_code_unit == '\r' && m_position < source().length_in_code_units() && source().code_unit_at(m_position) == '\n') {
840+
if (m_current_code_unit == '\r' && m_position < source_code().length_in_code_units() && source().code_unit_at(m_position) == '\n') {
841841
consume();
842842
}
843843
}
@@ -872,7 +872,7 @@ Token const& Lexer::next()
872872
consume();
873873
}
874874

875-
if (!found_token && m_position + 1 < source().length_in_code_units()) {
875+
if (!found_token && m_position + 1 < source_code().length_in_code_units()) {
876876
auto three_chars_view = source().substring_view(m_position - 1, 3);
877877
if (auto type = parse_three_char_token(three_chars_view); type != TokenType::Invalid) {
878878
found_token = true;
@@ -883,11 +883,11 @@ Token const& Lexer::next()
883883
}
884884
}
885885

886-
if (!found_token && m_position < source().length_in_code_units()) {
886+
if (!found_token && m_position < source_code().length_in_code_units()) {
887887
auto two_chars_view = source().substring_view(m_position - 1, 2);
888888
if (auto type = parse_two_char_token(two_chars_view); type != TokenType::Invalid) {
889889
// OptionalChainingPunctuator :: ?. [lookahead ∉ DecimalDigit]
890-
if (!(type == TokenType::QuestionMarkPeriod && m_position + 1 < source().length_in_code_units() && is_ascii_digit(source().code_unit_at(m_position + 1)))) {
890+
if (!(type == TokenType::QuestionMarkPeriod && m_position + 1 < source_code().length_in_code_units() && is_ascii_digit(source().code_unit_at(m_position + 1)))) {
891891
found_token = true;
892892
token_type = type;
893893
consume();

Libraries/LibJS/SourceCode.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@ SourceCode::SourceCode(String filename, Utf16String code)
2121
: m_filename(move(filename))
2222
, m_code(move(code))
2323
, m_code_view(m_code.utf16_view())
24+
, m_length_in_code_units(m_code_view.length_in_code_units())
2425
{
2526
}
2627

Libraries/LibJS/SourceCode.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ class JS_API SourceCode : public RefCounted<SourceCode> {
2222
String const& filename() const { return m_filename; }
2323
Utf16String const& code() const { return m_code; }
2424
Utf16View const& code_view() const { return m_code_view; }
25+
size_t length_in_code_units() const { return m_length_in_code_units; }
2526

2627
SourceRange range_from_offsets(u32 start_offset, u32 end_offset) const;
2728

@@ -31,6 +32,7 @@ class JS_API SourceCode : public RefCounted<SourceCode> {
3132
String m_filename;
3233
Utf16String m_code;
3334
Utf16View m_code_view;
35+
size_t m_length_in_code_units { 0 };
3436

3537
// For fast mapping of offsets to line/column numbers, we build a list of
3638
// starting points (with byte offsets into the source string) and which

0 commit comments

Comments
 (0)