Skip to content

Commit ae6a84c

Browse files
AtkinsSJ authored and gmta committed
LibGUI: Lex INI files as Utf8
Iterating byte by byte meant that the column positions assigned to INI tokens would be off if there were any multi-byte codepoints. Using a Utf8View means these positions refer to whole codepoints instead, and the column positions match what GUI::TextEditor expects. :^) Fixes #12706.
1 parent 95df712 commit ae6a84c

File tree

2 files changed

+14
-16
lines changed

2 files changed

+14
-16
lines changed

Userland/Libraries/LibGUI/INILexer.cpp

Lines changed: 9 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
/*
22
* Copyright (c) 2020, Hüseyin Aslıtürk <asliturk@hotmail.com>
3+
* Copyright (c) 2023, Sam Atkins <atkinssj@serenityos.org>
34
*
45
* SPDX-License-Identifier: BSD-2-Clause
56
*/
@@ -12,20 +13,20 @@ namespace GUI {
1213

1314
IniLexer::IniLexer(StringView input)
1415
: m_input(input)
16+
, m_iterator(m_input.begin())
1517
{
1618
}
1719

18-
char IniLexer::peek(size_t offset) const
20+
u32 IniLexer::peek(size_t offset) const
1921
{
20-
if ((m_index + offset) >= m_input.length())
21-
return 0;
22-
return m_input[m_index + offset];
22+
return m_iterator.peek(offset).value_or(0);
2323
}
2424

25-
char IniLexer::consume()
25+
u32 IniLexer::consume()
2626
{
27-
VERIFY(m_index < m_input.length());
28-
char ch = m_input[m_index++];
27+
VERIFY(m_iterator != m_input.end());
28+
u32 ch = *m_iterator;
29+
++m_iterator;
2930
if (ch == '\n') {
3031
m_position.line++;
3132
m_position.column = 0;
@@ -38,8 +39,6 @@ char IniLexer::consume()
3839
Vector<IniToken> IniLexer::lex()
3940
{
4041
Vector<IniToken> tokens;
41-
42-
size_t token_start_index = 0;
4342
IniPosition token_start_position;
4443

4544
auto emit_token = [&](auto type) {
@@ -52,7 +51,6 @@ Vector<IniToken> IniLexer::lex()
5251
};
5352

5453
auto begin_token = [&] {
55-
token_start_index = m_index;
5654
token_start_position = m_position;
5755
};
5856

@@ -64,7 +62,7 @@ Vector<IniToken> IniLexer::lex()
6462
tokens.append(token);
6563
};
6664

67-
while (m_index < m_input.length()) {
65+
while (m_iterator != m_input.end()) {
6866
auto ch = peek();
6967

7068
if (is_ascii_space(ch)) {

Userland/Libraries/LibGUI/INILexer.h

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66

77
#pragma once
88

9-
#include <AK/StringView.h>
9+
#include <AK/Utf8View.h>
1010

1111
namespace GUI {
1212

@@ -57,11 +57,11 @@ class IniLexer {
5757
Vector<IniToken> lex();
5858

5959
private:
60-
char peek(size_t offset = 0) const;
61-
char consume();
60+
u32 peek(size_t offset = 0) const;
61+
u32 consume();
6262

63-
StringView m_input;
64-
size_t m_index { 0 };
63+
Utf8View m_input;
64+
Utf8CodePointIterator m_iterator;
6565
IniPosition m_position { 0, 0 };
6666
};
6767

0 commit comments

Comments
 (0)