Skip to content

Commit b65d281

Browse files
DanShaders authored and BertalanD committed
AK: Add GenericLexer::{consume_decimal_integer,peek_string}
1 parent 6b30847 commit b65d281

File tree

3 files changed

+153
-0
lines changed

3 files changed

+153
-0
lines changed

AK/GenericLexer.cpp

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include <AK/Assertions.h>
88
#include <AK/CharacterTypes.h>
99
#include <AK/GenericLexer.h>
10+
#include <AK/ScopeGuard.h>
1011
#include <AK/StringBuilder.h>
1112

1213
#ifndef KERNEL
@@ -128,6 +129,62 @@ StringView GenericLexer::consume_quoted_string(char escape_char)
128129
return m_input.substring_view(start, length);
129130
}
130131

132+
template<Integral T>
133+
ErrorOr<T> GenericLexer::consume_decimal_integer()
134+
{
135+
using UnsignedT = MakeUnsigned<T>;
136+
137+
ArmedScopeGuard rollback { [&, rollback_position = m_index] {
138+
m_index = rollback_position;
139+
} };
140+
141+
bool has_minus_sign = false;
142+
143+
if (next_is('+') || next_is('-'))
144+
if (consume() == '-')
145+
has_minus_sign = true;
146+
147+
StringView number_view = consume_while(is_ascii_digit);
148+
if (number_view.is_empty())
149+
return Error::from_errno(EINVAL);
150+
151+
auto maybe_number = StringUtils::convert_to_uint<UnsignedT>(number_view, TrimWhitespace::No);
152+
if (!maybe_number.has_value())
153+
return Error::from_errno(ERANGE);
154+
auto number = maybe_number.value();
155+
156+
if (!has_minus_sign) {
157+
if (NumericLimits<T>::max() < number) // This is only possible in a signed case.
158+
return Error::from_errno(ERANGE);
159+
160+
rollback.disarm();
161+
return number;
162+
} else {
163+
if constexpr (IsUnsigned<T>) {
164+
if (number == 0) {
165+
rollback.disarm();
166+
return 0;
167+
}
168+
return Error::from_errno(ERANGE);
169+
} else {
170+
static constexpr UnsignedT max_value = static_cast<UnsignedT>(NumericLimits<T>::max()) + 1;
171+
if (number > max_value)
172+
return Error::from_errno(ERANGE);
173+
rollback.disarm();
174+
return -number;
175+
}
176+
}
177+
}
178+
179+
template ErrorOr<u8> GenericLexer::consume_decimal_integer<u8>();
180+
template ErrorOr<i8> GenericLexer::consume_decimal_integer<i8>();
181+
template ErrorOr<u16> GenericLexer::consume_decimal_integer<u16>();
182+
template ErrorOr<i16> GenericLexer::consume_decimal_integer<i16>();
183+
template ErrorOr<u32> GenericLexer::consume_decimal_integer<u32>();
184+
template ErrorOr<i32> GenericLexer::consume_decimal_integer<i32>();
185+
template ErrorOr<u64> GenericLexer::consume_decimal_integer<u64>();
186+
template ErrorOr<i64> GenericLexer::consume_decimal_integer<i64>();
187+
131188
#ifndef KERNEL
132189
Optional<DeprecatedString> GenericLexer::consume_and_unescape_string(char escape_char)
133190
{

AK/GenericLexer.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,13 @@ class GenericLexer {
3131
return (m_index + offset < m_input.length()) ? m_input[m_index + offset] : '\0';
3232
}
3333

34+
// Returns a view of `length` characters starting `offset` characters past the
// current position, without consuming anything. Returns an empty Optional when
// that range does not lie entirely inside the input.
Optional<StringView> peek_string(size_t length, size_t offset = 0) const
{
    // All comparisons use subtraction from the known input length, so a very large
    // `offset` or `length` cannot wrap the sum around and slip past the bounds check.
    auto const input_length = m_input.length();
    if (m_index > input_length)
        return {};

    auto const remaining = input_length - m_index;
    if (offset > remaining || length > remaining - offset)
        return {};

    return m_input.substring_view(m_index + offset, length);
}
40+
3441
constexpr bool next_is(char expected) const
3542
{
3643
return peek() == expected;
@@ -121,6 +128,8 @@ class GenericLexer {
121128
#ifndef KERNEL
122129
Optional<DeprecatedString> consume_and_unescape_string(char escape_char = '\\');
123130
#endif
131+
template<Integral T>
132+
ErrorOr<T> consume_decimal_integer();
124133

125134
enum class UnicodeEscapeError {
126135
MalformedUnicodeEscape,

Tests/AK/TestGenericLexer.cpp

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,3 +204,90 @@ TEST_CASE(consume_escaped_code_point)
204204
test("\\ud83d\\ude00"sv, 0x1f600);
205205
test("\\ud83d\\ude00"sv, 0xd83d, false);
206206
}
207+
208+
// Checks that consume_decimal_integer<T>() returns the expected value for in-range
// input of every supported width and consumes the whole input while doing so.
TEST_CASE(consume_decimal_integer_correctly_parses)
{
    // Parses `test` as `type`, then checks the parsed value and that the lexer
    // position ended up at the end of the input.
#define CHECK_PARSES_INTEGER(test, expected, type)                  \
    do {                                                            \
        GenericLexer lexer(test##sv);                               \
        auto actual = lexer.consume_decimal_integer<type>();        \
        VERIFY(!actual.is_error());                                 \
        EXPECT_EQ(actual.value(), static_cast<type>(expected));     \
        EXPECT_EQ(lexer.tell(), test##sv.length());                 \
    } while (false)
    CHECK_PARSES_INTEGER("0", 0, u8);
    CHECK_PARSES_INTEGER("-0", -0, u8);
    CHECK_PARSES_INTEGER("10", 10, u8);
    CHECK_PARSES_INTEGER("255", 255, u8);
    CHECK_PARSES_INTEGER("0", 0, u16);
    CHECK_PARSES_INTEGER("-0", -0, u16);
    CHECK_PARSES_INTEGER("1234", 1234, u16);
    CHECK_PARSES_INTEGER("65535", 65535, u16);
    CHECK_PARSES_INTEGER("0", 0, u32);
    CHECK_PARSES_INTEGER("-0", -0, u32);
    CHECK_PARSES_INTEGER("1234", 1234, u32);
    CHECK_PARSES_INTEGER("4294967295", 4294967295, u32);
    CHECK_PARSES_INTEGER("0", 0, u64);
    CHECK_PARSES_INTEGER("-0", -0, u64);
    CHECK_PARSES_INTEGER("1234", 1234, u64);
    CHECK_PARSES_INTEGER("18446744073709551615", 18446744073709551615ULL, u64);
    CHECK_PARSES_INTEGER("0", 0, i8);
    CHECK_PARSES_INTEGER("-0", -0, i8);
    CHECK_PARSES_INTEGER("10", 10, i8);
    CHECK_PARSES_INTEGER("-10", -10, i8);
    CHECK_PARSES_INTEGER("127", 127, i8);
    CHECK_PARSES_INTEGER("-128", -128, i8);
    CHECK_PARSES_INTEGER("0", 0, i16);
    CHECK_PARSES_INTEGER("-0", -0, i16);
    CHECK_PARSES_INTEGER("1234", 1234, i16);
    CHECK_PARSES_INTEGER("-1234", -1234, i16);
    CHECK_PARSES_INTEGER("32767", 32767, i16);
    CHECK_PARSES_INTEGER("-32768", -32768, i16);
    CHECK_PARSES_INTEGER("0", 0, i32);
    CHECK_PARSES_INTEGER("-0", -0, i32);
    CHECK_PARSES_INTEGER("1234", 1234, i32);
    CHECK_PARSES_INTEGER("-1234", -1234, i32);
    CHECK_PARSES_INTEGER("2147483647", 2147483647, i32);
    CHECK_PARSES_INTEGER("-2147483648", -2147483648, i32);
    CHECK_PARSES_INTEGER("0", 0, i64);
    CHECK_PARSES_INTEGER("-0", -0, i64);
    CHECK_PARSES_INTEGER("1234", 1234, i64);
    CHECK_PARSES_INTEGER("-1234", -1234, i64);
    CHECK_PARSES_INTEGER("9223372036854775807", 9223372036854775807, i64);
    CHECK_PARSES_INTEGER("-9223372036854775808", -9223372036854775808ULL, i64);

    // An explicit plus sign is accepted and consumed for signed and unsigned targets alike.
    CHECK_PARSES_INTEGER("+0", 0, u8);
    CHECK_PARSES_INTEGER("+255", 255, u8);
    CHECK_PARSES_INTEGER("+127", 127, i8);
    CHECK_PARSES_INTEGER("+18446744073709551615", 18446744073709551615ULL, u64);
    CHECK_PARSES_INTEGER("+9223372036854775807", 9223372036854775807, i64);
#undef CHECK_PARSES_INTEGER
}
260+
261+
// Checks that consume_decimal_integer<T>() reports EINVAL for input without digits
// and ERANGE for values that do not fit into T, and that in both cases the lexer
// position is left at the start of the input.
TEST_CASE(consume_decimal_integer_fails_with_correct_error)
{
    // Parses `test` as `type`, then checks that it failed with errno `err` and
    // that nothing was consumed.
#define CHECK_FAILS_WITH_ERROR(test, type, err)                         \
    do {                                                                \
        GenericLexer lexer(test##sv);                                   \
        auto actual = lexer.consume_decimal_integer<type>();            \
        VERIFY(actual.is_error() && actual.error().is_errno());         \
        EXPECT_EQ(actual.error().code(), err);                          \
        EXPECT_EQ(lexer.tell(), static_cast<size_t>(0));                \
    } while (false)
    CHECK_FAILS_WITH_ERROR("Well hello GenericLexer!", u64, EINVAL);
    CHECK_FAILS_WITH_ERROR("+", u64, EINVAL);
    CHECK_FAILS_WITH_ERROR("+WHF", u64, EINVAL);
    CHECK_FAILS_WITH_ERROR("-WHF", u64, EINVAL);
    // Empty input and a lone minus sign contain no digits either.
    CHECK_FAILS_WITH_ERROR("", u64, EINVAL);
    CHECK_FAILS_WITH_ERROR("-", u64, EINVAL);

    CHECK_FAILS_WITH_ERROR("-1", u8, ERANGE);
    CHECK_FAILS_WITH_ERROR("-100", u8, ERANGE);
    CHECK_FAILS_WITH_ERROR("-1", u16, ERANGE);
    CHECK_FAILS_WITH_ERROR("-100", u16, ERANGE);
    CHECK_FAILS_WITH_ERROR("-1", u32, ERANGE);
    CHECK_FAILS_WITH_ERROR("-100", u32, ERANGE);
    CHECK_FAILS_WITH_ERROR("-1", u64, ERANGE);
    CHECK_FAILS_WITH_ERROR("-100", u64, ERANGE);

    // One past the maximum of each unsigned type.
    CHECK_FAILS_WITH_ERROR("256", u8, ERANGE);
    CHECK_FAILS_WITH_ERROR("65536", u16, ERANGE);
    CHECK_FAILS_WITH_ERROR("4294967296", u32, ERANGE);
    CHECK_FAILS_WITH_ERROR("18446744073709551616", u64, ERANGE);

    CHECK_FAILS_WITH_ERROR("-129", i8, ERANGE);
    CHECK_FAILS_WITH_ERROR("128", i8, ERANGE);
    CHECK_FAILS_WITH_ERROR("-32769", i16, ERANGE);
    CHECK_FAILS_WITH_ERROR("32768", i16, ERANGE);
    CHECK_FAILS_WITH_ERROR("-2147483649", i32, ERANGE);
    CHECK_FAILS_WITH_ERROR("2147483648", i32, ERANGE);
    CHECK_FAILS_WITH_ERROR("-9223372036854775809", i64, ERANGE);
    CHECK_FAILS_WITH_ERROR("9223372036854775808", i64, ERANGE);

    // An explicit plus sign does not change the range check, and the sign is rolled back too.
    CHECK_FAILS_WITH_ERROR("+128", i8, ERANGE);
    CHECK_FAILS_WITH_ERROR("+256", u8, ERANGE);
#undef CHECK_FAILS_WITH_ERROR
}

0 commit comments

Comments
 (0)