Skip to content

Commit 424fa57

Browse files
committed
Http: Improve URL Parser to test more edge cases
1 parent 3d351d9 commit 424fa57

File tree

2 files changed

+240
-13
lines changed

2 files changed

+240
-13
lines changed

Libraries/Http/HttpURLParser.cpp

Lines changed: 82 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -50,11 +50,52 @@ SC::Result SC::HttpURLParser::parse(StringView url)
5050
const bool hasPath = it.advanceUntilMatches('/');
5151

5252
host = StringView::fromIterators(start, it, encoding);
53+
54+
// Check for query parameters or fragments before parsing host
55+
StringView originalHost = host;
56+
auto hostIt = originalHost.getIterator<StringIteratorASCII>();
57+
StringCodePoint separator;
58+
const bool hasQueryOrFragment = hostIt.advanceUntilMatchesAny({'?', '#'}, separator);
59+
60+
// If there is no path but the host contains query/fragment (e.g. http://example.com?x=1)
61+
// trim the `host` to exclude the query/fragment before parsing hostname/port.
62+
if (not hasPath && hasQueryOrFragment)
63+
{
64+
// hostIt currently points at the separator ('?' or '#') within originalHost.
65+
// Build a new host view that stops before the separator.
66+
host = StringView::fromIteratorFromStart(hostIt, encoding);
67+
}
68+
5369
SC_TRY(parseHost());
5470
if (not hasPath)
5571
{
56-
path = "/";
57-
pathname = "/";
72+
if (hasQueryOrFragment)
73+
{
74+
// There are query params or fragments in the host string
75+
if (separator == '?')
76+
{
77+
// Query parameters found
78+
pathname = "/";
79+
search = StringView::fromIteratorUntilEnd(hostIt, encoding);
80+
// Check for fragment after query
81+
auto queryIt = hostIt;
82+
if (queryIt.advanceUntilMatches('#'))
83+
{
84+
hash = StringView::fromIteratorUntilEnd(queryIt, encoding);
85+
}
86+
}
87+
else
88+
{
89+
// Fragment found
90+
pathname = "/";
91+
hash = StringView::fromIteratorUntilEnd(hostIt, encoding);
92+
}
93+
}
94+
else
95+
{
96+
path = "/";
97+
pathname = "/";
98+
}
5899
return Result(true);
59100
}
60101
// path + hash
@@ -101,10 +142,11 @@ SC::Result SC::HttpURLParser::parseHost()
101142
it = start;
102143
}
103144
start = it;
104-
host = StringView::fromIteratorUntilEnd(it, encoding);
145+
// Save the hostname part (before port) for parsing
146+
StringView hostnamePart = StringView::fromIteratorUntilEnd(it, encoding);
105147

106-
// Parse hostname and port from host
107-
StringIteratorASCII it2 = host.getIterator<StringIteratorASCII>();
148+
// Parse hostname and port from hostnamePart
149+
StringIteratorASCII it2 = hostnamePart.getIterator<StringIteratorASCII>();
108150
StringIteratorASCII start2 = it2;
109151

110152
StringCodePoint firstChar;
@@ -115,7 +157,8 @@ SC::Result SC::HttpURLParser::parseHost()
115157
return Result(false);
116158
(void)it2.stepForward(); // include ]
117159
hostname = StringView::fromIterators(start2, it2, encoding);
118-
if (it2.advanceRead(firstChar) && firstChar == ':')
160+
// check if next char is ':' (port separator) without consuming it twice
161+
if (it2.match(':'))
119162
{
120163
(void)it2.stepForward();
121164
StringView portString = StringView::fromIteratorUntilEnd(it2, encoding);
@@ -127,6 +170,13 @@ SC::Result SC::HttpURLParser::parseHost()
127170
return Result(false);
128171
port = static_cast<uint16_t>(value);
129172
}
173+
// Update host to include hostname and port
174+
host = StringView::fromIterators(start, it2, encoding);
175+
}
176+
else
177+
{
178+
// No port for IPv6
179+
host = StringView::fromIterators(start, it2, encoding);
130180
}
131181
}
132182
else
@@ -136,6 +186,7 @@ SC::Result SC::HttpURLParser::parseHost()
136186
if (it2.advanceUntilMatches(':'))
137187
{
138188
hostname = StringView::fromIterators(start2, it2, encoding);
189+
// stepForward moves past ':' so port iterator starts at first digit
139190
(void)it2.stepForward();
140191
StringView portString = StringView::fromIteratorUntilEnd(it2, encoding);
141192
if (not portString.isEmpty())
@@ -146,10 +197,17 @@ SC::Result SC::HttpURLParser::parseHost()
146197
return Result(false);
147198
port = static_cast<uint16_t>(value);
148199
}
200+
// Advance it2 to end of port string for host calculation
201+
while (not it2.isAtEnd())
202+
(void)it2.stepForward();
203+
// Update host to include hostname and port
204+
host = StringView::fromIterators(start, it2, encoding);
149205
}
150206
else
151207
{
152-
hostname = host;
208+
// No port found
209+
hostname = hostnamePart;
210+
host = hostnamePart;
153211
}
154212
}
155213

@@ -188,10 +246,21 @@ SC::Result SC::HttpURLParser::validateHost()
188246

189247
SC::Result SC::HttpURLParser::parseUserPassword(StringView userPassword)
190248
{
191-
StringViewTokenizer tokenizer(userPassword);
192-
SC_TRY(tokenizer.tokenizeNext({':'}, StringViewTokenizer::Options::SkipEmpty));
193-
username = tokenizer.component;
194-
SC_TRY(tokenizer.tokenizeNext({}, StringViewTokenizer::Options::SkipEmpty));
195-
password = tokenizer.component;
196-
return Result(true);
249+
auto func = [this, userPassword](auto it) -> Result
250+
{
251+
auto start = it;
252+
if (it.advanceUntilMatches(':'))
253+
{
254+
username = StringView::fromIterators(start, it, encoding);
255+
(void)it.stepForward();
256+
password = StringView::fromIteratorUntilEnd(it, encoding);
257+
}
258+
else
259+
{
260+
username = userPassword;
261+
password = StringView();
262+
}
263+
return Result(true);
264+
};
265+
return userPassword.withIterator(func);
197266
}

Tests/Libraries/Http/HttpURLParserTest.cpp

Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,26 @@ struct SC::HttpURLParserTest : public SC::TestCase
3939
{
4040
testUTF8();
4141
}
42+
if (test_section("edgeCases"))
43+
{
44+
testEdgeCases();
45+
}
46+
if (test_section("protocols"))
47+
{
48+
testProtocols();
49+
}
50+
if (test_section("queryParams"))
51+
{
52+
testQueryParams();
53+
}
54+
if (test_section("specialChars"))
55+
{
56+
testSpecialChars();
57+
}
58+
if (test_section("ipAddresses"))
59+
{
60+
testIPAddresses();
61+
}
4262
}
4363

4464
void testFull();
@@ -48,6 +68,11 @@ struct SC::HttpURLParserTest : public SC::TestCase
4868
void testIPv6();
4969
void testInvalidPort();
5070
void testUTF8();
71+
void testEdgeCases();
72+
void testProtocols();
73+
void testQueryParams();
74+
void testSpecialChars();
75+
void testIPAddresses();
5176
};
5277

5378
void SC::HttpURLParserTest::testFull()
@@ -172,6 +197,139 @@ void SC::HttpURLParserTest::testUTF8()
172197
SC_TEST_EXPECT(urlParser.hash == "#frâg"_u8);
173198
}
174199

200+
void SC::HttpURLParserTest::testEdgeCases()
201+
{
202+
HttpURLParser urlParser;
203+
204+
// Username only (no password)
205+
SC_TEST_EXPECT(urlParser.parse("http://user@example.com/path"));
206+
SC_TEST_EXPECT(urlParser.username == "user");
207+
SC_TEST_EXPECT(urlParser.password.isEmpty());
208+
SC_TEST_EXPECT(urlParser.hostname == "example.com");
209+
210+
// Root path only
211+
SC_TEST_EXPECT(urlParser.parse("http://example.com/"));
212+
SC_TEST_EXPECT(urlParser.pathname == "/");
213+
SC_TEST_EXPECT(urlParser.path == "/");
214+
215+
// No path at all
216+
SC_TEST_EXPECT(urlParser.parse("http://example.com"));
217+
SC_TEST_EXPECT(urlParser.pathname == "/");
218+
SC_TEST_EXPECT(urlParser.path == "/");
219+
220+
// Port 0
221+
SC_TEST_EXPECT(urlParser.parse("http://example.com:0/path"));
222+
SC_TEST_EXPECT(urlParser.port == 0);
223+
224+
// Very long path
225+
SC_TEST_EXPECT(urlParser.parse("http://example.com/very/long/path/with/many/segments"));
226+
SC_TEST_EXPECT(urlParser.pathname == "/very/long/path/with/many/segments");
227+
228+
// Path with dots
229+
SC_TEST_EXPECT(urlParser.parse("http://example.com/path/./subpath/../other"));
230+
SC_TEST_EXPECT(urlParser.pathname == "/path/./subpath/../other");
231+
}
232+
233+
void SC::HttpURLParserTest::testProtocols()
234+
{
235+
HttpURLParser urlParser;
236+
237+
// HTTPS protocol
238+
SC_TEST_EXPECT(urlParser.parse("https://example.com"));
239+
SC_TEST_EXPECT(urlParser.protocol == "https");
240+
SC_TEST_EXPECT(urlParser.port == 443);
241+
242+
// Mixed case protocol
243+
SC_TEST_EXPECT(urlParser.parse("Https://example.com"));
244+
SC_TEST_EXPECT(urlParser.protocol == "Https");
245+
246+
// Invalid protocol
247+
SC_TEST_EXPECT(not urlParser.parse("ftp://example.com"));
248+
SC_TEST_EXPECT(not urlParser.parse("custom://example.com"));
249+
}
250+
251+
void SC::HttpURLParserTest::testQueryParams()
252+
{
253+
HttpURLParser urlParser;
254+
255+
// Multiple query parameters
256+
SC_TEST_EXPECT(urlParser.parse("http://example.com/path?key1=value1&key2=value2&key3=value3"));
257+
SC_TEST_EXPECT(urlParser.search == "?key1=value1&key2=value2&key3=value3");
258+
259+
// Query parameter with empty value
260+
SC_TEST_EXPECT(urlParser.parse("http://example.com/path?empty=&key=value"));
261+
SC_TEST_EXPECT(urlParser.search == "?empty=&key=value");
262+
263+
// Query parameter with no value
264+
SC_TEST_EXPECT(urlParser.parse("http://example.com/path?flag&key=value"));
265+
SC_TEST_EXPECT(urlParser.search == "?flag&key=value");
266+
267+
// Only query parameters, no path
268+
SC_TEST_EXPECT(urlParser.parse("http://example.com?query=value"));
269+
SC_TEST_EXPECT(urlParser.pathname == "/");
270+
SC_TEST_EXPECT(urlParser.search == "?query=value");
271+
272+
// Query parameters with special characters
273+
SC_TEST_EXPECT(urlParser.parse("http://example.com/path?q=hello%20world&special=%2B%2D"));
274+
SC_TEST_EXPECT(urlParser.search == "?q=hello%20world&special=%2B%2D");
275+
}
276+
277+
void SC::HttpURLParserTest::testSpecialChars()
278+
{
279+
HttpURLParser urlParser;
280+
281+
// Path with allowed special characters
282+
SC_TEST_EXPECT(urlParser.parse("http://example.com/path_with_underscores-and-dashes"));
283+
SC_TEST_EXPECT(urlParser.pathname == "/path_with_underscores-and-dashes");
284+
285+
// Path with numbers
286+
SC_TEST_EXPECT(urlParser.parse("http://example.com/path123/456"));
287+
SC_TEST_EXPECT(urlParser.pathname == "/path123/456");
288+
289+
// Hostname with numbers
290+
SC_TEST_EXPECT(urlParser.parse("http://site123.com/path"));
291+
SC_TEST_EXPECT(urlParser.hostname == "site123.com");
292+
293+
// Username with special characters
294+
SC_TEST_EXPECT(urlParser.parse("http://user_name@example.com/path"));
295+
SC_TEST_EXPECT(urlParser.username == "user_name");
296+
297+
// Invalid: space in path (should fail)
298+
SC_TEST_EXPECT(not urlParser.parse("http://example.com/path with space"));
299+
}
300+
301+
void SC::HttpURLParserTest::testIPAddresses()
302+
{
303+
HttpURLParser urlParser;
304+
305+
// IPv4 address
306+
SC_TEST_EXPECT(urlParser.parse("http://192.168.1.1/path"));
307+
SC_TEST_EXPECT(urlParser.hostname == "192.168.1.1");
308+
SC_TEST_EXPECT(urlParser.port == 80);
309+
310+
// IPv4 with port
311+
SC_TEST_EXPECT(urlParser.parse("http://192.168.1.1:8080/path"));
312+
SC_TEST_EXPECT(urlParser.hostname == "192.168.1.1");
313+
SC_TEST_EXPECT(urlParser.port == 8080);
314+
315+
// IPv6 localhost
316+
SC_TEST_EXPECT(urlParser.parse("http://[::1]/path"));
317+
SC_TEST_EXPECT(urlParser.hostname == "[::1]");
318+
319+
// IPv6 with port
320+
SC_TEST_EXPECT(urlParser.parse("http://[::1]:8080/path"));
321+
SC_TEST_EXPECT(urlParser.hostname == "[::1]");
322+
SC_TEST_EXPECT(urlParser.port == 8080);
323+
324+
// IPv6 full address
325+
SC_TEST_EXPECT(urlParser.parse("http://[2001:db8::1]/path"));
326+
SC_TEST_EXPECT(urlParser.hostname == "[2001:db8::1]");
327+
328+
// IPv6 compressed
329+
SC_TEST_EXPECT(urlParser.parse("http://[2001:db8::]/path"));
330+
SC_TEST_EXPECT(urlParser.hostname == "[2001:db8::]");
331+
}
332+
175333
namespace SC
176334
{
177335
void runHttpURLParserTest(SC::TestReport& report) { HttpURLParserTest test(report); }

0 commit comments

Comments
 (0)