Skip to content

Commit b29193e

Browse files
committed
Add some unit tests to NativeTests for utf8 decode functions handling truncation of sequences at chunk borders
Also plumb the chunkEndsAtTruncatedSequence argument down through the other DecodeUnitsInto overloads.
1 parent 37e79d8 commit b29193e

File tree

3 files changed

+74
-6
lines changed

3 files changed

+74
-6
lines changed

bin/NativeTests/CodexTests.cpp

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,4 +212,72 @@ namespace CodexTest
212212
CHECK(sourceBuffer[i] == (const char16)encodedBuffer[i]);
213213
}
214214
}
215+
216+
template <typename TTestCase, typename TDecodeFunc>
217+
void RunUtf8DecodeTestCase(const TTestCase &testCases, const TDecodeFunc func)
218+
{
219+
const int numTestCases = _countof(testCases);
220+
const charcount_t charCount = _countof(testCases[0].result);
221+
char16 decodedBuffer[charCount + 1]; // +1 in case a null-terminating func is passed in
222+
223+
for (int i = 0; i < numTestCases; i++)
224+
{
225+
bool chunkEndsInTruncatedSequence = false;
226+
size_t decodedCount = func(decodedBuffer, testCases[i].utf8Encoding, testCases[i].utf8Encoding + testCases[i].bytesToDecode, utf8::DecodeOptions::doChunkedEncoding, &chunkEndsInTruncatedSequence);
227+
CHECK(decodedCount == testCases[i].expectedDecodedChars);
228+
229+
for (size_t j = 0; j < decodedCount; j++)
230+
{
231+
CHECK(decodedBuffer[j] == testCases[i].result[j]);
232+
}
233+
234+
CHECK(testCases[i].shouldEndInTruncation == chunkEndsInTruncatedSequence);
235+
}
236+
}
237+
238+
TEST_CASE("CodexTest_DecodeUnitsInto_ChunkEndsInTruncatedSequence", "[CodexTest]")
239+
{
240+
struct TestCase
241+
{
242+
int bytesToDecode;
243+
size_t expectedDecodedChars;
244+
bool shouldEndInTruncation;
245+
char16 result[8];
246+
utf8char_t utf8Encoding[8];
247+
};
248+
249+
TestCase testCases[] = {
250+
{ 2, 1, false, { 0xc1 }, { 0xc3, 0x81 } }, // Valid 2-byte sequence
251+
{ 1, 0, true, { 0x0 }, { 0xc3, 0x81 } }, // Valid 2-byte sequence truncated at the end of the chunk
252+
{ 2, 2, false, { 0xfffd, 0x79 },{ 0xc3, 0x79 } }, // Invalid 2-byte sequence
253+
{ 1, 0, true, { 0x0 }, { 0xc3, 0x79 } }, // Invalid 2-byte sequence truncated at the end of the chunk
254+
{ 3, 1, false, { 0x3042 },{ 0xe3, 0x81, 0x82 } }, // Valid 3-byte sequence
255+
{ 1, 0, true, { 0x0 }, { 0xe3, 0x81, 0x82 } }, // Valid 3-byte sequence truncated at the end of the chunk
256+
{ 2, 0, true, { 0x0 }, { 0xe3, 0x81, 0x82 } }, // Valid 3-byte sequence truncated at the end of the chunk
257+
{ 3, 3, false, { 0xfffd, 0x79, 0xfffd }, { 0xe3, 0x79, 0x82 } }, // Invalid 3-byte sequence
258+
{ 1, 0, true, { 0x0 }, { 0xe3, 0x79, 0x82 } }, // Invalid 3-byte sequence truncated at the end of the chunk
259+
{ 2, 0, true, { 0x0 }, { 0xe3, 0x79, 0x82 } }, // Invalid 3-byte sequence truncated at the end of the chunk
260+
{ 3, 3, false, { 0xfffd, 0xfffd, 0x79 }, { 0xe3, 0x81, 0x79 } }, // Invalid 3-byte sequence
261+
{ 1, 0, true, { 0x0 }, { 0xe3, 0x81, 0x79 } }, // Invalid 3-byte sequence truncated at the end of the chunk
262+
{ 2, 0, true, { 0x0 }, { 0xe3, 0x81, 0x79 } }, // Invalid 3-byte sequence truncated at the end of the chunk
263+
{ 4, 2, false, { 0xd9c4, 0xdc83 }, { 0xf2, 0x81, 0x82, 0x83 } }, // Valid 4-byte sequence
264+
{ 1, 0, true, { 0x0 }, { 0xf2, 0x81, 0x82, 0x83 } }, // Valid 4-byte sequence truncated at the end of the chunk
265+
{ 2, 0, true, { 0x0 }, { 0xf2, 0x81, 0x82, 0x83 } }, // Valid 4-byte sequence truncated at the end of the chunk
266+
{ 3, 0, true, { 0x0 }, { 0xf2, 0x81, 0x82, 0x83 } }, // Valid 4-byte sequence truncated at the end of the chunk
267+
{ 4, 4, false, { 0xfffd, 0x79, 0xfffd, 0xfffd }, { 0xf2, 0x79, 0x82, 0x83 } }, // Invalid 4-byte sequence
268+
{ 1, 0, true, { 0x0 }, { 0xf2, 0x79, 0x82, 0x83 } }, // Invalid 4-byte sequence truncated at the end of the chunk
269+
{ 2, 0, true, { 0x0 }, { 0xf2, 0x79, 0x82, 0x83 } }, // Invalid 4-byte sequence truncated at the end of the chunk
270+
{ 3, 0, true, { 0x0 }, { 0xf2, 0x79, 0x82, 0x83 } }, // Invalid 4-byte sequence truncated at the end of the chunk
271+
{ 4, 4, false, { 0xfffd, 0xfffd, 0x79, 0xfffd }, { 0xf2, 0x81, 0x79, 0x83 } }, // Invalid 4-byte sequence
272+
{ 1, 0, true, { 0x0 }, { 0xf2, 0x81, 0x79, 0x83 } }, // Invalid 4-byte sequence truncated at the end of the chunk
273+
{ 2, 0, true, { 0x0 }, { 0xf2, 0x81, 0x79, 0x83 } }, // Invalid 4-byte sequence truncated at the end of the chunk
274+
{ 3, 0, true, { 0x0 }, { 0xf2, 0x81, 0x79, 0x83 } }, // Invalid 4-byte sequence truncated at the end of the chunk
275+
{ 4, 4, false, { 0xfffd, 0xfffd, 0xfffd, 0x79 }, { 0xf2, 0x81, 0x82, 0x79 } }, // Invalid 4-byte sequence
276+
{ 1, 0, true, { 0x0 }, { 0xf2, 0x81, 0x82, 0x79 } }, // Invalid 4-byte sequence truncated at the end of the chunk
277+
{ 2, 0, true, { 0x0 }, { 0xf2, 0x81, 0x82, 0x79 } }, // Invalid 4-byte sequence truncated at the end of the chunk
278+
{ 3, 0, true, { 0x0 }, { 0xf2, 0x81, 0x82, 0x79 } }, // Invalid 4-byte sequence truncated at the end of the chunk
279+
};
280+
281+
RunUtf8DecodeTestCase(testCases, utf8::DecodeUnitsIntoAndNullTerminateNoAdvance);
282+
}
215283
};

lib/Common/Codex/Utf8Codex.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -451,17 +451,17 @@ namespace utf8
451451
}
452452

453453
_Use_decl_annotations_
454-
size_t DecodeUnitsIntoAndNullTerminate(char16 *buffer, LPCUTF8& pbUtf8, LPCUTF8 pbEnd, DecodeOptions options)
454+
size_t DecodeUnitsIntoAndNullTerminate(char16 *buffer, LPCUTF8& pbUtf8, LPCUTF8 pbEnd, DecodeOptions options, bool *chunkEndsAtTruncatedSequence)
455455
{
456-
size_t result = DecodeUnitsInto(buffer, pbUtf8, pbEnd, options);
456+
size_t result = DecodeUnitsInto(buffer, pbUtf8, pbEnd, options, chunkEndsAtTruncatedSequence);
457457
buffer[result] = 0;
458458
return result;
459459
}
460460

461461
_Use_decl_annotations_
462-
size_t DecodeUnitsIntoAndNullTerminateNoAdvance(char16 *buffer, LPCUTF8 pbUtf8, LPCUTF8 pbEnd, DecodeOptions options)
462+
size_t DecodeUnitsIntoAndNullTerminateNoAdvance(char16 *buffer, LPCUTF8 pbUtf8, LPCUTF8 pbEnd, DecodeOptions options, bool *chunkEndsAtTruncatedSequence)
463463
{
464-
return DecodeUnitsIntoAndNullTerminate(buffer, pbUtf8, pbEnd, options);
464+
return DecodeUnitsIntoAndNullTerminate(buffer, pbUtf8, pbEnd, options, chunkEndsAtTruncatedSequence);
465465
}
466466

467467
bool CharsAreEqual(LPCOLESTR pch, LPCUTF8 bch, LPCUTF8 end, DecodeOptions options)

lib/Common/Codex/Utf8Codex.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -277,9 +277,9 @@ namespace utf8
277277
size_t DecodeUnitsInto(_Out_writes_(pbEnd - pbUtf8) char16 *buffer, LPCUTF8& pbUtf8, LPCUTF8 pbEnd, DecodeOptions options = doDefault, bool *chunkEndsAtTruncatedSequence = nullptr);
278278

279279
// Decode cb bytes from ptr to into buffer returning the number of characters converted and written to buffer (excluding the null terminator)
280-
size_t DecodeUnitsIntoAndNullTerminate(__out_ecount(pbEnd - pbUtf8 + 1) __nullterminated char16 *buffer, LPCUTF8& pbUtf8, LPCUTF8 pbEnd, DecodeOptions options = doDefault);
280+
size_t DecodeUnitsIntoAndNullTerminate(__out_ecount(pbEnd - pbUtf8 + 1) __nullterminated char16 *buffer, LPCUTF8& pbUtf8, LPCUTF8 pbEnd, DecodeOptions options = doDefault, bool *chunkEndsAtTruncatedSequence = nullptr);
281281

282-
size_t DecodeUnitsIntoAndNullTerminateNoAdvance(__out_ecount(pbEnd - pbUtf8 + 1) __nullterminated char16 *buffer, LPCUTF8 pbUtf8, LPCUTF8 pbEnd, DecodeOptions options = doDefault);
282+
size_t DecodeUnitsIntoAndNullTerminateNoAdvance(__out_ecount(pbEnd - pbUtf8 + 1) __nullterminated char16 *buffer, LPCUTF8 pbUtf8, LPCUTF8 pbEnd, DecodeOptions options = doDefault, bool *chunkEndsAtTruncatedSequence = nullptr);
283283

284284
// Encode a UTF-8 sequence into a UTF-8 sequence (which is just a memcpy). This is included for convenience in templates
285285
// when the character encoding is a template parameter.

0 commit comments

Comments
 (0)