Skip to content

Commit a5db151

Browse files
author
Ian Halliday
committed
Fix SAL annotations in Utf8Codex.cpp/.h
Looks like a copy-paste bug. EncodeSurrogatePair modifies four bytes in the out buffer, but its annotation was __out_ecount(3). Bumped to 4.
1 parent a7f7571 commit a5db151

File tree

2 files changed

+13
-6
lines changed

2 files changed

+13
-6
lines changed

lib/Common/Codex/Utf8Codex.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -323,7 +323,8 @@ namespace utf8
323323
return ptr;
324324
}
325325

326-
LPUTF8 EncodeSurrogatePair(char16 surrogateHigh, char16 surrogateLow, __out_ecount(3) LPUTF8 ptr)
326+
_Use_decl_annotations_
327+
LPUTF8 EncodeSurrogatePair(char16 surrogateHigh, char16 surrogateLow, LPUTF8 ptr)
327328
{
328329
// A unicode codepoint is encoded into a surrogate pair by doing the following:
329330
// subtract 0x10000 from the codepoint
@@ -472,9 +473,10 @@ namespace utf8
472473
}
473474

474475
template <bool cesu8Encoding>
475-
__range(0, cch * 3)
476-
size_t EncodeIntoImpl(__out_ecount(cch * 3) LPUTF8 buffer, __in_ecount(cch) const char16 *source, charcount_t cch)
476+
__range(0, cchIn * 3)
477+
size_t EncodeIntoImpl(__out_ecount(cchIn * 3) LPUTF8 buffer, __in_ecount(cchIn) const char16 *source, charcount_t cchIn)
477478
{
479+
charcount_t cch = cchIn; // SAL analysis gets confused by EncodeTrueUtf8's dest buffer requirement unless we alias cchIn with a local
478480
LPUTF8 dest = buffer;
479481

480482
if (!ShouldFastPath(dest, source)) goto LSlowPath;

lib/Common/Codex/Utf8Codex.h

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ namespace utf8
162162
LPUTF8 EncodeFull(char16 ch, __out_ecount(3) LPUTF8 ptr);
163163

164164
// Encode a surrogate pair into a utf8 sequence
165-
LPUTF8 EncodeSurrogatePair(char16 surrogateHigh, char16 surrogateLow, __out_ecount(3) LPUTF8 ptr);
165+
LPUTF8 EncodeSurrogatePair(char16 surrogateHigh, char16 surrogateLow, __out_ecount(4) LPUTF8 ptr);
166166

167167
// Encode ch into a UTF8 sequence ignoring surrogate pairs (which are encoded as two
168168
// separate code points).
@@ -177,7 +177,7 @@ namespace utf8
177177
}
178178

179179
// Encode ch into a UTF8 sequence while being aware of surrogate pairs.
180-
inline LPUTF8 EncodeTrueUtf8(char16 ch, const char16** source, charcount_t* cch, __out_ecount(3) LPUTF8 ptr)
180+
inline LPUTF8 EncodeTrueUtf8(char16 ch, const char16** source, charcount_t* cch, __out_ecount((*cch + 1) * 3) LPUTF8 ptr)
181181
{
182182
if (ch < 0x80)
183183
{
@@ -201,11 +201,16 @@ namespace utf8
201201
if ((surrogateHigh >= 0xD800 && surrogateHigh <= 0xDBFF) &&
202202
(surrogateLow >= 0xDC00 && surrogateLow <= 0xDFFF))
203203
{
204+
LPUTF8 retptr = EncodeSurrogatePair(surrogateHigh, surrogateLow, ptr);
205+
206+
// SAL analysis gets confused if we call EncodeSurrogatePair after
207+
// modifying cch
208+
204209
// Consume the low surrogate
205210
*source = *source + 1;
206211
*cch = *cch - 1;
207212

208-
return EncodeSurrogatePair(surrogateHigh, surrogateLow, ptr);
213+
return retptr;
209214
}
210215
}
211216

0 commit comments

Comments
 (0)