Fix SAL annotations in Utf8Codex.cpp/.h

Ian Halliday · Ian Halliday · commit a5db151b0da7 · 2016-11-29T12:46:07.000-08:00
Looks like a copy-paste bug.  EncodeSurrogatePair writes four bytes to the out
buffer but was annotated __out_ecount(3).  Bumped to 4.
diff --git a/lib/Common/Codex/Utf8Codex.cpp b/lib/Common/Codex/Utf8Codex.cpp
@@ -323,7 +323,8 @@ namespace utf8
         return ptr;
     }
 
-    LPUTF8 EncodeSurrogatePair(char16 surrogateHigh, char16 surrogateLow, __out_ecount(3) LPUTF8 ptr)
+    _Use_decl_annotations_
+    LPUTF8 EncodeSurrogatePair(char16 surrogateHigh, char16 surrogateLow, LPUTF8 ptr)
     {
         // A unicode codepoint is encoded into a surrogate pair by doing the following:
         //  subtract 0x10000 from the codepoint
@@ -472,9 +473,10 @@ namespace utf8
     }
 
     template <bool cesu8Encoding>
-    __range(0, cch * 3)
-    size_t EncodeIntoImpl(__out_ecount(cch * 3) LPUTF8 buffer, __in_ecount(cch) const char16 *source, charcount_t cch)
+    __range(0, cchIn * 3)
+    size_t EncodeIntoImpl(__out_ecount(cchIn * 3) LPUTF8 buffer, __in_ecount(cchIn) const char16 *source, charcount_t cchIn)
     {
+        charcount_t cch = cchIn; // SAL analysis gets confused by EncodeTrueUtf8's dest buffer requirement unless we alias cchIn with a local
         LPUTF8 dest = buffer;
 
         if (!ShouldFastPath(dest, source)) goto LSlowPath;
diff --git a/lib/Common/Codex/Utf8Codex.h b/lib/Common/Codex/Utf8Codex.h
@@ -162,7 +162,7 @@ namespace utf8
     LPUTF8 EncodeFull(char16 ch, __out_ecount(3) LPUTF8 ptr);
 
     // Encode a surrogate pair into a utf8 sequence 
-    LPUTF8 EncodeSurrogatePair(char16 surrogateHigh, char16 surrogateLow, __out_ecount(3) LPUTF8 ptr);
+    LPUTF8 EncodeSurrogatePair(char16 surrogateHigh, char16 surrogateLow, __out_ecount(4) LPUTF8 ptr);
 
     // Encode ch into a UTF8 sequence ignoring surrogate pairs (which are encoded as two
     // separate code points).
@@ -177,7 +177,7 @@ namespace utf8
     }
 
     // Encode ch into a UTF8 sequence while being aware of surrogate pairs.
-    inline LPUTF8 EncodeTrueUtf8(char16 ch, const char16** source, charcount_t* cch, __out_ecount(3) LPUTF8 ptr)
+    inline LPUTF8 EncodeTrueUtf8(char16 ch, const char16** source, charcount_t* cch, __out_ecount((*cch + 1) * 3) LPUTF8 ptr)
     {
         if (ch < 0x80)
         {
@@ -201,11 +201,16 @@ namespace utf8
             if ((surrogateHigh >= 0xD800 && surrogateHigh <= 0xDBFF) &&
                 (surrogateLow >= 0xDC00 && surrogateLow <= 0xDFFF))
             {
+                LPUTF8 retptr = EncodeSurrogatePair(surrogateHigh, surrogateLow, ptr);
+                
+                // SAL analysis gets confused if we call EncodeSurrogatePair after
+                // modifying cch
+
                 // Consume the low surrogate
                 *source = *source + 1;
                 *cch = *cch - 1;
 
-                return EncodeSurrogatePair(surrogateHigh, surrogateLow, ptr);
+                return retptr;
             }
         }
 

Original file line number	Diff line number	Diff line change
`@@ -323,7 +323,8 @@ namespace utf8`
`323`	`323`	`return ptr;`
`324`	`324`	`}`
`325`	`325`
`326`		`- LPUTF8 EncodeSurrogatePair(char16 surrogateHigh, char16 surrogateLow, __out_ecount(3) LPUTF8 ptr)`
	`326`	`+ _Use_decl_annotations_`
	`327`	`+ LPUTF8 EncodeSurrogatePair(char16 surrogateHigh, char16 surrogateLow, LPUTF8 ptr)`
`327`	`328`	`{`
`328`	`329`	`// A unicode codepoint is encoded into a surrogate pair by doing the following:`
`329`	`330`	`// subtract 0x10000 from the codepoint`
`@@ -472,9 +473,10 @@ namespace utf8`
`472`	`473`	`}`
`473`	`474`
`474`	`475`	`template <bool cesu8Encoding>`
`475`		`- __range(0, cch * 3)`
`476`		`- size_t EncodeIntoImpl(__out_ecount(cch * 3) LPUTF8 buffer, __in_ecount(cch) const char16 *source, charcount_t cch)`
	`476`	`+ __range(0, cchIn * 3)`
	`477`	`+ size_t EncodeIntoImpl(__out_ecount(cchIn * 3) LPUTF8 buffer, __in_ecount(cchIn) const char16 *source, charcount_t cchIn)`
`477`	`478`	`{`
	`479`	`+ charcount_t cch = cchIn; // SAL analysis gets confused by EncodeTrueUtf8's dest buffer requirement unless we alias cchIn with a local`
`478`	`480`	`LPUTF8 dest = buffer;`
`479`	`481`
`480`	`482`	`if (!ShouldFastPath(dest, source)) goto LSlowPath;`
Original file line number	Diff line number	Diff line change
`@@ -162,7 +162,7 @@ namespace utf8`
`162`	`162`	`LPUTF8 EncodeFull(char16 ch, __out_ecount(3) LPUTF8 ptr);`
`163`	`163`
`164`	`164`	`// Encode a surrogate pair into a utf8 sequence`
`165`		`- LPUTF8 EncodeSurrogatePair(char16 surrogateHigh, char16 surrogateLow, __out_ecount(3) LPUTF8 ptr);`
	`165`	`+ LPUTF8 EncodeSurrogatePair(char16 surrogateHigh, char16 surrogateLow, __out_ecount(4) LPUTF8 ptr);`
`166`	`166`
`167`	`167`	`// Encode ch into a UTF8 sequence ignoring surrogate pairs (which are encoded as two`
`168`	`168`	`// separate code points).`
`@@ -177,7 +177,7 @@ namespace utf8`
`177`	`177`	`}`
`178`	`178`
`179`	`179`	`// Encode ch into a UTF8 sequence while being aware of surrogate pairs.`
`180`		`- inline LPUTF8 EncodeTrueUtf8(char16 ch, const char16** source, charcount_t* cch, __out_ecount(3) LPUTF8 ptr)`
	`180`	`+ inline LPUTF8 EncodeTrueUtf8(char16 ch, const char16** source, charcount_t* cch, __out_ecount((*cch + 1) * 3) LPUTF8 ptr)`
`181`	`181`	`{`
`182`	`182`	`if (ch < 0x80)`
`183`	`183`	`{`
`@@ -201,11 +201,16 @@ namespace utf8`
`201`	`201`	`if ((surrogateHigh >= 0xD800 && surrogateHigh <= 0xDBFF) &&`
`202`	`202`	`(surrogateLow >= 0xDC00 && surrogateLow <= 0xDFFF))`
`203`	`203`	`{`
	`204`	`+ LPUTF8 retptr = EncodeSurrogatePair(surrogateHigh, surrogateLow, ptr);`
	`205`	`+`
	`206`	`+ // SAL analysis gets confused if we call EncodeSurrogatePair after`
	`207`	`+ // modifying cch`
	`208`	`+`
`204`	`209`	`// Consume the low surrogate`
`205`	`210`	`*source = *source + 1;`
`206`	`211`	`*cch = *cch - 1;`
`207`	`212`
`208`		`- return EncodeSurrogatePair(surrogateHigh, surrogateLow, ptr);`
	`213`	`+ return retptr;`
`209`	`214`	`}`
`210`	`215`	`}`
`211`	`216`