Skip to content

Commit ababb64

Browse files
committed
EbmlUnicodeString: don't read beyond end of string
The conversion from a UTF-8 encoded string into a wchar_t one was reading beyond the end of the source buffer if the length indicated by a UTF-8 character's first byte exceeded the number of bytes actually present afterwards. Fixes the issue reported as Cisco TALOS-CAN-0036.
1 parent c161e60 commit ababb64

File tree

2 files changed

+47
-23
lines changed

2 files changed

+47
-23
lines changed

Diff for: ChangeLog

+9
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,12 @@
1+
2015-10-20 Moritz Bunkus <moritz@bunkus.org>
2+
3+
* EbmlUnicodeString::UpdateFromUTF8(): Fixed an invalid memory
4+
access. When reading from a UTF-8 string in which the length
5+
indicated by a UTF-8 character's first byte exceeds the string's
6+
actual number of bytes, the parser would access beyond the end of
7+
the string, resulting in a heap information leak. Fixes the issue
8+
reported as Cisco TALOS-CAN-0036.
9+
110
2015-10-17 Moritz Bunkus <moritz@bunkus.org>
211

312
* Released v1.3.2.

Diff for: src/EbmlUnicodeString.cpp

+38-23
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,21 @@ START_LIBEBML_NAMESPACE
4747

4848
// ===================== UTFstring class ===================
4949

50+
static unsigned int UTFCharLength(uint8 lead)
51+
{
52+
if (lead < 0x80)
53+
return 1;
54+
else if ((lead >> 5) == 0x6)
55+
return 2;
56+
else if ((lead >> 4) == 0xe)
57+
return 3;
58+
else if ((lead >> 3) == 0x1e)
59+
return 4;
60+
else
61+
// Invalid size?
62+
return 0;
63+
}
64+
5065
UTFstring::UTFstring()
5166
:_Length(0)
5267
,_Data(NULL)
@@ -143,39 +158,39 @@ void UTFstring::UpdateFromUTF8()
143158
delete [] _Data;
144159
// find the size of the final UCS-2 string
145160
size_t i;
146-
for (_Length=0, i=0; i<UTF8string.length(); _Length++) {
147-
uint8 lead = static_cast<uint8>(UTF8string[i]);
148-
if (lead < 0x80)
149-
i++;
150-
else if ((lead >> 5) == 0x6)
151-
i += 2;
152-
else if ((lead >> 4) == 0xe)
153-
i += 3;
154-
else if ((lead >> 3) == 0x1e)
155-
i += 4;
161+
const size_t SrcLength = UTF8string.length();
162+
for (_Length=0, i=0; i<SrcLength; _Length++) {
163+
const unsigned int CharLength = UTFCharLength(static_cast<uint8>(UTF8string[i]));
164+
if ((CharLength >= 1) && (CharLength <= 4))
165+
i += CharLength;
156166
else
157167
// Invalid size?
158168
break;
159169
}
160170
_Data = new wchar_t[_Length+1];
161171
size_t j;
162-
for (j=0, i=0; i<UTF8string.length(); j++) {
163-
uint8 lead = static_cast<uint8>(UTF8string[i]);
164-
if (lead < 0x80) {
172+
for (j=0, i=0; i<SrcLength; j++) {
173+
const uint8 lead = static_cast<uint8>(UTF8string[i]);
174+
const unsigned int CharLength = UTFCharLength(lead);
175+
if ((CharLength < 1) || (CharLength > 4))
176+
// Invalid char?
177+
break;
178+
179+
if ((i + CharLength) > SrcLength)
180+
// Guard against invalid memory access beyond the end of the
181+
// source buffer.
182+
break;
183+
184+
if (CharLength == 1)
165185
_Data[j] = lead;
166-
i++;
167-
} else if ((lead >> 5) == 0x6) {
186+
else if (CharLength == 2)
168187
_Data[j] = ((lead & 0x1F) << 6) + (UTF8string[i+1] & 0x3F);
169-
i += 2;
170-
} else if ((lead >> 4) == 0xe) {
188+
else if (CharLength == 3)
171189
_Data[j] = ((lead & 0x0F) << 12) + ((UTF8string[i+1] & 0x3F) << 6) + (UTF8string[i+2] & 0x3F);
172-
i += 3;
173-
} else if ((lead >> 3) == 0x1e) {
190+
else if (CharLength == 4)
174191
_Data[j] = ((lead & 0x07) << 18) + ((UTF8string[i+1] & 0x3F) << 12) + ((UTF8string[i+2] & 0x3F) << 6) + (UTF8string[i+3] & 0x3F);
175-
i += 4;
176-
} else
177-
// Invalid char?
178-
break;
192+
193+
i += CharLength;
179194
}
180195
_Data[j] = 0;
181196
}

0 commit comments

Comments
 (0)