@@ -47,6 +47,21 @@ START_LIBEBML_NAMESPACE
4747
4848// ===================== UTFstring class ===================
4949
// Returns the number of bytes in the UTF-8 sequence introduced by `lead`,
// determined solely from the high bits of that lead byte:
//
//   0xxxxxxx -> 1 byte
//   110xxxxx -> 2 bytes
//   1110xxxx -> 3 bytes
//   11110xxx -> 4 bytes
//
// Any other value (a continuation byte 10xxxxxx, or 0xF8 and above) cannot
// start a sequence; 0 is returned so callers can detect it and stop decoding.
// Only the length prefix is examined: continuation bytes are not inspected
// and no check is made for overlong or out-of-range encodings.
static unsigned int UTFCharLength(uint8 lead)
{
  if (lead < 0x80)
    return 1;
  else if ((lead >> 5) == 0x6)
    return 2;
  else if ((lead >> 4) == 0xe)
    return 3;
  else if ((lead >> 3) == 0x1e)
    return 4;
  else
    // Not a valid lead byte.
    return 0;
}
64+
5065UTFstring::UTFstring ()
5166 :_Length(0 )
5267 ,_Data(NULL )
@@ -143,39 +158,39 @@ void UTFstring::UpdateFromUTF8()
143158 delete [] _Data;
144159 // find the size of the final UCS-2 string
145160 size_t i;
146- for (_Length=0 , i=0 ; i<UTF8string.length (); _Length++) {
147- uint8 lead = static_cast <uint8>(UTF8string[i]);
148- if (lead < 0x80 )
149- i++;
150- else if ((lead >> 5 ) == 0x6 )
151- i += 2 ;
152- else if ((lead >> 4 ) == 0xe )
153- i += 3 ;
154- else if ((lead >> 3 ) == 0x1e )
155- i += 4 ;
161+ const size_t SrcLength = UTF8string.length ();
162+ for (_Length=0 , i=0 ; i<SrcLength; _Length++) {
163+ const unsigned int CharLength = UTFCharLength (static_cast <uint8>(UTF8string[i]));
164+ if ((CharLength >= 1 ) && (CharLength <= 4 ))
165+ i += CharLength;
156166 else
157167 // Invalid size?
158168 break ;
159169 }
160170 _Data = new wchar_t [_Length+1 ];
161171 size_t j;
162- for (j=0 , i=0 ; i<UTF8string.length (); j++) {
163- uint8 lead = static_cast <uint8>(UTF8string[i]);
164- if (lead < 0x80 ) {
172+ for (j=0 , i=0 ; i<SrcLength; j++) {
173+ const uint8 lead = static_cast <uint8>(UTF8string[i]);
174+ const unsigned int CharLength = UTFCharLength (lead);
175+ if ((CharLength < 1 ) || (CharLength > 4 ))
176+ // Invalid char?
177+ break ;
178+
179+ if ((i + CharLength) > SrcLength)
180+ // Guard against invalid memory access beyond the end of the
181+ // source buffer.
182+ break ;
183+
184+ if (CharLength == 1 )
165185 _Data[j] = lead;
166- i++;
167- } else if ((lead >> 5 ) == 0x6 ) {
186+ else if (CharLength == 2 )
168187 _Data[j] = ((lead & 0x1F ) << 6 ) + (UTF8string[i+1 ] & 0x3F );
169- i += 2 ;
170- } else if ((lead >> 4 ) == 0xe ) {
188+ else if (CharLength == 3 )
171189 _Data[j] = ((lead & 0x0F ) << 12 ) + ((UTF8string[i+1 ] & 0x3F ) << 6 ) + (UTF8string[i+2 ] & 0x3F );
172- i += 3 ;
173- } else if ((lead >> 3 ) == 0x1e ) {
190+ else if (CharLength == 4 )
174191 _Data[j] = ((lead & 0x07 ) << 18 ) + ((UTF8string[i+1 ] & 0x3F ) << 12 ) + ((UTF8string[i+2 ] & 0x3F ) << 6 ) + (UTF8string[i+3 ] & 0x3F );
175- i += 4 ;
176- } else
177- // Invalid char?
178- break ;
192+
193+ i += CharLength;
179194 }
180195 _Data[j] = 0 ;
181196}
0 commit comments