Skip to content

Commit

Permalink
Fix for issue# 8890: commonPrefix does not handle unicode correctly.
Browse files Browse the repository at this point in the history
It was returning partial code points if the first few code units in a
code point matched but not the entire code point.
  • Loading branch information
jmdavis committed Jan 4, 2013
1 parent 88052ce commit dd5eb5f
Showing 1 changed file with 96 additions and 14 deletions.
110 changes: 96 additions & 14 deletions std/algorithm.d
Expand Up @@ -317,7 +317,7 @@ module std.algorithm;
import std.c.string, core.bitop;
import std.array, std.ascii, std.container, std.conv, std.exception,
std.functional, std.math, std.metastrings, std.range, std.string,
std.traits, std.typecons, std.typetuple, std.uni;
std.traits, std.typecons, std.typetuple, std.uni, std.utf;

version(unittest)
{
Expand Down Expand Up @@ -5226,15 +5226,19 @@ Returns the common prefix of two ranges. Example:
assert(commonPrefix("hello, world", "hello, there") == "hello, ");
----
The type of the result is the same as $(D takeExactly(r1, n)), where
$(D n) is the number of elements that both ranges start with.
For strings, the result is a slice of $(D r1) which contains the characters that
both strings start with. For all other types, the type of the result is the
same as the result of $(D takeExactly(r1, n)), where $(D n) is the number of
elements that both ranges start with.
*/
auto commonPrefix(alias pred = "a == b", R1, R2)(R1 r1, R2 r2)
if (isForwardRange!R1 && isForwardRange!R2)
if (isForwardRange!R1 && isForwardRange!R2 &&
!isNarrowString!R1 && !isNarrowString!R2 &&
is(typeof(binaryFun!pred(r1.front, r2.front))))
{
static if (isSomeString!R1 && isSomeString!R2
&& ElementEncodingType!R1.sizeof == ElementEncodingType!R2.sizeof
|| isRandomAccessRange!R1 && hasLength!R2)
static if (isRandomAccessRange!R1 && isRandomAccessRange!R2 &&
hasLength!R1 && hasLength!R2 &&
hasSlicing!R1)
{
immutable limit = min(r1.length, r2.length);
foreach (i; 0 .. limit)
Expand All @@ -5250,21 +5254,99 @@ if (isForwardRange!R1 && isForwardRange!R2)
{
auto result = r1.save;
size_t i = 0;
for (; !r1.empty && !r2.empty && binaryFun!pred(r1.front, r2.front);
for (;
!r1.empty && !r2.empty && binaryFun!pred(r1.front, r2.front);
++i, r1.popFront(), r2.popFront())
{}
return takeExactly(result, i);
}
}

/*
 * String overload taking an explicit predicate, selected when at least one
 * of the two strings is a narrow (char/wchar) string.
 *
 * r1 is decoded one code point at a time while r2 is walked by front/popFront,
 * and pred is applied to each pair of code points. The returned value is the
 * slice of r1 that covers every code point for which pred held, so the slice
 * always ends on a code point boundary of r1.
 */
auto commonPrefix(alias pred, R1, R2)(R1 r1, R2 r2)
if (isSomeString!R1 && isSomeString!R2 &&
    !(!isNarrowString!R1 && !isNarrowString!R2) &&
    is(typeof(binaryFun!pred(r1.front, r2.front))))
{
    auto whole = r1.save;
    immutable total = r1.length;

    // accepted: number of code units of r1 already known to be in the prefix.
    // cursor:   index just past the code point most recently decoded from r1.
    size_t accepted = 0;
    size_t cursor = 0;

    while (accepted < total && !r2.empty)
    {
        // decode advances cursor past one complete code point of r1.
        immutable codePoint = decode(r1, cursor);

        if (!binaryFun!pred(codePoint, r2.front))
            break;

        // Only commit the code point once the predicate has accepted it.
        r2.popFront();
        accepted = cursor;
    }

    return whole[0 .. accepted];
}

/*
 * String overload using plain equality, selected when at least one of the
 * two strings is a narrow (char/wchar) string.
 *
 * When both strings use code units of the same size, the comparison is done
 * directly on code units, one whole code point of r1 at a time, so that a
 * mismatch in the middle of a multi-unit code point drops that entire code
 * point from the result. Otherwise the work is forwarded to the predicate
 * overload with "a == b", which decodes code points.
 *
 * Returns: the slice of r1 holding the code points both strings start with.
 * Throws:  UTFException if the shorter string ends in the middle of a code
 *          point whose leading code units all matched.
 */
auto commonPrefix(R1, R2)(R1 r1, R2 r2)
if (isSomeString!R1 && isSomeString!R2 && !(!isNarrowString!R1 && !isNarrowString!R2))
{
    static if (ElementEncodingType!R1.sizeof == ElementEncodingType!R2.sizeof)
    {
        // Same code unit size plus "at least one narrow" means both strings
        // are char-based or both are wchar-based; name the right encoding
        // in the error text instead of always claiming UTF-8.
        enum invalidMsg = ElementEncodingType!R1.sizeof == 1
            ? "Invalid UTF-8 sequence"
            : "Invalid UTF-16 sequence";

        immutable limit = min(r1.length, r2.length);
        for (size_t i = 0; i < limit;)
        {
            // Number of code units in the code point of r1 starting at i.
            immutable codeLen = std.utf.stride(r1, i);
            size_t j = 0;

            for (; j < codeLen && i < limit; ++i, ++j)
            {
                // On a mismatch, back up to the start of the current code
                // point so no partial code point is returned.
                if (r1[i] != r2[i])
                    return r1[0 .. i - j];
            }

            // The shorter string ran out part-way through a code point.
            if (i == limit && j < codeLen)
                throw new UTFException(invalidMsg, i);
        }
        return r1[0 .. limit];
    }
    else
        return commonPrefix!"a == b"(r1, r2);
}

// Exercises commonPrefix on string literals, integer arrays, every pairing of
// string element types, and a truncated UTF-8 input.
unittest
{
    // String literals, including pairs whose code unit widths differ.
    assert(commonPrefix("hello, world", "hello, there") == "hello, ");
    assert(commonPrefix("hello, ", "hello, world") == "hello, ");
    assert(equal(commonPrefix("hello, world", "hello, there"w), "hello, "));
    assert(equal(commonPrefix("hello, world"w, "hello, there"), "hello, "));
    assert(equal(commonPrefix("hello, world", "hello, there"d), "hello, "));

    // Non-string arrays: shared prefix, no shared prefix, and null inputs.
    assert(commonPrefix([1, 2, 3], [1, 2, 3, 4, 5]) == [1, 2, 3]);
    assert(commonPrefix([1, 2, 3, 4, 5], [1, 2, 3]) == [1, 2, 3]);
    assert(commonPrefix([1, 2, 3, 4], [1, 2, 3, 4]) == [1, 2, 3, 4]);
    assert(commonPrefix([1, 2, 3], [7, 2, 3, 4, 5]).empty);
    assert(commonPrefix([7, 2, 3, 4, 5], [1, 2, 3]).empty);
    assert(commonPrefix([1, 2, 3], cast(int[])null).empty);
    assert(commonPrefix(cast(int[])null, [1, 2, 3]).empty);
    assert(commonPrefix(cast(int[])null, cast(int[])null).empty);

    // Type of the first argument (all mutability and width combinations)
    // and type of the second argument.
    alias TypeTuple!(char[], const(char)[], string,
                     wchar[], const(wchar)[], wstring,
                     dchar[], const(dchar)[], dstring) LhsTypes;
    alias TypeTuple!(string, wstring, dstring) RhsTypes;

    foreach (Lhs; LhsTypes)
    {
        foreach (Rhs; RhsTypes)
        {
            // Empty inputs on either side.
            assert(commonPrefix(to!Lhs(""), to!Rhs("")).empty);
            assert(commonPrefix(to!Lhs(""), to!Rhs("hello")).empty);
            assert(commonPrefix(to!Lhs("hello"), to!Rhs("")).empty);

            // ASCII text: partial match, one side a prefix of the other,
            // and identical strings.
            assert(commonPrefix(to!Lhs("hello, world"), to!Rhs("hello, there")) == to!Lhs("hello, "));
            assert(commonPrefix(to!Lhs("hello, there"), to!Rhs("hello, world")) == to!Lhs("hello, "));
            assert(commonPrefix(to!Lhs("hello, "), to!Rhs("hello, world")) == to!Lhs("hello, "));
            assert(commonPrefix(to!Lhs("hello, world"), to!Rhs("hello, ")) == to!Lhs("hello, "));
            assert(commonPrefix(to!Lhs("hello, world"), to!Rhs("hello, world")) == to!Lhs("hello, world"));

            //Bug# 8890
            // Multi-unit code points must never be split in the result.
            assert(commonPrefix(to!Lhs("Пиво"), to!Rhs("Пони")) == to!Lhs("П"));
            assert(commonPrefix(to!Lhs("Пони"), to!Rhs("Пиво")) == to!Lhs("П"));
            assert(commonPrefix(to!Lhs("Пиво"), to!Rhs("Пиво")) == to!Lhs("Пиво"));
            assert(commonPrefix(to!Lhs("\U0010FFFF\U0010FFFB\U0010FFFE"),
                                to!Rhs("\U0010FFFF\U0010FFFB\U0010FFFC")) == to!Lhs("\U0010FFFF\U0010FFFB"));
            assert(commonPrefix(to!Lhs("\U0010FFFF\U0010FFFB\U0010FFFC"),
                                to!Rhs("\U0010FFFF\U0010FFFB\U0010FFFE")) == to!Lhs("\U0010FFFF\U0010FFFB"));

            // Custom predicate: the prefix runs while code points differ.
            assert(commonPrefix!"a != b"(to!Lhs("Пиво"), to!Rhs("онво")) == to!Lhs("Пи"));
            assert(commonPrefix!"a != b"(to!Lhs("онво"), to!Rhs("Пиво")) == to!Lhs("он"));
        }
    }

    // Second string is cut off inside its last code point.
    assertThrown!UTFException(commonPrefix("\U0010FFFF\U0010FFFB", "\U0010FFFF\U0010FFFB"[0 .. $ - 1]));
}

// findAdjacent
Expand Down

0 comments on commit dd5eb5f

Please sign in to comment.