Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Fix for issue #8890: commonPrefix does not handle Unicode correctly.

It was returning partial code points when the first few code units of a
code point matched but the code point as a whole did not.
  • Loading branch information...
commit dd5eb5ff97736ecaec525488413382f94c0053ea 1 parent 88052ce
@jmdavis jmdavis authored
Showing with 96 additions and 14 deletions.
  1. +96 −14 std/algorithm.d
View
110 std/algorithm.d
@@ -317,7 +317,7 @@ module std.algorithm;
import std.c.string, core.bitop;
import std.array, std.ascii, std.container, std.conv, std.exception,
std.functional, std.math, std.metastrings, std.range, std.string,
- std.traits, std.typecons, std.typetuple, std.uni;
+ std.traits, std.typecons, std.typetuple, std.uni, std.utf;
version(unittest)
{
@@ -5226,15 +5226,19 @@ Returns the common prefix of two ranges. Example:
assert(commonPrefix("hello, world", "hello, there") == "hello, ");
----
-The type of the result is the same as $(D takeExactly(r1, n)), where
-$(D n) is the number of elements that both ranges start with.
+For strings, the result is a slice of $(D r1) which contains the characters that
+both strings start with. For all other types, the type of the result is the
+same as the result of $(D takeExactly(r1, n)), where $(D n) is the number of
+elements that both ranges start with.
*/
auto commonPrefix(alias pred = "a == b", R1, R2)(R1 r1, R2 r2)
-if (isForwardRange!R1 && isForwardRange!R2)
+if (isForwardRange!R1 && isForwardRange!R2 &&
+ !isNarrowString!R1 && !isNarrowString!R2 &&
+ is(typeof(binaryFun!pred(r1.front, r2.front))))
{
- static if (isSomeString!R1 && isSomeString!R2
- && ElementEncodingType!R1.sizeof == ElementEncodingType!R2.sizeof
- || isRandomAccessRange!R1 && hasLength!R2)
+ static if (isRandomAccessRange!R1 && isRandomAccessRange!R2 &&
+ hasLength!R1 && hasLength!R2 &&
+ hasSlicing!R1)
{
immutable limit = min(r1.length, r2.length);
foreach (i; 0 .. limit)
@@ -5250,21 +5254,99 @@ if (isForwardRange!R1 && isForwardRange!R2)
{
auto result = r1.save;
size_t i = 0;
- for (; !r1.empty && !r2.empty && binaryFun!pred(r1.front, r2.front);
+ for (;
+ !r1.empty && !r2.empty && binaryFun!pred(r1.front, r2.front);
++i, r1.popFront(), r2.popFront())
+ {}
+ return takeExactly(result, i);
+ }
+}
+
+auto commonPrefix(alias pred, R1, R2)(R1 r1, R2 r2)  // string overload taking an explicit predicate; returns a slice of r1
+if (isSomeString!R1 && isSomeString!R2 &&
+ !(!isNarrowString!R1 && !isNarrowString!R2) &&  // i.e. at least one of R1, R2 is a narrow string
+ is(typeof(binaryFun!pred(r1.front, r2.front))))  // pred must be callable on the two front elements
+{
+ auto result = r1.save;  // saved copy of r1 that the returned slice is taken from
+ immutable len = r1.length;
+ size_t i = 0;  // index into r1 just past the last element accepted by pred
+
+ for (size_t j = 0; i < len && !r2.empty; r2.popFront(), i = j)  // i catches up to j only after pred accepted the pair
+ {
+ immutable f = decode(r1, j);  // decode the element of r1 starting at index j; std.utf.decode advances j past it
+ if (!binaryFun!pred(f, r2.front))
+ break;  // i still marks the start of the rejected element, so it is excluded from the result
+ }
+
+ return result[0 .. i];  // i only ever holds 0 or a value j reached after a complete decode()
+}
+
+auto commonPrefix(R1, R2)(R1 r1, R2 r2)  // string overload without a predicate (plain equality); returns a slice of r1
+if (isSomeString!R1 && isSomeString!R2 && !(!isNarrowString!R1 && !isNarrowString!R2))  // at least one of R1, R2 is a narrow string
+{
+ static if (ElementEncodingType!R1.sizeof == ElementEncodingType!R2.sizeof)  // same code unit size: compare code units directly, no decoding
+ {
+ immutable limit = min(r1.length, r2.length);  // never index past the shorter string
+ for (size_t i = 0; i < limit;)
{
+ immutable codeLen = std.utf.stride(r1, i);  // number of code units in the code point starting at r1[i]
+ size_t j = 0;  // code units of the current code point compared so far
+
+ for (; j < codeLen && i < limit; ++i, ++j)
+ {
+ if (r1[i] != r2[i])
+ return r1[0 .. i - j];  // i - j is where the current code point started, so the partial match is dropped
+ }
+
+ if (i == limit && j < codeLen)  // input ran out partway through a code point
+ throw new UTFException("Invalid UTF-8 sequence", i);  // NOTE(review): message says UTF-8, but wchar strings also reach this branch - confirm wording
}
- return takeExactly(result, i);
+ return r1[0 .. limit];  // every code unit up to limit matched
}
+ else
+ return commonPrefix!"a == b"(r1, r2);  // differing code unit sizes: defer to the predicate overload with "a == b"
}
unittest
{
- assert(commonPrefix("hello, world", "hello, there") == "hello, ");
- assert(commonPrefix("hello, ", "hello, world") == "hello, ");
- assert(equal(commonPrefix("hello, world", "hello, there"w), "hello, "));
- assert(equal(commonPrefix("hello, world"w, "hello, there"), "hello, "));
- assert(equal(commonPrefix("hello, world", "hello, there"d), "hello, "));
+ assert(commonPrefix([1, 2, 3], [1, 2, 3, 4, 5]) == [1, 2, 3]);  // int arrays: first argument is a prefix of the second
+ assert(commonPrefix([1, 2, 3, 4, 5], [1, 2, 3]) == [1, 2, 3]);
+ assert(commonPrefix([1, 2, 3, 4], [1, 2, 3, 4]) == [1, 2, 3, 4]);
+ assert(commonPrefix([1, 2, 3], [7, 2, 3, 4, 5]).empty);  // first elements differ: empty result
+ assert(commonPrefix([7, 2, 3, 4, 5], [1, 2, 3]).empty);
+ assert(commonPrefix([1, 2, 3], cast(int[])null).empty);  // null arrays on either or both sides
+ assert(commonPrefix(cast(int[])null, [1, 2, 3]).empty);
+ assert(commonPrefix(cast(int[])null, cast(int[])null).empty);
+
+ foreach (S; TypeTuple!(char[], const(char)[], string,  // S: type of the first argument and of the expected result
+ wchar[], const(wchar)[], wstring,
+ dchar[], const(dchar)[], dstring))
+ {
+ foreach(T; TypeTuple!(string, wstring, dstring))  // T: type of the second argument
+ {
+ assert(commonPrefix(to!S(""), to!T("")).empty);
+ assert(commonPrefix(to!S(""), to!T("hello")).empty);
+ assert(commonPrefix(to!S("hello"), to!T("")).empty);
+ assert(commonPrefix(to!S("hello, world"), to!T("hello, there")) == to!S("hello, "));
+ assert(commonPrefix(to!S("hello, there"), to!T("hello, world")) == to!S("hello, "));
+ assert(commonPrefix(to!S("hello, "), to!T("hello, world")) == to!S("hello, "));
+ assert(commonPrefix(to!S("hello, world"), to!T("hello, ")) == to!S("hello, "));
+ assert(commonPrefix(to!S("hello, world"), to!T("hello, world")) == to!S("hello, world"));
+
+ //Bug# 8890
+ assert(commonPrefix(to!S("Пиво"), to!T("Пони"))== to!S("П"));  // only the first character is shared
+ assert(commonPrefix(to!S("Пони"), to!T("Пиво"))== to!S("П"));
+ assert(commonPrefix(to!S("Пиво"), to!T("Пиво"))== to!S("Пиво"));
+ assert(commonPrefix(to!S("\U0010FFFF\U0010FFFB\U0010FFFE"),  // inputs differ only in the third code point
+ to!T("\U0010FFFF\U0010FFFB\U0010FFFC")) == to!S("\U0010FFFF\U0010FFFB"));
+ assert(commonPrefix(to!S("\U0010FFFF\U0010FFFB\U0010FFFC"),
+ to!T("\U0010FFFF\U0010FFFB\U0010FFFE")) == to!S("\U0010FFFF\U0010FFFB"));
+ assert(commonPrefix!"a != b"(to!S("Пиво"), to!T("онво")) == to!S("Пи"));  // custom predicate: prefix over which the characters differ
+ assert(commonPrefix!"a != b"(to!S("онво"), to!T("Пиво")) == to!S("он"));
+ }
+ }
+
+ assertThrown!UTFException(commonPrefix("\U0010FFFF\U0010FFFB", "\U0010FFFF\U0010FFFB"[0 .. $ - 1]));  // second argument is sliced one code unit short
}
// findAdjacent

0 comments on commit dd5eb5f

Please sign in to comment.
Something went wrong with that request. Please try again.