Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Fix for issue #8890: commonPrefix does not handle Unicode correctly.

commonPrefix was returning a partial code point when the first few code
units of a code point matched in both strings but the entire code point did not.
  • Loading branch information...
commit dd5eb5ff97736ecaec525488413382f94c0053ea 1 parent 88052ce
Jonathan M Davis jmdavis authored

Showing 1 changed file with 96 additions and 14 deletions. Show diff stats Hide diff stats

  1. +96 14 std/algorithm.d
110 std/algorithm.d
@@ -317,7 +317,7 @@ module std.algorithm;
317 317 import std.c.string, core.bitop;
318 318 import std.array, std.ascii, std.container, std.conv, std.exception,
319 319 std.functional, std.math, std.metastrings, std.range, std.string,
320   - std.traits, std.typecons, std.typetuple, std.uni;
  320 + std.traits, std.typecons, std.typetuple, std.uni, std.utf;
321 321
322 322 version(unittest)
323 323 {
@@ -5226,15 +5226,19 @@ Returns the common prefix of two ranges. Example:
5226 5226 assert(commonPrefix("hello, world", "hello, there") == "hello, ");
5227 5227 ----
5228 5228
5229   -The type of the result is the same as $(D takeExactly(r1, n)), where
5230   -$(D n) is the number of elements that both ranges start with.
  5229 +For strings, the result is a slice of $(D r1) which contains the characters that
  5230 +both strings start with. For all other types, the type of the result is the
  5231 +same as the result of $(D takeExactly(r1, n)), where $(D n) is the number of
  5232 +elements that both ranges start with.
5231 5233 */
5232 5234 auto commonPrefix(alias pred = "a == b", R1, R2)(R1 r1, R2 r2)
5233   -if (isForwardRange!R1 && isForwardRange!R2)
  5235 +if (isForwardRange!R1 && isForwardRange!R2 &&
  5236 + !isNarrowString!R1 && !isNarrowString!R2 &&
  5237 + is(typeof(binaryFun!pred(r1.front, r2.front))))
5234 5238 {
5235   - static if (isSomeString!R1 && isSomeString!R2
5236   - && ElementEncodingType!R1.sizeof == ElementEncodingType!R2.sizeof
5237   - || isRandomAccessRange!R1 && hasLength!R2)
  5239 + static if (isRandomAccessRange!R1 && isRandomAccessRange!R2 &&
  5240 + hasLength!R1 && hasLength!R2 &&
  5241 + hasSlicing!R1)
5238 5242 {
5239 5243 immutable limit = min(r1.length, r2.length);
5240 5244 foreach (i; 0 .. limit)
@@ -5250,21 +5254,99 @@ if (isForwardRange!R1 && isForwardRange!R2)
5250 5254 {
5251 5255 auto result = r1.save;
5252 5256 size_t i = 0;
5253   - for (; !r1.empty && !r2.empty && binaryFun!pred(r1.front, r2.front);
  5257 + for (;
  5258 + !r1.empty && !r2.empty && binaryFun!pred(r1.front, r2.front);
5254 5259 ++i, r1.popFront(), r2.popFront())
  5260 + {}
  5261 + return takeExactly(result, i);
  5262 + }
  5263 +}
  5264 +
  5265 +auto commonPrefix(alias pred, R1, R2)(R1 r1, R2 r2) // string overload taking an explicit predicate; compares decoded code points
  5266 +if (isSomeString!R1 && isSomeString!R2 &&
  5267 + !(!isNarrowString!R1 && !isNarrowString!R2) && // equivalent to: isNarrowString!R1 || isNarrowString!R2
  5268 + is(typeof(binaryFun!pred(r1.front, r2.front))))
  5269 +{
  5270 + auto result = r1.save; // kept so the returned value is a slice of the original r1
  5271 + immutable len = r1.length; // array length of r1 (code units, not decoded characters)
  5272 + size_t i = 0; // index in r1 just past the last code point accepted so far
  5273 +
  5274 + for (size_t j = 0; i < len && !r2.empty; r2.popFront(), i = j) // j is the decode cursor; i only catches up to j after a successful comparison
  5275 + {
  5276 + immutable f = decode(r1, j); // decode the code point starting at r1[j]; std.utf.decode advances j past it
  5277 + if (!binaryFun!pred(f, r2.front))
  5278 + break; // break skips the loop increment, so i still marks the start of the rejected code point
  5279 + }
  5280 +
  5281 + return result[0 .. i]; // prefix of r1 made up of whole accepted code points only
  5282 +}
  5283 +
  5284 +auto commonPrefix(R1, R2)(R1 r1, R2 r2) // string overload without a predicate: plain equality comparison
  5285 +if (isSomeString!R1 && isSomeString!R2 && !(!isNarrowString!R1 && !isNarrowString!R2)) // last clause is equivalent to: isNarrowString!R1 || isNarrowString!R2
  5286 +{
  5287 + static if (ElementEncodingType!R1.sizeof == ElementEncodingType!R2.sizeof) // same code-unit size: compare code units directly, no decoding
  5288 + {
  5289 + immutable limit = min(r1.length, r2.length); // the common prefix cannot be longer than the shorter string
  5290 + for (size_t i = 0; i < limit;) // i is advanced by the inner loop, one code point at a time
5255 5291 {
  5292 + immutable codeLen = std.utf.stride(r1, i); // number of code units in the code point starting at r1[i]
  5293 + size_t j = 0; // code units of the current code point compared so far
  5294 +
  5295 + for (; j < codeLen && i < limit; ++i, ++j)
  5296 + {
  5297 + if (r1[i] != r2[i])
  5298 + return r1[0 .. i - j]; // mismatch: i - j is the start of the current code point, so no partial code point is returned
  5299 + }
  5300 +
  5301 + if (i == limit && j < codeLen) // hit the end of the shorter string part-way through a code point
  5302 + throw new UTFException("Invalid UTF-8 sequence", i); // NOTE(review): message says UTF-8, but this branch can also be instantiated for wchar strings - confirm wording
5256 5303 }
5257   - return takeExactly(result, i);
  5304 + return r1[0 .. limit]; // every compared code unit matched
5258 5305 }
  5306 + else
  5307 + return commonPrefix!"a == b"(r1, r2); // different code-unit sizes: forward to the predicate-taking string overload
5259 5308 }
5260 5309
5261 5310 unittest
5262 5311 {
5263   - assert(commonPrefix("hello, world", "hello, there") == "hello, ");
5264   - assert(commonPrefix("hello, ", "hello, world") == "hello, ");
5265   - assert(equal(commonPrefix("hello, world", "hello, there"w), "hello, "));
5266   - assert(equal(commonPrefix("hello, world"w, "hello, there"), "hello, "));
5267   - assert(equal(commonPrefix("hello, world", "hello, there"d), "hello, "));
  5312 + assert(commonPrefix([1, 2, 3], [1, 2, 3, 4, 5]) == [1, 2, 3]); // non-string ranges: int arrays
  5313 + assert(commonPrefix([1, 2, 3, 4, 5], [1, 2, 3]) == [1, 2, 3]);
  5314 + assert(commonPrefix([1, 2, 3, 4], [1, 2, 3, 4]) == [1, 2, 3, 4]);
  5315 + assert(commonPrefix([1, 2, 3], [7, 2, 3, 4, 5]).empty); // first elements differ: empty prefix
  5316 + assert(commonPrefix([7, 2, 3, 4, 5], [1, 2, 3]).empty);
  5317 + assert(commonPrefix([1, 2, 3], cast(int[])null).empty); // a null array on either side gives an empty prefix
  5318 + assert(commonPrefix(cast(int[])null, [1, 2, 3]).empty);
  5319 + assert(commonPrefix(cast(int[])null, cast(int[])null).empty);
  5320 +
  5321 + foreach (S; TypeTuple!(char[], const(char)[], string, // S: type of the first argument, every char width and mutability
  5322 + wchar[], const(wchar)[], wstring,
  5323 + dchar[], const(dchar)[], dstring))
  5324 + {
  5325 + foreach(T; TypeTuple!(string, wstring, dstring)) // T: type of the second argument, every char width
  5326 + {
  5327 + assert(commonPrefix(to!S(""), to!T("")).empty);
  5328 + assert(commonPrefix(to!S(""), to!T("hello")).empty);
  5329 + assert(commonPrefix(to!S("hello"), to!T("")).empty);
  5330 + assert(commonPrefix(to!S("hello, world"), to!T("hello, there")) == to!S("hello, ")); // expected value is expressed in the first argument's type S
  5331 + assert(commonPrefix(to!S("hello, there"), to!T("hello, world")) == to!S("hello, "));
  5332 + assert(commonPrefix(to!S("hello, "), to!T("hello, world")) == to!S("hello, "));
  5333 + assert(commonPrefix(to!S("hello, world"), to!T("hello, ")) == to!S("hello, "));
  5334 + assert(commonPrefix(to!S("hello, world"), to!T("hello, world")) == to!S("hello, world"));
  5335 +
  5336 + //Bug# 8890
  5337 + assert(commonPrefix(to!S("Пиво"), to!T("Пони"))== to!S("П")); // the second letters differ but share their first UTF-8 code unit; only the whole first letter may be returned
  5338 + assert(commonPrefix(to!S("Пони"), to!T("Пиво"))== to!S("П"));
  5339 + assert(commonPrefix(to!S("Пиво"), to!T("Пиво"))== to!S("Пиво"));
  5340 + assert(commonPrefix(to!S("\U0010FFFF\U0010FFFB\U0010FFFE"), // code points that need several code units in UTF-8 and UTF-16, differing only in the last one
  5341 + to!T("\U0010FFFF\U0010FFFB\U0010FFFC")) == to!S("\U0010FFFF\U0010FFFB"));
  5342 + assert(commonPrefix(to!S("\U0010FFFF\U0010FFFB\U0010FFFC"),
  5343 + to!T("\U0010FFFF\U0010FFFB\U0010FFFE")) == to!S("\U0010FFFF\U0010FFFB"));
  5344 + assert(commonPrefix!"a != b"(to!S("Пиво"), to!T("онво")) == to!S("Пи")); // custom predicate: prefix is the leading run of positions where the characters differ
  5345 + assert(commonPrefix!"a != b"(to!S("онво"), to!T("Пиво")) == to!S("он"));
  5346 + }
  5347 + }
  5348 +
  5349 + assertThrown!UTFException(commonPrefix("\U0010FFFF\U0010FFFB", "\U0010FFFF\U0010FFFB"[0 .. $ - 1])); // second argument is cut off inside its last code point
5268 5350 }
5269 5351
5270 5352 // findAdjacent

0 comments on commit dd5eb5f

Please sign in to comment.
Something went wrong with that request. Please try again.