Skip to content

Commit

Permalink
Merge pull request #1736 from JakobOvrum/graphemerange
Browse files Browse the repository at this point in the history
Add std.uni.byGrapheme and std.uni.byCodePoint
  • Loading branch information
monarchdodra committed Dec 5, 2013
2 parents 5ef0d34 + 31a4357 commit 43901ec
Showing 1 changed file with 188 additions and 1 deletion.
189 changes: 188 additions & 1 deletion std/uni.d
Expand Up @@ -31,7 +31,7 @@
Converting text to any of the four normalization forms via $(LREF normalize).
)
$(LI
Decoding ($(LREF decodeGrapheme)) and iteration ($(LREF graphemeStride))
Decoding ($(LREF decodeGrapheme)) and iteration ($(LREF byGrapheme), $(LREF graphemeStride))
by user-perceived characters, that is by $(LREF Grapheme) clusters.
)
$(LI
Expand Down Expand Up @@ -5188,6 +5188,193 @@ unittest
assert(equalS(decodeGrapheme(s)[], "\uAC01"));
}

/++
    $(P Iterate a string by grapheme.)
    $(P Useful for doing string manipulation that needs to be aware
    of graphemes.)

    Params:
        range = an input range of code points ($(D dchar) elements)

    Returns:
        An input range of $(LREF Grapheme)s. It additionally offers
        $(D save) when $(D Range) is a forward range.

    See_Also:
        $(LREF byCodePoint)
+/
// TODO: Bidirectional access
auto byGrapheme(Range)(Range range)
    if (isInputRange!Range && is(Unqual!(ElementType!Range) == dchar))
{
    static struct Result
    {
        private Range _source;     // code points that were not decoded yet
        private Grapheme _current; // cluster currently exposed through front

        // A zero-length cluster doubles as the "exhausted" marker.
        @property bool empty()
        {
            return _current.length == 0;
        }

        @property Grapheme front()
        {
            return _current;
        }

        void popFront()
        {
            if (_source.empty)
            {
                // Nothing left to decode: reset to the empty cluster so that
                // empty() reports true from now on.
                _current = Grapheme.init;
            }
            else
            {
                // decodeGrapheme consumes the decoded code points from _source.
                _current = decodeGrapheme(_source);
            }
        }

        static if (isForwardRange!Range)
        {
            @property Result save()
            {
                auto snapshot = Result(_source.save, _current);
                return snapshot;
            }
        }
    }

    // Prime the range: the first cluster is decoded eagerly so that front
    // and empty are valid immediately after construction.
    auto primed = Result(range);
    primed.popFront();
    return primed;
}

///
unittest
{
    // "noël" spelled with a plain 'e' followed by a combining diaeresis
    auto word = "noe\u0308l";
    assert(word.walkLength == 5); // counted in code points

    auto clusters = word.byGrapheme;
    assert(clusters.walkLength == 4); // counted in graphemes

    // the first three clusters are "n", "o" and "e" + diaeresis
    assert(clusters.take(3).equal("noe\u0308".byGrapheme));
    assert(clusters.drop(3).equal("l".byGrapheme));
}

// For testing non-forward-range input ranges.
// Wraps a string and exposes only the three input-range primitives
// (no save), so range adaptors can be checked against a pure input range.
version(unittest)
private static struct InputRangeString
{
    private string s;

    bool empty() @property { return s.empty; }
    dchar front() @property { return s.front; }
    // popFront is a mutating action, not a property, so it carries no @property.
    void popFront() { s.popFront(); }
}

unittest
{
    // An empty string contains no grapheme clusters.
    assert("".byGrapheme.walkLength == 0);

    // "noël" with its grapheme clusters in reverse order.
    auto reversed = "le\u0308on";
    assert(reversed.walkLength == 5);

    auto reversedClusters = reversed.byGrapheme;
    assert(reversedClusters.walkLength == 4);

    // The same word as char, wchar and dchar string literals.
    foreach (word; TypeTuple!("noe\u0308l"c, "noe\u0308l"w, "noe\u0308l"d))
    {
        static assert(isForwardRange!(typeof(word)));
        assert(word.walkLength == 5);

        auto clusters = word.byGrapheme;
        static assert(isForwardRange!(typeof(clusters)));
        assert(clusters.walkLength == 4);
        assert(clusters.array.retro.equal(reversedClusters));
    }

    // A source without save must produce a result without save.
    auto inputOnly = InputRangeString("noe\u0308l").byGrapheme;
    static assert(!isForwardRange!(typeof(inputOnly)));
    assert(inputOnly.walkLength == 4);
}

/++
    $(P Lazily transform a range of $(LREF Grapheme)s to a range of code points.)
    $(P Useful for converting the result to a string after doing operations
    on graphemes.)
    $(P Acts as the identity function when given a range of code points.)

    Params:
        range = an input range of $(LREF Grapheme)s, or of code points

    Returns:
        An input range of $(D dchar). For a range of $(LREF Grapheme)s it
        additionally offers $(D save) when $(D Range) is a forward range.
+/
// TODO: Propagate bidirectional access
auto byCodePoint(Range)(Range range)
    if (isInputRange!Range && is(Unqual!(ElementType!Range) == Grapheme))
{
    static struct Result
    {
        private Range _clusters;  // remaining graphemes, current one at the front
        private size_t _offset;   // index of the current code point in that grapheme

        @property bool empty()
        {
            return _clusters.empty;
        }

        @property dchar front()
        {
            return _clusters.front[_offset];
        }

        void popFront()
        {
            // Still inside the current cluster: just step to its next code point.
            if (++_offset < _clusters.front.length)
                return;

            // Current cluster is used up: move on and restart at its first code point.
            _clusters.popFront();
            _offset = 0;
        }

        static if (isForwardRange!Range)
        {
            @property Result save()
            {
                return Result(_clusters.save, _offset);
            }
        }
    }

    return Result(range);
}

/// Ditto
Range byCodePoint(Range)(Range range)
    if (isInputRange!Range && is(Unqual!(ElementType!Range) == dchar))
{
    // The elements are already code points, so the range is handed back
    // unchanged and keeps its original type.
    return range;
}

///
unittest
{
    // text is declared in std.conv; import it from there rather than
    // through std.string, which does not publicly export it.
    import std.array : array;
    import std.conv : text;
    import std.range : retro;

    string s = "noe\u0308l"; // noël

    // reverse it and convert the result to a string
    string reverse = s.byGrapheme
        .array
        .retro
        .byCodePoint
        .text;

    assert(reverse == "le\u0308on"); // lëon
}

unittest
{
    // No graphemes in, no code points out.
    assert("".byGrapheme.byCodePoint.equal(""));

    string word = "noe\u0308l";
    // A range of code points passes through with its type intact.
    static assert(is(typeof(word.byCodePoint) == string));

    // A source without save must stay without save through both adaptors.
    auto clusters = InputRangeString(word).byGrapheme;
    static assert(!isForwardRange!(typeof(clusters)));

    auto codePoints = clusters.byCodePoint;
    static assert(!isForwardRange!(typeof(codePoints)));

    // Flattening the clusters yields as many code points as the source had.
    assert(codePoints.walkLength == word.walkLength);
}

/++
$(P A structure designed to effectively pack $(CHARACTERS)
of a $(CLUSTER).
Expand Down

0 comments on commit 43901ec

Please sign in to comment.