Skip to content

Commit

Permalink
Implement NFG string -> NFD/NFKC/NFKD codepoints.
Browse files Browse the repository at this point in the history
This is probably not a commonly taken code path, but thankfully it is rather
easy to implement with the pieces we already have.
  • Loading branch information
jnthn committed Apr 20, 2015
1 parent 78f8c85 commit 6c8bd19
Showing 1 changed file with 19 additions and 2 deletions.
21 changes: 19 additions & 2 deletions src/strings/normalize.c
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,6 @@ MVMString * MVM_unicode_codepoints_to_nfg_string(MVMThreadContext *tc, MVMObject
* integer array, with codepoints normalized according to the specified
* normalization form. */
void MVM_unicode_string_to_codepoints(MVMThreadContext *tc, MVMString *s, MVMNormalization form, MVMObject *out) {
MVMNormalizer norm;
MVMCodepoint *result;
MVMint64 result_pos, result_alloc;
MVMCodepointIter ci;
Expand All @@ -157,7 +156,25 @@ void MVM_unicode_string_to_codepoints(MVMThreadContext *tc, MVMString *s, MVMNor

/* Otherwise, need to feed it through a normalizer. */
else {
MVM_panic(1, "Non-NFC Str coercions are NYI");
MVMNormalizer norm;
MVMint32 ready;
MVM_unicode_normalizer_init(tc, &norm, form);
while (MVM_string_ci_has_more(tc, &ci)) {
MVMCodepoint cp;
ready = MVM_unicode_normalizer_process_codepoint(tc, &norm, MVM_string_ci_get_codepoint(tc, &ci), &cp);
if (ready) {
maybe_grow_result(&result, &result_alloc, result_pos + ready);
result[result_pos++] = cp;
while (--ready > 0)
result[result_pos++] = MVM_unicode_normalizer_get_codepoint(tc, &norm);
}
}
MVM_unicode_normalizer_eof(tc, &norm);
ready = MVM_unicode_normalizer_available(tc, &norm);
maybe_grow_result(&result, &result_alloc, result_pos + ready);
while (ready--)
result[result_pos++] = MVM_unicode_normalizer_get_codepoint(tc, &norm);
MVM_unicode_normalizer_cleanup(tc, &norm);
}

/* Put result into array body. */
Expand Down

0 comments on commit 6c8bd19

Please sign in to comment.