Skip to content

Commit 12424eb

Browse files
MaxGraey authored and dcodeIO committed
Add String#toLowerCase and String#toUpperCase (AssemblyScript#965)
1 parent 20e882c commit 12424eb

File tree

10 files changed

+10158
-5373
lines changed

10 files changed

+10158
-5373
lines changed

std/assembly/index.d.ts

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1429,6 +1429,8 @@ declare class String {
14291429
replaceAll(search: string, replacement: string): string;
14301430
slice(beginIndex: i32, endIndex?: i32): string;
14311431
split(separator?: string, limit?: i32): string[];
1432+
toLowerCase(): string;
1433+
toUpperCase(): string;
14321434
toString(): string;
14331435
}
14341436

std/assembly/string.ts

Lines changed: 143 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,8 @@
11
/// <reference path="./rt/index.d.ts" />
22

33
import { BLOCK, BLOCK_OVERHEAD, BLOCK_MAXSIZE } from "./rt/common";
4-
import { compareImpl, strtol, strtod, isSpace } from "./util/string";
4+
import { compareImpl, strtol, strtod, isSpace, isAscii, toLower8, toUpper8 } from "./util/string";
5+
import { specialsUpper, casemap, bsearch } from "./util/casemap";
56
import { E_INVALIDLENGTH } from "./util/error";
67
import { ArrayBufferView } from "./arraybuffer";
78
import { idof } from "./builtins";
@@ -10,27 +11,27 @@ import { idof } from "./builtins";
1011

1112
@lazy static readonly MAX_LENGTH: i32 = BLOCK_MAXSIZE >>> alignof<u16>();
1213

13-
static fromCharCode(unit: i32, surr: i32 = -1): string {
14+
static fromCharCode(unit: i32, surr: i32 = -1): String {
1415
var hasSur = surr > 0;
15-
var out = __alloc(2 << i32(hasSur), idof<string>());
16+
var out = __alloc(2 << i32(hasSur), idof<String>());
1617
store<u16>(out, <u16>unit);
1718
if (hasSur) store<u16>(out, <u16>surr, 2);
18-
return changetype<string>(out); // retains
19+
return changetype<String>(out); // retains
1920
}
2021

21-
static fromCodePoint(code: i32): string {
22+
static fromCodePoint(code: i32): String {
2223
assert(<u32>code <= 0x10FFFF);
2324
var hasSur = code > 0xFFFF;
24-
var out = __alloc(2 << i32(hasSur), idof<string>());
25+
var out = __alloc(2 << i32(hasSur), idof<String>());
2526
if (!hasSur) {
2627
store<u16>(out, <u16>code);
2728
} else {
2829
code -= 0x10000;
29-
let lo: u32 = (code & 0x3FF) + 0xDC00;
30-
let hi: u32 = (code >>> 10) + 0xD800;
31-
store<u32>(out, hi | (lo << 16));
30+
let hi = (code & 0x03FF) | 0xDC00;
31+
let lo = (code >>> 10) | 0xD800;
32+
store<u32>(out, lo | (hi << 16));
3233
}
33-
return changetype<string>(out); // retains
34+
return changetype<String>(out); // retains
3435
}
3536

3637
get length(): i32 {
@@ -50,12 +51,13 @@ import { idof } from "./builtins";
5051
}
5152

5253
codePointAt(pos: i32): i32 {
53-
if (<u32>pos >= <u32>this.length) return -1; // (undefined)
54+
var len = this.length;
55+
if (<u32>pos >= <u32>len) return -1; // (undefined)
5456
var first = <i32>load<u16>(changetype<usize>(this) + (<usize>pos << 1));
55-
if (first < 0xD800 || first > 0xDBFF || pos + 1 == this.length) return first;
57+
if ((first & 0xFC00) != 0xD800 || pos + 1 == len) return first;
5658
var second = <i32>load<u16>(changetype<usize>(this) + ((<usize>pos + 1) << 1));
57-
if (second < 0xDC00 || second > 0xDFFF) return first;
58-
return ((first - 0xD800) << 10) + (second - 0xDC00) + 0x10000;
59+
if ((second & 0xFC00) != 0xDC00) return first;
60+
return (first - 0xD800 << 10) + (second - 0xDC00) + 0x10000;
5961
}
6062

6163
@operator("+") private static __concat(left: String, right: String): String {
@@ -432,7 +434,7 @@ import { idof } from "./builtins";
432434

433435
split(separator: String | null = null, limit: i32 = i32.MAX_VALUE): String[] {
434436
if (!limit) return changetype<Array<String>>(__allocArray(0, alignof<String>(), idof<Array<String>>())); // retains
435-
if (separator === null) return <String[]>[this];
437+
if (separator === null) return [this];
436438
var length: isize = this.length;
437439
var sepLen: isize = separator.length;
438440
if (limit < 0) limit = i32.MAX_VALUE;
@@ -484,6 +486,121 @@ import { idof } from "./builtins";
484486
// releases result
485487
}
486488

489+
toLowerCase(): String {
490+
var len = <usize>this.length;
491+
if (!len) return this;
492+
var codes = __alloc(len * 2 * 2, idof<String>());
493+
var j: usize = 0;
494+
for (let i: usize = 0; i < len; ++i, ++j) {
495+
let c = <u32>load<u16>(changetype<usize>(this) + (i << 1));
496+
if (isAscii(c)) {
497+
store<u16>(codes + (j << 1), toLower8(c));
498+
} else {
499+
// check and read surrogate pair
500+
if ((c - 0xD7FF < 0xDC00 - 0xD7FF) && i < len - 1) {
501+
let c1 = <u32>load<u16>(changetype<usize>(this) + (i << 1), 2);
502+
if (c1 - 0xDBFF < 0xE000 - 0xDBFF) {
503+
let c0 = c;
504+
c = (((c & 0x03FF) << 10) | (c1 & 0x03FF)) + 0x10000;
505+
++i;
506+
if (c >= 0x20000) {
507+
store<u32>(codes + (j << 1), c0 | (c1 << 16));
508+
++j;
509+
continue;
510+
}
511+
}
512+
}
513+
// Check special casing for the lower table. It has only one entry, so instead of a lookup we just inline it.
514+
if (c == 0x0130) {
515+
// 0x0130 -> [0x0069, 0x0307]
516+
store<u32>(codes + (j << 1), (0x0307 << 16) | 0x0069);
517+
++j;
518+
} else if (c - 0x24B6 <= 0x24CF - 0x24B6) {
519+
// Range 0x24B6 <= c <= 0x24CF is not covered by casemap and requires special early handling
520+
store<u16>(codes + (j << 1), c + 26);
521+
} else {
522+
let code = casemap(c, 0) & 0x1FFFFF;
523+
if (code < 0x10000) {
524+
store<u16>(codes + (j << 1), code);
525+
} else {
526+
// store as surrogate pair
527+
code -= 0x10000;
528+
let lo = (code >>> 10) | 0xD800;
529+
let hi = (code & 0x03FF) | 0xDC00;
530+
store<u32>(codes + (j << 1), lo | (hi << 16));
531+
++j;
532+
}
533+
}
534+
}
535+
}
536+
codes = __realloc(codes, j << 1);
537+
return changetype<String>(codes); // retains
538+
}
539+
540+
toUpperCase(): String {
541+
var len = <usize>this.length;
542+
if (!len) return this;
543+
var codes = __alloc(len * 3 * 2, idof<String>());
544+
// @ts-ignore: cast
545+
var specialsUpperPtr = specialsUpper.dataStart as usize;
546+
var specialsUpperLen = specialsUpper.length;
547+
var j: usize = 0;
548+
for (let i: usize = 0; i < len; ++i, ++j) {
549+
let c = <u32>load<u16>(changetype<usize>(this) + (i << 1));
550+
if (isAscii(c)) {
551+
store<u16>(codes + (j << 1), toUpper8(c));
552+
} else {
553+
// check and read surrogate pair
554+
if ((c - 0xD7FF < 0xDC00 - 0xD7FF) && i < len - 1) {
555+
let c1 = <u32>load<u16>(changetype<usize>(this) + (i << 1), 2);
556+
if (c1 - 0xDBFF < 0xE000 - 0xDBFF) {
557+
let c0 = c;
558+
c = (((c & 0x03FF) << 10) | (c1 & 0x03FF)) + 0x10000;
559+
++i;
560+
if (c >= 0x20000) {
561+
store<u32>(codes + (j << 1), c0 | (c1 << 16));
562+
++j;
563+
continue;
564+
}
565+
}
566+
}
567+
// Range 0x24D0 <= c <= 0x24E9 is not covered by casemap and requires special early handling
568+
if (c - 0x24D0 <= 0x24E9 - 0x24D0) {
569+
// monkey patch
570+
store<u16>(codes + (j << 1), c - 26);
571+
} else {
572+
let index = -1;
573+
// Fast range check. See first and last rows in specialsUpper table
574+
if (c - 0x00DF <= 0xFB17 - 0x00DF) {
575+
index = <usize>bsearch(c, specialsUpperPtr, specialsUpperLen);
576+
}
577+
if (~index) {
578+
// load next 3 code points from row with `index` offset for specialsUpper table
579+
let ab = load<u32>(specialsUpperPtr + (index << 1), 2);
580+
let cc = load<u16>(specialsUpperPtr + (index << 1), 6);
581+
store<u32>(codes + (j << 1), ab, 0);
582+
store<u16>(codes + (j << 1), cc, 4);
583+
j += 1 + usize(cc != 0);
584+
} else {
585+
let code = casemap(c, 1) & 0x1FFFFF;
586+
if (code < 0x10000) {
587+
store<u16>(codes + (j << 1), code);
588+
} else {
589+
// store as surrogate pair
590+
code -= 0x10000;
591+
let lo = (code >>> 10) | 0xD800;
592+
let hi = (code & 0x03FF) | 0xDC00;
593+
store<u32>(codes + (j << 1), lo | (hi << 16));
594+
++j;
595+
}
596+
}
597+
}
598+
}
599+
}
600+
codes = __realloc(codes, j << 1);
601+
return changetype<String>(codes); // retains
602+
}
603+
487604
toString(): String {
488605
return this;
489606
}
@@ -548,7 +665,7 @@ export namespace String {
548665
if ((c1 & 0xFC00) == 0xD800 && strOff + 2 < strEnd) {
549666
let c2 = <u32>load<u16>(strOff, 2);
550667
if ((c2 & 0xFC00) == 0xDC00) {
551-
c1 = 0x10000 + ((c1 & 0x03FF) << 10) + (c2 & 0x03FF);
668+
c1 = 0x10000 + ((c1 & 0x03FF) << 10) | (c2 & 0x03FF);
552669
store<u8>(bufOff, c1 >> 18 | 240);
553670
store<u8>(bufOff, c1 >> 12 & 63 | 128, 1);
554671
store<u8>(bufOff, c1 >> 6 & 63 | 128, 2);
@@ -573,17 +690,17 @@ export namespace String {
573690
return changetype<ArrayBuffer>(buf); // retains
574691
}
575692

576-
export function decode(buf: ArrayBuffer, nullTerminated: bool = false): string {
693+
export function decode(buf: ArrayBuffer, nullTerminated: bool = false): String {
577694
return decodeUnsafe(changetype<usize>(buf), buf.byteLength, nullTerminated);
578695
}
579696

580697
// @ts-ignore: decorator
581698
@unsafe
582-
export function decodeUnsafe(buf: usize, len: usize, nullTerminated: bool = false): string {
699+
export function decodeUnsafe(buf: usize, len: usize, nullTerminated: bool = false): String {
583700
var bufOff = buf;
584701
var bufEnd = buf + len;
585702
assert(bufEnd >= bufOff); // guard wraparound
586-
var str = __alloc(len << 1, idof<string>()); // max is one u16 char per u8 byte
703+
var str = __alloc(len << 1, idof<String>()); // max is one u16 char per u8 byte
587704
var strOff = str;
588705
while (bufOff < bufEnd) {
589706
let cp = <u32>load<u8>(bufOff++);
@@ -604,8 +721,8 @@ export namespace String {
604721
load<u8>(bufOff, 2) & 63
605722
) - 0x10000;
606723
bufOff += 3;
607-
store<u16>(strOff, 0xD800 + (cp >> 10));
608-
store<u16>(strOff, 0xDC00 + (cp & 1023), 2);
724+
store<u16>(strOff, 0xD800 | (cp >> 10));
725+
store<u16>(strOff, 0xDC00 | (cp & 1023), 2);
609726
strOff += 4;
610727
} else {
611728
if (bufEnd - bufOff < 2) break;
@@ -617,7 +734,7 @@ export namespace String {
617734
bufOff += 2; strOff += 2;
618735
}
619736
}
620-
return changetype<string>(__realloc(str, strOff - str)); // retains
737+
return changetype<String>(__realloc(str, strOff - str)); // retains
621738
}
622739
}
623740

@@ -634,16 +751,16 @@ export namespace String {
634751
return changetype<ArrayBuffer>(buf); // retains
635752
}
636753

637-
export function decode(buf: ArrayBuffer): string {
754+
export function decode(buf: ArrayBuffer): String {
638755
return decodeUnsafe(changetype<usize>(buf), buf.byteLength);
639756
}
640757

641758
// @ts-ignore: decorator
642759
@unsafe
643-
export function decodeUnsafe(buf: usize, len: usize): string {
644-
var str = __alloc(len &= ~1, idof<string>());
760+
export function decodeUnsafe(buf: usize, len: usize): String {
761+
var str = __alloc(len &= ~1, idof<String>());
645762
memory.copy(str, buf, len);
646-
return changetype<string>(str); // retains
763+
return changetype<String>(str); // retains
647764
}
648765
}
649766
}

0 commit comments

Comments
 (0)