Skip to content

Commit

Permalink
fix: fixed small bug with case insensitive range matching
Browse files Browse the repository at this point in the history
  • Loading branch information
ColinEberhardt committed Feb 20, 2021
1 parent c0f279b commit d92d3c3
Show file tree
Hide file tree
Showing 7 changed files with 84 additions and 30 deletions.
21 changes: 15 additions & 6 deletions assembly/__spec_tests__/generated.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -776,7 +776,11 @@ it("line: 152 - matches ([\\da-f:]+)$ against 'abc'", () => {
expect(match.matches[0]).toBe("abc".substring(0, 3));
expect(match.matches[1]).toBe("abc".substring(0, 3));
});
xit("line: 153 - aspect [Actual]: <Match>null vs [Expected]: Not <Match>null issue", () => {});
it("line: 153 - matches ([\\da-f:]+)$ against 'fed'", () => {
  // The whole input is expected both as the overall match and as capture group 1.
  const result = exec("([\\da-f:]+)$", "fed", "is");
  const wholeMatch = "fed".substring(0, 3);
  const firstGroup = "fed".substring(0, 3);
  expect(result.matches[0]).toBe(wholeMatch);
  expect(result.matches[1]).toBe(firstGroup);
});
it("line: 154 - matches ([\\da-f:]+)$ against 'E'", () => {
const match = exec("([\\da-f:]+)$", "E", "is");
expect(match.matches[0]).toBe("E".substring(0, 1));
Expand Down Expand Up @@ -1044,8 +1048,8 @@ xit("line: 199 - non capturing groups not supported", () => {});
xit("line: 200 - non capturing groups not supported", () => {});
xit("line: 201 - non capturing groups not supported", () => {});
xit("line: 202 - non capturing groups not supported", () => {});
xit("line: 203 - aspect [Actual]: <Match>null vs [Expected]: Not <Match>null issue", () => {});
xit("line: 204 - aspect [Actual]: <Match>null vs [Expected]: Not <Match>null issue", () => {});
xit("line: 203 - test appears to be incorrect?", () => {});
xit("line: 204 - test appears to be incorrect?", () => {});
it("line: 205 - matches ^ a\\ b[c ]d $ against 'abcd'", () => {
  // None of the listed inputs may produce a match for this pattern.
  const rejected: string[] = ["abcd"];
  expectNotMatch("^ a\\ b[c ]d $", rejected);
});
Expand Down Expand Up @@ -1352,7 +1356,7 @@ it("line: 1083 - matches ^[ab]{1,3}(ab*?|b) against 'The quick brown fox'", () =
xit("line: 1084 - back references are not supported", () => {});
xit("line: 1085 - back references are not supported", () => {});
xit("line: 1086 - test encoding issue", () => {});
xit("line: 1087 - requires triage", () => {});
xit("line: 1087 - test requires a substring function", () => {});
xit("line: 1088 - requires triage", () => {});
it("line: 1089 - matches abc\\x0def\\x00pqr\\x000xyz\\x0000AB against 'abc456 abc\x0def\x00pqr\x000xyz\x0000ABCDE'", () => {
const match = exec(
Expand Down Expand Up @@ -1506,8 +1510,13 @@ it("line: 1144 - matches ^[W-c]+$ against 'WXY_^abc'", () => {
const match = exec("^[W-c]+$", "WXY_^abc", "s");
expect(match.matches[0]).toBe("WXY_^abc".substring(0, 8));
});
xit("line: 1145 - as-pect test issue", () => {});
xit("line: 1146 - as-pect test issue", () => {});
it("line: 1145 - matches ^[W-c]+$ against 'wxy'", () => {
  // No flags are passed here, so the range is matched case-sensitively.
  const rejected: string[] = ["wxy"];
  expectNotMatch("^[W-c]+$", rejected);
});
it("line: 1146 - matches ^[W-c]+$ against 'WXY_^abc'", () => {
  // Same pattern as the preceding cases, this time with the "i" flag set.
  const result = exec("^[W-c]+$", "WXY_^abc", "is");
  const wholeMatch = "WXY_^abc".substring(0, 8);
  expect(result.matches[0]).toBe(wholeMatch);
});
xit("line: 1147 - requires triage", () => {});
xit("line: 1148 - requires triage", () => {});
xit("line: 1149 - requires triage", () => {});
Expand Down
9 changes: 9 additions & 0 deletions assembly/__tests__/character-sets.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,12 @@ it("treats - as a literal in negated sets", () => {
expectNotMatch("[^-abc]", ["-", "a", "b", "c"]);
expectMatch("[^-abc]", ["1", "A"]);
});

it("supports case insensitive matching", () => {
  const ignoreCase = "i";

  // a range that lies entirely within the lowercase letters
  const simple = "[a-c]";
  expectMatch(simple, ["A", "C", "a", "c"], ignoreCase);
  expectNotMatch(simple, ["D", "d"], ignoreCase);

  // a range that starts among the uppercase letters and ends among the lowercase ones
  const mixed = "[W-c]";
  expectMatch(mixed, ["W", "w", "C", "c"], ignoreCase);
  expectNotMatch(mixed, ["V", "v", "D", "d"], ignoreCase);
});
16 changes: 12 additions & 4 deletions assembly/__tests__/utils.ts
Original file line number Diff line number Diff line change
@@ -1,16 +1,24 @@
import { RegExp, Match } from "..";

export function expectMatch(regex: string, arr: string[]): void {
let regexp = new RegExp(regex);
export function expectMatch(
regex: string,
arr: string[],
flags: string = ""
): void {
let regexp = new RegExp(regex, flags);
for (let i = 0; i < arr.length; i++) {
const value = arr[i];
const match = exec(regexp, value);
expect(match.matches[0]).toStrictEqual(value);
}
}

export function expectNotMatch(regex: string, arr: string[]): void {
let regexp = new RegExp(regex);
export function expectNotMatch(
regex: string,
arr: string[],
flags: string = ""
): void {
let regexp = new RegExp(regex, flags);
for (let i = 0; i < arr.length; i++) {
const match = regexp.exec(arr[i]);
expect(match).toBeNull(
Expand Down
34 changes: 27 additions & 7 deletions assembly/nfa/matcher.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import {
NodeType,
} from "../parser/node";
import { Flags } from "../regexp";
import { Range } from "../util";

const enum MatcherType {
Character,
Expand Down Expand Up @@ -36,7 +37,10 @@ export class Matcher {
node: CharacterRangeNode,
flags: Flags
): CharacterRangeMatcher {
return new CharacterRangeMatcher(node.from, node.to, flags.ignoreCase);
return new CharacterRangeMatcher(
new Range(node.from, node.to),
flags.ignoreCase
);
}

static fromCharacterSetNode(
Expand Down Expand Up @@ -89,20 +93,36 @@ export class CharacterMatcher extends Matcher {
}
}

// Range from Char.a to Char.z; intersected with a character range to find the
// part of it that consists of lowercase letters.
const LOWERCASE_LETTERS = new Range(Char.a, Char.z);
// Range from Char.A to Char.Z; intersected with a character range to find the
// part of it that consists of uppercase letters.
const UPPERCASE_LETTERS = new Range(Char.A, Char.Z);
// Amount added to an uppercase range (or subtracted from a lowercase one) to
// obtain its other-case counterpart.
// NOTE(review): presumably 0x20 if Char holds ASCII codes; confirm against the Char enum.
const UPPER_LOWER_OFFSET = Char.a - Char.A;

export class CharacterRangeMatcher extends Matcher {
constructor(private from: u32, private to: u32, private ignoreCase: bool) {
private ranges: Range[];

constructor(private range: Range, ignoreCase: bool) {
super(MatcherType.CharacterRange);
this.ranges = [range];

if (ignoreCase) {
this.from |= 0x20;
this.to |= 0x20;
const lowerIntersect = range.intersection(LOWERCASE_LETTERS);
if (lowerIntersect) {
this.ranges.push(lowerIntersect.offset(-UPPER_LOWER_OFFSET));
}
const upperIntersect = range.intersection(UPPERCASE_LETTERS);
if (upperIntersect) {
this.ranges.push(upperIntersect.offset(UPPER_LOWER_OFFSET));
}
}
}

matches(code: u32): bool {
if (this.ignoreCase) {
code |= 0x20;
for (let i = 0, len = this.ranges.length; i < len; i++) {
if (code >= u32(this.ranges[i].from) && code <= u32(this.ranges[i].to)) {
return true;
}
}
return code >= this.from && code <= this.to;
return false;
}
}

Expand Down
14 changes: 14 additions & 0 deletions assembly/util.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,17 @@ export function replaceAtIndex<T>(arr: T[], index: u32, item: T): T[] {
unchecked((res[index] = item));
return res;
}

/**
 * A range of integer values (for example character codes) in which both
 * `from` and `to` are themselves members of the range.
 */
export class Range {
  constructor(public from: i32, public to: i32) {}

  /**
   * Computes the overlap between this range and `other`.
   *
   * @param other the range to intersect with
   * @returns a new Range covering the values common to both ranges, or null
   *          when the two ranges share no value
   */
  intersection(other: Range): Range | null {
    // Plain integer comparisons avoid a round-trip through floating point.
    const lower = this.from > other.from ? this.from : other.from;
    const upper = this.to < other.to ? this.to : other.to;
    // Both bounds are inclusive, so equal bounds describe a one-value overlap
    // (e.g. [a-a] against [a-z]) which must not be reported as empty.
    return lower <= upper ? new Range(lower, upper) : null;
  }

  /**
   * Creates a copy of this range shifted by `value`.
   *
   * @param value the amount added to both bounds (may be negative)
   * @returns a new Range; this instance is left untouched
   */
  offset(value: i32): Range {
    return new Range(this.from + value, this.to + value);
  }
}
12 changes: 5 additions & 7 deletions spec/test-generator.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,11 @@ const knownIssues = {
1288,
],
"test contains an octal escape sequence": [1102],
// the test results measure captured groups using character length / locations
// see: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/length
// this is tricky to reproduce
"test requires a substring function": [1087],
"requires triage": [
1087,
1363,
1369,
1163,
Expand All @@ -32,15 +35,10 @@ const knownIssues = {
1413,
...range(1301, 1308),
],
"as-pect test issue": [1145, 1146],
"test indicates a malformed regex, whereas it appears OK in JS": [1189],
"test regex contains syntax not supported in JS": [82, 1158, 281],
"the test behaviour differs between PCRE and JS": [290],
"aspect [Actual]: <Match>null vs [Expected]: Not <Match>null issue": [
153,
203,
204,
],
"test appears to be incorrect?": [203, 204],
};

const hasKnownIssue = (index) => {
Expand Down
8 changes: 2 additions & 6 deletions ts/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,6 @@ globalAny.log = console.log;

import { RegExp } from "../assembly/regexp";

const regexObj = new RegExp("^(a){1,3}");
const match = regexObj.exec("abc");
const regexObj = new RegExp("[a-c]", "i");
const match = regexObj.exec("A");
console.log(JSON.stringify(match, null, 2));

const regexObj2 = new RegExp("(a|b)c|a(b|c)");
const match2 = regexObj2.exec("ab");
console.log(JSON.stringify(match2, null, 2));

0 comments on commit d92d3c3

Please sign in to comment.