From 72b2c8b9301aa705aa883cb6ebd296365798371c Mon Sep 17 00:00:00 2001
From: Shahar Soel
Date: Mon, 19 Dec 2016 01:01:56 +0200
Subject: [PATCH] Support Lexing with custom (non-RegExp) Token Patterns.

fixes #331
---
 docs/custom_token_patterns.md                      | 76 ++++++++++++++++++
 examples/lexer/README.md                           |  1 +
 .../lexer/custom_patterns/custom_patterns.js       | 62 ++++++++++++++
 .../custom_patterns/custom_patterns_spec.js        | 23 ++++++
 readme.md                                          |  1 +
 src/scan/lexer.ts                                  | 80 +++++++++++++------
 src/scan/lexer_public.ts                           | 43 +++++++---
 src/scan/tokens_public.ts                          | 38 +++++++--
 src/utils/utils.ts                                 |  5 +-
 test/scan/lexer_spec.ts                            | 48 ++++++++++-
 10 files changed, 335 insertions(+), 42 deletions(-)
 create mode 100644 docs/custom_token_patterns.md
 create mode 100644 examples/lexer/custom_patterns/custom_patterns.js
 create mode 100644 examples/lexer/custom_patterns/custom_patterns_spec.js

diff --git a/docs/custom_token_patterns.md b/docs/custom_token_patterns.md
new file mode 100644
index 000000000..dba0c98e9
--- /dev/null
+++ b/docs/custom_token_patterns.md
@@ -0,0 +1,76 @@
+## Custom Token Patterns
+
+See: [**Runnable example**](../examples/lexer/custom_patterns/custom_patterns.js) for a quick start.
+
+### Background
+Normally a Token's pattern is defined using a JavaScript regular expression:
+
+```JavaScript
+let IntegerToken = createToken({name: "IntegerToken", pattern: /\d+/})
+```
+
+However, in some circumstances a custom pattern matching implementation may be required:
+perhaps a special Token that cannot easily be defined using regular expressions, or perhaps
+a workaround for performance problems in a specific regular expression engine, for example:
+
+* [WebKit/Safari multiple orders of magnitude performance degradation for specific regExp patterns](https://bugs.webkit.org/show_bug.cgi?id=152578) 😞
+
+
+### Usage
+A custom pattern must conform to the API of the [RegExp.prototype.exec](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/exec)
+function. Additionally, it must perform any matches from the **start** of the input. In RegExp semantics this means
+that any custom pattern implementation should behave as if the [start of input anchor](http://www.rexegg.com/regex-anchors.html#caret)
+had been used.
+
+
+The basic syntax for supplying a custom pattern is defined by the [ICustomPattern](http://sap.github.io/chevrotain/documentation/0_20_0/interfaces/icustompattern.html) interface.
+Example:
+
+```JavaScript
+function matchInteger(text) {
+    let i = 0
+    let charCode = text.charCodeAt(i)
+    while (charCode >= 48 && charCode <= 57) {
+        i++
+        charCode = text.charCodeAt(i)
+    }
+
+    // No match, must return null to conform with the RegExp.prototype.exec signature
+    if (i === 0) {
+        return null
+    }
+    else {
+        let matchedString = text.substring(0, i)
+        // According to the RegExp.prototype.exec API, the first item in the returned array must be the whole matched string.
+        return [matchedString]
+    }
+}
+
+let IntegerToken = createToken({
+    name: "IntegerToken",
+    pattern: {
+        exec: matchInteger,
+        containsLineTerminator: false
+    }})
+```
+
+The **containsLineTerminator** property is used by the lexer to correctly compute line/column numbers.
+If the custom pattern could possibly match a line terminator then this property must be set to "true".
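+For example, a whitespace Token that may span multiple lines would have to declare it.
+A minimal sketch (the `matchMultiLineWs` helper below is only illustrative, it is not part of the library):
+
+```JavaScript
+// Matches one or more whitespace characters (possibly including "\n" / "\r")
+// from the start of the input, conforming to the RegExp.prototype.exec API.
+function matchMultiLineWs(text) {
+    return /^\s+/.exec(text)
+}
+
+let WhitespaceToken = createToken({
+    name: "WhitespaceToken",
+    pattern: {
+        exec: matchMultiLineWs,
+        // The matched text may include line terminators, so the lexer must
+        // inspect it when computing line/column positions.
+        containsLineTerminator: true
+    }})
+```
+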
+Most Tokens can never contain a line terminator, so the property is optional (false by default), which enables a shorter syntax:
+
+```JavaScript
+let IntegerToken = createToken({
+    name: "IntegerToken",
+    pattern: {
+        exec: matchInteger
+    }})
+```
+
+Using an Object literal with only a single property is still a little verbose, so an even more concise syntax is also supported:
+```JavaScript
+let IntegerToken = createToken({name: "IntegerToken", pattern: matchInteger})
+```
diff --git a/examples/lexer/README.md b/examples/lexer/README.md
index 48289580a..c12ad5426 100644
--- a/examples/lexer/README.md
+++ b/examples/lexer/README.md
@@ -6,6 +6,7 @@ A few simple examples of using the Chevrotain Lexer to resolve some common lexin
 * [Keywords vs Identifiers](https://github.com/SAP/Chevrotain/blob/master/examples/lexer/keywords_vs_identifiers/keywords_vs_identifiers.js)
 * [Token Groups](https://github.com/SAP/Chevrotain/blob/master/examples/lexer/token_groups/token_groups.js)
 * [Lexer with Multiple Modes](https://github.com/SAP/Chevrotain/blob/master/examples/lexer/multi_mode_lexer/multi_mode_lexer.js)
+* [Custom Token Pattern implementations](https://github.com/SAP/Chevrotain/blob/master/examples/lexer/custom_patterns/custom_patterns.js)
 
 to run all the lexer examples's tests:
diff --git a/examples/lexer/custom_patterns/custom_patterns.js b/examples/lexer/custom_patterns/custom_patterns.js
new file mode 100644
index 000000000..e65ee94fd
--- /dev/null
+++ b/examples/lexer/custom_patterns/custom_patterns.js
@@ -0,0 +1,62 @@
+/**
+ * This example demonstrates the usage of custom token patterns.
+ * Custom token patterns allow implementing token matchers using arbitrary JavaScript code
+ * instead of being limited to only using regular expressions.
+ *
+ * For additional details see the docs:
+ * https://github.com/SAP/chevrotain/blob/master/docs/custom_token_patterns.md
+ */
+let chevrotain = require("chevrotain")
+let createToken = chevrotain.createToken
+let Lexer = chevrotain.Lexer
+
+
+// First, let's define our custom pattern for matching an Integer Literal.
+// This function's signature matches the RegExp.prototype.exec function.
+// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/exec
+function matchInteger(text) {
+    let i = 0
+    let charCode = text.charCodeAt(i)
+    while (charCode >= 48 && charCode <= 57) {
+        i++
+        charCode = text.charCodeAt(i)
+    }
+
+    // No match, must return null to conform with the RegExp.prototype.exec signature
+    if (i === 0) {
+        return null
+    }
+    else {
+        let matchedString = text.substring(0, i)
+        // According to the RegExp.prototype.exec API, the first item in the returned array must be the whole matched string.
+        return [matchedString]
+    }
+}
+
+// Now we can simply replace the regExp pattern with our custom pattern.
+// Consult the docs (linked above) for additional syntax variants.
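+// For reference, a sketch of the equivalent (more verbose) object literal variant
+// described in the docs would be:
+// let IntegerLiteral = createToken({name: "IntegerLiteral", pattern: {exec: matchInteger}})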
+let IntegerLiteral = createToken({name: "IntegerLiteral", pattern: matchInteger})
+let Comma = createToken({name: "Comma", pattern: /,/})
+let Whitespace = createToken({name: "Whitespace", pattern: /\s+/, group: Lexer.SKIPPED})
+
+let customPatternLexer = new Lexer(
+    [
+        Whitespace,
+        Comma,
+        IntegerLiteral
+    ])
+
+module.exports = {
+
+    IntegerLiteral: IntegerLiteral,
+    Comma: Comma,
+
+    tokenize: function(text) {
+        let lexResult = customPatternLexer.tokenize(text)
+
+        if (lexResult.errors.length >= 1) {
+            throw new Error("sad sad panda lexing errors detected")
+        }
+        return lexResult
+    }
+}
diff --git a/examples/lexer/custom_patterns/custom_patterns_spec.js b/examples/lexer/custom_patterns/custom_patterns_spec.js
new file mode 100644
index 000000000..8be549519
--- /dev/null
+++ b/examples/lexer/custom_patterns/custom_patterns_spec.js
@@ -0,0 +1,23 @@
+let assert = require("assert")
+let expect = require("chai").expect
+let customPatternExample = require("./custom_patterns")
+
+let tokenize = customPatternExample.tokenize
+let Comma = customPatternExample.Comma
+let IntegerLiteral = customPatternExample.IntegerLiteral
+
+describe("The Chevrotain Lexer's ability to use custom pattern implementations.", () => {
+
+    it("Can Lex a simple input using a custom Integer Literal pattern", () => {
+        let text = `1 , 2 , 3`
+        let lexResult = tokenize(text)
+
+        assert.equal(lexResult.errors.length, 0)
+        assert.equal(lexResult.tokens.length, 5)
+
+        expect(lexResult.tokens[0]).to.be.an.instanceof(IntegerLiteral)
+        expect(lexResult.tokens[1]).to.be.an.instanceof(Comma)
+        expect(lexResult.tokens[2]).to.be.an.instanceof(IntegerLiteral)
+        expect(lexResult.tokens[3]).to.be.an.instanceof(Comma)
+        expect(lexResult.tokens[4]).to.be.an.instanceof(IntegerLiteral)
+    })
+})
diff --git a/readme.md b/readme.md
index 1c0ff1d62..ff90b58f1 100644
--- a/readme.md
+++ b/readme.md
@@ -38,6 +38,7 @@ any code generation phase.
    * [Multiple Lexer Modes][lexer_modes] depending on the context.
    * [Tokens Grouping][lexer_groups].
    * [Different Token types for balancing performance, memory usage and ease of use](docs/token_types.md).
+   * [Custom Token patterns (non-RegExp) support](docs/custom_token_patterns.md)
    * **No code generation** The Lexer does not require any code generation phase.
 3. [**High Performance**][benchmark].
diff --git a/src/scan/lexer.ts b/src/scan/lexer.ts
index d95d91c02..28ab0fec1 100644
--- a/src/scan/lexer.ts
+++ b/src/scan/lexer.ts
@@ -1,10 +1,12 @@
-import {Token, tokenName, ISimpleTokenOrIToken} from "./tokens_public"
-import {TokenConstructor, ILexerDefinitionError, LexerDefinitionErrorType, Lexer, IMultiModeLexerDefinition} from "./lexer_public"
+import {Token, tokenName, ISimpleTokenOrIToken, CustomPatternMatcherFunc} from "./tokens_public"
+import {
+    TokenConstructor, ILexerDefinitionError, LexerDefinitionErrorType, Lexer, IMultiModeLexerDefinition,
+    IRegExpExec
+} from "./lexer_public"
 import {
     reject,
     indexOf,
     map,
-    zipObject,
     isString,
     isUndefined,
     reduce,
@@ -19,7 +21,8 @@ import {
     uniq,
     every,
     keys,
-    isArray
+    isArray,
+    isFunction
 } from "../utils/utils"
 import {isLazyTokenType, isSimpleTokenType} from "./tokens"
 
@@ -28,7 +31,7 @@ export const DEFAULT_MODE = "defaultMode"
 export const MODES = "modes"
 
 export interface IAnalyzeResult {
-    allPatterns:RegExp[]
+    allPatterns:IRegExpExec[]
     patternIdxToClass:Function[]
     patternIdxToGroup:any[]
    patternIdxToLongerAltIdx:number[]
@@ -38,6 +41,8 @@ export interface IAnalyzeResult {
     emptyGroups:{ [groupName:string]:Token[] }
 }
 
+const CONTAINS_LINE_TERMINATOR = "containsLineTerminator"
+
 export function analyzeTokenClasses(tokenClasses:TokenConstructor[]):IAnalyzeResult {
 
     let onlyRelevantClasses = reject(tokenClasses, (currClass) => {
@@ -45,15 +50,27 @@ export function analyzeTokenClasses(tokenClasses:TokenConstructor[]):IAnalyzeRes
     })
 
     let allTransformedPatterns = map(onlyRelevantClasses, (currClass) => {
-        return addStartOfInput(currClass[PATTERN])
-    })
+        let currPattern = currClass[PATTERN]
 
-    let allPatternsToClass = zipObject(allTransformedPatterns, onlyRelevantClasses)
+        if (isRegExp(currPattern)) {
+            return addStartOfInput(currPattern)
+        }
+        // CustomPatternMatcherFunc - custom patterns do not require any transformation, only wrapping in a RegExp-like object
+        else if (isFunction(currPattern)) {
+            return {exec: currPattern}
+        }
+        // ICustomPattern
+        else if (has(currPattern, "exec")) {
+            return currPattern
+        }
+        else {
+            throw Error("non exhaustive match")
+        }
+    })
 
-    let patternIdxToClass:any = map(allTransformedPatterns, (pattern) => {
-        return allPatternsToClass[pattern.toString()]
-    })
+    let patternIdxToClass = onlyRelevantClasses
 
     let patternIdxToGroup = map(onlyRelevantClasses, (clazz:any) => {
         let groupName = clazz.GROUP
         if (groupName === Lexer.SKIPPED) {
@@ -84,8 +101,16 @@ export function analyzeTokenClasses(tokenClasses:TokenConstructor[]):IAnalyzeRes
     let patternIdxToPopMode = map(onlyRelevantClasses, (clazz:any) => has(clazz, "POP_MODE"))
 
     let patternIdxToCanLineTerminator = map(allTransformedPatterns, (pattern:RegExp) => {
-        // TODO: unicode escapes of line terminators too?
-        return /\\n|\\r|\\s/g.test(pattern.source)
+        if (isRegExp(pattern)) {
+            // TODO: unicode escapes of line terminators too?
+            return /\\n|\\r|\\s/g.test(pattern.source)
+        }
+        else {
+            if (has(pattern, CONTAINS_LINE_TERMINATOR)) {
+                return pattern[CONTAINS_LINE_TERMINATOR]
+            }
+            return false
+        }
     })
 
     let emptyGroups = reduce(onlyRelevantClasses, (acc, clazz:any) => {
@@ -112,18 +137,13 @@ export function validatePatterns(tokenClasses:TokenConstructor[], validModesName
     let errors = []
 
     let missingResult = findMissingPatterns(tokenClasses)
-    let validTokenClasses = missingResult.valid
     errors = errors.concat(missingResult.errors)
 
-    let invalidResult = findInvalidPatterns(validTokenClasses)
-    validTokenClasses = invalidResult.valid
+    let invalidResult = findInvalidPatterns(missingResult.valid)
+    let validTokenClasses = invalidResult.valid
     errors = errors.concat(invalidResult.errors)
 
-    errors = errors.concat(findEndOfInputAnchor(validTokenClasses))
-
-    errors = errors.concat(findUnsupportedFlags(validTokenClasses))
-
-    errors = errors.concat(findDuplicatePatterns(validTokenClasses))
+    errors = errors.concat(validateRegExpPattern(validTokenClasses))
 
     errors = errors.concat(findInvalidGroupType(validTokenClasses))
 
     return errors
 }
 
+function validateRegExpPattern(tokenClasses:TokenConstructor[]):ILexerDefinitionError[] {
+    let errors = []
+    let withRegExpPatterns = filter(tokenClasses, (currTokClass) => isRegExp(currTokClass[PATTERN]))
+
+    errors = errors.concat(findEndOfInputAnchor(withRegExpPatterns))
+
+    errors = errors.concat(findUnsupportedFlags(withRegExpPatterns))
+
+    errors = errors.concat(findDuplicatePatterns(withRegExpPatterns))
+
+    return errors
+}
+
 export interface ILexerFilterResult {
     errors:ILexerDefinitionError[]
     valid:TokenConstructor[]
 }
@@ -157,12 +190,13 @@ export function findMissingPatterns(tokenClasses:TokenConstructor[]):ILexerFilte
 export function findInvalidPatterns(tokenClasses:TokenConstructor[]):ILexerFilterResult {
     let tokenClassesWithInvalidPattern = filter(tokenClasses, (currClass) => {
         let pattern = currClass[PATTERN]
-        return !isRegExp(pattern)
+        return !isRegExp(pattern) && !isFunction(pattern) && !has(pattern, "exec")
     })
 
     let errors = map(tokenClassesWithInvalidPattern, (currClass) => {
         return {
-            message: "Token class: ->" + tokenName(currClass) + "<- static 'PATTERN' can only be a RegExp",
+            message: "Token class: ->" + tokenName(currClass) + "<- static 'PATTERN' can only be a RegExp, a" +
+            " Function matching the {CustomPatternMatcherFunc} type or an Object matching the {ICustomPattern} interface.",
             type: LexerDefinitionErrorType.INVALID_PATTERN,
             tokenClasses: [currClass]
         }
@@ -361,8 +395,6 @@ export function performRuntimeChecks(lexerDefinition:IMultiModeLexerDefinition):
             })
         }
     })
-
-    // lexerDefinition.modes[currModeName] = reject(currModeValue, (currTokClass) => isUndefined(currTokClass))
     })
 }
diff --git a/src/scan/lexer_public.ts b/src/scan/lexer_public.ts
index cf9eb3957..1c0043a32 100644
--- a/src/scan/lexer_public.ts
+++ b/src/scan/lexer_public.ts
@@ -1,7 +1,21 @@
-import {Token, LazyTokenCacheData, getImage, getStartLine, getStartColumn, ISimpleTokenOrIToken} from "./tokens_public"
 import {
-    validatePatterns, analyzeTokenClasses, countLineTerminators, DEFAULT_MODE, performRuntimeChecks, checkLazyMode,
-    checkSimpleMode, cloneEmptyGroups
+    Token,
+    LazyTokenCacheData,
+    getImage,
+    getStartLine,
+    getStartColumn,
+    ISimpleTokenOrIToken,
+    CustomPatternMatcherFunc
+} from "./tokens_public"
+import {
+    validatePatterns,
+    analyzeTokenClasses,
+    countLineTerminators,
+    DEFAULT_MODE,
+    performRuntimeChecks,
+    checkLazyMode,
+    checkSimpleMode,
+    cloneEmptyGroups
 } from "./lexer"
 import {
     cloneObj,
@@ -19,8 +33,13 @@ import {
     mapValues
 } from "../utils/utils"
 import {
-    fillUpLineToOffset, getStartColumnFromLineToOffset, getStartLineFromLineToOffset, augmentTokenClasses,
-    createSimpleLazyToken, LazyTokenCreator, createLazyTokenInstance
+    fillUpLineToOffset,
+    getStartColumnFromLineToOffset,
+    getStartLineFromLineToOffset,
+    augmentTokenClasses,
+    createSimpleLazyToken,
+    LazyTokenCreator,
+    createLazyTokenInstance
 } from "./tokens"
 
 export interface TokenConstructor extends Function {
@@ -80,6 +99,10 @@ export interface IMultiModeLexerDefinition {
     defaultMode:string
 }
 
+export interface IRegExpExec {
+    exec:CustomPatternMatcherFunc
+}
+
 export class Lexer {
 
     public static SKIPPED = "This marks a skipped Token pattern, this means each token identified by it will" +
@@ -92,7 +115,7 @@ export class Lexer {
     protected isSimpleTokenMode
     protected modes:string[] = []
     protected defaultMode:string
-    protected allPatterns:{ [modeName:string]:RegExp[] } = {}
+    protected allPatterns:{ [modeName:string]:IRegExpExec[] } = {}
     protected patternIdxToClass:{ [modeName:string]:Function[] } = {}
     protected patternIdxToGroup:{ [modeName:string]:string[] } = {}
     protected patternIdxToLongerAltIdx:{ [modeName:string]:number[] } = {}
@@ -472,8 +495,8 @@ export class Lexer {
                 text = text.substr(1)
                 offset++
                 for (j = 0; j < currModePatterns.length; j++) {
-                    foundResyncPoint = currModePatterns[j].test(text)
-                    if (foundResyncPoint) {
+                    foundResyncPoint = currModePatterns[j].exec(text)
+                    if (foundResyncPoint !== null) {
                         break
                     }
                 }
@@ -609,8 +632,8 @@ export class Lexer {
                 text = text.substr(1)
                 offset++
                 for (j = 0; j < currModePatterns.length; j++) {
-                    foundResyncPoint = currModePatterns[j].test(text)
-                    if (foundResyncPoint) {
+                    foundResyncPoint = currModePatterns[j].exec(text)
+                    if (foundResyncPoint !== null) {
                         break
                     }
                 }
diff --git a/src/scan/tokens_public.ts b/src/scan/tokens_public.ts
index 11cc57373..1e322101d 100644
--- a/src/scan/tokens_public.ts
+++ b/src/scan/tokens_public.ts
@@ -1,6 +1,6 @@
 import {isString, isRegExp, isFunction, isUndefined, assignNoOverwrite, has} from "../utils/utils"
 import {functionName, defineNameProp} from "../lang/lang_extensions"
-import {Lexer, TokenConstructor} from "./lexer_public"
+import {Lexer, TokenConstructor, IRegExpExec} from "./lexer_public"
 import {
     isInheritanceBasedToken,
     getStartLineFromLazyToken,
@@ -14,11 +14,39 @@ import {
     augmentTokenClasses
 } from "./tokens"
 
+/**
+ * The type of custom pattern matcher functions.
+ * Matches should only be done from the start of the text.
+ * Note that this is identical to the signature of RegExp.prototype.exec.
+ *
+ * An implementation should behave as if the RegExp match had used a start of input anchor.
+ * So, for example, a custom matcher for Tokens matching /\w+/
+ * must be implemented as if the pattern were /^\w+/.
+ */
+export type CustomPatternMatcherFunc = (text:string) => RegExpExecArray
+
+/**
+ * Interface for custom user provided token pattern matchers.
+ */
+export interface ICustomPattern {
+    /**
+     * The custom pattern implementation.
+     * @see CustomPatternMatcherFunc
+     */
+    exec:CustomPatternMatcherFunc
+    /**
+     * Flag indicating whether this custom pattern may match line terminators.
+     * This is required to avoid errors in the line/column numbering.
+     * @default false - if this property was not explicitly defined.
+     */
+    containsLineTerminator?:boolean
+}
+
 /**
  * This can be used to improve the quality/readability of error messages or syntax diagrams.
  *
  * @param {Function} clazz - A constructor for a Token subclass
- * @returns {string} - The Human readable label a Token if it exists.
+ * @returns {string} - The Human readable label for a Token if it exists.
  */
 export function tokenLabel(clazz:Function):string {
     if (hasTokenLabel(clazz)) {
@@ -47,12 +75,11 @@ export function tokenName(clazz:Function):string {
     }
 }
 
-// TODO: uppper or lower case name? or support both???
 export interface ITokenConfig {
     name:string
     parent?:TokenConstructor
     label?:string
-    pattern?:RegExp
+    pattern?:RegExp | CustomPatternMatcherFunc | ICustomPattern
     group?:string|any
     push_mode?:string
     pop_mode?:boolean
@@ -67,7 +94,6 @@ const POP_MODE = "pop_mode"
 const LONGER_ALT = "longer_alt"
 
 /**
- *
  * @param {ITokenConfig} config - The configuration for
  * @returns {TokenConstructor} - A constructor for the new Token subclass
  */
@@ -128,7 +154,7 @@ export function extendSimpleLazyToken(tokenName:string, patternOrParent:any = un
  * extend and create Token subclasses in a less verbose manner
  *
  * @param {string} tokenName - The name of the new TokenClass
- * @param {RegExp|Function} patternOrParent - RegExp Pattern or Parent Token Constructor
+ * @param {RegExp|CustomPatternMatcherFunc|Function} patternOrParent - RegExp Pattern or Parent Token Constructor
 * @param {Function} parentConstructor - The Token class to be extended
 * @returns {Function} - A constructor for the new extended Token subclass
  */
diff --git a/src/utils/utils.ts b/src/utils/utils.ts
index 7f6b61431..972cb76af 100644
--- a/src/utils/utils.ts
+++ b/src/utils/utils.ts
@@ -138,7 +138,10 @@ export function pick(obj:Object, predicate:(item) => boolean) {
 }
 
 export function has(obj:any, prop:string):boolean {
-    return obj.hasOwnProperty(prop)
+    if (isObject(obj)) {
+        return obj.hasOwnProperty(prop)
+    }
+    return false
 }
 
 export function contains<T>(arr:T[], item):boolean {
diff --git a/test/scan/lexer_spec.ts b/test/scan/lexer_spec.ts
index 312633782..4d6ffecb5 100644
--- a/test/scan/lexer_spec.ts
+++ b/test/scan/lexer_spec.ts
@@ -11,7 +11,8 @@ import {
     getStartColumn,
     getStartLine,
     getEndLine,
-    getEndColumn, SimpleLazyToken
+    getEndColumn,
+    SimpleLazyToken, createToken
 } from "../../src/scan/tokens_public"
 import {Lexer, LexerDefinitionErrorType, IMultiModeLexerDefinition} from "../../src/scan/lexer_public"
 import {
@@ -36,6 +37,7 @@ function defineLexerSpecs(contextName, extendToken, tokenMatcher) {
 
     const IntegerTok = extendToken("IntegerTok", /^[1-9]\d*/)
     const IdentifierTok = extendToken("IdentifierTok", /^[A-Za-z_]\w*/)
     const BambaTok = extendToken("BambaTok", /^bamba/)
+
     BambaTok.LONGER_ALT = IdentifierTok
 
@@ -910,6 +912,50 @@ function defineLexerSpecs(contextName, extendToken, tokenMatcher) {
             expect(badLexer.lexerDefinitionErrors[0].message).to.include("NotSimpleTok1")
             expect(badLexer.lexerDefinitionErrors[0].message).to.include("NotSimpleTok2")
         })
+
+        context("custom pattern", () => {
+
+            function defineCustomPatternSpec(variant, customPattern) {
+                it(variant, () => {
+                    let A = createToken({name: "A", pattern: /A/})
+                    let B = createToken({name: "B", pattern: customPattern})
+                    let WS = createToken({
+                        name: "WS", pattern: {
+                            exec: (text) => /^\s+/.exec(text),
+                            containsLineTerminator: true
+                        }, group: Lexer.SKIPPED
+                    })
+
+                    let lexerDef:any = [WS, A, B]
+
+                    let myLexer = new Lexer(lexerDef)
+                    let lexResult = myLexer.tokenize("B A\n B ")
+
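+                    // Input "B A\n B " should lex into [B, A, B]; the whitespace is
+                    // skipped, and because the custom WS pattern declared
+                    // containsLineTerminator, the second B is reported on line 2.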
+                    expect(lexResult.tokens).to.have.length(3)
+                    expect(lexResult.tokens[0]).to.be.instanceOf(B)
+                    expect(lexResult.tokens[1]).to.be.instanceOf(A)
+                    expect(lexResult.tokens[2]).to.be.instanceOf(B)
+
+                    let lastToken = lexResult.tokens[2]
+                    expect(getStartLine(lastToken)).to.equal(2)
+                    expect(getEndLine(lastToken)).to.equal(2)
+                    expect(getStartColumn(lastToken)).to.equal(2)
+                    expect(getEndColumn(lastToken)).to.equal(2)
+                    expect(getStartOffset(lastToken)).to.equal(5)
+                    expect(getEndOffset(lastToken)).to.equal(5)
+                })
+            }
+
+            defineCustomPatternSpec("with short function syntax", (text) => /^B/.exec(text))
+            defineCustomPatternSpec("with explicit containsLineTerminator", {
+                exec: (text) => /^B/.exec(text),
+                containsLineTerminator: false
+            })
+            defineCustomPatternSpec("with implicit containsLineTerminator", {
+                exec: (text) => /^B/.exec(text)
+            })
+        })
     })
 })
})