Skip to content

Commit

Permalink
Option to disable regexes, or execute in a thread.
Browse files Browse the repository at this point in the history
Addresses issue #152.
The current default is to run regexes natively, thus there is no change in behaviour.  Still need to address how to run this in Node.
  • Loading branch information
NeilFraser committed Apr 9, 2019
1 parent 929dcce commit 7e6f780
Show file tree
Hide file tree
Showing 5 changed files with 270 additions and 38 deletions.
5 changes: 4 additions & 1 deletion demos/json.html
Expand Up @@ -56,7 +56,10 @@

function runButton() {
disable('disabled');
myInterpreter.run();
if (myInterpreter.run()) {
// Async function hit. There's more code to run.
disable('');
}
}

function disable(disabled) {
Expand Down
5 changes: 4 additions & 1 deletion demos/serialize.html
Expand Up @@ -63,7 +63,10 @@

function runButton() {
disable('disabled');
myInterpreter.run();
if (myInterpreter.run()) {
// Async function hit. There's more code to run.
disable('');
}
}

function disable(disabled) {
Expand Down
5 changes: 4 additions & 1 deletion index.html
Expand Up @@ -52,7 +52,10 @@

function runButton() {
disable('disabled');
myInterpreter.run();
if (myInterpreter.run()) {
// Async function hit. There's more code to run.
disable('');
}
}

function disable(disabled) {
Expand Down
237 changes: 202 additions & 35 deletions interpreter.js
Expand Up @@ -146,6 +146,21 @@ Interpreter.VALUE_IN_DESCRIPTOR = {'VALUE_IN_DESCRIPTOR': true};
*/
Interpreter.toStringCycles_ = [];

/**
 * Some pathological regular expressions can take geometric time.
 * This setting selects how the interpreter handles regular expressions.
 * It is one of three values:
 * 0 - throw as invalid.
 * 1 - execute natively (risk of unresponsive program).
 * 2 - execute in separate thread (not supported by IE 9).
 * The value is defined on the prototype and read through `this`, so the
 * default here is 1 (native execution).
 */
Interpreter.prototype.REGEXP_MODE = 1;

/**
 * If REGEXP_MODE = 2, the length of time (in ms) to allow a RegExp
 * thread to execute before terminating it.
 * The default of 10000 ms is ten seconds.
 */
Interpreter.prototype.REGEXP_THREAD_TIMEOUT = 10000;

/**
* Add more code to the interpreter.
* @param {string|!Object} code Raw JavaScript text or AST.
Expand Down Expand Up @@ -1095,40 +1110,114 @@ Interpreter.prototype.initString = function(scope) {
};
this.setNativeFunctionPrototype(this.STRING, 'localeCompare', wrapper);

wrapper = function(separator, limit) {
wrapper = function(separator, limit, callback) {
var string = String(this);
limit = limit ? Number(limit) : undefined;
// Example of catastrophic split RegExp:
// 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaac'.split(/^(a+)+b/)
if (thisInterpreter.isa(separator, thisInterpreter.REGEXP)) {
separator = separator.data;
thisInterpreter.maybeThrowRegExp(separator, callback);
if (thisInterpreter.REGEXP_MODE === 2) {
// Run split in separate thread.
var splitWorker = new Worker('regexp_worker.js');
var pid = thisInterpreter.regExpTimeout(separator, splitWorker,
callback);
splitWorker.onmessage = function(e) {
clearTimeout(pid);
callback(thisInterpreter.arrayNativeToPseudo(e.data));
};
splitWorker.postMessage(['split', string, separator, limit]);
return;
}
}
var jsList = String(this).split(separator, limit);
return thisInterpreter.arrayNativeToPseudo(jsList);
// Run split natively.
var jsList = string.split(separator, limit);
callback(thisInterpreter.arrayNativeToPseudo(jsList));
};
this.setNativeFunctionPrototype(this.STRING, 'split', wrapper);
this.setAsyncFunctionPrototype(this.STRING, 'split', wrapper);

wrapper = function(regexp) {
wrapper = function(regexp, callback) {
var string = String(this);
if (thisInterpreter.isa(regexp, thisInterpreter.REGEXP)) {
regexp = regexp.data;
} else {
regexp = new RegExp(regexp);
}
// Example of catastrophic match RegExp:
// 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaac'.match(/^(a+)+b/)
thisInterpreter.maybeThrowRegExp(regexp, callback);
if (thisInterpreter.REGEXP_MODE === 2) {
// Run match in separate thread.
var matchWorker = new Worker('regexp_worker.js');
var pid = thisInterpreter.regExpTimeout(regexp, matchWorker,
callback);
matchWorker.onmessage = function(e) {
clearTimeout(pid);
callback(e.data && thisInterpreter.arrayNativeToPseudo(e.data));
};
matchWorker.postMessage(['match', string, regexp]);
return;
}
var m = String(this).match(regexp);
return m && thisInterpreter.arrayNativeToPseudo(m);
// Run match natively.
var m = string.match(regexp);
callback(m && thisInterpreter.arrayNativeToPseudo(m));
};
this.setNativeFunctionPrototype(this.STRING, 'match', wrapper);
this.setAsyncFunctionPrototype(this.STRING, 'match', wrapper);

wrapper = function(regexp) {
wrapper = function(regexp, callback) {
var string = String(this);
if (thisInterpreter.isa(regexp, thisInterpreter.REGEXP)) {
regexp = regexp.data;
} else {
regexp = new RegExp(regexp);
}
// Example of catastrophic search RegExp:
// 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaac'.search(/^(a+)+b/)
thisInterpreter.maybeThrowRegExp(regexp, callback);
if (thisInterpreter.REGEXP_MODE === 2) {
// Run search in separate thread.
var searchWorker = new Worker('regexp_worker.js');
var pid = thisInterpreter.regExpTimeout(regexp, searchWorker,
callback);
searchWorker.onmessage = function(e) {
clearTimeout(pid);
callback(e.data);
};
searchWorker.postMessage(['search', string, regexp]);
return;
}
return String(this).search(regexp);
// Run search natively.
callback(string.search(regexp));
};
this.setNativeFunctionPrototype(this.STRING, 'search', wrapper);
this.setAsyncFunctionPrototype(this.STRING, 'search', wrapper);

wrapper = function(substr, newSubstr) {
wrapper = function(substr, newSubstr, callback) {
// Support for function replacements is the responsibility of a polyfill.
var string = String(this);
newSubstr = String(newSubstr);
// Example of catastrophic replace RegExp:
// 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaac'.replace(/^(a+)+b/, '')
if (thisInterpreter.isa(substr, thisInterpreter.REGEXP)) {
substr = substr.data;
thisInterpreter.maybeThrowRegExp(substr, callback);
if (thisInterpreter.REGEXP_MODE === 2) {
// Run replace in separate thread.
var replaceWorker = new Worker('regexp_worker.js');
var pid = thisInterpreter.regExpTimeout(substr, replaceWorker,
callback);
replaceWorker.onmessage = function(e) {
clearTimeout(pid);
callback(e.data);
};
replaceWorker.postMessage(['replace', string, substr, newSubstr]);
return;
}
}
return String(this).replace(substr, newSubstr);
// Run replace natively.
callback(string.replace(substr, newSubstr));
};
this.setNativeFunctionPrototype(this.STRING, 'replace', wrapper);
this.setAsyncFunctionPrototype(this.STRING, 'replace', wrapper);
// Add a polyfill to handle replace's second argument being a function.
this.polyfills_.push(
"(function() {",
Expand Down Expand Up @@ -1362,33 +1451,60 @@ Interpreter.prototype.initRegExp = function(scope) {
this.setProperty(this.REGEXP.properties['prototype'], 'source', '(?:)',
Interpreter.READONLY_NONENUMERABLE_DESCRIPTOR);

wrapper = function(str) {
return this.data.test(str);
};
this.setNativeFunctionPrototype(this.REGEXP, 'test', wrapper);
// Use polyfill to avoid complexity of regexp threads.
this.polyfills_.push(
"Object.defineProperty(RegExp.prototype, 'test',",
"{configurable: true, writable: true, value:",
"function(str) {",
"return String(str).search(this) !== -1",
"}",
"});");

wrapper = function(str) {
str = str.toString();
wrapper = function(string, callback) {
var thisPseudoRegExp = this;
var regexp = this.data;
string = String(string);
// Get lastIndex from wrapped regex, since this is settable.
this.data.lastIndex =
regexp.lastIndex =
Number(thisInterpreter.getProperty(this, 'lastIndex'));
var match = this.data.exec(str);
thisInterpreter.setProperty(this, 'lastIndex', this.data.lastIndex);

if (match) {
var result =
thisInterpreter.createObjectProto(thisInterpreter.ARRAY_PROTO);
for (var i = 0; i < match.length; i++) {
thisInterpreter.setProperty(result, i, match[i]);
// Example of catastrophic exec RegExp:
// /^(a+)+b/.exec('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaac')
thisInterpreter.maybeThrowRegExp(regexp, callback);
if (thisInterpreter.REGEXP_MODE === 2) {
// Run exec in separate thread.
// Note that lastIndex is not preserved when a RegExp is passed to a
// Web Worker. Thus it needs to be passed back and forth separately.
var execWorker = new Worker('regexp_worker.js');
var pid = thisInterpreter.regExpTimeout(regexp, execWorker,
callback);
execWorker.onmessage = function(e) {
clearTimeout(pid);
// Return tuple: [result, lastIndex]
thisInterpreter.setProperty(thisPseudoRegExp, 'lastIndex',
e.data[1]);
callback(matchToPseudo(e.data[0]));
};
execWorker.postMessage(['exec', regexp, regexp.lastIndex, string]);
return;
}
// Run exec natively.
var match = regexp.exec(string);
thisInterpreter.setProperty(thisPseudoRegExp, 'lastIndex',
regexp.lastIndex);
callback(matchToPseudo(match));

function matchToPseudo(match) {
if (match) {
var result = thisInterpreter.arrayNativeToPseudo(match);
// match has additional properties.
thisInterpreter.setProperty(result, 'index', match.index);
thisInterpreter.setProperty(result, 'input', match.input);
return result;
}
// match has additional properties.
thisInterpreter.setProperty(result, 'index', match.index);
thisInterpreter.setProperty(result, 'input', match.input);
return result;
return null;
}
return null;
};
this.setNativeFunctionPrototype(this.REGEXP, 'exec', wrapper);
this.setAsyncFunctionPrototype(this.REGEXP, 'exec', wrapper);
};

/**
Expand Down Expand Up @@ -1535,6 +1651,43 @@ Interpreter.prototype.isa = function(child, constructor) {
return false;
};

/**
 * Guard called before any regular expression is evaluated.
 * Throws an interpreter Error when regular expressions are disabled
 * (REGEXP_MODE is 0), or when threaded execution is requested
 * (REGEXP_MODE is 2) but this environment has no Worker constructor.
 * In every other case it does nothing.
 * @param {!RegExp} nativeRegExp Regular expression (used in the error text).
 * @param {Function} callback Asynchronous callback function.  If provided,
 *     it is invoked with null before the exception is thrown.
 */
Interpreter.prototype.maybeThrowRegExp = function(nativeRegExp, callback) {
  var regExpDisabled = this.REGEXP_MODE === 0;
  var threadUnavailable = !regExpDisabled &&
      this.REGEXP_MODE === 2 && typeof Worker !== 'function';
  if (!regExpDisabled && !threadUnavailable) {
    // Regular expressions may run; nothing to report.
    return;
  }
  if (callback) {
    // Resolve the pending async call before raising the error.
    callback(null);
  }
  this.throwException(this.ERROR,
      'Regular expressions not supported: ' + nativeRegExp);
};

/**
 * Arm a watchdog timer for a regular expression thread.  If the timer is not
 * cancelled (via clearTimeout on the returned id) before
 * REGEXP_THREAD_TIMEOUT milliseconds elapse, the thread is terminated, the
 * async callback is invoked with null, and a 'RegExp Timeout' error is
 * thrown inside the interpreter.
 * @param {!RegExp} nativeRegExp Regular expression (used for error message).
 * @param {!Worker} worker Thread to terminate.
 * @param {!Function} callback Async callback function to continue execution.
 * @return {number} PID of timeout.  Used to cancel if thread completes.
 */
Interpreter.prototype.regExpTimeout = function(nativeRegExp, worker, callback) {
  var interpreter = this;
  var delay = this.REGEXP_THREAD_TIMEOUT;

  // Runs only if the worker failed to answer in time.
  function abortThread() {
    worker.terminate();
    callback(null);
    try {
      interpreter.throwException(interpreter.ERROR,
          'RegExp Timeout: ' + nativeRegExp);
    } catch (e) {
      // Eat the expected Interpreter.STEP_ERROR.
    }
  }

  return setTimeout(abortThread, delay);
};

/**
* Is a value a legal integer for an array length?
* @param {Interpreter.Value} x Value to check.
Expand Down Expand Up @@ -2193,6 +2346,20 @@ Interpreter.prototype.setNativeFunctionPrototype =
Interpreter.NONENUMERABLE_DESCRIPTOR);
};

/**
 * Helper that wraps a native function as an async interpreter function and
 * installs it as a non-enumerable property on the given object's
 * 'prototype' property.
 * @param {!Interpreter.Object} obj Data object.
 * @param {Interpreter.Value} name Name of property.
 * @param {!Function} wrapper Function object.
 */
Interpreter.prototype.setAsyncFunctionPrototype =
    function(obj, name, wrapper) {
  var proto = obj.properties['prototype'];
  var asyncFunc = this.createAsyncFunction(wrapper);
  this.setProperty(proto, name, asyncFunc,
      Interpreter.NONENUMERABLE_DESCRIPTOR);
};

/**
* Returns the current scope from the stateStack.
* @return {!Interpreter.Object} Current scope dictionary.
Expand Down
56 changes: 56 additions & 0 deletions regexp_worker.js
@@ -0,0 +1,56 @@
/**
* @license
* JavaScript Interpreter's Web Worker for Regular Expressions
*
* Copyright 2019 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/**
* @fileoverview Runs regular expressions in separate thread.
* @author fraser@google.com (Neil Fraser)
*/
'use strict';

/**
 * Worker message handler.  Receives one request per message as an array
 * whose first element names the operation, runs that regular expression
 * operation, and posts the result back to the creator of the worker.
 *
 * Supported requests:
 *   ['split', string, separator, limit]
 *   ['match', string, regexp]
 *   ['search', string, regexp]
 *   ['replace', string, regexp, newSubstr]
 *   ['exec', regexp, lastIndex, string]
 *
 * The reply is the raw result of the operation, except for 'exec', which
 * replies with the tuple [match, lastIndex] because the caller passes
 * lastIndex separately and needs the updated value back.
 * @param {!MessageEvent} e Event whose data property is the request array.
 * @throws {Error} If the operation name is not recognized.
 */
onmessage = function(e) {
  var result;
  var data = e.data;
  switch (data[0]) {
    case 'split':
      // ['split', string, separator, limit]
      result = data[1].split(data[2], data[3]);
      break;
    case 'match':
      // ['match', string, regexp]
      result = data[1].match(data[2]);
      break;
    case 'search':
      // ['search', string, regexp]
      result = data[1].search(data[2]);
      break;
    case 'replace':
      // ['replace', string, regexp, newSubstr]
      result = data[1].replace(data[2], data[3]);
      break;
    case 'exec':
      // ['exec', regexp, lastIndex, string]
      var regexp = data[1];
      // Restore the caller's lastIndex before running the match.
      regexp.lastIndex = data[2];
      // Return tuple: [result, lastIndex]
      result = [regexp.exec(data[3]), regexp.lastIndex];
      break;
    default:
      // Throw a real Error (not a bare string) so the worker's error event
      // carries a stack trace and a proper error type.
      throw new Error('Unknown RegExp operation: ' + data[0]);
  }
  postMessage(result);
};

0 comments on commit 7e6f780

Please sign in to comment.