Browse files

Initial commit

  • Loading branch information...
0 parents commit 2d56d8741b0344935b40710663ec9abb5f283ca0 Nicholas C. Zakas committed Feb 7, 2010
Showing with 1,295 additions and 0 deletions.
  1. +19 −0 LICENSE
  2. +1 −0 README
  3. +42 −0 src/css/CSSTokenizerDemo.htm
  4. +258 −0 src/css/css-tokens.js
  5. +451 −0 src/util/stringreader-tests.htm
  6. +283 −0 src/util/stringreader.js
  7. +241 −0 src/util/tokenstream.js
19 LICENSE
@@ -0,0 +1,19 @@
+Copyright (c) 2009 Nicholas C. Zakas. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
1 README
@@ -0,0 +1 @@
+A collection of utilities, lexers, and parsers written in JavaScript.
42 src/css/CSSTokenizerDemo.htm
@@ -0,0 +1,42 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+<title>CSS TokenStream Demo</title>
+<script type="text/javascript" src="../util/stringreader.js"></script>
+<script type="text/javascript" src="../util/tokenstream.js"></script>
+<script type="text/javascript" src="css-tokens.js"></script>
+
+</head>
+<body>
+<h1>CSS TokenStream Demo</h1>
+<textarea rows="10" cols="40" id="input"></textarea>
+<input type="button" onclick="tokenize()" value="Tokenize">
+<p>(You may want to keep the CSS kinda small, this could take a while.)</p>
+<div id="output">
+
+</div>
+<script type="text/javascript">
+
+function tokenize(){
+ var stream = new TokenStream(document.getElementById("input").value, CSSTokens);
+ document.getElementById("output").innerHTML = "";
+ setTimeout(function(){
+ var tt = stream.get();
+ var output = document.getElementById("output");
+
+ output.innerHTML += tt + " - " + stream.tokenName(tt) + " - (" + stream.token().value + ")<br>";
+
+ if (tt > 0){
+ setTimeout(arguments.callee, 15);
+ }
+
+
+ }, 15);
+}
+
+
+
+
+</script>
+</body>
+</html>
258 src/css/css-tokens.js
@@ -0,0 +1,258 @@
+/*
+ * CSS Token information.
+ * Copyright (c) 2010 Nicholas C. Zakas. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/*
+ * CSS token information based on Flex lexical scanner grammar:
+ * http://www.w3.org/TR/CSS2/grammar.html#scanner
+ */
+CSSTokens = function(){
+
+ //token fragments
+ var h = "[0-9a-fA-F]",
+ nonascii = "[\\u0080-\\uFFFF]",
+ unicode = "(?:\\\\" + h + "{1,6}(?:\\r\\n|[ \\t\\r\\n\\f])?)",
+ escape = "(?:" + unicode + "|\\\\[^\r\n\f0-9a-fA-F])",
+ nmstart = "(?:[_a-zA-Z\\*]|" + nonascii + "|" + escape + ")", //includes leading * and _ for IE
+ nmchar = "(?:[_a-zA-Z0-9\\-]|" + nonascii + "|" + escape + ")",
+ string1 = "(?:\\\"(?:[^\\n\\r\\f\\\"]|\\\\" + nl + "|" + escape + ")*\\\")",
+ string2 = "(?:\\'(?:[^\\n\\r\\f\\\"]|\\\\" + nl + "|" + escape + ")*\\')",
+ invalid1 = "(?:\\\"(?:[^\\n\\r\\f\\\"]|\\\\" + nl + "|" + escape + ")*)",
+ invalid2 = "(?:\\'(?:[^\\n\\r\\f\\\"]|\\\\" + nl + "|" + escape + ")*)",
+
+ comment = "\\/\\*[^\\*]*\\*+([^\/\\*][^\\*]*\\*+)*\\/",
+ ident = "(?:\\-?" + nmstart + nmchar + "*)",
+ name = nmchar + "+",
+ num = "(?:[0-9]+|[0-9]*\\.[0-9]+)",
+ string = string1 + "|" + string2,
+ invalid = invalid1 + "|" + invalid2,
+ url = "(?:[!#$%&\\*\\-~]|" + nonascii + "|" + escape + ")*",
+ s = "[ \\t\\r\\n\\f]+",
+ w = "(?:" + s + ")?",
+ nl = "(?:\\n|\\r\\n|\\r|\\f)";
+
+ //return the token information
+ return [
+ {
+ name: "S",
+ pattern: "[ \t\r\n\f]+"
+ },
+ {
+ name: "COMMENT",
+ pattern: comment
+ },
+ //CDO and CDC intentionally omitted
+ {
+ name: "INCLUDES",
+ text: "~="
+ },
+ {
+ name: "DASHMATCH",
+ text: "|="
+ },
+ {
+ name: "STRING",
+ pattern: "(?:" + string1 + "|" + string2 + ")",
+ },
+ {
+ name: "INVALID",
+ pattern: "(?:" + invalid1 + "|" + invalid2 + ")",
+ },
+
+
+ {
+ name: "IDENT",
+ pattern: ident
+ },
+ {
+ name: "HASH",
+ pattern: "#" + name
+ },
+ {
+ name: "IMPORT_SYM",
+ pattern: "@IMPORT",
+ patternOpt: "i"
+ },
+ {
+ name: "PAGE_SYM",
+ pattern: "@PAGE",
+ patternOpt: "i"
+ },
+ {
+ name: "MEDIA_SYM",
+ pattern: "@MEDIA",
+ patternOpt: "i"
+ },
+ {
+ name: "CHARSET_SYM",
+ text: "@charset "
+ },
+ {
+ name: "IMPORTANT_SYM",
+ pattern: "!(?:" + w + "|" + comment + ")*IMPORTANT",
+ patternOpt: "i"
+ },
+ {
+ name: "EMS",
+ pattern: num + "em",
+ patternOpt: "i"
+ },
+ {
+ name: "EXS",
+ pattern: num + "ex",
+ patternOpt: "i"
+ },
+ {
+ name: "LENGTH",
+ pattern: "(?:" + num + "px|" + num + "cm|" + num + "mm|" + num + "in|" + num + "pt|" + num + "pc" + ")",
+ patternOpt: "i"
+ },
+ {
+ name: "ANGLE",
+ pattern: "(?:" + num + "deg|" + num + "rad|" + num + "grad" + ")",
+ patternOpt: "i"
+ },
+ {
+ name: "TIME",
+ pattern: "(?:" + num + "ms|" + num + "s" + ")",
+ patternOpt: "i"
+ },
+ {
+ name: "FREQ",
+ pattern: "(?:" + num + "hz|" + num + "khz" + ")",
+ patternOpt: "i"
+ },
+ {
+ name: "DIMENSION",
+ pattern: num + ident
+ },
+ {
+ name: "PERCENTAGE",
+ pattern: num + "%"
+ },
+ {
+ name: "NUMBER",
+ pattern: num
+ },
+ {
+ name: "URI",
+ pattern: "url\\(" + w + string + w + "\\)"
+ },
+ {
+ name: "URI",
+ pattern: "url\\(" + w + url + w + "\\)"
+ },
+ {
+ name: "FUNCTION",
+ pattern: ident + "\\("
+ },
+
+ //Not defined as tokens, but might as well be
+ {
+ name: "SLASH",
+ text: "/"
+ },
+ {
+ name: "MINUS",
+ text: "-"
+ },
+ {
+ name: "PLUS",
+ text: "+"
+ },
+ {
+ name: "STAR",
+ text: "*"
+ },
+ {
+ name: "GREATER",
+ text: ">"
+ },
+ {
+ name: "LBRACE",
+ text: "{"
+ },
+ {
+ name: "RBRACE",
+ text: "}"
+ },
+ {
+ name: "LBRACKET",
+ text: "["
+ },
+ {
+ name: "RBRACKET",
+ text: "]"
+ },
+ {
+ name: "EQUALS",
+ text: "="
+ },
+ {
+ name: "COLON",
+ text: ":"
+ },
+ {
+ name: "SEMICOLON",
+ text: ";"
+ },
+
+ {
+ name: "LPAREN",
+ text: "("
+ },
+ {
+ name: "RPAREN",
+ text: ")"
+ },
+
+ {
+ name: "DOT",
+ text: "."
+ },
+ {
+ name: "COMMA",
+ text: ","
+ }
+ ];
+
+}();
+
+/*
+stylesheet : [ CDO | CDC | S | statement ]*;
+statement : ruleset | at-rule;
+at-rule : ATKEYWORD S* any* [ block | ';' S* ];
+block : '{' S* [ any | block | ATKEYWORD S* | ';' S* ]* '}' S*;
+ruleset : selector? '{' S* declaration? [ ';' S* declaration? ]* '}' S*;
+selector : any+;
+declaration : property S* ':' S* value;
+property : IDENT;
+value : [ any | block | ATKEYWORD S* ]+;
+any : [ IDENT | NUMBER | PERCENTAGE | DIMENSION | STRING
+ | DELIM | URI | HASH | UNICODE-RANGE | INCLUDES
+ | DASHMATCH | ':' | FUNCTION S* any* ')'
+ | '(' S* any* ')' | '[' S* any* ']' ] S*;
+
+
+
+*/
+
451 src/util/stringreader-tests.htm
@@ -0,0 +1,451 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+<title>String Reader Tests</title>
+<!-- Combo-handled YUI CSS files: -->
+<link rel="stylesheet" type="text/css" href="http://yui.yahooapis.com/combo?2.7.0/build/logger/assets/logger.css&2.7.0/build/yuitest/assets/testlogger.css">
+<!-- Combo-handled YUI JS files: -->
+<script type="text/javascript" src="http://yui.yahooapis.com/combo?2.7.0/build/yahoo-dom-event/yahoo-dom-event.js&2.7.0/build/logger/logger-min.js&2.7.0/build/yuitest/yuitest-min.js"></script>
+<script type="text/javascript" src="stringreader.js"></script>
+
+
+</head>
+<body>
+<h1>String Reader Tests</h1>
+<script type="text/javascript">
+
+YAHOO.namespace("test");
+
+YAHOO.test.StringReader = (function(){
+
+ var assert = YAHOO.util.Assert;
+
+ //-------------------------------------------------------------------------
+ // Base Test Suite
+ //-------------------------------------------------------------------------
+
+ var suite = new YAHOO.tool.TestSuite("StringReader");
+
+ //-------------------------------------------------------------------------
+ // Test Case for read()
+ //-------------------------------------------------------------------------
+
+ suite.add(new YAHOO.tool.TestCase({
+
+ name : "read() Tests",
+
+ //---------------------------------------------------------------------
+ // Tests
+ //---------------------------------------------------------------------
+
+ /*
+ * Tests that reading a string with no new lines works.
+ */
+ testLinearReadWithOneLine: function(){
+ var testString = "Hello world!",
+ reader = new StringReader(testString),
+ i = 0,
+ c;
+
+ assert.areEqual(1, reader.getCol(), "Column should be 1.");
+ c = reader.read();
+
+ while(c){
+ assert.areEqual(testString.charAt(i), c, "Character at position " + i + " is incorrect.");
+ assert.areEqual(i+2, reader.getCol(), "Column should be " + (i+2) + ".");
+ c = reader.read();
+ i++;
+ }
+
+ assert.areEqual(testString.length, i, "All characters should be read.");
+ assert.isNull(c, "Last character read should be null.");
+ },
+
+ /*
+ * Tests that reading a multi-line string works.
+ */
+ testLinearReadWithTwoLines: function(){
+ var testString = "Hello world!\nNice day, isn't it?",
+ reader = new StringReader(testString),
+ i = 0,
+ c = reader.read();
+
+ while(c){
+ assert.areEqual(testString.charAt(i), c, "Character at position " + i + " is incorrect.");
+ if (c == "\n"){
+ assert.areEqual(2, reader.getRow(), "Should now be on second row.");
+ assert.areEqual(1, reader.getCol(), "The new line should cause you to go to first char in second row.");
+ }
+ c = reader.read();
+ i++;
+ }
+
+ assert.isNull(c, "Last character read should be null.");
+ },
+
+
+ /*
+ * Tests that reading a multi-line string properly tracks rows and cols.
+ */
+ testLinearReadWithTwoLinesTrackCols: function(){
+ var testString = "Hello world!\nNice day, isn't it?",
+ reader = new StringReader(testString);
+
+ reader.readTo("!");
+ reader.read();
+
+ assert.areEqual(1, reader.getCol());
+ assert.areEqual(2, reader.getRow());
+
+ },
+
+ /*
+ * Tests that reading a multi-line string works when the last character is a new-line.
+ */
+ testLinearReadWithTwoLinesAndDanglingNewLine: function(){
+ var testString = "Hello world!\nNice day, isn't it?\n",
+ reader = new StringReader(testString),
+ i = 0,
+ c = reader.read();
+
+ while(c){
+ assert.areEqual(testString.charAt(i), c, "Character at position " + i + " is incorrect.");
+ c = reader.read();
+ i++;
+ }
+
+ assert.isNull(c, "Last character read should be null.");
+ }
+
+
+
+
+ }));
+
+ //-------------------------------------------------------------------------
+ // Test Case for readTo
+ //-------------------------------------------------------------------------
+
+ suite.add(new YAHOO.tool.TestCase({
+
+ name : "readTo() Tests",
+
+
+ //---------------------------------------------------------------------
+ // Tests
+ //---------------------------------------------------------------------
+
+ /*
+ * Tests that reading a string with no new lines works.
+ */
+ testLinearReadToWithOneLine: function(){
+ var testString = "Hello world!",
+ reader = new StringReader(testString);
+
+
+ assert.areEqual("Hello ", reader.readTo(" "));
+ assert.areEqual("w", reader.read());
+ },
+
+ /*
+ * Tests that reading a multi-line string works.
+ */
+ testLinearReadToWithTwoLines: function(){
+ var testString = "Hello world!\nNice day, isn't it?",
+ reader = new StringReader(testString);
+
+
+ assert.areEqual("Hello world!\nN", reader.readTo("N"));
+ assert.areEqual(2, reader.getRow());
+ assert.areEqual(2, reader.getCol());
+ }
+
+ }));
+
+ //-------------------------------------------------------------------------
+ // Test Case for unread
+ //-------------------------------------------------------------------------
+
+ suite.add(new YAHOO.tool.TestCase({
+
+ name : "unread() Tests",
+
+
+ //---------------------------------------------------------------------
+ // Tests
+ //---------------------------------------------------------------------
+
+ /*
+ * Tests that calling unread at the beginning of the string does nothing.
+ */
+ testUnreadBeginning: function(){
+ var testString = "Hello world!",
+ reader = new StringReader(testString);
+
+ reader.unread();
+ assert.areEqual(1, reader.getCol());
+ assert.areEqual(1, reader.getRow());
+ assert.areEqual("H", reader.read());
+ },
+
+ /*
+ * Tests that calling unread() moves back one space on same line.
+ */
+ testUnreadSimple: function(){
+ var testString = "Hello world!\nNice day, isn't it?",
+ reader = new StringReader(testString);
+
+ reader.read();
+ reader.unread();
+
+ assert.areEqual("H", reader.read(), "First character read should be 'H'.");
+ assert.areEqual(2, reader.getCol());
+ assert.areEqual(1, reader.getRow());
+ },
+
+ /*
+ * Tests that calling unread() moves back across lines.
+ */
+ testUnreadMultiLine: function(){
+ var testString = "Hello world!\nNice day, isn't it?",
+ reader = new StringReader(testString);
+
+ reader.readTo("\n");
+ reader.unread();
+
+ assert.areEqual(13, reader.getCol());
+ assert.areEqual(1, reader.getRow());
+ assert.areEqual("\n", reader.read(), "First character read should be '\\n'.");
+
+ },
+
+ /*
+ * Tests that calling unread() moves back across lines.
+ */
+ testUnreadMultiLine2: function(){
+ var testString = "A\nB\nC",
+ reader = new StringReader(testString);
+
+ reader.readTo("C");
+ reader.unread();
+
+ assert.areEqual(1, reader.getCol());
+ assert.areEqual(3, reader.getRow());
+
+ reader.unread();
+
+ assert.areEqual(2, reader.getCol());
+ assert.areEqual(2, reader.getRow());
+
+ reader.unread();
+
+ assert.areEqual(1, reader.getCol());
+ assert.areEqual(2, reader.getRow());
+
+ reader.unread();
+
+ assert.areEqual(2, reader.getCol());
+ assert.areEqual(1, reader.getRow());
+
+ reader.unread();
+
+ assert.areEqual(1, reader.getCol());
+ assert.areEqual(1, reader.getRow());
+
+ assert.areEqual("A", reader.read(), "First character read should be 'A'.");
+
+ }
+
+ }));
+
+ //-------------------------------------------------------------------------
+ // Test Case for readWhile()
+ //-------------------------------------------------------------------------
+
+ suite.add(new YAHOO.tool.TestCase({
+
+ name : "readWhile() Tests",
+
+
+ //---------------------------------------------------------------------
+ // Tests
+ //---------------------------------------------------------------------
+
+ /*
+ * Tests that the entire string can be read..
+ */
+ testReadWhileSimple: function(){
+ var testString = "Hello world!",
+ reader = new StringReader(testString);
+
+ var result = reader.readWhile(function(){
+ return true;
+ });
+
+ assert.areEqual(testString, result);
+ assert.areEqual(1, reader.getRow());
+ assert.areEqual(13, reader.getCol());
+ }
+ }));
+
+ //-------------------------------------------------------------------------
+ // Test Case for readCount()
+ //-------------------------------------------------------------------------
+
+ suite.add(new YAHOO.tool.TestCase({
+
+ name : "readCount() Tests",
+
+
+ //---------------------------------------------------------------------
+ // Tests
+ //---------------------------------------------------------------------
+
+ /*
+ * Tests that a set number of characters are read correctly.
+ */
+ testReadCountSimple: function(){
+ var testString = "Hello world!",
+ reader = new StringReader(testString);
+
+ var result = reader.readCount(6);
+
+ assert.areEqual("Hello ", result);
+
+ result = reader.readCount(2);
+ assert.areEqual("wo", result);
+ }
+ }));
+
+ //-------------------------------------------------------------------------
+ // Test Case for readMatch()
+ //-------------------------------------------------------------------------
+
+ suite.add(new YAHOO.tool.TestCase({
+
+ name : "readMatch() Tests",
+
+
+ //---------------------------------------------------------------------
+ // Tests
+ //---------------------------------------------------------------------
+
+ /*
+ * Tests that a text pattern is read correctly.
+ */
+ testReadMatchSimple: function(){
+ var testString = "Hello world!",
+ reader = new StringReader(testString);
+
+ var result = reader.readMatch("Hello");
+
+ assert.areEqual("Hello", result);
+ },
+
+ /*
+ * Tests that a regex pattern is read correctly.
+ */
+ testReadMatchRegEx: function(){
+ var testString = "Hello world!",
+ reader = new StringReader(testString);
+
+ var result = reader.readMatch(/^Hello/);
+
+ assert.areEqual("Hello", result);
+ }
+
+
+ }));
+
+ //-------------------------------------------------------------------------
+ // Test Case for unreadCount()
+ //-------------------------------------------------------------------------
+
+ suite.add(new YAHOO.tool.TestCase({
+
+ name : "unreadCount() Tests",
+
+
+ //---------------------------------------------------------------------
+ // Tests
+ //---------------------------------------------------------------------
+
+ /*
+ * Tests that unreading in the middle of a string works.
+ */
+ testUnreadCountMiddle: function(){
+ var testString = "Hello world!",
+ reader = new StringReader(testString);
+
+ reader.readTo("w");
+ reader.unreadCount(3);
+
+ assert.areEqual(5, reader.getCol());
+ assert.areEqual(1, reader.getRow());
+ assert.areEqual("o", reader.read());
+ },
+
+ /*
+ * Tests that unreading to the start of a string works.
+ */
+ testUnreadCountBeginning: function(){
+ var testString = "Hello world!",
+ reader = new StringReader(testString);
+
+ reader.readTo(" ");
+ reader.unreadCount(12);
+
+ assert.areEqual(1, reader.getCol());
+ assert.areEqual(1, reader.getRow());
+ assert.areEqual("H", reader.read());
+ }
+
+
+ }));
+
+ //-------------------------------------------------------------------------
+ // Test Case for eof()
+ //-------------------------------------------------------------------------
+
+ suite.add(new YAHOO.tool.TestCase({
+
+ name : "eof() Tests",
+
+
+ //---------------------------------------------------------------------
+ // Tests
+ //---------------------------------------------------------------------
+
+ /*
+ * Tests that eof() works after reading to end of string.
+ */
+ testTestEofSimple: function(){
+ var testString = "Hello world!",
+ reader = new StringReader(testString);
+
+ reader.readTo("!");
+ assert.isTrue(reader.eof());
+ }
+
+
+ }));
+
+ //return it
+ return suite;
+
+})();
+
+(function (){
+ //create the logger
+ var logger = new YAHOO.tool.TestLogger();
+
+ //add the tests
+ YAHOO.tool.TestRunner.add(YAHOO.test.StringReader);
+ YAHOO.tool.TestRunner.run();
+
+})();
+
+
+</script>
+</body>
+</html>
283 src/util/stringreader.js
@@ -0,0 +1,283 @@
+/*
+ * StringReader implementation.
+ * Copyright (c) 2010 Nicholas C. Zakas. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/**
+ * Convenient way to read through strings.
+ * @class StringReader
+ * @constructor
+ * @param {String} text The text to read.
+ */
+function StringReader(text){
+
+ /**
+ * The input text with line endings normalized.
+ * @property _input
+ * @type String
+ * @private
+ */
+ this._input = text.replace(/\n\r?/g, "\n");
+
+
+ /**
+ * The row for the character to be read next.
+ * @property _row
+ * @type int
+ * @private
+ */
+ this._row = 1;
+
+
+ /**
+ * The column for the character to be read next.
+ * @property _col
+ * @type int
+ * @private
+ */
+ this._col = 1;
+
+ /**
+ * The index of the character in the input to be read next.
+ * @property _cursor
+ * @type int
+ * @private
+ */
+ this._cursor = 0;
+}
+
+StringReader.prototype = {
+
+ //restore constructor
+ constructor: StringReader,
+
+ //-------------------------------------------------------------------------
+ // Position info
+ //-------------------------------------------------------------------------
+
+ /**
+ * Returns the column of the character to be read next.
+ * @return {int} The column of the character to be read next.
+ * @method getCol
+ */
+ getCol: function(){
+ return this._col;
+ },
+
+ /**
+ * Returns the row of the character to be read next.
+ * @return {int} The row of the character to be read next.
+ * @method getRow
+ */
+ getRow: function(){
+ return this._row ;
+ },
+
+ /**
+ * Determines if you're at the end of the input.
+ * @return {Boolean} True if there's no more input, false otherwise.
+ * @method eof
+ */
+ eof: function(){
+ return (this._cursor == this._input.length)
+ },
+
+ //-------------------------------------------------------------------------
+ // Basic reading
+ //-------------------------------------------------------------------------
+
+ /**
+ * Reads the next character from the input and adjusts the row and column
+ * accordingly.
+ * @return {String} The next character or null if there is no next character.
+ * @method read
+ */
+ read: function(){
+ var c = null;
+
+ //if we're not at the end of the input...
+ if (this._cursor < this._input.length){
+
+ //if the last character was a newline, increment row count
+ //and reset column count
+ if (this._input.charAt(this._cursor) == "\n"){
+ this._row++;
+ this._col=1;
+ } else {
+ this._col++;
+ }
+
+ //get character and increment cursor and column
+ c = this._input.charAt(this._cursor++);
+ }
+
+ return c;
+ },
+
+ /**
+ * Reverses the current position by one character, going back up through
+ * the string. Adjusts column and row position.
+ * @method unread
+ */
+ unread: function(){
+
+ //if we're not at the beginning of the input...
+ if (this._cursor > 0){
+
+ //decrement cursor
+ this._cursor--;
+
+ //if the previous character was a newline, decrement row count
+ //and reset column count
+ if (this._input.charAt(this._cursor) == "\n"){
+ this._row--;
+ this._col=this._cursor-this._input.lastIndexOf("\n", this._cursor-1);
+ } else {
+ this._col--;
+ }
+
+ }
+
+ },
+
+ //-------------------------------------------------------------------------
+ // Advanced reading
+ //-------------------------------------------------------------------------
+
+ /**
+ * Reads up to and including the given string. Throws an error if that
+ * string is not found.
+ * @param {String} pattern The string to read.
+ * @return {String} The string when it is found.
+ * @throws Error when the string pattern is not found.
+ * @method readTo
+ */
+ readTo: function(pattern){
+
+ var buffer = "",
+ c;
+
+ /*
+ * First, buffer must be the same length as the pattern.
+ * Then, buffer must end with the pattern or else reach the
+ * end of the input.
+ */
+ while (buffer.length < pattern.length || buffer.lastIndexOf(pattern) != buffer.length - pattern.length){
+ c = this.read();
+ if (c){
+ buffer += c;
+ } else {
+ throw new Error("Expected \"" + pattern + "\" at line " + this._row + ", col " + this._col + ".");
+ }
+ }
+
+ return buffer;
+
+ },
+
+ /**
+ * Reads characters while each character causes the given
+ * filter function to return true. The function is passed
+ * in each character and either returns true to continue
+ * reading or false to stop.
+ * @param {Function} filter The function to read on each character.
+ * @return {String} The string made up of all characters that passed the
+ * filter check.
+ * @method readWhile
+ */
+ readWhile: function(filter){
+
+ var buffer = "",
+ c = this.read();
+
+ while(c !== null && filter(c)){
+ buffer += c;
+ c = this.read();
+ }
+
+ return buffer;
+
+ },
+
+ /**
+ * Reads characters that match either text or a regular expression and
+ * returns those characters. If a match is found, the row and column
+ * are adjusted; if no match is found, the reader's state is unchanged.
+ * reading or false to stop.
+ * @param {String|RegExp} matchter If a string, then the literal string
+ * value is searched for. If a regular expression, then any string
+ * matching the pattern is search for.
+ * @return {String} The string made up of all characters that matched or
+ * null if there was no match.
+ * @method readMatch
+ */
+ readMatch: function(matcher){
+
+ var source = this._input.substring(this._cursor),
+ value = null;
+
+ //if it's a string, just do a straight match
+ if (typeof matcher == "string"){
+ if (source.indexOf(matcher) === 0){
+ value = this.readCount(matcher.length);
+ }
+ } else if (matcher instanceof RegExp){
+ if (matcher.test(source)){
+ value = this.readCount(RegExp.lastMatch.length);
+ }
+ }
+
+ return value;
+ },
+
+
+ /**
+ * Reads a given number of characters. If the end of the input is reached,
+ * it reads only the remaining characters and does not throw an error.
+ * @param {int} count The number of characters to read.
+ * @return {String} The string made up the read characters.
+ * @method readCount
+ */
+ readCount: function(count){
+ var buffer = "";
+
+ while(count--){
+ buffer += this.read();
+ }
+
+ return buffer;
+ },
+
+
+ /**
+ * Unreads a set number of characters. If the beginning of the input is
+ * reached, it stops at that point and does not throw an error.
+ * @param {int} count The number of characters to unread.
+ * @return {void}
+ * @method unreadCount
+ */
+ unreadCount: function(count){
+ while(count--){
+ this.unread();
+ }
+ }
+
+};
241 src/util/tokenstream.js
@@ -0,0 +1,241 @@
+/*
+ * TokenStream implementation.
+ * Copyright (c) 2010 Nicholas C. Zakas. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/**
+ * Generic TokenStream providing base functionality.
+ * @class TokenStream
+ * @constructor
+ * @param {String} text The text to tokenize.
+ * @param {Array} tokenInfo An array of token information.
+ */
+function TokenStream(text, tokenInfo){
+
+ /**
+ * The string reader for easy access to the text.
+ * @type StringReader
+ * @property _reader
+ * @private
+ */
+ this._reader = new StringReader(text);
+
+ /**
+ * Token object for the last consumed token.
+ * @type Token
+ * @property _token
+ * @private
+ */
+ this._token = null;
+
+ /**
+ * The array of token information.
+ * @type Array
+ * @property _tokenInfo
+ * @private
+ */
+ this._tokenInfo = tokenInfo;
+
+ //do a little magic on the token info
+ this._processTokenInfo();
+}
+
+
+
+TokenStream.prototype = {
+
+ //restore constructor
+ constructor: TokenStream,
+
+ /**
+ * Pre-process token information for the token stream. The first thing it
+ * does is push an EOF token to the front of token info. This means that
+ * EOF is always equivalent to token type 0. Next, it processes each token
+ * pattern, either text or regex, into a function called "match" that is
+ * attached to the tokenInfo object. This allows for faster lexing later
+ * on.
+ * @return {void}
+ * @method _processTokenInfo
+ * @private
+ */
+ _processTokenInfo: function(){
+ var tokenInfo = this._tokenInfo,
+ i = 0,
+ len = tokenInfo.length + 1;
+
+ //push EOF token to the front
+ tokenInfo.unshift({
+ name: "EOF",
+ match: function(reader){
+ return reader.eof() ? " " : null;
+ }
+ });
+
+ //create match functions for each tokenInfo object
+ while (i < len){
+ if (typeof tokenInfo[i].text == "string"){
+ tokenInfo[i].match = function(reader){
+ return reader.readMatch(this.text);
+ };
+ } else if (typeof tokenInfo[i].pattern == "string"){
+ tokenInfo[i].match = function(reader){
+ return reader.readMatch(new RegExp("^" + this.pattern, this.patternOpt));
+ };
+ }
+ i++;
+ }
+
+ },
+
+ //-------------------------------------------------------------------------
+ // Matching methods
+ //-------------------------------------------------------------------------
+
+ /**
+ * Determines if the next token matches the given token type.
+ * If so, that token is consumed; if not, the token is placed
+ * back onto the token stream.
+ * @param {int} tokenType The code for the token type to check.
+ * @return {Boolean} True if the token type matches, false if not.
+ * @method match
+ */
+ match: function(tokenType){
+ return this.get() == tokenType || this.unget();
+ },
+
+ /**
+ * Determines if the next token matches the given token type.
+ * If so, that token is consumed; if not, an error is thrown.
+ * @param {int} tokenType The code for the token type to check.
+ * @return {void}
+ * @method mustMatch
+ */
+ mustMatch: function(tokenType){
+ if (!this.match(tokenType)){
+ throw new Error("Expected " + this._tokenNames[tokenType] +
+ " at line " + this._reader.getRow() + ", character " + this._reader.getCol() + ".");
+ }
+ },
+
+ //-------------------------------------------------------------------------
+ // Consuming methods
+ //-------------------------------------------------------------------------
+
+ /**
+ * Consumes the next token from the token stream.
+ * @return {int} The token type of the token that was just consumed.
+ * @method get
+ */
+ get: function(){
+
+ var tokenInfo = this._tokenInfo,
+ reader = this._reader,
+ startCol = reader.getCol(),
+ startRow = reader.getRow(),
+ value,
+ i =0,
+ len = tokenInfo.length,
+ found = false,
+ token = { startCol: reader.getCol(), startRow: reader.getRow() };
+
+ //test each token pattern from top to bottom
+ while (i < len && !found){
+
+ //wrap in try-catch to help debug tokenInfo errors
+ try {
+ value = tokenInfo[i].match(reader);
+ } catch (ex){
+ throw new Error("Error in token info for " + tokenInfo[i].name + ": " + ex.message);
+ }
+
+ //if there's a value, break the loop, otherwise continue
+ if (value){
+ found = true;
+ } else {
+ i++;
+ }
+ }
+
+ token.endCol = reader.getCol();
+ token.endRow = reader.getRow();
+
+ if (found){
+ token.type = i;
+ token.value = value;
+ } else {
+ token.type = -1;
+ token.value = reader.read();
+ }
+
+ //save for later
+ this._token = token;
+
+ //just return the type
+ return token.type;
+ },
+
+ /**
+ * Returns the token type for the next token in the stream without
+ * consuming it.
+ * @return {int} The token type of the next token in the stream.
+ * @method peek
+ */
+ peek: function(){
+ var tokenType = this.get();
+ this.unget();
+ return tokenType;
+ },
+
+ /**
+ * Returns the actual token object for the last consumed token.
+ * @return {Token} The token object for the last consumed token.
+ * @method token
+ */
+ token: function(){
+ return this._token;
+ },
+
+ /**
+ * Returns the name of the token for the given token type.
+ * @return {String} The name of the token or "UNKNOWN_TOKEN" for any
+ * invalid token type.
+ * @method tokenName
+ */
+ tokenName: function(tokenType){
+ if (tokenType < 0 || tokenType > this._tokenInfo.length){
+ return "UNKNOWN_TOKEN";
+ } else {
+ return this._tokenInfo[tokenType].name;
+ }
+ },
+
+ /**
+ * Returns the last consumed token to the token stream.
+ * @method unget
+ */
+ unget: function(){
+ throw new Error("Unget not yet implemented.");
+ }
+
+};
+
+
+

0 comments on commit 2d56d87

Please sign in to comment.