
first working example

It needs the very latest version of the tokenizer, as I was working on
both at the same time. I must complete the interface so that it is a real
stream (and can thus be piped to).
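
Once that stream interface exists, usage could look roughly like this (a hypothetical
sketch: pipe support is not part of this commit, and the module path, file name and
export are made up for illustration):

    var fs = require('fs');
    var MyParser = require('./examples/simple'); // assuming the example exported MyParser

    var p = new MyParser();
    p.on('end', function () {
      console.log('done parsing');
    });

    // any readable stream could then be piped straight into the parser
    fs.createReadStream('words.txt').pipe(p);
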
Floby committed Apr 13, 2011
1 parent ae2a99c commit 586192199091834de696ed1b032cb27f010baaff
Showing with 135 additions and 1 deletion.
  1. +55 −0 examples/simple.js
  2. +80 −1 lib/Parser.js
examples/simple.js
@@ -0,0 +1,55 @@
+var Tokenizer = require('tokenizer');
+var Parser = require('../lib/Parser.js');
+var assert = require('assert');
+var sys = require('sys');
+
+/**
+ * In this example we create a parser that reads a number and then reads
+ * that many words. Whitespace is ignored.
+ */
+
+function createTokenizer () {
+  var t = new Tokenizer();
+  // we only use built-in rules
+  t.addRule(Tokenizer.number);
+  t.addRule(Tokenizer.word);
+  t.addRule(Tokenizer.whitespace);
+  t.ignore('whitespace');
+  t.on('token', function(token, type) {
+    //console.log('got token %s of type ', JSON.stringify(token), type);
+  });
+  return t;
+}
+
+function MyParser () {
+  Parser.call(this, createTokenizer());
+  //this.initialHandler(this.number); // this would work too, but let's use
+  // the 'default handler' method instead
+  this.defaultHandler(this.number);
+}
+sys.inherits(MyParser, Parser);
+
+MyParser.prototype.number = function number(token, type, next) {
+  //console.log('current queue in number', this._queue);
+  assert.equal(type, 'number', "unexpected token "+token+" ("+type+"). expecting number");
+  var nb = parseInt(token, 10);
+  for (var i = 0; i < nb; ++i) {
+    next(this.word);
+  }
+  console.log('expecting %d words', nb);
+};
+
+MyParser.prototype.word = function word(token, type, next) {
+  //console.log('current queue in word', this._queue);
+  assert.equal(type, 'word', "unexpected token "+token+". expecting word");
+  console.log('read %s', token);
+};
+
+// entry point
+var str1 = "2 coucou salut \n\t 3 bonjour bonsoir";
+var str2 = " hello";
+
+var p = new MyParser();
+
+p.write(str1);
+p.end(str2);
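
Assuming the tokenizer splits the input into 'number' and 'word' tokens and swallows
the ignored whitespace, the handler queue of the example above should evolve roughly
like this (a sketch worked out from the code, not captured from an actual run):

    // queue starts as [DEFAULT]              (installed by defaultHandler)
    // "2"      -> DEFAULT -> number(): "expecting 2 words", queue: [word, word, DEFAULT]
    // "coucou" -> word(): "read coucou",                    queue: [word, DEFAULT]
    // "salut"  -> word(): "read salut",                     queue: [DEFAULT]
    // "3"      -> DEFAULT -> number(): "expecting 3 words", queue: [word, word, word, DEFAULT]
    // "bonjour", "bonsoir", "hello" -> word() for each,     queue: [DEFAULT]
    // eof      -> DEFAULT sees type 'eof' and the parser emits 'end'
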
lib/Parser.js
@@ -2,7 +2,86 @@ var EventEmitter = require('events').EventEmitter;
var sys = require('sys');
var assert = require('assert');
-function Parser () {
+function makeIgnore(number) {
+  return function ignore(token, type, next) {
+    if(type === 'eof') {
+      console.warn('ignored %d tokens', number);
+      return;
+    }
+    next(makeIgnore(number+1));
+  };
+}
+
+function Parser (tokenizer) {
   EventEmitter.apply(this);
+  if(!tokenizer) throw new Error("you must specify a tokenizer");
+  this._tokenizer = tokenizer;
+  this._queue = []; // queue of functions to be called on the next token
+  //this._queue.push(makeIgnore(0));
+
+  var self = this;
+  this._tokenizer.on('end', function() {
+    self._newToken('', 'eof');
+  });
+  this._tokenizer.on('token', function(token, type) {
+    self._newToken(token, type);
+  });
}
sys.inherits(Parser, EventEmitter);
+
+Parser.prototype.initialHandler = function initialHandler(h) {
+  // h is the first handler in the queue; this function must be
+  // called in subclass constructors. An array of handlers is also accepted.
+  if (Array.isArray(h)) {
+    for (var i = 0; i < h.length; ++i) {
+      this.initialHandler(h[i]);
+    }
+    return;
+  }
+  assert.equal(typeof h, 'function');
+  this._queue.unshift(h);
+};
+
+Parser.prototype.defaultHandler = function defaultHandler(h) {
+  function DEFAULT(token, type, next) {
+    if(type === 'eof') {
+      this._reachedEnd();
+      return;
+    }
+    next(DEFAULT);
+    h.apply(this, arguments);
+  }
+  //DEFAULT.toString = function toString() {
+  //  return "DEFAULT -> "+h;
+  //}
+  this._queue.push(DEFAULT);
+  this._haveDefault = true;
+};
+
+Parser.prototype._newToken = function _newToken(token, type) {
+  if(!this._haveDefault) this.defaultHandler(makeIgnore(0));
+  // this is a really naive implementation
+  var f = this._queue.shift();
+  if(!f) {
+    this.emit('error', new SyntaxError("unexpected token "+token));
+    return;
+  }
+  var queue = this._queue;
+  function next(fun) {
+    queue.unshift.apply(queue, arguments);
+  }
+  f.call(this, token, type, next);
+};
+
+Parser.prototype._reachedEnd = function _reachedEnd() {
+  this.emit('end');
+};
+
+Parser.prototype.write = function write(data, encoding) {
+  this._tokenizer.write(data, encoding);
+};
+Parser.prototype.end = function end(data, encoding) {
+  this._tokenizer.end(data, encoding);
+};
+
+module.exports = Parser;
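
For comparison with the defaultHandler route used in examples/simple.js, a subclass
could instead seed the queue explicitly with initialHandler; once those handlers are
consumed, the makeIgnore fallback installed by _newToken() swallows whatever follows.
A minimal, hypothetical sketch (it reuses names from the example file and is not part
of this commit):

    function StrictParser () {
      Parser.call(this, createTokenizer());
      // only the leading "<number> <word> <word> ..." group is parsed;
      // every token after it falls into the ignore fallback
      this.initialHandler(this.number);
    }
    sys.inherits(StrictParser, Parser);
    StrictParser.prototype.number = MyParser.prototype.number;
    StrictParser.prototype.word = MyParser.prototype.word;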

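As for the "real stream" mentioned in the commit message, in the Node of that era a
pipe destination mostly needs a writable flag and a write() that reports back-pressure.
A hypothetical sketch of where this could go (not part of this commit; it assumes the
tokenizer's own write() behaves like a regular writable stream):

    function StreamParser (tokenizer) {
      Parser.call(this, tokenizer);
      this.writable = true;  // lets readable.pipe(parser) use it as a destination
      // a complete version would also forward the tokenizer's 'drain' event
    }
    sys.inherits(StreamParser, Parser);

    // forward the tokenizer's return value so pipe() can pause the source
    StreamParser.prototype.write = function write(data, encoding) {
      return this._tokenizer.write(data, encoding);
    };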