Skip to content

Commit

Permalink
Don't confuse dots in qNames with triple end.
Browse files Browse the repository at this point in the history
  • Loading branch information
RubenVerborgh committed Mar 23, 2014
1 parent 53cb27f commit 2935d2b
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 3 deletions.
9 changes: 6 additions & 3 deletions lib/N3Lexer.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ var patterns = {
_tripleQuotedString: /^""("[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*")""|^''('[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*')''/,
_langcode: /^@([a-z]+(?:-[a-z0-9]+)*)(?=[^a-z0-9\-])/i,
_prefix: /^((?:[A-Za-z\xc0-\xd6\xd8-\xf6\xf8-\u02ff\u0370-\u037d\u037f-\u1fff\u200c\u200d\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff])(?:[\.\-0-9A-Z_a-z\xb7\xc0-\xd6\xd8-\xf6\xf8-\u037d\u037f-\u1fff\u200c\u200d\u203f\u2040\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff])*)?:(?=\s)/,
_qname: /^((?:[A-Z_a-z\xc0-\xd6\xd8-\xf6\xf8-\u02ff\u0370-\u037d\u037f-\u1fff\u200c\u200d\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff])(?:[\.\-0-9A-Z_a-z\xb7\xc0-\xd6\xd8-\xf6\xf8-\u037d\u037f-\u1fff\u200c\u200d\u203f\u2040\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff])*)?:((?:(?:[0-:A-Z_a-z\xc0-\xd6\xd8-\xf6\xf8-\u02ff\u0370-\u037d\u037f-\u1fff\u200c\u200d\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff]|%[0-9a-fA-F]{2}|\\[!#-\/;=?\-@_~])(?:(?:[\.\-0-:A-Z_a-z\xb7\xc0-\xd6\xd8-\xf6\xf8-\u037d\u037f-\u1fff\u200c\u200d\u203f\u2040\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff]|%[0-9a-fA-F]{2}|\\[!#-\/;=?\-@_~])*(?:[\-0-:A-Z_a-z\xb7\xc0-\xd6\xd8-\xf6\xf8-\u037d\u037f-\u1fff\u200c\u200d\u203f\u2040\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff]|%[0-9a-fA-F]{2}|\\[!#-\/;=?\-@_~]))?)?)(?=[\s\.;,)#])/,
_qname: /^((?:[A-Z_a-z\xc0-\xd6\xd8-\xf6\xf8-\u02ff\u0370-\u037d\u037f-\u1fff\u200c\u200d\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff])(?:[\.\-0-9A-Z_a-z\xb7\xc0-\xd6\xd8-\xf6\xf8-\u037d\u037f-\u1fff\u200c\u200d\u203f\u2040\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff])*)?:((?:(?:[0-:A-Z_a-z\xc0-\xd6\xd8-\xf6\xf8-\u02ff\u0370-\u037d\u037f-\u1fff\u200c\u200d\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff]|%[0-9a-fA-F]{2}|\\[!#-\/;=?\-@_~])(?:(?:[\.\-0-:A-Z_a-z\xb7\xc0-\xd6\xd8-\xf6\xf8-\u037d\u037f-\u1fff\u200c\u200d\u203f\u2040\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff]|%[0-9a-fA-F]{2}|\\[!#-\/;=?\-@_~])*(?:[\-0-:A-Z_a-z\xb7\xc0-\xd6\xd8-\xf6\xf8-\u037d\u037f-\u1fff\u200c\u200d\u203f\u2040\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff]|%[0-9a-fA-F]{2}|\\[!#-\/;=?\-@_~]))?)?)(?=[\s;,)#]|\.[\s#\(\[<"'])/,
_number: /^[\-+]?(?:\d+\.?\d*([eE](?:[\-\+])?\d+)|\d+\.\d+|\.\d+|\d+)(?=\s*[\s\.;,)#])/,
_boolean: /^(?:true|false)(?=[\s#,;.])/,
_dot: /^\.(?!\d)/, // If a digit follows a dot, it is a number, not punctuation.
Expand Down Expand Up @@ -234,8 +234,11 @@ N3Lexer.prototype = {
token.type = 'prefix';
token.value = match[1] || '';
}
// Try to find a qname.
else if (match = this._qname.exec(input)) {
// Try to find a qname. Since it can contain (but not end with) a dot,
// we always need a non-dot character before deciding it is a qname.
// Therefore, try inserting a space if we're at the end of the input.
else if ((match = this._qname.exec(input)) ||
inputFinished && (match = this._qname.exec(input + ' '))) {
token.type = 'qname';
token.prefix = match[1] || '';
token.value = this._unescape(match[2]);
Expand Down
10 changes: 10 additions & 0 deletions test/N3Lexer-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,16 @@ describe('N3Lexer', function () {
{ type: 'dot', line: 1 },
{ type: 'eof', line: 1 }));

it('should tokenize a qname with a dot, split in half while streaming',
shouldTokenize(streamOf('dbpedia:Anthony_J._Batt', 'aglia '),
{ type: 'qname', prefix: 'dbpedia', value: 'Anthony_J._Battaglia', line: 1 },
{ type: 'eof', line: 1 }));

it('should tokenize a qname with a dot, split after the dot while streaming',
shouldTokenize(streamOf('dbpedia:Anthony_J.', '_Battaglia '),
{ type: 'qname', prefix: 'dbpedia', value: 'Anthony_J._Battaglia', line: 1 },
{ type: 'eof', line: 1 }));

it('should tokenize a stream',
shouldTokenize(streamOf('<a>\n<b', '> ', '"""', 'c\n', '"""', '.',
'<d> <e', '> ', '""', '.',
Expand Down

0 comments on commit 2935d2b

Please sign in to comment.