Browse files

Use sqlite for wordnet integration

This is a rewrite of the wordnet module to use the sqlite database from
http://wnsql.sourceforge.net . It largely maintains feature parity, but
the API has been changed to be more regular. The changes are roughly:

  get -> getWord, getWordById
  lookup -> findWords
  lookupSynonyms -> word.senses[n].getSynonyms

The file download functionality has been removed for the moment, as it's
not currently clear to me how that should work in the larger scheme of
things.
  • Loading branch information...
1 parent b297512 commit 67e2614a170c4486da4d199d2e84e6497ed38439 @mullr mullr committed Mar 12, 2012
View
7 Makefile
@@ -23,18 +23,15 @@ SHELL := /bin/bash
benchmark:
@node benchmarks
-test_clean:
- @ls -1 ./io_spec/test_data/wordnet/download/{index,data}* | xargs rm -f
-
-clean: test_clean
+clean:
@find ./ -name *~ | xargs rm -f
@find ./ -name \#* | xargs rm -f
@rm *classifier.json
test:
@NODE_PATH=. jasmine-node spec/
-test_io: test_clean
+test_io:
@NODE_PATH=. jasmine-node io_spec/
test_io_unclean:
View
48 README.md
@@ -423,49 +423,47 @@ A TfIdf instance can also be serialized and deserialzed for save and recall.
WordNet
-------
-One of the newest and most experimental features is WordNet integration. Here's an
-example of using natural to look up definitions of the word node. The parameter in
-the WordNet constructor is the local directory that will store the WordNet
-database files. If the database files are not present in the specified directories
-natural will download them for you.
+Natural provides a partial wrapper over the WordNet database. To use it,
+you'll need to download the sqlite version of the database from
+http://sourceforge.net/projects/wnsql/files/ and put it somewhere on your system.
+Then, pass in the path to the database when calling the WordNet constructor.
Keep in mind the WordNet integration is to be considered experimental at this point
and not production ready. The API is also subject to change.
Here's an example of looking up definitions for the word, "node".
- var wordnet = new natural.WordNet('.');
+ var wordnet = new natural.WordNet('./wordnet30.sqlite');
- wordnet.lookup('node', function(results) {
- results.forEach(function(result) {
+ wordnet.getWord('node', function(word) {
+ console.log(word.lemma);
+ word.senses.forEach(function(sense) {
console.log('------------------------------------');
- console.log(result.synsetOffset);
- console.log(result.pos);
- console.log(result.lemma);
- console.log(result.synonyms);
- console.log(result.pos);
- console.log(result.gloss);
+ console.log(sense.definition);
+ console.log(sense.pos);
+ sense.getSynonyms(function(synonyms) {
+ synonyms.forEach(function(synonym) {
+ console.log("- " + synonym.lemma);
+ });
+ });
});
});
-Given a synset offset and part of speech a definition can be looked up directly.
+You can also search for multiple words, using '%' as a wildcard.
- var wordnet = new natural.WordNet('.');
-
- wordnet.get(4424418, 'n', function(result) {
- console.log('------------------------------------');
- console.log(result.lemma);
- console.log(result.pos);
- console.log(result.gloss);
- console.log(result.synonyms);
+ wordnet.findWords("nod%", function(words) {
+ words.forEach(function(word) {
+ console.log(word.lemma);
+ });
});
+
Princeton University "About WordNet." WordNet. Princeton University. 2010. <http://wordnet.princeton.edu>
License
-------
-Copyright (c) 2011, Chris Umbel, Rob Ellis
+Copyright (c) 2011, Chris Umbel, Rob Ellis, Russell Mull
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@@ -491,4 +489,4 @@ WordNet License
This license is available as the file LICENSE in any downloaded version of WordNet.
WordNet 3.0 license: (Download)
-WordNet Release 3.0 This software and database is being provided to you, the LICENSEE, by Princeton University under the following license. By obtaining, using and/or copying this software and database, you agree that you have read, understood, and will comply with these terms and conditions.: Permission to use, copy, modify and distribute this software and database and its documentation for any purpose and without fee or royalty is hereby granted, provided that you agree to comply with the following copyright notice and statements, including the disclaimer, and that the same appear on ALL copies of the software, database and documentation, including modifications that you make for internal use or for distribution. WordNet 3.0 Copyright 2006 by Princeton University. All rights reserved. THIS SOFTWARE AND DATABASE IS PROVIDED "AS IS" AND PRINCETON UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PRINCETON UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE, DATABASE OR DOCUMENTATION WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS. The name of Princeton University or Princeton may not be used in advertising or publicity pertaining to distribution of the software and/or database. Title to copyright in this software, database and any associated documentation shall at all times remain with Princeton University and LICENSEE agrees to preserve same.
+WordNet Release 3.0 This software and database is being provided to you, the LICENSEE, by Princeton University under the following license. By obtaining, using and/or copying this software and database, you agree that you have read, understood, and will comply with these terms and conditions.: Permission to use, copy, modify and distribute this software and database and its documentation for any purpose and without fee or royalty is hereby granted, provided that you agree to comply with the following copyright notice and statements, including the disclaimer, and that the same appear on ALL copies of the software, database and documentation, including modifications that you make for internal use or for distribution. WordNet 3.0 Copyright 2006 by Princeton University. All rights reserved. THIS SOFTWARE AND DATABASE IS PROVIDED "AS IS" AND PRINCETON UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PRINCETON UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE, DATABASE OR DOCUMENTATION WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS. The name of Princeton University or Princeton may not be used in advertising or publicity pertaining to distribution of the software and/or database. Title to copyright in this software, database and any associated documentation shall at all times remain with Princeton University and LICENSEE agrees to preserve same.
View
1 io_spec/test_data/wordnet/download/README.txt
@@ -1 +0,0 @@
-This directory contains temporary files used during io tests.
View
111 io_spec/wordnet_spec.js
@@ -1,111 +0,0 @@
-/*
-Copyright (c) 2011, Chris Umbel
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-var WordNet = require('lib/natural/wordnet/wordnet');
-jasmine.asyncSpecWait.timeout = 30 * 1000;
-
-describe('wordnet', function() {
- it('should download files', function() {
- var wordnet = new WordNet('./io_spec/test_data/wordnet/download/', 'http://wordnet.naturalnode.com/');
-
- wordnet.lookup('entity', function(records) {
- expect(records.length).toBe(1);
- expect(records[0].lemma).toBe('entity');
-
- require('path').exists('./io_spec/test_data/wordnet/download/index.noun', function(exists) {
- expect(exists).toBeTruthy();
- asyncSpecDone();
- });
- });
-
- asyncSpecWait();
- });
-
- it('should lookup synonyms', function() {
- var wordnet = new WordNet('./io_spec/test_data/wordnet/download/', 'http://wordnet.naturalnode.com/');
-
- wordnet.lookupSynonyms('entity', function(records) {
- expect(records.length).toBe(3);
-
- require('path').exists('./io_spec/test_data/wordnet/download/index.noun', function(exists) {
- expect(exists).toBeTruthy();
- asyncSpecDone();
- });
- });
-
- asyncSpecWait();
- });
-
- it('should resize buffer for large returns', function() {
- var wordnet = new WordNet('./io_spec/test_data/wordnet/download/', 'http://wordnet.naturalnode.com/');
-
- wordnet.lookupSynonyms('hot', function(records) {
- expect(records.length).toBe(53);
- asyncSpecDone();
- });
-
- asyncSpecWait();
- });
-
- it('should lookup synonyms give a synset offset and a pos', function() {
- var wordnet = new WordNet('./io_spec/test_data/wordnet/download/', 'http://wordnet.naturalnode.com/');
-
- wordnet.getSynonyms(1740, 'n', function(records) {
- expect(records.length).toBe(3);
- expect(records[0].synsetOffset).toBe(4424418);
- expect(records[1].synsetOffset).toBe(2137);
- expect(records[2].synsetOffset).toBe(1930);
- asyncSpecDone();
- });
-
- asyncSpecWait();
- });
-
- it('should lookup synonyms via a provided synset object', function() {
- var wordnet = new WordNet('./io_spec/test_data/wordnet/download/', 'http://wordnet.naturalnode.com/');
-
- wordnet.lookup('entity', function(results) {
- wordnet.getSynonyms(results[0], function(records) {
- expect(records.length).toBe(3);
- expect(records[0].synsetOffset).toBe(4424418);
- expect(records[1].synsetOffset).toBe(2137);
- expect(records[2].synsetOffset).toBe(1930);
- asyncSpecDone();
- });
- });
-
- asyncSpecWait();
- });
-
- it('should add records but once', function() {
- var wordnet = new WordNet('./io_spec/test_data/wordnet/download/', 'http://wordnet.naturalnode.com/');
-
- wordnet.lookup('node', function(records) {
- expect(records.length).toBe(8);
- expect(records[0].lemma).toBe('node');
-
- asyncSpecDone();
- });
-
- asyncSpecWait();
- });
-});
View
74 lib/natural/wordnet/data_file.js
@@ -1,74 +0,0 @@
-/*
-Copyright (c) 2011, Chris Umbel
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-var WordNetFile = require('./wordnet_file'),
- fs = require('fs'),
- util = require('util');
-
-function get(location, callback) {
- var buff = new Buffer(4096);
-
- this.open(function(err, fd, done) {
- WordNetFile.appendLineChar(fd, location, 0, buff, function(line) {
- done();
- var data = line.split('| ');
- var tokens = data[0].split(/\s+/);
- var ptrs = [];
- var wCnt = parseInt(tokens[3], 10);
- var synonyms = [];
-
- for(var i = 0; i < wCnt; i++) {
- synonyms.push(tokens[4 + i * 2]);
- }
-
- for(var i = 0; i < parseInt(tokens[6], 10); i++) {
- ptrs.push({
- pointerSymbol: tokens[7 + i * 4],
- synsetOffset: parseInt(tokens[8 + i * 4], 10),
- pos: tokens[9 + i * 4],
- sourceTarget: tokens[10 + i * 4]
- });
- }
-
- callback({
- synsetOffset: parseInt(tokens[0], 10),
- lexFilenum: parseInt(tokens[1], 10),
- pos: tokens[2],
- wCnt: wCnt,
- lemma: tokens[4],
- synonyms: synonyms,
- lexId: tokens[5],
- ptrs: ptrs,
- gloss: data[1]
- });
- });
- });
-}
-
-var DataFile = function(dataDir, base, name) {
- WordNetFile.call(this, dataDir, base, 'data.' + name);
-};
-
-util.inherits(DataFile, WordNetFile);
-DataFile.prototype.get = get;
-
-module.exports = DataFile;
View
140 lib/natural/wordnet/index_file.js
@@ -1,140 +0,0 @@
-/*
-Copyright (c) 2011, Chris Umbel
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-var WordNetFile = require('./wordnet_file'),
- fs = require('fs'),
- util = require('util');
-
-function getFileSize(path) {
- var stat = fs.statSync(path);
- return stat.size;
-}
-
-function findPrevEOL(fd, pos, callback) {
- var buff = new Buffer(1024);
-
- if(pos == 0)
- callback(0);
- else {
- fs.read(fd, buff, 0, 1, pos, function(err, count) {
- if(buff[0] == 10)
- callback(pos + 1);
- else
- findPrevEOL(fd, pos - 1, callback);
- });
- }
-}
-
-function readLine(fd, pos, callback) {
- var buff = new Buffer(1024);
-
- findPrevEOL(fd, pos, function(pos) {
- WordNetFile.appendLineChar(fd, pos, 0, buff, callback);
- });
-}
-
-function miss(callback) {
- callback({status: 'miss'});
-}
-
-function findAt(fd, size, pos, lastPos, adjustment, searchKey, callback) {
- if (lastPos == pos || pos >= size) {
- miss(callback);
- } else {
- readLine(fd, pos, function(line) {
- var tokens = line.split(/\s+/);
- var key = tokens[0];
-
- if(key == searchKey) {
- callback({status: 'hit', key: key, 'line': line, tokens: tokens});
- } else if(adjustment == 1) {
- miss(callback);
- } else {
- adjustment = Math.ceil(adjustment * 0.5);
-
- if (key < searchKey) {
- findAt(fd, size, pos + adjustment, pos, adjustment, searchKey, callback);
- } else {
- findAt(fd, size, pos - adjustment, pos, adjustment, searchKey, callback);
- }
- }
- });
- }
-}
-
-function find(searchKey, callback) {
- var indexFile = this;
-
- indexFile.open(function(err, fd, done) {
- if(err) {
- console.log(err);
- } else {
- var size = getFileSize(indexFile.filePath) - 1;
- var pos = Math.ceil(size / 2);
- findAt(fd, size, pos, null, pos, searchKey,
- function(result) { callback(result); done(); });
- }
- });
-}
-
-function lookupFromFile(word, callback) {
- this.find(word, function(record) {
- var indexRecord = null;
-
- if(record.status == 'hit') {
- var ptrs = [], offsets = [];
-
- for(var i = 0; i < parseInt(record.tokens[3]); i++)
- ptrs.push(record.tokens[i]);
-
- for(var i = 0; i < parseInt(record.tokens[2]); i++)
- offsets.push(parseInt(record.tokens[ptrs.length + 6 + i], 10));
-
- indexRecord = {
- lemma: record.tokens[0],
- pos: record.tokens[1],
- ptrSymbol: ptrs,
- senseCnt: parseInt(record.tokens[ptrs.length + 4], 10),
- tagsenseCnt: parseInt(record.tokens[ptrs.length + 5], 10),
- synsetOffset: offsets
- };
- }
-
- callback(indexRecord);
- });
-}
-
-function lookup(word, callback) {
- this.lookupFromFile(word, callback);
-}
-
-var IndexFile = function(dataDir, base, name) {
- WordNetFile.call(this, dataDir, base, 'index.' + name);
-};
-
-util.inherits(IndexFile, WordNetFile);
-
-IndexFile.prototype.lookupFromFile = lookupFromFile;
-IndexFile.prototype.lookup = lookup;
-IndexFile.prototype.find = find;
-
-module.exports = IndexFile;
View
206 lib/natural/wordnet/wordnet.js
@@ -1,5 +1,5 @@
/*
-Copyright (c) 2011, Chris Umbel
+Copyright (c) 2011, Chris Umbel and Russell Mull
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@@ -20,142 +20,90 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
-var IndexFile = require('./index_file'),
- DataFile = require('./data_file');
+var sqlite3 = require('sqlite3');
+var _ = require('underscore');
-function pushResults(data, results, offsets, callback) {
- var wordnet = this;
+module.exports = function(dbInfo) {
+ var db = null;
- if(offsets.length == 0) {
- callback(results);
+ if( dbInfo instanceof sqlite3.Database) {
+ db = dbInfo;
} else {
- data.get(offsets.pop(), function(record) {
- results.push(record);
- wordnet.pushResults(data, results, offsets, callback);
- });
+ db = new sqlite3.Database(dbFile, sqlite3.OPEN_READONLY);
}
-}
-function lookupFromFiles(files, results, word, callback) {
- var wordnet = this;
-
- if(files.length == 0)
- callback(results);
- else {
- var file = files.pop();
-
- file.index.lookup(word, function(record) {
- if(record) {
- wordnet.pushResults(file.data, results, record.synsetOffset, function() {
- wordnet.lookupFromFiles(files, results, word, callback);
- });
- } else {
- wordnet.lookupFromFiles(files, results, word, callback);
- }
- });
- }
-}
-
-function lookup(word, callback) {
- word = word.toLowerCase().replace(/\s+/g, '_');
-
- this.lookupFromFiles([
- {index: this.nounIndex, data: this.nounData},
- {index: this.verbIndex, data: this.verbData},
- {index: this.adjIndex, data: this.adjData},
- {index: this.advIndex, data: this.advData},
- ], [], word, callback);
-}
-
-function get(synsetOffset, pos, callback) {
- var dataFile = this.getDataFile(pos);
- var wordnet = this;
-
- dataFile.get(synsetOffset, function(result) {
- callback(result);
- });
-}
-
-function getDataFile(pos) {
- switch(pos) {
- case 'n':
- return this.nounData;
- case 'v':
- return this.verbData;
- case 'a': case 's':
- return this.adjData;
- case 'r':
- return this.advData;
+ /**
+ * Return a function which looks up words using the given
+ * query field, which should be qualified by table name. (e.g.
+ * words.wordid) Will use LIKE for the task if useLikeOperator
+ * is true, otherwise will query on equality.
+ */
+ var findWordsBy = function(queryField, useLikeOperator)
+ {
+ var sql = "SELECT synsets.*, senses.*, words.* " +
+ "FROM synsets " +
+ "INNER JOIN senses ON synsets.synsetid = senses.synsetid " +
+ "INNER JOIN words ON senses.wordid = words.wordid ";
+
+ if(useLikeOperator) {
+ sql += "WHERE " + queryField + " LIKE ?";
+ } else {
+ sql += "WHERE " + queryField + " = ?";
+ }
+
+ return function(queryValue, callback) {
+ db.all(sql, [queryValue], function(err, rows) {
+ var words = _.chain(rows).
+ groupBy('wordid').values().
+ map( function(rows) { return new Word(rows); } ).
+ value();
+
+ callback(words);
+ });
}
-}
-
-function loadSynonyms(synonyms, results, ptrs, callback) {
- var wordnet = this;
-
- if(ptrs.length > 0) {
- var ptr = ptrs.pop();
-
- this.get(ptr.synsetOffset, ptr.pos, function(result) {
- synonyms.push(result);
- wordnet.loadSynonyms(synonyms, results, ptrs, callback);
- });
- } else {
- wordnet.loadResultSynonyms(synonyms, results, callback);
}
-}
-
-function loadResultSynonyms(synonyms, results, callback) {
- var wordnet = this;
-
- if(results.length > 0) {
- var result = results.pop();
- wordnet.loadSynonyms(synonyms, results, result.ptrs, callback);
- } else
- callback(synonyms);
-}
-
-function lookupSynonyms(word, callback) {
- var wordnet = this;
-
- wordnet.lookup(word, function(results) {
- wordnet.loadResultSynonyms([], results, callback);
- });
-}
-
-function getSynonyms() {
- var wordnet = this;
- var callback = arguments[2] ? arguments[2] : arguments[1];
- var pos = arguments[0].pos ? arguments[0].pos : arguments[1];
- var synsetOffset = arguments[0].synsetOffset ? arguments[0].synsetOffset : arguments[0];
- this.get(synsetOffset, pos, function(result) {
- wordnet.loadSynonyms([], [], result.ptrs, callback);
- });
-}
-
-function WordNet(dataDir, base) {
- if(!base)
- base = 'http://wordnet.naturalnode.com/';
-
- this.nounIndex = new IndexFile(dataDir, base, 'noun');
- this.verbIndex = new IndexFile(dataDir, base, 'verb');
- this.adjIndex = new IndexFile(dataDir, base, 'adj');
- this.advIndex = new IndexFile(dataDir, base, 'adv');
+ /**
+ * Create a getter for a single word, using the same
+ * parameter semantics as findWordsBy
+ */
+ var getWordBy = function(queryField, useLikeOperator)
+ {
+ return function(queryValue, callback)
+ {
+ findWordsBy(queryField)(queryValue, function(words) {
+ if(words.length < 1) {
+ callback(null);
+ } else {
+ callback(words[0]);
+ }
+ });
+ }
+ }
- this.nounData = new DataFile(dataDir, base, 'noun');
- this.verbData = new DataFile(dataDir, base, 'verb');
- this.adjData = new DataFile(dataDir, base, 'adj');
- this.advData = new DataFile(dataDir, base, 'adv');
+ var Word = function(rows)
+ {
+ return {
+ id: rows[0].wordid,
+ lemma: rows[0].lemma,
+
+ senses: _.map(rows, function(row) {
+ return {
+ pos: row.pos,
+ definition: row.definition,
+ synsetId: row.synsetid,
+ getSynonyms: function(callback) {
+ findWordsBy('synsets.synsetid', false)(this.synsetId, callback);
+ }
+ };
+ })
+ };
- this.get = get;
- this.lookup = lookup;
- this.lookupFromFiles = lookupFromFiles;
- this.pushResults = pushResults;
- this.loadResultSynonyms = loadResultSynonyms;
- this.loadSynonyms = loadSynonyms;
- this.lookupSynonyms = lookupSynonyms;
- this.getSynonyms = getSynonyms;
- this.getDataFile = getDataFile;
-}
+ }
-module.exports = WordNet;
+ return {
+ getWord: getWordBy('words.lemma', true),
+ getWordById: getWordBy('words.wordid', false),
+ findWords: findWordsBy('words.lemma', true)
+ };
+};
View
155 lib/natural/wordnet/wordnet_file.js
@@ -1,155 +0,0 @@
-/*
-Copyright (c) 2011, Chris Umbel
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-var fs = require('fs'),
- path = require('path'),
- url = require('url'),
- util = require('util');
-
-function downloadFile(url, filePath, callback) {
- var zlib;
-
- try {
- zlib = require('zlib');
- var http = require('http');
-
- var req = http.get({
- host: url.host,
- path: url.path,
- port: 80
- });
-
- req.on('response', function (response) {
- var fileStream = fs.createWriteStream(filePath);
- response.on('end', function () {
- fileStream.end();
- callback();
- });
- response.pipe(zlib.createGunzip()).pipe(fileStream);
- });
- return;
-
- } catch (e) {
- /* fall through to the legacy code below */
- }
-
-
- var compress, gzip;
-
- try {
- compress = require('compress');
- gzip = new compress.Gunzip();
- } catch (e) {
- console.log('Unable to load "compress" module. If you oare on unix please install it.' +
- '"compress" may not be available on Windows. In that case please download ' +
- 'the wordnet files yourself and point the constructor at the directory ' +
- 'where they\'re located.');
- throw e;
- }
-
- gzip.init();
-
- var http = require('http');
- var client = http.createClient(80, url.host);
- var request = client.request('GET', url.pathname, {host: url.host});
-
- client.on('error', function(err) {
- console.log(err);
- callback();
- });
-
- request.end();
- var fileStream = fs.createWriteStream(filePath);
-
- request.on('response', function(response) {
- var size = parseInt(response.headers['content-length']);
- var progress = 0;
-
- response.on('data', function(data) {
- var rawLen = data.length;
- data = gzip.inflate(data.toString('binary'), 'binary');
- var outBuff = new Buffer(data.length);
- outBuff.write(data.toString(), 0, 'binary');
-
- fileStream.write(outBuff, 'binary');
- progress += rawLen;
-
- if(progress >= size) {
- fileStream.end();
- callback();
- };
- });
- });
-}
-
-function appendLineChar(fd, pos, buffPos, buff, callback) {
- if(buffPos >= buff.length) {
- var newBuff = new Buffer(buff.length * 2);
- buff.copy(newBuff, 0, 0, buff.length);
- buff = newBuff;
- }
-
- fs.read(fd, buff, buffPos, 1, pos, function(err, count) {
- if(err)
- console.log(err);
- else {
- if(buff[buffPos] == 10 || buffPos == buff.length)
- callback(buff.slice(0, buffPos).toString('ASCII'));
- else {
- appendLineChar(fd, pos + 1, buffPos + 1, buff, callback);
- }
- }
- });
-}
-
-function open(callback) {
- var filePath = this.filePath;
- var url = this.url;
-
- path.exists(filePath, function(exists) {
- var _open = function(){
- fs.open(filePath, 'r', null, function(err, fd) {
- callback(err, fd, function() {fs.close(fd)});
- });
- };
-
- if(exists) {
- _open();
- } else {
- downloadFile(url, filePath, function() {
- _open();
- })
- }
- });
-}
-
-var WordNetFile = function(dataDir, base, fileName) {
- this.dataDir = dataDir;
- this.fileName = fileName;
- this.url = url.parse(url.resolve(base, this.fileName + '.gz'));
- this.filePath = require('path').join(this.dataDir, this.fileName);
-};
-
-WordNetFile.prototype.open = open;
-WordNetFile.appendLineChar = appendLineChar;
-
-module.exports = WordNetFile;
View
3 package.json
@@ -9,7 +9,8 @@
"dependencies": {
"sylvester": ">= 0.0.12",
"apparatus": ">= 0.0.4",
- "underscore": "*"
+ "underscore": "*",
+ "sqlite3": "2.1.x"
},
"devDependencies": {
"uubench": "0.0.x"
View
31 spec/test_data/wordnet/data.adj
@@ -1,31 +0,0 @@
- 1 This software and database is being provided to you, the LICENSEE, by
- 2 Princeton University under the following license. By obtaining, using
- 3 and/or copying this software and database, you agree that you have
- 4 read, understood, and will comply with these terms and conditions.:
- 5
- 6 Permission to use, copy, modify and distribute this software and
- 7 database and its documentation for any purpose and without fee or
- 8 royalty is hereby granted, provided that you agree to comply with
- 9 the following copyright notice and statements, including the disclaimer,
- 10 and that the same appear on ALL copies of the software, database and
- 11 documentation, including modifications that you make for internal
- 12 use or for distribution.
- 13
- 14 WordNet 3.0 Copyright 2006 by Princeton University. All rights reserved.
- 15
- 16 THIS SOFTWARE AND DATABASE IS PROVIDED "AS IS" AND PRINCETON
- 17 UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
- 18 IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PRINCETON
- 19 UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES OF MERCHANT-
- 20 ABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE
- 21 OF THE LICENSED SOFTWARE, DATABASE OR DOCUMENTATION WILL NOT
- 22 INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR
- 23 OTHER RIGHTS.
- 24
- 25 The name of Princeton University or Princeton may not be used in
- 26 advertising or publicity pertaining to distribution of the software
- 27 and/or database. Title to copyright in this software, database and
- 28 any associated documentation shall at all times remain with
- 29 Princeton University and LICENSEE agrees to preserve same.
-00001740 00 a 01 able 0 005 = 05200169 n 0000 = 05616246 n 0000 + 05616246 n 0101 + 05200169 n 0101 ! 00002098 a 0101 | (usually followed by `to') having the necessary means or skill or know-how or authority to do something; "able to swim"; "she was able to program her computer"; "we were at last able to buy a car"; "able to get a grant for the project"
-00002098 00 a 01 unable 0 002 = 05200169 n 0000 ! 00001740 a 0101 | (usually followed by `to') not having the necessary means or skill or know-how; "unable to get to town without a car"; "unable to obtain funds"
View
31 spec/test_data/wordnet/data.adv
@@ -1,31 +0,0 @@
- 1 This software and database is being provided to you, the LICENSEE, by
- 2 Princeton University under the following license. By obtaining, using
- 3 and/or copying this software and database, you agree that you have
- 4 read, understood, and will comply with these terms and conditions.:
- 5
- 6 Permission to use, copy, modify and distribute this software and
- 7 database and its documentation for any purpose and without fee or
- 8 royalty is hereby granted, provided that you agree to comply with
- 9 the following copyright notice and statements, including the disclaimer,
- 10 and that the same appear on ALL copies of the software, database and
- 11 documentation, including modifications that you make for internal
- 12 use or for distribution.
- 13
- 14 WordNet 3.0 Copyright 2006 by Princeton University. All rights reserved.
- 15
- 16 THIS SOFTWARE AND DATABASE IS PROVIDED "AS IS" AND PRINCETON
- 17 UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
- 18 IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PRINCETON
- 19 UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES OF MERCHANT-
- 20 ABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE
- 21 OF THE LICENSED SOFTWARE, DATABASE OR DOCUMENTATION WILL NOT
- 22 INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR
- 23 OTHER RIGHTS.
- 24
- 25 The name of Princeton University or Princeton may not be used in
- 26 advertising or publicity pertaining to distribution of the software
- 27 and/or database. Title to copyright in this software, database and
- 28 any associated documentation shall at all times remain with
- 29 Princeton University and LICENSEE agrees to preserve same.
-00001740 02 r 01 a_cappella 0 000 | without musical accompaniment; "they performed a cappella"
-00001837 02 r 03 AD 0 A.D. 0 anno_Domini 0 000 | in the Christian era; used before dates after the supposed year Christ was born; "in AD 200"
View
33 spec/test_data/wordnet/data.noun
@@ -1,33 +0,0 @@
- 1 This software and database is being provided to you, the LICENSEE, by
- 2 Princeton University under the following license. By obtaining, using
- 3 and/or copying this software and database, you agree that you have
- 4 read, understood, and will comply with these terms and conditions.:
- 5
- 6 Permission to use, copy, modify and distribute this software and
- 7 database and its documentation for any purpose and without fee or
- 8 royalty is hereby granted, provided that you agree to comply with
- 9 the following copyright notice and statements, including the disclaimer,
- 10 and that the same appear on ALL copies of the software, database and
- 11 documentation, including modifications that you make for internal
- 12 use or for distribution.
- 13
- 14 WordNet 3.0 Copyright 2006 by Princeton University. All rights reserved.
- 15
- 16 THIS SOFTWARE AND DATABASE IS PROVIDED "AS IS" AND PRINCETON
- 17 UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
- 18 IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PRINCETON
- 19 UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES OF MERCHANT-
- 20 ABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE
- 21 OF THE LICENSED SOFTWARE, DATABASE OR DOCUMENTATION WILL NOT
- 22 INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR
- 23 OTHER RIGHTS.
- 24
- 25 The name of Princeton University or Princeton may not be used in
- 26 advertising or publicity pertaining to distribution of the software
- 27 and/or database. Title to copyright in this software, database and
- 28 any associated documentation shall at all times remain with
- 29 Princeton University and LICENSEE agrees to preserve same.
-00001740 03 n 01 entity 0 003 ~ 00001930 n 0000 ~ 00002137 n 0000 ~ 04424418 n 0000 | that which is perceived or known or inferred to have its own distinct existence (living or nonliving)
-00001930 03 n 01 physical_entity 0 007 @ 00001740 n 0000 ~ 00002452 n 0000 ~ 00002684 n 0000 ~ 00007347 n 0000 ~ 00020827 n 0000 ~ 00029677 n 0000 ~ 14580597 n 0000 | an entity that has physical existence
-00002137 03 n 02 abstraction 0 abstract_entity 0 010 @ 00001740 n 0000 + 00692329 v 0101 ~ 00023100 n 0000 ~ 00024264 n 0000 ~ 00031264 n 0000 ~ 00031921 n 0000 ~ 00033020 n 0000 ~ 00033615 n 0000 ~ 05810143 n 0000 ~ 07999699 n 0000 | a general concept formed by extracting common features from specific examples
-00002452 03 n 01 thing 0 009 @ 00001930 n 0000 ~ 04347225 n 0000 ~ 09225146 n 0000 ~ 09312645 n 0000 ~ 09367203 n 0000 ~ 09385911 n 0000 ~ 09407867 n 0000 ~ 09465459 n 0000 ~ 09468959 n 0000 | a separate and self-contained entity
View
31 spec/test_data/wordnet/data.verb
@@ -1,31 +0,0 @@
- 1 This software and database is being provided to you, the LICENSEE, by
- 2 Princeton University under the following license. By obtaining, using
- 3 and/or copying this software and database, you agree that you have
- 4 read, understood, and will comply with these terms and conditions.:
- 5
- 6 Permission to use, copy, modify and distribute this software and
- 7 database and its documentation for any purpose and without fee or
- 8 royalty is hereby granted, provided that you agree to comply with
- 9 the following copyright notice and statements, including the disclaimer,
- 10 and that the same appear on ALL copies of the software, database and
- 11 documentation, including modifications that you make for internal
- 12 use or for distribution.
- 13
- 14 WordNet 3.0 Copyright 2006 by Princeton University. All rights reserved.
- 15
- 16 THIS SOFTWARE AND DATABASE IS PROVIDED "AS IS" AND PRINCETON
- 17 UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
- 18 IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PRINCETON
- 19 UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES OF MERCHANT-
- 20 ABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE
- 21 OF THE LICENSED SOFTWARE, DATABASE OR DOCUMENTATION WILL NOT
- 22 INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR
- 23 OTHER RIGHTS.
- 24
- 25 The name of Princeton University or Princeton may not be used in
- 26 advertising or publicity pertaining to distribution of the software
- 27 and/or database. Title to copyright in this software, database and
- 28 any associated documentation shall at all times remain with
- 29 Princeton University and LICENSEE agrees to preserve same.
-00001740 29 v 04 breathe 0 take_a_breath 0 respire 0 suspire 3 021 * 00005041 v 0000 * 00004227 v 0000 + 03110322 a 0301 + 00831191 n 0303 + 04080833 n 0301 + 04250850 n 0105 + 00831191 n 0101 ^ 00004227 v 0103 ^ 00005041 v 0103 $ 00002325 v 0000 $ 00002573 v 0000 ~ 00002573 v 0000 ~ 00002724 v 0000 ~ 00002942 v 0000 ~ 00003826 v 0000 ~ 00004032 v 0000 ~ 00004227 v 0000 ~ 00005041 v 0000 ~ 00006697 v 0000 ~ 00007328 v 0000 ~ 00017031 v 0000 02 + 02 00 + 08 00 | draw air into, and expel out of, the lungs; "I can breathe better when the air is clean"; "The patient is respiring"
-00002325 29 v 01 respire 1 005 $ 00001740 v 0000 @ 02108377 v 0000 + 03110322 a 0101 + 00831191 n 0103 + 00830811 n 0101 01 + 02 00 | undergo the biomedical and metabolic processes of respiration by taking up oxygen and producing carbon monoxide
View
32 spec/test_data/wordnet/index.adj
@@ -1,32 +0,0 @@
- 1 This software and database is being provided to you, the LICENSEE, by
- 2 Princeton University under the following license. By obtaining, using
- 3 and/or copying this software and database, you agree that you have
- 4 read, understood, and will comply with these terms and conditions.:
- 5
- 6 Permission to use, copy, modify and distribute this software and
- 7 database and its documentation for any purpose and without fee or
- 8 royalty is hereby granted, provided that you agree to comply with
- 9 the following copyright notice and statements, including the disclaimer,
- 10 and that the same appear on ALL copies of the software, database and
- 11 documentation, including modifications that you make for internal
- 12 use or for distribution.
- 13
- 14 WordNet 3.0 Copyright 2006 by Princeton University. All rights reserved.
- 15
- 16 THIS SOFTWARE AND DATABASE IS PROVIDED "AS IS" AND PRINCETON
- 17 UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
- 18 IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PRINCETON
- 19 UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES OF MERCHANT-
- 20 ABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE
- 21 OF THE LICENSED SOFTWARE, DATABASE OR DOCUMENTATION WILL NOT
- 22 INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR
- 23 OTHER RIGHTS.
- 24
- 25 The name of Princeton University or Princeton may not be used in
- 26 advertising or publicity pertaining to distribution of the software
- 27 and/or database. Title to copyright in this software, database and
- 28 any associated documentation shall at all times remain with
- 29 Princeton University and LICENSEE agrees to preserve same.
-able a 4 4 ! & = + 4 3 00001740 00510348 00306663 01017439
-unable a 3 3 ! & = 3 2 00002098 00307794 01825080
-
View
30 spec/test_data/wordnet/index.adv
@@ -1,30 +0,0 @@
- 1 This software and database is being provided to you, the LICENSEE, by
- 2 Princeton University under the following license. By obtaining, using
- 3 and/or copying this software and database, you agree that you have
- 4 read, understood, and will comply with these terms and conditions.:
- 5
- 6 Permission to use, copy, modify and distribute this software and
- 7 database and its documentation for any purpose and without fee or
- 8 royalty is hereby granted, provided that you agree to comply with
- 9 the following copyright notice and statements, including the disclaimer,
- 10 and that the same appear on ALL copies of the software, database and
- 11 documentation, including modifications that you make for internal
- 12 use or for distribution.
- 13
- 14 WordNet 3.0 Copyright 2006 by Princeton University. All rights reserved.
- 15
- 16 THIS SOFTWARE AND DATABASE IS PROVIDED "AS IS" AND PRINCETON
- 17 UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
- 18 IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PRINCETON
- 19 UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES OF MERCHANT-
- 20 ABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE
- 21 OF THE LICENSED SOFTWARE, DATABASE OR DOCUMENTATION WILL NOT
- 22 INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR
- 23 OTHER RIGHTS.
- 24
- 25 The name of Princeton University or Princeton may not be used in
- 26 advertising or publicity pertaining to distribution of the software
- 27 and/or database. Title to copyright in this software, database and
- 28 any associated documentation shall at all times remain with
- 29 Princeton University and LICENSEE agrees to preserve same.
-a_cappella r 1 0 1 0 00001740
View
141 spec/test_data/wordnet/index.noun
@@ -1,141 +0,0 @@
- 1 This software and database is being provided to you, the LICENSEE, by
- 2 Princeton University under the following license. By obtaining, using
- 3 and/or copying this software and database, you agree that you have
- 4 read, understood, and will comply with these terms and conditions.:
- 5
- 6 Permission to use, copy, modify and distribute this software and
- 7 database and its documentation for any purpose and without fee or
- 8 royalty is hereby granted, provided that you agree to comply with
- 9 the following copyright notice and statements, including the disclaimer,
- 10 and that the same appear on ALL copies of the software, database and
- 11 documentation, including modifications that you make for internal
- 12 use or for distribution.
- 13
- 14 WordNet 3.0 Copyright 2006 by Princeton University. All rights reserved.
- 15
- 16 THIS SOFTWARE AND DATABASE IS PROVIDED "AS IS" AND PRINCETON
- 17 UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
- 18 IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PRINCETON
- 19 UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES OF MERCHANT-
- 20 ABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE
- 21 OF THE LICENSED SOFTWARE, DATABASE OR DOCUMENTATION WILL NOT
- 22 INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR
- 23 OTHER RIGHTS.
- 24
- 25 The name of Princeton University or Princeton may not be used in
- 26 advertising or publicity pertaining to distribution of the software
- 27 and/or database. Title to copyright in this software, database and
- 28 any associated documentation shall at all times remain with
-abandoned_person n 1 2 @ ~ 1 0 09753930
-abandoned_ship n 1 1 @ 1 0 02666501
-abandonment n 3 3 @ ~ + 3 2 00204439 00055315 00091013
-abarticulation n 1 1 @ 1 0 14294271
-abasement n 2 3 @ ~ + 2 1 14440623 00273449
-abashment n 1 2 @ + 1 0 07508092
-entity n 1 1 ~ 1 1 00001740
-mountaineering n 1 4 @ ~ + - 1 0 00325785
-mountainside n 1 2 @ #p 1 1 09361517
-mountebank n 1 2 @ ~ 1 0 10334782
-mounter n 2 2 @ + 2 0 10335123 10334957
-nantucket n 1 1 @ 1 0 09098592
-nanus n 1 2 @ ~ 1 0 10040344
-naomi n 1 1 @ 1 0 11199943
-nap n 5 3 @ ~ + 5 0 15273626 04947087 03936045 00858377 00493742
-oceanfront n 1 1 @ 1 0 09376786
-oceania n 1 3 @ #p %p 1 0 08835875
-oceanic n 1 2 @ ~ 1 0 06937985
-oceanic_abyss n 1 2 @ ~ 1 0 09461315
-oceanic_bird n 1 2 @ ~ 1 0 02057731
-oceanic_bonito n 1 2 @ #m 1 0 02629230
-oceanic_whitetip_shark n 1 2 @ #m 1 0 01490112
-oceanica n 1 3 @ #p %p 1 0 08835875
-oceanid n 1 2 @ ; 1 0 09550899
-oceanites n 1 3 @ #m %m 1 0 02061425
-oceanites_oceanicus n 1 2 @ #m 1 0 02061560
-oceanographer n 1 3 @ ~ + 1 0 10370122
-pasiphae n 1 2 @ ; 1 0 09568643
-paso_doble n 2 1 @ 2 1 00536872 07056265
-paspalum n 1 2 @ #m 1 0 12128071
-paspalum_dilatatum n 1 2 @ #m 1 0 12128071
-paspalum_distichum n 1 2 @ #m 1 0 12128490
-paspalum_notatum n 1 2 @ #m 1 0 12128306
-pasque_flower n 1 3 @ ~ #m 1 0 11737534
-pasqueflower n 1 3 @ ~ #m 1 0 11737534
-pasquinade n 1 1 @ 1 0 06780309
-pass n 16 5 @ ~ #p + ; 16 7 00127286 15139552 00560529 09386842 06691083 06690408 00304592 13936939 07418822 07341860 07176499 06690226 06519369 00787061 00105820 00065575
-pass-through n 1 1 @ 1 0 03897130
-technical_analyst n 1 1 @ 1 0 09910840
-technical_foul n 1 2 @ ; 1 0 00771356
-technical_knockout n 1 1 @ 1 0 00134246
-technical_school n 1 2 @ ~ 1 1 08285246
-technical_sergeant n 1 1 @ 1 0 10696101
-technicality n 1 2 @ + 1 0 05820462
-technician n 2 3 @ ~ + 2 1 10696251 10696508
-technicolor n 1 1 @ 1 0 05667086
-technique n 2 3 @ ~ + 2 2 05665146 05643190
-techno n 1 1 @ 1 0 07058064
-technobabble n 1 1 @ 1 0 07088319
-technocracy n 1 1 @ 1 0 08363106
-technocrat n 2 2 @ #m 2 0 10696755 10696672
-technological_revolution n 1 1 @ 1 1 15260277
-technologist n 1 3 @ ~ + 1 0 09615807
-technology n 2 4 @ ~ + - 2 2 00949619 06125041
-technology_administration n 1 3 @ #p %p 1 0 08130712
-technophile n 1 2 ! @ 1 0 10696888
-technophilia n 1 1 @ 1 0 07555775
-technophobe n 1 2 ! @ 1 0 10697013
-whistle-blower n 1 1 @ 1 0 10777400
-whistle-stop_tour n 1 2 @ #p 1 0 00749232
-whistle_blower n 1 1 @ 1 0 10777400
-whistle_buoy n 1 1 @ 1 0 07266665
-whistle_stop n 1 1 @ 1 0 04579795
-whistleblower n 1 1 @ 1 0 10777400
-zip_fastener n 1 2 @ #p 1 0 04238321
-zip_gun n 1 1 @ 1 0 04615149
-ziphiidae n 1 3 @ #m %m 1 0 02067941
-zipper n 1 3 @ #p + 1 0 04238321
-zippo n 1 2 @ ~ 1 0 13740168
-zirbanit n 1 2 @ ; 1 0 09521648
-zircon n 1 3 @ ~ %s 1 0 15109127
-zirconia n 1 1 @ 1 0 15109391
-zirconium n 1 2 @ #s 1 0 14662281
-zirconium_dioxide n 1 1 @ 1 0 15109391
-zirconium_oxide n 1 1 @ 1 0 15109391
-zirconium_silicate n 1 3 @ ~ %s 1 0 15109127
-zit n 1 2 @ ~ 1 0 14334306
-zither n 1 2 @ ~ 1 0 04615226
-zithern n 1 2 @ ~ 1 0 04615226
-zithromax n 1 1 @ 1 0 02765656
-ziti n 1 1 @ 1 0 07700255
-zizania n 1 3 @ #m %m 1 0 12145802
-zizania_aquatica n 1 3 @ #m %p 1 0 12145919
-ziziphus n 1 3 @ #m %m 1 0 13143097
-ziziphus_jujuba n 1 3 @ #m %p 1 0 13143285
-ziziphus_lotus n 1 2 @ #m 1 0 13143483
-zizz n 2 2 @ ; 2 0 07479799 00858742
-zloty n 1 2 @ %p 1 1 13710777
-zn n 1 2 @ ~ 1 0 14661977
-zoanthropy n 1 1 @ 1 0 14379017
-zoarces n 1 3 @ #m %m 1 0 02617402
-zoarces_viviparus n 1 2 @ #m 1 0 02617537
-zoarcidae n 1 3 @ #m %m 1 0 02617029
-zocor n 1 2 @ ; 1 0 04221994
-zodiac n 2 5 @ #p %p + ; 2 0 08685188 04615456
-zodiacal_light n 1 1 @ 1 1 11527556
-zoisia n 1 3 @ #m %m 1 0 12146100
-zola n 1 1 @ 1 0 11407591
-zollinger-ellison_syndrome n 1 1 @ 1 0 14471054
-zoloft n 1 2 @ ; 1 0 04174853
-zomba n 1 2 @ #p 1 0 08963244
-zombi n 5 3 @ %s ; 5 0 10805932 10805783 10805638 09825519 07919165
-zombi_spirit n 1 2 @ ; 1 0 10805932
-zoning n 1 2 @ #m 1 1 00928751
-zoning_board n 1 1 @ 1 0 08413529
-zoning_commission n 1 1 @ 1 0 08413681
-zonotrichia n 1 3 @ #m %m 1 0 01535310
-zonotrichia_albicollis n 1 2 @ #m 1 0 01535469
-zonotrichia_leucophrys n 1 2 @ #m 1 0 01535690
-zonula n 1 1 @ 1 0 05611221
-zonule n 1 1 @ 1 0 05611221
-zoo n 1 1 @ 1 1 03745146
-
View
32 spec/test_data/wordnet/index.verb
@@ -1,32 +0,0 @@
- 1 This software and database is being provided to you, the LICENSEE, by
- 2 Princeton University under the following license. By obtaining, using
- 3 and/or copying this software and database, you agree that you have
- 4 read, understood, and will comply with these terms and conditions.:
- 5
- 6 Permission to use, copy, modify and distribute this software and
- 7 database and its documentation for any purpose and without fee or
- 8 royalty is hereby granted, provided that you agree to comply with
- 9 the following copyright notice and statements, including the disclaimer,
- 10 and that the same appear on ALL copies of the software, database and
- 11 documentation, including modifications that you make for internal
- 12 use or for distribution.
- 13
- 14 WordNet 3.0 Copyright 2006 by Princeton University. All rights reserved.
- 15
- 16 THIS SOFTWARE AND DATABASE IS PROVIDED "AS IS" AND PRINCETON
- 17 UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
- 18 IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PRINCETON
- 19 UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES OF MERCHANT-
- 20 ABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE
- 21 OF THE LICENSED SOFTWARE, DATABASE OR DOCUMENTATION WILL NOT
- 22 INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR
- 23 OTHER RIGHTS.
- 24
- 25 The name of Princeton University or Princeton may not be used in
- 26 advertising or publicity pertaining to distribution of the software
- 27 and/or database. Title to copyright in this software, database and
- 28 any associated documentation shall at all times remain with
- 29 Princeton University and LICENSEE agrees to preserve same.
-breathe v 9 6 @ ~ * ^ $ + 9 3 00001740 02617083 02325272 02751787 00941346 00929703 00779360 00239754 00105333
-respire v 3 5 @ ~ * $ + 3 0 00002573 00002325 00001740
-
View
95 spec/test_data/wordnet_mini.sql
@@ -0,0 +1,95 @@
+PRAGMA foreign_keys=OFF;
+BEGIN TRANSACTION;
+CREATE TABLE adjpositions (synsetid integer not null, wordid integer not null, position varchar(2) not null, primary key (synsetid, wordid));
+CREATE TABLE adjpositiontypes (position varchar(2) not null, positionname varchar(24) not null, primary key (position));
+CREATE TABLE casedwords (casedwordid integer not null, wordid integer not null, cased varchar(80) not null unique, primary key (casedwordid));
+CREATE TABLE lexdomains (lexdomainid smallint not null, lexdomainname varchar(32), pos char, primary key (lexdomainid));
+CREATE TABLE lexlinks (word1id integer not null, synset1id integer not null, word2id integer not null, synset2id integer not null, linkid smallint not null, primary key (word1id, synset1id, word2id, synset2id, linkid));
+CREATE TABLE linktypes (linkid smallint not null, link varchar(50), recurses integer not null, primary key (linkid));
+CREATE TABLE morphmaps (morphid integer not null, pos char not null, wordid integer not null, primary key (morphid, pos, wordid));
+CREATE TABLE morphs (morphid integer not null, morph varchar(70) not null unique, primary key (morphid));
+CREATE TABLE postypes (pos char not null, posname varchar(20) not null, primary key (pos));
+CREATE TABLE samples (synsetid integer not null, sampleid smallint not null, sample longvarchar not null, primary key (synsetid, sampleid));
+CREATE TABLE semlinks (synset1id integer not null, synset2id integer not null, linkid smallint not null, primary key (synset1id, synset2id, linkid));
+CREATE TABLE senses (wordid integer not null, synsetid integer not null, casedwordid integer, senseid integer unique, sensenum smallint not null, lexid smallint not null, tagcount integer, sensekey varchar(100) unique, primary key (wordid, synsetid));
+INSERT INTO "senses" VALUES(138336,300491511,NULL,194444,4,0,0,'unique%5:00:00:unusual:00');
+INSERT INTO "senses" VALUES(138336,300505853,NULL,194441,1,0,10,'unique%5:00:00:incomparable:00');
+INSERT INTO "senses" VALUES(138336,301105042,NULL,194443,2,0,2,'unique%5:00:00:specific:00');
+INSERT INTO "senses" VALUES(138336,302215087,NULL,194442,3,0,0,'unique%5:00:00:single:05');
+INSERT INTO "senses" VALUES(138337,400175490,NULL,194445,1,0,4,'uniquely%4:02:00::');
+INSERT INTO "senses" VALUES(138338,104763650,NULL,194446,1,0,3,'uniqueness%1:07:00::');
+INSERT INTO "senses" VALUES(58843,103076708,NULL,81518,4,0,0,'good%1:06:00::');
+INSERT INTO "senses" VALUES(58843,104849241,NULL,81520,2,1,9,'good%1:07:01::');
+INSERT INTO "senses" VALUES(58843,105142180,NULL,81521,3,2,6,'good%1:07:02::');
+INSERT INTO "senses" VALUES(58843,105159725,NULL,81519,1,0,11,'good%1:07:00::');
+INSERT INTO "senses" VALUES(58843,300064787,NULL,81526,5,0,5,'good%5:00:00:advantageous:00');
+INSERT INTO "senses" VALUES(58843,300106020,NULL,81527,2,0,22,'good%5:00:00:ample:00');
+INSERT INTO "senses" VALUES(58843,300452883,NULL,81528,10,0,2,'good%5:00:00:close:02');
+INSERT INTO "senses" VALUES(58843,300523364,NULL,81529,9,0,2,'good%5:00:00:complete:00');
+INSERT INTO "senses" VALUES(58843,300775611,NULL,81530,21,0,0,'good%5:00:00:discriminating:00');
+INSERT INTO "senses" VALUES(58843,300832784,NULL,81538,14,0,1,'good%5:00:00:operative:00');
+INSERT INTO "senses" VALUES(58843,301048762,NULL,81531,13,0,1,'good%5:00:00:fortunate:00');
+INSERT INTO "senses" VALUES(58843,301068306,NULL,81532,20,0,0,'good%5:00:00:fresh:01');
+INSERT INTO "senses" VALUES(58843,301116026,NULL,81533,19,0,0,'good%5:00:00:genuine:00');
+INSERT INTO "senses" VALUES(58843,301123148,NULL,81522,1,1,190,'good%3:00:01::');
+INSERT INTO "senses" VALUES(58843,301129977,NULL,81523,3,2,12,'good%3:00:02::');
+INSERT INTO "senses" VALUES(58843,301166413,NULL,81534,18,0,0,'good%5:00:00:healthful:00');
+INSERT INTO "senses" VALUES(58843,301171213,NULL,81535,17,0,0,'good%5:00:00:healthy:00');
+INSERT INTO "senses" VALUES(58843,301333477,NULL,81536,16,0,0,'good%5:00:00:intellectual:00');
+INSERT INTO "senses" VALUES(58843,301586752,NULL,81537,6,0,4,'good%5:00:00:nice:00');
+INSERT INTO "senses" VALUES(58843,301661289,NULL,81539,12,0,1,'good%5:00:00:opportune:00');
+INSERT INTO "senses" VALUES(58843,301808329,NULL,81540,15,0,0,'good%5:00:00:pleasing:00');
+INSERT INTO "senses" VALUES(58843,301983162,NULL,81541,4,0,10,'good%5:00:00:reputable:00');
+INSERT INTO "senses" VALUES(58843,302036934,NULL,81542,7,0,3,'good%5:00:00:righteous:00');
+INSERT INTO "senses" VALUES(58843,302226162,NULL,81543,8,0,2,'good%5:00:00:skilled:00');
+INSERT INTO "senses" VALUES(58843,302273643,NULL,81544,11,1,1,'good%5:00:01:sound:00');
+INSERT INTO "senses" VALUES(58843,400011093,NULL,81524,1,0,9,'good%4:02:00::');
+INSERT INTO "senses" VALUES(58843,400057388,NULL,81525,2,1,4,'good%4:02:01::');
+INSERT INTO "senses" VALUES(27771,103076708,NULL,38135,1,0,4,'commodity%1:06:00::');
+INSERT INTO "senses" VALUES(134408,103076708,NULL,188782,1,0,0,'trade_good%1:06:00::');
+CREATE TABLE synsets (synsetid integer not null, pos char, lexdomainid smallint not null, definition longvarchar, primary key (synsetid));
+INSERT INTO "synsets" VALUES(103076708,'n',6,'articles of commerce');
+INSERT INTO "synsets" VALUES(104763650,'n',7,'the quality of being one of a kind');
+INSERT INTO "synsets" VALUES(104849241,'n',7,'moral excellence or admirableness');
+INSERT INTO "synsets" VALUES(105142180,'n',7,'that which is pleasing or valuable or useful');
+INSERT INTO "synsets" VALUES(105159725,'n',7,'benefit');
+INSERT INTO "synsets" VALUES(300064787,'s',0,'promoting or enhancing well-being');
+INSERT INTO "synsets" VALUES(300106020,'s',0,'having the normally expected amount');
+INSERT INTO "synsets" VALUES(300452883,'s',0,'with or in a close or intimate relationship');
+INSERT INTO "synsets" VALUES(300491511,'s',0,'highly unusual or rare but not the single instance');
+INSERT INTO "synsets" VALUES(300505853,'s',0,'radically distinctive and without equal');
+INSERT INTO "synsets" VALUES(300523364,'s',0,'thorough');
+INSERT INTO "synsets" VALUES(300775611,'s',0,'generally admired');
+INSERT INTO "synsets" VALUES(300832784,'s',0,'exerting force or influence');
+INSERT INTO "synsets" VALUES(301048762,'s',0,'resulting favorably');
+INSERT INTO "synsets" VALUES(301068306,'s',0,'not left to spoil');
+INSERT INTO "synsets" VALUES(301105042,'s',0,'(followed by `to'') applying exclusively to a given category or condition or locality');
+INSERT INTO "synsets" VALUES(301116026,'s',0,'not forged');
+INSERT INTO "synsets" VALUES(301123148,'a',0,'having desirable or positive qualities especially those suitable for a thing specified');
+INSERT INTO "synsets" VALUES(301129977,'a',0,'morally admirable');
+INSERT INTO "synsets" VALUES(301166413,'s',0,'tending to promote physical well-being; beneficial to health');
+INSERT INTO "synsets" VALUES(301171213,'s',0,'in excellent physical condition');
+INSERT INTO "synsets" VALUES(301333477,'s',0,'appealing to the mind');
+INSERT INTO "synsets" VALUES(301586752,'s',0,'agreeable or pleasing');
+INSERT INTO "synsets" VALUES(301661289,'s',0,'most suitable or right for a particular purpose');
+INSERT INTO "synsets" VALUES(301808329,'s',0,'capable of pleasing');
+INSERT INTO "synsets" VALUES(301983162,'s',0,'deserving of esteem and respect');
+INSERT INTO "synsets" VALUES(302036934,'s',0,'of moral excellence');
+INSERT INTO "synsets" VALUES(302215087,'s',0,'the single one of its kind');
+INSERT INTO "synsets" VALUES(302226162,'s',0,'having or showing knowledge and skill and aptitude');
+INSERT INTO "synsets" VALUES(302273643,'s',0,'financially sound');
+INSERT INTO "synsets" VALUES(400011093,'r',2,'(often used as a combining form) in a good or proper or satisfactory manner or to a high standard (`good'' is a nonstandard dialectal variant for `well'')');
+INSERT INTO "synsets" VALUES(400057388,'r',2,'completely and absolutely (`good'' is sometimes used informally for `thoroughly'')');
+INSERT INTO "synsets" VALUES(400175490,'r',2,'so as to be unique');
+CREATE TABLE vframemaps (synsetid integer not null, wordid integer not null, frameid smallint not null, primary key (synsetid, wordid, frameid));
+CREATE TABLE vframes (frameid smallint not null, frame varchar(50), primary key (frameid));
+CREATE TABLE vframesentencemaps (synsetid integer not null, wordid integer not null, sentenceid smallint not null, primary key (synsetid, wordid, sentenceid));
+CREATE TABLE vframesentences (sentenceid smallint not null, sentence longvarchar, primary key (sentenceid));
+CREATE TABLE words (wordid integer not null, lemma varchar(80) not null unique, primary key (wordid));
+INSERT INTO "words" VALUES(27771,'commodity');
+INSERT INTO "words" VALUES(58843,'good');
+INSERT INTO "words" VALUES(134408,'trade good');
+INSERT INTO "words" VALUES(138336,'unique');
+INSERT INTO "words" VALUES(138337,'uniquely');
+INSERT INTO "words" VALUES(138338,'uniqueness');
+COMMIT;
View
149 spec/wordnet_spec.js
@@ -1,5 +1,5 @@
/*
-Copyright (c) 2011, Chris Umbel
+Copyright (c) 2011, Chris Umbel and Russell Mull
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@@ -20,87 +20,104 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
-var IndexFile = require('lib/natural/wordnet/index_file'),
- DataFile = require('lib/natural/wordnet/data_file'),
- WordNet = require('lib/natural/wordnet/wordnet');
+var WordNet = require('lib/natural/wordnet/wordnet');
+var sqlite3 = require('sqlite3');
+var _ = require('underscore');
+var fs = require('fs');
describe('wordnet', function() {
- describe('index_file', function() {
- it('should build a valid url', function() {
- var indexFile = new IndexFile('./spec/test_data/wordnet', 'http://wordnet.naturalnode.com/', 'noun');
- expect(indexFile.url.href).toBe('http://wordnet.naturalnode.com/index.noun.gz');
+ var testDataFile = __dirname + '/test_data/wordnet_mini.sql';
+ var testData = fs.readFileSync(testDataFile, 'utf8');
+
+ var db = new sqlite3.Database(':memory:');
+ db.exec(testData);
+
+ var wordnet = new WordNet(db);
+
+ it('should get a single word', function() {
+ wordnet.getWord('unique', function(word) {
+ expect(word.id).toBe(138336);
+ expect(word.lemma).toBe('unique');
+
+ asyncSpecDone();
});
- it('should miss a record', function() {
- var indexFile = new IndexFile('./spec/test_data/wordnet', 'http://wordnet.naturalnode.com/', 'noun');
- indexFile.lookup('aac', function(result) {
- expect(result).toBeNull();
- asyncSpecDone();
- });
-
- asyncSpecWait();
- });
-
- it('should find a record', function() {
- var indexFile = new IndexFile('./spec/test_data/wordnet', 'http://wordnet.naturalnode.com/', 'noun');
- indexFile.lookup('pass', function(result) {
- expect(result.lemma).toBe('pass');
- expect(result.pos).toBe('n');
- expect(result.ptrSymbol.length == 5);
- expect(result.synsetOffset.length == 16);
- asyncSpecDone();
- });
-
- asyncSpecWait();
+ asyncSpecWait();
+ });
+
+  // The fixture holds three lemmas starting with "unique"; the 'unique%'
+  // pattern is expected to return all of them.
+  it('should find multiple words', function() {
+    wordnet.findWords('unique%', function(words) {
+      expect(words.length).toBe(3);
+      expect(_.pluck(words, 'lemma')).toEqual(['unique', 'uniquely', 'uniqueness']);
+
+      // Signal completion so the expectations above count towards this spec.
+      asyncSpecDone();
});
+
+    // findWords reports through a callback; hold the spec open until it has run,
+    // matching the other asynchronous specs in this file.
+    asyncSpecWait();
});
-
- describe('data_file', function() {
- it('should find a record', function() {
- var dataFile = new DataFile('./spec/test_data/wordnet/', 'http://wordnet.naturalnode.com/', 'noun');
-
- dataFile.get(1740, function(data) {
- expect(data.lemma).toBe('entity');
- expect(data.ptrs.length).toBe(3);
- asyncSpecDone();
- });
-
- asyncSpecWait();
+
+ it('should find a word by id', function() {
+ wordnet.getWordById(138336, function(word) {
+ expect(word.id).toBe(138336);
+ expect(word.lemma).toBe('unique');
+
+ asyncSpecDone();
});
+
+ asyncSpecWait();
});
-
- it('should look up a word', function() {
- var wordnet = new WordNet('./spec/test_data/wordnet/', 'http://wordnet.naturalnode.com/');
-
- wordnet.lookup('entity', function(records) {
- expect(records.length).toBe(1);
- expect(records[0].lemma).toBe('entity');
+
+ it('should handle a miss for single word retrieval', function() {
+ wordnet.getWord('argombiszki', function(word) {
+ expect(word).toBeNull();
asyncSpecDone();
});
-
+
asyncSpecWait();
- });
-
- it('should handle a miss', function() {
- var wordnet = new WordNet('./spec/test_data/wordnet/', 'http://wordnet.naturalnode.com/');
-
- wordnet.lookup('argombiszki', function(records) {
- expect(records.length).toBe(0);
+ });
+
+ it('should handle a miss for multiple word retrieval', function() {
+ wordnet.findWords('argombiszki%', function(words) {
+ expect(words.length).toBe(0);
asyncSpecDone();
});
-
+
asyncSpecWait();
- });
-
-
- it('should get a word', function() {
- var wordnet = new WordNet('./spec/test_data/wordnet/', 'http://wordnet.naturalnode.com/');
+ });
+
+  // Specs for the word object returned by getWord, using the fixture word 'good'.
+  describe('word', function() {
+    var word = null;
+
+    // Fetch the word inside beforeEach rather than in the describe body:
+    // waitsFor must be called while a spec is running, and the describe body
+    // executes at declaration time, before any spec has started.
+    beforeEach(function() {
+      word = null;
+      wordnet.getWord('good', function(w) { word = w; });
+      waitsFor(function() { return word != null; }, 1000);
+    });
+
+    it('should have senses', function() {
+      expect(word.senses.length).toBe(27);
+      expect(word.senses[0].pos).toBe('n');
+      expect(word.senses[0].definition).toBe('articles of commerce');
+    });
+
+    it('should be able to tell you its synonyms for its senses', function() {
+      word.senses[0].getSynonyms(function(synonyms) {
+        expect(_.pluck(synonyms, 'lemma')).toEqual(['commodity', 'good', 'trade good']);
+        asyncSpecDone();
+      });
+      asyncSpecWait();
+    });
+
+  });
+/*
+
+ it('should lookup synonyms', function() {
+ var wordnet = new WordNet('./io_spec/test_data/wordnet/download/', 'http://wordnet.naturalnode.com/');
- wordnet.get(1740, 'n', function(record) {
- expect(record.lemma).toBe('entity');
- asyncSpecDone();
+ wordnet.lookupSynonyms('entity', function(records) {
+ expect(records.length).toBe(3);
+
+ require('path').exists('./io_spec/test_data/wordnet/download/index.noun', function(exists) {
+ expect(exists).toBeTruthy();
+ asyncSpecDone();
+ });
});
asyncSpecWait();
- });
+ });
+
+*/
});

0 comments on commit 67e2614

Please sign in to comment.