Skip to content

Commit

Permalink
classifiers accept text
Browse files Browse the repository at this point in the history
  • Loading branch information
chrisumbel committed Nov 27, 2011
1 parent 379ae61 commit abbe74d
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 13 deletions.
2 changes: 1 addition & 1 deletion lib/natural/classifiers/bayes_classifier.js
Expand Up @@ -26,7 +26,7 @@ Classifier = require('./classifier'),
ApparatusBayesClassifier = require('apparatus').BayesClassifier;

var BayesClassifier = function(stemmer) {
Classifier.call(this, new ApparatusBayesClassifier());
Classifier.call(this, new ApparatusBayesClassifier(), stemmer);
};

sys.inherits(BayesClassifier, Classifier);
Expand Down
6 changes: 6 additions & 0 deletions lib/natural/classifiers/classifier.js
Expand Up @@ -34,6 +34,9 @@ function addDocument(text, classification) {
if(this.docs[classification] == null)
this.docs[classification] = [];

if(typeof text === 'string')
text = this.stemmer.tokenizeAndStem(text);

this.docs[classification].push(text);

for(var i = 0; i < text.length; i++) {
Expand All @@ -44,6 +47,9 @@ function addDocument(text, classification) {
function textToFeatures(observation) {
var features = [];

if(typeof observation === 'string')
observation = this.stemmer.tokenizeAndStem(observation);

for(var feature in this.features) {
if(observation.indexOf(feature) > -1)
features.push(1);
Expand Down
2 changes: 1 addition & 1 deletion lib/natural/classifiers/logistic_regression_classifier.js
Expand Up @@ -26,7 +26,7 @@ Classifier = require('./classifier'),
ApparatusLogisticRegressionClassifier = require('apparatus').LogisticRegressionClassifier;

var LogisticRegressionClassifier = function(stemmer) {
Classifier.call(this, new ApparatusLogisticRegressionClassifier());
Classifier.call(this, new ApparatusLogisticRegressionClassifier(), stemmer);
};

sys.inherits(LogisticRegressionClassifier, Classifier);
Expand Down
17 changes: 16 additions & 1 deletion spec/bayes_classifier_spec.js
Expand Up @@ -24,7 +24,7 @@ var natural = require('lib/natural');

describe('bayes classifier', function() {
describe('classifier', function() {
it('should classify with mixed training data', function() {
it('should classify with arrays', function() {
var classifier = new natural.BayesClassifier();
classifier.addDocument(['fix', 'box'], 'computing');
classifier.addDocument(['write', 'code'], 'computing');
Expand All @@ -38,5 +38,20 @@ describe('bayes classifier', function() {
expect(classifier.classify(['bug', 'code'])).toBe('computing');
expect(classifier.classify(['read', 'thing'])).toBe('literature');
});

// Raw strings are accepted for both training and classification; the
// classifier tokenizes and stems them itself before extracting features.
it('should classify with strings', function() {
    var classifier = new natural.BayesClassifier();

    // Training documents are plain sentences rather than pre-stemmed token arrays.
    classifier.addDocument('i fixed the box', 'computing');
    classifier.addDocument('i write code', 'computing');
    classifier.addDocument('nasty script code', 'computing');
    classifier.addDocument('write a book', 'literature');
    classifier.addDocument('read a book', 'literature');
    classifier.addDocument('study the books', 'literature');

    classifier.train();

    // Observations are plain sentences as well.
    expect(classifier.classify('a bug in the code')).toBe('computing');
    expect(classifier.classify('read all the books')).toBe('literature');
});
});
});
35 changes: 25 additions & 10 deletions spec/logistic_regression_classifier_spec.js
Expand Up @@ -25,18 +25,33 @@ var natural = new require('lib/natural'),

describe('logistic regression', function() {
it('should classify with individually trained documents', function() {
var logistic = new LogisticRegressionClassifier();
var classifier = new LogisticRegressionClassifier();

logistic.addDocument(['have', 'computer'], 'IT');
logistic.addDocument(['have', 'phone'], 'IT');
logistic.addDocument(['computer', 'suck'], 'IT');
logistic.addDocument(['field', 'goal'], 'sports');
logistic.addDocument(['score', 'goal'], 'sports');
logistic.addDocument(['great', 'speed'], 'sports');
classifier.addDocument(['have', 'computer'], 'IT');
classifier.addDocument(['have', 'phone'], 'IT');
classifier.addDocument(['computer', 'suck'], 'IT');
classifier.addDocument(['field', 'goal'], 'sports');
classifier.addDocument(['score', 'goal'], 'sports');
classifier.addDocument(['great', 'speed'], 'sports');

logistic.train();
classifier.train();

expect(logistic.classify(['hate', 'computer'])).toBe('IT');
expect(logistic.classify(['score', 'please'])).toBe('sports');
expect(classifier.classify(['hate', 'computer'])).toBe('IT');
expect(classifier.classify(['score', 'please'])).toBe('sports');
});

// Raw strings are accepted for both training and classification; the
// classifier tokenizes and stems them itself before extracting features.
it('should classify with strings', function() {
    var classifier = new natural.LogisticRegressionClassifier();

    // Training documents are plain sentences rather than pre-stemmed token arrays.
    classifier.addDocument('i fixed the box', 'computing');
    classifier.addDocument('i write code', 'computing');
    classifier.addDocument('nasty script code', 'computing');
    classifier.addDocument('write a book', 'literature');
    classifier.addDocument('read a book', 'literature');
    classifier.addDocument('study the books', 'literature');

    classifier.train();

    // Observations are plain sentences as well.
    expect(classifier.classify('a bug in the code')).toBe('computing');
    expect(classifier.classify('read all the books')).toBe('literature');
});
});

0 comments on commit abbe74d

Please sign in to comment.