From 5f35e0ec3bd9a7a2c7317a7e38a75c0a4b089fb8 Mon Sep 17 00:00:00 2001 From: Luis Rodrigues Date: Fri, 2 Jan 2015 00:50:05 +0000 Subject: [PATCH] 1765 errors in 32016 words, not bad for a first pass --- spec/porter_stemmer_pt_spec.js | 66 +++++++++++----------------------- 1 file changed, 21 insertions(+), 45 deletions(-) diff --git a/spec/porter_stemmer_pt_spec.js b/spec/porter_stemmer_pt_spec.js index 02ff464a6..cf80f2e4c 100644 --- a/spec/porter_stemmer_pt_spec.js +++ b/spec/porter_stemmer_pt_spec.js @@ -25,50 +25,26 @@ var PorterStemmer = require('../lib/natural/stemmers/porter_stemmer_pt'), describe('porter_stemmer_pt', function() { - it('should not stem stopwords', function() { - - var words = [ - 'a', 'e', 'o' - ]; - - for (var i in words) { - expect(PorterStemmer.stem(words[i])).toBe(words[i]); - } - }); - - it('should perform stemming on a few special case words', function() { - - var words = { - 'abastecem' : 'abastec', - 'abastecer' : 'abastec', - 'abastecida' : 'abastec', - 'abastecimento' : 'abastec' - }; - - for (var word in words) { - expect(PorterStemmer.stem(word)).toBe(words[word]); - } - }); - -// it('should perform stemming on a lot of words', function() { -// var errors = []; -// -// fs.readFileSync('spec/test_data/snowball_pt.txt').toString().split('\n').forEach(function(line) { -// if (line) { -// var fields = line.replace(/\s+/g, ' ').split(' '), -// stemmed = PorterStemmer.stem(fields[0]); -// -// if (stemmed !== fields[1]) { -// errors.push({ -// word: fields[0], -// expected: fields[1], -// actual: stemmed -// }); -// } -// } -// }); -// -// expect(errors.length).toBe(0); -// }); + it('should perform stemming on a lot of words', function() { + var errors = []; + + fs.readFileSync('spec/test_data/snowball_pt.txt').toString().split('\n').forEach(function(line) { + if (line) { + var fields = line.replace(/\s+/g, ' ').split(' '), + stemmed = PorterStemmer.stem(fields[0]); + + if (stemmed !== fields[1]) { + console.log('Error:', fields[0], 'Expected:', fields[1], 'Got:', stemmed); + errors.push({ + word: fields[0], + expected: fields[1], + actual: stemmed + }); + } + } + }); + + expect(errors.length).toBe(0); + }); });