Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

More complete regexp to match Katakana

  • Loading branch information...
commit 25ffbfc8ed9534b0c15c9e3ff71c8036280934a9 1 parent e87d337
@gmarty gmarty authored
Showing with 5 additions and 3 deletions.
  1. +3 −1 lib/natural/stemmers/stemmer_ja.js
  2. +2 −2 spec/stemmer_ja_spec.js
View
4 lib/natural/stemmers/stemmer_ja.js
@@ -26,6 +26,8 @@
* Inspired by:
* http://svn.apache.org/repos/asf/lucene/dev/trunk/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseKatakanaStemFilter.java
*
+ * This script assumes input is normalized using normalizer_ja().
+ *
* \@todo Use .bind() in StemmerJa.prototype.attach().
*/
@@ -117,7 +119,7 @@ StemmerJa.prototype.stemKatakana = function(token) {
* @return {boolean} True if the string has katakana only.
*/
StemmerJa.prototype.isKatakana = function(str) {
- return !!str.match(/^[ァ-ヴ]+$/);
+ return !!str.match(/^[゠-ヿ]+$/);
};
// Expose an attach function that will patch String with new methods.
View
4 spec/stemmer_ja_spec.js
@@ -23,8 +23,8 @@ THE SOFTWARE.
var StemmerJa = require('lib/natural/stemmers/stemmer_ja');
var stemmer = new StemmerJa();
-var test = ['コピー', 'コーヒー', 'タクシー', 'パーティー', 'パーティ', 'センター'];
-var testResult = ['コピー', 'コーヒ', 'タクシ', 'パーティ', 'パーティ', 'センタ'];
+var test = ['コピー', 'コーヒー', 'タクシー', 'パーティー', 'パーティ', 'ヘルプ・センター'];
+var testResult = ['コピー', 'コーヒ', 'タクシ', 'パーティ', 'パーティ', 'ヘルプ・センタ'];
var text = '明後日パーティーに行く予定がある。図書館で資料をコピーしました。';
describe('StemmerJa', function() {

0 comments on commit 25ffbfc

Please sign in to comment.
Something went wrong with that request. Please try again.