Skip to content
Browse files

More complete regexp for Japanese characters

  • Loading branch information...
1 parent 25ffbfc commit 8b949a2b56af2d38e56ccd88e994257a17d9790f @gmarty gmarty committed
Showing with 3 additions and 3 deletions.
  1. +3 −3 lib/natural/tokenizers/tokenizer_ja.js
View
6 lib/natural/tokenizers/tokenizer_ja.js
@@ -69,9 +69,9 @@ var Tokenizer = require('./tokenizer'),
var TokenizerJa = function() {
this.chartype_ = [
[/[〇一二三四五六七八九十百千万億兆]/, 'M'],
- [/[一-]/, 'H'],
- [/[ぁ-]/, 'I'],
- [/[ァ-ヶ]/, 'K'],
+ [/[一-鿌]/, 'H'],
+ [/[ぁ-ゟ]/, 'I'],
+ [/[゠-ヿ]/, 'K'],
[/[a-zA-Z]/, 'A'],
[/[0-9]/, 'N']
];

0 comments on commit 8b949a2

Please sign in to comment.
Something went wrong with that request. Please try again.