Skip to content

Commit

Permalink
feat(addwordex): 加词支持指定权重和词性,解决原来 AddWord() 接口加词权重太低没有生效的问题
Browse files Browse the repository at this point in the history
  • Loading branch information
sy-lht committed Jan 31, 2023
1 parent d839aff commit 8e5f5e5
Show file tree
Hide file tree
Showing 5 changed files with 17 additions and 0 deletions.
3 changes: 3 additions & 0 deletions README.md
Expand Up @@ -58,6 +58,9 @@ func main() {
fmt.Println("精确模式:", strings.Join(words, "/"))

x.AddWord("比特币")
// `AddWordEx` 支持指定词语的权重,作为 `AddWord` 权重太低加词失败的补充。
// `tag` 参数可以为空字符串,也可以指定词性。
// x.AddWordEx("比特币", 100000, "")
s = "比特币"
words = x.Cut(s, use_hmm)
fmt.Println(s)
Expand Down
1 change: 1 addition & 0 deletions README_EN.md
Expand Up @@ -53,6 +53,7 @@ func main() {
fmt.Println("精确模式:", strings.Join(words, "/"))
x.AddWord("比特币")
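// `AddWordEx` lets you specify the weight of the word; use it when a word added with `AddWord` does not take effect because its weight is too low.
// The `tag` argument may be an empty string, or it may specify the part of speech.
// x.AddWordEx("比特币", 100000, "")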
s = "比特币"
words = x.Cut(s, use_hmm)
fmt.Println(s)
Expand Down
4 changes: 4 additions & 0 deletions jieba.cpp
Expand Up @@ -84,6 +84,10 @@ void AddWord(Jieba x, const char* word) {
((cppjieba::Jieba*)x)->InsertUserWord(word);
}

// AddWordEx treats the opaque handle `x` as a cppjieba::Jieba instance and
// forwards `word`, `freq` and `tag` to its InsertUserWord method.
// Nothing is returned to the caller.
void AddWordEx(Jieba x, const char* word, int freq, const char* tag) {
  cppjieba::Jieba* segmenter = (cppjieba::Jieba*)x;
  segmenter->InsertUserWord(word, freq, tag);
}

// RemoveWord treats the opaque handle `x` as a cppjieba::Jieba instance and
// forwards `word` to its DeleteUserWord method. Nothing is returned to the
// caller.
void RemoveWord(Jieba x, const char* word) {
  cppjieba::Jieba* segmenter = (cppjieba::Jieba*)x;
  segmenter->DeleteUserWord(word);
}
Expand Down
8 changes: 8 additions & 0 deletions jieba.go
Expand Up @@ -103,6 +103,14 @@ func (x *Jieba) AddWord(s string) {
C.AddWord(x.jieba, cstr)
}

// AddWordEx adds the word s with an explicit frequency (weight) and
// part-of-speech tag by calling the C function AddWordEx. tag may be the
// empty string.
//
// freq is handed to C as a C int. A plain C.int(freq) conversion silently
// wraps when Go's int is wider than C's int, so values outside the signed
// 32-bit range are saturated to the nearest bound before the conversion.
func (x *Jieba) AddWordEx(s string, freq int, tag string) {
	// Bounds of a signed 32-bit integer.
	// NOTE(review): assumes C int is 32 bits wide on the target platform.
	const (
		maxCInt = 1<<31 - 1
		minCInt = -1 << 31
	)
	switch {
	case freq > maxCInt:
		freq = maxCInt
	case freq < minCInt:
		freq = minCInt
	}

	// C.CString allocates with malloc; each copy is released when this
	// method returns.
	cstr := C.CString(s)
	defer C.free(unsafe.Pointer(cstr))
	ctag := C.CString(tag)
	defer C.free(unsafe.Pointer(ctag))

	C.AddWordEx(x.jieba, cstr, C.int(freq), ctag)
}

func (x *Jieba) RemoveWord(s string) {
cstr := C.CString(s)
defer C.free(unsafe.Pointer(cstr))
Expand Down
1 change: 1 addition & 0 deletions jieba.h
Expand Up @@ -28,6 +28,7 @@ char** CutAll(Jieba handle, const char* sentence);
char** CutForSearch(Jieba handle, const char* sentence, int is_hmm_used);
char** Tag(Jieba handle, const char* sentence);
/* AddWord forwards `word` to cppjieba::Jieba::InsertUserWord on the instance
 * behind `handle`. */
void AddWord(Jieba handle, const char* word);
/* AddWordEx forwards `word`, `freq` and `tag` to
 * cppjieba::Jieba::InsertUserWord on the instance behind `handle`. */
void AddWordEx(Jieba handle, const char* word, int freq, const char* tag);
/* RemoveWord forwards `word` to cppjieba::Jieba::DeleteUserWord on the
 * instance behind `handle`. */
void RemoveWord(Jieba handle, const char* word);

Word* Tokenize(Jieba x, const char* sentence, TokenizeMode mode, int is_hmm_used);
Expand Down

0 comments on commit 8e5f5e5

Please sign in to comment.