forked from yanyiwu/gojieba
/
jieba.go
99 lines (87 loc) · 2.01 KB
/
jieba.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
package gojieba
/*
#cgo CXXFLAGS: -I./deps -DLOGGING_LEVEL=LL_WARNING -O3 -Wall
#include <stdlib.h>
#include "jieba.h"
*/
import "C"
import "unsafe"
// TokenizeMode selects the mode passed to Tokenize. It is an int on the Go
// side and is converted to C.TokenizeMode before crossing into C.
type TokenizeMode int
// Modes accepted by Tokenize. Values are assigned by iota, so DefaultMode is 0
// and SearchMode is 1.
// NOTE(review): these presumably have to match the TokenizeMode enum order in
// jieba.h — confirm against the header.
const (
// DefaultMode is the zero value of TokenizeMode.
DefaultMode TokenizeMode = iota
// SearchMode has value 1.
// NOTE(review): presumably the search-engine style tokenization — confirm.
SearchMode
)
// Word is the element type of the slice returned by Tokenize.
// NOTE(review): Start and End are presumably the token's offsets within the
// input string; whether they count bytes or runes, and whether End is
// exclusive, is decided by convertWords / the C side — confirm there.
type Word struct {
// Str is the token text.
Str string
// Start is the token's start position.
Start int
// End is the token's end position.
End int
}
// Jieba wraps a C.Jieba handle. Instances are created with NewJieba, and the
// handle is released by calling Free.
type Jieba struct {
// jieba is the handle returned by C.NewJieba and passed to every C call.
jieba C.Jieba
}
// NewJieba builds a Jieba instance backed by a handle from C.NewJieba.
//
// paths is forwarded to getDictPaths; the first three entries of its result
// are handed to the C constructor in order.
// NOTE(review): presumably these are the main dictionary, the HMM model and
// the user dictionary — confirm against getDictPaths and jieba.h.
//
// The C copies of the path strings are temporary and are freed before
// NewJieba returns. The returned instance should be released with Free.
func NewJieba(paths ...string) *Jieba {
	resolved := getDictPaths(paths...)

	// Each C string is paired with its own deferred free immediately after
	// allocation, so an early panic cannot leak the ones already created.
	dictPath := C.CString(resolved[0])
	defer C.free(unsafe.Pointer(dictPath))

	hmmPath := C.CString(resolved[1])
	defer C.free(unsafe.Pointer(hmmPath))

	userPath := C.CString(resolved[2])
	defer C.free(unsafe.Pointer(userPath))

	handle := C.NewJieba(dictPath, hmmPath, userPath)
	return &Jieba{jieba: handle}
}
// Free hands the wrapped handle to C.FreeJieba.
// NOTE(review): the handle field is not cleared afterwards, so presumably the
// instance must not be used again and Free must not be called twice — confirm
// against the C implementation.
func (x *Jieba) Free() {
C.FreeJieba(x.jieba)
}
// Cut runs the C-side Cut routine on s and returns its result converted to Go
// strings by cstrings.
//
// hmm is passed to C as an int flag: 1 when true, 0 when false.
// NOTE(review): presumably this enables the HMM model for unknown words —
// confirm against jieba.h.
func (x *Jieba) Cut(s string, hmm bool) []string {
	var useHMM C.int
	if hmm {
		useHMM = 1
	}

	// C.CString allocates on the C heap; it must be freed explicitly.
	input := C.CString(s)
	defer C.free(unsafe.Pointer(input))

	// The C result is released only after cstrings has converted it: the
	// return expression is evaluated before deferred calls run.
	raw := C.Cut(x.jieba, input, useHMM)
	defer C.FreeWords(raw)

	return cstrings(raw)
}
// CutAll runs the C-side CutAll routine on s and returns its result converted
// to Go strings by cstrings.
func (x *Jieba) CutAll(s string) []string {
	// C.CString allocates on the C heap; it must be freed explicitly.
	input := C.CString(s)
	defer C.free(unsafe.Pointer(input))

	// The C result is released only after cstrings has converted it: the
	// return expression is evaluated before deferred calls run.
	raw := C.CutAll(x.jieba, input)
	defer C.FreeWords(raw)

	return cstrings(raw)
}
// CutForSearch runs the C-side CutForSearch routine on s and returns its
// result converted to Go strings by cstrings.
//
// hmm is passed to C as an int flag: 1 when true, 0 when false.
// NOTE(review): presumably this enables the HMM model for unknown words —
// confirm against jieba.h.
func (x *Jieba) CutForSearch(s string, hmm bool) []string {
	var useHMM C.int
	if hmm {
		useHMM = 1
	}

	// C.CString allocates on the C heap; it must be freed explicitly.
	input := C.CString(s)
	defer C.free(unsafe.Pointer(input))

	// The C result is released only after cstrings has converted it: the
	// return expression is evaluated before deferred calls run.
	raw := C.CutForSearch(x.jieba, input, useHMM)
	defer C.FreeWords(raw)

	return cstrings(raw)
}
// Tag runs the C-side Tag routine on s and returns its result converted to Go
// strings by cstrings.
// NOTE(review): the format of each returned string (e.g. how a word and its
// tag are combined) is defined on the C side — confirm against jieba.h.
func (x *Jieba) Tag(s string) []string {
	// C.CString allocates on the C heap; it must be freed explicitly.
	input := C.CString(s)
	defer C.free(unsafe.Pointer(input))

	// The C result is released only after cstrings has converted it: the
	// return expression is evaluated before deferred calls run.
	raw := C.Tag(x.jieba, input)
	defer C.FreeWords(raw)

	return cstrings(raw)
}
// Tokenize runs the C-side Tokenize routine on s and returns the result
// converted to a []Word by convertWords.
//
// mode is converted to C.TokenizeMode before the call. hmm is passed to C as
// an int flag: 1 when true, 0 when false.
// NOTE(review): presumably hmm enables the HMM model for unknown words —
// confirm against jieba.h.
func (x *Jieba) Tokenize(s string, mode TokenizeMode, hmm bool) []Word {
	var useHMM C.int
	if hmm {
		useHMM = 1
	}

	// C.CString allocates on the C heap; it must be freed explicitly.
	input := C.CString(s)
	defer C.free(unsafe.Pointer(input))

	// Unlike the Cut* methods, the token array is released with plain C.free.
	// It is freed only after convertWords has read it: the return expression
	// is evaluated before deferred calls run.
	tokens := C.Tokenize(x.jieba, input, C.TokenizeMode(mode), useHMM)
	defer C.free(unsafe.Pointer(tokens))

	return convertWords(s, tokens)
}