// File: word.go
// Copyright (c) 2015 RxnWeaver
//
// Part of the RxnWeaver suite of projects. See README.md and LICENSE
// for more details.
package tokenizer
// Word qualifies a token whose type is either `TokMayBeWord` or
// `TokWord`.
//
// Alongside the underlying token it records the token's IOB (Inside,
// Outside, Beginning) status and, when the token is a word, its part
// of speech and lemma form, plus a class assigned after learning.
type Word struct {
	// token is the actual text together with its properties.
	token TextToken
	// iob is the IOB tag: one of 'B', 'I' or 'O'.
	iob byte
	// pos is the part of speech.
	pos string
	// lemma is the lemma form.
	lemma string
	// class is assigned after learning.
	class string
}
// newWord builds a Word around the given text, storing b and e as the
// token's begin and end. The token type starts out as `TokMayBeWord`
// and the IOB tag as 'O'; every other property keeps its zero value.
func newWord(text string, b int, e int) *Word {
	var tok TextToken
	tok.text = text
	tok.begin = b
	tok.end = e
	tok.ttype = TokMayBeWord

	return &Word{token: tok, iob: 'O'}
}
// Text returns the text of the underlying token.
func (w *Word) Text() string {
	return w.token.text
}
// Begin returns the begin value recorded in the underlying token.
// NOTE(review): presumably the offset at which the text starts in its
// input; confirm against the TextToken definition.
func (w *Word) Begin() int {
	return w.token.begin
}
// End returns the end value recorded in the underlying token.
// NOTE(review): presumably the offset at which the text ends in its
// input; whether it is inclusive is not visible here, so confirm
// against the TextToken definition.
func (w *Word) End() int {
	return w.token.end
}
// Type returns the token type of the underlying token.
func (w *Word) Type() TokenType {
	return w.token.ttype
}
// IOB returns the word's IOB (Inside, Outside, Beginning) tag, which
// is one of 'B', 'I' or 'O'.
func (w *Word) IOB() byte {
	return w.iob
}
// POS returns the word's part of speech.
func (w *Word) POS() string {
	return w.pos
}
// Lemma returns the word's lemma form.
func (w *Word) Lemma() string {
	return w.lemma
}
// Class returns the class of the word, which is assigned after
// learning.
func (w *Word) Class() string {
	return w.class
}