-
Notifications
You must be signed in to change notification settings - Fork 20
/
stream.rs
52 lines (45 loc) · 1.26 KB
/
stream.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
use tantivy::tokenizer::Token;
/// Token stream that walks a pre-computed list of words and exposes each one
/// as a tantivy `Token` through the `TokenStream` implementation in this file.
#[derive(Debug)]
pub struct CangjieTokenStream<'a> {
/// Text that token byte offsets are measured against.
src: &'a str,
/// Words to emit, in order. `advance` derives each offset from the pointer
/// difference between the word and `src`, so every entry is presumably a
/// sub-slice of `src` -- confirm at the call site.
result: Vec<&'a str>,
/// Index into `result` of the next word to emit. Starts at 0 (see `new`) and
/// is incremented by every successful `advance`.
index: usize,
/// The token most recently produced by `advance`.
token: Token,
}
impl<'a> CangjieTokenStream<'a> {
    /// Creates a stream positioned before the first entry of `result`.
    ///
    /// `src` is the text that offsets are computed against and `result` holds
    /// the words to emit, in order. The cursor starts at 0 and the current
    /// token is a default `Token` until `advance` is called.
    pub fn new(src: &'a str, result: Vec<&'a str>) -> Self {
        let token = Token::default();
        Self {
            src,
            result,
            index: 0,
            token,
        }
    }
}
impl<'a> ::tantivy::tokenizer::TokenStream for CangjieTokenStream<'a> {
    /// Moves to the next word in `result` and loads it into the current token.
    ///
    /// Returns `true` when a token was produced and `false` once every word
    /// has been emitted. On `false` the current token is left untouched.
    fn advance(&mut self) -> bool {
        let word = match self.result.get(self.index) {
            Some(&word) => word,
            None => return false,
        };

        // Byte offset of `word` relative to the start of `src`, obtained from
        // the distance between the two string pointers. This is only
        // meaningful when `word` points into `src`.
        let offset_from = word.as_ptr() as usize - self.src.as_ptr() as usize;

        // Every field is reset explicitly because callers may have modified
        // the previous token through `token_mut`.
        let token = &mut self.token;
        token.offset_from = offset_from;
        token.offset_to = offset_from + word.len();
        token.position = self.index;
        token.position_length = 1;
        // Reuse the existing text buffer rather than allocating a fresh
        // `String` for each token.
        token.text.clear();
        token.text.push_str(word);

        self.index += 1;
        true
    }

    /// Returns a shared reference to the token produced by the latest
    /// successful `advance`.
    fn token(&self) -> &::tantivy::tokenizer::Token {
        &self.token
    }

    /// Returns a mutable reference to the current token so it can be modified
    /// in place.
    fn token_mut(&mut self) -> &mut ::tantivy::tokenizer::Token {
        &mut self.token
    }
}