forked from ghetzel/go-stockutil
/
manipulator.go
118 lines (96 loc) · 2.79 KB
/
manipulator.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
package fileutil
import (
"bufio"
"bytes"
"errors"
"io"
)
// SkipToken is a sentinel error that a ReadManipulatorFunc may return to
// suppress the delimiter that followed the token it was given.
//
// The underlying bufio.Scanner that tokenizes the input discards the bytes it
// splits on. In most cases that is not what we want, so ReadManipulator puts
// those bytes back into the stream after each manipulated token. However, if
// the thing being removed IS the token, putting the delimiter back would leave
// it behind in the output right after the token was removed.
//
// Returning SkipToken tells the ReadManipulator not to put the delimiter back
// into the stream. The bytes returned alongside it are still written, and no
// error is produced by the read.
var SkipToken = errors.New(`skip token`)
// ReadManipulatorFunc receives one token scanned from the input and returns
// the bytes that should be emitted in its place. Returning SkipToken as the
// error emits the returned bytes without the delimiter that followed the
// token; any other non-nil error is returned from ReadManipulator.Read.
type ReadManipulatorFunc func(data []byte) ([]byte, error)
// ReadManipulator wraps an io.Reader and runs the data read from it through a
// ReadManipulatorFunc, one token at a time, before returning it from Read.
type ReadManipulator struct {
// reader is the underlying source of data.
reader io.Reader
// fn is the manipulator applied to each token; when nil, Read passes
// straight through to reader.
fn ReadManipulatorFunc
// splitter is the split function used to tokenize the input. It is wrapped
// by interceptToken rather than handed to the scanner directly.
splitter bufio.SplitFunc
// scanner is created on the first manipulated Read and cleared by Close.
scanner *bufio.Scanner
// buffer stages manipulated output that has not been returned to the
// caller yet.
buffer *bytes.Buffer
// lastToken holds the bytes the splitter consumed beyond the token itself
// (data[len(token):advance]), as captured by interceptToken.
lastToken []byte
}
// NewReadManipulator returns a ReadManipulator that reads from reader and
// tokenizes by line (bufio.ScanLines) until Split is called with something
// else. Only the first entry of fns is used; if it is absent or nil, reads are
// passed through to reader unmodified.
func NewReadManipulator(reader io.Reader, fns ...ReadManipulatorFunc) *ReadManipulator {
	// a nil first entry simply leaves the manipulator unset
	var manipulate ReadManipulatorFunc

	if len(fns) > 0 {
		manipulate = fns[0]
	}

	return &ReadManipulator{
		reader:   reader,
		fn:       manipulate,
		splitter: bufio.ScanLines,
		buffer:   bytes.NewBuffer(nil),
	}
}
// Split sets the function used to tokenize the input before each token is
// handed to the manipulator function. The split function is looked up on every
// scan step, so a change takes effect on the next token.
//
// Passing nil restores line-based splitting (bufio.ScanLines) instead of
// storing a nil function that would panic when the next Read tries to call it.
func (self *ReadManipulator) Split(split bufio.SplitFunc) {
	if split == nil {
		split = bufio.ScanLines
	}

	self.splitter = split
}
// Read implements io.Reader.
//
// When no manipulator function is configured, the call is passed straight
// through to the underlying reader. Otherwise the input is tokenized with the
// configured split function, each token is run through the manipulator
// function, and the results are staged in an internal buffer from which b is
// filled.
//
// If the manipulator function returns a nil error, the delimiter bytes that
// were consumed along with the token are written back after the function's
// output. If it returns SkipToken, only the function's output is written. Any
// other error is returned immediately with a count of zero; output staged
// before the failure stays buffered for subsequent reads.
//
// A scanner error is reported only once all staged output has been returned,
// so bytes that were already manipulated are never lost. When the input is
// exhausted and the staging buffer is empty, Read returns io.EOF.
func (self *ReadManipulator) Read(b []byte) (int, error) {
	// without a manipulator function this is a plain pass-through
	if self.fn == nil {
		return self.reader.Read(b)
	}

	// initialize the scanner if we need to
	if self.scanner == nil {
		self.scanner = bufio.NewScanner(self.reader)
		self.scanner.Split(self.interceptToken)
	}

	// Scan only while the staged output cannot satisfy the request. Checking
	// the buffer BEFORE scanning keeps it bounded to roughly one token beyond
	// len(b); scanning unconditionally would pull in one more token per call
	// and, with small reads, end up holding the whole remaining stream.
	for self.buffer.Len() < len(b) && self.scanner.Scan() {
		out, err := self.fn(self.scanner.Bytes())

		// Take the delimiter captured for this token and clear it right away
		// so that it can never leak into the output of a later token, even if
		// the manipulator function failed.
		delim := self.lastToken
		self.lastToken = nil

		if err != nil && err != SkipToken {
			return 0, err
		}

		// Write the output and the delimiter separately instead of appending
		// to out: out may alias the scanner's internal buffer.
		if _, werr := self.buffer.Write(out); werr != nil {
			return 0, werr
		}

		if err == nil {
			if _, werr := self.buffer.Write(delim); werr != nil {
				return 0, werr
			}
		}
	}

	// surface scan errors only after everything already staged was drained
	if self.buffer.Len() == 0 {
		if err := self.scanner.Err(); err != nil {
			return 0, err
		}
	}

	// return whats in the buffer, and keep doing this until its empty
	return self.buffer.Read(b)
}
// Close discards all scanning state (the scanner, any captured delimiter and
// any staged output) and then closes the underlying reader if it implements
// io.Closer. For readers that cannot be closed it returns nil.
func (self *ReadManipulator) Close() error {
	// drop everything that belongs to the current stream
	self.scanner = nil
	self.lastToken = nil
	self.buffer = bytes.NewBuffer(nil)

	closer, closable := self.reader.(io.Closer)

	if !closable {
		return nil
	}

	return closer.Close()
}
// interceptToken is the bufio.SplitFunc actually installed on the scanner. It
// delegates to the configured splitter and records in lastToken the bytes that
// were consumed beyond the token (data[len(token):advance]), so that Read can
// put them back into the stream after the manipulated token.
//
// This assumes the token starts at the beginning of data, as it does for
// bufio.ScanLines. Captured bytes accumulate across calls until Read clears
// lastToken.
func (self *ReadManipulator) interceptToken(data []byte, atEOF bool) (advance int, token []byte, err error) {
	advance, token, err = self.splitter(data, atEOF)

	// Only capture when the range is well-formed. A split function may return
	// a token that is not a sub-slice of data and is longer than advance;
	// slicing data[len(token):advance] would panic in that case.
	if advance > 0 && advance <= len(data) && len(token) <= advance {
		self.lastToken = append(self.lastToken, data[len(token):advance]...)
	}

	return advance, token, err
}