/
payload.go
65 lines (54 loc) · 1.43 KB
/
payload.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
package crawler
import (
"bytes"
"fmt"
"io"
"sync"
"time"
"github.com/PacktPublishing/Hands-On-Software-Engineering-with-Golang/Chapter07/pipeline"
"github.com/google/uuid"
)
var (
	// Compile-time assertion that *crawlerPayload satisfies pipeline.Payload.
	_ pipeline.Payload = (*crawlerPayload)(nil)

	// payloadPool recycles crawlerPayload instances to cut per-URL
	// allocations: Clone draws from it and MarkAsProcessed returns to it.
	payloadPool = sync.Pool{
		New: func() interface{} { return new(crawlerPayload) },
	}
)
// crawlerPayload carries a single crawled link through the pipeline stages.
// Instances are pooled (see payloadPool); fields are populated incrementally
// by successive stages and cleared again in MarkAsProcessed.
type crawlerPayload struct {
	LinkID      uuid.UUID // graph ID of the link being processed
	URL         string    // the link's URL
	RetrievedAt time.Time // time the content was fetched

	// RawContent holds the fetched page body; later stages extract the
	// title, text and links from it.
	RawContent bytes.Buffer

	// NoFollowLinks are still added to the graph but no outgoing edges
	// will be created from this link to them.
	NoFollowLinks []string

	Links       []string // outgoing links discovered in the page
	Title       string   // extracted page title
	TextContent string   // extracted plain-text page content
}
// Clone implements pipeline.Payload. It returns a deep copy of the payload
// backed by a pooled instance so downstream stages can mutate their copy
// without affecting the original.
func (p *crawlerPayload) Clone() pipeline.Payload {
	newP := payloadPool.Get().(*crawlerPayload)
	newP.LinkID = p.LinkID
	newP.URL = p.URL
	newP.RetrievedAt = p.RetrievedAt
	newP.NoFollowLinks = append([]string(nil), p.NoFollowLinks...)
	newP.Links = append([]string(nil), p.Links...)
	newP.Title = p.Title
	newP.TextContent = p.TextContent

	// Copy via a fresh reader over the buffer's bytes rather than reading
	// from p.RawContent directly: reading a bytes.Buffer consumes it, which
	// would leave the ORIGINAL payload with an empty RawContent after Clone.
	_, err := io.Copy(&newP.RawContent, bytes.NewReader(p.RawContent.Bytes()))
	if err != nil {
		// Copying between in-memory buffers cannot realistically fail;
		// treat an error here as a programmer bug.
		panic(fmt.Sprintf("[BUG] error cloning payload raw content: %v", err))
	}
	return newP
}
// MarkAsProcessed implements pipeline.Payload. It clears the payload's
// content fields and hands the instance back to the pool for reuse; slice
// fields keep their backing capacity to avoid reallocation on the next use.
func (p *crawlerPayload) MarkAsProcessed() {
	p.URL = ""
	p.Title = ""
	p.TextContent = ""
	p.RawContent.Reset()
	p.NoFollowLinks = p.NoFollowLinks[:0]
	p.Links = p.Links[:0]
	payloadPool.Put(p)
}