forked from xlvector/caspercloud
-
Notifications
You must be signed in to change notification settings - Fork 0
/
analyzer.go
126 lines (106 loc) · 2.43 KB
/
analyzer.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
package caspercloud
import (
"bytes"
"compress/gzip"
"encoding/base64"
"encoding/json"
"github.com/PuerkitoBio/goquery"
"io/ioutil"
"log"
"net/http"
"os"
"strings"
)
// kParserServer is the URL of the parser service endpoint that postData
// submits its gzip-compressed payload to.
const kParserServer = "http://parser.crawler.bdp.cc/submit"
// Mail describes a single mail by its From and Title values. It is
// (de)serialized as JSON using the keys "from" and "title".
type Mail struct {
	// From is stored under the JSON key "from".
	From string `json:"from"`
	// Title is stored under the JSON key "title".
	Title string `json:"title"`
}
// CasperOutput is the JSON document shape carrying a list of downloads and a
// list of mails, under the keys "downloads" and "mails".
type CasperOutput struct {
	// Downloads is stored under the JSON key "downloads".
	// NOTE(review): presumably paths of downloaded files — confirm against the producer.
	Downloads []string `json:"downloads"`
	// Mails is stored under the JSON key "mails".
	Mails []Mail `json:"mails"`
}
// LoadDownloads runs ParseFile on every file name in fs, in order.
// A failure on one file does not stop the remaining files from being parsed;
// the error returned by ParseFile is discarded here.
func LoadDownloads(fs []string) {
	for i := 0; i < len(fs); i++ {
		// Best effort: keep going with the next file whatever the outcome.
		_ = ParseFile(fs[i])
	}
}
// ParseFile opens the file named fn, parses it into a goquery document and
// logs the length of the document's text.
//
// It returns the error from opening the file or from building the document
// (each is also logged), and nil on success.
func ParseFile(fn string) error {
	file, openErr := os.Open(fn)
	if openErr != nil {
		log.Println("fail to load file:", openErr)
		return openErr
	}
	defer file.Close()

	dom, parseErr := goquery.NewDocumentFromReader(file)
	if parseErr != nil {
		log.Println("fail to get dom:", parseErr)
		return parseErr
	}

	textLen := len(dom.Text())
	log.Println("file length", textLen)
	return nil
}
// MailProcessor is a stateless type whose methods read downloaded files and
// submit them to the parser server.
type MailProcessor struct{}
// NewMailProcessor returns a pointer to a newly allocated, zero-valued
// MailProcessor.
func NewMailProcessor() *MailProcessor {
	return new(MailProcessor)
}
// postData gzip-compresses data and POSTs the compressed bytes to
// kParserServer with content type "plain/text".
//
// It returns false when compression fails or the request cannot be
// completed, and true once a response has been received. The response body
// is logged; the HTTP status code is not inspected.
func (p *MailProcessor) postData(data string) bool {
	var buf bytes.Buffer
	w := gzip.NewWriter(&buf)
	if _, err := w.Write([]byte(data)); err != nil {
		log.Println("gzip compress err:", err)
		return false
	}
	// Close, not just Flush, has to run before the buffer is sent: Close
	// writes the gzip footer that terminates the stream. Deferring it until
	// after the request would post a truncated stream.
	if err := w.Close(); err != nil {
		log.Println("gzip compress err:", err)
		return false
	}

	response, err := http.Post(kParserServer, "plain/text", &buf)
	if err != nil || response == nil {
		// Log err itself rather than err.Error(): err may be nil on the
		// response == nil branch and calling a method on it would panic.
		log.Println("do request get error:", err, " response:", response)
		return false
	}
	defer response.Body.Close()

	body, err := ioutil.ReadAll(response.Body)
	if err != nil {
		// The request itself went through; report the read problem and
		// still log whatever part of the body was received.
		log.Println("read response body err:", err)
	}
	log.Println("|post result|", string(body))
	return true
}
// Process reads every file listed in downloads, stores the contents in
// metaInfo and posts the JSON-encoded metaInfo through postData.
//
// Files whose name ends in ".zip" are base64-encoded (standard encoding);
// all other files are embedded as-is. The list of contents is JSON-encoded
// and stored under metaInfo["raw_html"]. metaInfo["is_zip"] is set to "true"
// when at least one download was a .zip file and to "false" otherwise.
// metaInfo is modified in place; a nil metaInfo is treated as an empty map.
//
// It returns false when a file cannot be read, when JSON encoding fails or
// when postData reports a failure, and true otherwise.
func (p *MailProcessor) Process(metaInfo map[string]string, downloads []string) bool {
	if metaInfo == nil {
		// Writing to a nil map panics, so fall back to a private map.
		metaInfo = make(map[string]string, 2)
	}

	// mails is left nil when downloads is empty, so raw_html then encodes
	// as JSON null.
	var mails []string
	isZip := false
	for _, fn := range downloads {
		fd, err := ioutil.ReadFile(fn)
		if err != nil {
			// Report the failure to the caller instead of exiting the
			// process: one unreadable download should not bring down the
			// whole program.
			log.Println("read file get error:", err)
			return false
		}
		if strings.HasSuffix(fn, ".zip") {
			isZip = true
			mails = append(mails, base64.StdEncoding.EncodeToString(fd))
		} else {
			mails = append(mails, string(fd))
		}
	}

	htmls, err := json.Marshal(mails)
	if err != nil {
		log.Println("marshal mails get err:", err)
		return false
	}
	metaInfo["raw_html"] = string(htmls)
	if isZip {
		metaInfo["is_zip"] = "true"
	} else {
		metaInfo["is_zip"] = "false"
	}

	data, err := json.Marshal(metaInfo)
	if err != nil {
		log.Println("marshal metainfo get error:", err)
		return false
	}
	// Propagate the outcome of the upload rather than always claiming success.
	return p.postData(string(data))
}