-
Notifications
You must be signed in to change notification settings - Fork 116
/
main.go
96 lines (88 loc) · 2.49 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
package main
import (
"context"
"fmt"
"github.com/olivere/elastic"
"io/ioutil"
"net/http"
"regexp"
"strconv"
)
// Profile is one scraped member record. Every field holds the raw text
// captured from the HTML; the struct is sent as-is as the JSON body of the
// Elasticsearch document.
type Profile struct {
	Name, Age, Weight, Height, Image string
}
// main crawls every listing page concurrently and prints one line for each
// value received from the page workers.
//
// One goroutine is started per listing page. Each worker signals on done once
// HandlePage has returned, and main keeps draining ch until every worker has
// signalled. This avoids assuming a fixed number of results per page: a page
// that yields fewer profiles than expected no longer leaves main blocked
// forever on a receive that will never arrive.
func main() {
	const totalPages = 133

	ch := make(chan int)
	done := make(chan struct{})

	for i := 1; i <= totalPages; i++ {
		go func(page int) {
			HandlePage(page, ch)
			// ch is unbuffered, so every value this page sent has already
			// been received by the time we get here.
			done <- struct{}{}
		}(i)
	}

	for remaining := totalPages; remaining > 0; {
		select {
		case num := <-ch:
			fmt.Printf("第%d条插入elk完成\n", num)
		case <-done:
			remaining--
		}
	}
}
//解析每页数据
func HandlePage(i int, ch chan int) {
url := "https://vip.jiayuan.com/cjjllist.php?&p=" + strconv.Itoa(i)
html := ParseHtml(url)
profile := GetToWork(html)
var num = 0
for _, v := range profile {
num++
PutIntoElk(ch, num, v)
}
}
// ParseHtml fetches url with an HTTP GET and returns the response body as a
// string.
//
// Every failure is printed to standard output and results in an empty
// string: a request error, a status other than 200 OK (the status code is
// printed), or an error while reading the body. Callers therefore never
// receive the body of an error page.
func ParseHtml(url string) (html string) {
	resp, err := http.Get(url)
	if err != nil {
		// There is no usable response on error, so return before touching
		// resp.Body.
		fmt.Println(err)
		return ""
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		fmt.Println(resp.StatusCode)
		return ""
	}

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		fmt.Println(err)
		return ""
	}
	return string(body)
}
// Patterns used by GetToWork. They are compiled once at package
// initialisation instead of once per profile.
var (
	memberNameRe    = regexp.MustCompile(`class="memberName">(?s:(.*?))</a></p>`)
	memberLinkRe    = regexp.MustCompile(`<p><a target="_blank" href="(?s:(.*?))" class="memberName">`)
	profileAgeRe    = regexp.MustCompile(`<span class="item">年龄：(?s:(.*?))</span>`)
	profileHeightRe = regexp.MustCompile(`<span class="item">身高：(?s:(.*?))</span>`)
	profileWeightRe = regexp.MustCompile(`<span class="item">体重：(?s:(.*?))</span>`)
	profileImageRe  = regexp.MustCompile(`<img src="(?s:(.*?))" alt="" />`)
)

// firstSubmatch returns the first capture group of re's first match in s, or
// "" when re does not match.
func firstSubmatch(re *regexp.Regexp, s string) string {
	if m := re.FindStringSubmatch(s); len(m) > 1 {
		return m[1]
	}
	return ""
}

// GetToWork extracts the members listed in html (a listing page) and builds
// one Profile per member.
//
// Member names and profile links are matched separately and paired by
// position. For each pair the linked profile page is downloaded with
// ParseHtml and the age, height, weight and first image source are taken from
// it. A field whose pattern does not match is left empty, and names without a
// corresponding link are dropped, so unexpected or empty pages no longer
// cause an index-out-of-range panic. The result is nil when html contains no
// members.
func GetToWork(html string) (profile []Profile) {
	names := memberNameRe.FindAllStringSubmatch(html, -1)
	links := memberLinkRe.FindAllStringSubmatch(html, -1)

	for index, name := range names {
		if index >= len(links) {
			// No link left to pair with this name, so there is no page to fetch.
			break
		}
		page := ParseHtml("https://vip.jiayuan.com" + links[index][1])
		profile = append(profile, Profile{
			Name:   name[1],
			Age:    firstSubmatch(profileAgeRe, page),
			Weight: firstSubmatch(profileWeightRe, page),
			Height: firstSubmatch(profileHeightRe, page),
			Image:  firstSubmatch(profileImageRe, page),
		})
	}
	return profile
}
// PutIntoElk indexes profile as a document of type "profile" in the
// "shijijiayuan" Elasticsearch index and then sends num on ch.
//
// Errors are printed to standard output rather than returned. num is sent on
// ch whether or not indexing succeeded, so a receiver that counts values
// never waits on a failed document.
func PutIntoElk(ch chan int, num int, profile Profile) {
	client, err := elastic.NewClient(elastic.SetSniff(false))
	if err != nil {
		// Without a usable client there is nothing to index with; report the
		// error and still notify the receiver instead of dereferencing client.
		fmt.Println(err)
		ch <- num
		return
	}
	// A fresh client is created for every document, so stop it when done to
	// release whatever background work the library started for it.
	defer client.Stop()

	_, err = client.Index().
		Index("shijijiayuan").
		Type("profile").
		BodyJson(profile).
		Do(context.Background())
	if err != nil {
		fmt.Println(err)
	}
	ch <- num
}