/
header.go
138 lines (123 loc) · 3.3 KB
/
header.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
package goscrapper
import (
	"fmt"
	"sort"
	"strings"

	"github.com/PuerkitoBio/goquery"
)
// Title returns the text of the first <title> element in the document with
// surrounding whitespace removed. An empty string is returned when the
// source HTML has no <title>.
//
// Only the first match is read: other elements (inline SVG graphics, for
// instance) may carry their own <title> children, and taking the text of the
// whole selection would append them to the page title.
//
// Example:
//
//	html: <title>Lorem Ipsum</title>
//
//	Result: Lorem Ipsum
func (w *Web) Title() string {
	return strings.TrimSpace(w.Doc.Find("title").First().Text())
}
// Charset returns the value of the charset attribute of the first <meta>
// element that declares one. An empty string is returned when no such tag
// exists in the source HTML.
//
// Example:
//
//	html: <meta charset="utf-8" />
//
//	Result: utf-8
func (w *Web) Charset() string {
	// AttrOr only inspects the first element of a selection, so the selector
	// itself has to be limited to <meta> tags that carry a charset attribute.
	// A bare "meta" selector returns "" whenever any other <meta> tag comes
	// before the charset declaration.
	return w.Doc.Find("meta[charset]").AttrOr("charset", "")
}
// Viewport builds a Viewport from the document's viewport meta tag by
// delegating to NewViewport.
//
// Example:
//
//	html: <meta name="viewport" content="width=device-width, initial-scale=1" />
//
// Results:
//
//	w.Viewport().Val      -> map with Val["width"] == "device-width" and Val["initial-scale"] == "1"
//	w.Viewport().String() -> the same entries rendered as "key=value" pairs joined with ", "
func (w *Web) Viewport() *Viewport {
	vp := NewViewport(w.Doc)
	return vp
}
// ContentType returns the values declared by the document's Content-Type
// http-equiv meta tag, joined with ", ". For "key=value" parameters only the
// value part is reported. An empty string is returned when the tag, or its
// content attribute, is missing.
//
// Example:
//
//	html: <meta http-equiv="Content-type" content="text/html; charset=utf-8" />
//
//	Result: text/html, utf-8
func (w *Web) ContentType() string {
	// CSS attribute values are compared case-sensitively, but pages spell this
	// keyword as "Content-Type", "content-type", etc. Select every http-equiv
	// meta tag and compare the keyword case-insensitively instead.
	meta := w.Doc.Find("meta[http-equiv]").FilterFunction(func(_ int, s *goquery.Selection) bool {
		return strings.EqualFold(strings.TrimSpace(s.AttrOr("http-equiv", "")), "content-type")
	})
	content, ok := meta.Attr("content")
	if !ok {
		return ""
	}

	// Tokens are separated by ';' and/or whitespace. FieldsFunc never yields
	// empty tokens, so repeated spaces and stray semicolons are harmless, and
	// "text/html;charset=utf-8" (no space) keeps its media type.
	tokens := strings.FieldsFunc(content, func(r rune) bool {
		return strings.ContainsRune("; \t\r\n", r)
	})

	values := make([]string, 0, len(tokens))
	for _, tok := range tokens {
		if i := strings.Index(tok, "="); i >= 0 {
			tok = tok[i+1:]
		}
		if tok != "" {
			values = append(values, tok)
		}
	}
	return strings.Join(values, ", ")
}
// Canonical returns the href of the document's canonical link element, or an
// empty string when the element or its href attribute is absent.
//
// Example:
//
//	html: <link rel="canonical" href="https://test-page.goscrapper.com/page.html" />
//
//	Result: https://test-page.goscrapper.com/page.html
func (w *Web) Canonical() string {
	link := w.Doc.Find("link[rel='canonical']")
	if href, found := link.Attr("href"); found {
		return href
	}
	return ""
}
// CSRFToken returns the content of the csrf-token meta tag, or an empty
// string when the tag or its content attribute is absent.
//
// Example:
//
//	html: <meta name="csrf-token" content="token" />
//
//	Result: token
func (w *Web) CSRFToken() string {
	meta := w.Doc.Find("meta[name='csrf-token']")
	return meta.AttrOr("content", "")
}
// Headers collects the head metadata into a map keyed by "charset",
// "canonical", "contentType", "csrfToken" and "viewport". Each value is the
// string returned by the corresponding accessor on w.
func (w *Web) Headers() map[string]string {
	return map[string]string{
		"charset":     w.Charset(),
		"canonical":   w.Canonical(),
		"contentType": w.ContentType(),
		"csrfToken":   w.CSRFToken(),
		"viewport":    w.Viewport().String(),
	}
}
// Viewport holds the directives read from a document's viewport meta tag.
type Viewport struct {
	// Val maps each directive name to its value, both whitespace-trimmed
	// (e.g. "width" -> "device-width"). It is replaced by every call to Fetch.
	Val map[string]string
}
// NewViewport allocates a Viewport and fills it from doc by calling Fetch.
func NewViewport(doc *goquery.Document) *Viewport {
	vp := new(Viewport)
	vp.Fetch(doc)
	return vp
}
// Fetch reads the content attribute of the first meta[name='viewport']
// element in doc, splits it on commas into "key=value" directives and stores
// the trimmed pairs in v.Val, replacing any previous value. When the tag or
// its content attribute is missing, v.Val is set to an empty, non-nil map.
//
// A directive without "=" (for example "minimal-ui") is stored with an empty
// value, and blank items such as the one produced by a trailing comma are
// skipped, so malformed content no longer causes an index-out-of-range panic.
func (v *Viewport) Fetch(doc *goquery.Document) {
	vp := make(map[string]string)
	content, ok := doc.Find("meta[name='viewport']").Attr("content")
	if ok {
		for _, item := range strings.Split(content, ",") {
			if strings.TrimSpace(item) == "" {
				continue
			}
			// SplitN always returns at least one element; the second one
			// exists only when the item contains "=".
			kv := strings.SplitN(item, "=", 2)
			var val string
			if len(kv) == 2 {
				val = strings.TrimSpace(kv[1])
			}
			vp[strings.TrimSpace(kv[0])] = val
		}
	}
	v.Val = vp
}
// String renders the viewport directives as "key=value" pairs joined with
// ", ". Pairs are emitted in ascending key order so the result is stable
// across calls; Go map iteration order is randomized, so ranging over v.Val
// directly would produce a different string from one call to the next.
// An empty string is returned when there are no directives.
func (v *Viewport) String() string {
	if len(v.Val) == 0 {
		return ""
	}

	keys := make([]string, 0, len(v.Val))
	for k := range v.Val {
		keys = append(keys, k)
	}
	sort.Strings(keys)

	pairs := make([]string, 0, len(keys))
	for _, k := range keys {
		pairs = append(pairs, fmt.Sprintf("%s=%s", k, v.Val[k]))
	}
	return strings.Join(pairs, ", ")
}