-
Notifications
You must be signed in to change notification settings - Fork 18
/
cosmetic.go
261 lines (213 loc) · 8.15 KB
/
cosmetic.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
package rules
import (
"bytes"
"strings"
"golang.org/x/exp/slices"
)
// CosmeticRuleType is the enumeration of the different kinds of cosmetic
// rules. The possible values are the Cosmetic* constants declared in this
// file.
type CosmeticRuleType uint
// CosmeticRuleType enumeration. The values are assigned with iota, so the
// declaration order defines the numeric value of every constant.
const (
	// CosmeticElementHiding is the type of "##" rules, see
	// https://kb.adguard.com/en/general/how-to-create-your-own-ad-filters#cosmetic-elemhide-rules.
	CosmeticElementHiding CosmeticRuleType = iota
	// CosmeticCSS is the type of "#$#" rules, see
	// https://kb.adguard.com/en/general/how-to-create-your-own-ad-filters#cosmetic-css-rules.
	CosmeticCSS
	// CosmeticJS is the type of "#%#" rules, see
	// https://kb.adguard.com/en/general/how-to-create-your-own-ad-filters#javascript-rules.
	CosmeticJS
	// TODO: Move HTML filtering rules to a different file/structure

	// CosmeticHTML is the type of "$$" rules, see
	// https://kb.adguard.com/en/general/how-to-create-your-own-ad-filters#html-filtering-rules.
	CosmeticHTML
)
// cosmeticRuleMarker is a special marker inside the rule text that defines
// what type of cosmetic rule we are dealing with. The known markers are the
// marker* constants declared in this file.
type cosmeticRuleMarker string
// cosmeticRuleMarker enumeration.
//
// NOTE: NewCosmeticRule currently accepts only markerElementHiding and
// markerElementHidingException; every other marker listed here makes it
// return ErrUnsupportedRule.
const (
	// Element hiding rules, see
	// https://kb.adguard.com/en/general/how-to-create-your-own-ad-filters#cosmetic-elemhide-rules
	markerElementHiding                cosmeticRuleMarker = "##"
	markerElementHidingException       cosmeticRuleMarker = "#@#"
	markerElementHidingExtCSS          cosmeticRuleMarker = "#?#"
	markerElementHidingExtCSSException cosmeticRuleMarker = "#@?#"
	// CSS rules, see
	// https://kb.adguard.com/en/general/how-to-create-your-own-ad-filters#cosmetic-css-rules
	markerCSS                cosmeticRuleMarker = "#$#"
	markerCSSException       cosmeticRuleMarker = "#@$#"
	markerCSSExtCSS          cosmeticRuleMarker = "#$?#"
	markerCSSExtCSSException cosmeticRuleMarker = "#@$?#"
	// JavaScript rules, see
	// https://kb.adguard.com/en/general/how-to-create-your-own-ad-filters#javascript-rules
	markerJS          cosmeticRuleMarker = "#%#"
	markerJSException cosmeticRuleMarker = "#@%#"
	// HTML filtering rules, see
	// https://kb.adguard.com/en/general/how-to-create-your-own-ad-filters#html-filtering-rules
	markerHTML          cosmeticRuleMarker = "$$"
	markerHTMLException cosmeticRuleMarker = "$@$"
)
var (
	// cosmeticRulesMarkers contains all possible cosmetic rule markers.
	// init() re-sorts this slice so that the longest markers come first.
	cosmeticRulesMarkers = []string{
		string(markerElementHiding),
		string(markerElementHidingException),
		string(markerElementHidingExtCSS),
		string(markerElementHidingExtCSSException),
		string(markerCSS),
		string(markerCSSException),
		string(markerCSSExtCSS),
		string(markerCSSExtCSSException),
		string(markerJS),
		string(markerJSException),
		string(markerHTML),
		string(markerHTMLException),
	}

	// cosmeticRuleMarkersFirstChars holds the distinct first bytes of the
	// markers above. It is required by findCosmeticRuleMarker and is filled
	// in by init().
	cosmeticRuleMarkersFirstChars []byte
)
// init prepares the package-level marker tables used by
// findCosmeticRuleMarker.
func init() {
	// findCosmeticRuleMarker walks cosmeticRulesMarkers in order and returns
	// the first marker that matches, so the longest markers must be tried
	// first.
	slices.SortFunc(cosmeticRulesMarkers, func(left, right string) int {
		return len(right) - len(left)
	})

	// Collect every distinct leading byte, keeping the order of first
	// appearance in the sorted marker list.
	for _, m := range cosmeticRulesMarkers {
		first := m[0]
		if bytes.IndexByte(cosmeticRuleMarkersFirstChars, first) != -1 {
			continue
		}

		cosmeticRuleMarkersFirstChars = append(cosmeticRuleMarkersFirstChars, first)
	}
}
// CosmeticRule represents a cosmetic rule (element hiding, CSS, scriptlet).
// Instances are created by NewCosmeticRule.
type CosmeticRule struct {
	// RuleText is the original rule text, exactly as passed to
	// NewCosmeticRule.
	RuleText string
	// Content is the part of the rule text that follows the cosmetic marker,
	// with leading and trailing whitespace trimmed. Its meaning depends on
	// the rule type:
	//   - Element hiding: content is just a selector;
	//   - CSS: content is a selector + style definition;
	//   - JS: text of the script to be injected.
	Content string
	// permittedDomains is a list of permitted domains for this rule. It is
	// loaded from the part of the rule text that precedes the marker and is
	// empty when the rule starts with the marker.
	permittedDomains []string
	// restrictedDomains is a list of restricted domains for this rule. It is
	// loaded from the same part of the rule text as permittedDomains.
	restrictedDomains []string
	// FilterListID is a list identifier.
	FilterListID int
	// Type of the rule.
	Type CosmeticRuleType
	// Whitelist means that this rule is meant to disable rules with the same
	// content on the specified domains. For instance,
	// https://kb.adguard.com/en/general/how-to-create-your-own-ad-filters#elemhide-exceptions.
	Whitelist bool
	// ExtendedCSS means that this rule is supposed to be applied by the
	// javascript library, see https://github.com/AdguardTeam/ExtendedCss.
	//
	// NOTE: NewCosmeticRule does not set this field yet (see the TODO there).
	ExtendedCSS bool
}
// NewCosmeticRule parses ruleText and creates a new CosmeticRule that belongs
// to the filter list identified by filterListID.
//
// The text that precedes the cosmetic marker, if any, is passed to
// loadDomains with "," as the separator; the text that follows the marker,
// with surrounding whitespace trimmed, becomes the rule content.
//
// A *RuleSyntaxError is returned when no marker is found, when the domains
// cannot be loaded, when the content is empty, or when an exception rule has
// no permitted domains. ErrUnsupportedRule is returned for every marker other
// than "##" and "#@#".
func NewCosmeticRule(ruleText string, filterListID int) (*CosmeticRule, error) {
	markerIdx, marker := findCosmeticRuleMarker(ruleText)
	if markerIdx == -1 {
		return nil, &RuleSyntaxError{msg: "cannot find cosmetic marker", ruleText: ruleText}
	}

	rule := &CosmeticRule{
		RuleText:     ruleText,
		FilterListID: filterListID,
	}

	// A marker that is not at the very beginning of the text is preceded by
	// the list of domains, so parse them now.
	if markerIdx > 0 {
		permitted, restricted, err := loadDomains(ruleText[:markerIdx], ",")
		if err != nil {
			return nil, &RuleSyntaxError{msg: "cannot load domains", ruleText: ruleText}
		}

		rule.permittedDomains, rule.restrictedDomains = permitted, restricted
	}

	// The emptiness check deliberately comes before the marker type check, so
	// an empty rule is reported as a syntax error even for unsupported
	// markers.
	rule.Content = strings.TrimSpace(ruleText[markerIdx+len(marker):])
	if rule.Content == "" {
		return nil, &RuleSyntaxError{msg: "empty rule content", ruleText: ruleText}
	}

	switch kind := cosmeticRuleMarker(marker); kind {
	case markerElementHiding, markerElementHidingException:
		rule.Type = CosmeticElementHiding
		rule.Whitelist = kind == markerElementHidingException
	default:
		return nil, ErrUnsupportedRule
	}

	if rule.Whitelist && len(rule.permittedDomains) == 0 {
		return nil, &RuleSyntaxError{msg: "whitelist rule must have at least one domain specified", ruleText: ruleText}
	}

	// TODO: validate content
	// TODO: detect ExtCSS pseudo-classes

	return rule, nil
}
// Text returns the original rule text, i.e. the RuleText field.
// Implements the `Rule` interface.
func (f *CosmeticRule) Text() string {
	return f.RuleText
}
// GetFilterListID returns the ID of the filter list this rule belongs to,
// i.e. the FilterListID field.
func (f *CosmeticRule) GetFilterListID() int {
	return f.FilterListID
}
// String returns the original rule text. The result is the same as the one
// returned by Text.
func (f *CosmeticRule) String() string {
	return f.RuleText
}
// GetPermittedDomains returns the list of permitted domains. The rule's own
// slice is returned, not a copy, so callers should not modify it.
func (f *CosmeticRule) GetPermittedDomains() []string {
	return f.permittedDomains
}
// IsGeneric returns true if the rule can be considered generic, i.e. it is
// not limited to a specific domain. Only the permitted domains are checked;
// restricted domains do not make a rule non-generic.
func (f *CosmeticRule) IsGeneric() bool {
	return len(f.permittedDomains) == 0
}
// Match returns true if this rule can be used on the specified hostname.
//
// A rule without any domains matches every hostname. Otherwise the hostname
// must not be covered by the restricted domains and, when permitted domains
// are present, must be covered by them. Coverage is decided by
// isDomainOrSubdomainOfAny.
func (f *CosmeticRule) Match(hostname string) bool {
	// TODO: Improve hosts matching, start using a better approach (token-based maps)
	hasPermitted := len(f.permittedDomains) > 0
	hasRestricted := len(f.restrictedDomains) > 0

	switch {
	case !hasPermitted && !hasRestricted:
		// No domain limitations at all.
		return true
	case hasRestricted && isDomainOrSubdomainOfAny(hostname, f.restrictedDomains):
		// The hostname is explicitly restricted.
		return false
	case hasPermitted && !isDomainOrSubdomainOfAny(hostname, f.permittedDomains):
		// The hostname is not among the permitted ones.
		return false
	default:
		return true
	}
}
// isCosmetic reports whether line is a cosmetic filtering rule, i.e. whether
// findCosmeticRuleMarker finds a cosmetic marker in it.
func isCosmetic(line string) bool {
	markerIndex, _ := findCosmeticRuleMarker(line)
	hasMarker := markerIndex != -1

	return hasMarker
}
// findCosmeticRuleMarker looks for a cosmetic rule marker in ruleText and
// returns the index where the marker starts together with the marker itself.
// If nothing is found, it returns -1 and an empty string.
//
// For every possible first marker byte only its first occurrence in ruleText
// is examined.
func findCosmeticRuleMarker(ruleText string) (int, string) {
	for _, first := range cosmeticRuleMarkersFirstChars {
		pos := strings.IndexByte(ruleText, first)

		// Skip bytes that are absent, and also candidates preceded by a
		// space: the latter are false positives coming from hosts files,
		// for instance:
		// 0.0.0.0 jackbootedroom.com ## phishing
		if pos == -1 || (pos > 0 && ruleText[pos-1] == ' ') {
			continue
		}

		// The markers are sorted by init(), longest first, and the first one
		// that matches at pos wins.
		for _, candidate := range cosmeticRulesMarkers {
			if startsAtIndexWith(ruleText, pos, candidate) {
				return pos, candidate
			}
		}
	}

	return -1, ""
}
// startsAtIndexWith reports whether substr occurs in str exactly at
// startIndex.
//
// str is the string to check, startIndex is the byte index in str where the
// comparison starts, and substr is the substring that is expected there.
//
// A startIndex outside of [0, len(str)] never matches; in particular, a
// negative startIndex yields false instead of causing an out-of-range panic.
func startsAtIndexWith(str string, startIndex int, substr string) bool {
	if startIndex < 0 || startIndex > len(str) {
		return false
	}

	// HasPrefix also covers the case where fewer than len(substr) bytes are
	// left after startIndex.
	return strings.HasPrefix(str[startIndex:], substr)
}