Skip to content

Commit

Permalink
v3.0.0 - Config API redone to support custom elements and attributes
Browse files Browse the repository at this point in the history
  • Loading branch information
BenLubar committed Nov 4, 2016
1 parent de1d2fb commit 5dbb039
Show file tree
Hide file tree
Showing 8 changed files with 280 additions and 114 deletions.
15 changes: 15 additions & 0 deletions .codeclimate.yml
@@ -0,0 +1,15 @@
engines:
  fixme:
    enabled: true
  gofmt:
    enabled: true
  golint:
    enabled: true
  govet:
    enabled: true
  markdownlint:
    enabled: true
ratings:
  paths:
  - "**.go"
  - "**.md"
16 changes: 16 additions & 0 deletions .travis.yml
@@ -0,0 +1,16 @@
language: go
sudo: false
go:
  - 1.7
  - tip
before_install:
  - go get github.com/mattn/goveralls
script:
  - $HOME/gopath/bin/goveralls -service=travis-ci
notifications:
  webhooks:
    urls:
      - https://webhooks.gitter.im/e/b7a046aa93ac3d8e5262
    on_success: change
    on_failure: always
    on_start: never
3 changes: 3 additions & 0 deletions README.md
@@ -1,4 +1,7 @@
# HTML Cleaner

[![Build Status](https://travis-ci.org/BenLubar/htmlcleaner.svg?branch=master)](https://travis-ci.org/BenLubar/htmlcleaner)
[![Join the chat at https://gitter.im/BenLubar/htmlcleaner](https://badges.gitter.im/BenLubar/webscale.svg)](https://gitter.im/BenLubar/htmlcleaner)
[![Go Report Card](https://goreportcard.com/badge/github.com/BenLubar/htmlcleaner)](https://goreportcard.com/report/github.com/BenLubar/htmlcleaner)
[![GoDoc](https://godoc.org/github.com/BenLubar/htmlcleaner?status.svg)](https://godoc.org/github.com/BenLubar/htmlcleaner)
[![Coverage Status](https://coveralls.io/repos/github/BenLubar/htmlcleaner/badge.svg?branch=master)](https://coveralls.io/github/BenLubar/htmlcleaner?branch=master)
42 changes: 33 additions & 9 deletions cleaner.go
Expand Up @@ -50,8 +50,18 @@ func Preprocess(config *Config, fragment string) string {
case html.StartTagToken, html.EndTagToken, html.SelfClosingTagToken:
raw := string(t.Raw())
tagName, _ := t.TagName()
tag := atom.Lookup(tagName)
if _, ok := config.Elem[tag]; !ok {
allowed := false
if tag := atom.Lookup(tagName); tag != 0 {
if _, ok := config.elem[tag]; ok {
allowed = true
}
}
if !allowed {
if _, ok := config.elemCustom[string(tagName)]; ok {
allowed = true
}
}
if !allowed {
raw = html.EscapeString(raw)
}
write(raw)
Expand Down Expand Up @@ -239,7 +249,9 @@ func filterNode(c *Config, n *html.Node) *html.Node {
}

func cleanNode(c *Config, n *html.Node) *html.Node {
if allowedAttr, ok := c.Elem[n.DataAtom]; ok {
allowedAttr, ok1 := c.elem[n.DataAtom]
customAttr, ok2 := c.elemCustom[n.Data]
if ok1 || ok2 {
// copy the node
tmp := *n
n = &tmp
Expand All @@ -252,15 +264,24 @@ func cleanNode(c *Config, n *html.Node) *html.Node {
n.Attr = make([]html.Attribute, 0, len(attrs))
for _, attr := range attrs {
a := atom.Lookup([]byte(attr.Key))
if attr.Namespace != "" || (!allowedAttr[a] && !c.Attr[a]) {

re1, ok1 := allowedAttr[a]
re2, ok2 := customAttr[attr.Key]
_, ok3 := c.attr[a]
_, ok4 := c.attrCustom[attr.Key]

if attr.Namespace != "" || (!ok1 && !ok2 && !ok3 && !ok4) {
continue
}

if !c.AllowJavascriptURL && !cleanURL(c, a, &attr) {
if !cleanURL(c, a, &attr) {
continue
}

if re, ok := c.AttrMatch[n.DataAtom][a]; ok && !re.MatchString(attr.Val) {
if re1 != nil && !re1.MatchString(attr.Val) {
continue
}
if re2 != nil && !re2.MatchString(attr.Val) {
continue
}

Expand All @@ -287,6 +308,12 @@ var allowedURLSchemes = map[string]bool{
"": true,
}

// SafeURLScheme reports whether the scheme of u is listed as allowed in
// allowedURLSchemes: http, https, mailto, data, or the empty string.
func SafeURLScheme(u *url.URL) bool {
	// A scheme missing from the table yields the zero value, so requiring
	// both "present" and "true" is equivalent to a plain lookup.
	allowed, found := allowedURLSchemes[u.Scheme]
	return found && allowed
}

func cleanURL(c *Config, a atom.Atom, attr *html.Attribute) bool {
if a != atom.Href && a != atom.Src && a != atom.Poster {
return true
Expand All @@ -296,9 +323,6 @@ func cleanURL(c *Config, a atom.Atom, attr *html.Attribute) bool {
if err != nil {
return false
}
if !allowedURLSchemes[u.Scheme] {
return false
}
if c.ValidateURL != nil && !c.ValidateURL(u) {
return false
}
Expand Down
18 changes: 13 additions & 5 deletions cleaner_test.go
Expand Up @@ -29,6 +29,14 @@ func doTableTest(f func(*Config, string) string, t *testing.T, table []testTable
}
}

// wrapConfig is a shallow copy of DefaultConfig with WrapText switched on.
// It is shared by the test cases that exercise text wrapping.
var wrapConfig = func() *Config {
	cfg := new(Config)
	*cfg = *DefaultConfig // shallow copy; DefaultConfig itself is not modified

	cfg.WrapText = true

	return cfg
}()

var testTableClean = []testTable{
{"Empty", ``, ``, nil},
{"PlainText", `a`, `a`, nil},
Expand All @@ -46,9 +54,9 @@ var testTableClean = []testTable{
{"Ampersand", `&`, `&`, nil},
{"AmpersandEntity", `&`, `&`, nil},
{"InvalidTagEntity", `<invalidtag>&#34;</invalidtag>`, `&lt;invalidtag&gt;&#34;&lt;/invalidtag&gt;`, nil},
{"StrayListItem", `<li>`, `<ul><li></li></ul>`, &Config{Elem: map[atom.Atom]map[atom.Atom]bool{atom.Ul: nil, atom.Li: nil}}},
{"StrayListItem", `<li>`, `<ul><li></li></ul>`, (&Config{}).ElemAtom(atom.Ul, atom.Li)},
{"LinkPercent", `<a href="https://www.%google.com">google</a>`, `<a>google</a>`, nil},
{"LinkPercentWrap", `<a href="https://www.%google.com">google</a>`, `<p><a>google</a></p>`, &Config{Elem: DefaultConfig.Elem, WrapText: true}},
{"LinkPercentWrap", `<a href="https://www.%google.com">google</a>`, `<p><a>google</a></p>`, wrapConfig},
{"GreaterThanInfix", `foo>bar`, `foo&gt;bar`, nil},
{"GreaterThanPrefix", `>bar`, `&gt;bar`, nil},
{"GreaterThanSuffix", `foo>`, `foo&gt;`, nil},
Expand All @@ -63,9 +71,9 @@ var testTableClean = []testTable{
{"PHP", `<?php echo mysql_real_escape_string('foo'); ?>`, `<!--?php echo mysql_real_escape_string('foo'); ?-->`, nil},
{"PHPEscaped", `<?php echo mysql_real_escape_string('foo'); ?>`, `&lt;!--?php echo mysql_real_escape_string(&#39;foo&#39;); ?--&gt;`, &Config{EscapeComments: true}},
{"Small250", strings.Repeat(`<small>a `, 250), strings.Repeat(`<small>a `, 99) + "<small>[omitted]" + strings.Repeat(`</small>`, 100), nil},
{"WrapUnclosed", `hello <em>world`, `<p>hello <em>world</em></p>`, &Config{Elem: DefaultConfig.Elem, WrapText: true}},
{"WrapStraySpace", `<p>hello</p> <p>world</p>`, `<p>hello</p> <p>world</p>`, &Config{Elem: DefaultConfig.Elem, WrapText: true}},
{"WrapInvalidNesting", `<em>hello <p>world</p>`, `<p><em>hello </em></p><p><em>world</em></p><p></p>`, &Config{Elem: DefaultConfig.Elem, WrapText: true}},
{"WrapUnclosed", `hello <em>world`, `<p>hello <em>world</em></p>`, wrapConfig},
{"WrapStraySpace", `<p>hello</p> <p>world</p>`, `<p>hello</p> <p>world</p>`, wrapConfig},
{"WrapInvalidNesting", `<em>hello <p>world</p>`, `<p><em>hello </em></p><p><em>world</em></p><p></p>`, wrapConfig},
}

func TestClean(t *testing.T) {
Expand Down

0 comments on commit 5dbb039

Please sign in to comment.