Skip to content

Commit

Permalink
fix: support IDNA in lists
Browse files Browse the repository at this point in the history
Not sure this is actually supported by most adblockers, but we might as
well be permissive.
  • Loading branch information
ThinkChaos committed Mar 29, 2023
1 parent b856837 commit f887e82
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 2 deletions.
23 changes: 22 additions & 1 deletion lists/parsers/hosts.go
Expand Up @@ -12,6 +12,7 @@ import (

"github.com/asaskevich/govalidator"
"github.com/hashicorp/go-multierror"
"golang.org/x/net/idna"
)

const maxDomainNameLength = 255 // https://www.rfc-editor.org/rfc/rfc1034#section-3.1
Expand Down Expand Up @@ -92,7 +93,8 @@ func (e *HostListEntry) UnmarshalText(data []byte) error {

host := scanner.Text()

if err := validateHostsListEntry(host); err != nil {
host, err := normalizeHostsListEntry(host)
if err != nil {
return err
}

Expand Down Expand Up @@ -191,6 +193,25 @@ func (e HostsFileEntry) forEachHost(callback func(string) error) error {
return nil
}

// normalizeHostsListEntry converts host to its ASCII form using the IDNA
// Punycode profile, then checks the converted value with
// validateHostsListEntry.
//
// It returns the converted host on success. If either the conversion or the
// validation fails, it returns an empty string and the error.
func normalizeHostsListEntry(host string) (string, error) {
	// Lookup is the profile preferred for DNS queries, but we use Punycode here as it does less validation.
	// That avoids rejecting domains in a list for reasons that amount to "that domain should not be used":
	// since the goal of the list is to determine whether the domain should be used or not, we leave
	// that decision to it.
	idnaProfile := idna.Punycode

	// Keep the conversion result in its own variable so that the error below
	// reports the entry exactly as it was written in the list, rather than
	// whatever value ToASCII returned alongside the error.
	ascii, err := idnaProfile.ToASCII(host)
	if err != nil {
		return "", fmt.Errorf("%w: %s", err, host)
	}

	if err := validateHostsListEntry(ascii); err != nil {
		return "", err
	}

	return ascii, nil
}

func validateDomainName(host string) error {
if len(host) > maxDomainNameLength {
return fmt.Errorf("domain name is too long: %s", host)
Expand Down
30 changes: 29 additions & 1 deletion lists/parsers/hosts_test.go
Expand Up @@ -303,6 +303,14 @@ var _ = Describe("HostList", func() {
"# comment",
" ",
"domain.tld # comment",

// http://www.i18nguy.com/markup/idna-examples.html
"belgië.icom.museum",
"الأردن.icom.museum",
"한국.icom.museum",

// Domain name w/ rune not supported by `idna.Lookup`
"domain_underscore.tld",
)
})

Expand All @@ -317,11 +325,31 @@ var _ = Describe("HostList", func() {
Expect(entry.String()).Should(Equal("domain.tld"))
Expect(sut.Position()).Should(Equal("line 4"))

entry, err = sut.Next(context.Background())
Expect(err).Should(Succeed())
Expect(entry.String()).Should(Equal("xn--belgi-rsa.icom.museum"))
Expect(sut.Position()).Should(Equal("line 5"))

entry, err = sut.Next(context.Background())
Expect(err).Should(Succeed())
Expect(entry.String()).Should(Equal("xn--igbhzh7gpa.icom.museum"))
Expect(sut.Position()).Should(Equal("line 6"))

entry, err = sut.Next(context.Background())
Expect(err).Should(Succeed())
Expect(entry.String()).Should(Equal("xn--3e0b707e.icom.museum"))
Expect(sut.Position()).Should(Equal("line 7"))

entry, err = sut.Next(context.Background())
Expect(err).Should(Succeed())
Expect(entry.String()).Should(Equal("domain_underscore.tld"))
Expect(sut.Position()).Should(Equal("line 8"))

_, err = sut.Next(context.Background())
Expect(err).ShouldNot(Succeed())
Expect(err).Should(MatchError(io.EOF))
Expect(IsNonResumableErr(err)).Should(BeTrue())
Expect(sut.Position()).Should(Equal("line 5"))
Expect(sut.Position()).Should(Equal("line 9"))
})
})

Expand Down

0 comments on commit f887e82

Please sign in to comment.