diff --git a/lists/parsers/hosts.go b/lists/parsers/hosts.go
index 32323d7bc..e95a997e7 100644
--- a/lists/parsers/hosts.go
+++ b/lists/parsers/hosts.go
@@ -12,6 +12,7 @@ import (
 
 	"github.com/asaskevich/govalidator"
 	"github.com/hashicorp/go-multierror"
+	"golang.org/x/net/idna"
 )
 
 const maxDomainNameLength = 255 // https://www.rfc-editor.org/rfc/rfc1034#section-3.1
@@ -92,7 +93,8 @@ func (e *HostListEntry) UnmarshalText(data []byte) error {
 
 	host := scanner.Text()
 
-	if err := validateHostsListEntry(host); err != nil {
+	host, err := normalizeHostsListEntry(host)
+	if err != nil {
 		return err
 	}
 
@@ -191,6 +193,32 @@ func (e HostsFileEntry) forEachHost(callback func(string) error) error {
 	return nil
 }
 
+// normalizeHostsListEntry converts host to its ASCII (Punycode) form and
+// validates the result with validateHostsListEntry.
+//
+// It returns the normalized host, or an error that wraps the IDNA conversion
+// failure and names the entry as it was written in the list.
+func normalizeHostsListEntry(host string) (string, error) {
+	// idna.Lookup is the profile preferred for DNS queries, but we use
+	// idna.Punycode because it does less validation. Whether a domain should
+	// be used is exactly what the list decides, so we avoid rejecting entries
+	// for reasons that amount to "that domain should not be used".
+	idnaProfile := idna.Punycode
+
+	// Keep the original host intact: ToASCII returns its (possibly partial)
+	// result even on failure, and the error must show what the list contained.
+	ascii, err := idnaProfile.ToASCII(host)
+	if err != nil {
+		return "", fmt.Errorf("%w: %s", err, host)
+	}
+
+	if err := validateHostsListEntry(ascii); err != nil {
+		return "", err
+	}
+
+	return ascii, nil
+}
+
 func validateDomainName(host string) error {
 	if len(host) > maxDomainNameLength {
 		return fmt.Errorf("domain name is too long: %s", host)
diff --git a/lists/parsers/hosts_test.go b/lists/parsers/hosts_test.go
index 76567d80a..fcbe316df 100644
--- a/lists/parsers/hosts_test.go
+++ b/lists/parsers/hosts_test.go
@@ -303,6 +303,14 @@ var _ = Describe("HostList", func() {
 				"# comment",
 				" ",
 				"domain.tld # comment",
+
+				// http://www.i18nguy.com/markup/idna-examples.html
+				"belgië.icom.museum",
+				"الأردن.icom.museum",
+				"한국.icom.museum",
+
+				// Domain name w/ rune not supported by `idna.Lookup`
+				"domain_underscore.tld",
 			)
 		})
 
@@ -317,11 +325,31 @@ var _ = Describe("HostList", func() {
 			Expect(entry.String()).Should(Equal("domain.tld"))
 			Expect(sut.Position()).Should(Equal("line 4"))
 
+			entry, err = sut.Next(context.Background())
+			Expect(err).Should(Succeed())
+			Expect(entry.String()).Should(Equal("xn--belgi-rsa.icom.museum"))
+			Expect(sut.Position()).Should(Equal("line 5"))
+
+			entry, err = sut.Next(context.Background())
+			Expect(err).Should(Succeed())
+			Expect(entry.String()).Should(Equal("xn--igbhzh7gpa.icom.museum"))
+			Expect(sut.Position()).Should(Equal("line 6"))
+
+			entry, err = sut.Next(context.Background())
+			Expect(err).Should(Succeed())
+			Expect(entry.String()).Should(Equal("xn--3e0b707e.icom.museum"))
+			Expect(sut.Position()).Should(Equal("line 7"))
+
+			entry, err = sut.Next(context.Background())
+			Expect(err).Should(Succeed())
+			Expect(entry.String()).Should(Equal("domain_underscore.tld"))
+			Expect(sut.Position()).Should(Equal("line 8"))
+
 			_, err = sut.Next(context.Background())
 			Expect(err).ShouldNot(Succeed())
 			Expect(err).Should(MatchError(io.EOF))
 			Expect(IsNonResumableErr(err)).Should(BeTrue())
-			Expect(sut.Position()).Should(Equal("line 5"))
+			Expect(sut.Position()).Should(Equal("line 9"))
 		})
 	})
 