// Source: main.go (repository forked from mvdan/xurls).
/* Copyright (c) 2015, Daniel Martí <mvdan@mvdan.cc> */
/* See LICENSE for licensing information */
package main
import (
"log"
"os"
"sort"
"strings"
"text/template"
"golang.org/x/net/idna"
"github.com/mvdan/xurls"
)
// path is the name of the file that writeRegex creates and that main
// mentions in its log messages.
const path = "regex.go"
// regexTmpl renders the generated Go source: a header comment, the xurls
// package clause, and a const block holding one backtick-quoted string
// constant per entry of the map it is executed with.
var regexTmpl = template.Must(template.New("regex").Parse(
	"// Generated by regexgen\n" +
		"package xurls\n" +
		"const ({{ range $key, $value := . }}\n" +
		"{{$key}} = `{{$value}}`{{end}}\n" +
		")\n",
))
// schemes lists the schemes that are recognised when followed by a bare ":"
// rather than "://". Arbitrary schemes are not accepted in that form; the
// regex is restricted to these few well-known ones.
var schemes = []string{"bitcoin", "magnet", "mailto", "sms", "tel", "xmpp"}
// writeRegex generates the Go source file named by path, containing the
// regular expression constants derived from tlds.
//
// Every entry of tlds and of xurls.PseudoTLDs is collected together with its
// IDNA ASCII form (when that differs from the entry itself). The combined set
// is sorted and joined into a case-insensitive alternation; a second
// alternation is built from schemes. Both are rendered through regexTmpl.
//
// A TLD that is seen twice aborts the program via log.Fatalf. An error is
// returned if an IDNA conversion fails, or if the output file cannot be
// created, written or closed.
func writeRegex(tlds []string) error {
	allTldsSet := make(map[string]struct{})
	add := func(tld string) {
		if _, e := allTldsSet[tld]; e {
			log.Fatalf("Duplicate TLD: %s", tld)
		}
		allTldsSet[tld] = struct{}{}
	}
	for _, tldlist := range [...][]string{tlds, xurls.PseudoTLDs} {
		for _, tld := range tldlist {
			add(tld)
			asciiTld, err := idna.ToASCII(tld)
			if err != nil {
				return err
			}
			if asciiTld != tld {
				add(asciiTld)
			}
		}
	}

	// Map iteration order is random, so sort to keep the output stable.
	allTlds := make([]string, 0, len(allTldsSet))
	for tld := range allTldsSet {
		allTlds = append(allTlds, tld)
	}
	sort.Strings(allTlds)

	f, err := os.Create(path)
	if err != nil {
		return err
	}
	// The map keys are emitted verbatim as constant names by regexTmpl, so
	// the trailing space in "gtld " ends up in the generated file.
	execErr := regexTmpl.Execute(f, map[string]string{
		"gtld ":       `(?i)(` + strings.Join(allTlds, `|`) + `)(?-i)`,
		"otherScheme": `(?i)(` + strings.Join(schemes, `|`) + `)(?-i):`,
	})
	// Always close the file, and surface a Close failure: it can be the only
	// sign that the written data did not reach disk.
	closeErr := f.Close()
	if execErr != nil {
		return execErr
	}
	return closeErr
}
// main regenerates the file named by path from xurls.TLDs, logging progress
// and exiting through log.Fatalf if generation fails.
func main() {
	log.Printf("Generating %s...", path)
	err := writeRegex(xurls.TLDs)
	if err == nil {
		return
	}
	log.Fatalf("Could not write %s: %v", path, err)
}