// wayback.go

package providers
import (
"bufio"
"fmt"
"net/http"
"net/url"
"regexp"
"strings"
)
// Wayback queries the Wayback Machine CDX index (web.archive.org) for URLs
// archived under *.domain and sends every distinct subdomain it finds to
// results, lowercased and in first-seen order.
//
// Hosts that start with a "*." wildcard are skipped. Each result is delivered
// with a plain blocking channel send, so the caller is expected to receive
// from results concurrently (or supply enough buffer). Wayback does not close
// results.
//
// It returns an error if the request fails, if the server answers with a
// non-200 status, or if the response body cannot be read to the end.
func Wayback(domain string, results chan string, client *http.Client) error {
	// Compile once, outside the scan loop. QuoteMeta keeps the dots (and any
	// other metacharacters) in domain literal, so "example.com" can no longer
	// match "exampleXcom".
	extractor, err := regexp.Compile(`(?i)[a-zA-Z0-9\*_.-]+\.` + regexp.QuoteMeta(domain))
	if err != nil {
		return err
	}

	res, err := client.Get(fmt.Sprintf("http://web.archive.org/cdx/search/cdx?url=*.%s/*&output=txt&fl=original&collapse=urlkey", domain))
	if err != nil {
		return err
	}
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		return fmt.Errorf("wayback: unexpected HTTP status %d", res.StatusCode)
	}

	// The provider reports many URLs per host; remember what was already sent
	// so each subdomain is emitted only once.
	seen := make(map[string]struct{})

	scanner := bufio.NewScanner(res.Body)
	// Archived URLs can exceed the scanner's default 64 KiB token limit; allow
	// lines of up to 1 MiB before giving up.
	scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)

	for scanner.Scan() {
		line := scanner.Text()
		if line == "" {
			continue
		}

		// Decode percent-escapes when possible. A malformed escape elsewhere
		// in the URL must not discard the whole line, so fall back to the raw
		// text on error.
		if decoded, err := url.QueryUnescape(line); err == nil {
			line = decoded
		}

		loc := extractor.FindStringIndex(line)
		if loc == nil {
			continue
		}
		sub := strings.ToLower(line[loc[0]:loc[1]])

		// A match that directly follows '%' starts with the hex digits of a
		// still-encoded "%2f" / "%252f" separator. Strip them only in that
		// case, so real hosts such as "2fa.<domain>" are left intact.
		if loc[0] > 0 && line[loc[0]-1] == '%' {
			sub = strings.TrimPrefix(sub, "25")
			sub = strings.TrimPrefix(sub, "2f")
		}

		// Wildcard entries are not concrete subdomains.
		if strings.HasPrefix(sub, "*.") {
			continue
		}

		if _, dup := seen[sub]; dup {
			continue
		}
		seen[sub] = struct{}{}
		results <- sub
	}

	// Surface read errors and over-long lines instead of reporting success on
	// a truncated result set.
	return scanner.Err()
}