
Commit

Added the ignore url feature
MSTEWARDSON committed Oct 16, 2020
1 parent 0b4f5cb commit a02c9e5
Showing 6 changed files with 153 additions and 62 deletions.
Binary file added WISA.exe
2 changes: 2 additions & 0 deletions go.mod
@@ -5,4 +5,6 @@ go 1.15
require (
github.com/gookit/color v1.3.1
github.com/spf13/pflag v1.0.5
mvdan.cc/xurls v1.1.0
mvdan.cc/xurls/v2 v2.2.0
)
11 changes: 11 additions & 0 deletions go.sum
@@ -1,8 +1,19 @@
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/gookit/color v1.3.1 h1:PPD/C7sf8u2L8XQPdPgsWRoAiLQGZEZOzU3cf5IYYUk=
github.com/gookit/color v1.3.1/go.mod h1:R3ogXq2B9rTbXoSHJ1HyUVAZ3poOJHpd9nQmyGZsfvQ=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rogpeppe/go-internal v1.5.2/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc=
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
mvdan.cc/xurls v1.1.0 h1:kj0j2lonKseISJCiq1Tfk+iTv65dDGCl0rTbanXJGGc=
mvdan.cc/xurls v1.1.0/go.mod h1:TNWuhvo+IqbUCmtUIb/3LJSQdrzel8loVpgFm0HikbI=
mvdan.cc/xurls/v2 v2.2.0 h1:NSZPykBXJFCetGZykLAxaL6SIpvbVy/UFEniIfHAa8A=
mvdan.cc/xurls/v2 v2.2.0/go.mod h1:EV1RMtya9D6G5DMYPGD8zTQzaHet6Jh8gFlRgGRJeO8=
9 changes: 9 additions & 0 deletions test_ignore
@@ -0,0 +1,9 @@
# 2. File with a comment and single URL that matches any URL
# that starts with https://www.google.com, for example:
#
# https://www.google.com/index.html would match
# https://www.google.com/ would match
# https://www.google.ca would NOT match
https://www.google.com
# This is invalid. It doesn't use http:// or https://
www.google.com
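
The comments above describe a prefix rule: an ignore entry matches any URL that begins with it. As a minimal illustrative sketch (not part of this commit), assuming the matcher simply checks each URL against each ignore entry with strings.HasPrefix, as the new ignoreURL function in wisa.go does further down:

package main

import (
    "fmt"
    "strings"
)

func main() {
    // One ignore entry, as in the test_ignore file above
    ignoreList := []string{"https://www.google.com"}
    urls := []string{
        "https://www.google.com/index.html", // would match
        "https://www.google.com/",           // would match
        "https://www.google.ca",             // would NOT match
    }
    for _, u := range urls {
        for _, ig := range ignoreList {
            if strings.HasPrefix(u, ig) {
                fmt.Println("ignored:", u)
            }
        }
    }
}

Running this prints only the two google.com URLs; https://www.google.ca falls through because the prefix does not match.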
7 changes: 6 additions & 1 deletion test_urls
@@ -8,4 +8,9 @@ https://httpstat.us/404

<a href="https://httpstat.us/200"/>
<a href="https://httpstat.us/403"/>
<a href="https://httpstat.us/404"/>

https://www.google.com/index.html
https://www.google.com/
https://www.google.ca
www.google.com
186 changes: 125 additions & 61 deletions wisa.go
@@ -1,13 +1,16 @@
package main

import (
"bufio"
"encoding/json"
"errors"
"fmt"
"io/ioutil"
"log"
"net/http"
"os"
"regexp"
"strings"
"sync"
"time"

@@ -17,9 +20,10 @@ import (

// flag globals
var (
    filenamePtr   *string
    verbosePtr    *bool
    jsonPtr       *bool
    ignoreFilePtr *bool
)

// RequestResult is a struct for storing urls and status codes
@@ -79,12 +83,28 @@ func checkLink(wg *sync.WaitGroup, url string) (RequestResult, int, error) {
return r, status, reqErr
}

// ignoreURL goes through the URL list and blanks out any URL that matches an entry in the ignore list
func ignoreURL(url []string, ignoreList []string) []string {
    // Loop through the URL list
    for i := 0; i < len(url); i++ {
        // Loop through the ignore list
        for k := 0; k < len(ignoreList); k++ {
            // If the URL starts with an ignored prefix, set it to ""
            if strings.HasPrefix(url[i], ignoreList[k]) {
                url[i] = ""
            }
        }
    }
    return url
}

func main() {

// https://github.com/spf13/pflag
filenamePtr = flag.StringP("file", "f", "", "filename input (required)") // filename input
verbosePtr = flag.BoolP("version", "v", false, "verbose output") // (error logs)
jsonPtr = flag.BoolP("json", "j", false, "json output") // turns off verbose output
ignoreFilePtr = flag.BoolP("ignore", "i", false, "ignores certain URLs based on a text file")

flag.Parse()

@@ -95,83 +115,127 @@ func main() {
// separate regex for IP URLs since the above did not work
rIP := regexp.MustCompile(`(?:(?:(?:https?|ftp):)\/\/)\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}(:\d{2,5})?`)

// String slice holding the list of URLs to ignore
ignoreURLs := []string{}

// check if arg is passed
if len(*filenamePtr) == 0 {
if len(os.Args) == 1 {
fmt.Println("Usage: wisa -f [file]")
os.Exit(1)
}
} else {
        // Ignore-file handling
        if *ignoreFilePtr {
            // Get the ignore file path from the first positional argument
            ignoreFilePath := flag.Args()[0]
            fmt.Printf("Reading %s...\n", ignoreFilePath)
            // Open the ignore file
            fileIgn, err := os.Open(ignoreFilePath)
            if err != nil {
                fmt.Println(err)
                os.Exit(2)
            }
            // Close the file when main returns
            defer fileIgn.Close()
            // Create a new scanner and read the ignore file line by line
            scanner := bufio.NewScanner(fileIgn)
            for scanner.Scan() {
                line := scanner.Text()
                // Skip blank lines and comment lines that start with "#"
                if len(line) > 0 && !strings.HasPrefix(line, "#") {
                    // Match against the regex patterns
                    UrlsFromLine := r.FindAllString(line, -1)
                    ipUrlsFromLine := rIP.FindAllString(line, -1)
                    UrlsFromLine = append(UrlsFromLine, ipUrlsFromLine...)
                    ignoreURLs = append(ignoreURLs, UrlsFromLine...)
                }
            }
            // Remove any duplicates, keeping the returned slice
            ignoreURLs = removeDuplicate(ignoreURLs)
            if err := scanner.Err(); err != nil {
                log.Fatal(err)
            }
        }

        // notify if -v flag is passed
        if *verbosePtr && !*jsonPtr {
            fmt.Println("verbose output enabled...")
        }

        // Open a file (read-only) https://golang.org/pkg/os/#Open
        if !*jsonPtr {
            fmt.Printf("Reading %s...\n", *filenamePtr)
        }

        file, err := os.Open(*filenamePtr)

        if err != nil {
            fmt.Println(err)
            os.Exit(2)
        }

        // read all data into a byte slice
        text, err := ioutil.ReadAll(file)

        // find all regex matches in the converted byte data and concat both string slices into a single slice
        textUrls := r.FindAllString(string(text), -1)
        ipUrls := rIP.FindAllString(string(text), -1)

        urls := append(textUrls, ipUrls...)

        // stop reading file
        file.Close()

        urls = removeDuplicate(urls)

        // Call ignoreURL to drop any URLs that match the ignore list
        if *ignoreFilePtr {
            urls = ignoreURL(urls, ignoreURLs)
        }

        // create workgroup to ensure all routines finish https://golang.org/pkg/sync/#WaitGroup
        var wg sync.WaitGroup

        // json output stuff
        var jsonSlice []RequestResult
        var mut sync.Mutex
        finalExit := 0

        // check if urls found are alive
        for _, url := range urls {
            wg.Add(1)
            go func(url string) {
                res, status, err := checkLink(&wg, url)

                if err == nil {
                    mut.Lock()
                    jsonSlice = append(jsonSlice, res)
                    mut.Unlock()
                }

                if status == 3 {
                    mut.Lock()
                    finalExit = status
                    mut.Unlock()
                }
            }(url)
        }

        // wait for go routines to finish
        wg.Wait()

        if *jsonPtr {
            urlsJ, err := json.Marshal(jsonSlice)

            if err != nil {
                fmt.Println(err)
            } else {
                fmt.Println(string(urlsJ))
            }
        }

        os.Exit(finalExit)
    }
}
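
Invocation note: the diff defines -i/--ignore as a boolean flag, and the ignore file path is read from the first positional argument (flag.Args()[0]). The commit does not show a sample command, so the following is only an assumed example of how the new feature might be exercised:

    wisa -f test_urls -i test_ignore

where test_urls is the file to scan for links and test_ignore supplies the URL prefixes to skip.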
