forked from efixler/scrape
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Support both form and JSON input for extract and feed endpoints
- Remove URL fragment when cleaning URLs
- Add some debug logging to help diagnose encoding issues with URLs in JSON responses
- Loading branch information
Showing 10 changed files with 315 additions and 133 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
package server | ||
|
||
import ( | ||
"encoding/json" | ||
"errors" | ||
nurl "net/url" | ||
) | ||
|
||
// BatchRequest is the JSON request payload carrying multiple target URLs.
// NOTE(review): unlike singleURLRequest, the strings are not parsed or
// validated at decode time — presumably validated downstream by the batch
// handler; confirm against the callers.
type BatchRequest struct {
	Urls []string `json:"urls"`
}
|
||
// singleURLRequest is the decoded payload for endpoints operating on one
// target URL. The URL string is parsed and validated during unmarshaling,
// so a successfully decoded request always holds a usable absolute URL.
type singleURLRequest struct {
	URL         *nurl.URL `json:"url"`
	PrettyPrint bool      `json:"pp,omitempty"`
}

// errNoURL is the sentinel returned when the payload has no "url" member.
var errNoURL = errors.New("URL is required")

// UnmarshalJSON implements json.Unmarshaler. It decodes the "url" member as
// a raw string, then parses it into sur.URL, rejecting payloads whose URL is
// missing, unparseable, or not absolute.
func (sur *singleURLRequest) UnmarshalJSON(b []byte) error {
	// Decode through a shadow type so "url" lands in a plain string and the
	// embedded alias soaks up the remaining fields without re-entering this
	// method (the alias type has no UnmarshalJSON of its own).
	type plain singleURLRequest
	aux := struct {
		URL string `json:"url"`
		*plain
	}{plain: (*plain)(sur)}
	if err := json.Unmarshal(b, &aux); err != nil {
		return err
	}
	if len(aux.URL) == 0 {
		return errNoURL
	}
	parsed, err := nurl.Parse(aux.URL)
	if err != nil {
		return err
	}
	// Assign before the absoluteness check, matching the original behavior
	// of leaving the parsed (relative) URL visible alongside the error.
	sur.URL = parsed
	if !sur.URL.IsAbs() {
		return errors.New("URL must be absolute")
	}
	return nil
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
package server | ||
|
||
import ( | ||
"encoding/json" | ||
"strings" | ||
"testing" | ||
) | ||
|
||
func TestUnmarshalSingleUrlRequest(t *testing.T) { | ||
t.Parallel() | ||
tests := []struct { | ||
name string | ||
body string | ||
expectURLString string | ||
expectPP bool | ||
expectErr bool | ||
}{ | ||
{ | ||
name: "valid", | ||
body: `{"url":"http://example.com"}`, | ||
expectURLString: "http://example.com", | ||
expectPP: false, | ||
expectErr: false, | ||
}, | ||
{ | ||
name: "missing url", | ||
body: `{"urls":["http://example.com"]}`, | ||
expectURLString: "", | ||
expectPP: false, | ||
expectErr: true, | ||
}, | ||
{ | ||
name: "non-absolute url", | ||
body: `{"url":"example/foo"}`, | ||
expectURLString: "", | ||
expectPP: false, | ||
expectErr: true, | ||
}, | ||
{ | ||
name: "url with fragment", | ||
body: `{"url":"http://example.com#fragment"}`, | ||
expectURLString: "http://example.com#fragment", | ||
expectPP: false, | ||
expectErr: false, | ||
}, | ||
} | ||
for _, tt := range tests { | ||
var sur singleURLRequest | ||
err := sur.UnmarshalJSON([]byte(tt.body)) | ||
if (err != nil) != tt.expectErr { | ||
t.Fatalf("[%s] UnmarshalSingleUrlRequest, expected error %v, got %v", tt.name, tt.expectErr, err) | ||
} | ||
if tt.expectURLString != "" && sur.URL.String() != tt.expectURLString { | ||
t.Errorf("[%s] UnmarshalSingleUrlRequest, expected URL %s, got %s", tt.name, tt.expectURLString, sur.URL.String()) | ||
} | ||
if sur.PrettyPrint != tt.expectPP { | ||
t.Errorf("[%s] UnmarshalSingleUrlRequest, expected PrettyPrint %v, got %v", tt.name, tt.expectPP, sur.PrettyPrint) | ||
} | ||
// now run the same test but with json.Decoder | ||
reader := strings.NewReader(tt.body) | ||
decoder := json.NewDecoder(reader) | ||
decoder.DisallowUnknownFields() | ||
surD := new(singleURLRequest) | ||
err = decoder.Decode(surD) | ||
if (err != nil) != tt.expectErr { | ||
t.Fatalf("[%s] json.Decoder.Decode, expected error %v, got %v", tt.name, tt.expectErr, err) | ||
} | ||
if tt.expectURLString != "" && surD.URL.String() != tt.expectURLString { | ||
t.Errorf("[%s] json.Decoder.Decode, expected URL %s, got %s", tt.name, tt.expectURLString, surD.URL.String()) | ||
} | ||
if surD.PrettyPrint != tt.expectPP { | ||
t.Errorf("[%s] json.Decoder.Decode, expected PrettyPrint %v, got %v", tt.name, tt.expectPP, surD.PrettyPrint) | ||
} | ||
} | ||
} |
Oops, something went wrong.