From 76ab7c59f48743c924369526bf4a5bf6e948dec2 Mon Sep 17 00:00:00 2001 From: synycboom Date: Wed, 9 Nov 2022 14:53:20 +0700 Subject: [PATCH 01/17] chore: add go-rod to dependencies --- go.mod | 4 ++++ go.sum | 11 +++++++++++ 2 files changed, 15 insertions(+) diff --git a/go.mod b/go.mod index 8ef0359..36926d8 100644 --- a/go.mod +++ b/go.mod @@ -49,6 +49,7 @@ require ( github.com/go-playground/locales v0.14.0 // indirect github.com/go-playground/universal-translator v0.18.0 // indirect github.com/go-playground/validator/v10 v10.10.1 // indirect + github.com/go-rod/rod v0.112.0 // indirect github.com/go-sql-driver/mysql v1.6.0 // indirect github.com/go-stack/stack v1.8.1 // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect @@ -94,6 +95,9 @@ require ( github.com/tidwall/match v1.1.1 // indirect github.com/tidwall/pretty v1.2.0 // indirect github.com/ugorji/go/codec v1.2.7 // indirect + github.com/ysmood/goob v0.4.0 // indirect + github.com/ysmood/gson v0.7.2 // indirect + github.com/ysmood/leakless v0.8.0 // indirect go.opencensus.io v0.23.0 // indirect go.uber.org/atomic v1.7.0 // indirect go.uber.org/multierr v1.6.0 // indirect diff --git a/go.sum b/go.sum index a281d94..1148a3c 100644 --- a/go.sum +++ b/go.sum @@ -231,6 +231,8 @@ github.com/go-playground/validator/v10 v10.10.1 h1:uA0+amWMiglNZKZ9FJRKUAe9U3RX9 github.com/go-playground/validator/v10 v10.10.1/go.mod h1:i+3WkQ1FvaUjjxh1kSvIA4dMGDBiPU55YFDl0WbKdWU= github.com/go-resty/resty/v2 v2.7.0 h1:me+K9p3uhSmXtrBZ4k9jcEAfJmuC8IivWHwaLZwPrFY= github.com/go-resty/resty/v2 v2.7.0/go.mod h1:9PWDzw47qPphMRFfhsyk0NnSgvluHcljSMVIq3w7q0I= +github.com/go-rod/rod v0.112.0 h1:U9Yc+quw4hxZ6GrdbWFBeylvaYElEKM9ijFW2LYkGlA= +github.com/go-rod/rod v0.112.0/go.mod h1:GZDtmEs6RpF6kBRYpGCZXxXlKNneKVPiKOjaMbmVVjE= github.com/go-sql-driver/mysql v1.6.0 h1:BCTh4TKNUYmOmMUcQ3IipzF5prigylS7XXjEkfCHuOE= github.com/go-sql-driver/mysql v1.6.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= @@ -630,6 +632,15 @@ github.com/ugorji/go/codec v1.2.7/go.mod h1:WGN1fab3R1fzQlVQTkfxVtIBhWDRqOviHU95 github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= github.com/valyala/fasttemplate v1.0.1/go.mod h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPUpymEIMZ47gx8= github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= +github.com/ysmood/goob v0.4.0 h1:HsxXhyLBeGzWXnqVKtmT9qM7EuVs/XOgkX7T6r1o1AQ= +github.com/ysmood/goob v0.4.0/go.mod h1:u6yx7ZhS4Exf2MwciFr6nIM8knHQIE22lFpWHnfql18= +github.com/ysmood/got v0.31.3/go.mod h1:pE1l4LOwOBhQg6A/8IAatkGp7uZjnalzrZolnlhhMgY= +github.com/ysmood/gotrace v0.6.0/go.mod h1:TzhIG7nHDry5//eYZDYcTzuJLYQIkykJzCRIo4/dzQM= +github.com/ysmood/gson v0.7.1/go.mod h1:3Kzs5zDl21g5F/BlLTNcuAGAYLKt2lV5G8D1zF3RNmg= +github.com/ysmood/gson v0.7.2 h1:1iWUvpi5DPvd2j59W7ifRPR9DiAZ3Ga+fmMl1mJrRbM= +github.com/ysmood/gson v0.7.2/go.mod h1:3Kzs5zDl21g5F/BlLTNcuAGAYLKt2lV5G8D1zF3RNmg= +github.com/ysmood/leakless v0.8.0 h1:BzLrVoiwxikpgEQR0Lk8NyBN5Cit2b1z+u0mgL4ZJak= +github.com/ysmood/leakless v0.8.0/go.mod h1:R8iAXPRaG97QJwqxs74RdwzcRHT1SWCGTNqY8q0JvMQ= github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= From 6446a4d4d9c5b5cff46d3f0ede50e6130dec4639 Mon Sep 17 00:00:00 2001 From: synycboom Date: Wed, 9 Nov 2022 14:58:57 +0700 Subject: [PATCH 02/17] feat: implement headless modules for handling headless browser communication --- headless/headless.go | 56 +++++++++++++ headless/validate.go | 184 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 240 insertions(+) create mode 100644 headless/headless.go create mode 100644 headless/validate.go diff --git a/headless/headless.go b/headless/headless.go new file mode 100644 index 0000000..583ef7d --- /dev/null +++ b/headless/headless.go @@ -0,0 +1,56 @@ +package headless + +import ( + "net/http" + + "github.com/gin-contrib/cors" + "github.com/gin-gonic/gin" + "github.com/nextdotid/proof_server/common" + "github.com/nextdotid/proof_server/validator" + "github.com/sirupsen/logrus" +) + +var ( + Engine *gin.Engine + l = logrus.WithFields(logrus.Fields{"module": "headless"}) +) + +type ErrorResponse struct { + Message string `json:"message"` +} + +func middlewareCors() gin.HandlerFunc { + return cors.Default() +} + +func Init() { + if Engine != nil { + return + } + + Engine = gin.Default() + Engine.Use(middlewareCors()) + + Engine.GET("/healthz", healthz) + Engine.POST("/v1/validate", validate) +} + +func errorResp(c *gin.Context, error_code int, err error) { + c.JSON(error_code, ErrorResponse{ + Message: err.Error(), + }) +} + +func healthz(c *gin.Context) { + platforms := make([]string, 0) + for p := range validator.PlatformFactories { + platforms = append(platforms, string(p)) + } + + c.JSON(http.StatusOK, gin.H{ + "hello": "proof service", + "environment": common.Environment, + "revision": common.Revision, + "built_at": common.BuildTime, + }) +} diff --git a/headless/validate.go b/headless/validate.go new file mode 100644 index 0000000..398906b --- /dev/null +++ b/headless/validate.go @@ -0,0 +1,184 @@ +package headless + +import ( + "net/http" + "time" + + "github.com/gin-gonic/gin" + "github.com/go-rod/rod" + "github.com/go-rod/rod/lib/proto" + "golang.org/x/xerrors" +) + +const ( + matchTypeRegex = "regexp" + matchTypeXPath = "xpath" + matchTypeJS = "js" + defaultTimeout = "10s" +) + +var ( + validMatchTypes = map[string]struct{}{ + matchTypeJS: {}, + matchTypeXPath: {}, + matchTypeRegex: {}, + } +) + +type MatchRegExp struct { + // Selector is the target element if not specified "*" will be used + Selector string `json:"selector"` + + // Value is the target value + Value string `json:"value"` +} + +type MatchXPath struct { + // Selector is the xpath selector + Selector string `json:"selector"` +} + +type MatchJS struct { + // Value is the javascript value + Value string `json:"value"` +} + +type Match struct { + Type string `json:"type"` + MatchRegExp *MatchRegExp `json:"regexp"` + MatchXPath *MatchXPath `json:"xpath"` + MatchJS *MatchJS `json:"js"` +} + +type ValidateRequest struct { + Location string `json:"location"` + Timeout string `json:"timeout"` + Match Match `json:"match"` +} + +type ValidateResponse struct { + IsValid bool `json:"is_valid"` + Detail string `json:"detail,omitempty"` +} + +func validate(c *gin.Context) { + var req ValidateRequest + if err := c.Bind(&req); err != nil { + errorResp(c, http.StatusBadRequest, xerrors.Errorf("Param error")) + return + } + + if err := checkValidateRequest(&req); err != nil { + errorResp(c, http.StatusBadRequest, err) + return + } + + browser := rod.New() + if err := browser.Connect(); err != nil { + errorResp(c, http.StatusInternalServerError, xerrors.Errorf("%w", err)) + return + } + + defer browser.Close() + + page, err := browser.Page(proto.TargetCreateTarget{URL: req.Location}) + if err != nil { + errorResp(c, http.StatusInternalServerError, xerrors.Errorf("%w", err)) + return + } + + timeout := req.Timeout + if timeout == "" { + timeout = defaultTimeout + } + + timeoutDuration, err := time.ParseDuration(timeout) + if err != nil { + errorResp(c, http.StatusInternalServerError, xerrors.Errorf("%w", err)) + return + } + + page = page.Timeout(timeoutDuration) + if err := page.WaitLoad(); err != nil { + errorResp(c, http.StatusInternalServerError, xerrors.Errorf("%w", err)) + return + } + + switch req.Match.Type { + case matchTypeRegex: + selector := req.Match.MatchRegExp.Selector + if selector == "" { + selector = "*" + } + + if _, err := page.ElementR(selector, req.Match.MatchRegExp.Value); err != nil { + c.JSON(http.StatusOK, ValidateResponse{IsValid: false, Detail: err.Error()}) + + return + } + case matchTypeXPath: + selector := req.Match.MatchXPath.Selector + if _, err := page.ElementX(selector); err != nil { + c.JSON(http.StatusOK, ValidateResponse{IsValid: false, Detail: err.Error()}) + + return + } + case matchTypeJS: + js := req.Match.MatchJS.Value + if _, err := page.ElementByJS(rod.Eval(js)); err != nil { + c.JSON(http.StatusOK, ValidateResponse{IsValid: false, Detail: err.Error()}) + + return + } + } + + c.JSON(http.StatusOK, ValidateResponse{IsValid: true}) +} + +func checkValidateRequest(req *ValidateRequest) error { + if req.Location == "" { + return xerrors.Errorf("'location' is missing") + } + + if req.Timeout != "" { + if _, err := time.ParseDuration(req.Timeout); err != nil { + return xerrors.Errorf("'timeout' is invalid") + } + } + + if _, ok := validMatchTypes[req.Match.Type]; !ok { + return xerrors.Errorf("'match.type' should be 'regexp', 'xpath', or 'js'") + } + + if req.Match.Type == matchTypeRegex { + if req.Match.MatchRegExp == nil { + return xerrors.Errorf("'match.regexp' payload is missing") + } + + if req.Match.MatchRegExp.Value == "" { + return xerrors.Errorf("'match.regexp.value' must be specified") + } + } + + if req.Match.Type == matchTypeXPath { + if req.Match.MatchXPath == nil { + return xerrors.Errorf("'match.xpath' payload is missing") + } + + if req.Match.MatchXPath.Selector == "" { + return xerrors.Errorf("'match.xpath.selector' must be specified") + } + } + + if req.Match.Type == matchTypeJS { + if req.Match.MatchJS == nil { + return xerrors.Errorf("'match.js' payload is missing") + } + + if req.Match.MatchJS.Value == "" { + return xerrors.Errorf("'match.js.value' must be specified") + } + } + + return nil +} From 500d473ecca6c304424f362813f83f262a7076b8 Mon Sep 17 00:00:00 2001 From: synycboom Date: Wed, 9 Nov 2022 18:44:41 +0700 Subject: [PATCH 03/17] feat: use a custom launcher --- headless/headless.go | 18 +++++------------- headless/launcher.go | 42 ++++++++++++++++++++++++++++++++++++++++++ headless/validate.go | 37 +++++++++++++++++++++++++++++++------ 3 files changed, 78 insertions(+), 19 deletions(-) create mode 100644 headless/launcher.go diff --git a/headless/headless.go b/headless/headless.go index 583ef7d..6ab71c9 100644 --- a/headless/headless.go +++ b/headless/headless.go @@ -11,19 +11,17 @@ import ( ) var ( - Engine *gin.Engine - l = logrus.WithFields(logrus.Fields{"module": "headless"}) + Engine *gin.Engine + LauncherPath string + l = logrus.WithFields(logrus.Fields{"module": "headless"}) ) -type ErrorResponse struct { - Message string `json:"message"` -} - func middlewareCors() gin.HandlerFunc { return cors.Default() } -func Init() { +func Init(launcherPath string) { + LauncherPath = launcherPath if Engine != nil { return } @@ -35,12 +33,6 @@ func Init() { Engine.POST("/v1/validate", validate) } -func errorResp(c *gin.Context, error_code int, err error) { - c.JSON(error_code, ErrorResponse{ - Message: err.Error(), - }) -} - func healthz(c *gin.Context) { platforms := make([]string, 0) for p := range validator.PlatformFactories { diff --git a/headless/launcher.go b/headless/launcher.go new file mode 100644 index 0000000..646ff24 --- /dev/null +++ b/headless/launcher.go @@ -0,0 +1,42 @@ +package headless + +import "github.com/go-rod/rod/lib/launcher" + +func newLauncher(path string) *launcher.Launcher { + if path == "" { + var found bool + + path, found = launcher.LookPath() + if !found { + path = launcher.NewBrowser().MustGet() + } + } + + return launcher.New(). + Bin(path). + // recommended flags to run in serverless environments + // see https://github.com/alixaxel/chrome-aws-lambda/blob/master/source/index.ts + Set("allow-running-insecure-content"). + Set("autoplay-policy", "user-gesture-required"). + Set("disable-component-update"). + Set("disable-domain-reliability"). + Set("disable-features", "AudioServiceOutOfProcess", "IsolateOrigins", "site-per-process"). + Set("disable-print-preview"). + Set("disable-setuid-sandbox"). + Set("disable-site-isolation-trials"). + Set("disable-speech-api"). + Set("disable-web-security"). + Set("disk-cache-size", "33554432"). + Set("enable-features", "SharedArrayBuffer"). + Set("hide-scrollbars"). + Set("ignore-gpu-blocklist"). + Set("in-process-gpu"). + Set("mute-audio"). + Set("no-default-browser-check"). + Set("no-pings"). + Set("no-sandbox"). + Set("no-zygote"). + Set("single-process"). + Set("use-gl", "swiftshader"). + Set("window-size", "1920", "1080") +} diff --git a/headless/validate.go b/headless/validate.go index 398906b..d0e9926 100644 --- a/headless/validate.go +++ b/headless/validate.go @@ -56,11 +56,21 @@ type ValidateRequest struct { Match Match `json:"match"` } -type ValidateResponse struct { +type ErrorResponse struct { + Message string `json:"message"` +} + +type SuccessResponse struct { IsValid bool `json:"is_valid"` Detail string `json:"detail,omitempty"` } +func errorResp(c *gin.Context, error_code int, err error) { + c.JSON(error_code, ErrorResponse{ + Message: err.Error(), + }) +} + func validate(c *gin.Context) { var req ValidateRequest if err := c.Bind(&req); err != nil { @@ -73,7 +83,17 @@ func validate(c *gin.Context) { return } - browser := rod.New() + launcher := newLauncher(LauncherPath) + defer launcher.Cleanup() + defer launcher.Kill() + + u, err := launcher.Launch() + if err != nil { + errorResp(c, http.StatusInternalServerError, xerrors.Errorf("%w", err)) + return + } + + browser := rod.New().ControlURL(u) if err := browser.Connect(); err != nil { errorResp(c, http.StatusInternalServerError, xerrors.Errorf("%w", err)) return @@ -104,6 +124,11 @@ func validate(c *gin.Context) { return } + if err := page.WaitRepaint(); err != nil { + errorResp(c, http.StatusInternalServerError, xerrors.Errorf("%w", err)) + return + } + switch req.Match.Type { case matchTypeRegex: selector := req.Match.MatchRegExp.Selector @@ -112,27 +137,27 @@ func validate(c *gin.Context) { } if _, err := page.ElementR(selector, req.Match.MatchRegExp.Value); err != nil { - c.JSON(http.StatusOK, ValidateResponse{IsValid: false, Detail: err.Error()}) + c.JSON(http.StatusOK, SuccessResponse{IsValid: false, Detail: err.Error()}) return } case matchTypeXPath: selector := req.Match.MatchXPath.Selector if _, err := page.ElementX(selector); err != nil { - c.JSON(http.StatusOK, ValidateResponse{IsValid: false, Detail: err.Error()}) + c.JSON(http.StatusOK, SuccessResponse{IsValid: false, Detail: err.Error()}) return } case matchTypeJS: js := req.Match.MatchJS.Value if _, err := page.ElementByJS(rod.Eval(js)); err != nil { - c.JSON(http.StatusOK, ValidateResponse{IsValid: false, Detail: err.Error()}) + c.JSON(http.StatusOK, SuccessResponse{IsValid: false, Detail: err.Error()}) return } } - c.JSON(http.StatusOK, ValidateResponse{IsValid: true}) + c.JSON(http.StatusOK, SuccessResponse{IsValid: true}) } func checkValidateRequest(req *ValidateRequest) error { From 7925e273aa006094d3a4c38e815dc12c0f688b11 Mon Sep 17 00:00:00 2001 From: synycboom Date: Wed, 9 Nov 2022 18:44:52 +0700 Subject: [PATCH 04/17] test: add test cases --- headless/headless_test.go | 31 ++++++++ headless/validate_test.go | 158 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 189 insertions(+) create mode 100644 headless/headless_test.go create mode 100644 headless/validate_test.go diff --git a/headless/headless_test.go b/headless/headless_test.go new file mode 100644 index 0000000..6550150 --- /dev/null +++ b/headless/headless_test.go @@ -0,0 +1,31 @@ +package headless_test + +import ( + "bytes" + "encoding/json" + "net/http" + "net/http/httptest" + "os" + "testing" + + "github.com/gin-gonic/gin" + "github.com/nextdotid/proof_server/config" + "github.com/nextdotid/proof_server/headless" +) + +func TestMain(m *testing.M) { + config.Init("../config/config.test.json") + headless.Init("") + os.Exit(m.Run()) +} + +func APITestCall(engine *gin.Engine, method, url string, body any, response any) *httptest.ResponseRecorder { + bb, _ := json.Marshal(body) + w := httptest.NewRecorder() + req, _ := http.NewRequest(method, url, bytes.NewReader(bb)) + req.Header.Add("Content-Type", "application/json") + engine.ServeHTTP(w, req) + json.Unmarshal(w.Body.Bytes(), response) + + return w +} diff --git a/headless/validate_test.go b/headless/validate_test.go new file mode 100644 index 0000000..83307f7 --- /dev/null +++ b/headless/validate_test.go @@ -0,0 +1,158 @@ +package headless_test + +import ( + "net/http" + "net/http/httptest" + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/nextdotid/proof_server/headless" +) + +func newValidRequest(location string, matchType string) headless.ValidateRequest { + switch matchType { + case "regexp": + return headless.ValidateRequest{ + Location: location, + Timeout: "2s", + Match: headless.Match{ + Type: "regexp", + MatchRegExp: &headless.MatchRegExp{ + Selector: "*", + Value: "match-this-text", + }, + }, + } + case "xpath": + return headless.ValidateRequest{ + Location: location, + Timeout: "2s", + Match: headless.Match{ + Type: "xpath", + MatchXPath: &headless.MatchXPath{ + Selector: "//text()[contains(.,'match-this-text')]", + }, + }, + } + case "js": + return headless.ValidateRequest{ + Location: location, + Timeout: "2s", + Match: headless.Match{ + Type: "js", + MatchJS: &headless.MatchJS{ + Value: "() => [].filter.call(document.querySelectorAll('*'), (el) => el.textContent === 'match-this-text')[0]", + }, + }, + } + } + + return headless.ValidateRequest{} +} + +func Test_Validate(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Add("Content-Type", "text/html; charset=utf-8") + w.Write([]byte(` + + + + document.body.innerHTML = '

match-this-text

'; + + + + + + `)) + })) + + defer ts.Close() + + t.Run("success", func(t *testing.T) { + // using regexp + req := newValidRequest(ts.URL, "regexp") + res := headless.SuccessResponse{} + + APITestCall(headless.Engine, "POST", "/v1/validate", req, &res) + + assert.Equal(t, true, res.IsValid) + assert.Equal(t, "", res.Detail) + + // using xpath + req = newValidRequest(ts.URL, "xpath") + res = headless.SuccessResponse{} + + APITestCall(headless.Engine, "POST", "/v1/validate", req, &res) + + assert.Equal(t, true, res.IsValid) + assert.Equal(t, "", res.Detail) + + // using js + req = newValidRequest(ts.URL, "js") + res = headless.SuccessResponse{} + + APITestCall(headless.Engine, "POST", "/v1/validate", req, &res) + + assert.Equal(t, true, res.IsValid) + assert.Equal(t, "", res.Detail) + }) + + t.Run("error ", func(t *testing.T) { + // invalid location + req := newValidRequest(ts.URL, "regexp") + res := headless.ErrorResponse{} + req.Location = "" + APITestCall(headless.Engine, "POST", "/v1/validate", req, &res) + + assert.Contains(t, res.Message, "location") + + // invalid timeout + req = newValidRequest(ts.URL, "regexp") + res = headless.ErrorResponse{} + req.Timeout = "invalid" + APITestCall(headless.Engine, "POST", "/v1/validate", req, &res) + + assert.Contains(t, res.Message, "timeout") + + // invalid match type + req = newValidRequest(ts.URL, "regexp") + res = headless.ErrorResponse{} + req.Match.Type = "invalid" + APITestCall(headless.Engine, "POST", "/v1/validate", req, &res) + + assert.Contains(t, res.Message, "match.type") + + // missing regexp value + req = newValidRequest(ts.URL, "regexp") + res = headless.ErrorResponse{} + req.Match.MatchRegExp.Value = "" + APITestCall(headless.Engine, "POST", "/v1/validate", req, &res) + + assert.Contains(t, res.Message, "match.regexp.value") + + // missing xpath selector + req = newValidRequest(ts.URL, "xpath") + res = headless.ErrorResponse{} + req.Match.MatchXPath.Selector = "" + APITestCall(headless.Engine, "POST", "/v1/validate", req, &res) + + assert.Contains(t, res.Message, "match.xpath.selector") + + // missing js value + req = newValidRequest(ts.URL, "js") + res = headless.ErrorResponse{} + req.Match.MatchJS.Value = "" + APITestCall(headless.Engine, "POST", "/v1/validate", req, &res) + + assert.Contains(t, res.Message, "match.js.value") + + // target text is not found + req = newValidRequest(ts.URL, "regexp") + success := headless.SuccessResponse{} + req.Match.MatchRegExp.Value = "unknown-text" + APITestCall(headless.Engine, "POST", "/v1/validate", req, &success) + + assert.Equal(t, success.IsValid, false) + }) +} From c44b0280c0b1d5fb85a29624a2148f0b53e9a99c Mon Sep 17 00:00:00 2001 From: synycboom Date: Wed, 9 Nov 2022 18:45:31 +0700 Subject: [PATCH 05/17] feat: implement headless cmd --- cmd/headless/main.go | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 cmd/headless/main.go diff --git a/cmd/headless/main.go b/cmd/headless/main.go new file mode 100644 index 0000000..a7341e8 --- /dev/null +++ b/cmd/headless/main.go @@ -0,0 +1,22 @@ +package main + +import ( + "flag" + "fmt" + + "github.com/nextdotid/proof_server/headless" + "github.com/sirupsen/logrus" +) + +var ( + flagPort = flag.Int("port", 9801, "Listen port") +) + +func main() { + flag.Parse() + logrus.SetLevel(logrus.DebugLevel) + headless.Init("") + + fmt.Printf("Server now running on 0.0.0.0:%d", *flagPort) + headless.Engine.Run(fmt.Sprintf("0.0.0.0:%d", *flagPort)) +} From dfc65394ca7da08ec5d53c8ed95896e63341472d Mon Sep 17 00:00:00 2001 From: synycboom Date: Wed, 9 Nov 2022 19:04:43 +0700 Subject: [PATCH 06/17] feat: block unwanted resources --- go.mod | 1 + go.sum | 2 ++ headless/validate.go | 26 +++++++++++++++++++++----- 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/go.mod b/go.mod index 36926d8..3d23afb 100644 --- a/go.mod +++ b/go.mod @@ -89,6 +89,7 @@ require ( github.com/spf13/cast v1.4.1 // indirect github.com/spf13/jwalterweatherman v1.1.0 // indirect github.com/spf13/pflag v1.0.5 // indirect + github.com/ssoroka/slice v0.0.0-20220402005549-78f0cea3df8b // indirect github.com/subosito/gotenv v1.2.0 // indirect github.com/teris-io/shortid v0.0.0-20201117134242-e59966efd125 // indirect github.com/tidwall/gjson v1.9.3 // indirect diff --git a/go.sum b/go.sum index 1148a3c..a459a2d 100644 --- a/go.sum +++ b/go.sum @@ -597,6 +597,8 @@ github.com/spf13/viper v1.7.0/go.mod h1:8WkrPz2fc9jxqZNCJI/76HCieCp4Q8HaLFoCha5q github.com/spf13/viper v1.7.1/go.mod h1:8WkrPz2fc9jxqZNCJI/76HCieCp4Q8HaLFoCha5qpdg= github.com/spf13/viper v1.11.0 h1:7OX/1FS6n7jHD1zGrZTM7WtY13ZELRyosK4k93oPr44= github.com/spf13/viper v1.11.0/go.mod h1:djo0X/bA5+tYVoCn+C7cAYJGcVn/qYLFTG8gdUsX7Zk= +github.com/ssoroka/slice v0.0.0-20220402005549-78f0cea3df8b h1:nDFJ1KYD1CSRP3nHtkvCH+ztuoz+QW++OvCLgpS6kQE= +github.com/ssoroka/slice v0.0.0-20220402005549-78f0cea3df8b/go.mod h1:l4Ov7Zo7X3/MCC+pefg/lN7x8X8FKb1Ub7oxosKKJa0= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE= diff --git a/headless/validate.go b/headless/validate.go index d0e9926..f6ac42d 100644 --- a/headless/validate.go +++ b/headless/validate.go @@ -7,6 +7,7 @@ import ( "github.com/gin-gonic/gin" "github.com/go-rod/rod" "github.com/go-rod/rod/lib/proto" + "github.com/ssoroka/slice" "golang.org/x/xerrors" ) @@ -117,14 +118,29 @@ func validate(c *gin.Context) { errorResp(c, http.StatusInternalServerError, xerrors.Errorf("%w", err)) return } + router := page.HijackRequests() - page = page.Timeout(timeoutDuration) - if err := page.WaitLoad(); err != nil { - errorResp(c, http.StatusInternalServerError, xerrors.Errorf("%w", err)) - return + resources := []proto.NetworkResourceType{ + proto.NetworkResourceTypeFont, + proto.NetworkResourceTypeImage, + proto.NetworkResourceTypeMedia, + proto.NetworkResourceTypeStylesheet, + proto.NetworkResourceTypeWebSocket, // we don't need websockets to fetch html } - if err := page.WaitRepaint(); err != nil { + router.MustAdd("*", func(ctx *rod.Hijack) { + if slice.Contains(resources, ctx.Request.Type()) { + ctx.Response.Fail(proto.NetworkErrorReasonBlockedByClient) + return + } + + ctx.ContinueRequest(&proto.FetchContinueRequest{}) + }) + + go router.Run() + + page = page.Timeout(timeoutDuration) + if err := page.WaitLoad(); err != nil { errorResp(c, http.StatusInternalServerError, xerrors.Errorf("%w", err)) return } From 24b88f0a39369749e2627f04b0aa6f046c654545 Mon Sep 17 00:00:00 2001 From: synycboom Date: Wed, 9 Nov 2022 23:13:41 +0700 Subject: [PATCH 07/17] feat: add lambda cmd for headless --- Makefile | 40 ++++++++++++++++++++++ cmd/lambda_headless/Dockerfile | 61 ++++++++++++++++++++++++++++++++++ cmd/lambda_headless/main.go | 17 ++++++++++ 3 files changed, 118 insertions(+) create mode 100644 cmd/lambda_headless/Dockerfile create mode 100644 cmd/lambda_headless/main.go diff --git a/Makefile b/Makefile index 9d89555..7258e13 100644 --- a/Makefile +++ b/Makefile @@ -3,12 +3,18 @@ bin_dir=build/ commit=$$(git rev-parse HEAD) time=$$(date +%s) +aws_registry_uri=${aws-account-id}.dkr.ecr.${aws-lambda-region}.amazonaws.com +aws_docker_image=${aws_registry_uri}/${docker-image-name}:${commit} # Things in my.mk: # aws-lambda-function-staging=my-lambda-function-staging # aws-lambda-function-production=my-lambda-function-production +# aws-lambda-headless-function-staging=my-lambda-headless-function-staging # aws-lambda-region=ap-east-1 # aws-lambda-role=arn:aws:iam::xxxxx:.... +# aws-account-id=xxxxxxxxxx +# docker-image-name=lambda_headless + -include ./my.mk build: @@ -78,3 +84,37 @@ lambda-pack-worker-staging: lambda-build-worker-staging lambda-update-worker-staging: lambda-pack-worker-staging @aws lambda update-function-code --function-name ${aws-lambda-function-worker-staging} --zip-file 'fileb://./build/lambda.zip' + +lamda-create-registry-headless: + @aws ecr get-login-password \ + --region ${aws-lambda-region} | docker login \ + --username AWS \ + --password-stdin ${aws_registry_uri} + @aws ecr describe-repositories \ + --repository-names ${docker-image-name} || \ + aws ecr create-repository \ + --repository-name ${docker-image-name} \ + --region ${aws-lambda-region} \ + --image-scanning-configuration scanOnPush=true \ + --image-tag-mutability MUTABLE + +lambda-build-headless-staging: + @docker build -f ./cmd/lambda_headless/Dockerfile -t ${docker-image-name}:${commit} . + @docker tag ${docker-image-name}:${commit} ${aws_docker_image} + +lambda-pack-headless-staging: lamda-create-registry-headless lambda-build-headless-staging + @docker push ${aws_docker_image} + +lambda-create-headless-staging: lambda-pack-headless-staging + aws lambda create-function \ + --package-type Image \ + --region ${aws-lambda-region} \ + --function-name ${aws-lambda-headless-function-staging} \ + --code ImageUri=${aws_docker_image} \ + --memory-size 1200 \ + --timeout 30 \ + --architectures x86_64 \ + --role ${aws-lambda-role} + +lambda-update-headless-staging: lambda-pack-headless-staging + @aws lambda update-function-code --function-name ${aws-lambda-headless-function-staging} --image-uri ${aws_docker_image} diff --git a/cmd/lambda_headless/Dockerfile b/cmd/lambda_headless/Dockerfile new file mode 100644 index 0000000..626d161 --- /dev/null +++ b/cmd/lambda_headless/Dockerfile @@ -0,0 +1,61 @@ +# syntax=docker/dockerfile:1 + +# https://docs.docker.com/language/golang/build-images/ +FROM golang:1.18-buster AS build + +WORKDIR /app + +COPY go.mod ./ +COPY go.sum ./ +RUN go mod download +COPY . ./ + +# Remember to build your handler executable for Linux! +# https://github.com/aws/aws-lambda-go/blob/main/README.md#building-your-function +RUN env GOOS=linux GOARCH=amd64 CGO_ENABLED=0 \ + go build -o /main ./cmd/lambda_headless/main.go + + +# Install chromium +FROM public.ecr.aws/lambda/provided:al2 as chromium + +# install brotli, so we can decompress chromium +# we don't have access to brotli out of the box, to install we first need epel +# https://docs.fedoraproject.org/en-US/epel/#what_is_extra_packages_for_enterprise_linux_or_epel +RUN yum -y install https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm && \ + yum -y install brotli && \ + yum clean all + +# download chromium +# s/o to https://github.com/alixaxel/chrome-aws-lambda for the binary +RUN yum -y install wget && \ + wget --progress=dot:giga https://raw.githubusercontent.com/alixaxel/chrome-aws-lambda/master/bin/chromium.br -O /chromium.br && \ + yum clean all + +# decompress chromium +RUN brotli -d /chromium.br + +# copy artifacts to a clean image +FROM public.ecr.aws/lambda/provided:al2 + +# install chromium dependencies +RUN yum -y install \ + libX11 \ + nano \ + unzip \ + wget \ + xclock \ + xorg-x11-xauth \ + xterm && \ + yum clean all + +# copy in chromium from chromium stage +COPY --from=chromium /chromium /opt/chromium + +# grant our program access to chromium +RUN chmod 777 /opt/chromium + +# copy in lambda fn from build stage +COPY --from=build /main /main + +ENTRYPOINT ["/main"] diff --git a/cmd/lambda_headless/main.go b/cmd/lambda_headless/main.go new file mode 100644 index 0000000..c9c8b0b --- /dev/null +++ b/cmd/lambda_headless/main.go @@ -0,0 +1,17 @@ +package main + +import ( + "github.com/akrylysov/algnhsa" + "github.com/sirupsen/logrus" + "github.com/nextdotid/proof_server/headless" +) + +func init() { + logrus.SetLevel(logrus.WarnLevel) + headless.Init("/opt/chromium") +} + +func main() { + algnhsa.ListenAndServe(headless.Engine, nil) +} + From a96d685a23dfecaac1c5ab54dd515585fa4bd91d Mon Sep 17 00:00:00 2001 From: synycboom Date: Wed, 9 Nov 2022 23:42:16 +0700 Subject: [PATCH 08/17] feat: implement headless client --- headless/client.go | 64 +++++++++++++++++++++++++++++++++++++++ headless/validate.go | 18 +++++------ headless/validate_test.go | 26 ++++++++-------- 3 files changed, 84 insertions(+), 24 deletions(-) create mode 100644 headless/client.go diff --git a/headless/client.go b/headless/client.go new file mode 100644 index 0000000..d92cfb3 --- /dev/null +++ b/headless/client.go @@ -0,0 +1,64 @@ +package headless + +import ( + "bytes" + "context" + "encoding/json" + "io" + "net/http" + + "golang.org/x/xerrors" +) + +// HeadlessClient handles communication for headless browser service +type HeadlessClient struct { + url string + client *http.Client +} + +// NewHeadlessClient creates a new headless client +func NewHeadlessClient(url string) *HeadlessClient { + return &HeadlessClient{url, http.DefaultClient} +} + +// Validate validates whether the given payload is valid +func (h *HeadlessClient) Validate(ctx context.Context, payload *ValidateRequest) (bool, error) { + body, err := json.Marshal(payload) + if err != nil { + return false, xerrors.Errorf("%w", err) + } + + req, err := http.NewRequestWithContext(ctx, http.MethodPost, h.url, bytes.NewReader(body)) + if err != nil { + return false, xerrors.Errorf("%w", err) + } + + req.Header.Add("Content-Type", "application/json") + + res, err := h.client.Do(req) + if res != nil && err != nil { + if _, err := io.Copy(io.Discard, res.Body); err != nil { + return false, xerrors.Errorf("%w", err) + } + } + + if res != nil { + defer res.Body.Close() + } + + if err != nil { + return false, xerrors.Errorf("%w", err) + } + + contents, err := io.ReadAll(res.Body) + if err != nil { + return false, xerrors.Errorf("%w", err) + } + + var resBody ValidateRespond + if err := json.Unmarshal(contents, &resBody); err != nil { + return false, xerrors.Errorf("%w", err) + } + + return resBody.IsValid, nil +} diff --git a/headless/validate.go b/headless/validate.go index f6ac42d..fc2d57e 100644 --- a/headless/validate.go +++ b/headless/validate.go @@ -57,17 +57,13 @@ type ValidateRequest struct { Match Match `json:"match"` } -type ErrorResponse struct { - Message string `json:"message"` -} - -type SuccessResponse struct { +type ValidateRespond struct { IsValid bool `json:"is_valid"` - Detail string `json:"detail,omitempty"` + Message string `json:"message,omitempty"` } func errorResp(c *gin.Context, error_code int, err error) { - c.JSON(error_code, ErrorResponse{ + c.JSON(error_code, ValidateRespond{ Message: err.Error(), }) } @@ -153,27 +149,27 @@ func validate(c *gin.Context) { } if _, err := page.ElementR(selector, req.Match.MatchRegExp.Value); err != nil { - c.JSON(http.StatusOK, SuccessResponse{IsValid: false, Detail: err.Error()}) + c.JSON(http.StatusOK, ValidateRespond{IsValid: false, Message: err.Error()}) return } case matchTypeXPath: selector := req.Match.MatchXPath.Selector if _, err := page.ElementX(selector); err != nil { - c.JSON(http.StatusOK, SuccessResponse{IsValid: false, Detail: err.Error()}) + c.JSON(http.StatusOK, ValidateRespond{IsValid: false, Message: err.Error()}) return } case matchTypeJS: js := req.Match.MatchJS.Value if _, err := page.ElementByJS(rod.Eval(js)); err != nil { - c.JSON(http.StatusOK, SuccessResponse{IsValid: false, Detail: err.Error()}) + c.JSON(http.StatusOK, ValidateRespond{IsValid: false, Message: err.Error()}) return } } - c.JSON(http.StatusOK, SuccessResponse{IsValid: true}) + c.JSON(http.StatusOK, ValidateRespond{IsValid: true}) } func checkValidateRequest(req *ValidateRequest) error { diff --git a/headless/validate_test.go b/headless/validate_test.go index 83307f7..2fbdd67 100644 --- a/headless/validate_test.go +++ b/headless/validate_test.go @@ -72,36 +72,36 @@ func Test_Validate(t *testing.T) { t.Run("success", func(t *testing.T) { // using regexp req := newValidRequest(ts.URL, "regexp") - res := headless.SuccessResponse{} + res := headless.ValidateRespond{} APITestCall(headless.Engine, "POST", "/v1/validate", req, &res) assert.Equal(t, true, res.IsValid) - assert.Equal(t, "", res.Detail) + assert.Equal(t, "", res.Message) // using xpath req = newValidRequest(ts.URL, "xpath") - res = headless.SuccessResponse{} + res = headless.ValidateRespond{} APITestCall(headless.Engine, "POST", "/v1/validate", req, &res) assert.Equal(t, true, res.IsValid) - assert.Equal(t, "", res.Detail) + assert.Equal(t, "", res.Message) // using js req = newValidRequest(ts.URL, "js") - res = headless.SuccessResponse{} + res = headless.ValidateRespond{} APITestCall(headless.Engine, "POST", "/v1/validate", req, &res) assert.Equal(t, true, res.IsValid) - assert.Equal(t, "", res.Detail) + assert.Equal(t, "", res.Message) }) t.Run("error ", func(t *testing.T) { // invalid location req := newValidRequest(ts.URL, "regexp") - res := headless.ErrorResponse{} + res := headless.ValidateRespond{} req.Location = "" APITestCall(headless.Engine, "POST", "/v1/validate", req, &res) @@ -109,7 +109,7 @@ func Test_Validate(t *testing.T) { // invalid timeout req = newValidRequest(ts.URL, "regexp") - res = headless.ErrorResponse{} + res = headless.ValidateRespond{} req.Timeout = "invalid" APITestCall(headless.Engine, "POST", "/v1/validate", req, &res) @@ -117,7 +117,7 @@ func Test_Validate(t *testing.T) { // invalid match type req = newValidRequest(ts.URL, "regexp") - res = headless.ErrorResponse{} + res = headless.ValidateRespond{} req.Match.Type = "invalid" APITestCall(headless.Engine, "POST", "/v1/validate", req, &res) @@ -125,7 +125,7 @@ func Test_Validate(t *testing.T) { // missing regexp value req = newValidRequest(ts.URL, "regexp") - res = headless.ErrorResponse{} + res = headless.ValidateRespond{} req.Match.MatchRegExp.Value = "" APITestCall(headless.Engine, "POST", "/v1/validate", req, &res) @@ -133,7 +133,7 @@ func Test_Validate(t *testing.T) { // missing xpath selector req = newValidRequest(ts.URL, "xpath") - res = headless.ErrorResponse{} + res = headless.ValidateRespond{} req.Match.MatchXPath.Selector = "" APITestCall(headless.Engine, "POST", "/v1/validate", req, &res) @@ -141,7 +141,7 @@ func Test_Validate(t *testing.T) { // missing js value req = newValidRequest(ts.URL, "js") - res = headless.ErrorResponse{} + res = headless.ValidateRespond{} req.Match.MatchJS.Value = "" APITestCall(headless.Engine, "POST", "/v1/validate", req, &res) @@ -149,7 +149,7 @@ func Test_Validate(t *testing.T) { // target text is not found req = newValidRequest(ts.URL, "regexp") - success := headless.SuccessResponse{} + success := headless.ValidateRespond{} req.Match.MatchRegExp.Value = "unknown-text" APITestCall(headless.Engine, "POST", "/v1/validate", req, &success) From d00e478e0e147d28c525a7b9b9fd5b790f17ba13 Mon Sep 17 00:00:00 2001 From: synycboom Date: Thu, 10 Nov 2022 00:20:01 +0700 Subject: [PATCH 09/17] refactor: rename function name from validate to find --- headless/client.go | 8 ++--- headless/{validate.go => find.go} | 20 +++++------ headless/{validate_test.go => find_test.go} | 38 ++++++++++----------- headless/headless.go | 2 +- 4 files changed, 34 insertions(+), 34 deletions(-) rename headless/{validate.go => find.go} (90%) rename headless/{validate_test.go => find_test.go} (83%) diff --git a/headless/client.go b/headless/client.go index d92cfb3..38bb7de 100644 --- a/headless/client.go +++ b/headless/client.go @@ -21,8 +21,8 @@ func NewHeadlessClient(url string) *HeadlessClient { return &HeadlessClient{url, http.DefaultClient} } -// Validate validates whether the given payload is valid -func (h *HeadlessClient) Validate(ctx context.Context, payload *ValidateRequest) (bool, error) { +// Find find whether the target matching payload exists +func (h *HeadlessClient) Find(ctx context.Context, payload *FindRequest) (bool, error) { body, err := json.Marshal(payload) if err != nil { return false, xerrors.Errorf("%w", err) @@ -55,10 +55,10 @@ func (h *HeadlessClient) Validate(ctx context.Context, payload *ValidateRequest) return false, xerrors.Errorf("%w", err) } - var resBody ValidateRespond + var resBody FindRespond if err := json.Unmarshal(contents, &resBody); err != nil { return false, xerrors.Errorf("%w", err) } - return resBody.IsValid, nil + return resBody.Found, nil } diff --git a/headless/validate.go b/headless/find.go similarity index 90% rename from headless/validate.go rename to headless/find.go index fc2d57e..18fce9d 100644 --- a/headless/validate.go +++ b/headless/find.go @@ -51,25 +51,25 @@ type Match struct { MatchJS *MatchJS `json:"js"` } -type ValidateRequest struct { +type FindRequest struct { Location string `json:"location"` Timeout string `json:"timeout"` Match Match `json:"match"` } -type ValidateRespond struct { - IsValid bool `json:"is_valid"` +type FindRespond struct { + Found bool `json:"found"` Message string `json:"message,omitempty"` } func errorResp(c *gin.Context, error_code int, err error) { - c.JSON(error_code, ValidateRespond{ + c.JSON(error_code, FindRespond{ Message: err.Error(), }) } func validate(c *gin.Context) { - var req ValidateRequest + var req FindRequest if err := c.Bind(&req); err != nil { errorResp(c, http.StatusBadRequest, xerrors.Errorf("Param error")) return @@ -149,30 +149,30 @@ func validate(c *gin.Context) { } if _, err := page.ElementR(selector, req.Match.MatchRegExp.Value); err != nil { - c.JSON(http.StatusOK, ValidateRespond{IsValid: false, Message: err.Error()}) + c.JSON(http.StatusOK, FindRespond{Found: false, Message: err.Error()}) return } case matchTypeXPath: selector := req.Match.MatchXPath.Selector if _, err := page.ElementX(selector); err != nil { - c.JSON(http.StatusOK, ValidateRespond{IsValid: false, Message: err.Error()}) + c.JSON(http.StatusOK, FindRespond{Found: false, Message: err.Error()}) return } case matchTypeJS: js := req.Match.MatchJS.Value if _, err := page.ElementByJS(rod.Eval(js)); err != nil { - c.JSON(http.StatusOK, ValidateRespond{IsValid: false, Message: err.Error()}) + c.JSON(http.StatusOK, FindRespond{Found: false, Message: err.Error()}) return } } - c.JSON(http.StatusOK, ValidateRespond{IsValid: true}) + c.JSON(http.StatusOK, FindRespond{Found: true}) } -func checkValidateRequest(req *ValidateRequest) error { +func checkValidateRequest(req *FindRequest) error { if req.Location == "" { return xerrors.Errorf("'location' is missing") } diff --git a/headless/validate_test.go b/headless/find_test.go similarity index 83% rename from headless/validate_test.go rename to headless/find_test.go index 2fbdd67..d5f8c71 100644 --- a/headless/validate_test.go +++ b/headless/find_test.go @@ -10,10 +10,10 @@ import ( "github.com/nextdotid/proof_server/headless" ) -func newValidRequest(location string, matchType string) headless.ValidateRequest { +func newValidRequest(location string, matchType string) headless.FindRequest { switch matchType { case "regexp": - return headless.ValidateRequest{ + return headless.FindRequest{ Location: location, Timeout: "2s", Match: headless.Match{ @@ -25,7 +25,7 @@ func newValidRequest(location string, matchType string) headless.ValidateRequest }, } case "xpath": - return headless.ValidateRequest{ + return headless.FindRequest{ Location: location, Timeout: "2s", Match: headless.Match{ @@ -36,7 +36,7 @@ func newValidRequest(location string, matchType string) headless.ValidateRequest }, } case "js": - return headless.ValidateRequest{ + return headless.FindRequest{ Location: location, Timeout: "2s", Match: headless.Match{ @@ -48,7 +48,7 @@ func newValidRequest(location string, matchType string) headless.ValidateRequest } } - return headless.ValidateRequest{} + return headless.FindRequest{} } func Test_Validate(t *testing.T) { @@ -72,36 +72,36 @@ func Test_Validate(t *testing.T) { t.Run("success", func(t *testing.T) { // using regexp req := newValidRequest(ts.URL, "regexp") - res := headless.ValidateRespond{} + res := headless.FindRespond{} APITestCall(headless.Engine, "POST", "/v1/validate", req, &res) - assert.Equal(t, true, res.IsValid) + assert.Equal(t, true, res.Found) assert.Equal(t, "", res.Message) // using xpath req = newValidRequest(ts.URL, "xpath") - res = headless.ValidateRespond{} + res = headless.FindRespond{} APITestCall(headless.Engine, "POST", "/v1/validate", req, &res) - assert.Equal(t, true, res.IsValid) + assert.Equal(t, true, res.Found) assert.Equal(t, "", res.Message) // using js req = newValidRequest(ts.URL, "js") - res = headless.ValidateRespond{} + res = headless.FindRespond{} APITestCall(headless.Engine, "POST", "/v1/validate", req, &res) - assert.Equal(t, true, res.IsValid) + assert.Equal(t, true, res.Found) assert.Equal(t, "", res.Message) }) t.Run("error ", func(t *testing.T) { // invalid location req := newValidRequest(ts.URL, "regexp") - res := headless.ValidateRespond{} + res := headless.FindRespond{} req.Location = "" APITestCall(headless.Engine, "POST", "/v1/validate", req, &res) @@ -109,7 +109,7 @@ func Test_Validate(t *testing.T) { // invalid timeout req = newValidRequest(ts.URL, "regexp") - res = headless.ValidateRespond{} + res = headless.FindRespond{} req.Timeout = "invalid" APITestCall(headless.Engine, "POST", "/v1/validate", req, &res) @@ -117,7 +117,7 @@ func Test_Validate(t *testing.T) { // invalid match type req = newValidRequest(ts.URL, "regexp") - res = headless.ValidateRespond{} + res = headless.FindRespond{} req.Match.Type = "invalid" APITestCall(headless.Engine, "POST", "/v1/validate", req, &res) @@ -125,7 +125,7 @@ func Test_Validate(t *testing.T) { // missing regexp value req = newValidRequest(ts.URL, "regexp") - res = headless.ValidateRespond{} + res = headless.FindRespond{} req.Match.MatchRegExp.Value = "" APITestCall(headless.Engine, "POST", "/v1/validate", req, &res) @@ -133,7 +133,7 @@ func Test_Validate(t *testing.T) { // missing xpath selector req = newValidRequest(ts.URL, "xpath") - res = headless.ValidateRespond{} + res = headless.FindRespond{} req.Match.MatchXPath.Selector = "" APITestCall(headless.Engine, "POST", "/v1/validate", req, &res) @@ -141,7 +141,7 @@ func Test_Validate(t *testing.T) { // missing js value req = newValidRequest(ts.URL, "js") - res = headless.ValidateRespond{} + res = headless.FindRespond{} req.Match.MatchJS.Value = "" APITestCall(headless.Engine, "POST", "/v1/validate", req, &res) @@ -149,10 +149,10 @@ func Test_Validate(t *testing.T) { // target text is not found req = newValidRequest(ts.URL, "regexp") - success := headless.ValidateRespond{} + success := headless.FindRespond{} req.Match.MatchRegExp.Value = "unknown-text" APITestCall(headless.Engine, "POST", "/v1/validate", req, &success) - assert.Equal(t, success.IsValid, false) + assert.Equal(t, success.Found, false) }) } diff --git a/headless/headless.go b/headless/headless.go index 6ab71c9..5a52646 100644 --- a/headless/headless.go +++ b/headless/headless.go @@ -30,7 +30,7 @@ func Init(launcherPath string) { Engine.Use(middlewareCors()) Engine.GET("/healthz", healthz) - Engine.POST("/v1/validate", validate) + Engine.POST("/v1/find", validate) } func healthz(c *gin.Context) { From 62a74cc53f29edc0f8d26f823073345250eb9883 Mon Sep 17 00:00:00 2001 From: synycboom Date: Thu, 10 Nov 2022 00:27:25 +0700 Subject: [PATCH 10/17] fix: missing url path --- headless/client.go | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/headless/client.go b/headless/client.go index 38bb7de..3d94926 100644 --- a/headless/client.go +++ b/headless/client.go @@ -6,6 +6,8 @@ import ( "encoding/json" "io" "net/http" + "net/url" + "path" "golang.org/x/xerrors" ) @@ -23,12 +25,18 @@ func NewHeadlessClient(url string) *HeadlessClient { // Find find whether the target matching payload exists func (h *HeadlessClient) Find(ctx context.Context, payload *FindRequest) (bool, error) { + u, err := url.Parse(h.url) + if err != nil { + return false, xerrors.Errorf("%w", err) + } + + u.Path = path.Join(u.Path, "/v1/find") body, err := json.Marshal(payload) if err != nil { return false, xerrors.Errorf("%w", err) } - req, err := http.NewRequestWithContext(ctx, http.MethodPost, h.url, bytes.NewReader(body)) + req, err := http.NewRequestWithContext(ctx, http.MethodPost, u.String(), bytes.NewReader(body)) if err != nil { return false, xerrors.Errorf("%w", err) } From 6d153d5aef5bacae783320697f7b6678199f308b Mon Sep 17 00:00:00 2001 From: synycboom Date: Thu, 10 Nov 2022 00:48:29 +0700 Subject: [PATCH 11/17] fix: wrong api calls --- headless/find_test.go | 46 ++++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/headless/find_test.go b/headless/find_test.go index d5f8c71..2a2e455 100644 --- a/headless/find_test.go +++ b/headless/find_test.go @@ -51,20 +51,22 @@ func newValidRequest(location string, matchType string) headless.FindRequest { return headless.FindRequest{} } -func Test_Validate(t *testing.T) { +func Test_Find(t *testing.T) { ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { w.Header().Add("Content-Type", "text/html; charset=utf-8") - w.Write([]byte(` - - - - document.body.innerHTML = '

match-this-text

'; - - - - - - `)) + w.Write([]byte( + ` + + + + document.body.innerHTML = '

match-this-text

'; + + + + + + `, + )) })) defer ts.Close() @@ -74,7 +76,7 @@ func Test_Validate(t *testing.T) { req := newValidRequest(ts.URL, "regexp") res := headless.FindRespond{} - APITestCall(headless.Engine, "POST", "/v1/validate", req, &res) + APITestCall(headless.Engine, "POST", "/v1/find", req, &res) assert.Equal(t, true, res.Found) assert.Equal(t, "", res.Message) @@ -83,7 +85,7 @@ func Test_Validate(t *testing.T) { req = newValidRequest(ts.URL, "xpath") res = headless.FindRespond{} - APITestCall(headless.Engine, "POST", "/v1/validate", req, &res) + APITestCall(headless.Engine, "POST", "/v1/find", req, &res) assert.Equal(t, true, res.Found) assert.Equal(t, "", res.Message) @@ -92,7 +94,7 @@ func Test_Validate(t *testing.T) { req = newValidRequest(ts.URL, "js") res = headless.FindRespond{} - APITestCall(headless.Engine, "POST", "/v1/validate", req, &res) + APITestCall(headless.Engine, "POST", "/v1/find", req, &res) assert.Equal(t, true, res.Found) assert.Equal(t, "", res.Message) @@ -103,7 +105,7 @@ func Test_Validate(t *testing.T) { req := newValidRequest(ts.URL, "regexp") res := headless.FindRespond{} req.Location = "" - APITestCall(headless.Engine, "POST", "/v1/validate", req, &res) + APITestCall(headless.Engine, "POST", "/v1/find", req, &res) assert.Contains(t, res.Message, "location") @@ -111,7 +113,7 @@ func Test_Validate(t *testing.T) { req = newValidRequest(ts.URL, "regexp") res = headless.FindRespond{} req.Timeout = "invalid" - APITestCall(headless.Engine, "POST", "/v1/validate", req, &res) + APITestCall(headless.Engine, "POST", "/v1/find", req, &res) assert.Contains(t, res.Message, "timeout") @@ -119,7 +121,7 @@ func Test_Validate(t *testing.T) { req = newValidRequest(ts.URL, "regexp") res = headless.FindRespond{} req.Match.Type = "invalid" - APITestCall(headless.Engine, "POST", "/v1/validate", req, &res) + APITestCall(headless.Engine, "POST", "/v1/find", req, &res) assert.Contains(t, res.Message, "match.type") @@ -127,7 +129,7 @@ func Test_Validate(t *testing.T) { req = newValidRequest(ts.URL, "regexp") res = headless.FindRespond{} req.Match.MatchRegExp.Value = "" - APITestCall(headless.Engine, "POST", "/v1/validate", req, &res) + APITestCall(headless.Engine, "POST", "/v1/find", req, &res) assert.Contains(t, res.Message, "match.regexp.value") @@ -135,7 +137,7 @@ func Test_Validate(t *testing.T) { req = newValidRequest(ts.URL, "xpath") res = headless.FindRespond{} req.Match.MatchXPath.Selector = "" - APITestCall(headless.Engine, "POST", "/v1/validate", req, &res) + APITestCall(headless.Engine, "POST", "/v1/find", req, &res) assert.Contains(t, res.Message, "match.xpath.selector") @@ -143,7 +145,7 @@ func Test_Validate(t *testing.T) { req = newValidRequest(ts.URL, "js") res = headless.FindRespond{} req.Match.MatchJS.Value = "" - APITestCall(headless.Engine, "POST", "/v1/validate", req, &res) + APITestCall(headless.Engine, "POST", "/v1/find", req, &res) assert.Contains(t, res.Message, "match.js.value") @@ -151,7 +153,7 @@ func Test_Validate(t *testing.T) { req = newValidRequest(ts.URL, "regexp") success := headless.FindRespond{} req.Match.MatchRegExp.Value = "unknown-text" - APITestCall(headless.Engine, "POST", "/v1/validate", req, &success) + APITestCall(headless.Engine, "POST", "/v1/find", req, &success) assert.Equal(t, success.Found, false) }) From d159d019aeeb17d91bd4154bc290d77e5f869d60 Mon Sep 17 00:00:00 2001 From: synycboom Date: Thu, 10 Nov 2022 00:49:23 +0700 Subject: [PATCH 12/17] refactor: clean up unused code --- headless/headless.go | 6 ------ 1 file changed, 6 deletions(-) diff --git a/headless/headless.go b/headless/headless.go index 5a52646..fb37770 100644 --- a/headless/headless.go +++ b/headless/headless.go @@ -6,7 +6,6 @@ import ( "github.com/gin-contrib/cors" "github.com/gin-gonic/gin" "github.com/nextdotid/proof_server/common" - "github.com/nextdotid/proof_server/validator" "github.com/sirupsen/logrus" ) @@ -34,11 +33,6 @@ func Init(launcherPath string) { } func healthz(c *gin.Context) { - platforms := make([]string, 0) - for p := range validator.PlatformFactories { - platforms = append(platforms, string(p)) - } - c.JSON(http.StatusOK, gin.H{ "hello": "proof service", "environment": common.Environment, From fc549700358ccbbe1cdc76ad69bdaee1a2064d8e Mon Sep 17 00:00:00 2001 From: synycboom Date: Sat, 19 Nov 2022 15:35:20 +0700 Subject: [PATCH 13/17] fix: wait for XHR before searching --- headless/find.go | 4 ++++ headless/find_test.go | 24 ++++++++++++++++++++---- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/headless/find.go b/headless/find.go index 18fce9d..051f292 100644 --- a/headless/find.go +++ b/headless/find.go @@ -134,6 +134,7 @@ func validate(c *gin.Context) { }) go router.Run() + defer router.Stop() page = page.Timeout(timeoutDuration) if err := page.WaitLoad(); err != nil { @@ -141,6 +142,9 @@ func validate(c *gin.Context) { return } + // Wait for XHR + page.WaitNavigation(proto.PageLifecycleEventNameNetworkAlmostIdle)() + switch req.Match.Type { case matchTypeRegex: selector := req.Match.MatchRegExp.Selector diff --git a/headless/find_test.go b/headless/find_test.go index 2a2e455..0f1527c 100644 --- a/headless/find_test.go +++ b/headless/find_test.go @@ -1,9 +1,11 @@ package headless_test import ( + "fmt" "net/http" "net/http/httptest" "testing" + "time" "github.com/stretchr/testify/assert" @@ -52,20 +54,34 @@ func newValidRequest(location string, matchType string) headless.FindRequest { } func Test_Find(t *testing.T) { + apiTs := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // To simulate network latency + time.Sleep(time.Duration(300) * time.Millisecond) + + w.Header().Add("Content-Type", "application/json; charset=utf-8") + w.Write([]byte(`{ "content": "match-this-text" }`)) + })) + + defer apiTs.Close() + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { w.Header().Add("Content-Type", "text/html; charset=utf-8") w.Write([]byte( - ` + fmt.Sprintf(` - - document.body.innerHTML = '

match-this-text

'; + - `, + `, apiTs.URL), )) })) From 6e3c6ae9623df134c4cc7ffec82525355ad8243b Mon Sep 17 00:00:00 2001 From: synycboom Date: Sat, 19 Nov 2022 15:35:52 +0700 Subject: [PATCH 14/17] fix: wrong indent --- headless/find.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/headless/find.go b/headless/find.go index 051f292..5d70793 100644 --- a/headless/find.go +++ b/headless/find.go @@ -142,7 +142,7 @@ func validate(c *gin.Context) { return } - // Wait for XHR + // Wait for XHR page.WaitNavigation(proto.PageLifecycleEventNameNetworkAlmostIdle)() switch req.Match.Type { From c722b74f32803d8722a442799360b59c0bce6568 Mon Sep 17 00:00:00 2001 From: synycboom Date: Sat, 19 Nov 2022 15:44:58 +0700 Subject: [PATCH 15/17] refactor: extract html response to a variable --- headless/find_test.go | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/headless/find_test.go b/headless/find_test.go index 0f1527c..74d3dfc 100644 --- a/headless/find_test.go +++ b/headless/find_test.go @@ -65,24 +65,24 @@ func Test_Find(t *testing.T) { defer apiTs.Close() ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + html := fmt.Sprintf(` + + + + + + + `, + apiTs.URL) + w.Header().Add("Content-Type", "text/html; charset=utf-8") - w.Write([]byte( - fmt.Sprintf(` - - - - - - - - `, apiTs.URL), - )) + w.Write([]byte(html)) })) defer ts.Close() From 282628d4c81ae004c19e57a52d47586a31c3d34d Mon Sep 17 00:00:00 2001 From: synycboom Date: Wed, 23 Nov 2022 18:08:37 +0700 Subject: [PATCH 16/17] feat: return node's content instead of boolean --- headless/client.go | 18 +++++++-------- headless/find.go | 52 ++++++++++++++++++++++++++----------------- headless/find_test.go | 16 ++++++------- 3 files changed, 49 insertions(+), 37 deletions(-) diff --git a/headless/client.go b/headless/client.go index 3d94926..bd0f59f 100644 --- a/headless/client.go +++ b/headless/client.go @@ -24,21 +24,21 @@ func NewHeadlessClient(url string) *HeadlessClient { } // Find find whether the target matching payload exists -func (h *HeadlessClient) Find(ctx context.Context, payload *FindRequest) (bool, error) { +func (h *HeadlessClient) Find(ctx context.Context, payload *FindRequest) (string, error) { u, err := url.Parse(h.url) if err != nil { - return false, xerrors.Errorf("%w", err) + return "", xerrors.Errorf("%w", err) } u.Path = path.Join(u.Path, "/v1/find") body, err := json.Marshal(payload) if err != nil { - return false, xerrors.Errorf("%w", err) + return "", xerrors.Errorf("%w", err) } req, err := http.NewRequestWithContext(ctx, http.MethodPost, u.String(), bytes.NewReader(body)) if err != nil { - return false, xerrors.Errorf("%w", err) + return "", xerrors.Errorf("%w", err) } req.Header.Add("Content-Type", "application/json") @@ -46,7 +46,7 @@ func (h *HeadlessClient) Find(ctx context.Context, payload *FindRequest) (bool, res, err := h.client.Do(req) if res != nil && err != nil { if _, err := io.Copy(io.Discard, res.Body); err != nil { - return false, xerrors.Errorf("%w", err) + return "", xerrors.Errorf("%w", err) } } @@ -55,18 +55,18 @@ func (h *HeadlessClient) Find(ctx context.Context, payload *FindRequest) (bool, } if err != nil { - return false, xerrors.Errorf("%w", err) + return "", xerrors.Errorf("%w", err) } contents, err := io.ReadAll(res.Body) if err != nil { - return false, xerrors.Errorf("%w", err) + return "", xerrors.Errorf("%w", err) } var resBody FindRespond if err := json.Unmarshal(contents, &resBody); err != nil { - return false, xerrors.Errorf("%w", err) + return "", xerrors.Errorf("%w", err) } - return resBody.Found, nil + return resBody.Content, nil } diff --git a/headless/find.go b/headless/find.go index 5d70793..3bc495b 100644 --- a/headless/find.go +++ b/headless/find.go @@ -58,7 +58,7 @@ type FindRequest struct { } type FindRespond struct { - Found bool `json:"found"` + Content string `json:"content"` Message string `json:"message,omitempty"` } @@ -144,36 +144,48 @@ func validate(c *gin.Context) { // Wait for XHR page.WaitNavigation(proto.PageLifecycleEventNameNetworkAlmostIdle)() + content, err := find(req.Match, page) + if err != nil { + c.JSON(http.StatusOK, FindRespond{Content: "", Message: err.Error()}) + + return + } - switch req.Match.Type { + c.JSON(http.StatusOK, FindRespond{Content: content}) +} + +func find(match Match, page *rod.Page) (content string, err error) { + var element *rod.Element + switch match.Type { case matchTypeRegex: - selector := req.Match.MatchRegExp.Selector - if selector == "" { - selector = "*" + if match.MatchRegExp.Selector == "" { + match.MatchRegExp.Selector = "*" } - if _, err := page.ElementR(selector, req.Match.MatchRegExp.Value); err != nil { - c.JSON(http.StatusOK, FindRespond{Found: false, Message: err.Error()}) - - return + element, err = page.ElementR(match.MatchRegExp.Selector, match.MatchRegExp.Value) + if err != nil { + return "", xerrors.Errorf("%w", err) } case matchTypeXPath: - selector := req.Match.MatchXPath.Selector - if _, err := page.ElementX(selector); err != nil { - c.JSON(http.StatusOK, FindRespond{Found: false, Message: err.Error()}) - - return + element, err = page.ElementX(match.MatchXPath.Selector) + if err != nil { + return "", xerrors.Errorf("%w", err) } case matchTypeJS: - js := req.Match.MatchJS.Value - if _, err := page.ElementByJS(rod.Eval(js)); err != nil { - c.JSON(http.StatusOK, FindRespond{Found: false, Message: err.Error()}) - - return + element, err = page.ElementByJS(rod.Eval(match.MatchJS.Value)) + if err != nil { + return "", xerrors.Errorf("%w", err) } + default: + return "", xerrors.Errorf("%s", "invalid payload") + } + + text, err := element.Text() + if err != nil { + return "", xerrors.Errorf("%w", err) } - c.JSON(http.StatusOK, FindRespond{Found: true}) + return text, nil } func checkValidateRequest(req *FindRequest) error { diff --git a/headless/find_test.go b/headless/find_test.go index 74d3dfc..def62b6 100644 --- a/headless/find_test.go +++ b/headless/find_test.go @@ -22,7 +22,7 @@ func newValidRequest(location string, matchType string) headless.FindRequest { Type: "regexp", MatchRegExp: &headless.MatchRegExp{ Selector: "*", - Value: "match-this-text", + Value: "^Sig: .*$", }, }, } @@ -44,7 +44,7 @@ func newValidRequest(location string, matchType string) headless.FindRequest { Match: headless.Match{ Type: "js", MatchJS: &headless.MatchJS{ - Value: "() => [].filter.call(document.querySelectorAll('*'), (el) => el.textContent === 'match-this-text')[0]", + Value: "() => [].filter.call(document.querySelectorAll('*'), (el) => el.textContent.includes('match-this-text'))[0]", }, }, } @@ -59,7 +59,7 @@ func Test_Find(t *testing.T) { time.Sleep(time.Duration(300) * time.Millisecond) w.Header().Add("Content-Type", "application/json; charset=utf-8") - w.Write([]byte(`{ "content": "match-this-text" }`)) + w.Write([]byte(`{ "content": "Sig: match-this-text" }`)) })) defer apiTs.Close() @@ -79,7 +79,7 @@ func Test_Find(t *testing.T) { `, - apiTs.URL) + apiTs.URL) w.Header().Add("Content-Type", "text/html; charset=utf-8") w.Write([]byte(html)) @@ -94,7 +94,7 @@ func Test_Find(t *testing.T) { APITestCall(headless.Engine, "POST", "/v1/find", req, &res) - assert.Equal(t, true, res.Found) + assert.Equal(t, "Sig: match-this-text", res.Content) assert.Equal(t, "", res.Message) // using xpath @@ -103,7 +103,7 @@ func Test_Find(t *testing.T) { APITestCall(headless.Engine, "POST", "/v1/find", req, &res) - assert.Equal(t, true, res.Found) + assert.Equal(t, "Sig: match-this-text", res.Content) assert.Equal(t, "", res.Message) // using js @@ -112,7 +112,7 @@ func Test_Find(t *testing.T) { APITestCall(headless.Engine, "POST", "/v1/find", req, &res) - assert.Equal(t, true, res.Found) + assert.Equal(t, "Sig: match-this-text", res.Content) assert.Equal(t, "", res.Message) }) @@ -171,6 +171,6 @@ func Test_Find(t *testing.T) { req.Match.MatchRegExp.Value = "unknown-text" APITestCall(headless.Engine, "POST", "/v1/find", req, &success) - assert.Equal(t, success.Found, false) + assert.Equal(t, success.Content, "") }) } From 3bdb90354e2c9a45e3111dd70c690360e33b96ed Mon Sep 17 00:00:00 2001 From: synycboom Date: Wed, 23 Nov 2022 19:01:12 +0700 Subject: [PATCH 17/17] test: change target matching text --- headless/find_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/headless/find_test.go b/headless/find_test.go index def62b6..64a384b 100644 --- a/headless/find_test.go +++ b/headless/find_test.go @@ -33,7 +33,7 @@ func newValidRequest(location string, matchType string) headless.FindRequest { Match: headless.Match{ Type: "xpath", MatchXPath: &headless.MatchXPath{ - Selector: "//text()[contains(.,'match-this-text')]", + Selector: "//text()[contains(.,'Sig:')]", }, }, } @@ -44,7 +44,7 @@ func newValidRequest(location string, matchType string) headless.FindRequest { Match: headless.Match{ Type: "js", MatchJS: &headless.MatchJS{ - Value: "() => [].filter.call(document.querySelectorAll('*'), (el) => el.textContent.includes('match-this-text'))[0]", + Value: "() => [].filter.call(document.querySelectorAll('*'), (el) => el.textContent.includes('Sig:'))[0]", }, }, }