diff --git a/Makefile b/Makefile
index 9d89555..7258e13 100644
--- a/Makefile
+++ b/Makefile
@@ -3,12 +3,18 @@
 bin_dir=build/
 commit=$$(git rev-parse HEAD)
 time=$$(date +%s)
+aws_registry_uri=${aws-account-id}.dkr.ecr.${aws-lambda-region}.amazonaws.com
+aws_docker_image=${aws_registry_uri}/${docker-image-name}:${commit}
 
 # Things in my.mk:
 # aws-lambda-function-staging=my-lambda-function-staging
 # aws-lambda-function-production=my-lambda-function-production
+# aws-lambda-headless-function-staging=my-lambda-headless-function-staging
 # aws-lambda-region=ap-east-1
 # aws-lambda-role=arn:aws:iam::xxxxx:....
+# aws-account-id=xxxxxxxxxx
+# docker-image-name=lambda_headless
+
 -include ./my.mk
 
 build:
@@ -78,3 +84,43 @@ lambda-pack-worker-staging: lambda-build-worker-staging
 
 lambda-update-worker-staging: lambda-pack-worker-staging
 	@aws lambda update-function-code --function-name ${aws-lambda-function-worker-staging} --zip-file 'fileb://./build/lambda.zip'
+
+# Log docker in to ECR and create the image repository when it does not exist yet.
+lamda-create-registry-headless:
+	@aws ecr get-login-password \
+		--region ${aws-lambda-region} | docker login \
+		--username AWS \
+		--password-stdin ${aws_registry_uri}
+	@aws ecr describe-repositories \
+		--region ${aws-lambda-region} \
+		--repository-names ${docker-image-name} || \
+		aws ecr create-repository \
+		--repository-name ${docker-image-name} \
+		--region ${aws-lambda-region} \
+		--image-scanning-configuration scanOnPush=true \
+		--image-tag-mutability MUTABLE
+
+# Build the headless Lambda image and tag it for the ECR registry.
+lambda-build-headless-staging:
+	@docker build -f ./cmd/lambda_headless/Dockerfile -t ${docker-image-name}:${commit} .
+	@docker tag ${docker-image-name}:${commit} ${aws_docker_image}
+
+# Push the tagged image; the registry target runs first.
+lambda-pack-headless-staging: lamda-create-registry-headless lambda-build-headless-staging
+	@docker push ${aws_docker_image}
+
+# Create the staging Lambda function from the pushed image.
+lambda-create-headless-staging: lambda-pack-headless-staging
+	aws lambda create-function \
+		--package-type Image \
+		--region ${aws-lambda-region} \
+		--function-name ${aws-lambda-headless-function-staging} \
+		--code ImageUri=${aws_docker_image} \
+		--memory-size 1200 \
+		--timeout 30 \
+		--architectures x86_64 \
+		--role ${aws-lambda-role}
+
+# Point the existing staging Lambda function at the freshly pushed image.
+lambda-update-headless-staging: lambda-pack-headless-staging
+	@aws lambda update-function-code --function-name ${aws-lambda-headless-function-staging} --image-uri ${aws_docker_image}
diff --git a/cmd/headless/main.go b/cmd/headless/main.go
new file mode 100644
index 0000000..a7341e8
--- /dev/null
+++ b/cmd/headless/main.go
@@ -0,0 +1,33 @@
+// Command headless serves the headless-browser lookup API over plain HTTP.
+package main
+
+import (
+	"flag"
+	"fmt"
+
+	"github.com/nextdotid/proof_server/headless"
+	"github.com/sirupsen/logrus"
+)
+
+var (
+	// flagPort is the TCP port to listen on (all interfaces).
+	flagPort = flag.Int("port", 9801, "Listen port")
+)
+
+// main parses flags, initialises the shared engine, and serves until Run
+// returns.
+func main() {
+	flag.Parse()
+	logrus.SetLevel(logrus.DebugLevel)
+	// No explicit browser path is given to Init here.
+	headless.Init("")
+
+	addr := fmt.Sprintf("0.0.0.0:%d", *flagPort)
+	fmt.Printf("Server now running on %s\n", addr)
+
+	// Run blocks while serving; report its error instead of dropping it so a
+	// failed listen (e.g. port in use) does not exit silently with status 0.
+	if err := headless.Engine.Run(addr); err != nil {
+		logrus.Fatalf("headless server stopped: %v", err)
+	}
+}
diff --git a/cmd/lambda_headless/Dockerfile b/cmd/lambda_headless/Dockerfile
new file mode 100644
index 0000000..626d161
--- /dev/null
+++ b/cmd/lambda_headless/Dockerfile
@@ -0,0 +1,61 @@
+# syntax=docker/dockerfile:1
+
+# https://docs.docker.com/language/golang/build-images/
+FROM golang:1.18-buster AS build
+
+WORKDIR /app
+
+COPY go.mod ./
+COPY go.sum ./
+RUN go mod download
+COPY . ./
+
+# Remember to build your handler executable for Linux!
+# https://github.com/aws/aws-lambda-go/blob/main/README.md#building-your-function
+RUN env GOOS=linux GOARCH=amd64 CGO_ENABLED=0 \
+    go build -o /main ./cmd/lambda_headless/main.go
+
+
+# Install chromium
+FROM public.ecr.aws/lambda/provided:al2 AS chromium
+
+# install brotli, so we can decompress chromium
+# we don't have access to brotli out of the box, to install we first need epel
+# https://docs.fedoraproject.org/en-US/epel/#what_is_extra_packages_for_enterprise_linux_or_epel
+RUN yum -y install https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm && \
+    yum -y install brotli && \
+    yum clean all
+
+# download chromium
+# s/o to https://github.com/alixaxel/chrome-aws-lambda for the binary
+RUN yum -y install wget && \
+    wget --progress=dot:giga https://raw.githubusercontent.com/alixaxel/chrome-aws-lambda/master/bin/chromium.br -O /chromium.br && \
+    yum clean all
+
+# decompress chromium
+RUN brotli -d /chromium.br
+
+# copy artifacts to a clean image
+FROM public.ecr.aws/lambda/provided:al2
+
+# install chromium dependencies
+RUN yum -y install \
+    libX11 \
+    nano \
+    unzip \
+    wget \
+    xclock \
+    xorg-x11-xauth \
+    xterm && \
+    yum clean all
+
+# copy in chromium from chromium stage
+COPY --from=chromium /chromium /opt/chromium
+
+# make chromium readable and executable by our program; write access is not needed
+RUN chmod 755 /opt/chromium
+
+# copy in lambda fn from build stage
+COPY --from=build /main /main
+
+ENTRYPOINT ["/main"]
diff --git a/cmd/lambda_headless/main.go b/cmd/lambda_headless/main.go
new file mode 100644
index 0000000..c9c8b0b
--- /dev/null
+++ b/cmd/lambda_headless/main.go
@@ -0,0 +1,21 @@
+// Command lambda_headless exposes the headless lookup API as an AWS Lambda
+// function.
+package main
+
+import (
+	"github.com/akrylysov/algnhsa"
+	"github.com/nextdotid/proof_server/headless"
+	"github.com/sirupsen/logrus"
+)
+
+// init configures logging and points the engine at the Chromium binary that
+// the image build copies to /opt/chromium.
+func init() {
+	logrus.SetLevel(logrus.WarnLevel)
+	headless.Init("/opt/chromium")
+}
+
+// main hands the gin engine to algnhsa, which serves it as the Lambda handler.
+func main() {
+	algnhsa.ListenAndServe(headless.Engine, nil)
+}
diff --git a/go.mod b/go.mod
index db3ed03..da27db7 100644
--- a/go.mod
+++ 
b/go.mod @@ -13,10 +13,12 @@ require ( github.com/gagliardetto/solana-go v1.4.0 github.com/gin-gonic/gin v1.7.7 github.com/go-resty/resty/v2 v2.7.0 + github.com/go-rod/rod v0.112.0 github.com/gotd/td v0.71.0 github.com/mr-tron/base58 v1.2.0 github.com/sirupsen/logrus v1.9.0 github.com/spf13/viper v1.11.0 + github.com/ssoroka/slice v0.0.0-20220402005549-78f0cea3df8b github.com/wealdtech/go-ens/v3 v3.5.5 ) @@ -120,6 +122,9 @@ require ( github.com/tklauser/numcpus v0.2.2 // indirect github.com/ugorji/go/codec v1.2.7 // indirect github.com/wealdtech/go-multicodec v1.4.0 // indirect + github.com/ysmood/goob v0.4.0 // indirect + github.com/ysmood/gson v0.7.2 // indirect + github.com/ysmood/leakless v0.8.0 // indirect go.opencensus.io v0.23.0 // indirect go.opentelemetry.io/otel v1.11.1 // indirect go.opentelemetry.io/otel/trace v1.11.1 // indirect diff --git a/go.sum b/go.sum index 7d2515d..70cda13 100644 --- a/go.sum +++ b/go.sum @@ -253,6 +253,8 @@ github.com/go-playground/validator/v10 v10.10.1 h1:uA0+amWMiglNZKZ9FJRKUAe9U3RX9 github.com/go-playground/validator/v10 v10.10.1/go.mod h1:i+3WkQ1FvaUjjxh1kSvIA4dMGDBiPU55YFDl0WbKdWU= github.com/go-resty/resty/v2 v2.7.0 h1:me+K9p3uhSmXtrBZ4k9jcEAfJmuC8IivWHwaLZwPrFY= github.com/go-resty/resty/v2 v2.7.0/go.mod h1:9PWDzw47qPphMRFfhsyk0NnSgvluHcljSMVIq3w7q0I= +github.com/go-rod/rod v0.112.0 h1:U9Yc+quw4hxZ6GrdbWFBeylvaYElEKM9ijFW2LYkGlA= +github.com/go-rod/rod v0.112.0/go.mod h1:GZDtmEs6RpF6kBRYpGCZXxXlKNneKVPiKOjaMbmVVjE= github.com/go-sql-driver/mysql v1.6.0 h1:BCTh4TKNUYmOmMUcQ3IipzF5prigylS7XXjEkfCHuOE= github.com/go-sql-driver/mysql v1.6.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= @@ -672,6 +674,8 @@ github.com/spf13/viper v1.7.0/go.mod h1:8WkrPz2fc9jxqZNCJI/76HCieCp4Q8HaLFoCha5q github.com/spf13/viper v1.7.1/go.mod h1:8WkrPz2fc9jxqZNCJI/76HCieCp4Q8HaLFoCha5qpdg= github.com/spf13/viper v1.11.0 
h1:7OX/1FS6n7jHD1zGrZTM7WtY13ZELRyosK4k93oPr44= github.com/spf13/viper v1.11.0/go.mod h1:djo0X/bA5+tYVoCn+C7cAYJGcVn/qYLFTG8gdUsX7Zk= +github.com/ssoroka/slice v0.0.0-20220402005549-78f0cea3df8b h1:nDFJ1KYD1CSRP3nHtkvCH+ztuoz+QW++OvCLgpS6kQE= +github.com/ssoroka/slice v0.0.0-20220402005549-78f0cea3df8b/go.mod h1:l4Ov7Zo7X3/MCC+pefg/lN7x8X8FKb1Ub7oxosKKJa0= github.com/status-im/keycard-go v0.0.0-20190316090335-8537d3370df4 h1:Gb2Tyox57NRNuZ2d3rmvB3pcmbu7O1RS3m8WRx7ilrg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= @@ -724,6 +728,17 @@ github.com/wealdtech/go-multicodec v1.4.0/go.mod h1:aedGMaTeYkIqi/KCPre1ho5rTb3h github.com/wealdtech/go-string2eth v1.1.0 h1:USJQmysUrBYYmZs7d45pMb90hRSyEwizP7lZaOZLDAw= github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU= +github.com/ysmood/goob v0.4.0 h1:HsxXhyLBeGzWXnqVKtmT9qM7EuVs/XOgkX7T6r1o1AQ= +github.com/ysmood/goob v0.4.0/go.mod h1:u6yx7ZhS4Exf2MwciFr6nIM8knHQIE22lFpWHnfql18= +github.com/ysmood/got v0.31.3 h1:UvvF+TDVsZLO7MSzm/Bd/H4HVp+7S5YwsxgdwaKq8uA= +github.com/ysmood/got v0.31.3/go.mod h1:pE1l4LOwOBhQg6A/8IAatkGp7uZjnalzrZolnlhhMgY= +github.com/ysmood/gotrace v0.6.0 h1:SyI1d4jclswLhg7SWTL6os3L1WOKeNn/ZtzVQF8QmdY= +github.com/ysmood/gotrace v0.6.0/go.mod h1:TzhIG7nHDry5//eYZDYcTzuJLYQIkykJzCRIo4/dzQM= +github.com/ysmood/gson v0.7.1/go.mod h1:3Kzs5zDl21g5F/BlLTNcuAGAYLKt2lV5G8D1zF3RNmg= +github.com/ysmood/gson v0.7.2 h1:1iWUvpi5DPvd2j59W7ifRPR9DiAZ3Ga+fmMl1mJrRbM= +github.com/ysmood/gson v0.7.2/go.mod h1:3Kzs5zDl21g5F/BlLTNcuAGAYLKt2lV5G8D1zF3RNmg= +github.com/ysmood/leakless v0.8.0 h1:BzLrVoiwxikpgEQR0Lk8NyBN5Cit2b1z+u0mgL4ZJak= +github.com/ysmood/leakless v0.8.0/go.mod h1:R8iAXPRaG97QJwqxs74RdwzcRHT1SWCGTNqY8q0JvMQ= 
github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
 github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
 github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
diff --git a/headless/client.go b/headless/client.go
new file mode 100644
index 0000000..bd0f59f
--- /dev/null
+++ b/headless/client.go
@@ -0,0 +1,82 @@
+package headless
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"io"
+	"net/http"
+	"net/url"
+	"path"
+
+	"golang.org/x/xerrors"
+)
+
+// HeadlessClient handles communication for headless browser service
+type HeadlessClient struct {
+	url    string       // base URL of the service; "/v1/find" is appended per call
+	client *http.Client // HTTP client used for every request
+}
+
+// NewHeadlessClient creates a new headless client that talks to the service at
+// url using http.DefaultClient.
+func NewHeadlessClient(url string) *HeadlessClient {
+	return &HeadlessClient{url: url, client: http.DefaultClient}
+}
+
+// Find posts payload to the service's /v1/find endpoint and returns the
+// "content" field of the JSON reply.
+//
+// An empty string with a nil error means the service replied 200 without
+// content. Transport failures, non-200 replies, and bodies that cannot be
+// decoded are returned as errors.
+func (h *HeadlessClient) Find(ctx context.Context, payload *FindRequest) (string, error) {
+	u, err := url.Parse(h.url)
+	if err != nil {
+		return "", xerrors.Errorf("%w", err)
+	}
+
+	u.Path = path.Join(u.Path, "/v1/find")
+	body, err := json.Marshal(payload)
+	if err != nil {
+		return "", xerrors.Errorf("%w", err)
+	}
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, u.String(), bytes.NewReader(body))
+	if err != nil {
+		return "", xerrors.Errorf("%w", err)
+	}
+
+	req.Header.Add("Content-Type", "application/json")
+
+	// net/http documents that a response returned together with an error can
+	// be ignored, so the error path needs no draining or closing.
+	res, err := h.client.Do(req)
+	if err != nil {
+		return "", xerrors.Errorf("%w", err)
+	}
+	defer res.Body.Close()
+
+	contents, err := io.ReadAll(res.Body)
+	if err != nil {
+		return "", xerrors.Errorf("%w", err)
+	}
+
+	// Failures are reported through the status code plus an optional
+	// "message"; surface them instead of returning an empty match.
+	if res.StatusCode != http.StatusOK {
+		var failure FindRespond
+		if err := json.Unmarshal(contents, &failure); err == nil && failure.Message != "" {
+			return "", xerrors.Errorf("headless find: %s: %s", res.Status, failure.Message)
+		}
+
+		return "", xerrors.Errorf("headless find: unexpected status %s", res.Status)
+	}
+
+	var resBody FindRespond
+	if err := json.Unmarshal(contents, &resBody); err != nil {
+		return "", xerrors.Errorf("%w", err)
+	}
+
+	return resBody.Content, nil
+}
diff --git a/headless/find.go b/headless/find.go
new file mode 100644
index 0000000..3bc495b
--- /dev/null
+++ b/headless/find.go
@@ -0,0 +1,300 @@
+package headless
+
+import (
+	"net/http"
+	"net/url"
+	"time"
+
+	"github.com/gin-gonic/gin"
+	"github.com/go-rod/rod"
+	"github.com/go-rod/rod/lib/proto"
+	"github.com/ssoroka/slice"
+	"golang.org/x/xerrors"
+)
+
+const (
+	matchTypeRegex = "regexp"
+	matchTypeXPath = "xpath"
+	matchTypeJS    = "js"
+
+	// defaultTimeout is used when a request does not set "timeout".
+	defaultTimeout = "10s"
+)
+
+var (
+	// validMatchTypes is the set of accepted "match.type" values.
+	validMatchTypes = map[string]struct{}{
+		matchTypeJS:    {},
+		matchTypeXPath: {},
+		matchTypeRegex: {},
+	}
+)
+
+// MatchRegExp selects an element by selector and regular expression.
+type MatchRegExp struct {
+	// Selector is the target element if not specified "*" will be used
+	Selector string `json:"selector"`
+
+	// Value is the target value
+	Value string `json:"value"`
+}
+
+// MatchXPath selects an element by XPath.
+type MatchXPath struct {
+	// Selector is the xpath selector
+	Selector string `json:"selector"`
+}
+
+// MatchJS selects an element with JavaScript evaluated in the page.
+type MatchJS struct {
+	// Value is the javascript value
+	Value string `json:"value"`
+}
+
+// Match describes how to locate the element; Type names which of the optional
+// payloads is used.
+type Match struct {
+	Type        string       `json:"type"`
+	MatchRegExp *MatchRegExp `json:"regexp"`
+	MatchXPath  *MatchXPath  `json:"xpath"`
+	MatchJS     *MatchJS     `json:"js"`
+}
+
+// FindRequest is the JSON body accepted by POST /v1/find. Location is the page
+// to load, Timeout an optional time.ParseDuration string (e.g. "2s"), and
+// Match the lookup to run on the loaded page.
+type FindRequest struct {
+	Location string `json:"location"`
+	Timeout  string `json:"timeout"`
+	Match    Match  `json:"match"`
+}
+
+// FindRespond is the JSON body returned by POST /v1/find. Content is the
+// matched element's text; Message carries an error description, if any.
+type FindRespond struct {
+	Content string `json:"content"`
+	Message string `json:"message,omitempty"`
+}
+
+// errorResp replies with statusCode and a FindRespond carrying err's text.
+func errorResp(c *gin.Context, statusCode int, err error) {
+	c.JSON(statusCode, FindRespond{
+		Message: err.Error(),
+	})
+}
+
+// validate handles POST /v1/find: it checks the request, launches a browser,
+// loads req.Location, and replies with the text of the element selected by
+// req.Match.
+//
+// It replies 400 for a malformed or invalid request, 500 when the browser or
+// the page load fails, and 200 otherwise; a 200 reply with "message" set means
+// the page loaded but no element text could be extracted.
+func validate(c *gin.Context) {
+	var req FindRequest
+	// ShouldBind, unlike Bind, does not write a 400 response of its own, so
+	// the JSON error below is the only reply.
+	if err := c.ShouldBind(&req); err != nil {
+		errorResp(c, http.StatusBadRequest, xerrors.Errorf("Param error"))
+		return
+	}
+
+	if err := checkValidateRequest(&req); err != nil {
+		errorResp(c, http.StatusBadRequest, err)
+		return
+	}
+
+	// Resolve the time budget before any browser process is started.
+	timeout := req.Timeout
+	if timeout == "" {
+		timeout = defaultTimeout
+	}
+
+	timeoutDuration, err := time.ParseDuration(timeout)
+	if err != nil {
+		errorResp(c, http.StatusInternalServerError, xerrors.Errorf("%w", err))
+		return
+	}
+
+	launcher := newLauncher(LauncherPath)
+	// Deferred calls run in reverse order: Kill first, then Cleanup.
+	defer launcher.Cleanup()
+	defer launcher.Kill()
+
+	u, err := launcher.Launch()
+	if err != nil {
+		errorResp(c, http.StatusInternalServerError, xerrors.Errorf("%w", err))
+		return
+	}
+
+	browser := rod.New().ControlURL(u)
+	if err := browser.Connect(); err != nil {
+		errorResp(c, http.StatusInternalServerError, xerrors.Errorf("%w", err))
+		return
+	}
+
+	defer browser.Close()
+
+	// Open an empty tab first. Navigation happens only after the request
+	// router and the lifecycle waiter are installed, so both also cover the
+	// first requests of the target page.
+	page, err := browser.Page(proto.TargetCreateTarget{})
+	if err != nil {
+		errorResp(c, http.StatusInternalServerError, xerrors.Errorf("%w", err))
+		return
+	}
+
+	router := page.HijackRequests()
+
+	resources := []proto.NetworkResourceType{
+		proto.NetworkResourceTypeFont,
+		proto.NetworkResourceTypeImage,
+		proto.NetworkResourceTypeMedia,
+		proto.NetworkResourceTypeStylesheet,
+		proto.NetworkResourceTypeWebSocket, // we don't need websockets to fetch html
+	}
+
+	router.MustAdd("*", func(ctx *rod.Hijack) {
+		if slice.Contains(resources, ctx.Request.Type()) {
+			ctx.Response.Fail(proto.NetworkErrorReasonBlockedByClient)
+			return
+		}
+
+		ctx.ContinueRequest(&proto.FetchContinueRequest{})
+	})
+
+	go router.Run()
+	defer router.Stop()
+
+	// One deadline covers navigation, loading, and the element lookup.
+	page = page.Timeout(timeoutDuration)
+
+	// Wait for XHR: register the waiter before navigating, as rod documents.
+	waitIdle := page.WaitNavigation(proto.PageLifecycleEventNameNetworkAlmostIdle)
+
+	if err := page.Navigate(req.Location); err != nil {
+		errorResp(c, http.StatusInternalServerError, xerrors.Errorf("%w", err))
+		return
+	}
+
+	if err := page.WaitLoad(); err != nil {
+		errorResp(c, http.StatusInternalServerError, xerrors.Errorf("%w", err))
+		return
+	}
+
+	waitIdle()
+
+	content, err := find(req.Match, page)
+	if err != nil {
+		// A failed lookup is reported as 200 with an explanatory message.
+		c.JSON(http.StatusOK, FindRespond{Content: "", Message: err.Error()})
+
+		return
+	}
+
+	c.JSON(http.StatusOK, FindRespond{Content: content})
+}
+
+// find locates one element on page as described by match and returns its text.
+//
+// match is expected to have passed checkValidateRequest; a missing payload for
+// the selected type is still reported as an error rather than dereferenced.
+func find(match Match, page *rod.Page) (content string, err error) {
+	var element *rod.Element
+	switch match.Type {
+	case matchTypeRegex:
+		if match.MatchRegExp == nil {
+			return "", xerrors.Errorf("%s", "invalid payload")
+		}
+
+		// Default the selector locally so the caller's request is not mutated
+		// through the shared pointer.
+		selector := match.MatchRegExp.Selector
+		if selector == "" {
+			selector = "*"
+		}
+
+		element, err = page.ElementR(selector, match.MatchRegExp.Value)
+	case matchTypeXPath:
+		if match.MatchXPath == nil {
+			return "", xerrors.Errorf("%s", "invalid payload")
+		}
+
+		element, err = page.ElementX(match.MatchXPath.Selector)
+	case matchTypeJS:
+		if match.MatchJS == nil {
+			return "", xerrors.Errorf("%s", "invalid payload")
+		}
+
+		element, err = page.ElementByJS(rod.Eval(match.MatchJS.Value))
+	default:
+		return "", xerrors.Errorf("%s", "invalid payload")
+	}
+
+	if err != nil {
+		return "", xerrors.Errorf("%w", err)
+	}
+
+	text, err := element.Text()
+	if err != nil {
+		return "", xerrors.Errorf("%w", err)
+	}
+
+	return text, nil
+}
+
+// checkValidateRequest reports the first problem found in req, or nil when the
+// request can be executed.
+func checkValidateRequest(req *FindRequest) error {
+	if req.Location == "" {
+		return xerrors.Errorf("'location' is missing")
+	}
+
+	// The browser is launched with web security disabled, so only plain web
+	// URLs are accepted; file:, data:, chrome: and similar schemes are refused.
+	location, err := url.Parse(req.Location)
+	if err != nil || location.Host == "" || (location.Scheme != "http" && location.Scheme != "https") {
+		return xerrors.Errorf("'location' must be an absolute http or https URL")
+	}
+
+	if req.Timeout != "" {
+		// A zero or negative budget would expire before the page is loaded.
+		d, err := time.ParseDuration(req.Timeout)
+		if err != nil || d <= 0 {
+			return xerrors.Errorf("'timeout' is invalid")
+		}
+	}
+
+	if _, ok := validMatchTypes[req.Match.Type]; !ok {
+		return xerrors.Errorf("'match.type' should be 'regexp', 'xpath', or 'js'")
+	}
+
+	switch req.Match.Type {
+	case matchTypeRegex:
+		if req.Match.MatchRegExp == nil {
+			return xerrors.Errorf("'match.regexp' payload is missing")
+		}
+
+		if req.Match.MatchRegExp.Value == "" {
+			return xerrors.Errorf("'match.regexp.value' must be specified")
+		}
+	case matchTypeXPath:
+		if req.Match.MatchXPath == nil {
+			return xerrors.Errorf("'match.xpath' payload is missing")
+		}
+
+		if req.Match.MatchXPath.Selector == "" {
+			return xerrors.Errorf("'match.xpath.selector' must be specified")
+		}
+	case matchTypeJS:
+		if req.Match.MatchJS == nil {
+			return xerrors.Errorf("'match.js' payload is missing")
+		}
+
+		if req.Match.MatchJS.Value == "" {
+			return xerrors.Errorf("'match.js.value' must be specified")
+		}
+	}
+
+	return nil
+}
diff --git a/headless/find_test.go b/headless/find_test.go
new file mode 100644
index 0000000..64a384b
--- /dev/null
+++ b/headless/find_test.go
@@ -0,0 +1,180 @@
+package headless_test
+
+import (
+	"fmt"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/assert"
+
+	"github.com/nextdotid/proof_server/headless"
+)
+
+// newValidRequest returns a well-formed request for location using the given
+// match type ("regexp", "xpath", or "js"); any other type yields a zero value.
+func newValidRequest(location string, matchType string) headless.FindRequest {
+	switch matchType {
+	case "regexp":
+		return headless.FindRequest{
+			Location: location,
+			Timeout:  "2s",
+			Match: headless.Match{
+				Type: "regexp",
+				MatchRegExp: &headless.MatchRegExp{
+					Selector: "*",
+					Value:    "^Sig: .*$",
+				},
+			},
+		}
+	case "xpath":
+		return headless.FindRequest{
+			Location: location,
+			Timeout:  "2s",
+			Match: headless.Match{
+				Type: "xpath",
+				MatchXPath: &headless.MatchXPath{
+					Selector: "//text()[contains(.,'Sig:')]",
+				},
+			},
+		}
+	case "js":
+		return headless.FindRequest{
+			Location: location,
+			Timeout:  "2s",
+			Match: headless.Match{
+				Type: "js",
+				MatchJS: &headless.MatchJS{
+					Value: "() => [].filter.call(document.querySelectorAll('*'), (el) => el.textContent.includes('Sig:'))[0]",
+				},
+			},
+		}
+	}
+
+	return headless.FindRequest{}
+}
+
+// Test_Find drives POST /v1/find against a local page whose text is filled in
+// by a delayed fetch, covering every match type and the validation errors.
+func Test_Find(t *testing.T) {
+	apiTs := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		// To simulate network latency
+		time.Sleep(time.Duration(300) * time.Millisecond)
+
+		w.Header().Add("Content-Type", "application/json; charset=utf-8")
+		w.Write([]byte(`{ "content": "Sig: match-this-text" }`))
+	}))
+
+	defer apiTs.Close()
+
+	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		html := fmt.Sprintf(`
+		<!DOCTYPE html>
+		<html>
+			<!-- NOTE(review): fixture markup rebuilt from the surrounding test; confirm against the upstream file -->
+			<body>
+				<div id="sig"></div>
+				<script>
+					fetch("%s")
+						.then((res) => res.json())
+						.then((data) => { document.getElementById("sig").textContent = data.content })
+				</script>
+			</body>
+		</html>
+		`,
+			apiTs.URL)
+
+		w.Header().Add("Content-Type", "text/html; charset=utf-8")
+		w.Write([]byte(html))
+	}))
+
+	defer ts.Close()
+
+	t.Run("success", func(t *testing.T) {
+		// using regexp
+		req := newValidRequest(ts.URL, "regexp")
+		res := headless.FindRespond{}
+
+		APITestCall(headless.Engine, "POST", "/v1/find", req, &res)
+
+		assert.Equal(t, "Sig: match-this-text", res.Content)
+		assert.Equal(t, "", res.Message)
+
+		// using xpath
+		req = newValidRequest(ts.URL, "xpath")
+		res = headless.FindRespond{}
+
+		APITestCall(headless.Engine, "POST", "/v1/find", req, &res)
+
+		assert.Equal(t, "Sig: match-this-text", res.Content)
+		assert.Equal(t, "", res.Message)
+
+		// using js
+		req = newValidRequest(ts.URL, "js")
+		res = headless.FindRespond{}
+
+		APITestCall(headless.Engine, "POST", "/v1/find", req, &res)
+
+		assert.Equal(t, "Sig: match-this-text", res.Content)
+		assert.Equal(t, "", res.Message)
+	})
+
+	t.Run("error", func(t *testing.T) {
+		// invalid location
+		req := newValidRequest(ts.URL, "regexp")
+		res := headless.FindRespond{}
+		req.Location = ""
+		APITestCall(headless.Engine, "POST", "/v1/find", req, &res)
+
+		assert.Contains(t, res.Message, "location")
+
+		// invalid timeout
+		req = newValidRequest(ts.URL, "regexp")
+		res = headless.FindRespond{}
+		req.Timeout = "invalid"
+		APITestCall(headless.Engine, "POST", "/v1/find", req, &res)
+
+		assert.Contains(t, res.Message, "timeout")
+
+		// invalid match type
+		req = newValidRequest(ts.URL, "regexp")
+		res = headless.FindRespond{}
+		req.Match.Type = "invalid"
+		APITestCall(headless.Engine, "POST", "/v1/find", req, &res)
+
+		assert.Contains(t, res.Message, "match.type")
+
+		// missing regexp value
+		req = newValidRequest(ts.URL, "regexp")
+		res = headless.FindRespond{}
+		req.Match.MatchRegExp.Value = ""
+		APITestCall(headless.Engine, "POST", "/v1/find", req, &res)
+
+		assert.Contains(t, res.Message, "match.regexp.value")
+
+		// missing xpath selector
+		req = newValidRequest(ts.URL, "xpath")
+		res = headless.FindRespond{}
+		req.Match.MatchXPath.Selector = ""
+		APITestCall(headless.Engine, "POST", "/v1/find", req, &res)
+
+		assert.Contains(t, res.Message, "match.xpath.selector")
+
+		// missing js value
+		req = newValidRequest(ts.URL, "js")
+		res = headless.FindRespond{}
+		req.Match.MatchJS.Value = ""
+		APITestCall(headless.Engine, "POST", "/v1/find", req, &res)
+
+		assert.Contains(t, res.Message, "match.js.value")
+
+		// target text is not found
+		req = newValidRequest(ts.URL, "regexp")
+		success := headless.FindRespond{}
+		req.Match.MatchRegExp.Value = "unknown-text"
+		APITestCall(headless.Engine, "POST", "/v1/find", req, &success)
+
+		assert.Equal(t, "", success.Content)
+	})
+}
diff --git a/headless/headless.go b/headless/headless.go
new file mode 100644
index 0000000..fb37770
--- /dev/null
+++ b/headless/headless.go
@@ -0,0 +1,52 @@
+// Package headless exposes an HTTP API that loads a page in a headless
+// browser and returns the text of a matching element.
+package headless
+
+import (
+	"net/http"
+
+	"github.com/gin-contrib/cors"
+	"github.com/gin-gonic/gin"
+	"github.com/nextdotid/proof_server/common"
+	"github.com/sirupsen/logrus"
+)
+
+var (
+	// Engine is the shared gin engine; it is nil until Init is called.
+	Engine *gin.Engine
+	// LauncherPath is the browser binary path given to the most recent Init.
+	LauncherPath string
+	// l is the package logger, tagged with the module name.
+	l = logrus.WithFields(logrus.Fields{"module": "headless"})
+)
+
+// middlewareCors returns the default CORS middleware from gin-contrib/cors.
+func middlewareCors() gin.HandlerFunc {
+	return cors.Default()
+}
+
+// Init records launcherPath and, on the first call only, builds Engine with
+// the CORS middleware and the /healthz and /v1/find routes. Later calls only
+// update LauncherPath.
+func Init(launcherPath string) {
+	LauncherPath = launcherPath
+	if Engine != nil {
+		return
+	}
+
+	Engine = gin.Default()
+	Engine.Use(middlewareCors())
+
+	Engine.GET("/healthz", healthz)
+	Engine.POST("/v1/find", validate)
+}
+
+// healthz replies 200 with the service name and build metadata from common.
+func healthz(c *gin.Context) {
+	c.JSON(http.StatusOK, gin.H{
+		"hello":       "proof service",
+		"environment": common.Environment,
+		"revision":    common.Revision,
+		"built_at":    common.BuildTime,
+	})
+}
diff --git a/headless/headless_test.go b/headless/headless_test.go
new file mode 100644
index 0000000..6550150
--- /dev/null
+++ b/headless/headless_test.go
@@ -0,0 +1,48 @@
+package headless_test
+
+import (
+	"bytes"
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"os"
+	"testing"
+
+	"github.com/gin-gonic/gin"
+	"github.com/nextdotid/proof_server/config"
+	"github.com/nextdotid/proof_server/headless"
+)
+
+// TestMain loads the test configuration and builds the engine once for every
+// test in the package.
+func TestMain(m *testing.M) {
+	config.Init("../config/config.test.json")
+	headless.Init("")
+	os.Exit(m.Run())
+}
+
+// APITestCall sends body as JSON to engine with the given method and url,
+// decodes the JSON reply into response, and returns the recorder.
+//
+// It has no *testing.T to report through, so it panics when the request cannot
+// be built or the reply is not valid JSON instead of leaving response zeroed.
+func APITestCall(engine *gin.Engine, method, url string, body any, response any) *httptest.ResponseRecorder {
+	bb, err := json.Marshal(body)
+	if err != nil {
+		panic(err)
+	}
+
+	w := httptest.NewRecorder()
+	req, err := http.NewRequest(method, url, bytes.NewReader(bb))
+	if err != nil {
+		panic(err)
+	}
+
+	req.Header.Add("Content-Type", "application/json")
+	engine.ServeHTTP(w, req)
+	if err := json.Unmarshal(w.Body.Bytes(), response); err != nil {
+		panic(err)
+	}
+
+	return w
+}
diff --git a/headless/launcher.go b/headless/launcher.go
new file mode 100644
index 0000000..646ff24
--- /dev/null
+++ b/headless/launcher.go
@@ -0,0 +1,48 @@
+package headless
+
+import "github.com/go-rod/rod/lib/launcher"
+
+// newLauncher returns a launcher for the browser binary at path with the flag
+// set below applied.
+//
+// When path is empty it falls back to a browser found by launcher.LookPath
+// and, failing that, to one obtained via launcher.NewBrowser().MustGet(),
+// which by rod's Must convention panics on failure.
+func newLauncher(path string) *launcher.Launcher {
+	if path == "" {
+		var found bool
+
+		path, found = launcher.LookPath()
+		if !found {
+			path = launcher.NewBrowser().MustGet()
+		}
+	}
+
+	return launcher.New().
+		Bin(path).
+		// recommended flags to run in serverless environments
+		// see https://github.com/alixaxel/chrome-aws-lambda/blob/master/source/index.ts
+		Set("allow-running-insecure-content").
+		Set("autoplay-policy", "user-gesture-required").
+		Set("disable-component-update").
+		Set("disable-domain-reliability").
+		Set("disable-features", "AudioServiceOutOfProcess", "IsolateOrigins", "site-per-process").
+		Set("disable-print-preview").
+		Set("disable-setuid-sandbox").
+		Set("disable-site-isolation-trials").
+		Set("disable-speech-api").
+		Set("disable-web-security").
+		Set("disk-cache-size", "33554432").
+		Set("enable-features", "SharedArrayBuffer").
+		Set("hide-scrollbars").
+		Set("ignore-gpu-blocklist").
+		Set("in-process-gpu").
+		Set("mute-audio").
+		Set("no-default-browser-check").
+		Set("no-pings").
+		Set("no-sandbox").
+		Set("no-zygote").
+		Set("single-process").
+		Set("use-gl", "swiftshader").
+		Set("window-size", "1920", "1080")
+}