Skip to content

Commit

Permalink
vision/detect: delete unused sample and add subtests (#885)
Browse files Browse the repository at this point in the history
Using parallel subtests for TestDetect saves about 20 seconds per run
(~22 seconds -> ~2 seconds).

This PR also makes it so we don't need time.Now().Unix() to make a unique bucket name. Instead, it cleans and uses the same bucket every time.
  • Loading branch information
tbpg committed Jun 13, 2019
1 parent 719088e commit 13cc034
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 102 deletions.
55 changes: 1 addition & 54 deletions vision/detect/detect.go
Expand Up @@ -515,59 +515,6 @@ func detectLogos(w io.Writer, file string) error {

// [END vision_logo_detection]

// [START vision_text_detection_pdf]

// detectAsyncDocument performs Optical Character Recognition (OCR) on a
// PDF file stored in GCS.
//
// It submits an asynchronous batch annotation request for the file at
// gcsSourceURI, asks for the output to be written under gcsDestinationURI,
// blocks until the operation finishes, and writes the response to w.
// Any error from creating the client, starting the operation, or waiting
// on it is returned unchanged.
func detectAsyncDocument(w io.Writer, gcsSourceURI, gcsDestinationURI string) error {
	ctx := context.Background()

	client, err := vision.NewImageAnnotatorClient(ctx)
	if err != nil {
		return err
	}
	// Release the client's connection once the request has completed.
	defer client.Close()

	request := &visionpb.AsyncBatchAnnotateFilesRequest{
		Requests: []*visionpb.AsyncAnnotateFileRequest{
			{
				Features: []*visionpb.Feature{
					{
						Type: visionpb.Feature_DOCUMENT_TEXT_DETECTION,
					},
				},
				InputConfig: &visionpb.InputConfig{
					GcsSource: &visionpb.GcsSource{Uri: gcsSourceURI},
					// Supported MimeTypes are: "application/pdf" and "image/tiff".
					MimeType: "application/pdf",
				},
				OutputConfig: &visionpb.OutputConfig{
					GcsDestination: &visionpb.GcsDestination{Uri: gcsDestinationURI},
					// How many pages should be grouped into each json output file.
					BatchSize: 2,
				},
			},
		},
	}

	operation, err := client.AsyncBatchAnnotateFiles(ctx, request)
	if err != nil {
		return err
	}

	fmt.Fprintf(w, "Waiting for the operation to finish.")

	// Wait blocks until the long-running operation has completed.
	resp, err := operation.Wait(ctx)
	if err != nil {
		return err
	}

	fmt.Fprintf(w, "%v", resp)

	return nil
}

// [END vision_text_detection_pdf]

// [START vision_localize_objects]

// localizeObjects gets objects and bounding boxes from the Vision API for an image at the given file path.
Expand Down Expand Up @@ -1003,7 +950,7 @@ func detectLogosURI(w io.Writer, file string) error {

// [START vision_text_detection_pdf_gcs]

// detectAsyncDocument performs Optical Character Recognition (OCR) on a
// detectAsyncDocumentURI performs Optical Character Recognition (OCR) on a
// PDF file stored in GCS.
func detectAsyncDocumentURI(w io.Writer, gcsSourceURI, gcsDestinationURI string) error {
ctx := context.Background()
Expand Down
108 changes: 60 additions & 48 deletions vision/detect/detect_test.go
Expand Up @@ -21,12 +21,10 @@ import (
"io"
"strings"
"testing"
"time"

"cloud.google.com/go/storage"
"google.golang.org/api/iterator"

"github.com/GoogleCloudPlatform/golang-samples/internal/testutil"
"google.golang.org/api/iterator"
)

func TestDetect(t *testing.T) {
Expand Down Expand Up @@ -57,30 +55,32 @@ func TestDetect(t *testing.T) {
if tt.local == nil {
continue
}

var buf bytes.Buffer
err := tt.local(&buf, "../testdata/"+tt.path)
if err != nil {
t.Fatalf("Local %s(%q): got %v, want nil err", tt.name, tt.path, err)
}
if got := buf.String(); !strings.Contains(strings.ToLower(got), strings.ToLower(tt.wantContain)) {
t.Errorf("Local %s(%q): got %q, want to contain %q", tt.name, tt.path, got, tt.wantContain)
}
t.Run(tt.name+"/local", func(t *testing.T) {
t.Parallel()
var buf bytes.Buffer
if err := tt.local(&buf, "../testdata/"+tt.path); err != nil {
t.Fatalf("Local %s(%q): got %v, want nil err", tt.name, tt.path, err)
}
if got, wantContain := strings.ToLower(buf.String()), strings.ToLower(tt.wantContain); !strings.Contains(got, wantContain) {
t.Errorf("Local %s(%q): got %q, want to contain %q", tt.name, tt.path, got, wantContain)
}
})
}

for _, tt := range tests {
if tt.gcs == nil {
continue
}

var buf bytes.Buffer
err := tt.gcs(&buf, "gs://python-docs-samples-tests/vision/"+tt.path)
if err != nil {
t.Fatalf("GCS %s(%q): got %v, want nil err", tt.name, tt.path, err)
}
if got := buf.String(); !strings.Contains(strings.ToLower(got), strings.ToLower(tt.wantContain)) {
t.Errorf("GCS %s(%q): got %q, want to contain %q", tt.name, tt.path, got, tt.wantContain)
}
t.Run(tt.name+"/gcs", func(t *testing.T) {
t.Parallel()
var buf bytes.Buffer
if err := tt.gcs(&buf, "gs://python-docs-samples-tests/vision/"+tt.path); err != nil {
t.Fatalf("GCS %s(%q): got %v, want nil err", tt.name, tt.path, err)
}
if got, wantContain := strings.ToLower(buf.String()), strings.ToLower(tt.wantContain); !strings.Contains(got, wantContain) {
t.Errorf("GCS %s(%q): got %q, want to contain %q", tt.name, tt.path, got, wantContain)
}
})
}
}

Expand All @@ -89,43 +89,19 @@ func TestDetectAsyncDocument(t *testing.T) {

ctx := context.Background()

// Create a temporary bucket
client, err := storage.NewClient(ctx)
if err != nil {
t.Fatal(err)
}

bucketName := fmt.Sprintf("%s-golang-samples-%d", tc.ProjectID, time.Now().Unix())
bucketName := fmt.Sprintf("%s-vision", tc.ProjectID)
bucket := client.Bucket(bucketName)
if err := bucket.Create(ctx, tc.ProjectID, nil); err != nil {
t.Fatal(err)
}
cleanBucket(ctx, t, client, tc.ProjectID, bucketName)

// Clean and delete the bucket at the end of the test
defer func() {
it := bucket.Objects(ctx, nil)
for {
attrs, err := it.Next()
if err == iterator.Done {
break
}
if err != nil {
t.Fatal(err)
}
if err := bucket.Object(attrs.Name).Delete(ctx); err != nil {
t.Fatal(err)
}
}
if err := bucket.Delete(ctx); err != nil {
t.Fatal(err)
}
}()

// Run the test
var buf bytes.Buffer
gcsSourceURI := "gs://python-docs-samples-tests/HodgeConj.pdf"
gcsDestinationURI := "gs://" + bucketName + "/vision/"
err = detectAsyncDocument(&buf, gcsSourceURI, gcsDestinationURI)
err = detectAsyncDocumentURI(&buf, gcsSourceURI, gcsDestinationURI)
if err != nil {
t.Fatal(err)
}
Expand All @@ -143,3 +119,39 @@ func TestDetectAsyncDocument(t *testing.T) {
}
}
}

// cleanBucket leaves the test with a freshly created, empty bucket named
// bucket in project projectID. Any existing bucket of that name is emptied
// and deleted first, then the bucket is created again. A failure at any
// step aborts the calling test.
func cleanBucket(ctx context.Context, t *testing.T, client *storage.Client, projectID, bucket string) {
	// Report failures at the caller's line rather than inside this helper.
	t.Helper()

	deleteBucketIfExists(ctx, t, client, bucket)

	b := client.Bucket(bucket)
	// Now create it
	if err := b.Create(ctx, projectID, nil); err != nil {
		t.Fatalf("Bucket.Create(%q): %v", bucket, err)
	}
}

// deleteBucketIfExists deletes the named bucket together with every object
// it contains. It returns without doing anything when the bucket does not
// exist. Any other error (looking up the bucket, listing or deleting
// objects, deleting the bucket) aborts the calling test.
func deleteBucketIfExists(ctx context.Context, t *testing.T, client *storage.Client, bucket string) {
	// Report failures at the caller's line rather than inside this helper.
	t.Helper()

	b := client.Bucket(bucket)
	if _, err := b.Attrs(ctx); err != nil {
		// Only a missing bucket means there is nothing to clean up; other
		// errors (permissions, network, ...) must not be silently ignored.
		if err == storage.ErrBucketNotExist {
			return
		}
		t.Fatalf("Bucket(%q).Attrs: %v", bucket, err)
	}

	// Delete all the elements in the already existent bucket
	it := b.Objects(ctx, nil)
	for {
		attrs, err := it.Next()
		if err == iterator.Done {
			break
		}
		if err != nil {
			t.Fatalf("Bucket.Objects(%q): %v", bucket, err)
		}
		if err := b.Object(attrs.Name).Delete(ctx); err != nil {
			t.Fatalf("Bucket(%q).Object(%q).Delete: %v", bucket, attrs.Name, err)
		}
	}
	// Then delete the bucket itself
	if err := b.Delete(ctx); err != nil {
		t.Fatalf("Bucket.Delete(%q): %v", bucket, err)
	}
}

0 comments on commit 13cc034

Please sign in to comment.