Skip to content

Commit

Permalink
List obj fix (#1060)
Browse files Browse the repository at this point in the history
Fix in how iteration is done in ListObject method. Refer to PR description for more details.
  • Loading branch information
sethiay committed Apr 19, 2023
1 parent 6e9754e commit bcfff52
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 14 deletions.
19 changes: 12 additions & 7 deletions internal/storage/bucket_handle.go
Original file line number Diff line number Diff line change
Expand Up @@ -253,13 +253,7 @@ func (b *bucketHandle) ListObjects(ctx context.Context, req *gcs.ListObjectsRequ
// Iterating through all the objects in the bucket and one by one adding them to the list.
for {
var attrs *storage.ObjectAttrs
// itr.next returns all the objects present in the bucket. Hence adding a
// check to break after required number of objects are returned.
// If req.MaxResults is 0, then wait till iterator is done. This is similar
// to https://github.com/GoogleCloudPlatform/gcsfuse/blob/master/vendor/github.com/jacobsa/gcloud/gcs/bucket.go#L164
if req.MaxResults != 0 && (len(list.Objects) == req.MaxResults) {
break
}

attrs, err = itr.Next()
if err == iterator.Done {
err = nil
Expand All @@ -280,6 +274,17 @@ func (b *bucketHandle) ListObjects(ctx context.Context, req *gcs.ListObjectsRequ
currObject := storageutil.ObjectAttrsToBucketObject(attrs)
list.Objects = append(list.Objects, currObject)
}

// itr.next returns all the objects present in the bucket. Hence adding a
// check to break after iterating over the current page. pi.Remaining()
// function returns number of items (items + prefixes) remaining in current
// page to be iterated by iterator (itr). The func returns (number of items in current page - 1)
// after first itr.Next() call and becomes 0 when iteration is done.
// If req.MaxResults is 0, then wait till iterator is done. This is similar
// to https://github.com/GoogleCloudPlatform/gcsfuse/blob/master/vendor/github.com/jacobsa/gcloud/gcs/bucket.go#L164
if req.MaxResults != 0 && (pi.Remaining() == 0) {
break
}
}

list.ContinuationToken = itr.PageInfo().Token
Expand Down
19 changes: 12 additions & 7 deletions internal/storage/bucket_handle_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -448,17 +448,17 @@ func (t *BucketHandleTest) TestListObjectMethodForMaxResult() {
&gcs.ListObjectsRequest{
Prefix: "",
Delimiter: "",
IncludeTrailingDelimiter: true,
IncludeTrailingDelimiter: false,
ContinuationToken: "",
MaxResults: 4,
ProjectionVal: 0,
})

twoObj, err2 := t.bucketHandle.ListObjects(context.Background(),
&gcs.ListObjectsRequest{
Prefix: "",
Delimiter: "",
IncludeTrailingDelimiter: true,
Prefix: "gcsfuse/",
Delimiter: "/",
IncludeTrailingDelimiter: false,
ContinuationToken: "",
MaxResults: 2,
ProjectionVal: 0,
Expand All @@ -473,12 +473,17 @@ func (t *BucketHandleTest) TestListObjectMethodForMaxResult() {
AssertEq(TestObjectName, fourObj.Objects[3].Name)
AssertEq(nil, fourObj.CollapsedRuns)

// Validate that 2 objects are listed when MaxResults is passed 2.
// Note: The behavior is different in real GCS storage JSON API. In real API,
// only 1 object and 1 collapsedRuns would have been returned if
// IncludeTrailingDelimiter = false and 2 objects and 1 collapsedRuns if
// IncludeTrailingDelimiter = true.
// This is because fake storage doesn't support pagination and hence maxResults
// has no affect.
AssertEq(nil, err2)
AssertEq(2, len(twoObj.Objects))
AssertEq(TestObjectRootFolderName, twoObj.Objects[0].Name)
AssertEq(TestObjectSubRootFolderName, twoObj.Objects[1].Name)
AssertEq(nil, twoObj.CollapsedRuns)
AssertEq(TestObjectName, twoObj.Objects[1].Name)
AssertEq(1, len(twoObj.CollapsedRuns))
}

func (t *BucketHandleTest) TestListObjectMethodWithMissingMaxResult() {
Expand Down

0 comments on commit bcfff52

Please sign in to comment.