From 708a1381aa61c725f42e54e28efc695df0819645 Mon Sep 17 00:00:00 2001 From: Tamer Sherif <69483382+tasherif-msft@users.noreply.github.com> Date: Thu, 12 Jan 2023 08:59:27 -0800 Subject: [PATCH] Fail copy job if single blob does not exist (#1981) * Job fail if single file does not exist * fixed change * fail only on a single file not existing * fail on file not found * fail on file not found * fail on file not found * cleanup * added tests * cleanup * removed test --- cmd/copy.go | 8 +- cmd/copyEnumeratorInit.go | 34 ++++-- cmd/copyEnumeratorInit_test.go | 164 +++++++++++++++++++++++++++++ cmd/syncEnumerator.go | 4 +- cmd/zc_enumerator.go | 2 +- cmd/zc_traverser_benchmark.go | 4 +- cmd/zc_traverser_blob.go | 28 +++-- cmd/zc_traverser_blob_account.go | 4 +- cmd/zc_traverser_blob_versions.go | 6 +- cmd/zc_traverser_blobfs.go | 4 +- cmd/zc_traverser_blobfs_account.go | 4 +- cmd/zc_traverser_file.go | 4 +- cmd/zc_traverser_file_account.go | 4 +- cmd/zc_traverser_gcp.go | 8 +- cmd/zc_traverser_gcp_service.go | 4 +- cmd/zc_traverser_list.go | 7 +- cmd/zc_traverser_local.go | 10 +- cmd/zc_traverser_s3.go | 8 +- cmd/zc_traverser_s3_service.go | 4 +- cmd/zt_test.go | 2 + cmd/zt_traverser_blob_test.go | 121 +++++++++++++++++++++ common/fe-ste-models.go | 22 ++-- 22 files changed, 387 insertions(+), 69 deletions(-) create mode 100644 cmd/copyEnumeratorInit_test.go create mode 100644 cmd/zt_traverser_blob_test.go diff --git a/cmd/copy.go b/cmd/copy.go index b7664401a..0c7d16165 100644 --- a/cmd/copy.go +++ b/cmd/copy.go @@ -26,7 +26,6 @@ import ( "encoding/json" "errors" "fmt" - "github.com/Azure/azure-storage-azcopy/v10/jobsAdmin" "io" "math" "net/url" @@ -36,6 +35,8 @@ import ( "sync" "time" + "github.com/Azure/azure-storage-azcopy/v10/jobsAdmin" + "github.com/Azure/azure-pipeline-go/pipeline" "github.com/Azure/azure-storage-blob-go/azblob" @@ -1113,7 +1114,9 @@ type CookedCopyCmdArgs struct { FollowSymlinks bool ForceWrite common.OverwriteOption // says whether we should try to overwrite ForceIfReadOnly bool // says whether we should _force_ any overwrites (triggered by forceWrite) to work on Azure Files objects that are set to read-only - autoDecompress bool + IsSourceDir bool + + autoDecompress bool // options from flags blockSize int64 @@ -1672,7 +1675,6 @@ func (cca *CookedCopyCmdArgs) ReportProgressOrExit(lcm common.LifecycleMgr) (tot // indicate whether constrained by disk or not isBenchmark := cca.FromTo.From() == common.ELocation.Benchmark() perfString, diskString := getPerfDisplayText(summary.PerfStrings, summary.PerfConstraint, duration, isBenchmark) - return fmt.Sprintf("%.1f %%, %v Done, %v Failed, %v Pending, %v Skipped, %v Total%s, %s%s%s", summary.PercentComplete, summary.TransfersCompleted, diff --git a/cmd/copyEnumeratorInit.go b/cmd/copyEnumeratorInit.go index d54b0e823..98d57b1ab 100755 --- a/cmd/copyEnumeratorInit.go +++ b/cmd/copyEnumeratorInit.go @@ -28,6 +28,23 @@ type BucketToContainerNameResolver interface { ResolveName(bucketName string) (string, error) } +func (cca *CookedCopyCmdArgs) validateSourceDir(traverser ResourceTraverser) error { + var err error + // Ensure we're only copying a directory under valid conditions + cca.IsSourceDir, err = traverser.IsDirectory(true) + if cca.IsSourceDir && + !cca.Recursive && // Copies the folder & everything under it + !cca.StripTopDir { // Copies only everything under it + // todo: dir only transfer, also todo: support syncing the root folder's acls on sync. + return errors.New("cannot use directory as source without --recursive or a trailing wildcard (/*)") + } + // check if error is file not found - if it is then we need to make sure it's not a wild card + if err != nil && strings.EqualFold(err.Error(), common.FILE_NOT_FOUND) && !cca.StripTopDir { + return err + } + return nil +} + func (cca *CookedCopyCmdArgs) initEnumerator(jobPartOrder common.CopyJobPartOrderRequest, ctx context.Context) (*CopyEnumerator, error) { var traverser ResourceTraverser @@ -91,18 +108,14 @@ func (cca *CookedCopyCmdArgs) initEnumerator(jobPartOrder common.CopyJobPartOrde return nil, err } - // Ensure we're only copying a directory under valid conditions - isSourceDir := traverser.IsDirectory(true) - if isSourceDir && - !cca.Recursive && // Copies the folder & everything under it - !cca.StripTopDir { // Copies only everything under it - // todo: dir only transfer, also todo: support syncing the root folder's acls on sync. - return nil, errors.New("cannot use directory as source without --recursive or a trailing wildcard (/*)") + err = cca.validateSourceDir(traverser) + if err != nil { + return nil, err } - // Check if the destination is a directory so we can correctly decide where our files land + // Check if the destination is a directory to correctly decide where our files land isDestDir := cca.isDestDirectory(cca.Destination, &ctx) - if cca.ListOfVersionIDs != nil && (!(cca.FromTo == common.EFromTo.BlobLocal() || cca.FromTo == common.EFromTo.BlobTrash()) || isSourceDir || !isDestDir) { + if cca.ListOfVersionIDs != nil && (!(cca.FromTo == common.EFromTo.BlobLocal() || cca.FromTo == common.EFromTo.BlobTrash()) || cca.IsSourceDir || !isDestDir) { log.Fatalf("Either source is not a blob or destination is not a local folder") } srcLevel, err := DetermineLocationLevel(cca.Source.Value, cca.FromTo.From(), true) @@ -367,7 +380,8 @@ func (cca *CookedCopyCmdArgs) isDestDirectory(dst common.ResourceString, ctx *co return false } - return rt.IsDirectory(false) + isDir, _ := rt.IsDirectory(false) + return isDir } // Initialize the modular filters outside of copy to increase readability. diff --git a/cmd/copyEnumeratorInit_test.go b/cmd/copyEnumeratorInit_test.go new file mode 100644 index 000000000..d99d2e5ca --- /dev/null +++ b/cmd/copyEnumeratorInit_test.go @@ -0,0 +1,164 @@ +// Copyright © 2017 Microsoft +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package cmd + +import ( + "context" + "github.com/Azure/azure-storage-azcopy/v10/common" + "github.com/Azure/azure-storage-azcopy/v10/ste" + "github.com/Azure/azure-storage-blob-go/azblob" + chk "gopkg.in/check.v1" +) + +type copyEnumeratorSuite struct{} + +var _ = chk.Suite(©EnumeratorSuite{}) + +// ============================================= BLOB TRAVERSER TESTS ======================================= +func (ce *copyEnumeratorSuite) TestValidateSourceDirThatExists(c *chk.C) { + bsu := getBSU() + + // Generate source container and blobs + containerURL, containerName := createNewContainer(c, bsu) + defer deleteContainer(c, containerURL) + c.Assert(containerURL, chk.NotNil) + + dirName := "source_dir" + createNewDirectoryStub(c, containerURL, dirName) + // set up to create blob traverser + ctx := context.WithValue(context.TODO(), ste.ServiceAPIVersionOverride, ste.DefaultServiceApiVersion) + p := azblob.NewPipeline(azblob.NewAnonymousCredential(), azblob.PipelineOptions{}) + + // List + rawBlobURLWithSAS := scenarioHelper{}.getRawBlobURLWithSAS(c, containerName, dirName) + blobTraverser := newBlobTraverser(&rawBlobURLWithSAS, p, ctx, true, true, func(common.EntityType) {}, false, common.CpkOptions{}, false, false, false) + + // dir but recursive flag not set - fail + cca := CookedCopyCmdArgs{StripTopDir: false, Recursive: false} + err := cca.validateSourceDir(blobTraverser) + c.Assert(err.Error(), chk.Equals, "cannot use directory as source without --recursive or a trailing wildcard (/*)") + + // dir but recursive flag set - pass + cca.Recursive = true + err = cca.validateSourceDir(blobTraverser) + c.Assert(err, chk.IsNil) + c.Assert(cca.IsSourceDir, chk.Equals, true) +} + +func (ce *copyEnumeratorSuite) TestValidateSourceDirDoesNotExist(c *chk.C) { + bsu := getBSU() + + // Generate source container and blobs + containerURL, containerName := createNewContainer(c, bsu) + defer deleteContainer(c, containerURL) + c.Assert(containerURL, chk.NotNil) + + dirName := "source_dir/" + // set up to create blob traverser + ctx := context.WithValue(context.TODO(), ste.ServiceAPIVersionOverride, ste.DefaultServiceApiVersion) + p := azblob.NewPipeline(azblob.NewAnonymousCredential(), azblob.PipelineOptions{}) + + // List + rawBlobURLWithSAS := scenarioHelper{}.getRawBlobURLWithSAS(c, containerName, dirName) + blobTraverser := newBlobTraverser(&rawBlobURLWithSAS, p, ctx, true, true, func(common.EntityType) {}, false, common.CpkOptions{}, false, false, false) + + // dir but recursive flag not set - fail + cca := CookedCopyCmdArgs{StripTopDir: false, Recursive: false} + err := cca.validateSourceDir(blobTraverser) + c.Assert(err.Error(), chk.Equals, "cannot use directory as source without --recursive or a trailing wildcard (/*)") + + // dir but recursive flag set - pass + cca.Recursive = true + err = cca.validateSourceDir(blobTraverser) + c.Assert(err, chk.IsNil) + c.Assert(cca.IsSourceDir, chk.Equals, true) +} + +func (ce *copyEnumeratorSuite) TestValidateSourceFileExists(c *chk.C) { + bsu := getBSU() + + // Generate source container and blobs + containerURL, containerName := createNewContainer(c, bsu) + defer deleteContainer(c, containerURL) + c.Assert(containerURL, chk.NotNil) + + fileName := "source_file" + _, fileName = createNewBlockBlob(c, containerURL, fileName) + + ctx := context.WithValue(context.TODO(), ste.ServiceAPIVersionOverride, ste.DefaultServiceApiVersion) + p := azblob.NewPipeline(azblob.NewAnonymousCredential(), azblob.PipelineOptions{}) + + // List + rawBlobURLWithSAS := scenarioHelper{}.getRawBlobURLWithSAS(c, containerName, fileName) + blobTraverser := newBlobTraverser(&rawBlobURLWithSAS, p, ctx, true, true, func(common.EntityType) {}, false, common.CpkOptions{}, false, false, false) + + cca := CookedCopyCmdArgs{StripTopDir: false, Recursive: false} + err := cca.validateSourceDir(blobTraverser) + c.Assert(err, chk.IsNil) + c.Assert(cca.IsSourceDir, chk.Equals, false) +} + +func (ce *copyEnumeratorSuite) TestValidateSourceFileDoesNotExist(c *chk.C) { + bsu := getBSU() + + // Generate source container and blobs + containerURL, containerName := createNewContainer(c, bsu) + defer deleteContainer(c, containerURL) + c.Assert(containerURL, chk.NotNil) + + fileName := "source_file" + + ctx := context.WithValue(context.TODO(), ste.ServiceAPIVersionOverride, ste.DefaultServiceApiVersion) + p := azblob.NewPipeline(azblob.NewAnonymousCredential(), azblob.PipelineOptions{}) + + // List + rawBlobURLWithSAS := scenarioHelper{}.getRawBlobURLWithSAS(c, containerName, fileName) + blobTraverser := newBlobTraverser(&rawBlobURLWithSAS, p, ctx, true, true, func(common.EntityType) {}, false, common.CpkOptions{}, false, false, false) + + cca := CookedCopyCmdArgs{StripTopDir: false, Recursive: false} + err := cca.validateSourceDir(blobTraverser) + c.Assert(err.Error(), chk.Equals, common.FILE_NOT_FOUND) + c.Assert(cca.IsSourceDir, chk.Equals, false) +} + +func (ce *copyEnumeratorSuite) TestValidateSourceWithWildCard(c *chk.C) { + bsu := getBSU() + + // Generate source container and blobs + containerURL, containerName := createNewContainer(c, bsu) + defer deleteContainer(c, containerURL) + c.Assert(containerURL, chk.NotNil) + + dirName := "source_dir_does_not_exist" + // set up to create blob traverser + ctx := context.WithValue(context.TODO(), ste.ServiceAPIVersionOverride, ste.DefaultServiceApiVersion) + p := azblob.NewPipeline(azblob.NewAnonymousCredential(), azblob.PipelineOptions{}) + + // List + rawBlobURLWithSAS := scenarioHelper{}.getRawBlobURLWithSAS(c, containerName, dirName) + blobTraverser := newBlobTraverser(&rawBlobURLWithSAS, p, ctx, true, true, func(common.EntityType) {}, false, common.CpkOptions{}, false, false, false) + + // dir but recursive flag not set - fail + cca := CookedCopyCmdArgs{StripTopDir: true, Recursive: false} + err := cca.validateSourceDir(blobTraverser) + c.Assert(err, chk.IsNil) + c.Assert(cca.IsSourceDir, chk.Equals, false) +} diff --git a/cmd/syncEnumerator.go b/cmd/syncEnumerator.go index 6d7f1105a..307704224 100755 --- a/cmd/syncEnumerator.go +++ b/cmd/syncEnumerator.go @@ -92,7 +92,9 @@ func (cca *cookedSyncCmdArgs) initEnumerator(ctx context.Context) (enumerator *s } // verify that the traversers are targeting the same type of resources - if sourceTraverser.IsDirectory(true) != destinationTraverser.IsDirectory(true) { + sourceIsDir, _ := sourceTraverser.IsDirectory(true) + destIsDir, _ := destinationTraverser.IsDirectory(true) + if sourceIsDir != destIsDir { return nil, errors.New("trying to sync between different resource types (either file <-> directory or directory <-> file) which is not allowed." + "sync must happen between source and destination of the same type, e.g. either file <-> file or directory <-> directory." + "To make sure target is handled as a directory, add a trailing '/' to the target.") diff --git a/cmd/zc_enumerator.go b/cmd/zc_enumerator.go index c62bcb735..14880ccc9 100755 --- a/cmd/zc_enumerator.go +++ b/cmd/zc_enumerator.go @@ -274,7 +274,7 @@ func newStoredObject(morpher objectMorpher, name string, relativePath string, en // pass each StoredObject to the given objectProcessor if it passes all the filters type ResourceTraverser interface { Traverse(preprocessor objectMorpher, processor objectProcessor, filters []ObjectFilter) error - IsDirectory(isSource bool) bool + IsDirectory(isSource bool) (bool, error) // isDirectory has an isSource flag for a single exception to blob. // Blob should ONLY check remote if it's a source. // On destinations, because blobs and virtual directories can share names, we should support placing in both ways. diff --git a/cmd/zc_traverser_benchmark.go b/cmd/zc_traverser_benchmark.go index 0d7ee38a7..ca9d2a6eb 100644 --- a/cmd/zc_traverser_benchmark.go +++ b/cmd/zc_traverser_benchmark.go @@ -45,8 +45,8 @@ func newBenchmarkTraverser(source string, incrementEnumerationCounter enumeratio nil } -func (t *benchmarkTraverser) IsDirectory(bool) bool { - return true +func (t *benchmarkTraverser) IsDirectory(bool) (bool, error) { + return true, nil } func (_ *benchmarkTraverser) toReversedString(i uint) string { diff --git a/cmd/zc_traverser_blob.go b/cmd/zc_traverser_blob.go index 6430fa58a..142de3b58 100644 --- a/cmd/zc_traverser_blob.go +++ b/cmd/zc_traverser_blob.go @@ -61,28 +61,30 @@ type blobTraverser struct { includeSnapshot bool includeVersion bool + + stripTopDir bool } -func (t *blobTraverser) IsDirectory(isSource bool) bool { +func (t *blobTraverser) IsDirectory(isSource bool) (bool, error) { isDirDirect := copyHandlerUtil{}.urlIsContainerOrVirtualDirectory(t.rawURL) // Skip the single blob check if we're checking a destination. // This is an individual exception for blob because blob supports virtual directories and blobs sharing the same name. if isDirDirect || !isSource { - return isDirDirect + return isDirDirect, nil } - _, _, isDirStub, err := t.getPropertiesIfSingleBlob() + _, _, isDirStub, blobErr := t.getPropertiesIfSingleBlob() - if stgErr, ok := err.(azblob.StorageError); ok { + if stgErr, ok := blobErr.(azblob.StorageError); ok { // We know for sure this is a single blob still, let it walk on through to the traverser. if stgErr.ServiceCode() == common.CPK_ERROR_SERVICE_CODE { - return false + return false, nil } } - if err == nil { - return isDirStub + if blobErr == nil { + return isDirStub, nil } blobURLParts := azblob.NewBlobURLParts(*t.rawURL) @@ -95,15 +97,21 @@ func (t *blobTraverser) IsDirectory(isSource bool) bool { msg := fmt.Sprintf("Failed to check if the destination is a folder or a file (Azure Files). Assuming the destination is a file: %s", err) azcopyScanningLogger.Log(pipeline.LogError, msg) } - return false + return false, nil } if len(resp.Segment.BlobItems) == 0 { //Not a directory - return false + if stgErr, ok := blobErr.(azblob.StorageError); ok { + // if the blob is not found return the error to throw + if stgErr.ServiceCode() == common.BLOB_NOT_FOUND { + return false, errors.New(common.FILE_NOT_FOUND) + } + } + return false, blobErr } - return true + return true, nil } func (t *blobTraverser) getPropertiesIfSingleBlob() (props *azblob.BlobGetPropertiesResponse, isBlob bool, isDirStub bool, err error) { diff --git a/cmd/zc_traverser_blob_account.go b/cmd/zc_traverser_blob_account.go index 10c27637a..6c946e01c 100644 --- a/cmd/zc_traverser_blob_account.go +++ b/cmd/zc_traverser_blob_account.go @@ -47,8 +47,8 @@ type blobAccountTraverser struct { cpkOptions common.CpkOptions } -func (t *blobAccountTraverser) IsDirectory(_ bool) bool { - return true // Returns true as account traversal is inherently folder-oriented and recursive. +func (t *blobAccountTraverser) IsDirectory(_ bool) (bool, error) { + return true, nil // Returns true as account traversal is inherently folder-oriented and recursive. } func (t *blobAccountTraverser) listContainers() ([]string, error) { diff --git a/cmd/zc_traverser_blob_versions.go b/cmd/zc_traverser_blob_versions.go index 95e90e403..c280a7964 100644 --- a/cmd/zc_traverser_blob_versions.go +++ b/cmd/zc_traverser_blob_versions.go @@ -40,17 +40,17 @@ type blobVersionsTraverser struct { cpkOptions common.CpkOptions } -func (t *blobVersionsTraverser) IsDirectory(isSource bool) bool { +func (t *blobVersionsTraverser) IsDirectory(isSource bool) (bool, error) { isDirDirect := copyHandlerUtil{}.urlIsContainerOrVirtualDirectory(t.rawURL) // Skip the single blob check if we're checking a destination. // This is an individual exception for blob because blob supports virtual directories and blobs sharing the same name. if isDirDirect || !isSource { - return isDirDirect + return isDirDirect, nil } // The base blob may not exist in some cases. - return false + return false, nil } func (t *blobVersionsTraverser) getBlobProperties(versionID string) (props *azblob.BlobGetPropertiesResponse, err error) { diff --git a/cmd/zc_traverser_blobfs.go b/cmd/zc_traverser_blobfs.go index 35e0d646d..7ed5405b6 100644 --- a/cmd/zc_traverser_blobfs.go +++ b/cmd/zc_traverser_blobfs.go @@ -54,8 +54,8 @@ func newBlobFSTraverser(rawURL *url.URL, p pipeline.Pipeline, ctx context.Contex return } -func (t *blobFSTraverser) IsDirectory(bool) bool { - return copyHandlerUtil{}.urlIsBFSFileSystemOrDirectory(t.ctx, t.rawURL, t.p) // This gets all the fanciness done for us. +func (t *blobFSTraverser) IsDirectory(bool) (bool, error) { + return copyHandlerUtil{}.urlIsBFSFileSystemOrDirectory(t.ctx, t.rawURL, t.p), nil // This gets all the fanciness done for us. } func (t *blobFSTraverser) getPropertiesIfSingleFile() (*azbfs.PathGetPropertiesResponse, bool, error) { diff --git a/cmd/zc_traverser_blobfs_account.go b/cmd/zc_traverser_blobfs_account.go index 3c5f7396c..b768b7b0d 100644 --- a/cmd/zc_traverser_blobfs_account.go +++ b/cmd/zc_traverser_blobfs_account.go @@ -44,8 +44,8 @@ type BlobFSAccountTraverser struct { incrementEnumerationCounter enumerationCounterFunc } -func (t *BlobFSAccountTraverser) IsDirectory(isSource bool) bool { - return true // Returns true as account traversal is inherently folder-oriented and recursive. +func (t *BlobFSAccountTraverser) IsDirectory(isSource bool) (bool, error) { + return true, nil // Returns true as account traversal is inherently folder-oriented and recursive. } func (t *BlobFSAccountTraverser) listContainers() ([]string, error) { diff --git a/cmd/zc_traverser_file.go b/cmd/zc_traverser_file.go index 8ec1a4269..06027f6eb 100644 --- a/cmd/zc_traverser_file.go +++ b/cmd/zc_traverser_file.go @@ -46,8 +46,8 @@ type fileTraverser struct { incrementEnumerationCounter enumerationCounterFunc } -func (t *fileTraverser) IsDirectory(bool) bool { - return copyHandlerUtil{}.urlIsAzureFileDirectory(t.ctx, t.rawURL, t.p) // This handles all of the fanciness for us. +func (t *fileTraverser) IsDirectory(bool) (bool, error) { + return copyHandlerUtil{}.urlIsAzureFileDirectory(t.ctx, t.rawURL, t.p), nil // This handles all of the fanciness for us. } func (t *fileTraverser) getPropertiesIfSingleFile() (*azfile.FileGetPropertiesResponse, bool) { diff --git a/cmd/zc_traverser_file_account.go b/cmd/zc_traverser_file_account.go index e4b50f2fa..edbcbb50a 100644 --- a/cmd/zc_traverser_file_account.go +++ b/cmd/zc_traverser_file_account.go @@ -42,8 +42,8 @@ type fileAccountTraverser struct { incrementEnumerationCounter enumerationCounterFunc } -func (t *fileAccountTraverser) IsDirectory(isSource bool) bool { - return true // Returns true as account traversal is inherently folder-oriented and recursive. +func (t *fileAccountTraverser) IsDirectory(isSource bool) (bool, error) { + return true, nil // Returns true as account traversal is inherently folder-oriented and recursive. } func (t *fileAccountTraverser) listContainers() ([]string, error) { diff --git a/cmd/zc_traverser_gcp.go b/cmd/zc_traverser_gcp.go index 25010b327..278df9327 100644 --- a/cmd/zc_traverser_gcp.go +++ b/cmd/zc_traverser_gcp.go @@ -24,20 +24,20 @@ type gcpTraverser struct { incrementEnumerationCounter enumerationCounterFunc } -func (t *gcpTraverser) IsDirectory(isSource bool) bool { +func (t *gcpTraverser) IsDirectory(isSource bool) (bool, error) { //Identify whether directory or not syntactically isDirDirect := !t.gcpURLParts.IsObjectSyntactically() && (t.gcpURLParts.IsDirectorySyntactically() || t.gcpURLParts.IsBucketSyntactically()) if !isSource { - return isDirDirect + return isDirDirect, nil } bkt := t.gcpClient.Bucket(t.gcpURLParts.BucketName) obj := bkt.Object(t.gcpURLParts.ObjectKey) //Directories do not have attributes and hence throw error _, err := obj.Attrs(t.ctx) if err == gcpUtils.ErrObjectNotExist { - return true + return true, nil } - return false + return false, nil } func (t *gcpTraverser) Traverse(preprocessor objectMorpher, processor objectProcessor, filters []ObjectFilter) error { diff --git a/cmd/zc_traverser_gcp_service.go b/cmd/zc_traverser_gcp_service.go index 3a076529f..e4bf2ff89 100644 --- a/cmd/zc_traverser_gcp_service.go +++ b/cmd/zc_traverser_gcp_service.go @@ -24,8 +24,8 @@ type gcpServiceTraverser struct { var projectID = "" -func (t *gcpServiceTraverser) IsDirectory(isSource bool) bool { - return true //Account traversals are inherently folder based +func (t *gcpServiceTraverser) IsDirectory(isSource bool) (bool, error) { + return true, nil //Account traversals are inherently folder based } func (t *gcpServiceTraverser) listContainers() ([]string, error) { diff --git a/cmd/zc_traverser_list.go b/cmd/zc_traverser_list.go index 791639248..3776bf9e6 100755 --- a/cmd/zc_traverser_list.go +++ b/cmd/zc_traverser_list.go @@ -40,8 +40,8 @@ type listTraverser struct { type childTraverserGenerator func(childPath string) (ResourceTraverser, error) // There is no impact to a list traverser returning false because a list traverser points directly to relative paths. -func (l *listTraverser) IsDirectory(bool) bool { - return false +func (l *listTraverser) IsDirectory(bool) (bool, error) { + return false, nil } // To kill the traverser, close() the channel under it. @@ -61,7 +61,8 @@ func (l *listTraverser) Traverse(preprocessor objectMorpher, processor objectPro } // listTraverser will only ever execute on the source - if !l.recursive && childTraverser.IsDirectory(true) { + isDir, _ := childTraverser.IsDirectory(true) + if !l.recursive && isDir { continue // skip over directories } diff --git a/cmd/zc_traverser_local.go b/cmd/zc_traverser_local.go index a5980a7b0..c9cd644e7 100755 --- a/cmd/zc_traverser_local.go +++ b/cmd/zc_traverser_local.go @@ -48,18 +48,18 @@ type localTraverser struct { errorChannel chan ErrorFileInfo } -func (t *localTraverser) IsDirectory(bool) bool { +func (t *localTraverser) IsDirectory(bool) (bool, error) { if strings.HasSuffix(t.fullPath, "/") { - return true + return true, nil } props, err := common.OSStat(t.fullPath) if err != nil { - return false + return false, err } - return props.IsDir() + return props.IsDir(), nil } func (t *localTraverser) getInfoIfSingleFile() (os.FileInfo, bool, error) { @@ -363,7 +363,7 @@ func WalkWithSymlinks(appCtx context.Context, fullPath string, walkFunc filepath func (t *localTraverser) Traverse(preprocessor objectMorpher, processor objectProcessor, filters []ObjectFilter) (err error) { singleFileInfo, isSingleFile, err := t.getInfoIfSingleFile() - + // it fails here if file does not exist if err != nil { azcopyScanningLogger.Log(pipeline.LogError, fmt.Sprintf("Failed to scan path %s: %s", t.fullPath, err.Error())) return fmt.Errorf("failed to scan path %s due to %s", t.fullPath, err.Error()) diff --git a/cmd/zc_traverser_s3.go b/cmd/zc_traverser_s3.go index f30dd4bff..825ad4fcb 100644 --- a/cmd/zc_traverser_s3.go +++ b/cmd/zc_traverser_s3.go @@ -45,22 +45,22 @@ type s3Traverser struct { incrementEnumerationCounter enumerationCounterFunc } -func (t *s3Traverser) IsDirectory(isSource bool) bool { +func (t *s3Traverser) IsDirectory(isSource bool) (bool, error) { // Do a basic syntax check isDirDirect := !t.s3URLParts.IsObjectSyntactically() && (t.s3URLParts.IsDirectorySyntactically() || t.s3URLParts.IsBucketSyntactically()) // S3 can convert directories and objects sharing names as well. if !isSource { - return isDirDirect + return isDirDirect, nil } _, err := t.s3Client.StatObject(t.s3URLParts.BucketName, t.s3URLParts.ObjectKey, minio.StatObjectOptions{}) if err != nil { - return true + return true, nil } - return false + return false, nil } func (t *s3Traverser) Traverse(preprocessor objectMorpher, processor objectProcessor, filters []ObjectFilter) (err error) { diff --git a/cmd/zc_traverser_s3_service.go b/cmd/zc_traverser_s3_service.go index 1362292cb..413a09baf 100644 --- a/cmd/zc_traverser_s3_service.go +++ b/cmd/zc_traverser_s3_service.go @@ -48,8 +48,8 @@ type s3ServiceTraverser struct { incrementEnumerationCounter enumerationCounterFunc } -func (t *s3ServiceTraverser) IsDirectory(isSource bool) bool { - return true // Returns true as account traversal is inherently folder-oriented and recursive. +func (t *s3ServiceTraverser) IsDirectory(isSource bool) (bool, error) { + return true, nil // Returns true as account traversal is inherently folder-oriented and recursive. } func (t *s3ServiceTraverser) listContainers() ([]string, error) { diff --git a/cmd/zt_test.go b/cmd/zt_test.go index 1c317b49d..76eba7e0a 100644 --- a/cmd/zt_test.go +++ b/cmd/zt_test.go @@ -270,6 +270,7 @@ func getAccountAndKey() (string, string) { return name, key } +// get blob account service URL func getBSU() azblob.ServiceURL { accountName, accountKey := getAccountAndKey() u, _ := url.Parse(fmt.Sprintf("https://%s.blob.core.windows.net/", accountName)) @@ -357,6 +358,7 @@ func createNewBlockBlob(c *chk.C, container azblob.ContainerURL, prefix string) return } +// create metadata indicating that this is a dir func createNewDirectoryStub(c *chk.C, container azblob.ContainerURL, dirPath string) { dir := container.NewBlockBlobURL(dirPath) diff --git a/cmd/zt_traverser_blob_test.go b/cmd/zt_traverser_blob_test.go new file mode 100644 index 000000000..cd2eb4879 --- /dev/null +++ b/cmd/zt_traverser_blob_test.go @@ -0,0 +1,121 @@ +// Copyright © 2017 Microsoft +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package cmd + +import ( + "context" + "github.com/Azure/azure-storage-azcopy/v10/common" + "github.com/Azure/azure-storage-azcopy/v10/ste" + "github.com/Azure/azure-storage-blob-go/azblob" + chk "gopkg.in/check.v1" +) + +type traverserBlobSuite struct{} + +var _ = chk.Suite(&traverserBlobSuite{}) + +func (s *traverserBlobSuite) TestIsSourceDirWithStub(c *chk.C) { + bsu := getBSU() + + // Generate source container and blobs + containerURL, containerName := createNewContainer(c, bsu) + defer deleteContainer(c, containerURL) + c.Assert(containerURL, chk.NotNil) + + dirName := "source_dir" + createNewDirectoryStub(c, containerURL, dirName) + // set up to create blob traverser + ctx := context.WithValue(context.TODO(), ste.ServiceAPIVersionOverride, ste.DefaultServiceApiVersion) + p := azblob.NewPipeline(azblob.NewAnonymousCredential(), azblob.PipelineOptions{}) + + // List + rawBlobURLWithSAS := scenarioHelper{}.getRawBlobURLWithSAS(c, containerName, dirName) + blobTraverser := newBlobTraverser(&rawBlobURLWithSAS, p, ctx, true, true, func(common.EntityType) {}, false, common.CpkOptions{}, false, false, false) + + isDir, err := blobTraverser.IsDirectory(true) + c.Assert(isDir, chk.Equals, true) + c.Assert(err, chk.Equals, nil) +} + +func (s *traverserBlobSuite) TestIsSourceDirWithNoStub(c *chk.C) { + bsu := getBSU() + + // Generate source container and blobs + containerURL, containerName := createNewContainer(c, bsu) + defer deleteContainer(c, containerURL) + c.Assert(containerURL, chk.NotNil) + + dirName := "source_dir/" + ctx := context.WithValue(context.TODO(), ste.ServiceAPIVersionOverride, ste.DefaultServiceApiVersion) + p := azblob.NewPipeline(azblob.NewAnonymousCredential(), azblob.PipelineOptions{}) + + // List + rawBlobURLWithSAS := scenarioHelper{}.getRawBlobURLWithSAS(c, containerName, dirName) + blobTraverser := newBlobTraverser(&rawBlobURLWithSAS, p, ctx, true, true, func(common.EntityType) {}, false, common.CpkOptions{}, false, false, false) + + isDir, err := blobTraverser.IsDirectory(true) + c.Assert(isDir, chk.Equals, true) + c.Assert(err, chk.Equals, nil) +} + +func (s *traverserBlobSuite) TestIsSourceFileExists(c *chk.C) { + bsu := getBSU() + + // Generate source container and blobs + containerURL, containerName := createNewContainer(c, bsu) + defer deleteContainer(c, containerURL) + c.Assert(containerURL, chk.NotNil) + + fileName := "source_file" + _, fileName = createNewBlockBlob(c, containerURL, fileName) + + ctx := context.WithValue(context.TODO(), ste.ServiceAPIVersionOverride, ste.DefaultServiceApiVersion) + p := azblob.NewPipeline(azblob.NewAnonymousCredential(), azblob.PipelineOptions{}) + + // List + rawBlobURLWithSAS := scenarioHelper{}.getRawBlobURLWithSAS(c, containerName, fileName) + blobTraverser := newBlobTraverser(&rawBlobURLWithSAS, p, ctx, true, true, func(common.EntityType) {}, false, common.CpkOptions{}, false, false, false) + + isDir, err := blobTraverser.IsDirectory(true) + c.Assert(isDir, chk.Equals, false) + c.Assert(err, chk.IsNil) +} + +func (s *traverserBlobSuite) TestIsSourceFileDoesNotExist(c *chk.C) { + bsu := getBSU() + + // Generate source container and blobs + containerURL, containerName := createNewContainer(c, bsu) + defer deleteContainer(c, containerURL) + c.Assert(containerURL, chk.NotNil) + + fileName := "file_does_not_exist" + ctx := context.WithValue(context.TODO(), ste.ServiceAPIVersionOverride, ste.DefaultServiceApiVersion) + p := azblob.NewPipeline(azblob.NewAnonymousCredential(), azblob.PipelineOptions{}) + + // List + rawBlobURLWithSAS := scenarioHelper{}.getRawBlobURLWithSAS(c, containerName, fileName) + blobTraverser := newBlobTraverser(&rawBlobURLWithSAS, p, ctx, true, true, func(common.EntityType) {}, false, common.CpkOptions{}, false, false, false) + + isDir, err := blobTraverser.IsDirectory(true) + c.Assert(isDir, chk.Equals, false) + c.Assert(err.Error(), chk.Equals, common.FILE_NOT_FOUND) +} diff --git a/common/fe-ste-models.go b/common/fe-ste-models.go index 46d68d396..0db482d90 100644 --- a/common/fe-ste-models.go +++ b/common/fe-ste-models.go @@ -55,6 +55,8 @@ const ( // Since we haven't updated the Go SDKs to handle CPK just yet, we need to detect CPK related errors // and inform the user that we don't support CPK yet. CPK_ERROR_SERVICE_CODE = "BlobUsesCustomerSpecifiedEncryption" + BLOB_NOT_FOUND = "BlobNotFound" + FILE_NOT_FOUND = "The specified file was not found." ) //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -110,7 +112,7 @@ type PartNumber uint32 type Version uint32 type Status uint32 -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// var EDeleteSnapshotsOption = DeleteSnapshotsOption(0) type DeleteSnapshotsOption uint8 @@ -145,7 +147,7 @@ func (d DeleteSnapshotsOption) ToDeleteSnapshotsOptionType() azblob.DeleteSnapsh return azblob.DeleteSnapshotsOptionType(strings.ToLower(d.String())) } -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// var EPermanentDeleteOption = PermanentDeleteOption(3) // Default to "None" type PermanentDeleteOption uint8 @@ -610,7 +612,7 @@ func (ft *FromTo) IsPropertyOnlyTransfer() bool { var BenchmarkLmt = time.Date(1900, 1, 1, 0, 0, 0, 0, time.UTC) -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Enumerates the values for blob type. type BlobType uint8 @@ -679,9 +681,11 @@ func (t TransferStatus) StatusLocked() bool { // Is an overwrite necessary to ch func (TransferStatus) NotStarted() TransferStatus { return TransferStatus(0) } // TODO confirm whether this is actually needed -// Outdated: -// Transfer started & at least 1 chunk has successfully been transferred. -// Used to resume a transfer that started to avoid transferring all chunks thereby improving performance +// +// Outdated: +// Transfer started & at least 1 chunk has successfully been transferred. +// Used to resume a transfer that started to avoid transferring all chunks thereby improving performance +// // Update(Jul 2020): This represents the state of transfer as soon as the file is scheduled. func (TransferStatus) Started() TransferStatus { return TransferStatus(1) } @@ -977,7 +981,7 @@ func (i *InvalidMetadataHandleOption) UnmarshalJSON(b []byte) error { return i.Parse(s) } -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// const ( DefaultBlockBlobBlockSize = 8 * 1024 * 1024 MaxBlockBlobBlockSize = 4000 * 1024 * 1024 @@ -1566,7 +1570,7 @@ func GetClientProvidedKey(options CpkOptions) azblob.ClientProvidedKeyOptions { return ToClientProvidedKeyOptions(_cpkInfo, _cpkScopeInfo) } -//////////////////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////////////////// type SetPropertiesFlags uint32 // [0000000000...32 times] var ESetPropertiesFlags = SetPropertiesFlags(0) @@ -1589,7 +1593,7 @@ func (op *SetPropertiesFlags) ShouldTransferBlobTags() bool { return (*op)&ESetPropertiesFlags.SetBlobTags() == ESetPropertiesFlags.SetBlobTags() } -//////////////////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////////////////// type RehydratePriorityType uint8 var ERehydratePriorityType = RehydratePriorityType(0) // setting default as none