Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Linux/POSIX properties persistence #1780

Merged
merged 27 commits into from
Jun 1, 2022
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
3cf159f
SIP Implementation
adreed-msft Apr 4, 2022
d80276b
Opt for an interface instead of a struct
adreed-msft May 2, 2022
087199f
Implement upload
adreed-msft May 2, 2022
0ee0c67
Implement file creation
adreed-msft May 4, 2022
145c082
Implement property setting; todo: error handling
adreed-msft May 5, 2022
b3c8ae9
Error handling
adreed-msft May 6, 2022
ab20deb
Add upload support to append/page blob
adreed-msft May 6, 2022
8f54d37
Implement flag
adreed-msft May 10, 2022
8ff7c84
Somewhat fix download
adreed-msft May 12, 2022
2a94119
Merge branch 'dev' into adreed/unix-properties
adreed-msft May 12, 2022
7e6cc76
Handle folder uploading/copying
adreed-msft May 23, 2022
06f4f7d
Merge remote-tracking branch 'origin/adreed/unix-properties' into adr…
adreed-msft May 23, 2022
3017a7e
Remove download
adreed-msft May 23, 2022
e1aeda8
Fix getUNIXProperties
adreed-msft May 23, 2022
0bd00bc
Remove device filtering
adreed-msft May 24, 2022
5e18a72
Convert flag to bool
adreed-msft May 25, 2022
1fecf73
Attempt to fix accidental folder transfers
adreed-msft May 25, 2022
400cbd2
Fix test function signatures
adreed-msft May 25, 2022
be7f803
Add preserve-posix-properties to sync
adreed-msft May 25, 2022
e1920ca
Limit auto-setting to sync and copy
adreed-msft May 25, 2022
6b8b4ce
Default to false
adreed-msft May 26, 2022
9e34de1
Potentially fix testing
adreed-msft May 26, 2022
473eb21
Add check for flag
adreed-msft May 26, 2022
eb01a51
Shift back to single-call
adreed-msft May 27, 2022
ba9127f
Handle comments on PR
adreed-msft May 27, 2022
0702189
Add includeDirectoryStubs to folder property considerations
adreed-msft May 27, 2022
1f39f18
Address final comments
adreed-msft May 31, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions cmd/copy.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,8 @@ type rawCopyCmdArgs struct {
// Opt-in flag to persist additional SMB properties to Azure Files. Named ...info instead of ...properties
// because the latter was similar enough to preserveSMBPermissions to induce user error
preserveSMBInfo bool
// Opt-in flag to persist additional POSIX properties
preservePOSIXProperties string
// Opt-in flag to preserve the blob index tags during service to service transfer.
s2sPreserveBlobTags bool
// Flag to enable Window's special privileges
Expand Down Expand Up @@ -638,6 +640,22 @@ func (raw rawCopyCmdArgs) cook() (CookedCopyCmdArgs, error) {
cooked.preserveSMBInfo = false
}

if raw.preservePOSIXProperties == "" && areBothLocationsPOSIXAware(cooked.FromTo) {
if cooked.FromTo.IsUpload() {
raw.preservePOSIXProperties = common.EPosixPropertiesOption.FullFidelity().String()
} else if cooked.FromTo.IsDownload() {
raw.preservePOSIXProperties = common.EPosixPropertiesOption.SpecialFilesAndProperties().String() // todo: should we default to FullFidelity? It may not always work out well for the customer.
} // ignore BlobBlob, these properties persist via metadata & it doesn't make sense to explicitly ignore these bits of metadata.
adreed-msft marked this conversation as resolved.
Show resolved Hide resolved
adreed-msft marked this conversation as resolved.
Show resolved Hide resolved
// todo: should we consider appending it to a user's custom metadata if it's specified for BlobBlob?
adreed-msft marked this conversation as resolved.
Show resolved Hide resolved
}

if err := cooked.preservePOSIXProperties.Parse(raw.preservePOSIXProperties); err != nil {
return cooked, err
}
if cooked.preservePOSIXProperties.IsTruthy() && !areBothLocationsPOSIXAware(cooked.FromTo) {
return cooked, errors.New("preserve-posix-permissions is set, but the job is not between POSIX property aware resources")
}

if err = validatePreserveSMBPropertyOption(cooked.preserveSMBInfo, cooked.FromTo, &cooked.ForceWrite, "preserve-smb-info"); err != nil {
return cooked, err
}
Expand Down Expand Up @@ -907,6 +925,11 @@ func areBothLocationsSMBAware(fromTo common.FromTo) bool {
}
}

func areBothLocationsPOSIXAware(fromTo common.FromTo) bool {
// POSIX properties are stored in blob metadata-- They don't need a special persistence strategy for BlobBlob. That said, we silently ignore the BlobBlob case, because customers are assumed to be blind to the inner workings of AzCopy.
adreed-msft marked this conversation as resolved.
Show resolved Hide resolved
return (runtime.GOOS == "linux" && (fromTo == common.EFromTo.BlobLocal() || fromTo == common.EFromTo.LocalBlob())) || fromTo == common.EFromTo.BlobBlob()
}

func validatePreserveSMBPropertyOption(toPreserve bool, fromTo common.FromTo, overwrite *common.OverwriteOption, flagName string) error {
if toPreserve && !(fromTo == common.EFromTo.LocalFile() ||
fromTo == common.EFromTo.FileLocal() ||
Expand Down Expand Up @@ -1104,6 +1127,8 @@ type CookedCopyCmdArgs struct {
preservePermissions common.PreservePermissionsOption
// Whether the user wants to preserve the SMB properties ...
preserveSMBInfo bool
// Whether the user wants to preserve the POSIX properties ...
preservePOSIXProperties common.PosixPropertiesOption

// Whether to enable Windows special privileges
backupMode bool
Expand Down Expand Up @@ -1869,6 +1894,7 @@ func init() {
cpCmd.PersistentFlags().BoolVar(&raw.asSubdir, "as-subdir", true, "True by default. Places folder sources as subdirectories under the destination.")
cpCmd.PersistentFlags().BoolVar(&raw.preserveOwner, common.PreserveOwnerFlagName, common.PreserveOwnerDefault, "Only has an effect in downloads, and only when --preserve-smb-permissions is used. If true (the default), the file Owner and Group are preserved in downloads. If set to false, --preserve-smb-permissions will still preserve ACLs but Owner and Group will be based on the user running AzCopy")
cpCmd.PersistentFlags().BoolVar(&raw.preserveSMBInfo, "preserve-smb-info", true, "For SMB-aware locations, flag will be set to true by default. Preserves SMB property info (last write time, creation time, attribute bits) between SMB-aware resources (Windows and Azure Files). Only the attribute bits supported by Azure Files will be transferred; any others will be ignored. This flag applies to both files and folders, unless a file-only filter is specified (e.g. include-pattern). The info transferred for folders is the same as that for files, except for Last Write Time which is never preserved for folders.")
cpCmd.PersistentFlags().StringVar(&raw.preservePOSIXProperties, "preserve-posix-properties", "", "On Linux, the flag will be set to true by default. Preserves property info gleamed from stat or statx.")
cpCmd.PersistentFlags().BoolVar(&raw.forceIfReadOnly, "force-if-read-only", false, "When overwriting an existing file on Windows or Azure Files, force the overwrite to work even if the existing file has its read-only attribute set")
cpCmd.PersistentFlags().BoolVar(&raw.backupMode, common.BackupModeFlagName, false, "Activates Windows' SeBackupPrivilege for uploads, or SeRestorePrivilege for downloads, to allow AzCopy to see read all files, regardless of their file system permissions, and to restore all permissions. Requires that the account running AzCopy already has these permissions (e.g. has Administrator rights or is a member of the 'Backup Operators' group). All this flag does is activate privileges that the account already has")
cpCmd.PersistentFlags().BoolVar(&raw.putMd5, "put-md5", false, "Create an MD5 hash of each file, and save the hash as the Content-MD5 property of the destination blob or file. (By default the hash is NOT created.) Only available when uploading.")
Expand Down
10 changes: 3 additions & 7 deletions cmd/copyEnumeratorInit.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ func (cca *CookedCopyCmdArgs) initEnumerator(jobPartOrder common.CopyJobPartOrde
jobPartOrder.CpkOptions = cca.CpkOptions
jobPartOrder.PreserveSMBPermissions = cca.preservePermissions
jobPartOrder.PreserveSMBInfo = cca.preserveSMBInfo
jobPartOrder.PreservePOSIXProperties = cca.preservePOSIXProperties

// Infer on download so that we get LMT and MD5 on files download
// On S2S transfers the following rules apply:
Expand All @@ -80,10 +81,7 @@ func (cca *CookedCopyCmdArgs) initEnumerator(jobPartOrder common.CopyJobPartOrde
jobPartOrder.S2SInvalidMetadataHandleOption = cca.s2sInvalidMetadataHandleOption
jobPartOrder.S2SPreserveBlobTags = cca.S2sPreserveBlobTags

traverser, err = InitResourceTraverser(cca.Source, cca.FromTo.From(), &ctx, &srcCredInfo,
&cca.FollowSymlinks, cca.ListOfFilesChannel, cca.Recursive, getRemoteProperties,
cca.IncludeDirectoryStubs, cca.permanentDeleteOption, func(common.EntityType) {}, cca.ListOfVersionIDs,
cca.S2sPreserveBlobTags, cca.LogVerbosity.ToPipelineLogLevel(), cca.CpkOptions)
traverser, err = InitResourceTraverser(cca.Source, cca.FromTo.From(), &ctx, &srcCredInfo, &cca.FollowSymlinks, cca.ListOfFilesChannel, cca.Recursive, getRemoteProperties, cca.IncludeDirectoryStubs, cca.permanentDeleteOption, func(common.EntityType) {}, cca.ListOfVersionIDs, cca.S2sPreserveBlobTags, cca.LogVerbosity.ToPipelineLogLevel(), cca.CpkOptions, cca.preservePOSIXProperties)

if err != nil {
return nil, err
Expand Down Expand Up @@ -354,9 +352,7 @@ func (cca *CookedCopyCmdArgs) isDestDirectory(dst common.ResourceString, ctx *co
return false
}

rt, err := InitResourceTraverser(dst, cca.FromTo.To(), ctx, &dstCredInfo, nil,
nil, false, false, false, common.EPermanentDeleteOption.None(),
func(common.EntityType) {}, cca.ListOfVersionIDs, false, pipeline.LogNone, cca.CpkOptions)
rt, err := InitResourceTraverser(dst, cca.FromTo.To(), ctx, &dstCredInfo, nil, nil, false, false, false, common.EPermanentDeleteOption.None(), func(common.EntityType) {}, cca.ListOfVersionIDs, false, pipeline.LogNone, cca.CpkOptions, common.EPosixPropertiesOption.None())

if err != nil {
return false
Expand Down
4 changes: 1 addition & 3 deletions cmd/list.go
Original file line number Diff line number Diff line change
Expand Up @@ -218,9 +218,7 @@ func (cooked cookedListCmdArgs) HandleListContainerCommand() (err error) {
}
}

traverser, err := InitResourceTraverser(source, cooked.location, &ctx, &credentialInfo, nil, nil,
true, false, false, common.EPermanentDeleteOption.None(), func(common.EntityType) {},
nil, false, pipeline2.LogNone, common.CpkOptions{})
traverser, err := InitResourceTraverser(source, cooked.location, &ctx, &credentialInfo, nil, nil, true, false, false, common.EPermanentDeleteOption.None(), func(common.EntityType) {}, nil, false, pipeline2.LogNone, common.CpkOptions{}, common.EPosixPropertiesOption.None())

if err != nil {
return fmt.Errorf("failed to initialize traverser: %s", err.Error())
Expand Down
5 changes: 1 addition & 4 deletions cmd/removeEnumerator.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,7 @@ func newRemoveEnumerator(cca *CookedCopyCmdArgs) (enumerator *CopyEnumerator, er
ctx := context.WithValue(context.TODO(), ste.ServiceAPIVersionOverride, ste.DefaultServiceApiVersion)

// Include-path is handled by ListOfFilesChannel.
sourceTraverser, err = InitResourceTraverser(cca.Source, cca.FromTo.From(), &ctx, &cca.credentialInfo,
nil, cca.ListOfFilesChannel, cca.Recursive, false, cca.IncludeDirectoryStubs,
cca.permanentDeleteOption, func(common.EntityType) {}, cca.ListOfVersionIDs, false,
cca.LogVerbosity.ToPipelineLogLevel(), cca.CpkOptions)
sourceTraverser, err = InitResourceTraverser(cca.Source, cca.FromTo.From(), &ctx, &cca.credentialInfo, nil, cca.ListOfFilesChannel, cca.Recursive, false, cca.IncludeDirectoryStubs, cca.permanentDeleteOption, func(common.EntityType) {}, cca.ListOfVersionIDs, false, cca.LogVerbosity.ToPipelineLogLevel(), cca.CpkOptions, common.EPosixPropertiesOption.None())

// report failure to create traverser
if err != nil {
Expand Down
13 changes: 6 additions & 7 deletions cmd/syncEnumerator.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,11 @@ func (cca *cookedSyncCmdArgs) initEnumerator(ctx context.Context) (enumerator *s
// TODO: enable symlink support in a future release after evaluating the implications
adreed-msft marked this conversation as resolved.
Show resolved Hide resolved
// GetProperties is enabled by default as sync supports both upload and download.
// This property only supports Files and S3 at the moment, but provided that Files sync is coming soon, enable to avoid stepping on Files sync work
sourceTraverser, err := InitResourceTraverser(cca.source, cca.fromTo.From(), &ctx, &srcCredInfo, nil,
nil, cca.recursive, true, cca.isHNSToHNS, common.EPermanentDeleteOption.None(), func(entityType common.EntityType) {
if entityType == common.EEntityType.File() {
atomic.AddUint64(&cca.atomicSourceFilesScanned, 1)
}
}, nil, cca.s2sPreserveBlobTags, cca.logVerbosity.ToPipelineLogLevel(), cca.cpkOptions)
sourceTraverser, err := InitResourceTraverser(cca.source, cca.fromTo.From(), &ctx, &srcCredInfo, nil, nil, cca.recursive, true, cca.isHNSToHNS, common.EPermanentDeleteOption.None(), func(entityType common.EntityType) {
if entityType == common.EEntityType.File() {
atomic.AddUint64(&cca.atomicSourceFilesScanned, 1)
}
}, nil, cca.s2sPreserveBlobTags, cca.logVerbosity.ToPipelineLogLevel(), cca.cpkOptions, common.EPosixPropertiesOption.None())

if err != nil {
return nil, err
Expand All @@ -84,7 +83,7 @@ func (cca *cookedSyncCmdArgs) initEnumerator(ctx context.Context) (enumerator *s
if entityType == common.EEntityType.File() {
atomic.AddUint64(&cca.atomicDestinationFilesScanned, 1)
}
}, nil, cca.s2sPreserveBlobTags, cca.logVerbosity.ToPipelineLogLevel(), cca.cpkOptions)
}, nil, cca.s2sPreserveBlobTags, cca.logVerbosity.ToPipelineLogLevel(), cca.cpkOptions, common.EPosixPropertiesOption.None())
if err != nil {
return nil, err
}
Expand Down
8 changes: 4 additions & 4 deletions cmd/zc_enumerator.go
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,7 @@ type enumerationCounterFunc func(entityType common.EntityType)
func InitResourceTraverser(resource common.ResourceString, location common.Location, ctx *context.Context,
credential *common.CredentialInfo, followSymlinks *bool, listOfFilesChannel chan string, recursive, getProperties,
includeDirectoryStubs bool, permanentDeleteOption common.PermanentDeleteOption, incrementEnumerationCounter enumerationCounterFunc, listOfVersionIds chan string,
s2sPreserveBlobTags bool, logLevel pipeline.LogLevel, cpkOptions common.CpkOptions) (ResourceTraverser, error) {
s2sPreserveBlobTags bool, logLevel pipeline.LogLevel, cpkOptions common.CpkOptions, posixPropertiesOption common.PosixPropertiesOption) (ResourceTraverser, error) {
var output ResourceTraverser
var p *pipeline.Pipeline

Expand Down Expand Up @@ -392,7 +392,7 @@ func InitResourceTraverser(resource common.ResourceString, location common.Locat
output = newListTraverser(baseResource, location, nil, nil, recursive, toFollow, getProperties,
globChan, includeDirectoryStubs, incrementEnumerationCounter, s2sPreserveBlobTags, logLevel, cpkOptions)
} else {
output = newLocalTraverser(resource.ValueLocal(), recursive, toFollow, incrementEnumerationCounter)
output = newLocalTraverser(resource.ValueLocal(), recursive, toFollow, incrementEnumerationCounter, posixPropertiesOption)
}
case common.ELocation.Benchmark():
ben, err := newBenchmarkTraverser(resource.Value, incrementEnumerationCounter)
Expand Down Expand Up @@ -421,11 +421,11 @@ func InitResourceTraverser(resource common.ResourceString, location common.Locat
return nil, errors.New(accountTraversalInherentlyRecursiveError)
}

output = newBlobAccountTraverser(resourceURL, *p, *ctx, includeDirectoryStubs, incrementEnumerationCounter, s2sPreserveBlobTags, cpkOptions)
output = newBlobAccountTraverser(resourceURL, *p, *ctx, includeDirectoryStubs, incrementEnumerationCounter, s2sPreserveBlobTags, cpkOptions, posixPropertiesOption)
} else if listOfVersionIds != nil {
output = newBlobVersionsTraverser(resourceURL, *p, *ctx, recursive, includeDirectoryStubs, incrementEnumerationCounter, listOfVersionIds, cpkOptions)
} else {
output = newBlobTraverser(resourceURL, *p, *ctx, recursive, includeDirectoryStubs, incrementEnumerationCounter, s2sPreserveBlobTags, cpkOptions, includeDeleted, includeSnapshot, includeVersion)
output = newBlobTraverser(resourceURL, *p, *ctx, recursive, includeDirectoryStubs, incrementEnumerationCounter, s2sPreserveBlobTags, cpkOptions, includeDeleted, includeSnapshot, includeVersion, posixPropertiesOption)
}
case common.ELocation.File():
resourceURL, err := resource.FullURL()
Expand Down
48 changes: 47 additions & 1 deletion cmd/zc_traverser_blob.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"context"
"fmt"
"net/url"
"strconv"
"strings"

"github.com/Azure/azure-storage-azcopy/v10/common/parallel"
Expand Down Expand Up @@ -61,6 +62,8 @@ type blobTraverser struct {
includeSnapshot bool

includeVersion bool

posixPropOption common.PosixPropertiesOption
}

func (t *blobTraverser) IsDirectory(isSource bool) bool {
Expand Down Expand Up @@ -231,6 +234,38 @@ func (t *blobTraverser) Traverse(preprocessor objectMorpher, processor objectPro
return t.serialList(containerURL, blobUrlParts.ContainerName, searchPrefix, extraSearchPrefix, preprocessor, processor, filters)
}

// blobRepresentsDevice reports whether the blob's POSIX metadata marks it as a
// character or block device reference file that should be skipped.
//
// If a user does not want to persist device reference files, we should filter them out.
// It does not make sense to implement this as a standard filter, as local does not get this information via metadata.
//
// The result is false (meaning "transfer this blob") when:
//   - the traverser's option is anything other than SpecialFilesAndProperties,
//   - a statx mask is present but unparsable, or says the mode was not returned,
//   - the mode metadata entry is missing or unparsable,
//   - the mode's file type is neither a character nor a block device.
func (t *blobTraverser) blobRepresentsDevice(object StoredObject) bool {
	// fileTypeMask isolates the file-type field of st_mode (S_IFMT on Linux).
	// The S_IF* values are enumerated codes inside this field, not independent
	// bit flags, so they must be compared for equality after masking. Testing
	// "mode&S_IFCHR == S_IFCHR" would also match symlinks and block devices,
	// whose codes happen to contain the S_IFCHR bit.
	// NOTE(review): assumes common.S_IFCHR / common.S_IFBLK mirror the Linux
	// values, which all lie within 0xF000 — confirm against package common.
	const fileTypeMask = 0xF000

	if t.posixPropOption != common.EPosixPropertiesOption.SpecialFilesAndProperties() {
		return false // The user either wants these files, or is not processing POSIX properties at all. Either way, it makes sense to transfer this file.
	}

	// When the properties came from statx, the mode is only trustworthy if the
	// recorded mask says statx actually returned it.
	if rawMask, hasMask := object.Metadata[common.LINUXStatxMaskMeta]; hasMask {
		statxMask, err := strconv.ParseUint(rawMask, 10, 32)
		if err != nil {
			return false
		}

		if !common.StatXReturned(uint32(statxMask), common.STATX_MODE) {
			return false
		}
	}

	rawMode, hasMode := object.Metadata[common.POSIXModeMeta]
	if !hasMode {
		return false
	}

	mode, err := strconv.ParseUint(rawMode, 10, 32)
	if err != nil {
		return false
	}

	fileType := uint32(mode) & fileTypeMask
	return fileType == common.S_IFCHR || fileType == common.S_IFBLK
}

func (t *blobTraverser) parallelList(containerURL azblob.ContainerURL, containerName string, searchPrefix string,
extraSearchPrefix string, preprocessor objectMorpher, processor objectProcessor, filters []ObjectFilter) error {
// Define how to enumerate its contents
Expand Down Expand Up @@ -270,6 +305,8 @@ func (t *blobTraverser) parallelList(containerURL azblob.ContainerURL, container
containerName,
)

// No need to check if this is a device, since it's a directory stub.

adreed-msft marked this conversation as resolved.
Show resolved Hide resolved
if t.s2sPreserveSourceTags {
var BlobTags *azblob.BlobTags
BlobTags, err = fblobURL.GetTags(t.ctx, nil)
Expand Down Expand Up @@ -298,6 +335,10 @@ func (t *blobTraverser) parallelList(containerURL azblob.ContainerURL, container

storedObject := t.createStoredObjectForBlob(preprocessor, blobInfo, strings.TrimPrefix(blobInfo.Name, searchPrefix), containerName)

if t.blobRepresentsDevice(storedObject) {
continue // skip devices unless the user wants them
}

if t.s2sPreserveSourceTags && blobInfo.BlobTags != nil {
blobTagsMap := common.BlobTags{}
for _, blobTag := range blobInfo.BlobTags.BlobTagSet {
Expand Down Expand Up @@ -420,6 +461,10 @@ func (t *blobTraverser) serialList(containerURL azblob.ContainerURL, containerNa

storedObject := t.createStoredObjectForBlob(preprocessor, blobInfo, relativePath, containerName)

if t.blobRepresentsDevice(storedObject) {
continue // filter out device files if the user didn't request them
}

// Setting blob tags
if t.s2sPreserveSourceTags && blobInfo.BlobTags != nil {
blobTagsMap := common.BlobTags{}
Expand All @@ -446,7 +491,7 @@ func (t *blobTraverser) serialList(containerURL azblob.ContainerURL, containerNa
return nil
}

func newBlobTraverser(rawURL *url.URL, p pipeline.Pipeline, ctx context.Context, recursive, includeDirectoryStubs bool, incrementEnumerationCounter enumerationCounterFunc, s2sPreserveSourceTags bool, cpkOptions common.CpkOptions, includeDeleted, includeSnapshot, includeVersion bool) (t *blobTraverser) {
func newBlobTraverser(rawURL *url.URL, p pipeline.Pipeline, ctx context.Context, recursive, includeDirectoryStubs bool, incrementEnumerationCounter enumerationCounterFunc, s2sPreserveSourceTags bool, cpkOptions common.CpkOptions, includeDeleted, includeSnapshot, includeVersion bool, posixPropertiesOption common.PosixPropertiesOption) (t *blobTraverser) {
t = &blobTraverser{
rawURL: rawURL,
p: p,
Expand All @@ -460,6 +505,7 @@ func newBlobTraverser(rawURL *url.URL, p pipeline.Pipeline, ctx context.Context,
includeDeleted: includeDeleted,
includeSnapshot: includeSnapshot,
includeVersion: includeVersion,
posixPropOption: posixPropertiesOption,
}

disableHierarchicalScanning := strings.ToLower(glcm.GetEnvironmentVariable(common.EEnvironmentVariable.DisableHierarchicalScanning()))
Expand Down
Loading