From ede5628eecff7737ff958fff602cc8f2352c2c01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Hermann?= Date: Wed, 11 Mar 2026 12:07:23 +0100 Subject: [PATCH 1/2] feat: extended file attributes --- cmd/cloudstic/cmd_auth.go | 4 +- cmd/cloudstic/cmd_backup.go | 47 +++- cmd/cloudstic/cmd_backup_test.go | 95 +++++++ cmd/cloudstic/completion.go | 22 +- cmd/cloudstic/usage.go | 4 + docs/spec.md | 19 +- docs/user-guide.md | 28 +- go.mod | 9 +- go.sum | 5 - internal/core/core_test.go | 63 +++++ internal/core/models.go | 7 + internal/engine/backup_scan.go | 30 +++ internal/engine/backup_scan_test.go | 92 +++++++ pkg/source/gdrive.go | 1 + pkg/source/local_source.go | 45 +++- pkg/source/local_source_xattr_darwin.go | 34 +++ pkg/source/local_source_xattr_linux.go | 82 ++++++ pkg/source/local_source_xattr_stub.go | 12 + pkg/source/local_source_xattr_test.go | 323 ++++++++++++++++++++++++ pkg/source/local_source_xattr_unix.go | 116 +++++++++ pkg/source/onedrive.go | 1 + pkg/source/sftp_source.go | 8 + rfcs/0004-extended-file-attributes.md | 15 +- 23 files changed, 1035 insertions(+), 27 deletions(-) create mode 100644 cmd/cloudstic/cmd_backup_test.go create mode 100644 pkg/source/local_source_xattr_darwin.go create mode 100644 pkg/source/local_source_xattr_linux.go create mode 100644 pkg/source/local_source_xattr_stub.go create mode 100644 pkg/source/local_source_xattr_test.go create mode 100644 pkg/source/local_source_xattr_unix.go diff --git a/cmd/cloudstic/cmd_auth.go b/cmd/cloudstic/cmd_auth.go index c93bde4..1edf937 100644 --- a/cmd/cloudstic/cmd_auth.go +++ b/cmd/cloudstic/cmd_auth.go @@ -252,7 +252,7 @@ func (r *runner) runAuthLogin() int { if googleCreds == "" { googleCreds = os.Getenv("GOOGLE_APPLICATION_CREDENTIALS") } - src, err := initSource(ctx, "gdrive:/", false, "", googleCreds, auth.GoogleTokenFile, "", "", g, nil) + src, err := initSource(ctx, "gdrive:/", false, "", googleCreds, auth.GoogleTokenFile, "", "", false, false, false, "", g, nil) if err != nil 
{ return r.fail("Failed to initialize Google auth source: %v", err) } @@ -262,7 +262,7 @@ func (r *runner) runAuthLogin() int { if onedriveClientID == "" { onedriveClientID = os.Getenv("ONEDRIVE_CLIENT_ID") } - src, err := initSource(ctx, "onedrive:/", false, "", "", "", onedriveClientID, auth.OneDriveTokenFile, g, nil) + src, err := initSource(ctx, "onedrive:/", false, "", "", "", onedriveClientID, auth.OneDriveTokenFile, false, false, false, "", g, nil) if err != nil { return r.fail("Failed to initialize OneDrive auth source: %v", err) } diff --git a/cmd/cloudstic/cmd_backup.go b/cmd/cloudstic/cmd_backup.go index db14b4b..dc4dc9d 100644 --- a/cmd/cloudstic/cmd_backup.go +++ b/cmd/cloudstic/cmd_backup.go @@ -33,6 +33,10 @@ type backupArgs struct { googleTokenFile string onedriveClientID string onedriveTokenFile string + skipMode bool + skipFlags bool + skipXattrs bool + xattrNamespaces string tags stringArrayFlags excludes stringArrayFlags flagsSet map[string]bool @@ -53,6 +57,10 @@ func parseBackupArgs() *backupArgs { googleTokenFile := fs.String("google-token-file", envDefault("GOOGLE_TOKEN_FILE", ""), "Path to Google OAuth token file") onedriveClientID := fs.String("onedrive-client-id", envDefault("ONEDRIVE_CLIENT_ID", ""), "OneDrive OAuth client ID") onedriveTokenFile := fs.String("onedrive-token-file", envDefault("ONEDRIVE_TOKEN_FILE", ""), "Path to OneDrive OAuth token file") + skipMode := fs.Bool("skip-mode", false, "Skip POSIX mode, uid, gid, btime, and flags collection") + skipFlags := fs.Bool("skip-flags", false, "Skip file flags ioctl (Linux only; no effect on macOS)") + skipXattrs := fs.Bool("skip-xattrs", false, "Skip extended attribute collection") + xattrNamespaces := fs.String("xattr-namespaces", "", "Restrict xattr collection to these prefixes (comma-separated, e.g. 
\"user.,com.apple.\")") fs.Var(&a.tags, "tag", "Tag to apply to the snapshot (can be specified multiple times)") fs.Var(&a.excludes, "exclude", "Exclude pattern (gitignore syntax, repeatable)") mustParse(fs) @@ -69,6 +77,10 @@ func parseBackupArgs() *backupArgs { a.googleTokenFile = *googleTokenFile a.onedriveClientID = *onedriveClientID a.onedriveTokenFile = *onedriveTokenFile + a.skipMode = *skipMode + a.skipFlags = *skipFlags + a.skipXattrs = *skipXattrs + a.xattrNamespaces = *xattrNamespaces a.flagsSet = map[string]bool{} fs.Visit(func(f *flag.Flag) { a.flagsSet[f.Name] = true @@ -116,7 +128,22 @@ func (r *runner) runSingleBackup(a *backupArgs) int { ctx := context.Background() - src, err := initSource(ctx, a.sourceURI, a.skipNativeFiles, a.volumeUUID, a.googleCreds, a.googleTokenFile, a.onedriveClientID, a.onedriveTokenFile, a.g, excludePatterns) + src, err := initSource( + ctx, + a.sourceURI, + a.skipNativeFiles, + a.volumeUUID, + a.googleCreds, + a.googleTokenFile, + a.onedriveClientID, + a.onedriveTokenFile, + a.skipMode, + a.skipFlags, + a.skipXattrs, + a.xattrNamespaces, + a.g, + excludePatterns, + ) if err != nil { return r.fail("Failed to init source: %v", err) } @@ -576,7 +603,7 @@ func (r *runner) printBackupSummary(res *engine.RunResult) { } } -func initSource(ctx context.Context, sourceURI string, skipNativeFiles bool, volumeUUID, googleCreds, googleTokenFile, onedriveClientID, onedriveTokenFile string, g *globalFlags, excludePatterns []string) (source.Source, error) { +func initSource(ctx context.Context, sourceURI string, skipNativeFiles bool, volumeUUID, googleCreds, googleTokenFile, onedriveClientID, onedriveTokenFile string, skipMode, skipFlags, skipXattrs bool, xattrNamespaces string, g *globalFlags, excludePatterns []string) (source.Source, error) { uri, err := parseSourceURI(sourceURI) if err != nil { return nil, err @@ -588,6 +615,22 @@ func initSource(ctx context.Context, sourceURI string, skipNativeFiles bool, vol if volumeUUID != "" { 
opts = append(opts, source.WithVolumeUUID(volumeUUID)) } + if skipMode { + opts = append(opts, source.WithSkipMode()) + } + if skipFlags { + opts = append(opts, source.WithSkipFlags()) + } + if skipXattrs { + opts = append(opts, source.WithSkipXattrs()) + } + if xattrNamespaces != "" { + prefixes := strings.Split(xattrNamespaces, ",") + for i := range prefixes { + prefixes[i] = strings.TrimSpace(prefixes[i]) + } + opts = append(opts, source.WithXattrNamespaces(prefixes)) + } return source.NewLocalSource(uri.path, opts...), nil case "sftp": sftpOpts := g.buildSFTPSourceOpts(uri) diff --git a/cmd/cloudstic/cmd_backup_test.go b/cmd/cloudstic/cmd_backup_test.go new file mode 100644 index 0000000..adab11b --- /dev/null +++ b/cmd/cloudstic/cmd_backup_test.go @@ -0,0 +1,95 @@ +package main + +import ( + "context" + "strings" + "testing" +) + +func TestInitSource_Local_ExtendedOptions(t *testing.T) { + tmpDir := t.TempDir() + a := &backupArgs{ + skipMode: true, + skipFlags: true, + skipXattrs: true, + xattrNamespaces: "user.,com.apple.", + } + g := &globalFlags{} + + src, err := initSource(context.Background(), "local:"+tmpDir, false, "", "", "", "", "", a.skipMode, a.skipFlags, a.skipXattrs, a.xattrNamespaces, g, nil) + if err != nil { + t.Fatalf("initSource failed: %v", err) + } + if src == nil { + t.Fatal("expected non-nil source") + } + + // Verify info reflects local source. 
+ info := src.Info() + if info.Type != "local" { + t.Errorf("expected source type 'local', got %q", info.Type) + } +} + +func TestInitSource_Local_NoExtendedOptions(t *testing.T) { + tmpDir := t.TempDir() + a := &backupArgs{} + g := &globalFlags{} + + src, err := initSource(context.Background(), "local:"+tmpDir, false, "", "", "", "", "", a.skipMode, a.skipFlags, a.skipXattrs, a.xattrNamespaces, g, nil) + if err != nil { + t.Fatalf("initSource failed: %v", err) + } + if src == nil { + t.Fatal("expected non-nil source") + } +} + +func TestInitSource_Local_VolumeUUID(t *testing.T) { + tmpDir := t.TempDir() + a := &backupArgs{} + g := &globalFlags{} + + src, err := initSource(context.Background(), "local:"+tmpDir, false, "test-uuid-123", "", "", "", "", a.skipMode, a.skipFlags, a.skipXattrs, a.xattrNamespaces, g, nil) + if err != nil { + t.Fatalf("initSource failed: %v", err) + } + info := src.Info() + if info.VolumeUUID != "test-uuid-123" { + t.Errorf("expected VolumeUUID 'test-uuid-123', got %q", info.VolumeUUID) + } +} + +func TestInitSource_Local_XattrNamespacesParsing(t *testing.T) { + tmpDir := t.TempDir() + a := &backupArgs{ + xattrNamespaces: "user.,com.apple.", + } + g := &globalFlags{} + + src, err := initSource(context.Background(), "local:"+tmpDir, false, "", "", "", "", "", a.skipMode, a.skipFlags, a.skipXattrs, a.xattrNamespaces, g, nil) + if err != nil { + t.Fatalf("initSource failed: %v", err) + } + if src == nil { + t.Fatal("expected non-nil source") + } +} + +func TestInitSource_UnsupportedType(t *testing.T) { + a := &backupArgs{} + g := &globalFlags{} + + _, err := initSource(context.Background(), "invalid-source:/", false, "", "", "", "", "", a.skipMode, a.skipFlags, a.skipXattrs, a.xattrNamespaces, g, nil) + if err == nil { + t.Fatal("expected error for unsupported source type") + } + if !strings.Contains(err.Error(), "unknown source scheme") { + t.Errorf("expected 'unknown source scheme' error, got: %v", err) + } +} + +func TestPrintUsage_Smoke(t 
*testing.T) { + // Verify printUsage doesn't panic. + printUsage() +} diff --git a/cmd/cloudstic/completion.go b/cmd/cloudstic/completion.go index 73ac215..fa86e5d 100644 --- a/cmd/cloudstic/completion.go +++ b/cmd/cloudstic/completion.go @@ -52,8 +52,8 @@ _cloudstic() { case "${words[i]}" in -*) # skip flags and their values - case "${words[i]}" in - -store|-profile|-profiles-file|-s3-endpoint|-s3-region|-s3-profile|-s3-access-key|-s3-secret-key|-source-sftp-password|-source-sftp-key|-store-sftp-password|-store-sftp-key|-encryption-key|-password|-recovery-key|-kms-key-arn|-kms-region|-kms-endpoint|-source|-auth-ref|-google-credentials|-google-token-file|-onedrive-client-id|-onedrive-token-file|-tag|-output|-keep-last|-keep-hourly|-keep-daily|-keep-weekly|-keep-monthly|-keep-yearly|-group-by|-account|-json) + case "${words[i]}" in + -store|-profile|-profiles-file|-s3-endpoint|-s3-region|-s3-profile|-s3-access-key|-s3-secret-key|-source-sftp-password|-source-sftp-key|-store-sftp-password|-store-sftp-key|-encryption-key|-password|-recovery-key|-kms-key-arn|-kms-region|-kms-endpoint|-source|-all-profiles|-auth-ref|-google-credentials|-google-token-file|-onedrive-client-id|-onedrive-token-file|-tag|-output|-keep-last|-keep-hourly|-keep-daily|-keep-weekly|-keep-monthly|-keep-yearly|-group-by|-account|-json|-xattr-namespaces) ((i++)) ;; esac ;; @@ -73,10 +73,10 @@ _cloudstic() { # Complete flags per subcommand local cmd_flags="" case "$cmd" in - init) - cmd_flags="-add-recovery-key -no-encryption -adopt-slots" ;; - backup) - cmd_flags="-source -profile -all-profiles -auth-ref -profiles-file -skip-native-files -google-credentials -google-token-file -onedrive-client-id -onedrive-token-file -tag -dry-run" ;; + init) + cmd_flags="-add-recovery-key -no-encryption -adopt-slots" ;; + backup) + cmd_flags="-source -profile -all-profiles -auth-ref -profiles-file -skip-native-files -google-credentials -google-token-file -onedrive-client-id -onedrive-token-file -tag -dry-run 
-skip-mode -skip-flags -skip-xattrs -xattr-namespaces" ;; restore) cmd_flags="-output -format -path -dry-run" ;; prune) @@ -321,7 +321,11 @@ _cloudstic() { '-onedrive-client-id[OneDrive OAuth client ID]:id:' \ '-onedrive-token-file[OneDrive OAuth token file]:path:_files' \ '*-tag[Tag for the snapshot]:tag:' \ - '-dry-run[Scan without writing]' + '-dry-run[Scan without writing]' \ + '-skip-mode[Skip POSIX mode/uid/gid/btime/flags]' \ + '-skip-flags[Skip file flags ioctl (Linux only)]' \ + '-skip-xattrs[Skip extended attribute collection]' \ + '-xattr-namespaces[Restrict xattr collection to prefixes]:prefixes:' ;; profile) local -a profile_commands @@ -652,6 +656,10 @@ complete -c cloudstic -n '__fish_seen_subcommand_from backup' -l onedrive-client complete -c cloudstic -n '__fish_seen_subcommand_from backup' -l onedrive-token-file -r -F -d 'OneDrive OAuth token file' complete -c cloudstic -n '__fish_seen_subcommand_from backup' -l tag -x -d 'Tag for the snapshot' complete -c cloudstic -n '__fish_seen_subcommand_from backup' -l dry-run -d 'Scan without writing' +complete -c cloudstic -n '__fish_seen_subcommand_from backup' -l skip-mode -d 'Skip POSIX mode/uid/gid/btime/flags' +complete -c cloudstic -n '__fish_seen_subcommand_from backup' -l skip-flags -d 'Skip file flags ioctl (Linux only)' +complete -c cloudstic -n '__fish_seen_subcommand_from backup' -l skip-xattrs -d 'Skip extended attribute collection' +complete -c cloudstic -n '__fish_seen_subcommand_from backup' -l xattr-namespaces -x -d 'Restrict xattr collection to prefixes' # profile subcommands complete -c cloudstic -n '__fish_seen_subcommand_from profile; and not __fish_seen_subcommand_from list show new' -a list -d 'List stores, auth entries, and backup profiles' diff --git a/cmd/cloudstic/usage.go b/cmd/cloudstic/usage.go index bb824e6..4f8802f 100644 --- a/cmd/cloudstic/usage.go +++ b/cmd/cloudstic/usage.go @@ -137,6 +137,10 @@ func printUsage() { {"-exclude ", "Exclude pattern, gitignore syntax 
(repeatable)"}, {"-exclude-file ", "Load exclude patterns from file (one per line, gitignore syntax)"}, {"-dry-run", "Scan source and report changes without writing to the store"}, + {"-skip-mode", "Skip POSIX mode, uid, gid, btime, and flags collection"}, + {"-skip-flags", "Skip file flags ioctl (Linux only; no effect on macOS)"}, + {"-skip-xattrs", "Skip extended attribute collection"}, + {"-xattr-namespaces ", "Restrict xattr collection to prefixes (comma-separated)"}, }) t.Blank() t.Note( diff --git a/docs/spec.md b/docs/spec.md index cd23a79..3acbf9b 100644 --- a/docs/spec.md +++ b/docs/spec.md @@ -160,7 +160,13 @@ All objects are stored under a flat key namespace of the form `/`. "size": 21733, "mtime": 1710000000, "owner": "user@example.com", - "extra": { "mimeType": "application/pdf" } + "extra": { "mimeType": "application/pdf" }, + "mode": 493, + "uid": 501, + "gid": 20, + "btime": 1710000000, + "flags": 2, + "xattrs": { "user.tag": "cHJvamVjdA==" } } ``` @@ -173,6 +179,12 @@ All objects are stored under a flat key namespace of the form `/`. | `content_ref` | Opaque content reference used as `content/` key; HMAC of `content_hash` for encrypted repos, plain `content_hash` for unencrypted repos | | `paths` | Reserved for future use (multi-path support) | | `extra` | Source-specific metadata (e.g. MIME type) | +| `mode` | POSIX file mode bits (e.g. `0755` = `493`). Omitted if zero. | +| `uid` | Numeric owner user ID. Omitted if zero. | +| `gid` | Numeric owner group ID. Omitted if zero. | +| `btime` | File creation (birth) time as Unix epoch seconds. Omitted if zero. | +| `flags` | OS-specific file flags (macOS `UF_*`/`SF_*`, Linux `FS_*_FL`). Omitted if zero. | +| `xattrs` | Extended attributes as `name → base64(value)` map. Omitted if empty.| * `fileId` is **the HAMT key**. * Folders have an empty `content_hash`, `content_ref`, and `size` of 0. 
@@ -225,7 +237,8 @@ Object key: `node/` "source": { "type": "gdrive", "account": "user@gmail.com", - "path": "my-drive://" + "path": "my-drive://", + "fs_type": "google-drive" }, "meta": { "generator": "cloudstic-cli" @@ -239,7 +252,7 @@ Object key: `node/` | Field | Description | |----------------|----------------------------------------------------------------------| | `seq` | Monotonically increasing sequence number | -| `source` | Origin of the backup (type, account, path) — used for retention grouping | +| `source` | Origin of the backup (type, account, path, fs_type) — used for retention grouping | | `meta` | Free-form key-value metadata (generator, etc.) | | `tags` | User-defined labels for retention policies | | `change_token` | Opaque token for incremental sources (omitted when not applicable) | diff --git a/docs/user-guide.md b/docs/user-guide.md index 35b1233..05a1de7 100644 --- a/docs/user-guide.md +++ b/docs/user-guide.md @@ -314,6 +314,10 @@ cloudstic backup -source local:~/Documents -dry-run | `-exclude` | | Exclude pattern using gitignore syntax (repeatable) | | `-exclude-file` | | Path to file containing exclude patterns, one per line | | `-volume-uuid` | | Override volume UUID for local source (enables cross-machine incremental backup for portable drives) | +| `-skip-mode` | | Skip POSIX metadata collection (mode, uid, gid, btime, flags) | +| `-skip-flags` | | Skip file flags collection (Linux `ioctl`; no effect on macOS) | +| `-skip-xattrs` | | Skip extended attribute collection | +| `-xattr-namespaces` | | Comma-separated xattr namespace prefixes to collect (e.g. `user.,com.apple.`) | | `-dry-run` | `false` | Scan source and report changes without writing to the store | `-profile` and `-all-profiles` are mutually exclusive. 
@@ -1156,8 +1160,28 @@ cloudstic backup -source local:~/project -exclude-file .backupignore | `-exclude` | | Exclude pattern, gitignore syntax (repeatable) | | `-exclude-file` | | File containing exclude patterns (one per line) | | `-volume-uuid` | | Override volume UUID (see [Portable drives](#portable-drives)) | +| `-skip-mode` | | Skip POSIX metadata collection (mode, uid, gid, btime, flags) | +| `-skip-flags` | | Skip file flags collection (Linux `ioctl`; no effect on macOS) | +| `-skip-xattrs` | | Skip extended attribute collection | +| `-xattr-namespaces` | | Comma-separated xattr namespace prefixes to collect (e.g. `user.,com.apple.`) | -Cloudstic walks the directory recursively. Symbolic links are not followed. File permissions are not preserved — only name, size, modification time, and content are captured. +Cloudstic walks the directory recursively. Symbolic links are not followed. + +**Extended file attributes:** By default, Cloudstic captures POSIX permissions (mode bits), numeric ownership (uid/gid), file creation time (btime, where supported), per-file flags, and extended attributes (xattrs). These are stored in each snapshot and will be used by future restore modes to faithfully recreate file metadata. To control what is captured: + +```bash +# Skip all POSIX metadata (mode, uid, gid, btime, flags) +cloudstic backup -source local:/data -skip-mode + +# Skip only file flags (Linux ioctl; no effect on macOS) +cloudstic backup -source local:/data -skip-flags + +# Skip extended attributes +cloudstic backup -source local:/data -skip-xattrs + +# Collect only user.* xattrs (skip security.*, system.*, etc.) +cloudstic backup -source local:/data -xattr-namespaces "user." +``` See [Exclude patterns](#exclude-patterns) for the full pattern syntax reference. 
@@ -1227,6 +1251,8 @@ cloudstic backup -source sftp://backup@myserver.com/home/user/files \ If neither `-source-sftp-password` nor `-source-sftp-key` is provided, Cloudstic will fall back to your `SSH_AUTH_SOCK` agent. +SFTP backups capture file permissions (mode bits) and numeric ownership (uid/gid) via the SFTPv3 protocol. Birth time, file flags, and extended attributes are not available over SFTP. + -Cloudstic walks the remote directory recursively. File permissions are not preserved — only name, size, modification time, and content are captured. +Cloudstic walks the remote directory recursively. The `-exclude` and `-exclude-file` flags work with SFTP sources. See [Exclude patterns](#exclude-patterns) for the full pattern syntax. diff --git a/go.mod b/go.mod index 104a5d0..3f8f1b7 100644 --- a/go.mod +++ b/go.mod @@ -7,9 +7,11 @@ require ( github.com/aws/aws-sdk-go-v2 v1.41.2 github.com/aws/aws-sdk-go-v2/config v1.32.10 github.com/aws/aws-sdk-go-v2/credentials v1.19.10 + github.com/aws/aws-sdk-go-v2/service/kms v1.50.1 github.com/aws/aws-sdk-go-v2/service/s3 v1.96.2 github.com/aws/smithy-go v1.24.1 - github.com/jackc/pgx/v5 v5.8.0 + github.com/buger/jsonparser v1.1.1 + github.com/hashicorp/golang-lru/v2 v2.0.7 github.com/jedib0t/go-pretty/v6 v6.7.5 github.com/jotfs/fastcdc-go v0.2.0 github.com/klauspost/compress v1.18.0 @@ -21,6 +23,7 @@ require ( golang.org/x/crypto v0.48.0 golang.org/x/oauth2 v0.35.0 golang.org/x/sync v0.19.0 + golang.org/x/sys v0.41.0 golang.org/x/term v0.40.0 google.golang.org/api v0.256.0 ) @@ -42,12 +45,10 @@ require ( github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.10 // indirect github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.18 // indirect github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.18 // indirect - github.com/aws/aws-sdk-go-v2/service/kms v1.50.1 // indirect github.com/aws/aws-sdk-go-v2/service/signin v1.0.6 // indirect github.com/aws/aws-sdk-go-v2/service/sso v1.30.11 // indirect github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.15 // 
indirect github.com/aws/aws-sdk-go-v2/service/sts v1.41.7 // indirect - github.com/buger/jsonparser v1.1.1 // indirect github.com/cenkalti/backoff/v4 v4.3.0 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/containerd/errdefs v1.0.0 // indirect @@ -70,7 +71,6 @@ require ( github.com/googleapis/enterprise-certificate-proxy v0.3.7 // indirect github.com/googleapis/gax-go/v2 v2.15.0 // indirect github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 // indirect - github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect github.com/jackc/pgpassfile v1.0.0 // indirect github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect github.com/jackc/puddle/v2 v2.2.2 // indirect @@ -104,7 +104,6 @@ require ( go.opentelemetry.io/otel/metric v1.39.0 // indirect go.opentelemetry.io/otel/trace v1.39.0 // indirect golang.org/x/net v0.49.0 // indirect - golang.org/x/sys v0.41.0 // indirect golang.org/x/text v0.34.0 // indirect google.golang.org/genproto/googleapis/api v0.0.0-20260209200024-4cfbd4190f57 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20260209200024-4cfbd4190f57 // indirect diff --git a/go.sum b/go.sum index 3eb5e62..9839f74 100644 --- a/go.sum +++ b/go.sum @@ -114,13 +114,8 @@ github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 h1:HWRh5R2+9EifMyIHV7ZV+MIZqgz github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0/go.mod h1:JfhWUomR1baixubs02l85lZYYOm7LV6om4ceouMv45c= github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= -github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM= github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg= -github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 h1:iCEnooe7UlwOQYpKFhBabPMi4aNAfoODPEFNiAnClxo= github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761/go.mod 
h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM= -github.com/jackc/pgx/v5 v5.8.0 h1:TYPDoleBBme0xGSAX3/+NujXXtpZn9HBONkQC7IEZSo= -github.com/jackc/pgx/v5 v5.8.0/go.mod h1:QVeDInX2m9VyzvNeiCJVjCkNFqzsNb43204HshNSZKw= -github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo= github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4= github.com/jedib0t/go-pretty/v6 v6.7.5 h1:9dJSWTJnsXJVVAbvxIFxeHf/JxoJd7GUl5o3UzhtuiM= github.com/jedib0t/go-pretty/v6 v6.7.5/go.mod h1:YwC5CE4fJ1HFUDeivSV1r//AmANFHyqczZk+U6BDALU= diff --git a/internal/core/core_test.go b/internal/core/core_test.go index 3113a48..e4c8041 100644 --- a/internal/core/core_test.go +++ b/internal/core/core_test.go @@ -44,3 +44,66 @@ func TestComputeJSONHash(t *testing.T) { t.Errorf("Hash mismatch: got %s, calculated from data %s", hash, expectedHash) } } + +func TestFileMetaHashStability_WithXattrs(t *testing.T) { + // Xattrs map keys must be sorted for deterministic hashing. + meta := FileMeta{ + Version: 1, + FileID: "test.txt", + Name: "test.txt", + Type: FileTypeFile, + Size: 100, + Mtime: 1000, + Mode: 0755, + Uid: 501, + Gid: 20, + Btime: 900, + Xattrs: map[string][]byte{ + "user.zeta": []byte("last"), + "user.alpha": []byte("first"), + }, + } + + hash1, _, err := ComputeJSONHash(&meta) + if err != nil { + t.Fatal(err) + } + + // Compute again — must be identical. + hash2, _, err := ComputeJSONHash(&meta) + if err != nil { + t.Fatal(err) + } + + if hash1 != hash2 { + t.Errorf("hash not stable: %s vs %s", hash1, hash2) + } +} + +func TestFileMetaOmitempty(t *testing.T) { + // Fields with zero values should be omitted from JSON. 
+ meta := FileMeta{ + Version: 1, + FileID: "test.txt", + Name: "test.txt", + Type: FileTypeFile, + Size: 100, + Mtime: 1000, + } + + _, data, err := ComputeJSONHash(&meta) + if err != nil { + t.Fatal(err) + } + + var decoded map[string]interface{} + if err := json.Unmarshal(data, &decoded); err != nil { + t.Fatal(err) + } + + for _, field := range []string{"mode", "uid", "gid", "btime", "flags", "xattrs"} { + if _, ok := decoded[field]; ok { + t.Errorf("expected %q to be omitted from JSON when zero, but it was present", field) + } + } +} diff --git a/internal/core/models.go b/internal/core/models.go index 112582f..ad85bd6 100644 --- a/internal/core/models.go +++ b/internal/core/models.go @@ -41,6 +41,12 @@ type FileMeta struct { Mtime int64 `json:"mtime"` // Unix timestamp Owner string `json:"owner"` Extra map[string]interface{} `json:"extra,omitempty"` + Mode uint32 `json:"mode,omitempty"` // POSIX permission bits (st_mode & 0xFFF) + Uid uint32 `json:"uid,omitempty"` // POSIX user ID + Gid uint32 `json:"gid,omitempty"` // POSIX group ID + Btime int64 `json:"btime,omitempty"` // birth/creation time, Unix seconds; 0 = not available + Flags uint32 `json:"flags,omitempty"` // per-file flags (chflags / FS_IOC_GETFLAGS) + Xattrs map[string][]byte `json:"xattrs,omitempty"` // extended attributes: name → raw bytes } func (f *FileMeta) Ref() (string, []byte, error) { @@ -77,6 +83,7 @@ type SourceInfo struct { Identity string `json:"identity,omitempty"` // stable container identity for lineage matching PathID string `json:"path_id,omitempty"` // stable selected-root identity within container DriveName string `json:"drive_name,omitempty"` // human-readable container label (e.g. "My Drive") + FsType string `json:"fs_type,omitempty"` // source filesystem type (e.g. "apfs", "ext4", "sftp") // Legacy fields (read-only compatibility path; slated for future removal). 
VolumeUUID string `json:"volume_uuid,omitempty"` diff --git a/internal/engine/backup_scan.go b/internal/engine/backup_scan.go index aaa40ad..6a9beb2 100644 --- a/internal/engine/backup_scan.go +++ b/internal/engine/backup_scan.go @@ -233,9 +233,39 @@ func metadataEqual(a, b core.FileMeta) bool { a.Size == b.Size && a.Mtime == b.Mtime && a.Type == b.Type && + a.Mode == b.Mode && + a.Uid == b.Uid && + a.Gid == b.Gid && + a.Btime == b.Btime && + a.Flags == b.Flags && + xattrsEqual(a.Xattrs, b.Xattrs) && len(a.Parents) == len(b.Parents) } +func xattrsEqual(a, b map[string][]byte) bool { + if len(a) != len(b) { + return false + } + for k, v := range a { + if bv, ok := b[k]; !ok || !bytesEqual(v, bv) { + return false + } + } + return true +} + +func bytesEqual(a, b []byte) bool { + if len(a) != len(b) { + return false + } + for i := range a { + if a[i] != b[i] { + return false + } + } + return true +} + func (bm *BackupManager) insertFolder(_ context.Context, root string, meta *core.FileMeta, phase ui.Phase) (string, error) { if bm.cfg.verbose { phase.Log(fmt.Sprintf("Folder: %s (New/Changed)", meta.Name)) diff --git a/internal/engine/backup_scan_test.go b/internal/engine/backup_scan_test.go index e9903c3..879ac4d 100644 --- a/internal/engine/backup_scan_test.go +++ b/internal/engine/backup_scan_test.go @@ -300,3 +300,95 @@ func TestScanIncremental_DeleteWithoutParentUsesExistingMetadataParent(t *testin t.Fatalf("expected FILE_1 to be deleted, got ref %q", ref) } } + +func TestMetadataEqual_ExtendedFields(t *testing.T) { + base := core.FileMeta{ + Name: "test.txt", + Size: 100, + Mtime: 1000, + Type: core.FileTypeFile, + Mode: 0755, + Uid: 501, + Gid: 20, + Btime: 900, + Flags: 0x10, + Xattrs: map[string][]byte{ + "user.tag": []byte("v1"), + }, + } + + t.Run("identical", func(t *testing.T) { + b := base + b.Xattrs = map[string][]byte{"user.tag": []byte("v1")} + if !metadataEqual(base, b) { + t.Error("expected equal") + } + }) + + tests := []struct { + name string + 
modify func(m *core.FileMeta) + }{ + {"mode", func(m *core.FileMeta) { m.Mode = 0644 }}, + {"uid", func(m *core.FileMeta) { m.Uid = 0 }}, + {"gid", func(m *core.FileMeta) { m.Gid = 100 }}, + {"btime", func(m *core.FileMeta) { m.Btime = 800 }}, + {"flags", func(m *core.FileMeta) { m.Flags = 0 }}, + {"xattrs_value", func(m *core.FileMeta) { m.Xattrs = map[string][]byte{"user.tag": []byte("v2")} }}, + {"xattrs_extra_key", func(m *core.FileMeta) { + m.Xattrs = map[string][]byte{"user.tag": []byte("v1"), "user.other": []byte("x")} + }}, + {"xattrs_missing", func(m *core.FileMeta) { m.Xattrs = nil }}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + b := base + b.Xattrs = map[string][]byte{"user.tag": []byte("v1")} + tt.modify(&b) + if metadataEqual(base, b) { + t.Errorf("expected not equal after modifying %s", tt.name) + } + }) + } +} + +func TestXattrsEqual(t *testing.T) { + t.Run("both_nil", func(t *testing.T) { + if !xattrsEqual(nil, nil) { + t.Error("expected equal") + } + }) + t.Run("one_nil", func(t *testing.T) { + if xattrsEqual(nil, map[string][]byte{"k": {}}) { + t.Error("expected not equal") + } + }) + t.Run("empty_maps", func(t *testing.T) { + if !xattrsEqual(map[string][]byte{}, map[string][]byte{}) { + t.Error("expected equal") + } + }) +} + +func TestBytesEqual(t *testing.T) { + tests := []struct { + name string + a, b []byte + want bool + }{ + {"both nil", nil, nil, true}, + {"both empty", []byte{}, []byte{}, true}, + {"equal", []byte{1, 2, 3}, []byte{1, 2, 3}, true}, + {"different length", []byte{1, 2}, []byte{1, 2, 3}, false}, + {"different content", []byte{1, 2, 3}, []byte{1, 2, 4}, false}, + {"one nil", nil, []byte{1}, false}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := bytesEqual(tt.a, tt.b); got != tt.want { + t.Errorf("bytesEqual(%v, %v) = %v, want %v", tt.a, tt.b, got, tt.want) + } + }) + } +} diff --git a/pkg/source/gdrive.go b/pkg/source/gdrive.go index a967df9..a28c523 100644 --- 
a/pkg/source/gdrive.go +++ b/pkg/source/gdrive.go @@ -259,6 +259,7 @@ func (s *GDriveSource) Info() core.SourceInfo { Path: s.rootPath, PathID: s.selectedRootID(), DriveName: "My Drive", + FsType: "google-drive", } if s.isSharedDrive() { diff --git a/pkg/source/local_source.go b/pkg/source/local_source.go index 731a773..55ec414 100644 --- a/pkg/source/local_source.go +++ b/pkg/source/local_source.go @@ -49,6 +49,10 @@ func (s *LocalSource) Info() core.SourceInfo { Path: displayPath, PathID: pathID, DriveName: s.volumeLabel, + FsType: s.fsType, + + VolumeUUID: s.volumeUUID, + VolumeLabel: s.volumeLabel, Identity: func() string { if s.volumeUUID != "" { return s.volumeUUID @@ -61,7 +65,11 @@ func (s *LocalSource) Info() core.SourceInfo { // localOptions holds configuration for a local filesystem source. type localOptions struct { excludePatterns []string - volumeUUID string // explicit override for volume UUID + volumeUUID string // explicit override for volume UUID + skipMode bool // skip Mode, Uid, Gid, Btime, Flags collection + skipFlags bool // skip Flags ioctl only (Linux); no-op on macOS + skipXattrs bool // skip extended attribute collection + xattrNamespaces []string // restrict xattr collection to these prefixes } // LocalOption configures a local filesystem source. @@ -83,6 +91,29 @@ func WithVolumeUUID(uuid string) LocalOption { } } +// WithSkipMode disables collection of POSIX mode, uid, gid, btime, and flags. +func WithSkipMode() LocalOption { + return func(o *localOptions) { o.skipMode = true } +} + +// WithSkipFlags disables the FS_IOC_GETFLAGS ioctl on Linux. On macOS, +// flags come free from stat and this option has no effect. +func WithSkipFlags() LocalOption { + return func(o *localOptions) { o.skipFlags = true } +} + +// WithSkipXattrs disables extended attribute collection. 
+func WithSkipXattrs() LocalOption { + return func(o *localOptions) { o.skipXattrs = true } +} + +// WithXattrNamespaces restricts xattr collection to attributes whose name +// starts with one of the given prefixes (e.g. "user.", "com.apple."). +// An empty slice (default) collects all readable attributes. +func WithXattrNamespaces(prefixes []string) LocalOption { + return func(o *localOptions) { o.xattrNamespaces = prefixes } +} + // LocalSource implements Source for local filesystem. type LocalSource struct { rootPath string @@ -90,6 +121,11 @@ type LocalSource struct { volumeUUID string volumeLabel string volumeMountPoint string + fsType string + skipMode bool + skipFlags bool + skipXattrs bool + xattrNamespaces []string } // NewLocalSource creates a local filesystem source rooted at rootPath. @@ -110,6 +146,11 @@ func NewLocalSource(rootPath string, opts ...LocalOption) *LocalSource { volumeUUID: uuid, volumeLabel: label, volumeMountPoint: mountPoint, + fsType: detectFsType(rootPath), + skipMode: cfg.skipMode, + skipFlags: cfg.skipFlags, + skipXattrs: cfg.skipXattrs, + xattrNamespaces: cfg.xattrNamespaces, } } @@ -161,6 +202,8 @@ func (s *LocalSource) Walk(ctx context.Context, callback func(core.FileMeta) err Mtime: info.ModTime().Unix(), } + readExtendedMeta(path, &meta, s.skipMode, s.skipFlags, s.skipXattrs, s.xattrNamespaces) + return callback(meta) }) } diff --git a/pkg/source/local_source_xattr_darwin.go b/pkg/source/local_source_xattr_darwin.go new file mode 100644 index 0000000..1867495 --- /dev/null +++ b/pkg/source/local_source_xattr_darwin.go @@ -0,0 +1,34 @@ +//go:build darwin + +package source + +import ( + "syscall" +) + +// readBtime returns the file birth time from macOS stat. +func readBtime(_ string, st *syscall.Stat_t) int64 { + return st.Birthtimespec.Sec +} + +// readFlags returns the per-file flags from macOS stat (UF_IMMUTABLE, etc.). 
+func readFlags(_ string, st *syscall.Stat_t) uint32 { + return st.Flags +} + +// detectFsType returns the filesystem type name for the given path on macOS. +func detectFsType(path string) string { + var stat syscall.Statfs_t + if err := syscall.Statfs(path, &stat); err != nil { + return "" + } + // F_fstypename is a null-terminated char array on macOS. + name := make([]byte, 0, len(stat.Fstypename)) + for _, c := range stat.Fstypename { + if c == 0 { + break + } + name = append(name, byte(c)) + } + return string(name) +} diff --git a/pkg/source/local_source_xattr_linux.go b/pkg/source/local_source_xattr_linux.go new file mode 100644 index 0000000..405b5e7 --- /dev/null +++ b/pkg/source/local_source_xattr_linux.go @@ -0,0 +1,82 @@ +//go:build linux + +package source + +import ( + "fmt" + "os" + "syscall" + "unsafe" + + "golang.org/x/sys/unix" +) + +// readBtime returns the file birth time via statx on Linux. +// Returns 0 if the kernel or filesystem does not support btime. +func readBtime(path string, _ *syscall.Stat_t) int64 { + var stx unix.Statx_t + err := unix.Statx(unix.AT_FDCWD, path, unix.AT_SYMLINK_NOFOLLOW, unix.STATX_BTIME, &stx) + if err != nil { + return 0 + } + if stx.Mask&unix.STATX_BTIME != 0 && stx.Btime.Sec != 0 { + return stx.Btime.Sec + } + return 0 +} + +// readFlags returns the per-file flags via FS_IOC_GETFLAGS ioctl on Linux. +// Returns 0 on filesystems that don't support it. +func readFlags(path string, _ *syscall.Stat_t) uint32 { + f, err := os.Open(path) + if err != nil { + return 0 + } + defer func() { _ = f.Close() }() + + var flags uint32 + _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, f.Fd(), unix.FS_IOC_GETFLAGS, uintptr(unsafe.Pointer(&flags))) + if errno != 0 { + return 0 + } + return flags +} + +// detectFsType returns the filesystem type name for the given path on Linux. 
+func detectFsType(path string) string { + var stat unix.Statfs_t + if err := unix.Statfs(path, &stat); err != nil { + return "" + } + return fsTypeName(stat.Type) +} + +// fsTypeName maps Linux filesystem magic numbers to human-readable names. +func fsTypeName(magic int64) string { + switch magic { + case 0xEF53: + return "ext4" + case 0x9123683E: + return "btrfs" + case 0x58465342: + return "xfs" + case 0x2FC12FC1: + return "zfs" + case 0x6969: + return "nfs" + case 0x01021994: + return "tmpfs" + case 0x5346544E: + return "ntfs" + case 0x4D44: + return "fat" + case -137439006848, 0x2011BAB0: // exfat magic (may vary) + return "exfat" + case 0x61756673: + return "aufs" + case 0x794C7630: + return "overlayfs" + default: + return fmt.Sprintf("unknown:0x%X", uint64(magic)) + } +} diff --git a/pkg/source/local_source_xattr_stub.go b/pkg/source/local_source_xattr_stub.go new file mode 100644 index 0000000..706539f --- /dev/null +++ b/pkg/source/local_source_xattr_stub.go @@ -0,0 +1,12 @@ +//go:build !linux && !darwin + +package source + +import "github.com/cloudstic/cli/internal/core" + +// readExtendedMeta is a no-op on platforms where extended metadata +// collection is not supported. +func readExtendedMeta(_ string, _ *core.FileMeta, _, _, _ bool, _ []string) {} + +// detectFsType is a no-op on unsupported platforms. +func detectFsType(_ string) string { return "" } diff --git a/pkg/source/local_source_xattr_test.go b/pkg/source/local_source_xattr_test.go new file mode 100644 index 0000000..ddc6da4 --- /dev/null +++ b/pkg/source/local_source_xattr_test.go @@ -0,0 +1,323 @@ +//go:build linux || darwin + +package source + +import ( + "context" + "os" + "path/filepath" + "runtime" + "testing" + + "github.com/cloudstic/cli/internal/core" + "golang.org/x/sys/unix" +) + +func TestLocalSource_Walk_ExtendedMeta(t *testing.T) { + tmpDir := t.TempDir() + + // Create a file with specific permissions. 
+ filePath := filepath.Join(tmpDir, "script.sh") + if err := os.WriteFile(filePath, []byte("#!/bin/sh"), 0755); err != nil { + t.Fatal(err) + } + + s := NewLocalSource(tmpDir) + + var files []core.FileMeta + err := s.Walk(context.Background(), func(fm core.FileMeta) error { + files = append(files, fm) + return nil + }) + if err != nil { + t.Fatal(err) + } + + if len(files) != 1 { + t.Fatalf("expected 1 file, got %d", len(files)) + } + + fm := files[0] + + // Mode should have execute bit set. + if fm.Mode == 0 { + t.Error("expected Mode to be populated") + } + if fm.Mode&0111 == 0 { + t.Errorf("expected execute bits in mode %04o", fm.Mode) + } + + // Uid/Gid should be populated (at least on local filesystem). + // We just check they're not both zero on a non-root system. + if os.Getuid() != 0 && fm.Uid == 0 { + t.Error("expected non-zero Uid for non-root user") + } + + // Btime may or may not be available depending on fs/kernel. + // On macOS it should always be present. + if runtime.GOOS == "darwin" && fm.Btime == 0 { + t.Error("expected Btime to be populated on macOS") + } +} + +func TestLocalSource_Walk_SkipMode(t *testing.T) { + tmpDir := t.TempDir() + + if err := os.WriteFile(filepath.Join(tmpDir, "file.txt"), []byte("data"), 0644); err != nil { + t.Fatal(err) + } + + s := NewLocalSource(tmpDir, WithSkipMode()) + + var files []core.FileMeta + err := s.Walk(context.Background(), func(fm core.FileMeta) error { + files = append(files, fm) + return nil + }) + if err != nil { + t.Fatal(err) + } + + if len(files) != 1 { + t.Fatalf("expected 1 file, got %d", len(files)) + } + + fm := files[0] + if fm.Mode != 0 || fm.Uid != 0 || fm.Gid != 0 || fm.Btime != 0 || fm.Flags != 0 { + t.Errorf("expected all extended fields to be zero with SkipMode, got mode=%o uid=%d gid=%d btime=%d flags=%d", + fm.Mode, fm.Uid, fm.Gid, fm.Btime, fm.Flags) + } +} + +func TestLocalSource_Walk_SkipXattrs(t *testing.T) { + tmpDir := t.TempDir() + + if err := os.WriteFile(filepath.Join(tmpDir, 
"file.txt"), []byte("data"), 0644); err != nil { + t.Fatal(err) + } + + s := NewLocalSource(tmpDir, WithSkipXattrs()) + + var files []core.FileMeta + err := s.Walk(context.Background(), func(fm core.FileMeta) error { + files = append(files, fm) + return nil + }) + if err != nil { + t.Fatal(err) + } + + if len(files) != 1 { + t.Fatalf("expected 1 file, got %d", len(files)) + } + + if files[0].Xattrs != nil { + t.Error("expected Xattrs to be nil with SkipXattrs") + } +} + +func TestLocalSource_Walk_SkipFlags(t *testing.T) { + tmpDir := t.TempDir() + + if err := os.WriteFile(filepath.Join(tmpDir, "file.txt"), []byte("data"), 0644); err != nil { + t.Fatal(err) + } + + s := NewLocalSource(tmpDir, WithSkipFlags()) + + var files []core.FileMeta + err := s.Walk(context.Background(), func(fm core.FileMeta) error { + files = append(files, fm) + return nil + }) + if err != nil { + t.Fatal(err) + } + + if len(files) != 1 { + t.Fatalf("expected 1 file, got %d", len(files)) + } + + // Mode/Uid/Gid should still be populated (only flags are skipped). + fm := files[0] + if fm.Mode == 0 { + t.Error("expected Mode to be populated even with SkipFlags") + } + // Flags should be zero (skipped). + if fm.Flags != 0 { + t.Errorf("expected Flags=0 with SkipFlags, got %d", fm.Flags) + } +} + +func TestLocalSource_Walk_XattrNamespaces(t *testing.T) { + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "file.txt") + + if err := os.WriteFile(filePath, []byte("data"), 0644); err != nil { + t.Fatal(err) + } + + // Set an xattr to test namespace filtering. + if runtime.GOOS == "darwin" { + // macOS: xattrs don't have namespace prefixes, but we use com.test. + if err := unix.Setxattr(filePath, "com.test.tag", []byte("val"), 0); err != nil { + t.Skip("cannot set xattr:", err) + } + } else { + if err := unix.Setxattr(filePath, "user.test.tag", []byte("val"), 0); err != nil { + t.Skip("cannot set xattr:", err) + } + } + + // Use a namespace that doesn't match the xattr we set. 
+ s := NewLocalSource(tmpDir, WithXattrNamespaces([]string{"nomatch."})) + + var files []core.FileMeta + err := s.Walk(context.Background(), func(fm core.FileMeta) error { + files = append(files, fm) + return nil + }) + if err != nil { + t.Fatal(err) + } + + if len(files) != 1 { + t.Fatalf("expected 1 file, got %d", len(files)) + } + + if files[0].Xattrs != nil { + t.Error("expected Xattrs to be nil when namespace doesn't match") + } +} + +func TestLocalSource_Walk_Xattr_RoundTrip(t *testing.T) { + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "file.txt") + + if err := os.WriteFile(filePath, []byte("data"), 0644); err != nil { + t.Fatal(err) + } + + // Set an xattr. + var attrName string + if runtime.GOOS == "darwin" { + attrName = "com.test.tag" + } else { + attrName = "user.test.tag" + } + if err := unix.Setxattr(filePath, attrName, []byte("hello"), 0); err != nil { + t.Skip("cannot set xattr:", err) + } + + s := NewLocalSource(tmpDir) + + var files []core.FileMeta + err := s.Walk(context.Background(), func(fm core.FileMeta) error { + files = append(files, fm) + return nil + }) + if err != nil { + t.Fatal(err) + } + + if len(files) != 1 { + t.Fatalf("expected 1 file, got %d", len(files)) + } + + fm := files[0] + if fm.Xattrs == nil { + t.Fatal("expected Xattrs to be populated") + } + + val, ok := fm.Xattrs[attrName] + if !ok { + t.Fatalf("expected xattr %q to be present, got keys: %v", attrName, fm.Xattrs) + } + if string(val) != "hello" { + t.Errorf("expected xattr value %q, got %q", "hello", string(val)) + } +} + +func TestListXattrs_NoAttrs(t *testing.T) { + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "file.txt") + if err := os.WriteFile(filePath, []byte("data"), 0644); err != nil { + t.Fatal(err) + } + + // File with no xattrs should return nil. + result := listXattrs(filePath, nil) + // May or may not be nil depending on macOS quarantine attrs. + // Just verify it doesn't error/panic. 
+ _ = result +} + +func TestListXattrs_NonexistentPath(t *testing.T) { + result := listXattrs("/nonexistent/path/xyz", nil) + if result != nil { + t.Error("expected nil for nonexistent path") + } +} + +func TestGetXattr_NonexistentAttr(t *testing.T) { + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "file.txt") + if err := os.WriteFile(filePath, []byte("data"), 0644); err != nil { + t.Fatal(err) + } + + _, err := getXattr(filePath, "user.nonexistent") + if err == nil { + t.Error("expected error for nonexistent xattr") + } +} + +func TestLocalSource_Info_FsType(t *testing.T) { + tmpDir := t.TempDir() + s := NewLocalSource(tmpDir) + info := s.Info() + + if info.FsType == "" { + t.Error("expected FsType to be populated on local filesystem") + } +} + +func TestSplitXattrNames(t *testing.T) { + tests := []struct { + name string + buf []byte + want []string + }{ + {"empty", nil, nil}, + {"single", []byte("user.tag\x00"), []string{"user.tag"}}, + {"multiple", []byte("user.a\x00user.b\x00"), []string{"user.a", "user.b"}}, + {"no trailing null", []byte("user.a"), []string{"user.a"}}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := splitXattrNames(tt.buf) + if len(got) != len(tt.want) { + t.Errorf("got %v, want %v", got, tt.want) + return + } + for i := range got { + if got[i] != tt.want[i] { + t.Errorf("got[%d] = %q, want %q", i, got[i], tt.want[i]) + } + } + }) + } +} + +func TestHasPrefix(t *testing.T) { + if !hasPrefix("user.tag", []string{"user."}) { + t.Error("expected match") + } + if hasPrefix("security.label", []string{"user.", "com.apple."}) { + t.Error("expected no match") + } + if !hasPrefix("com.apple.quarantine", []string{"com.apple."}) { + t.Error("expected match") + } +} diff --git a/pkg/source/local_source_xattr_unix.go b/pkg/source/local_source_xattr_unix.go new file mode 100644 index 0000000..8f293d6 --- /dev/null +++ b/pkg/source/local_source_xattr_unix.go @@ -0,0 +1,116 @@ +//go:build linux || darwin + 
+package source + +import ( + "strings" + "syscall" + + "github.com/cloudstic/cli/internal/core" + "golang.org/x/sys/unix" +) + +// readExtendedMeta populates Mode, Uid, Gid, Btime, Flags, and Xattrs on +// the given FileMeta by inspecting the file at path. The skip flags control +// which metadata groups are collected. +func readExtendedMeta(path string, meta *core.FileMeta, skipMode, skipFlags, skipXattrs bool, xattrNamespaces []string) { + if !skipMode { + var st syscall.Stat_t + if err := syscall.Lstat(path, &st); err == nil { + meta.Mode = uint32(st.Mode) & 0xFFF + meta.Uid = st.Uid + meta.Gid = st.Gid + meta.Btime = readBtime(path, &st) + if !skipFlags { + meta.Flags = readFlags(path, &st) + } + } + } + + if !skipXattrs { + meta.Xattrs = listXattrs(path, xattrNamespaces) + } +} + +// listXattrs retrieves all extended attributes for path, optionally filtered +// by namespace prefixes. Returns nil if there are no attributes or on error. +func listXattrs(path string, namespaces []string) map[string][]byte { + sz, err := unix.Listxattr(path, nil) + if err != nil || sz <= 0 { + return nil + } + + buf := make([]byte, sz) + sz, err = unix.Listxattr(path, buf) + if err != nil || sz <= 0 { + return nil + } + + // Parse null-separated attribute names. + names := splitXattrNames(buf[:sz]) + if len(names) == 0 { + return nil + } + + xattrs := make(map[string][]byte, len(names)) + for _, name := range names { + if len(namespaces) > 0 && !hasPrefix(name, namespaces) { + continue + } + val, err := getXattr(path, name) + if err != nil { + continue // attribute may have been removed between list and get + } + xattrs[name] = val + } + + if len(xattrs) == 0 { + return nil + } + return xattrs +} + +// getXattr retrieves a single extended attribute value. 
+func getXattr(path, name string) ([]byte, error) { + sz, err := unix.Getxattr(path, name, nil) + if err != nil { + return nil, err + } + if sz == 0 { + return []byte{}, nil + } + buf := make([]byte, sz) + sz, err = unix.Getxattr(path, name, buf) + if err != nil { + return nil, err + } + return buf[:sz], nil +} + +// splitXattrNames splits a null-separated list of attribute names. +func splitXattrNames(buf []byte) []string { + var names []string + for len(buf) > 0 { + idx := 0 + for idx < len(buf) && buf[idx] != 0 { + idx++ + } + if idx > 0 { + names = append(names, string(buf[:idx])) + } + buf = buf[idx:] + if len(buf) > 0 { + buf = buf[1:] // skip null terminator + } + } + return names +} + +func hasPrefix(name string, prefixes []string) bool { + for _, p := range prefixes { + if strings.HasPrefix(name, p) { + return true + } + } + return false +} diff --git a/pkg/source/onedrive.go b/pkg/source/onedrive.go index a6b9d35..e9fbffb 100644 --- a/pkg/source/onedrive.go +++ b/pkg/source/onedrive.go @@ -214,6 +214,7 @@ func (s *OneDriveSource) Info() core.SourceInfo { Path: s.rootPath, PathID: s.rootID, DriveName: "My Drive", + FsType: "onedrive", } if s.driveID != "" { info.Identity = s.driveID diff --git a/pkg/source/sftp_source.go b/pkg/source/sftp_source.go index 298ba4e..742941b 100644 --- a/pkg/source/sftp_source.go +++ b/pkg/source/sftp_source.go @@ -142,6 +142,7 @@ func (s *SFTPSource) Info() core.SourceInfo { Path: s.rootPath, Identity: identity, PathID: s.rootPath, + FsType: "sftp", } } @@ -204,6 +205,13 @@ func (s *SFTPSource) Walk(ctx context.Context, callback func(core.FileMeta) erro Mtime: info.ModTime().Unix(), } + // Extract POSIX metadata from SFTPv3 Attrs. 
+ if fs, ok := info.Sys().(*sftp.FileStat); ok { + meta.Mode = fs.Mode & 0xFFF + meta.Uid = fs.UID + meta.Gid = fs.GID + } + if err := callback(meta); err != nil { return err } diff --git a/rfcs/0004-extended-file-attributes.md b/rfcs/0004-extended-file-attributes.md index 702231d..fd499b5 100644 --- a/rfcs/0004-extended-file-attributes.md +++ b/rfcs/0004-extended-file-attributes.md @@ -1,6 +1,6 @@ # RFC 0004: Extended File Attributes -* **Status:** Proposed +* **Status:** Implemented * **Date:** 2026-03-07 * **Affects:** `pkg/source/local_source.go`, `pkg/source/sftp_source.go`, `internal/core/models.go`, `internal/engine/backup_scan.go`, `internal/engine/restore.go` @@ -616,3 +616,16 @@ The current restore path produces a ZIP archive. A `restore --target ` mode * Handling permission elevation: `Lchown` requires root on most systems. * Conflict resolution: what to do when target paths already exist. * Progress reporting and resume on interrupted restores. + +### RFC Cloud source metadata + +Google Drive and OneDrive expose metadata that doesn't map to POSIX attributes but is worth preserving for round-trip fidelity. This would extend the `Extra` map rather than adding top-level `FileMeta` fields: + +* **Birth time (`Btime`)**: Google Drive exposes `createdTime`, OneDrive exposes `createdDateTime`. The `Btime` field proposed in this RFC covers all sources — cloud sources should populate it alongside local/SFTP. +* **MIME type**: Google Drive already stores `mimeType` in `Extra`; OneDrive deserializes `file.mimeType` but discards it. Both should be consistent. +* **Content hash**: OneDrive exposes `file.hashes.sha256Hash` but we don't capture it in `ContentHash`. Google Drive already populates `sha256Checksum`. +* **File owner**: OneDrive exposes `createdBy`/`lastModifiedBy` but doesn't populate `FileMeta.Owner`. Google Drive already captures the first owner's email. 
+* **Google Drive properties**: `file.Properties` and `file.AppProperties` are user/app-defined key-value pairs, conceptually similar to filesystem xattrs. These could be stored under `Extra["properties"]` if round-trip preservation is desired. +* **OneDrive media facets**: `image`, `photo`, `video`, and `audio` facets contain EXIF-like metadata (dimensions, camera model, duration). Useful for media-heavy backups but increases object size. + +Implementation should be a separate effort since it doesn't require schema changes beyond what this RFC already proposes (`Btime`) and consistent use of `Extra`. From 9c1c53436e2ac94fc29b151154ed4f2f1569fd4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Hermann?= Date: Mon, 16 Mar 2026 08:37:43 +0100 Subject: [PATCH 2/2] fix: address RFC0004 review feedback --- cmd/cloudstic/cmd_backup.go | 22 +++++++++++++++++----- cmd/cloudstic/cmd_backup_test.go | 13 +++++++++++++ cmd/cloudstic/completion.go | 4 ++-- cmd/cloudstic/usage.go | 2 +- docs/user-guide.md | 16 ++++++++-------- pkg/source/local_source.go | 3 +-- pkg/source/local_source_xattr_unix.go | 8 ++++---- 7 files changed, 46 insertions(+), 22 deletions(-) diff --git a/cmd/cloudstic/cmd_backup.go b/cmd/cloudstic/cmd_backup.go index dc4dc9d..406476f 100644 --- a/cmd/cloudstic/cmd_backup.go +++ b/cmd/cloudstic/cmd_backup.go @@ -58,7 +58,7 @@ func parseBackupArgs() *backupArgs { onedriveClientID := fs.String("onedrive-client-id", envDefault("ONEDRIVE_CLIENT_ID", ""), "OneDrive OAuth client ID") onedriveTokenFile := fs.String("onedrive-token-file", envDefault("ONEDRIVE_TOKEN_FILE", ""), "Path to OneDrive OAuth token file") skipMode := fs.Bool("skip-mode", false, "Skip POSIX mode, uid, gid, btime, and flags collection") - skipFlags := fs.Bool("skip-flags", false, "Skip file flags ioctl (Linux only; no effect on macOS)") + skipFlags := fs.Bool("skip-flags", false, "Skip file flags collection") skipXattrs := fs.Bool("skip-xattrs", false, "Skip extended attribute collection") 
xattrNamespaces := fs.String("xattr-namespaces", "", "Restrict xattr collection to these prefixes (comma-separated, e.g. \"user.,com.apple.\")") fs.Var(&a.tags, "tag", "Tag to apply to the snapshot (can be specified multiple times)") @@ -625,11 +625,10 @@ func initSource(ctx context.Context, sourceURI string, skipNativeFiles bool, vol opts = append(opts, source.WithSkipXattrs()) } if xattrNamespaces != "" { - prefixes := strings.Split(xattrNamespaces, ",") - for i := range prefixes { - prefixes[i] = strings.TrimSpace(prefixes[i]) + prefixes := parseXattrNamespacePrefixes(xattrNamespaces) + if len(prefixes) > 0 { + opts = append(opts, source.WithXattrNamespaces(prefixes)) } - opts = append(opts, source.WithXattrNamespaces(prefixes)) } return source.NewLocalSource(uri.path, opts...), nil case "sftp": @@ -697,6 +696,19 @@ func initSource(ctx context.Context, sourceURI string, skipNativeFiles bool, vol } } +func parseXattrNamespacePrefixes(raw string) []string { + parts := strings.Split(raw, ",") + prefixes := make([]string, 0, len(parts)) + for _, p := range parts { + p = strings.TrimSpace(p) + if p == "" { + continue + } + prefixes = append(prefixes, p) + } + return prefixes +} + // resolveTokenPath returns the token file path to use. If explicit is non-empty // it is used as-is; otherwise the filename is placed in the cloudstic config dir. 
func resolveTokenPath(explicit, defaultFilename string) (string, error) { diff --git a/cmd/cloudstic/cmd_backup_test.go b/cmd/cloudstic/cmd_backup_test.go index adab11b..f3165f0 100644 --- a/cmd/cloudstic/cmd_backup_test.go +++ b/cmd/cloudstic/cmd_backup_test.go @@ -89,6 +89,19 @@ func TestInitSource_UnsupportedType(t *testing.T) { } } +func TestParseXattrNamespacePrefixes(t *testing.T) { + got := parseXattrNamespacePrefixes("user., com.apple., ,security.,") + want := []string{"user.", "com.apple.", "security."} + if len(got) != len(want) { + t.Fatalf("len=%d want=%d (%v)", len(got), len(want), got) + } + for i := range want { + if got[i] != want[i] { + t.Fatalf("got[%d]=%q want=%q", i, got[i], want[i]) + } + } +} + func TestPrintUsage_Smoke(t *testing.T) { // Verify printUsage doesn't panic. printUsage() diff --git a/cmd/cloudstic/completion.go b/cmd/cloudstic/completion.go index fa86e5d..cf02ffb 100644 --- a/cmd/cloudstic/completion.go +++ b/cmd/cloudstic/completion.go @@ -323,7 +323,7 @@ _cloudstic() { '*-tag[Tag for the snapshot]:tag:' \ '-dry-run[Scan without writing]' \ '-skip-mode[Skip POSIX mode/uid/gid/btime/flags]' \ - '-skip-flags[Skip file flags ioctl (Linux only)]' \ + '-skip-flags[Skip file flags collection]' \ '-skip-xattrs[Skip extended attribute collection]' \ '-xattr-namespaces[Restrict xattr collection to prefixes]:prefixes:' ;; @@ -657,7 +657,7 @@ complete -c cloudstic -n '__fish_seen_subcommand_from backup' -l onedrive-token- complete -c cloudstic -n '__fish_seen_subcommand_from backup' -l tag -x -d 'Tag for the snapshot' complete -c cloudstic -n '__fish_seen_subcommand_from backup' -l dry-run -d 'Scan without writing' complete -c cloudstic -n '__fish_seen_subcommand_from backup' -l skip-mode -d 'Skip POSIX mode/uid/gid/btime/flags' -complete -c cloudstic -n '__fish_seen_subcommand_from backup' -l skip-flags -d 'Skip file flags ioctl (Linux only)' +complete -c cloudstic -n '__fish_seen_subcommand_from backup' -l skip-flags -d 'Skip file flags 
collection' complete -c cloudstic -n '__fish_seen_subcommand_from backup' -l skip-xattrs -d 'Skip extended attribute collection' complete -c cloudstic -n '__fish_seen_subcommand_from backup' -l xattr-namespaces -x -d 'Restrict xattr collection to prefixes' diff --git a/cmd/cloudstic/usage.go b/cmd/cloudstic/usage.go index 4f8802f..d4394e1 100644 --- a/cmd/cloudstic/usage.go +++ b/cmd/cloudstic/usage.go @@ -138,7 +138,7 @@ func printUsage() { {"-exclude-file ", "Load exclude patterns from file (one per line, gitignore syntax)"}, {"-dry-run", "Scan source and report changes without writing to the store"}, {"-skip-mode", "Skip POSIX mode, uid, gid, btime, and flags collection"}, - {"-skip-flags", "Skip file flags ioctl (Linux only; no effect on macOS)"}, + {"-skip-flags", "Skip file flags collection"}, {"-skip-xattrs", "Skip extended attribute collection"}, {"-xattr-namespaces ", "Restrict xattr collection to prefixes (comma-separated)"}, }) diff --git a/docs/user-guide.md b/docs/user-guide.md index 05a1de7..27b2f30 100644 --- a/docs/user-guide.md +++ b/docs/user-guide.md @@ -315,7 +315,7 @@ cloudstic backup -source local:~/Documents -dry-run | `-exclude-file` | | Path to file containing exclude patterns, one per line | | `-volume-uuid` | | Override volume UUID for local source (enables cross-machine incremental backup for portable drives) | | `-skip-mode` | | Skip POSIX metadata collection (mode, uid, gid, btime, flags) | -| `-skip-flags` | | Skip file flags collection (Linux `ioctl`; no effect on macOS) | +| `-skip-flags` | | Skip file flags collection | | `-skip-xattrs` | | Skip extended attribute collection | | `-xattr-namespaces` | | Comma-separated xattr namespace prefixes to collect (e.g. 
`user.,com.apple.`) | | `-dry-run` | `false` | Scan source and report changes without writing to the store | @@ -1161,7 +1161,7 @@ cloudstic backup -source local:~/project -exclude-file .backupignore | `-exclude-file` | | File containing exclude patterns (one per line) | | `-volume-uuid` | | Override volume UUID (see [Portable drives](#portable-drives)) | | `-skip-mode` | | Skip POSIX metadata collection (mode, uid, gid, btime, flags) | -| `-skip-flags` | | Skip file flags collection (Linux `ioctl`; no effect on macOS) | +| `-skip-flags` | | Skip file flags collection | | `-skip-xattrs` | | Skip extended attribute collection | | `-xattr-namespaces` | | Comma-separated xattr namespace prefixes to collect (e.g. `user.,com.apple.`) | @@ -1171,16 +1171,16 @@ Cloudstic walks the directory recursively. Symbolic links are not followed. ```bash # Skip all POSIX metadata (mode, uid, gid, btime, flags) -cloudstic backup -source local -source-path /data -skip-mode +cloudstic backup -source local:/data -skip-mode -# Skip only file flags (Linux ioctl; no effect on macOS) -cloudstic backup -source local -source-path /data -skip-flags +# Skip only file flags +cloudstic backup -source local:/data -skip-flags # Skip extended attributes -cloudstic backup -source local -source-path /data -skip-xattrs +cloudstic backup -source local:/data -skip-xattrs # Collect only user.* xattrs (skip security.*, system.*, etc.) -cloudstic backup -source local -source-path /data -xattr-namespaces "user." +cloudstic backup -source local:/data -xattr-namespaces "user." ``` See [Exclude patterns](#exclude-patterns) for the full pattern syntax reference. @@ -1253,7 +1253,7 @@ If neither `-source-sftp-password` nor `-source-sftp-key` is provided, Cloudstic SFTP backups capture file permissions (mode bits) and numeric ownership (uid/gid) via the SFTPv3 protocol. Birth time, file flags, and extended attributes are not available over SFTP. -Cloudstic walks the remote directory recursively. 
File permissions are not preserved — only name, size, modification time, and content are captured. +Cloudstic walks the remote directory recursively. Mode bits and uid/gid are captured in snapshot metadata. Restore application of these fields depends on restore format support. The `-exclude` and `-exclude-file` flags work with SFTP sources. See [Exclude patterns](#exclude-patterns) for the full pattern syntax. diff --git a/pkg/source/local_source.go b/pkg/source/local_source.go index 55ec414..117e68c 100644 --- a/pkg/source/local_source.go +++ b/pkg/source/local_source.go @@ -96,8 +96,7 @@ func WithSkipMode() LocalOption { return func(o *localOptions) { o.skipMode = true } } -// WithSkipFlags disables the FS_IOC_GETFLAGS ioctl on Linux. On macOS, -// flags come free from stat and this option has no effect. +// WithSkipFlags disables file flags collection. func WithSkipFlags() LocalOption { return func(o *localOptions) { o.skipFlags = true } } diff --git a/pkg/source/local_source_xattr_unix.go b/pkg/source/local_source_xattr_unix.go index 8f293d6..f7f38f0 100644 --- a/pkg/source/local_source_xattr_unix.go +++ b/pkg/source/local_source_xattr_unix.go @@ -35,13 +35,13 @@ func readExtendedMeta(path string, meta *core.FileMeta, skipMode, skipFlags, ski // listXattrs retrieves all extended attributes for path, optionally filtered // by namespace prefixes. Returns nil if there are no attributes or on error. func listXattrs(path string, namespaces []string) map[string][]byte { - sz, err := unix.Listxattr(path, nil) + sz, err := unix.Llistxattr(path, nil) if err != nil || sz <= 0 { return nil } buf := make([]byte, sz) - sz, err = unix.Listxattr(path, buf) + sz, err = unix.Llistxattr(path, buf) if err != nil || sz <= 0 { return nil } @@ -72,7 +72,7 @@ func listXattrs(path string, namespaces []string) map[string][]byte { // getXattr retrieves a single extended attribute value. 
func getXattr(path, name string) ([]byte, error) { - sz, err := unix.Getxattr(path, name, nil) + sz, err := unix.Lgetxattr(path, name, nil) if err != nil { return nil, err } @@ -80,7 +80,7 @@ func getXattr(path, name string) ([]byte, error) { return []byte{}, nil } buf := make([]byte, sz) - sz, err = unix.Getxattr(path, name, buf) + sz, err = unix.Lgetxattr(path, name, buf) if err != nil { return nil, err }