diff --git a/cmd/cloudstic/cmd_backup.go b/cmd/cloudstic/cmd_backup.go index 4df208e..db14b4b 100644 --- a/cmd/cloudstic/cmd_backup.go +++ b/cmd/cloudstic/cmd_backup.go @@ -395,8 +395,11 @@ func cloneGlobalFlags(src *globalFlags) *globalFlags { clone := *src store := *src.store + profile := *src.profile + profilesFile := *src.profilesFile s3Endpoint := *src.s3Endpoint s3Region := *src.s3Region + s3Profile := *src.s3Profile s3AccessKey := *src.s3AccessKey s3SecretKey := *src.s3SecretKey sourceSFTPPassword := *src.sourceSFTPPassword @@ -416,8 +419,11 @@ func cloneGlobalFlags(src *globalFlags) *globalFlags { debug := *src.debug clone.store = &store + clone.profile = &profile + clone.profilesFile = &profilesFile clone.s3Endpoint = &s3Endpoint clone.s3Region = &s3Region + clone.s3Profile = &s3Profile clone.s3AccessKey = &s3AccessKey clone.s3SecretKey = &s3SecretKey clone.sourceSFTPPassword = &sourceSFTPPassword diff --git a/cmd/cloudstic/cmd_backup_profile_test.go b/cmd/cloudstic/cmd_backup_profile_test.go index 5e109c0..0ffc976 100644 --- a/cmd/cloudstic/cmd_backup_profile_test.go +++ b/cmd/cloudstic/cmd_backup_profile_test.go @@ -272,9 +272,15 @@ func TestApplyProfileAuthToBackupArgs_CLIFlagsPreserved(t *testing.T) { func TestCloneGlobalFlags_Independence(t *testing.T) { orig := newTestGlobalFlags() *orig.store = "original-store" + *orig.profile = "orig-profile" + *orig.profilesFile = "/tmp/orig-profiles.yaml" + *orig.s3Profile = "orig-s3-profile" clone := cloneGlobalFlags(orig) *clone.store = "modified-store" + *clone.profile = "clone-profile" + *clone.profilesFile = "/tmp/clone-profiles.yaml" + *clone.s3Profile = "clone-s3-profile" if *orig.store != "original-store" { t.Fatalf("original store=%q want original-store", *orig.store) @@ -282,6 +288,15 @@ func TestCloneGlobalFlags_Independence(t *testing.T) { if *clone.store != "modified-store" { t.Fatalf("clone store=%q want modified-store", *clone.store) } + if *orig.profile != "orig-profile" { + t.Fatalf("original profile=%q want orig-profile", *orig.profile) + } + if *orig.profilesFile != "/tmp/orig-profiles.yaml" { + t.Fatalf("original profilesFile=%q want /tmp/orig-profiles.yaml", *orig.profilesFile) + } + if *orig.s3Profile != "orig-s3-profile" { + t.Fatalf("original s3Profile=%q want orig-s3-profile", *orig.s3Profile) + } } func TestApplyProfileStoreToGlobalFlags_AllFields(t *testing.T) { diff --git a/cmd/cloudstic/cmd_profile.go b/cmd/cloudstic/cmd_profile.go index da42e79..f5773cc 100644 --- a/cmd/cloudstic/cmd_profile.go +++ b/cmd/cloudstic/cmd_profile.go @@ -356,6 +356,9 @@ func (r *runner) runProfileNew() int { createdStore := false if a.store != "" { + if _, err := parseStoreURI(a.store); err != nil { + return r.fail("Invalid store URI: %v", err) + } cfg.Stores[a.storeRef] = cloudstic.ProfileStore{URI: a.store} createdStore = true } else if a.storeRef != "" { @@ -479,13 +482,16 @@ func (r *runner) promptStoreSelection(cfg *cloudstic.ProfilesConfig) (string, bo if refName == "" { return "", false, r.fail("Store reference name is required") } - uri, err := r.promptLine("Store URI (e.g. s3://bucket/path, local:/path, sftp://host/path)", "") + uri, err := r.promptLine("Store URI (e.g. s3:bucket/path, local:/path, sftp://host/path)", "") if err != nil { return "", false, r.fail("Failed to read store URI: %v", err) } if uri == "" { return "", false, r.fail("Store URI is required") } + if _, err := parseStoreURI(uri); err != nil { + return "", false, r.fail("Invalid store URI: %v", err) + } cfg.Stores[refName] = cloudstic.ProfileStore{URI: uri} return refName, true, 0 } diff --git a/cmd/cloudstic/cmd_profile_test.go b/cmd/cloudstic/cmd_profile_test.go index cc0d46a..16ebd4a 100644 --- a/cmd/cloudstic/cmd_profile_test.go +++ b/cmd/cloudstic/cmd_profile_test.go @@ -320,7 +320,7 @@ func TestRunProfileNew_CloudSourceRequiresAuthRef(t *testing.T) { "-profiles-file", profilesPath, "-name", "drive-backup", "-source", "gdrive:/", - "-store-ref", "s", "-store", "s3://bucket", + "-store-ref", "s", "-store", "s3:bucket", } var out strings.Builder var errOut strings.Builder @@ -342,7 +342,7 @@ func TestRunProfileNew_RejectsUnknownAuthRef(t *testing.T) { "-profiles-file", profilesPath, "-name", "work-drive", "-source", "gdrive-changes:/Team", - "-store-ref", "s", "-store", "s3://bucket", + "-store-ref", "s", "-store", "s3:bucket", "-auth-ref", "google-work", } var out strings.Builder @@ -605,6 +605,28 @@ func TestRunProfileNew_InvalidSource(t *testing.T) { } } +func TestRunProfileNew_InvalidStoreURI(t *testing.T) { + tmpDir := t.TempDir() + profilesPath := filepath.Join(tmpDir, "profiles.yaml") + os.Args = []string{ + "cloudstic", "profile", "new", + "-profiles-file", profilesPath, + "-name", "bad-store", + "-source", "local:/data", + "-store-ref", "s", "-store", "s3://bucket", + } + var out strings.Builder + var errOut strings.Builder + r := &runner{out: &out, errOut: &errOut} + + if code := r.runProfile(); code == 0 { + t.Fatal("expected non-zero exit code") + } + if !strings.Contains(errOut.String(), "Invalid store URI") { + t.Fatalf("unexpected error output: %s", errOut.String()) + } +} + func TestRunProfileList_WithOneDriveAuth(t *testing.T) { tmpDir := t.TempDir() profilesPath := filepath.Join(tmpDir, "profiles.yaml") diff --git a/cmd/cloudstic/completion.go b/cmd/cloudstic/completion.go index 7fd0244..33a7dfa 100644 --- a/cmd/cloudstic/completion.go +++ b/cmd/cloudstic/completion.go @@ -53,9 +53,9 @@ _cloudstic() { -*) # skip flags and their values case "${words[i]}" in - -store|-profile|-profiles-file|-s3-endpoint|-s3-region|-s3-profile|-s3-access-key|-s3-secret-key|-source-sftp-password|-source-sftp-key|-store-sftp-password|-store-sftp-key|-encryption-key|-password|-recovery-key|-kms-key-arn|-kms-region|-kms-endpoint|-source|-all-profiles|-auth-ref|-google-credentials|-google-token-file|-onedrive-client-id|-onedrive-token-file|-tag|-output|-keep-last|-keep-hourly|-keep-daily|-keep-weekly|-keep-monthly|-keep-yearly|-group-by|-account|-json) - ((i++)) ;; - esac + -store|-profile|-profiles-file|-s3-endpoint|-s3-region|-s3-profile|-s3-access-key|-s3-secret-key|-source-sftp-password|-source-sftp-key|-store-sftp-password|-store-sftp-key|-encryption-key|-password|-recovery-key|-kms-key-arn|-kms-region|-kms-endpoint|-source|-auth-ref|-google-credentials|-google-token-file|-onedrive-client-id|-onedrive-token-file|-tag|-output|-keep-last|-keep-hourly|-keep-daily|-keep-weekly|-keep-monthly|-keep-yearly|-group-by|-account|-json) + ((i++)) ;; + esac ;; *) cmd="${words[i]}" @@ -281,9 +281,9 @@ _cloudstic() { -*) # Skip flags with values case "${words[i]}" in - -store|-profile|-profiles-file|-s3-endpoint|-s3-region|-s3-profile|-s3-access-key|-s3-secret-key|-source-sftp-password|-source-sftp-key|-store-sftp-password|-store-sftp-key|-encryption-key|-password|-recovery-key|-kms-key-arn|-kms-region|-kms-endpoint|-source|-all-profiles|-auth-ref|-google-credentials|-google-token-file|-onedrive-client-id|-onedrive-token-file|-tag|-output|-keep-last|-keep-hourly|-keep-daily|-keep-weekly|-keep-monthly|-keep-yearly|-group-by|-account) - (( i++ )) ;; - esac + -store|-profile|-profiles-file|-s3-endpoint|-s3-region|-s3-profile|-s3-access-key|-s3-secret-key|-source-sftp-password|-source-sftp-key|-store-sftp-password|-store-sftp-key|-encryption-key|-password|-recovery-key|-kms-key-arn|-kms-region|-kms-endpoint|-source|-auth-ref|-google-credentials|-google-token-file|-onedrive-client-id|-onedrive-token-file|-tag|-output|-keep-last|-keep-hourly|-keep-daily|-keep-weekly|-keep-monthly|-keep-yearly|-group-by|-account) + (( i++ )) ;; + esac ;; *) cmd="${words[i]}" diff --git a/cmd/cloudstic/completion_test.go b/cmd/cloudstic/completion_test.go index 3143941..2a58735 100644 --- a/cmd/cloudstic/completion_test.go +++ b/cmd/cloudstic/completion_test.go @@ -31,7 +31,6 @@ func TestCompletionBash(t *testing.T) { "-profiles-file", "-profile", "-all-profiles", "-auth-ref", - "-auth-ref", // Value completions "local: s3: b2: sftp://", "gdrive", "onedrive", diff --git a/internal/engine/backup_scan.go b/internal/engine/backup_scan.go index 746d22b..aaa40ad 100644 --- a/internal/engine/backup_scan.go +++ b/internal/engine/backup_scan.go @@ -119,7 +119,14 @@ func (bm *BackupManager) scanIncremental(ctx context.Context, oldRoot string, in switch fc.Type { case source.ChangeDelete: bm.recordRemoved(fc.Meta.Type) - s.root, err = bm.tree.Delete(s.root, primaryParentID(&fc.Meta), fc.Meta.FileID) + deleteParentID := primaryParentID(&fc.Meta) + if deleteParentID == "" { + deleteParentID, err = bm.lookupDeleteParentID(s.root, fc.Meta.FileID) + if err != nil { + return err + } + } + s.root, err = bm.tree.Delete(s.root, deleteParentID, fc.Meta.FileID) if err != nil { return fmt.Errorf("hamt delete %s: %w", fc.Meta.FileID, err) } @@ -138,6 +145,26 @@ func (bm *BackupManager) scanIncremental(ctx context.Context, oldRoot string, in return s.root, s.pending, s.totalBytes, newToken, nil } +func (bm *BackupManager) lookupDeleteParentID(root, fileID string) (string, error) { + if root == "" { + return "", nil + } + + ref, err := bm.tree.LookupByFileID(root, fileID) + if err != nil { + return "", fmt.Errorf("lookup old file for delete %s: %w", fileID, err) + } + if ref == "" { + return "", nil + } + + oldMeta, err := bm.loadMeta(ref) + if err != nil { + return "", fmt.Errorf("load old file metadata for delete %s: %w", fileID, err) + } + return primaryParentID(oldMeta), nil +} + // detectChange compares meta against the previous snapshot. It returns whether // the entry changed, and the old value ref (empty when the entry is new). // diff --git a/internal/engine/backup_scan_test.go b/internal/engine/backup_scan_test.go index b4c8dfb..e9903c3 100644 --- a/internal/engine/backup_scan_test.go +++ b/internal/engine/backup_scan_test.go @@ -8,6 +8,7 @@ import ( "github.com/cloudstic/cli/internal/core" "github.com/cloudstic/cli/internal/hamt" "github.com/cloudstic/cli/internal/ui" + "github.com/cloudstic/cli/pkg/source" "github.com/cloudstic/cli/pkg/store" ) @@ -228,3 +229,74 @@ func TestDetectChange_NativeFileCarriesForwardMetadata(t *testing.T) { t.Errorf("Expected same ref (metadata carried forward), got %q vs %q", ref2, ref) } } + +type mockIncrementalSource struct { + *MockSource + startToken string + changes []source.FileChange + newToken string +} + +func (s *mockIncrementalSource) GetStartPageToken() (string, error) { + return s.startToken, nil +} + +func (s *mockIncrementalSource) WalkChanges(_ context.Context, _ string, callback func(source.FileChange) error) (string, error) { + for _, ch := range s.changes { + if err := callback(ch); err != nil { + return "", err + } + } + return s.newToken, nil +} + +func TestScanIncremental_DeleteWithoutParentUsesExistingMetadataParent(t *testing.T) { + ctx := context.Background() + base := NewMockSource() + base.Files["FOLDER_1"] = MockFile{Meta: core.FileMeta{FileID: "FOLDER_1", Name: "folder", Type: core.FileTypeFolder}} + base.Files["FILE_1"] = MockFile{ + Meta: core.FileMeta{ + FileID: "FILE_1", + Name: "a.txt", + Type: core.FileTypeFile, + Parents: []string{"FOLDER_1"}, + Size: 3, + }, + Content: []byte("abc"), + } + + inc := &mockIncrementalSource{ + MockSource: base, + startToken: "tok-1", + newToken: "tok-2", + } + + dest := NewMockStore() + mgr := NewBackupManager(inc, dest, ui.NewNoOpReporter(), nil) + _, err := mgr.Run(ctx) + if err != nil { + t.Fatalf("first backup failed: %v", err) + } + + deleteOnly := []source.FileChange{{ + Type: source.ChangeDelete, + Meta: core.FileMeta{FileID: "FILE_1", Type: core.FileTypeFile}, + }} + inc.changes = deleteOnly + delete(base.Files, "FILE_1") + + mgr2 := NewBackupManager(inc, dest, ui.NewNoOpReporter(), nil) + second, err := mgr2.Run(ctx) + if err != nil { + t.Fatalf("second backup failed: %v", err) + } + + tree := hamt.NewTree(store.NewCompressedStore(dest)) + ref, err := tree.Lookup(second.Root, "FOLDER_1", "FILE_1") + if err != nil { + t.Fatalf("lookup failed: %v", err) + } + if ref != "" { + t.Fatalf("expected FILE_1 to be deleted, got ref %q", ref) + } +} diff --git a/pkg/source/gdrive.go b/pkg/source/gdrive.go index d5cc813..a967df9 100644 --- a/pkg/source/gdrive.go +++ b/pkg/source/gdrive.go @@ -592,7 +592,11 @@ func (s *GDriveSource) visitEntryWithPath(f *drive.File, pathMap map[string]stri p := meta.Name if len(f.Parents) > 0 { if parentPath, ok := pathMap[f.Parents[0]]; ok { - p = parentPath + "/" + meta.Name + if parentPath == "" { + p = meta.Name + } else { + p = parentPath + "/" + meta.Name + } } } meta.Paths = []string{p} diff --git a/pkg/source/gdrive_changes.go b/pkg/source/gdrive_changes.go index c1b9d82..e7d2f37 100644 --- a/pkg/source/gdrive_changes.go +++ b/pkg/source/gdrive_changes.go @@ -321,7 +321,7 @@ func (s *GDriveChangeSource) shouldExcludeChange(fc FileChange, excludedIDs map[ } func (s *GDriveChangeSource) changeToFileChange(ch *drive.Change) FileChange { - if ch.Removed || (ch.File != nil && ch.File.Trashed) { + if ch.Removed || ch.File == nil || ch.File.Trashed { return FileChange{ Type: ChangeDelete, Meta: core.FileMeta{FileID: ch.FileId}, diff --git a/pkg/source/gdrive_test.go b/pkg/source/gdrive_test.go index b89eb8c..311fbd9 100644 --- a/pkg/source/gdrive_test.go +++ b/pkg/source/gdrive_test.go @@ -345,6 +345,38 @@ func TestVisitEntryWithPath_PathComputation(t *testing.T) { } } +func TestVisitEntryWithPath_RootRelativePath_NoLeadingSlash(t *testing.T) { + s := &GDriveSource{ + exclude: NewExcludeMatcher(nil), + mimeTypes: make(map[string]string), + } + + var got string + callback := func(meta core.FileMeta) error { + if len(meta.Paths) > 0 { + got = meta.Paths[0] + } + return nil + } + + pathMap := map[string]string{"rootFolderID": ""} + excludedPaths := make(map[string]bool) + + err := s.visitEntryWithPath(&drive.File{ + Id: "file1", + Name: "child.txt", + MimeType: "text/plain", + Parents: []string{"rootFolderID"}, + }, pathMap, excludedPaths, callback) + if err != nil { + t.Fatalf("visitEntryWithPath: %v", err) + } + + if got != "child.txt" { + t.Fatalf("path = %q, want %q", got, "child.txt") + } +} + func TestChangeToFileChange_RecordsMimeType(t *testing.T) { s := &GDriveChangeSource{ GDriveSource: GDriveSource{ @@ -426,6 +458,28 @@ func TestChangeToFileChange_DeletedFile(t *testing.T) { } } +func TestChangeToFileChange_NilFilePayload(t *testing.T) { + s := &GDriveChangeSource{ + GDriveSource: GDriveSource{ + exclude: NewExcludeMatcher(nil), + mimeTypes: make(map[string]string), + }, + } + + fc := s.changeToFileChange(&drive.Change{ + FileId: "file1", + Removed: false, + File: nil, + }) + + if fc.Type != ChangeDelete { + t.Errorf("Type = %v, want ChangeDelete", fc.Type) + } + if fc.Meta.FileID != "file1" { + t.Errorf("FileID = %q, want %q", fc.Meta.FileID, "file1") + } +} + func TestChangeToFileChange_TrashedFile(t *testing.T) { s := &GDriveChangeSource{ GDriveSource: GDriveSource{ diff --git a/pkg/source/local_source_darwin.go b/pkg/source/local_source_darwin.go index db61658..619f04f 100644 --- a/pkg/source/local_source_darwin.go +++ b/pkg/source/local_source_darwin.go @@ -186,7 +186,8 @@ func getVolumeLabel(path string) string { } // Parse attrreference: offset from start of attrreference field, then length - if buf[0] < 12 { + attrLen := *(*uint32)(unsafe.Pointer(&buf[0])) + if attrLen < 12 { return "" } off := *(*int32)(unsafe.Pointer(&buf[4])) diff --git a/pkg/source/local_source_linux.go b/pkg/source/local_source_linux.go index c0657c4..bf63eb4 100644 --- a/pkg/source/local_source_linux.go +++ b/pkg/source/local_source_linux.go @@ -6,6 +6,7 @@ import ( "bufio" "os" "path/filepath" + "strconv" "strings" "syscall" ) @@ -32,6 +33,8 @@ func detectVolumeIdentity(path string) (uuid, label, mountPoint string) { // deviceForPath finds the mount device and mount point for a given filesystem // path by parsing /proc/mounts and matching on device ID (stat.Dev). func deviceForPath(path string) (device, mountPoint string, err error) { + path = filepath.Clean(path) + var st syscall.Stat_t if err := syscall.Stat(path, &st); err != nil { return "", "", err @@ -54,10 +57,10 @@ func deviceForPath(path string) (device, mountPoint string, err error) { if len(fields) < 2 { continue } - mnt := fields[1] + mnt := unescapeMountField(fields[1]) - // Check if this mount point is a prefix of our path - if !strings.HasPrefix(path, mnt) { + // Check if this mount point is a path prefix with a segment boundary. + if !hasPathPrefix(path, mnt) { continue } // Use the longest matching mount point (most specific) @@ -70,10 +73,53 @@ func deviceForPath(path string) (device, mountPoint string, err error) { } } } + if err := scanner.Err(); err != nil { + return "", "", err + } return bestDevice, bestMount, nil } +func hasPathPrefix(path, prefix string) bool { + if prefix == "/" { + return strings.HasPrefix(path, "/") + } + if path == prefix { + return true + } + return strings.HasPrefix(path, prefix+"/") +} + +func unescapeMountField(s string) string { + if !strings.Contains(s, "\\") { + return s + } + + out := make([]byte, 0, len(s)) + for i := 0; i < len(s); i++ { + if s[i] != '\\' || i+3 >= len(s) { + out = append(out, s[i]) + continue + } + if !isOctalDigit(s[i+1]) || !isOctalDigit(s[i+2]) || !isOctalDigit(s[i+3]) { + out = append(out, s[i]) + continue + } + v, err := strconv.ParseUint(s[i+1:i+4], 8, 8) + if err != nil { + out = append(out, s[i]) + continue + } + out = append(out, byte(v)) + i += 3 + } + return string(out) +} + +func isOctalDigit(b byte) bool { + return b >= '0' && b <= '7' +} + // findUUIDForDevice scans /dev/disk/by-uuid/ for a symlink pointing to the // given device and returns the UUID (the symlink name). func findUUIDForDevice(device string) string { @@ -89,7 +135,7 @@ func findUUIDForDevice(device string) string { continue } if filepath.Base(target) == deviceBase { - return e.Name() + return strings.ToUpper(e.Name()) } } return "" diff --git a/pkg/source/local_source_stub.go b/pkg/source/local_source_stub.go index 9bdfeaa..74b991f 100644 --- a/pkg/source/local_source_stub.go +++ b/pkg/source/local_source_stub.go @@ -3,8 +3,8 @@ package source // detectVolumeIdentity is a stub for platforms where volume UUID detection -// is not yet implemented (Windows, plan9, etc.). It returns empty strings, -// causing the engine to fall back to legacy account+path matching. +// is not yet implemented. It returns empty strings, causing the engine to +// fall back to legacy account+path matching. func detectVolumeIdentity(_ string) (uuid, label, mountPoint string) { return "", "", "" } diff --git a/pkg/source/local_source_volume_test.go b/pkg/source/local_source_volume_test.go index 4f07016..000bfac 100644 --- a/pkg/source/local_source_volume_test.go +++ b/pkg/source/local_source_volume_test.go @@ -34,8 +34,8 @@ func TestDetectVolumeIdentity_TempDir(t *testing.T) { } func TestDetectVolumeIdentity_ReturnsUppercaseUUID(t *testing.T) { - if runtime.GOOS != "darwin" && runtime.GOOS != "linux" { - t.Skip("UUID detection only implemented on darwin and linux") + if runtime.GOOS != "darwin" && runtime.GOOS != "linux" && runtime.GOOS != "windows" { + t.Skip("UUID detection only implemented on darwin, linux, and windows") } tmpDir, err := os.MkdirTemp("", "cloudstic-case-test-*") diff --git a/pkg/source/local_source_windows.go b/pkg/source/local_source_windows.go index 62f1b0e..c54168a 100644 --- a/pkg/source/local_source_windows.go +++ b/pkg/source/local_source_windows.go @@ -29,14 +29,15 @@ type partitionInfoGPT struct { // partitionInformationEx mirrors the Windows PARTITION_INFORMATION_EX struct. // The union at the end is defined as the GPT variant (the larger of the two). type partitionInformationEx struct { - PartitionStyle uint32 - _ uint32 // padding for int64 alignment - StartingOffset int64 - PartitionLength int64 - PartitionNumber uint32 - RewritePartition byte - _ [3]byte // padding to align union - GPT partitionInfoGPT + PartitionStyle uint32 + _ uint32 // padding for int64 alignment + StartingOffset int64 + PartitionLength int64 + PartitionNumber uint32 + RewritePartition byte + IsServicePartition byte + _ [2]byte // padding to align union + GPT partitionInfoGPT } func detectVolumeIdentity(path string) (uuid, label, mountPoint string) { @@ -78,8 +79,14 @@ func getVolumeLabel(mountPoint string) string { // getPartitionUUID returns the GPT partition UUID via DeviceIoControl. // Returns empty for MBR partitions or on error. func getPartitionUUID(mountPoint string) string { - // Open the volume: "C:\" → "\\.\C:" (strip trailing backslash). - volumePath := `\\.\` + strings.TrimRight(mountPoint, `\`) + volumeName, err := getVolumeNameForMountPoint(mountPoint) + if err != nil || volumeName == "" { + return "" + } + + // Open the volume GUID path (e.g. "\\?\Volume{GUID}\") so mounted-folder + // volumes work correctly too. + volumePath := `\\.\` + strings.TrimPrefix(strings.TrimRight(volumeName, `\`), `\\?\`) pathUTF16, err := windows.UTF16PtrFromString(volumePath) if err != nil { return "" @@ -120,6 +127,18 @@ func getPartitionUUID(mountPoint string) string { return strings.ToUpper(formatGUID(info.GPT.PartitionId)) } +func getVolumeNameForMountPoint(mountPoint string) (string, error) { + mountUTF16, err := windows.UTF16PtrFromString(mountPoint) + if err != nil { + return "", err + } + var volumeName [windows.MAX_PATH + 1]uint16 + if err := windows.GetVolumeNameForVolumeMountPoint(mountUTF16, &volumeName[0], uint32(len(volumeName))); err != nil { + return "", err + } + return windows.UTF16ToString(volumeName[:]), nil +} + // formatGUID formats a Windows GUID as a standard UUID string. func formatGUID(g windows.GUID) string { return fmt.Sprintf("%08X-%04X-%04X-%02X%02X-%02X%02X%02X%02X%02X%02X", diff --git a/pkg/source/onedrive.go b/pkg/source/onedrive.go index 458dcc8..a6b9d35 100644 --- a/pkg/source/onedrive.go +++ b/pkg/source/onedrive.go @@ -6,8 +6,10 @@ import ( "fmt" "io" "net/http" + "net/url" "os" "path/filepath" + "strings" "time" "github.com/cloudstic/cli/internal/core" @@ -104,10 +106,7 @@ func NewOneDriveSource(ctx context.Context, opts ...OneDriveOption) (*OneDriveSo client := conf.Client(ctx, token) - rootPath := cfg.rootPath - if rootPath == "" { - rootPath = "/" - } + rootPath := normalizeOneDriveRootPath(cfg.rootPath) src := &OneDriveSource{ client: client, driveName: cfg.driveName, @@ -131,20 +130,21 @@ func (s *OneDriveSource) resolveDriveName(ctx context.Context) error { if err != nil { return fmt.Errorf("create request: %w", err) } - resp, err := s.client.Do(req) + respByID, err := s.client.Do(req) if err == nil { - defer func() { _ = resp.Body.Close() }() - if resp.StatusCode == http.StatusOK { + if respByID.StatusCode == http.StatusOK { var drive struct { ID string `json:"id"` Name string `json:"name"` } - if err := json.NewDecoder(resp.Body).Decode(&drive); err == nil { + if decodeErr := json.NewDecoder(respByID.Body).Decode(&drive); decodeErr == nil { + _ = respByID.Body.Close() s.driveID = drive.ID s.driveName = drive.Name return nil } } + _ = respByID.Body.Close() } // Fetch all drives and find by name @@ -152,7 +152,7 @@ func (s *OneDriveSource) resolveDriveName(ctx context.Context) error { if err != nil { return fmt.Errorf("create request: %w", err) } - resp, err = s.client.Do(req) + resp, err := s.client.Do(req) if err != nil { return fmt.Errorf("list drives: %w", err) } @@ -385,12 +385,41 @@ func (s *OneDriveSource) getRootURL() string { if s.driveID != "" { base = fmt.Sprintf("https://graph.microsoft.com/v1.0/drives/%s/root", s.driveID) } - if s.rootPath != "" && s.rootPath != "/" { - return fmt.Sprintf("%s:%s", base, s.rootPath) + encodedRootPath := encodeOneDriveRootPath(s.rootPath) + if encodedRootPath != "" { + return fmt.Sprintf("%s:/%s", base, encodedRootPath) } return base } +func normalizeOneDriveRootPath(path string) string { + trimmed := strings.TrimSpace(path) + if trimmed == "" || trimmed == "/" { + return "/" + } + + if !strings.HasPrefix(trimmed, "/") { + trimmed = "/" + trimmed + } + trimmed = strings.TrimRight(trimmed, "/") + if trimmed == "" { + return "/" + } + return trimmed +} + +func encodeOneDriveRootPath(path string) string { + normalized := normalizeOneDriveRootPath(path) + if normalized == "/" { + return "" + } + parts := strings.Split(strings.TrimPrefix(normalized, "/"), "/") + for i, p := range parts { + parts[i] = url.PathEscape(p) + } + return strings.Join(parts, "/") +} + func (s *OneDriveSource) Walk(ctx context.Context, callback func(core.FileMeta) error) error { rootURL := s.getRootURL() diff --git a/pkg/source/onedrive_changes.go b/pkg/source/onedrive_changes.go index 59daef8..c8e700f 100644 --- a/pkg/source/onedrive_changes.go +++ b/pkg/source/onedrive_changes.go @@ -37,7 +37,7 @@ func (s *OneDriveChangeSource) Info() core.SourceInfo { // requesting a "latest" delta token. The returned string is a full deltaLink URL. func (s *OneDriveChangeSource) GetStartPageToken() (string, error) { url := s.getRootURL() - if s.rootPath != "" && s.rootPath != "/" { + if normalizeOneDriveRootPath(s.rootPath) != "/" { url += ":/delta?token=latest" } else { url += "/delta?token=latest" @@ -113,25 +113,23 @@ func (s *OneDriveChangeSource) WalkChanges(ctx context.Context, token string, ca } func (s *OneDriveChangeSource) filterChangesByRootPath(changes []FileChange) []FileChange { - if s.rootPath == "" || s.rootPath == "/" { + normalizedRoot := normalizeOneDriveRootPath(s.rootPath) + if normalizedRoot == "/" { return changes } var valid []FileChange - trimmedRoot := strings.TrimPrefix(s.rootPath, "/") + trimmedRoot := strings.TrimPrefix(normalizedRoot, "/") for _, fc := range changes { if len(fc.Meta.Paths) > 0 { p := fc.Meta.Paths[0] - if !strings.HasPrefix(p, trimmedRoot+"/") && p != trimmedRoot { + if p == trimmedRoot { + continue // Skip root folder itself; emit descendants only. + } + if !strings.HasPrefix(p, trimmedRoot+"/") { continue // Outside of root path } // Adjust path relative to root - stripped := strings.TrimPrefix(p, trimmedRoot+"/") - stripped = strings.TrimPrefix(stripped, trimmedRoot) - if stripped == "" { - fc.Meta.Paths = []string{fc.Meta.Name} - } else { - fc.Meta.Paths = []string{stripped} - } + fc.Meta.Paths = []string{strings.TrimPrefix(p, trimmedRoot+"/")} } else if fc.Type == ChangeUpsert { continue } diff --git a/pkg/source/onedrive_test.go b/pkg/source/onedrive_test.go index d1a8bd9..94c1ea5 100644 --- a/pkg/source/onedrive_test.go +++ b/pkg/source/onedrive_test.go @@ -99,8 +99,8 @@ func TestOneDriveFilterChangesByRootPath(t *testing.T) { filtered := s.filterChangesByRootPath(changes) - if len(filtered) != 4 { - t.Fatalf("expected 4 filtered changes, got %d", len(filtered)) + if len(filtered) != 3 { + t.Fatalf("expected 3 filtered changes, got %d", len(filtered)) } if filtered[0].Meta.Paths[0] != "file1.txt" { @@ -109,11 +109,8 @@ func TestOneDriveFilterChangesByRootPath(t *testing.T) { if filtered[1].Meta.Paths[0] != "sub/file2.txt" { t.Errorf("expected stripped path sub/file2.txt, got %s", filtered[1].Meta.Paths[0]) } - if filtered[2].Meta.Paths[0] != "my" { - t.Errorf("expected stripped path 'my', got %s", filtered[2].Meta.Paths[0]) - } - if filtered[3].Type != ChangeDelete { - t.Errorf("expected delete change, got %v", filtered[3].Type) + if filtered[2].Type != ChangeDelete { + t.Errorf("expected delete change, got %v", filtered[2].Type) } // Test with rootPath = "" @@ -125,3 +122,31 @@ func TestOneDriveFilterChangesByRootPath(t *testing.T) { t.Errorf("expected %d changes with empty rootPath, got %d", len(changes), len(filtered2)) } } + +func TestNormalizeOneDriveRootPath(t *testing.T) { + tests := []struct { + in string + want string + }{ + {"", "/"}, + {"/", "/"}, + {"docs", "/docs"}, + {"/docs/", "/docs"}, + {" /Team Files/ ", "/Team Files"}, + } + + for _, tc := range tests { + if got := normalizeOneDriveRootPath(tc.in); got != tc.want { + t.Errorf("normalizeOneDriveRootPath(%q) = %q, want %q", tc.in, got, tc.want) + } + } +} + +func TestOneDriveGetRootURL_EncodesPath(t *testing.T) { + s := &OneDriveSource{rootPath: "/Team Files/R?D"} + got := s.getRootURL() + want := "https://graph.microsoft.com/v1.0/me/drive/root:/Team%20Files/R%3FD" + if got != want { + t.Errorf("getRootURL() = %q, want %q", got, want) + } +} diff --git a/pkg/source/onedrive_test.go.orig b/pkg/source/onedrive_test.go.orig deleted file mode 100644 index cf41666..0000000 --- a/pkg/source/onedrive_test.go.orig +++ /dev/null @@ -1,41 +0,0 @@ -package source - -import "testing" - -func TestOneDriveInfo(t *testing.T) { - s := &OneDriveSource{account: "user@outlook.com"} - info := s.Info() - - if info.Type != "onedrive" { - t.Errorf("Type = %q, want onedrive", info.Type) - } - if info.Account != "user@outlook.com" { - t.Errorf("Account = %q, want user@outlook.com", info.Account) - } - if info.Path != "/" { - t.Errorf("Path = %q, want /", info.Path) - } - if info.VolumeUUID != "" { - t.Errorf("VolumeUUID = %q, want empty", info.VolumeUUID) - } - if info.VolumeLabel != "My Drive" { - t.Errorf("VolumeLabel = %q, want My Drive", info.VolumeLabel) - } -} - -func TestOneDriveChangesInfo_Type(t *testing.T) { - s := &OneDriveChangeSource{ - OneDriveSource: OneDriveSource{account: "user@outlook.com"}, - } - info := s.Info() - - if info.Type != "onedrive-changes" { - t.Errorf("Type = %q, want onedrive-changes", info.Type) - } - if info.VolumeLabel != "My Drive" { - t.Errorf("VolumeLabel = %q, want My Drive", info.VolumeLabel) - } - if info.Path != "/" { - t.Errorf("Path = %q, want /", info.Path) - } -} diff --git a/pkg/source/onedrive_test.go.rej b/pkg/source/onedrive_test.go.rej deleted file mode 100644 index fbad673..0000000 --- a/pkg/source/onedrive_test.go.rej +++ /dev/null @@ -1,18 +0,0 @@ -@@ -10,7 +10,7 @@ - ) - - func TestOneDriveInfo(t *testing.T) { -- s := &OneDriveSource{account: "user@domain.com"} -+ s := &OneDriveSource{account: "user@domain.com", rootPath: "/"} - info := s.Info() - - if info.Type != "onedrive" { -@@ -33,7 +33,7 @@ - - func TestOneDriveChangesInfo_Type(t *testing.T) { - s := &OneDriveChangeSource{ -- OneDriveSource: OneDriveSource{account: "user@domain.com"}, -+ OneDriveSource: OneDriveSource{account: "user@domain.com", rootPath: "/"}, - } - info := s.Info() - diff --git a/pkg/store/pack.go b/pkg/store/pack.go index e734881..4463756 100644 --- a/pkg/store/pack.go +++ b/pkg/store/pack.go @@ -327,6 +327,7 @@ func (s *PackStore) Flush(ctx context.Context) error { } s.mu.Lock() s.catalogDirty = false + totalEntries := len(s.catalog) nodeCount := 0 for k := range s.catalog { if strings.HasPrefix(k, "node/") { @@ -334,7 +335,7 @@ func (s *PackStore) Flush(ctx context.Context) error { } } s.mu.Unlock() - debugf("pack: catalog flushed — %d total entries, %d node/* entries", len(s.catalog), nodeCount) + debugf("pack: catalog flushed — %d total entries, %d node/* entries", totalEntries, nodeCount) } return nil diff --git a/rfcs/0002-affinity-model.md b/rfcs/0002-affinity-model.md index 460a376..4c35dfd 100644 --- a/rfcs/0002-affinity-model.md +++ b/rfcs/0002-affinity-model.md @@ -2,7 +2,7 @@ * **Status:** Implemented * **Date:** 2026-03-07 -* **Related:** [RFC 0001](file:///Users/loichermann/workspace/cloudstic-cli/rfcs/0001-hamt-evolution.md) +* **Related:** [RFC 0001](./0001-hamt-evolution.md) ## Abstract @@ -39,16 +39,16 @@ The current trie has these constants: Bias the HAMT key so that files sharing a parent directory group into a common trie subtree: ``` -AffinityKey(parentID, fileID) = SHA256(parentID)[:4] + SHA256(fileID)[:28] +AffinityKey(parentID, fileID) = SHA256(parentID)[:4] + SHA256(fileID)[4:] ``` Where: * `[:N]` denotes the first `N` hex characters of the SHA-256 hex string. * `SHA256(parentID)[:4]` = **16 bits** (2 bytes) of parent-derived entropy. -* `SHA256(fileID)[:28]` = **112 bits** (14 bytes) of file-local entropy. +* `SHA256(fileID)[4:]` = **240 bits** (30 bytes) of file-local entropy. -Full key length remains 32 hex characters, identical to the current `computePathKey` output length — so the rest of the routing machinery (`indexForLevel`, `insertAt`, `lookupAt`, etc.) is unchanged. +Full key length remains 64 hex characters, identical to the current `computePathKey` output length, so the rest of the routing machinery (`indexForLevel`, `insertAt`, `lookupAt`, etc.) is unchanged. ### Locality Guarantee @@ -61,7 +61,7 @@ In concrete terms: **a backup of a directory with `N` files now writes to a sing ### What "ParentID" Means -In `core.FileMeta`, `Parents` is `[]string` of `"filemeta/"` **object references**, not the raw source identifiers. For the Affinity Key, `parentID` should be the **raw source-level parent identifier** — e.g., the Google Drive folder ID stored in `FileMeta.FileID` of the parent — to maintain stable keys across snapshots. Using the content-addressed ref would cause every metadata change to a parent folder to re-key all its children. +In `core.FileMeta`, `Parents` is `[]string` of raw source identifiers (for example, Google Drive folder IDs or normalized local parent paths), not `filemeta/` object references. For the affinity key, `parentID` is this raw source-level parent identifier so keys remain stable across snapshots. For sources (like local filesystems) where files can have multiple parents, use the **primary parent** (index 0 of the parent list, or the closest filesystem ancestor) for the key construction. @@ -100,17 +100,7 @@ Note that `LeafEntry.Key` continues to store the **raw `fileID`** — it is the ### 2. Snapshot Format Version -Tag new snapshots with a format version to prevent cross-version mutations: - -```go -// core.Snapshot gains a HAMTVersion field -type Snapshot struct { - // ... - HAMTVersion int `json:"hamt_version,omitempty"` // 1 = legacy, 2 = affinity keys -} -``` - -Clients reading a `hamt_version: 2` snapshot must use `AffinityKey` for all trie operations. Older clients without this field default to version 1 (current behavior). +No separate snapshot version field was required for this rollout. Compatibility is handled at the HAMT leaf-entry level: `LeafEntry.PathKey` stores the routing key used by newer writers, while legacy entries without `PathKey` are still handled by fallback logic during reads/diff/walk. ## 4. Trade-offs and Constraints @@ -133,9 +123,9 @@ Current `Lookup(root, key)` only needs `fileID`. With the affinity model, a look ## 5. Backward Compatibility -**Breaking.** This change alters the path of every key in the trie. Existing repositories must be either: +Implemented as a rolling-compatible change: -1. **Migrated:** Perform a one-time full walk of the snapshot, re-emit every `(fileID, parentID, value)` triple via `Insert` into a new tree, and replace the root reference. -2. **Versioned:** New snapshots created after a configured cutoff use `HAMTv2`; old snapshots remain readable and writable using the legacy key scheme. +1. New writes use affinity routing (`AffinityKey(parentID, fileID)`) and store `PathKey` in leaf entries. +2. Older trees remain readable because legacy entries without `PathKey` fall back to `computePathKey(fileID)` in read/diff paths. -The versioned approach (option 2) is strongly recommended for production. The `HAMTVersion` field on `Snapshot` provides the discriminator. A migration CLI subcommand (`cloudstic migrate-hamt`) can optionally backfill older snapshots. +This avoids a repository-wide migration step and does not require a dedicated `Snapshot` format field. diff --git a/scripts/benchmark/run.sh b/scripts/benchmark/run.sh index 87cc694..5ebece9 100755 --- a/scripts/benchmark/run.sh +++ b/scripts/benchmark/run.sh @@ -61,6 +61,14 @@ echo "Building cloudstic binary..." go build -o /tmp/cloudstic ./cmd/cloudstic export CLOUDSTIC_BIN="/tmp/cloudstic" +# Ensure benchmark runs are not affected by operator shell defaults. +# In particular, a pre-set CLOUDSTIC_KMS_KEY_ARN would make `cloudstic init` +# try AWS KMS even for local/local runs. +unset CLOUDSTIC_KMS_KEY_ARN CLOUDSTIC_KMS_REGION CLOUDSTIC_KMS_ENDPOINT +unset CLOUDSTIC_PROFILE CLOUDSTIC_PROFILES_FILE +unset CLOUDSTIC_STORE CLOUDSTIC_S3_ENDPOINT CLOUDSTIC_S3_REGION CLOUDSTIC_S3_PROFILE +unset CLOUDSTIC_S3_ACCESS_KEY CLOUDSTIC_S3_SECRET_KEY + # Create temp dirs DATA_TEMPLATE=$(mktemp -d -t benchmark-template-XXXXXX) DATA_DIR=$(mktemp -d -t benchmark-data-XXXXXX) @@ -289,12 +297,14 @@ print_repo_size() { printf "| %-30s | %12s | %13s | %12s |\n" "Final Repo Size" "$size" "-" "-" } -PASSWORD="benchmark-password-123" -export CLOUDSTIC_ENCRYPTION_PASSWORD="$PASSWORD" -export RESTIC_PASSWORD="$PASSWORD" -export BORG_PASSPHRASE="$PASSWORD" -export DUPLICACY_PASSWORD="$PASSWORD" -export DUPLICACY_DEFAULT_PASSWORD="$PASSWORD" +# Repository password used by all tools during benchmarks. +# Override with: BENCH_REPO_PASSWORD='your-password' ./scripts/benchmark/run.sh ... +REPO_PASSWORD=${BENCH_REPO_PASSWORD:-benchmark-password-123} +export CLOUDSTIC_PASSWORD="$REPO_PASSWORD" +export RESTIC_PASSWORD="$REPO_PASSWORD" +export BORG_PASSPHRASE="$REPO_PASSWORD" +export DUPLICACY_PASSWORD="$REPO_PASSWORD" +export DUPLICACY_DEFAULT_PASSWORD="$REPO_PASSWORD" # --------------------------------------------------------------------------- # Cloudstic @@ -310,7 +320,7 @@ benchmark_cloudstic() { if [ "$STORE" == "s3" ]; then BENCH_REPO_DIR="" BENCH_S3_PREFIX="s3://$S3_BUCKET/cloudstic/" - store_flags="-store s3:$S3_BUCKET/cloudstic/ -encryption-password $PASSWORD" + store_flags="-store s3:$S3_BUCKET/cloudstic/ -password $REPO_PASSWORD" $CLOUDSTIC_BIN init $store_flags >/dev/null || true else BENCH_REPO_DIR="$repo" diff --git a/scripts/check.sh b/scripts/check.sh index 646a838..b00b425 100755 --- a/scripts/check.sh +++ b/scripts/check.sh @@ -13,7 +13,7 @@ echo "==> Running golangci-lint..." golangci-lint run ./... echo "==> Running markdownlint..." -npx markdownlint-cli2 "**/*.md" 2>/dev/null +npx markdownlint-cli2 "**/*.md" echo "==> Running go test..." go test -v -race -coverprofile=coverage.out -count=1 ./...