diff --git a/cmd/cloudstic/format.go b/cmd/cloudstic/format.go index c2abcd8..1f8ccfa 100644 --- a/cmd/cloudstic/format.go +++ b/cmd/cloudstic/format.go @@ -47,8 +47,12 @@ func (r *runner) renderSnapshotTable(entries []engine.SnapshotEntry, reasons map var source, account, path string if e.Snap.Source != nil { source = e.Snap.Source.Type - if e.Snap.Source.VolumeLabel != "" { - source += " (" + e.Snap.Source.VolumeLabel + ")" + driveName := e.Snap.Source.DriveName + if driveName == "" { + driveName = e.Snap.Source.VolumeLabel + } + if driveName != "" { + source += " (" + driveName + ")" } account = e.Snap.Source.Account path = e.Snap.Source.Path @@ -74,10 +78,17 @@ func sourceGroupKey(s *core.SourceInfo) string { if s == nil { return "" } + pathToken := s.Path + if s.PathID != "" { + pathToken = s.PathID + } + if s.Identity != "" { + return s.Type + "\x00" + s.Identity + "\x00" + pathToken + } if s.VolumeUUID != "" { - return s.Type + "\x00" + s.VolumeUUID + "\x00" + s.Path + return s.Type + "\x00" + s.VolumeUUID + "\x00" + pathToken } - return s.Type + "\x00" + s.Account + "\x00" + s.Path + return s.Type + "\x00" + s.Account + "\x00" + pathToken } // sourceGroupLabel returns a human-readable label for a source group. @@ -87,8 +98,12 @@ func sourceGroupLabel(s *core.SourceInfo) string { } var parts []string label := s.Type - if s.VolumeLabel != "" { - label += " (" + s.VolumeLabel + ")" + driveName := s.DriveName + if driveName == "" { + driveName = s.VolumeLabel + } + if driveName != "" { + label += " (" + driveName + ")" } parts = append(parts, label) if s.Account != "" { diff --git a/docs/sources.md b/docs/sources.md index bf0c174..d139cd6 100644 --- a/docs/sources.md +++ b/docs/sources.md @@ -21,7 +21,7 @@ type Source interface { |--------|-------------| | `Walk` | Enumerate every file and folder. Parents **must** be emitted before their children. | | `GetFileStream` | Return a readable stream for a file, identified by its source-specific `fileID`. 
| -| `Info` | Return metadata about the source (type, account, path) stored in the snapshot. | +| `Info` | Return source identity and display metadata stored in the snapshot. | | `Size` | Return the total size of the source (used for progress reporting). | ### IncrementalSource @@ -54,17 +54,31 @@ Returned by `Info()` and stored in the snapshot's `source` field: ```go type SourceInfo struct { - Type string // e.g. "gdrive", "local", "sftp", "onedrive", "gdrive-changes" - Account string // Google email, hostname, user@host, etc. - Path string // drive path, filesystem path, etc. + Type string // e.g. "gdrive", "local", "sftp", "onedrive", "gdrive-changes" + Account string // friendly display account (email, hostname, user@host) + Path string // friendly display path + Identity string // stable container identity for lineage matching + PathID string // stable selected-root identity within the container + DriveName string // friendly container label (e.g. "My Drive") + + // Legacy compatibility fields (read from older snapshots). + VolumeUUID string + VolumeLabel string } ``` The engine uses `SourceInfo` to: -- Find the previous snapshot from the same source (for incremental comparison) +- Match previous snapshots for incremental comparison - Group snapshots in retention policies (`forget --group-by source,account,path`) +Matching precedence: + +1. `Type + Identity + PathID` +2. `Type + Identity + Path` (bridge fallback) +3. `Type + VolumeUUID + Path` (legacy fallback) +4. `Type + Account + Path` (legacy fallback) + ### FileMeta The common file metadata model emitted by all sources during `Walk` or `WalkChanges`: @@ -97,8 +111,11 @@ type FileMeta struct { | **FileID** | Relative path from root (e.g. 
`subdir/file.txt`) | | **Parents** | Parent directory's relative path | | **ContentHash** | Not provided (computed by the engine during upload) | -| **SourceInfo.Account** | Machine hostname | -| **SourceInfo.Path** | Absolute path to the backed-up directory | +| **SourceInfo.Identity** | Partition UUID (portable) or hostname (fallback) | +| **SourceInfo.PathID** | Stable path token (portable drives use absolute-from-root, e.g. `/Photos`) | +| **SourceInfo.Account** | Machine hostname (display) | +| **SourceInfo.DriveName** | Volume label when available | +| **SourceInfo.Path** | Display path (portable drives are shown absolute from drive root, e.g. `/Photos`) | Walks the directory tree using `filepath.Walk`. Symbolic links are not followed. @@ -111,7 +128,10 @@ Walks the directory tree using `filepath.Walk`. Symbolic links are not followed. | **FileID** | Relative path from root (e.g. `subdir/file.txt`) | | **Parents** | Parent directory's relative path | | **ContentHash** | Not provided (computed by the engine during upload) | +| **SourceInfo.Identity** | `user@host` | +| **SourceInfo.PathID** | Remote root directory path | | **SourceInfo.Account** | `user@host` | +| **SourceInfo.DriveName** | *(empty)* | | **SourceInfo.Path** | Remote root directory path | Walks the remote directory tree via SFTP. Supports password, SSH private key, and ssh-agent authentication. @@ -125,8 +145,11 @@ Walks the remote directory tree via SFTP. Supports password, SSH private key, an | **FileID** | Google Drive file ID (e.g. 
`1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs74OgV`) | | **Parents** | Google Drive parent folder IDs | | **ContentHash** | SHA-256 checksum from the Drive API (avoids re-downloading unchanged files) | -| **SourceInfo.Account** | Google account email | -| **SourceInfo.Path** | `my-drive://` or `://` | +| **SourceInfo.Identity** | My Drive: stable Google account ID; Shared Drive: shared drive ID | +| **SourceInfo.PathID** | Resolved root folder ID (stable across rename/move) | +| **SourceInfo.Account** | Google account email (display) | +| **SourceInfo.DriveName** | `My Drive` or shared drive name | +| **SourceInfo.Path** | User-selected display path | Lists all files and folders via `files.list`, then topologically sorts folders so parents are emitted before children. Supports My Drive and Shared Drives (via `gdrive://`), with optional folder scoping (via `gdrive:///path/to/folder`). @@ -139,7 +162,10 @@ Lists all files and folders via `files.list`, then topologically sorts folders s | **FileID** | Same as `gdrive` | | **Parents** | Same as `gdrive` | | **ContentHash** | Same as `gdrive` | +| **SourceInfo.Identity** | Same as `gdrive` | +| **SourceInfo.PathID** | Same as `gdrive` | | **SourceInfo.Account** | Same as `gdrive` | +| **SourceInfo.DriveName** | Same as `gdrive` | | **SourceInfo.Path** | Same as `gdrive` | | **Change token** | Google Drive Changes API start page token | @@ -156,8 +182,11 @@ Folder changes are topologically sorted before file changes, ensuring parent ref | **FileID** | OneDrive item ID | | **Parents** | OneDrive parent item ID | | **ContentHash** | Not provided (computed by the engine during upload) | -| **SourceInfo.Account** | User principal name from Microsoft Graph `/me` | -| **SourceInfo.Path** | `onedrive://` | +| **SourceInfo.Identity** | Selected drive ID or stable account ID | +| **SourceInfo.PathID** | Resolved root item ID (fallback to root path) | +| **SourceInfo.Account** | User principal name from Microsoft Graph `/me` (display) | +| 
**SourceInfo.DriveName** | `My Drive` or selected drive name | +| **SourceInfo.Path** | User-selected display path | Walks the drive recursively starting from the root item via the Microsoft Graph API. Folders are visited depth-first, ensuring parents are emitted before children. @@ -170,7 +199,10 @@ Walks the drive recursively starting from the root item via the Microsoft Graph | **FileID** | Same as `onedrive` | | **Parents** | Same as `onedrive` | | **ContentHash** | Same as `onedrive` | +| **SourceInfo.Identity** | Same as `onedrive` | +| **SourceInfo.PathID** | Same as `onedrive` | | **SourceInfo.Account** | Same as `onedrive` | +| **SourceInfo.DriveName** | Same as `onedrive` | | **SourceInfo.Path** | Same as `onedrive` | | **Change token** | Microsoft Graph delta link | @@ -183,7 +215,7 @@ Embeds `OneDriveSource` and reuses its `Walk`, `GetFileStream`, and metadata con The backup engine (`internal/engine/backup.go`) interacts with sources as follows: 1. **Detect source type** — check if the source implements `IncrementalSource` -2. **Load previous state** — find the most recent snapshot with a matching `SourceInfo` +2. **Load previous state** — find the most recent snapshot with a matching source identity 3. **If incremental and a previous token exists** — call `WalkChanges(token)` to get a delta, then apply upserts and deletes to the previous HAMT 4. **Otherwise** — call `GetStartPageToken()` (if incremental) then `Walk()` for a full scan, comparing each entry against the previous HAMT 5. **Upload changed files** — call `GetFileStream(fileID)` for each file that needs uploading @@ -204,5 +236,5 @@ To add a new source: 2. `Walk` must emit parents before children 3. `FileID` must be a stable, unique identifier within the source — it's used as the HAMT key 4. `GetFileStream` must return the raw file bytes for the given `FileID` -5. `Info()` should return a unique `SourceInfo` so snapshots from different sources are distinguishable +5. 
`Info()` should return stable `Identity` + `PathID` values so lineage remains consistent over time 6. Register the source type in `cmd/cloudstic/main.go` in the `initSource` function diff --git a/docs/user-guide.md b/docs/user-guide.md index df75f0e..8741522 100644 --- a/docs/user-guide.md +++ b/docs/user-guide.md @@ -277,6 +277,8 @@ cloudstic backup -source local:~/Documents -dry-run The `gdrive-changes` and `onedrive-changes` source types use their respective change/delta APIs for faster incremental backups after the first full backup. +Cloudstic tracks source lineage using stable source identities internally (container identity + root location identity), not just display labels. For cloud sources, this uses stable drive/folder IDs so incremental continuity is preserved across folder renames or moves. + > **Locking:** `backup` acquires a **shared lock** on the repository at the start of the run (skipped for `-dry-run`). Multiple backups can run concurrently. The lock is released when the command exits. If the repository is exclusively locked by a `prune` run, `backup` will fail immediately with an error message. Use `break-lock` if a lock is stale. #### Exclude patterns diff --git a/internal/core/models.go b/internal/core/models.go index 98c35d2..112582f 100644 --- a/internal/core/models.go +++ b/internal/core/models.go @@ -71,11 +71,16 @@ type LeafEntry struct { // first-class field on the snapshot so that forget policies can group by - // source identity (Type + Account + Path). + // source identity (Type + Identity + PathID, with legacy fallbacks). type SourceInfo struct { - Type string `json:"type"` // e.g. 
"gdrive", "local" + Account string `json:"account,omitempty"` // friendly account/host label for display + Path string `json:"path,omitempty"` // display path within the source container + Identity string `json:"identity,omitempty"` // stable container identity for lineage matching + PathID string `json:"path_id,omitempty"` // stable selected-root identity within container + DriveName string `json:"drive_name,omitempty"` // human-readable container label (e.g. "My Drive") + + // Legacy fields (read-only compatibility path; slated for future removal). + VolumeUUID string `json:"volume_uuid,omitempty"` + VolumeLabel string `json:"volume_label,omitempty"` } // Snapshot represents a backup checkpoint diff --git a/internal/engine/backup.go b/internal/engine/backup.go index 9e8abd9..8395a4b 100644 --- a/internal/engine/backup.go +++ b/internal/engine/backup.go @@ -287,9 +287,8 @@ func (bm *BackupManager) loadLatestSeq() int { } // findPreviousSnapshot lists all snapshots and returns the most recent one -// whose Source matches the given info. When VolumeUUID is set, it is preferred -// over the legacy (Type + Account + Path) match to enable cross-machine -// incremental backup for portable drives. +// whose Source matches the given info. Matching prefers the new identity +// fields and falls back to legacy fields for backward compatibility. // Returns nil when no matching snapshot exists. func (bm *BackupManager) findPreviousSnapshot(info core.SourceInfo) *core.Snapshot { entries, err := LoadSnapshotCatalog(bm.store) @@ -297,22 +296,50 @@ func (bm *BackupManager) findPreviousSnapshot(info core.SourceInfo) *core.Snapsh return nil } - // Pass 1: UUID + path match (cross-machine, mount-point-agnostic). - // Path is relative to the volume root, so different sub-directories - // of the same drive are tracked independently. + // Pass 1: identity + path_id (preferred). 
+ if info.Identity != "" && info.PathID != "" { + for _, e := range entries { + if e.Snap.Source != nil && + e.Snap.Source.Type == info.Type && + e.Snap.Source.Identity == info.Identity && + e.Snap.Source.PathID == info.PathID { + snap := e.Snap + return &snap + } + } + } + + // Pass 2: identity + path bridge for snapshots without path_id. + if info.Identity != "" { + for _, e := range entries { + if e.Snap.Source != nil && + e.Snap.Source.Type == info.Type && + e.Snap.Source.Identity == info.Identity && + e.Snap.Source.Path == info.Path { + snap := e.Snap + return &snap + } + } + } + + // Pass 3: legacy UUID + path match. if info.VolumeUUID != "" { + legacyPath := info.PathID + if legacyPath == "" { + legacyPath = info.Path + } for _, e := range entries { if e.Snap.Source != nil && e.Snap.Source.Type == info.Type && e.Snap.Source.VolumeUUID == info.VolumeUUID && - e.Snap.Source.Path == info.Path { + (e.Snap.Source.Path == legacyPath || e.Snap.Source.Path == info.Path) { snap := e.Snap return &snap } } } - // Pass 2: legacy match (type + account + path) + // Pass 4: legacy match (type + account + path) for _, e := range entries { if e.Snap.Source != nil && e.Snap.Source.Type == info.Type && diff --git a/internal/engine/list.go b/internal/engine/list.go index 1f2a1f4..2922c6a 100644 --- a/internal/engine/list.go +++ b/internal/engine/list.go @@ -54,6 +54,17 @@ func (lm *ListManager) Run(ctx context.Context, opts ...ListOption) (*ListResult source := "" if e.Snap.Source != nil { source = fmt.Sprintf(" source=%s account=%s path=%s", e.Snap.Source.Type, e.Snap.Source.Account, e.Snap.Source.Path) + if e.Snap.Source.DriveName != "" { + source += fmt.Sprintf(" drive=%s", e.Snap.Source.DriveName) + } else if e.Snap.Source.VolumeLabel != "" { + source += fmt.Sprintf(" drive=%s", e.Snap.Source.VolumeLabel) + } + if e.Snap.Source.Identity != "" { + source += fmt.Sprintf(" identity=%s", e.Snap.Source.Identity) + } + if e.Snap.Source.PathID != "" { + source += fmt.Sprintf(" 
path_id=%s", e.Snap.Source.PathID) + } } fmt.Fprintf(os.Stderr, " %s seq=%d created=%s%s\n", e.Ref, e.Snap.Seq, e.Snap.Created, source) } diff --git a/internal/engine/policy.go b/internal/engine/policy.go index 42e5b6f..2ac9d94 100644 --- a/internal/engine/policy.go +++ b/internal/engine/policy.go @@ -132,20 +132,21 @@ func makeGroupKey(snap *core.Snapshot, gf groupFields) GroupKey { if gf.source { k.Source = snap.Source.Type } - // When VolumeUUID is present, use it as the primary grouping - // identity instead of account. Path is kept (it is relative to - // the volume root) so that different sub-directories of the same - // drive are grouped independently. - if snap.Source.VolumeUUID != "" && (gf.account || gf.path) { - k.Account = snap.Source.VolumeUUID - if gf.path { - k.Path = snap.Source.Path - } - } else { - if gf.account { + // Prefer new identity fields, then legacy volume UUID, then account/path. + if gf.account { + switch { + case snap.Source.Identity != "": + k.Account = snap.Source.Identity + case snap.Source.VolumeUUID != "": + k.Account = snap.Source.VolumeUUID + default: k.Account = snap.Source.Account } - if gf.path { + } + if gf.path { + if snap.Source.PathID != "" { + k.Path = snap.Source.PathID + } else { k.Path = snap.Source.Path } } @@ -191,14 +192,24 @@ func matchesFilter(snap *core.Snapshot, f snapshotFilter) bool { if snap.Source == nil { return false } - // Accept either the human-readable account (hostname/email) or the - // VolumeUUID so that portable-drive snapshots can be targeted by UUID. - if snap.Source.Account != f.account && snap.Source.VolumeUUID != f.account { + // Accept display account and identity fields for compatibility. 
+ if snap.Source.Account != f.account && + snap.Source.Identity != f.account && + snap.Source.VolumeUUID != f.account { return false } } - if f.path != "" && (snap.Source == nil || snap.Source.Path != f.path) { - return false + if f.path != "" { + if snap.Source == nil { + return false + } + if snap.Source.PathID != "" { + if snap.Source.PathID != f.path && snap.Source.Path != f.path { + return false + } + } else if snap.Source.Path != f.path { + return false + } } if len(f.tags) > 0 { tagSet := make(map[string]bool, len(snap.Tags)) diff --git a/pkg/source/gdrive.go b/pkg/source/gdrive.go index 95bced8..30478d1 100644 --- a/pkg/source/gdrive.go +++ b/pkg/source/gdrive.go @@ -122,6 +122,7 @@ type GDriveSource struct { rootFolderID string // if empty, defaults to "root" (entire drive) rootPath string // The string path the user specified, or "/" account string // Google account email; populated automatically + accountID string // stable Google account identity; populated automatically driveName string // shared drive name; populated during construction exclude *ExcludeMatcher skipNativeFiles bool @@ -237,24 +238,35 @@ func NewGDriveSource(ctx context.Context, opts ...GDriveOption) (*GDriveSource, func (s *GDriveSource) Info() core.SourceInfo { account := s.account - if account == "" { - if about, err := s.service.About.Get().Fields("user(emailAddress)").Do(); err == nil && about.User != nil { - account = about.User.EmailAddress - s.account = account + accountID := s.accountID + if s.service != nil && (account == "" || accountID == "") { + if about, err := s.service.About.Get().Fields("user(emailAddress,permissionId)").Do(); err == nil && about.User != nil { + if account == "" { + account = about.User.EmailAddress + s.account = account + } + if accountID == "" { + accountID = about.User.PermissionId + s.accountID = accountID + } } } info := core.SourceInfo{ - Type: "gdrive", - Account: account, - Path: s.rootPath, + Type: "gdrive", + Account: account, + Path: 
s.rootPath, + PathID: s.selectedRootID(), + DriveName: "My Drive", } if s.isSharedDrive() { - info.VolumeUUID = s.driveID - info.VolumeLabel = s.driveName + info.Identity = s.driveID + info.DriveName = s.driveName + } else if accountID != "" { + info.Identity = accountID } else { - info.VolumeLabel = "My Drive" + info.Identity = account } return info @@ -264,6 +276,16 @@ func (s *GDriveSource) isSharedDrive() bool { return s.driveID != "" } +func (s *GDriveSource) selectedRootID() string { + if s.rootFolderID != "" { + return s.rootFolderID + } + if s.isSharedDrive() { + return s.driveID + } + return "root" +} + // resolvePathToFolderID resolves a string path (e.g. "/foo/bar") to a Drive folder ID. func (s *GDriveSource) resolvePathToFolderID(ctx context.Context, path string) (string, error) { parts := strings.Split(strings.Trim(path, "/"), "/") diff --git a/pkg/source/gdrive_test.go b/pkg/source/gdrive_test.go index 1faca34..b89eb8c 100644 --- a/pkg/source/gdrive_test.go +++ b/pkg/source/gdrive_test.go @@ -500,11 +500,14 @@ func TestGDriveInfo_MyDrive_Root(t *testing.T) { if info.Path != "/" { t.Errorf("Path = %q, want /", info.Path) } - if info.VolumeUUID != "" { - t.Errorf("VolumeUUID = %q, want empty for My Drive", info.VolumeUUID) + if info.Identity != "user@gmail.com" { + t.Errorf("Identity = %q, want user@gmail.com", info.Identity) } - if info.VolumeLabel != "My Drive" { - t.Errorf("VolumeLabel = %q, want My Drive", info.VolumeLabel) + if info.DriveName != "My Drive" { + t.Errorf("DriveName = %q, want My Drive", info.DriveName) + } + if info.PathID != "root" { + t.Errorf("PathID = %q, want root", info.PathID) } } @@ -515,11 +518,14 @@ func TestGDriveInfo_MyDrive_Subfolder(t *testing.T) { if info.Path != "/myfolder" { t.Errorf("Path = %q, want /myfolder", info.Path) } - if info.VolumeUUID != "" { - t.Errorf("VolumeUUID = %q, want empty for My Drive", info.VolumeUUID) + if info.Identity != "user@gmail.com" { + t.Errorf("Identity = %q, want user@gmail.com", 
info.Identity) + } + if info.DriveName != "My Drive" { + t.Errorf("DriveName = %q, want My Drive", info.DriveName) } - if info.VolumeLabel != "My Drive" { - t.Errorf("VolumeLabel = %q, want My Drive", info.VolumeLabel) + if info.PathID != "folder123" { + t.Errorf("PathID = %q, want folder123", info.PathID) } } @@ -535,11 +541,14 @@ func TestGDriveInfo_SharedDrive_Root(t *testing.T) { if info.Path != "/" { t.Errorf("Path = %q, want /", info.Path) } - if info.VolumeUUID != "shared-drive-abc" { - t.Errorf("VolumeUUID = %q, want shared-drive-abc", info.VolumeUUID) + if info.Identity != "shared-drive-abc" { + t.Errorf("Identity = %q, want shared-drive-abc", info.Identity) } - if info.VolumeLabel != "Team Photos" { - t.Errorf("VolumeLabel = %q, want Team Photos", info.VolumeLabel) + if info.DriveName != "Team Photos" { + t.Errorf("DriveName = %q, want Team Photos", info.DriveName) + } + if info.PathID != "shared-drive-abc" { + t.Errorf("PathID = %q, want shared-drive-abc", info.PathID) } } @@ -556,11 +565,14 @@ func TestGDriveInfo_SharedDrive_Subfolder(t *testing.T) { if info.Path != "/team/folder456" { t.Errorf("Path = %q, want /team/folder456", info.Path) } - if info.VolumeUUID != "shared-drive-abc" { - t.Errorf("VolumeUUID = %q, want shared-drive-abc", info.VolumeUUID) + if info.Identity != "shared-drive-abc" { + t.Errorf("Identity = %q, want shared-drive-abc", info.Identity) + } + if info.DriveName != "Team Photos" { + t.Errorf("DriveName = %q, want Team Photos", info.DriveName) } - if info.VolumeLabel != "Team Photos" { - t.Errorf("VolumeLabel = %q, want Team Photos", info.VolumeLabel) + if info.PathID != "folder456" { + t.Errorf("PathID = %q, want folder456", info.PathID) } } @@ -573,7 +585,7 @@ func TestGDriveChangesInfo_Type(t *testing.T) { if info.Type != "gdrive-changes" { t.Errorf("Type = %q, want gdrive-changes", info.Type) } - if info.VolumeLabel != "My Drive" { - t.Errorf("VolumeLabel = %q, want My Drive", info.VolumeLabel) + if info.DriveName != "My Drive" 
{ + t.Errorf("DriveName = %q, want My Drive", info.DriveName) } } diff --git a/pkg/source/local_source.go b/pkg/source/local_source.go index 208b075..731a773 100644 --- a/pkg/source/local_source.go +++ b/pkg/source/local_source.go @@ -5,6 +5,7 @@ import ( "io" "os" "path/filepath" + "strings" "github.com/cloudstic/cli/internal/core" ) @@ -29,12 +30,31 @@ func (s *LocalSource) Info() core.SourceInfo { } } + pathID := infoPath + displayPath := infoPath + if s.volumeUUID != "" { + clean := strings.TrimPrefix(pathID, "./") + switch clean { + case "", ".": + displayPath = "/" + default: + displayPath = "/" + strings.TrimPrefix(clean, "/") + } + pathID = displayPath + } + return core.SourceInfo{ - Type: "local", - Account: hostname, - Path: infoPath, - VolumeUUID: s.volumeUUID, - VolumeLabel: s.volumeLabel, + Type: "local", + Account: hostname, + Path: displayPath, + PathID: pathID, + DriveName: s.volumeLabel, + Identity: func() string { + if s.volumeUUID != "" { + return s.volumeUUID + } + return hostname + }(), } } diff --git a/pkg/source/local_source_test.go b/pkg/source/local_source_test.go index d72c8bf..bc3264c 100644 --- a/pkg/source/local_source_test.go +++ b/pkg/source/local_source_test.go @@ -126,16 +126,26 @@ func TestLocalSource(t *testing.T) { if info.Type != "local" { t.Errorf("Expected type 'local', got %s", info.Type) } - if info.VolumeUUID != "" { + if src.VolumeUUID() != "" { - // Path is volume-relative when UUID is detected. + // Path is shown absolute from the drive root when UUID is detected. 
- if len(info.Path) > 0 && info.Path[0] == '/' { - t.Errorf("Expected volume-relative Path when UUID is set, got absolute: %s", info.Path) + if len(info.Path) == 0 || info.Path[0] != '/' { + t.Errorf("Expected display Path absolute from drive root when UUID is set, got: %s", info.Path) + } + if info.Identity != src.VolumeUUID() { + t.Errorf("Expected Identity %q, got %q", src.VolumeUUID(), info.Identity) } } else { if info.Path != tmpDir { t.Errorf("Expected Path '%s', got %s", tmpDir, info.Path) } } + if src.VolumeUUID() != "" { + if info.PathID != info.Path { + t.Errorf("Expected portable PathID to be absolute-from-root, got PathID=%q Path=%q", info.PathID, info.Path) + } + } else if info.PathID != info.Path { + t.Errorf("Expected PathID to equal Path for non-portable source, got PathID=%q Path=%q", info.PathID, info.Path) + } // Test Size() size, err := src.Size(ctx) diff --git a/pkg/source/local_source_volume_test.go b/pkg/source/local_source_volume_test.go index ddac4be..4f07016 100644 --- a/pkg/source/local_source_volume_test.go +++ b/pkg/source/local_source_volume_test.go @@ -69,8 +69,8 @@ func TestLocalSource_WithVolumeUUID_Override(t *testing.T) { src := NewLocalSource(tmpDir, WithVolumeUUID(explicitUUID)) info := src.Info() - if info.VolumeUUID != explicitUUID { - t.Errorf("expected VolumeUUID=%q, got %q", explicitUUID, info.VolumeUUID) + if info.Identity != explicitUUID { + t.Errorf("expected Identity=%q, got %q", explicitUUID, info.Identity) } } @@ -91,20 +91,30 @@ func TestLocalSource_Info_PopulatesVolumeFields(t *testing.T) { t.Error("expected non-empty Account (hostname)") } - // VolumeUUID and VolumeLabel are populated via the platform-specific - // detectVolumeIdentity. We just verify they're set correctly on the - // Info output (they may be empty on stub platforms). 
- if info.VolumeUUID != src.VolumeUUID() { - t.Errorf("Info().VolumeUUID=%q != VolumeUUID()=%q", info.VolumeUUID, src.VolumeUUID()) + // Identity and DriveName are populated from platform-specific volume + // discovery (or hostname fallback for identity when UUID is unavailable). + if src.VolumeUUID() != "" { + if info.Identity != src.VolumeUUID() { + t.Errorf("Info().Identity=%q != VolumeUUID()=%q", info.Identity, src.VolumeUUID()) + } + } else if info.Identity == "" { + t.Error("expected non-empty Identity when VolumeUUID is unavailable") } - if info.VolumeLabel != src.VolumeLabel() { - t.Errorf("Info().VolumeLabel=%q != VolumeLabel()=%q", info.VolumeLabel, src.VolumeLabel()) + if info.DriveName != src.VolumeLabel() { + t.Errorf("Info().DriveName=%q != VolumeLabel()=%q", info.DriveName, src.VolumeLabel()) } - // When VolumeUUID is set, Path should be relative to the volume mount - // point (not an absolute path). - if info.VolumeUUID != "" && len(info.Path) > 0 && info.Path[0] == '/' { - t.Errorf("Path should be volume-relative when VolumeUUID is set, got absolute: %q", info.Path) + // When VolumeUUID is set, Path and PathID should both be absolute from + // drive root. 
+ if src.VolumeUUID() != "" && (len(info.Path) == 0 || info.Path[0] != '/') { + t.Errorf("Path should be absolute-from-root display when VolumeUUID is set, got: %q", info.Path) + } + if src.VolumeUUID() != "" { + if info.PathID != info.Path { + t.Errorf("expected portable PathID to be absolute-from-root, got PathID=%q Path=%q", info.PathID, info.Path) + } + } else if info.PathID != info.Path { + t.Errorf("expected PathID to equal Path for non-portable sources, got PathID=%q Path=%q", info.PathID, info.Path) } } diff --git a/pkg/source/onedrive.go b/pkg/source/onedrive.go index a1bc16c..1bd9c5f 100644 --- a/pkg/source/onedrive.go +++ b/pkg/source/onedrive.go @@ -65,10 +65,12 @@ func WithOneDriveExcludePatterns(patterns []string) OneDriveOption { type OneDriveSource struct { client *http.Client + accountID string // cached stable account identity; populated lazily by Info() account string // cached user principal name; populated lazily by Info() driveID string // The resolved Drive ID driveName string // The Drive Name (from config) rootPath string // The string path the user specified, or "/" + rootID string // stable selected root folder/item ID exclude *ExcludeMatcher } @@ -193,44 +195,87 @@ func (s *OneDriveSource) resolveDriveName(ctx context.Context) error { } func (s *OneDriveSource) Info() core.SourceInfo { - if s.account == "" { - s.account = s.fetchAccount() + if s.client != nil && (s.account == "" || s.accountID == "") { + id, upn := s.fetchAccountInfo() + if s.accountID == "" { + s.accountID = id + } + if s.account == "" { + s.account = upn + } + } + if s.client != nil && s.rootID == "" { + s.rootID = s.resolveRootID(context.Background()) } info := core.SourceInfo{ - Type: "onedrive", - Account: s.account, - Path: s.rootPath, + Type: "onedrive", + Account: s.account, + Path: s.rootPath, + PathID: s.rootID, + DriveName: "My Drive", } if s.driveID != "" { - info.VolumeUUID = s.driveID - info.VolumeLabel = s.driveName + info.Identity = s.driveID + 
info.DriveName = s.driveName + } else if s.accountID != "" { + info.Identity = s.accountID } else { - info.VolumeLabel = "My Drive" + info.Identity = s.account + } + if info.PathID == "" { + info.PathID = s.rootPath } return info } -func (s *OneDriveSource) fetchAccount() string { +func (s *OneDriveSource) fetchAccountInfo() (id, upn string) { req, err := http.NewRequestWithContext(context.Background(), "GET", - "https://graph.microsoft.com/v1.0/me?$select=userPrincipalName", nil) + "https://graph.microsoft.com/v1.0/me?$select=id,userPrincipalName", nil) if err != nil { - return "" + return "", "" } resp, err := s.client.Do(req) if err != nil { - return "" + return "", "" } defer func() { _ = resp.Body.Close() }() if resp.StatusCode != http.StatusOK { - return "" + return "", "" } var me struct { + ID string `json:"id"` UPN string `json:"userPrincipalName"` } if err := json.NewDecoder(resp.Body).Decode(&me); err != nil { + return "", "" + } + return me.ID, me.UPN +} + +func (s *OneDriveSource) resolveRootID(ctx context.Context) string { + rootURL := s.getRootURL() + req, err := http.NewRequestWithContext(ctx, "GET", rootURL, nil) + if err != nil { + return "" + } + resp, err := s.client.Do(req) + if err != nil { + return "" + } + defer func() { _ = resp.Body.Close() }() + if resp.StatusCode != http.StatusOK { + return "" + } + var item struct { + ID string `json:"id"` + } + if err := json.NewDecoder(resp.Body).Decode(&item); err != nil { + return "" + } + if item.ID == "" { return "" } - return me.UPN + return item.ID } func loadToken(file string) (*oauth2.Token, error) { diff --git a/pkg/source/onedrive_test.go b/pkg/source/onedrive_test.go index 37050b8..d1a8bd9 100644 --- a/pkg/source/onedrive_test.go +++ b/pkg/source/onedrive_test.go @@ -19,11 +19,14 @@ func TestOneDriveInfo(t *testing.T) { if info.Path != "/" { t.Errorf("Path = %q, want /", info.Path) } - if info.VolumeUUID != "" { - t.Errorf("VolumeUUID = %q, want empty", info.VolumeUUID) + if info.Identity 
!= "user@outlook.com" { + t.Errorf("Identity = %q, want user@outlook.com", info.Identity) } - if info.VolumeLabel != "My Drive" { - t.Errorf("VolumeLabel = %q, want My Drive", info.VolumeLabel) + if info.DriveName != "My Drive" { + t.Errorf("DriveName = %q, want My Drive", info.DriveName) + } + if info.PathID != "/" { + t.Errorf("PathID = %q, want /", info.PathID) } } @@ -36,8 +39,8 @@ func TestOneDriveChangesInfo_Type(t *testing.T) { if info.Type != "onedrive-changes" { t.Errorf("Type = %q, want onedrive-changes", info.Type) } - if info.VolumeLabel != "My Drive" { - t.Errorf("VolumeLabel = %q, want My Drive", info.VolumeLabel) + if info.DriveName != "My Drive" { + t.Errorf("DriveName = %q, want My Drive", info.DriveName) } if info.Path != "/" { t.Errorf("Path = %q, want /", info.Path) diff --git a/pkg/source/sftp_source.go b/pkg/source/sftp_source.go index c4ed942..298ba4e 100644 --- a/pkg/source/sftp_source.go +++ b/pkg/source/sftp_source.go @@ -135,10 +135,13 @@ func (s *SFTPSource) Close() error { } func (s *SFTPSource) Info() core.SourceInfo { + identity := fmt.Sprintf("%s@%s", s.user, s.host) return core.SourceInfo{ - Type: "sftp", - Account: fmt.Sprintf("%s@%s", s.user, s.host), - Path: s.rootPath, + Type: "sftp", + Account: identity, + Path: s.rootPath, + Identity: identity, + PathID: s.rootPath, } } diff --git a/rfcs/0009-unified-source-identity.md b/rfcs/0009-unified-source-identity.md new file mode 100644 index 0000000..1181f91 --- /dev/null +++ b/rfcs/0009-unified-source-identity.md @@ -0,0 +1,204 @@ +# RFC 0009: Unified Source Identity + +- **Status:** Implemented +- **Date:** 2026-03-14 +- **Affects:** `internal/core/models.go`, `pkg/source/*`, `internal/engine/backup.go`, `internal/engine/policy.go` + +## Abstract + +`SourceInfo` currently mixes stable lineage identity with display metadata. For example, `account` is used both as a matching key and as a human label. 
This causes fragile behavior for cloud accounts and creates field overloading (`volume_uuid`/`volume_label`) across unrelated providers. + +This RFC introduces two explicit lineage fields, `identity` (container identity) and `path_id` (stable root-location identity), keeps `account` for friendly display only, and adds `drive_name` as a dedicated container label. The rollout is backward compatible for repositories with existing snapshots: new binaries can continue from old backups without rewriting old snapshot objects. + +## Context + +Today, previous snapshot lookup and retention grouping use legacy combinations: + +- `type + volume_uuid + path` when `volume_uuid` is present. +- Otherwise `type + account + path`. + +This works but is semantically inconsistent: + +- `account` can be hostname, email, or `user@host`. +- `volume_uuid` can mean a local partition UUID or a cloud drive ID. +- `volume_label` can mean a disk label or a shared drive name. + +We want clear separation: + +- **Identity**: stable container key for lineage. +- **PathID**: stable root-location key inside a container. +- **Account / DriveName / Path**: human-friendly display fields. + +## Goals + +- Define one provider-agnostic lineage key per backup source. +- Keep display metadata separate from lineage identity. +- Preserve backward compatibility with existing repositories. +- Avoid snapshot migrations and avoid dual-writing to legacy fields. + +## Non-goals + +- No rewrite of existing snapshot objects. +- No immediate CLI flag redesign. +- No change to chunk/content dedup behavior. + +## Proposal + +### 1. SourceInfo schema + +Add three fields: + +- `identity`: stable source identity. +- `path_id`: stable identity of the selected root location inside the source container. +- `drive_name`: friendly container label. 
+ +```go +type SourceInfo struct { + Type string `json:"type"` + Account string `json:"account,omitempty"` // friendly account/host label + Path string `json:"path,omitempty"` // display path within the container + Identity string `json:"identity,omitempty"` // stable lineage identity + PathID string `json:"path_id,omitempty"` // stable selected-root identity + DriveName string `json:"drive_name,omitempty"` // friendly drive/container name + + // Legacy fields kept for reading old snapshots. + VolumeUUID string `json:"volume_uuid,omitempty"` + VolumeLabel string `json:"volume_label,omitempty"` +} +``` + +### 2. Identity mapping by source type + +#### Local portable drive + +- `identity`: partition GUID. +- `path_id`: absolute-style path from drive root (for example `/Photos`, `/`). +- `account`: hostname. +- `drive_name`: disk label. +- `path`: absolute-style display path from drive root (for example `/Photos`, `/`). + +#### Local (non-portable) + +When stable partition identity is unavailable or not applicable (for example root +filesystem backups or platforms/filesystems where UUID discovery is not +available): + +- `identity`: hostname. +- `path_id`: absolute source path. +- `account`: hostname. +- `drive_name`: empty (or platform volume label when available). +- `path`: absolute source path. + +This preserves existing behavior while still using the new `identity` field. + +#### Google My Drive + +- `identity`: stable Google account ID (opaque user identifier). +- `path_id`: root folder ID resolved from the selected path. +- `account`: account email (friendly display). +- `drive_name`: `My Drive`. +- `path`: backup path. + +#### Google Shared Drive + +- `identity`: shared drive ID. +- `path_id`: root folder ID resolved from the selected path. +- `account`: account email used to access the drive. +- `drive_name`: shared drive name. +- `path`: backup path. + +#### SFTP + +- `identity`: `user@host`. +- `path_id`: source path. +- `account`: `user@host`. 
+- `drive_name`: empty. +- `path`: source path. + +### 3. Lineage key decision: use `path_id` to survive cloud folder rename/move + +We do **not** concatenate `path` into `identity`. + +Instead, lineage uses a dedicated `path_id` field so cloud folder rename/move does not break incremental continuity. + +Lineage matching uses: + +- `type + identity + path_id` when `identity` and `path_id` exist. + +Rationale: + +- Keeps `identity` as pure container identity (drive/account), reusable across paths. +- Avoids delimiter/escaping/parsing concerns in a composite string. +- Keeps display path (`path`) independent from lineage key. +- Makes Google/OneDrive lineage robust to folder rename and move because folder IDs are stable. + +## Backward compatibility + +No dual-writing to legacy fields. + +New binary behavior: + +1. If the current source has `identity` and `path_id`, attempt `type + identity + path_id` match first. +2. If step 1 has no match, fall back to `type + identity + path` to bridge early rollouts where `path_id` may be missing. +3. Fall back to legacy `type + volume_uuid + path` for old portable-drive snapshots. +4. Fall back to legacy `type + account + path` for old snapshots. + +Implications: + +- New versions continue from old backups. +- Old versions may not recognize new `identity` semantics, which is acceptable. + +## Engine changes + +### Previous snapshot lookup (`backup.go`) + +Preferred matching order: + +1. `type + identity + path_id` +2. `type + identity + path` (bridge fallback when `path_id` absent) +3. `type + volume_uuid + path` +4. `type + account + path` + +### Retention grouping (`policy.go`) + +When grouping by account/path semantics, use: + +1. `identity` as the account-like grouping token when present. +2. Else `volume_uuid`. +3. Else `account`. + +For path grouping token, use: + +1. `path_id` when present. +2. Else `path`. + +## Implementation plan + +1. Add `identity`, `path_id`, and `drive_name` to `core.SourceInfo`. +2. 
Populate new fields in source adapters: + - `pkg/source/local_source.go` + - `pkg/source/gdrive.go` + - `pkg/source/gdrive_changes.go` + - `pkg/source/onedrive.go` + - `pkg/source/onedrive_changes.go` + - `pkg/source/sftp_source.go` +3. Update matching logic in `internal/engine/backup.go`. +4. Update grouping/filter logic in `internal/engine/policy.go`. +5. Keep list output display-oriented (`account`, `drive_name`, `path`), while matching/grouping uses `identity`/`path_id`. +6. Add compatibility tests for old/new/mixed snapshot catalogs. + +## Test matrix + +- Local portable drive backed up on host A then host B, same partition GUID. +- Google My Drive with stable account ID and mutable email display. +- Google Shared Drive accessed by different accounts, same drive ID. +- Google folder rename and move with unchanged folder ID; incremental lineage must continue. +- Mixed repositories with old snapshots (no `identity`) and new snapshots (`identity` present). +- Mode switch continuity (`gdrive` and `gdrive-changes`) with same effective source. + +## Open questions + +- Confirm exact Google field for stable account identity in My Drive mode. +- Confirm exact OneDrive stable account field (`id` vs UPN fallback behavior). +- Confirm whether `path_id` should be emitted for all providers immediately or staged by provider. +- Decide whether to expose `identity` in default `list` output or JSON-only initially. 
diff --git a/rfcs/README.md b/rfcs/README.md new file mode 100644 index 0000000..26ee656 --- /dev/null +++ b/rfcs/README.md @@ -0,0 +1,11 @@ +# RFC Index + +- [RFC 0001: HAMT Evolution](0001-hamt-evolution.md) +- [RFC 0002: Affinity Model](0002-affinity-model.md) +- [RFC 0003: Google Native File Export](0003-google-native-file-export.md) +- [RFC 0004: Extended File Attributes](0004-extended-file-attributes.md) +- [RFC 0005: Portable Drive Identity](0005-portable-drive-identity.md) +- [RFC 0006: Direct to Filesystem Restore](0006-direct-to-filesystem-restore.md) +- [RFC 0007: Cloud Subdirectory Backup](0007-cloud-subdirectory-backup.md) +- [RFC 0008: Drive Identity by Name](0008-drive-identity-by-name.md) +- [RFC 0009: Unified Source Identity](0009-unified-source-identity.md)