Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 21 additions & 6 deletions cmd/cloudstic/format.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,12 @@ func (r *runner) renderSnapshotTable(entries []engine.SnapshotEntry, reasons map
var source, account, path string
if e.Snap.Source != nil {
source = e.Snap.Source.Type
if e.Snap.Source.VolumeLabel != "" {
source += " (" + e.Snap.Source.VolumeLabel + ")"
driveName := e.Snap.Source.DriveName
if driveName == "" {
driveName = e.Snap.Source.VolumeLabel
}
if driveName != "" {
source += " (" + driveName + ")"
}
account = e.Snap.Source.Account
path = e.Snap.Source.Path
Expand All @@ -74,10 +78,17 @@ func sourceGroupKey(s *core.SourceInfo) string {
if s == nil {
return ""
}
pathToken := s.Path
if s.PathID != "" {
pathToken = s.PathID
}
if s.Identity != "" {
return s.Type + "\x00" + s.Identity + "\x00" + pathToken
}
if s.VolumeUUID != "" {
return s.Type + "\x00" + s.VolumeUUID + "\x00" + s.Path
return s.Type + "\x00" + s.VolumeUUID + "\x00" + pathToken
}
return s.Type + "\x00" + s.Account + "\x00" + s.Path
return s.Type + "\x00" + s.Account + "\x00" + pathToken
}

// sourceGroupLabel returns a human-readable label for a source group.
Expand All @@ -87,8 +98,12 @@ func sourceGroupLabel(s *core.SourceInfo) string {
}
var parts []string
label := s.Type
if s.VolumeLabel != "" {
label += " (" + s.VolumeLabel + ")"
driveName := s.DriveName
if driveName == "" {
driveName = s.VolumeLabel
}
if driveName != "" {
label += " (" + driveName + ")"
}
parts = append(parts, label)
if s.Account != "" {
Expand Down
58 changes: 45 additions & 13 deletions docs/sources.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ type Source interface {
|--------|-------------|
| `Walk` | Enumerate every file and folder. Parents **must** be emitted before their children. |
| `GetFileStream` | Return a readable stream for a file, identified by its source-specific `fileID`. |
| `Info` | Return metadata about the source (type, account, path) stored in the snapshot. |
| `Info` | Return source identity and display metadata stored in the snapshot. |
| `Size` | Return the total size of the source (used for progress reporting). |

### IncrementalSource
Expand Down Expand Up @@ -54,17 +54,31 @@ Returned by `Info()` and stored in the snapshot's `source` field:

```go
type SourceInfo struct {
Type string // e.g. "gdrive", "local", "sftp", "onedrive", "gdrive-changes"
Account string // Google email, hostname, user@host, etc.
Path string // drive path, filesystem path, etc.
Type string // e.g. "gdrive", "local", "sftp", "onedrive", "gdrive-changes"
Account string // friendly display account (email, hostname, user@host)
Path string // friendly display path
Identity string // stable container identity for lineage matching
PathID string // stable selected-root identity within the container
DriveName string // friendly container label (e.g. "My Drive")

// Legacy compatibility fields (read from older snapshots).
VolumeUUID string
VolumeLabel string
}
```

The engine uses `SourceInfo` to:

- Find the previous snapshot from the same source (for incremental comparison)
- Match previous snapshots for incremental comparison
- Group snapshots in retention policies (`forget --group-by source,account,path`)

Matching precedence:

1. `Type + Identity + PathID`
2. `Type + Identity + Path` (bridge fallback, intended for snapshots recorded before `PathID` was stored)
3. `Type + VolumeUUID + Path` (legacy fallback)
4. `Type + Account + Path` (legacy fallback)

### FileMeta

The common file metadata model emitted by all sources during `Walk` or `WalkChanges`:
Expand Down Expand Up @@ -97,8 +111,11 @@ type FileMeta struct {
| **FileID** | Relative path from root (e.g. `subdir/file.txt`) |
| **Parents** | Parent directory's relative path |
| **ContentHash** | Not provided (computed by the engine during upload) |
| **SourceInfo.Account** | Machine hostname |
| **SourceInfo.Path** | Absolute path to the backed-up directory |
| **SourceInfo.Identity** | Partition UUID (portable) or hostname (fallback) |
| **SourceInfo.PathID** | Stable path token (for portable drives, the path measured from the drive root, e.g. `/Photos`) |
| **SourceInfo.Account** | Machine hostname (display) |
| **SourceInfo.DriveName** | Volume label when available |
| **SourceInfo.Path** | Display path (portable drives are shown absolute from drive root, e.g. `/Photos`) |

Walks the directory tree using `filepath.Walk`. Symbolic links are not followed.

Expand All @@ -111,7 +128,10 @@ Walks the directory tree using `filepath.Walk`. Symbolic links are not followed.
| **FileID** | Relative path from root (e.g. `subdir/file.txt`) |
| **Parents** | Parent directory's relative path |
| **ContentHash** | Not provided (computed by the engine during upload) |
| **SourceInfo.Identity** | `user@host` |
| **SourceInfo.PathID** | Remote root directory path |
| **SourceInfo.Account** | `user@host` |
| **SourceInfo.DriveName** | *(empty)* |
| **SourceInfo.Path** | Remote root directory path |

Walks the remote directory tree via SFTP. Supports password, SSH private key, and ssh-agent authentication.
Expand All @@ -125,8 +145,11 @@ Walks the remote directory tree via SFTP. Supports password, SSH private key, an
| **FileID** | Google Drive file ID (e.g. `1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs74OgV`) |
| **Parents** | Google Drive parent folder IDs |
| **ContentHash** | SHA-256 checksum from the Drive API (avoids re-downloading unchanged files) |
| **SourceInfo.Account** | Google account email |
| **SourceInfo.Path** | `my-drive://` or `<driveID>://<rootFolderID>` |
| **SourceInfo.Identity** | My Drive: stable Google account ID; Shared Drive: shared drive ID |
| **SourceInfo.PathID** | Resolved root folder ID (stable across rename/move) |
| **SourceInfo.Account** | Google account email (display) |
| **SourceInfo.DriveName** | `My Drive` or shared drive name |
| **SourceInfo.Path** | User-selected display path |

Lists all files and folders via `files.list`, then topologically sorts folders so parents are emitted before children. Supports My Drive and Shared Drives (via `gdrive://<Drive Name>`), with optional folder scoping (via `gdrive://<Drive Name>/path/to/folder`).

Expand All @@ -139,7 +162,10 @@ Lists all files and folders via `files.list`, then topologically sorts folders s
| **FileID** | Same as `gdrive` |
| **Parents** | Same as `gdrive` |
| **ContentHash** | Same as `gdrive` |
| **SourceInfo.Identity** | Same as `gdrive` |
| **SourceInfo.PathID** | Same as `gdrive` |
| **SourceInfo.Account** | Same as `gdrive` |
| **SourceInfo.DriveName** | Same as `gdrive` |
| **SourceInfo.Path** | Same as `gdrive` |
| **Change token** | Google Drive Changes API start page token |

Expand All @@ -156,8 +182,11 @@ Folder changes are topologically sorted before file changes, ensuring parent ref
| **FileID** | OneDrive item ID |
| **Parents** | OneDrive parent item ID |
| **ContentHash** | Not provided (computed by the engine during upload) |
| **SourceInfo.Account** | User principal name from Microsoft Graph `/me` |
| **SourceInfo.Path** | `onedrive://` |
| **SourceInfo.Identity** | Selected drive ID or stable account ID |
| **SourceInfo.PathID** | Resolved root item ID (fallback to root path) |
| **SourceInfo.Account** | User principal name from Microsoft Graph `/me` (display) |
| **SourceInfo.DriveName** | `My Drive` or selected drive name |
| **SourceInfo.Path** | User-selected display path |

Walks the drive recursively starting from the root item via the Microsoft Graph API. Folders are visited depth-first, ensuring parents are emitted before children.

Expand All @@ -170,7 +199,10 @@ Walks the drive recursively starting from the root item via the Microsoft Graph
| **FileID** | Same as `onedrive` |
| **Parents** | Same as `onedrive` |
| **ContentHash** | Same as `onedrive` |
| **SourceInfo.Identity** | Same as `onedrive` |
| **SourceInfo.PathID** | Same as `onedrive` |
| **SourceInfo.Account** | Same as `onedrive` |
| **SourceInfo.DriveName** | Same as `onedrive` |
| **SourceInfo.Path** | Same as `onedrive` |
| **Change token** | Microsoft Graph delta link |

Expand All @@ -183,7 +215,7 @@ Embeds `OneDriveSource` and reuses its `Walk`, `GetFileStream`, and metadata con
The backup engine (`internal/engine/backup.go`) interacts with sources as follows:

1. **Detect source type** — check if the source implements `IncrementalSource`
2. **Load previous state** — find the most recent snapshot with a matching `SourceInfo`
2. **Load previous state** — find the most recent snapshot with a matching source identity
3. **If incremental and a previous token exists** — call `WalkChanges(token)` to get a delta, then apply upserts and deletes to the previous HAMT
4. **Otherwise** — call `GetStartPageToken()` (if incremental) then `Walk()` for a full scan, comparing each entry against the previous HAMT
5. **Upload changed files** — call `GetFileStream(fileID)` for each file that needs uploading
Expand All @@ -204,5 +236,5 @@ To add a new source:
2. `Walk` must emit parents before children
3. `FileID` must be a stable, unique identifier within the source — it's used as the HAMT key
4. `GetFileStream` must return the raw file bytes for the given `FileID`
5. `Info()` should return a unique `SourceInfo` so snapshots from different sources are distinguishable
5. `Info()` should return stable `Identity` + `PathID` values so lineage remains consistent over time
6. Register the source type in `cmd/cloudstic/main.go` in the `initSource` function
2 changes: 2 additions & 0 deletions docs/user-guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,8 @@ cloudstic backup -source local:~/Documents -dry-run

The `gdrive-changes` and `onedrive-changes` source types use their respective change/delta APIs for faster incremental backups after the first full backup.

Cloudstic tracks source lineage using stable source identities internally (container identity + root location identity), not just display labels. For cloud sources, this uses stable drive/folder IDs so incremental continuity is preserved across folder renames or moves.

> **Locking:** `backup` acquires a **shared lock** on the repository at the start of the run (skipped for `-dry-run`). Multiple backups can run concurrently. The lock is released when the command exits. If the repository is exclusively locked by a `prune` run, `backup` will fail immediately with an error message. Use `break-lock` if a lock is stale.

#### Exclude patterns
Expand Down
15 changes: 10 additions & 5 deletions internal/core/models.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,11 +71,16 @@ type LeafEntry struct {
// first-class field on the snapshot so that forget policies can group by
// source identity (Type + Identity + PathID, falling back to the legacy
// VolumeUUID/Account and Path fields for older snapshots).
type SourceInfo struct {
Type string `json:"type"` // e.g. "gdrive", "local"
Account string `json:"account,omitempty"` // Google account email, hostname, etc.
Path string `json:"path,omitempty"` // root folder ID, filesystem path, etc.
VolumeUUID string `json:"volume_uuid,omitempty"` // stable volume identity across mounts/machines
VolumeLabel string `json:"volume_label,omitempty"` // human-readable volume name (e.g. "MyDrive")
Type string `json:"type"` // e.g. "gdrive", "local"
Account string `json:"account,omitempty"` // friendly account/host label for display
Path string `json:"path,omitempty"` // display path within the source container
Identity string `json:"identity,omitempty"` // stable container identity for lineage matching
PathID string `json:"path_id,omitempty"` // stable selected-root identity within container
DriveName string `json:"drive_name,omitempty"` // human-readable container label (e.g. "My Drive")

// Legacy fields (read-only compatibility path; slated for future removal).
VolumeUUID string `json:"volume_uuid,omitempty"`
VolumeLabel string `json:"volume_label,omitempty"`
}

// Snapshot represents a backup checkpoint
Expand Down
43 changes: 35 additions & 8 deletions internal/engine/backup.go
Original file line number Diff line number Diff line change
Expand Up @@ -287,32 +287,59 @@ func (bm *BackupManager) loadLatestSeq() int {
}

// findPreviousSnapshot lists all snapshots and returns the most recent one
// whose Source matches the given info. When VolumeUUID is set, it is preferred
// over the legacy (Type + Account + Path) match to enable cross-machine
// incremental backup for portable drives.
// whose Source matches the given info. Matching prefers the new identity
// fields and falls back to legacy fields for backward compatibility.
// Returns nil when no matching snapshot exists.
func (bm *BackupManager) findPreviousSnapshot(info core.SourceInfo) *core.Snapshot {
entries, err := LoadSnapshotCatalog(bm.store)
if err != nil {
return nil
}

// Pass 1: UUID + path match (cross-machine, mount-point-agnostic).
// Path is relative to the volume root, so different sub-directories
// of the same drive are tracked independently.
// Pass 1: identity + path_id (preferred).
if info.Identity != "" && info.PathID != "" {
for _, e := range entries {
if e.Snap.Source != nil &&
e.Snap.Source.Type == info.Type &&
e.Snap.Source.Identity == info.Identity &&
e.Snap.Source.PathID == info.PathID {
snap := e.Snap
return &snap
}
}
}

// Pass 2: identity + path bridge for snapshots without path_id.
if info.Identity != "" {
for _, e := range entries {
if e.Snap.Source != nil &&
e.Snap.Source.Type == info.Type &&
e.Snap.Source.Identity == info.Identity &&
e.Snap.Source.Path == info.Path {
snap := e.Snap
return &snap
}
}
}

// Pass 3: legacy UUID + path match.
if info.VolumeUUID != "" {
legacyPath := info.PathID
if legacyPath == "" {
legacyPath = info.Path
}
for _, e := range entries {
if e.Snap.Source != nil &&
e.Snap.Source.Type == info.Type &&
e.Snap.Source.VolumeUUID == info.VolumeUUID &&
e.Snap.Source.Path == info.Path {
(e.Snap.Source.Path == legacyPath || e.Snap.Source.Path == info.Path) {
snap := e.Snap
return &snap
}
}
}

// Pass 2: legacy match (type + account + path)
// Pass 4: legacy match (type + account + path)
for _, e := range entries {
if e.Snap.Source != nil &&
e.Snap.Source.Type == info.Type &&
Expand Down
11 changes: 11 additions & 0 deletions internal/engine/list.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,17 @@ func (lm *ListManager) Run(ctx context.Context, opts ...ListOption) (*ListResult
source := ""
if e.Snap.Source != nil {
source = fmt.Sprintf(" source=%s account=%s path=%s", e.Snap.Source.Type, e.Snap.Source.Account, e.Snap.Source.Path)
if e.Snap.Source.DriveName != "" {
source += fmt.Sprintf(" drive=%s", e.Snap.Source.DriveName)
} else if e.Snap.Source.VolumeLabel != "" {
source += fmt.Sprintf(" drive=%s", e.Snap.Source.VolumeLabel)
}
if e.Snap.Source.Identity != "" {
source += fmt.Sprintf(" identity=%s", e.Snap.Source.Identity)
}
if e.Snap.Source.PathID != "" {
source += fmt.Sprintf(" path_id=%s", e.Snap.Source.PathID)
}
}
fmt.Fprintf(os.Stderr, " %s seq=%d created=%s%s\n", e.Ref, e.Snap.Seq, e.Snap.Created, source)
}
Expand Down
45 changes: 28 additions & 17 deletions internal/engine/policy.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,20 +132,21 @@ func makeGroupKey(snap *core.Snapshot, gf groupFields) GroupKey {
if gf.source {
k.Source = snap.Source.Type
}
// When VolumeUUID is present, use it as the primary grouping
// identity instead of account. Path is kept (it is relative to
// the volume root) so that different sub-directories of the same
// drive are grouped independently.
if snap.Source.VolumeUUID != "" && (gf.account || gf.path) {
k.Account = snap.Source.VolumeUUID
if gf.path {
k.Path = snap.Source.Path
}
} else {
if gf.account {
// Prefer new identity fields, then legacy volume UUID, then account/path.
if gf.account {
switch {
case snap.Source.Identity != "":
k.Account = snap.Source.Identity
case snap.Source.VolumeUUID != "":
k.Account = snap.Source.VolumeUUID
default:
k.Account = snap.Source.Account
}
if gf.path {
}
if gf.path {
if snap.Source.PathID != "" {
k.Path = snap.Source.PathID
} else {
k.Path = snap.Source.Path
}
}
Expand Down Expand Up @@ -191,14 +192,24 @@ func matchesFilter(snap *core.Snapshot, f snapshotFilter) bool {
if snap.Source == nil {
return false
}
// Accept either the human-readable account (hostname/email) or the
// VolumeUUID so that portable-drive snapshots can be targeted by UUID.
if snap.Source.Account != f.account && snap.Source.VolumeUUID != f.account {
// Accept display account and identity fields for compatibility.
if snap.Source.Account != f.account &&
snap.Source.Identity != f.account &&
snap.Source.VolumeUUID != f.account {
return false
}
}
if f.path != "" && (snap.Source == nil || snap.Source.Path != f.path) {
return false
if f.path != "" {
if snap.Source == nil {
return false
}
if snap.Source.PathID != "" {
if snap.Source.PathID != f.path && snap.Source.Path != f.path {
return false
}
} else if snap.Source.Path != f.path {
return false
}
}
if len(f.tags) > 0 {
tagSet := make(map[string]bool, len(snap.Tags))
Expand Down
Loading
Loading