Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 26 additions & 17 deletions cmd/cloudstic/cmd_backup.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,16 @@ import (
)

// backupArgs holds the parsed command-line options for the backup command.
// Each field name is declared exactly once; Go rejects a struct type that
// repeats a field name.
type backupArgs struct {
	g               *globalFlags     // global flags handed on to initSource
	sourceType      string           // selects the source implementation in initSource ("local", "gdrive", ...)
	sourcePath      string           // path passed to the local source
	driveID         string           // shared drive ID for gdrive sources (empty means My Drive)
	rootFolder      string           // root folder ID for gdrive sources (empty means the entire drive)
	dryRun          bool             // scan the source and report changes without writing to the store
	excludeFile     string           // path to a file with exclude patterns (one per line, gitignore syntax)
	skipNativeFiles bool             // exclude Google-native files (Docs, Sheets, Slides, etc.) from the backup
	tags            stringArrayFlags // tags to apply to the snapshot (repeatable flag)
	excludes        stringArrayFlags // exclude patterns (gitignore syntax, repeatable flag)
}

func parseBackupArgs() *backupArgs {
Expand All @@ -37,6 +38,7 @@ func parseBackupArgs() *backupArgs {
driveID := fs.String("drive-id", envDefault("CLOUDSTIC_DRIVE_ID", ""), "Shared drive ID for gdrive source (omit for My Drive)")
rootFolder := fs.String("root-folder", envDefault("CLOUDSTIC_ROOT_FOLDER", ""), "Root folder ID for gdrive source (defaults to entire drive)")
dryRun := fs.Bool("dry-run", false, "Scan source and report changes without writing to the store")
skipNativeFiles := fs.Bool("skip-native-files", false, "Exclude Google-native files (Docs, Sheets, Slides, etc.) from the backup")
excludeFile := fs.String("exclude-file", "", "Path to file with exclude patterns (one per line, gitignore syntax)")
fs.Var(&a.tags, "tag", "Tag to apply to the snapshot (can be specified multiple times)")
fs.Var(&a.excludes, "exclude", "Exclude pattern (gitignore syntax, repeatable)")
Expand All @@ -46,6 +48,7 @@ func parseBackupArgs() *backupArgs {
a.driveID = *driveID
a.rootFolder = *rootFolder
a.dryRun = *dryRun
a.skipNativeFiles = *skipNativeFiles
a.excludeFile = *excludeFile
return a
}
Expand All @@ -60,7 +63,7 @@ func (r *runner) runBackup() int {

ctx := context.Background()

src, err := initSource(ctx, a.sourceType, a.sourcePath, a.driveID, a.rootFolder, a.g, excludePatterns)
src, err := initSource(ctx, a.sourceType, a.sourcePath, a.driveID, a.rootFolder, a.skipNativeFiles, a.g, excludePatterns)
if err != nil {
return r.fail("Failed to init source: %v", err)
}
Expand Down Expand Up @@ -132,7 +135,7 @@ func (r *runner) printBackupSummary(res *engine.RunResult) {
}
}

func initSource(ctx context.Context, sourceType, sourcePath, driveID, rootFolder string, g *globalFlags, excludePatterns []string) (source.Source, error) {
func initSource(ctx context.Context, sourceType, sourcePath, driveID, rootFolder string, skipNativeFiles bool, g *globalFlags, excludePatterns []string) (source.Source, error) {
switch sourceType {
case "local":
return source.NewLocalSource(sourcePath, source.WithLocalExcludePatterns(excludePatterns)), nil
Expand All @@ -152,28 +155,34 @@ func initSource(ctx context.Context, sourceType, sourcePath, driveID, rootFolder
if err != nil {
return nil, err
}
return source.NewGDriveSource(
ctx,
gdriveOpts := []source.GDriveOption{
source.WithCredsPath(creds),
source.WithTokenPath(tokenPath),
source.WithDriveID(driveID),
source.WithRootFolderID(rootFolder),
source.WithGDriveExcludePatterns(excludePatterns),
)
}
if skipNativeFiles {
gdriveOpts = append(gdriveOpts, source.WithSkipNativeFiles())
}
return source.NewGDriveSource(ctx, gdriveOpts...)
case "gdrive-changes":
creds := os.Getenv("GOOGLE_APPLICATION_CREDENTIALS") // optional; uses built-in OAuth client when empty
tokenPath, err := resolveTokenPath("GOOGLE_TOKEN_FILE", "google_token.json")
if err != nil {
return nil, err
}
return source.NewGDriveChangeSource(
ctx,
gdriveOpts := []source.GDriveOption{
source.WithCredsPath(creds),
source.WithTokenPath(tokenPath),
source.WithDriveID(driveID),
source.WithRootFolderID(rootFolder),
source.WithGDriveExcludePatterns(excludePatterns),
)
}
if skipNativeFiles {
gdriveOpts = append(gdriveOpts, source.WithSkipNativeFiles())
}
return source.NewGDriveChangeSource(ctx, gdriveOpts...)
case "onedrive":
clientID := os.Getenv("ONEDRIVE_CLIENT_ID") // optional; uses built-in OAuth client when empty
tokenPath, err := resolveTokenPath("ONEDRIVE_TOKEN_FILE", "onedrive_token.json")
Expand Down
26 changes: 26 additions & 0 deletions internal/engine/backup_scan.go
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,21 @@ func (bm *BackupManager) detectChange(oldRoot string, meta *core.FileMeta) (chan
return false, "", err
}

// Native Google files: use headRevisionId as the sole change signal.
// Size and ContentHash comparisons are unreliable for exported files
// (see RFC 0003 section 2.4).
if isGoogleNativeMeta(meta) {
newRevID, _ := meta.Extra["headRevisionId"].(string)
oldRevID, _ := oldMeta.Extra["headRevisionId"].(string)
if newRevID != "" && newRevID == oldRevID {
meta.ContentHash = oldMeta.ContentHash
meta.ContentRef = oldMeta.ContentRef
meta.Size = oldMeta.Size
return false, oldRef, nil
}
return true, oldRef, nil
}

if meta.ContentHash == "" && oldMeta.ContentHash != "" && metadataEqual(*meta, *oldMeta) {
meta.ContentHash = oldMeta.ContentHash
meta.ContentRef = oldMeta.ContentRef
Expand All @@ -175,6 +190,17 @@ func (bm *BackupManager) detectChange(oldRoot string, meta *core.FileMeta) (chan
return newRef != oldRef, oldRef, nil
}

// isGoogleNativeMeta reports whether meta describes a Google-native file
// (Docs, Sheets, etc.), judged by the "mimeType" string stored in meta.Extra.
// Folders share the Google-apps MIME prefix but are not counted as native
// files. A missing Extra map or a missing/non-string mimeType yields false.
func isGoogleNativeMeta(meta *core.FileMeta) bool {
	const (
		nativePrefix = "application/vnd.google-apps."
		folderMime   = "application/vnd.google-apps.folder"
	)
	if meta.Extra == nil {
		return false
	}
	mt, ok := meta.Extra["mimeType"].(string)
	if !ok || mt == folderMime {
		return false
	}
	return strings.HasPrefix(mt, nativePrefix)
}

func metadataEqual(a, b core.FileMeta) bool {
return a.Name == b.Name &&
a.Size == b.Size &&
Expand Down
230 changes: 230 additions & 0 deletions internal/engine/backup_scan_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,230 @@
package engine

import (
"context"
"encoding/json"
"testing"

"github.com/cloudstic/cli/internal/core"
"github.com/cloudstic/cli/internal/hamt"
"github.com/cloudstic/cli/internal/ui"
"github.com/cloudstic/cli/pkg/store"
)

// TestIsGoogleNativeMeta checks isGoogleNativeMeta against a table of FileMeta
// values: Google-native MIME types, the folder MIME type, a non-Google MIME
// type, and metas whose Extra data is absent or lacks a mimeType key.
func TestIsGoogleNativeMeta(t *testing.T) {
	// withMime builds a FileMeta whose Extra holds only the given mimeType.
	withMime := func(mime string) core.FileMeta {
		return core.FileMeta{Extra: map[string]interface{}{"mimeType": mime}}
	}

	cases := []struct {
		name string
		meta core.FileMeta
		want bool
	}{
		{name: "google doc", meta: withMime("application/vnd.google-apps.document"), want: true},
		{name: "google sheet", meta: withMime("application/vnd.google-apps.spreadsheet"), want: true},
		{name: "folder", meta: withMime("application/vnd.google-apps.folder"), want: false},
		{name: "regular file", meta: withMime("application/pdf"), want: false},
		{name: "no extra", meta: core.FileMeta{}, want: false},
		{name: "nil extra", meta: core.FileMeta{Extra: nil}, want: false},
		{name: "no mimeType key", meta: core.FileMeta{Extra: map[string]interface{}{"other": "value"}}, want: false},
	}

	for i := range cases {
		tc := &cases[i]
		t.Run(tc.name, func(t *testing.T) {
			got := isGoogleNativeMeta(&tc.meta)
			if got != tc.want {
				t.Errorf("isGoogleNativeMeta() = %v, want %v", got, tc.want)
			}
		})
	}
}

// TestDetectChange_NativeFileFastPath runs three backups of a single Google
// Doc. The second run keeps the same headRevisionId and must report the file
// as unmodified; the third run uses a new headRevisionId and must report the
// file as changed and store it under a different ref.
func TestDetectChange_NativeFileFastPath(t *testing.T) {
	const (
		docMime    = "application/vnd.google-apps.document"
		exportMime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
	)

	ctx := context.Background()
	mockSrc := NewMockSource()
	mockDest := NewMockStore()

	// Backup #1: the document is at revision "rev1".
	mockSrc.Files["DOC_1"] = MockFile{
		Content: []byte("exported docx content"),
		Meta: core.FileMeta{
			FileID: "DOC_1",
			Name:   "Notes.docx",
			Type:   core.FileTypeFile,
			Size:   0, // native files report 0 from Walk
			Mtime:  1000,
			Extra: map[string]interface{}{
				"mimeType":       docMime,
				"exportMimeType": exportMime,
				"headRevisionId": "rev1",
			},
		},
	}

	firstMgr := NewBackupManager(mockSrc, mockDest, ui.NewNoOpReporter(), nil)
	first, err := firstMgr.Run(ctx)
	if err != nil {
		t.Fatalf("First backup failed: %v", err)
	}

	// The document must be reachable from the first snapshot's root.
	compressed := store.NewCompressedStore(mockDest)
	snapTree := hamt.NewTree(compressed)
	firstRef, err := snapTree.Lookup(first.Root, "", "DOC_1")
	if err != nil || firstRef == "" {
		t.Fatalf("DOC_1 not found in first snapshot: ref=%q err=%v", firstRef, err)
	}

	// Backup #2: nothing changed at the source, so the revision ID matches.
	secondMgr := NewBackupManager(mockSrc, mockDest, ui.NewNoOpReporter(), nil)
	second, err := secondMgr.Run(ctx)
	if err != nil {
		t.Fatalf("Second backup failed: %v", err)
	}

	if got := second.FilesChanged; got != 0 {
		t.Errorf("Expected 0 changed files (same headRevisionId), got %d", got)
	}
	if got := second.FilesUnmodified; got != 1 {
		t.Errorf("Expected 1 unmodified file, got %d", got)
	}

	// Backup #3: the document moves to revision "rev2" with new content.
	mockSrc.Files["DOC_1"] = MockFile{
		Content: []byte("new exported docx content"),
		Meta: core.FileMeta{
			FileID: "DOC_1",
			Name:   "Notes.docx",
			Type:   core.FileTypeFile,
			Size:   0,
			Mtime:  2000,
			Extra: map[string]interface{}{
				"mimeType":       docMime,
				"exportMimeType": exportMime,
				"headRevisionId": "rev2",
			},
		},
	}

	thirdMgr := NewBackupManager(mockSrc, mockDest, ui.NewNoOpReporter(), nil)
	third, err := thirdMgr.Run(ctx)
	if err != nil {
		t.Fatalf("Third backup failed: %v", err)
	}

	if got := third.FilesChanged; got != 1 {
		t.Errorf("Expected 1 changed file (different headRevisionId), got %d", got)
	}

	// The new revision must be stored under a different ref than the first.
	thirdRef, err := snapTree.Lookup(third.Root, "", "DOC_1")
	if err != nil || thirdRef == "" {
		t.Fatalf("DOC_1 not found in third snapshot: ref=%q err=%v", thirdRef, err)
	}
	if thirdRef == firstRef {
		t.Error("Expected different ref after headRevisionId change")
	}
}

// TestDetectChange_NativeFileEmptyRevID backs up a Google Doc whose metadata
// carries no headRevisionId, twice in a row, and expects the second run to
// count the file as changed because there is no revision ID to match on.
func TestDetectChange_NativeFileEmptyRevID(t *testing.T) {
	ctx := context.Background()
	mockSrc := NewMockSource()
	mockDest := NewMockStore()

	// The Extra map deliberately omits "headRevisionId".
	docExtra := map[string]interface{}{
		"mimeType":       "application/vnd.google-apps.document",
		"exportMimeType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
	}
	mockSrc.Files["DOC_1"] = MockFile{
		Content: []byte("exported docx content"),
		Meta: core.FileMeta{
			FileID: "DOC_1",
			Name:   "Notes.docx",
			Type:   core.FileTypeFile,
			Size:   0,
			Mtime:  1000,
			Extra:  docExtra,
		},
	}

	// First run: the file has never been seen, so it counts as new.
	firstMgr := NewBackupManager(mockSrc, mockDest, ui.NewNoOpReporter(), nil)
	first, err := firstMgr.Run(ctx)
	if err != nil {
		t.Fatalf("First backup failed: %v", err)
	}
	if got := first.FilesNew; got != 1 {
		t.Errorf("Expected 1 new file, got %d", got)
	}

	// Second run: still no headRevisionId, so the file must count as changed.
	secondMgr := NewBackupManager(mockSrc, mockDest, ui.NewNoOpReporter(), nil)
	second, err := secondMgr.Run(ctx)
	if err != nil {
		t.Fatalf("Second backup failed: %v", err)
	}
	if got := second.FilesChanged; got != 1 {
		t.Errorf("Expected 1 changed file (empty headRevisionId), got %d", got)
	}
}

// TestDetectChange_NativeFileCarriesForwardMetadata backs up a Google Doc,
// confirms the stored FileMeta has a non-empty ContentHash and non-zero Size,
// then backs up again with the same headRevisionId and expects the file to be
// unmodified and to resolve to the same ref as in the first snapshot.
func TestDetectChange_NativeFileCarriesForwardMetadata(t *testing.T) {
	ctx := context.Background()
	mockSrc := NewMockSource()
	mockDest := NewMockStore()

	mockSrc.Files["DOC_1"] = MockFile{
		Content: []byte("content"),
		Meta: core.FileMeta{
			FileID: "DOC_1",
			Name:   "Notes.docx",
			Type:   core.FileTypeFile,
			Size:   0,
			Extra: map[string]interface{}{
				"mimeType":       "application/vnd.google-apps.document",
				"headRevisionId": "rev1",
			},
		},
	}

	firstMgr := NewBackupManager(mockSrc, mockDest, ui.NewNoOpReporter(), nil)
	first, err := firstMgr.Run(ctx)
	if err != nil {
		t.Fatalf("First backup failed: %v", err)
	}

	// Load the FileMeta written by the first backup to inspect the
	// ContentHash and Size recorded for the upload.
	compressed := store.NewCompressedStore(mockDest)
	firstRef, err := hamt.NewTree(compressed).Lookup(first.Root, "", "DOC_1")
	if err != nil {
		t.Fatalf("Lookup: %v", err)
	}
	raw, err := compressed.Get(ctx, firstRef)
	if err != nil {
		t.Fatalf("Get: %v", err)
	}
	var persisted core.FileMeta
	if err := json.Unmarshal(raw, &persisted); err != nil {
		t.Fatalf("Unmarshal: %v", err)
	}
	if persisted.ContentHash == "" {
		t.Fatal("Expected ContentHash to be set after upload")
	}
	if persisted.Size == 0 {
		t.Fatal("Expected Size to be set after upload")
	}

	// Same revision ID on the second run: the unchanged path is expected to
	// reuse ContentHash and Size from the first backup.
	secondMgr := NewBackupManager(mockSrc, mockDest, ui.NewNoOpReporter(), nil)
	second, err := secondMgr.Run(ctx)
	if err != nil {
		t.Fatalf("Second backup failed: %v", err)
	}
	if got := second.FilesUnmodified; got != 1 {
		t.Errorf("Expected 1 unmodified, got %d", got)
	}

	// An identical ref in the second snapshot shows the metadata was carried
	// forward unchanged.
	secondRef, err := hamt.NewTree(compressed).Lookup(second.Root, "", "DOC_1")
	if err != nil {
		t.Fatalf("Lookup: %v", err)
	}
	if secondRef != firstRef {
		t.Errorf("Expected same ref (metadata carried forward), got %q vs %q", secondRef, firstRef)
	}
}
Loading
Loading