diff --git a/cmd/cloudstic/cmd_backup.go b/cmd/cloudstic/cmd_backup.go index 7e1244e..ac7c365 100644 --- a/cmd/cloudstic/cmd_backup.go +++ b/cmd/cloudstic/cmd_backup.go @@ -17,15 +17,16 @@ import ( ) type backupArgs struct { - g *globalFlags - sourceType string - sourcePath string - driveID string - rootFolder string - dryRun bool - excludeFile string - tags stringArrayFlags - excludes stringArrayFlags + g *globalFlags + sourceType string + sourcePath string + driveID string + rootFolder string + dryRun bool + excludeFile string + skipNativeFiles bool + tags stringArrayFlags + excludes stringArrayFlags } func parseBackupArgs() *backupArgs { @@ -37,6 +38,7 @@ func parseBackupArgs() *backupArgs { driveID := fs.String("drive-id", envDefault("CLOUDSTIC_DRIVE_ID", ""), "Shared drive ID for gdrive source (omit for My Drive)") rootFolder := fs.String("root-folder", envDefault("CLOUDSTIC_ROOT_FOLDER", ""), "Root folder ID for gdrive source (defaults to entire drive)") dryRun := fs.Bool("dry-run", false, "Scan source and report changes without writing to the store") + skipNativeFiles := fs.Bool("skip-native-files", false, "Exclude Google-native files (Docs, Sheets, Slides, etc.) from the backup") excludeFile := fs.String("exclude-file", "", "Path to file with exclude patterns (one per line, gitignore syntax)") fs.Var(&a.tags, "tag", "Tag to apply to the snapshot (can be specified multiple times)") fs.Var(&a.excludes, "exclude", "Exclude pattern (gitignore syntax, repeatable)") @@ -46,6 +48,7 @@ func parseBackupArgs() *backupArgs { a.driveID = *driveID a.rootFolder = *rootFolder a.dryRun = *dryRun + a.skipNativeFiles = *skipNativeFiles a.excludeFile = *excludeFile return a } @@ -60,7 +63,7 @@ func (r *runner) runBackup() int { ctx := context.Background() - src, err := initSource(ctx, a.sourceType, a.sourcePath, a.driveID, a.rootFolder, a.g, excludePatterns) + src, err := initSource(ctx, a.sourceType, a.sourcePath, a.driveID, a.rootFolder, a.skipNativeFiles, a.g, excludePatterns) if err != nil { return r.fail("Failed to init source: %v", err) } @@ -132,7 +135,7 @@ func (r *runner) printBackupSummary(res *engine.RunResult) { } } -func initSource(ctx context.Context, sourceType, sourcePath, driveID, rootFolder string, g *globalFlags, excludePatterns []string) (source.Source, error) { +func initSource(ctx context.Context, sourceType, sourcePath, driveID, rootFolder string, skipNativeFiles bool, g *globalFlags, excludePatterns []string) (source.Source, error) { switch sourceType { case "local": return source.NewLocalSource(sourcePath, source.WithLocalExcludePatterns(excludePatterns)), nil @@ -152,28 +155,34 @@ func initSource(ctx context.Context, sourceType, sourcePath, driveID, rootFolder if err != nil { return nil, err } - return source.NewGDriveSource( - ctx, + gdriveOpts := []source.GDriveOption{ source.WithCredsPath(creds), source.WithTokenPath(tokenPath), source.WithDriveID(driveID), source.WithRootFolderID(rootFolder), source.WithGDriveExcludePatterns(excludePatterns), - ) + } + if skipNativeFiles { + gdriveOpts = append(gdriveOpts, source.WithSkipNativeFiles()) + } + return source.NewGDriveSource(ctx, gdriveOpts...) case "gdrive-changes": creds := os.Getenv("GOOGLE_APPLICATION_CREDENTIALS") // optional; uses built-in OAuth client when empty tokenPath, err := resolveTokenPath("GOOGLE_TOKEN_FILE", "google_token.json") if err != nil { return nil, err } - return source.NewGDriveChangeSource( - ctx, + gdriveOpts := []source.GDriveOption{ source.WithCredsPath(creds), source.WithTokenPath(tokenPath), source.WithDriveID(driveID), source.WithRootFolderID(rootFolder), source.WithGDriveExcludePatterns(excludePatterns), - ) + } + if skipNativeFiles { + gdriveOpts = append(gdriveOpts, source.WithSkipNativeFiles()) + } + return source.NewGDriveChangeSource(ctx, gdriveOpts...) case "onedrive": clientID := os.Getenv("ONEDRIVE_CLIENT_ID") // optional; uses built-in OAuth client when empty tokenPath, err := resolveTokenPath("ONEDRIVE_TOKEN_FILE", "onedrive_token.json") diff --git a/internal/engine/backup_scan.go b/internal/engine/backup_scan.go index 6fd7608..746d22b 100644 --- a/internal/engine/backup_scan.go +++ b/internal/engine/backup_scan.go @@ -158,6 +158,21 @@ func (bm *BackupManager) detectChange(oldRoot string, meta *core.FileMeta) (chan return false, "", err } + // Native Google files: use headRevisionId as the sole change signal. + // Size and ContentHash comparisons are unreliable for exported files + // (see RFC 0003 section 2.4). + if isGoogleNativeMeta(meta) { + newRevID, _ := meta.Extra["headRevisionId"].(string) + oldRevID, _ := oldMeta.Extra["headRevisionId"].(string) + if newRevID != "" && newRevID == oldRevID { + meta.ContentHash = oldMeta.ContentHash + meta.ContentRef = oldMeta.ContentRef + meta.Size = oldMeta.Size + return false, oldRef, nil + } + return true, oldRef, nil + } + if meta.ContentHash == "" && oldMeta.ContentHash != "" && metadataEqual(*meta, *oldMeta) { meta.ContentHash = oldMeta.ContentHash meta.ContentRef = oldMeta.ContentRef @@ -175,6 +190,17 @@ func (bm *BackupManager) detectChange(oldRoot string, meta *core.FileMeta) (chan return newRef != oldRef, oldRef, nil } +// isGoogleNativeMeta returns true if the FileMeta represents a Google-native +// file (Docs, Sheets, etc.) based on the stored mimeType in Extra. +func isGoogleNativeMeta(meta *core.FileMeta) bool { + if meta.Extra == nil { + return false + } + mimeType, _ := meta.Extra["mimeType"].(string) + return strings.HasPrefix(mimeType, "application/vnd.google-apps.") && + mimeType != "application/vnd.google-apps.folder" +} + func metadataEqual(a, b core.FileMeta) bool { return a.Name == b.Name && a.Size == b.Size && diff --git a/internal/engine/backup_scan_test.go b/internal/engine/backup_scan_test.go new file mode 100644 index 0000000..b4c8dfb --- /dev/null +++ b/internal/engine/backup_scan_test.go @@ -0,0 +1,230 @@ +package engine + +import ( + "context" + "encoding/json" + "testing" + + "github.com/cloudstic/cli/internal/core" + "github.com/cloudstic/cli/internal/hamt" + "github.com/cloudstic/cli/internal/ui" + "github.com/cloudstic/cli/pkg/store" +) + +func TestIsGoogleNativeMeta(t *testing.T) { + tests := []struct { + name string + meta core.FileMeta + want bool + }{ + {"google doc", core.FileMeta{Extra: map[string]interface{}{"mimeType": "application/vnd.google-apps.document"}}, true}, + {"google sheet", core.FileMeta{Extra: map[string]interface{}{"mimeType": "application/vnd.google-apps.spreadsheet"}}, true}, + {"folder", core.FileMeta{Extra: map[string]interface{}{"mimeType": "application/vnd.google-apps.folder"}}, false}, + {"regular file", core.FileMeta{Extra: map[string]interface{}{"mimeType": "application/pdf"}}, false}, + {"no extra", core.FileMeta{}, false}, + {"nil extra", core.FileMeta{Extra: nil}, false}, + {"no mimeType key", core.FileMeta{Extra: map[string]interface{}{"other": "value"}}, false}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := isGoogleNativeMeta(&tt.meta); got != tt.want { + t.Errorf("isGoogleNativeMeta() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestDetectChange_NativeFileFastPath(t *testing.T) { + ctx := context.Background() + src := NewMockSource() + dest := NewMockStore() + + // First backup: a Google Doc with headRevisionId "rev1". + src.Files["DOC_1"] = MockFile{ + Meta: core.FileMeta{ + FileID: "DOC_1", + Name: "Notes.docx", + Type: core.FileTypeFile, + Size: 0, // native files report 0 from Walk + Mtime: 1000, + Extra: map[string]interface{}{ + "mimeType": "application/vnd.google-apps.document", + "exportMimeType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "headRevisionId": "rev1", + }, + }, + Content: []byte("exported docx content"), + } + + mgr := NewBackupManager(src, dest, ui.NewNoOpReporter(), nil) + result1, err := mgr.Run(ctx) + if err != nil { + t.Fatalf("First backup failed: %v", err) + } + + // Verify the file was stored. + readStore := store.NewCompressedStore(dest) + tree := hamt.NewTree(readStore) + ref1, err := tree.Lookup(result1.Root, "", "DOC_1") + if err != nil || ref1 == "" { + t.Fatalf("DOC_1 not found in first snapshot: ref=%q err=%v", ref1, err) + } + + // Second backup: same headRevisionId → should detect as unchanged. + mgr2 := NewBackupManager(src, dest, ui.NewNoOpReporter(), nil) + result2, err := mgr2.Run(ctx) + if err != nil { + t.Fatalf("Second backup failed: %v", err) + } + + if result2.FilesChanged != 0 { + t.Errorf("Expected 0 changed files (same headRevisionId), got %d", result2.FilesChanged) + } + if result2.FilesUnmodified != 1 { + t.Errorf("Expected 1 unmodified file, got %d", result2.FilesUnmodified) + } + + // Third backup: different headRevisionId → should detect as changed. + src.Files["DOC_1"] = MockFile{ + Meta: core.FileMeta{ + FileID: "DOC_1", + Name: "Notes.docx", + Type: core.FileTypeFile, + Size: 0, + Mtime: 2000, + Extra: map[string]interface{}{ + "mimeType": "application/vnd.google-apps.document", + "exportMimeType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "headRevisionId": "rev2", + }, + }, + Content: []byte("new exported docx content"), + } + + mgr3 := NewBackupManager(src, dest, ui.NewNoOpReporter(), nil) + result3, err := mgr3.Run(ctx) + if err != nil { + t.Fatalf("Third backup failed: %v", err) + } + + if result3.FilesChanged != 1 { + t.Errorf("Expected 1 changed file (different headRevisionId), got %d", result3.FilesChanged) + } + + // Verify the stored content changed. + ref3, err := tree.Lookup(result3.Root, "", "DOC_1") + if err != nil || ref3 == "" { + t.Fatalf("DOC_1 not found in third snapshot: ref=%q err=%v", ref3, err) + } + if ref3 == ref1 { + t.Error("Expected different ref after headRevisionId change") + } +} + +func TestDetectChange_NativeFileEmptyRevID(t *testing.T) { + ctx := context.Background() + src := NewMockSource() + dest := NewMockStore() + + // Native file without headRevisionId should always be treated as changed. + src.Files["DOC_1"] = MockFile{ + Meta: core.FileMeta{ + FileID: "DOC_1", + Name: "Notes.docx", + Type: core.FileTypeFile, + Size: 0, + Mtime: 1000, + Extra: map[string]interface{}{ + "mimeType": "application/vnd.google-apps.document", + "exportMimeType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + }, + }, + Content: []byte("exported docx content"), + } + + mgr := NewBackupManager(src, dest, ui.NewNoOpReporter(), nil) + result1, err := mgr.Run(ctx) + if err != nil { + t.Fatalf("First backup failed: %v", err) + } + if result1.FilesNew != 1 { + t.Errorf("Expected 1 new file, got %d", result1.FilesNew) + } + + // Second backup: still no headRevisionId → should be treated as changed. + mgr2 := NewBackupManager(src, dest, ui.NewNoOpReporter(), nil) + result2, err := mgr2.Run(ctx) + if err != nil { + t.Fatalf("Second backup failed: %v", err) + } + if result2.FilesChanged != 1 { + t.Errorf("Expected 1 changed file (empty headRevisionId), got %d", result2.FilesChanged) + } +} + +func TestDetectChange_NativeFileCarriesForwardMetadata(t *testing.T) { + ctx := context.Background() + src := NewMockSource() + dest := NewMockStore() + + src.Files["DOC_1"] = MockFile{ + Meta: core.FileMeta{ + FileID: "DOC_1", + Name: "Notes.docx", + Type: core.FileTypeFile, + Size: 0, + Extra: map[string]interface{}{ + "mimeType": "application/vnd.google-apps.document", + "headRevisionId": "rev1", + }, + }, + Content: []byte("content"), + } + + mgr := NewBackupManager(src, dest, ui.NewNoOpReporter(), nil) + result1, err := mgr.Run(ctx) + if err != nil { + t.Fatalf("First backup failed: %v", err) + } + + // Read the stored meta to get the ContentHash and Size set by the upload. + readStore := store.NewCompressedStore(dest) + ref, err := hamt.NewTree(readStore).Lookup(result1.Root, "", "DOC_1") + if err != nil { + t.Fatalf("Lookup: %v", err) + } + data, err := readStore.Get(ctx, ref) + if err != nil { + t.Fatalf("Get: %v", err) + } + var storedMeta core.FileMeta + if err := json.Unmarshal(data, &storedMeta); err != nil { + t.Fatalf("Unmarshal: %v", err) + } + if storedMeta.ContentHash == "" { + t.Fatal("Expected ContentHash to be set after upload") + } + if storedMeta.Size == 0 { + t.Fatal("Expected Size to be set after upload") + } + + // Second backup with same revID: the unchanged path should carry forward + // ContentHash and Size from the first backup. + mgr2 := NewBackupManager(src, dest, ui.NewNoOpReporter(), nil) + result2, err := mgr2.Run(ctx) + if err != nil { + t.Fatalf("Second backup failed: %v", err) + } + if result2.FilesUnmodified != 1 { + t.Errorf("Expected 1 unmodified, got %d", result2.FilesUnmodified) + } + + // Verify the ref is the same (metadata carried forward correctly). + ref2, err := hamt.NewTree(readStore).Lookup(result2.Root, "", "DOC_1") + if err != nil { + t.Fatalf("Lookup: %v", err) + } + if ref2 != ref { + t.Errorf("Expected same ref (metadata carried forward), got %q vs %q", ref2, ref) + } +} diff --git a/pkg/source/gdrive.go b/pkg/source/gdrive.go index d413b50..d2d1347 100644 --- a/pkg/source/gdrive.go +++ b/pkg/source/gdrive.go @@ -28,6 +28,7 @@ type gDriveOptions struct { rootFolderID string accountEmail string excludePatterns []string + skipNativeFiles bool } // GDriveOption configures a Google Drive source. @@ -84,16 +85,26 @@ func WithGDriveExcludePatterns(patterns []string) GDriveOption { } } +// WithSkipNativeFiles excludes Google-native files (Docs, Sheets, Slides, etc.) +// from the backup. They will not appear in the snapshot at all. +func WithSkipNativeFiles() GDriveOption { + return func(o *gDriveOptions) { + o.skipNativeFiles = true + } +} + // GDriveSource implements Source for Google Drive. By default it backs up the // entire "My Drive" root. Set DriveID in GDriveSourceConfig to back up a // shared drive instead, and/or set RootFolderID to restrict to a specific // folder within the selected drive. type GDriveSource struct { - service *drive.Service - driveID string // shared drive ID; empty means "My Drive" - rootFolderID string // if empty, defaults to "root" (entire drive) - account string // Google account email; populated automatically - exclude *ExcludeMatcher + service *drive.Service + driveID string // shared drive ID; empty means "My Drive" + rootFolderID string // if empty, defaults to "root" (entire drive) + account string // Google account email; populated automatically + exclude *ExcludeMatcher + skipNativeFiles bool + mimeTypes map[string]string // fileID → mimeType; populated during Walk/WalkChanges } // NewGDriveSource creates a new GDriveSource from the given options. @@ -150,11 +161,12 @@ func NewGDriveSource(ctx context.Context, opts ...GDriveOption) (*GDriveSource, } return &GDriveSource{ - service: srv, - driveID: cfg.driveID, - rootFolderID: cfg.rootFolderID, - account: cfg.accountEmail, - exclude: NewExcludeMatcher(cfg.excludePatterns), + service: srv, + driveID: cfg.driveID, + rootFolderID: cfg.rootFolderID, + account: cfg.accountEmail, + exclude: NewExcludeMatcher(cfg.excludePatterns), + skipNativeFiles: cfg.skipNativeFiles, }, nil } @@ -260,13 +272,15 @@ func isRetryableGoogleErr(err error) bool { // for topological sort) but files are streamed page-by-page to avoid holding // the full file list in memory. func (s *GDriveSource) Walk(ctx context.Context, callback func(core.FileMeta) error) error { + s.mimeTypes = make(map[string]string) + var folders []*drive.File pageToken := "" for { call := s.service.Files.List(). Q("trashed = false AND mimeType = 'application/vnd.google-apps.folder'"). - Fields("nextPageToken, files(id, name, parents, mimeType, size, modifiedTime, owners, trashed, sha256Checksum)"). + Fields("nextPageToken, files(id, name, parents, mimeType, size, modifiedTime, owners, trashed, sha256Checksum, headRevisionId)"). PageSize(1000). Context(ctx) if s.isSharedDrive() { @@ -313,7 +327,7 @@ func (s *GDriveSource) Walk(ctx context.Context, callback func(core.FileMeta) er for { call := s.service.Files.List(). Q("trashed = false AND mimeType != 'application/vnd.google-apps.folder'"). - Fields("nextPageToken, files(id, name, parents, mimeType, size, modifiedTime, owners, trashed, sha256Checksum)"). + Fields("nextPageToken, files(id, name, parents, mimeType, size, modifiedTime, owners, trashed, sha256Checksum, headRevisionId)"). PageSize(1000). Context(ctx) if s.isSharedDrive() { @@ -379,6 +393,17 @@ func topoSortFolders(folders []*drive.File) []*drive.File { } func (s *GDriveSource) visitEntryWithPath(f *drive.File, pathMap map[string]string, excludedPaths map[string]bool, callback func(core.FileMeta) error) error { + // Record MIME type for all non-folder entries so GetFileStream can + // decide between download and export. + if f.MimeType != "application/vnd.google-apps.folder" { + s.mimeTypes[f.Id] = f.MimeType + } + + // Skip native files when the user opted out of exporting them. + if s.skipNativeFiles && isGoogleNativeMimeType(f.MimeType) { + return nil + } + meta := s.toFileMeta(f) // Compute full path from parent path map. @@ -429,17 +454,28 @@ func (s *GDriveSource) toFileMeta(f *drive.File) core.FileMeta { fileType = core.FileTypeFolder } + name := f.Name + extra := map[string]interface{}{"mimeType": f.MimeType} + + if isGoogleNativeMimeType(f.MimeType) { + name += nativeExportExtension(f.MimeType) + extra["exportMimeType"] = nativeExportMimeType(f.MimeType) + } + if f.HeadRevisionId != "" { + extra["headRevisionId"] = f.HeadRevisionId + } + return core.FileMeta{ Version: 1, FileID: f.Id, - Name: f.Name, + Name: name, Type: fileType, Parents: f.Parents, ContentHash: f.Sha256Checksum, Size: f.Size, Mtime: mtime, Owner: owner, - Extra: map[string]interface{}{"mimeType": f.MimeType}, + Extra: extra, } } @@ -490,6 +526,10 @@ func (s *GDriveSource) Size(ctx context.Context) (*SourceSize, error) { } func (s *GDriveSource) GetFileStream(fileID string) (io.ReadCloser, error) { + if mimeType, ok := s.mimeTypes[fileID]; ok && isGoogleNativeMimeType(mimeType) { + return s.exportFile(fileID, nativeExportMimeType(mimeType)) + } + var resp *http.Response err := retry.Do(context.Background(), retry.DefaultPolicy(), func() error { call := s.service.Files.Get(fileID).SupportsAllDrives(true) @@ -508,3 +548,22 @@ func (s *GDriveSource) GetFileStream(fileID string) (io.ReadCloser, error) { } return resp.Body, nil } + +func (s *GDriveSource) exportFile(fileID, exportMimeType string) (io.ReadCloser, error) { + var resp *http.Response + err := retry.Do(context.Background(), retry.DefaultPolicy(), func() error { + var err error + resp, err = s.service.Files.Export(fileID, exportMimeType).Download() + if err != nil { + if isRetryableGoogleErr(err) { + return &retry.RetryableError{Err: err} + } + return err + } + return nil + }) + if err != nil { + return nil, err + } + return resp.Body, nil +} diff --git a/pkg/source/gdrive_changes.go b/pkg/source/gdrive_changes.go index b7bcd51..b07b71a 100644 --- a/pkg/source/gdrive_changes.go +++ b/pkg/source/gdrive_changes.go @@ -48,12 +48,14 @@ func (s *GDriveChangeSource) GetStartPageToken() (string, error) { // changes are emitted before file changes so that the engine can resolve // parent references incrementally. func (s *GDriveChangeSource) WalkChanges(ctx context.Context, token string, callback func(FileChange) error) (string, error) { + s.mimeTypes = make(map[string]string) + var folderChanges, fileChanges []FileChange pageToken := token for { call := s.service.Changes.List(pageToken). - Fields("nextPageToken, newStartPageToken, changes(fileId, removed, file(id, name, parents, mimeType, size, modifiedTime, owners, trashed, sha256Checksum))"). + Fields("nextPageToken, newStartPageToken, changes(fileId, removed, file(id, name, parents, mimeType, size, modifiedTime, owners, trashed, sha256Checksum, headRevisionId))"). PageSize(1000). Context(ctx) if s.isSharedDrive() { @@ -69,6 +71,12 @@ func (s *GDriveChangeSource) WalkChanges(ctx context.Context, token string, call for _, ch := range resp.Changes { fc := s.changeToFileChange(ch) + + // Skip native files when the user opted out. + if s.skipNativeFiles && fc.Type == ChangeUpsert && ch.File != nil && isGoogleNativeMimeType(ch.File.MimeType) { + continue + } + if fc.Type == ChangeUpsert && fc.Meta.Type == core.FileTypeFolder { folderChanges = append(folderChanges, fc) } else { @@ -239,6 +247,12 @@ func (s *GDriveChangeSource) changeToFileChange(ch *drive.Change) FileChange { Meta: core.FileMeta{FileID: ch.FileId}, } } + + // Record MIME type for GetFileStream export routing. + if ch.File.MimeType != "application/vnd.google-apps.folder" { + s.mimeTypes[ch.File.Id] = ch.File.MimeType + } + return FileChange{ Type: ChangeUpsert, Meta: s.toFileMeta(ch.File), diff --git a/pkg/source/gdrive_native.go b/pkg/source/gdrive_native.go new file mode 100644 index 0000000..461c1ed --- /dev/null +++ b/pkg/source/gdrive_native.go @@ -0,0 +1,48 @@ +package source + +import "strings" + +const googleAppsPrefix = "application/vnd.google-apps." + +// nativeExportMap maps Google native MIME types to their preferred export +// format (MIME type and file extension). Types not in the map fall back to PDF. +var nativeExportMap = map[string]struct { + exportMIME string + ext string +}{ + "application/vnd.google-apps.document": {"application/vnd.openxmlformats-officedocument.wordprocessingml.document", ".docx"}, + "application/vnd.google-apps.spreadsheet": {"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", ".xlsx"}, + "application/vnd.google-apps.presentation": {"application/vnd.openxmlformats-officedocument.presentationml.presentation", ".pptx"}, + "application/vnd.google-apps.drawing": {"image/svg+xml", ".svg"}, + "application/vnd.google-apps.script": {"application/vnd.google-apps.script+json", ".json"}, + "application/vnd.google-apps.form": {"application/pdf", ".pdf"}, + "application/vnd.google-apps.site": {"text/plain", ".txt"}, + "application/vnd.google-apps.jam": {"application/pdf", ".pdf"}, + "application/vnd.google-apps.map": {"application/pdf", ".pdf"}, +} + +// isGoogleNativeMimeType returns true if the MIME type is a Google-native +// format (Docs, Sheets, Slides, etc.) that cannot be downloaded directly and +// must be exported instead. Folders are excluded. +func isGoogleNativeMimeType(mimeType string) bool { + return strings.HasPrefix(mimeType, googleAppsPrefix) && + mimeType != "application/vnd.google-apps.folder" +} + +// nativeExportMimeType returns the MIME type to use when exporting a Google +// native file. Falls back to PDF for unknown native types. +func nativeExportMimeType(mimeType string) string { + if e, ok := nativeExportMap[mimeType]; ok { + return e.exportMIME + } + return "application/pdf" +} + +// nativeExportExtension returns the file extension (including the leading dot) +// to append to native file names after export. Falls back to ".pdf". +func nativeExportExtension(mimeType string) string { + if e, ok := nativeExportMap[mimeType]; ok { + return e.ext + } + return ".pdf" +} diff --git a/pkg/source/gdrive_native_test.go b/pkg/source/gdrive_native_test.go new file mode 100644 index 0000000..7b34449 --- /dev/null +++ b/pkg/source/gdrive_native_test.go @@ -0,0 +1,76 @@ +package source + +import "testing" + +func TestIsGoogleNativeMimeType(t *testing.T) { + tests := []struct { + mimeType string + want bool + }{ + {"application/vnd.google-apps.document", true}, + {"application/vnd.google-apps.spreadsheet", true}, + {"application/vnd.google-apps.presentation", true}, + {"application/vnd.google-apps.drawing", true}, + {"application/vnd.google-apps.form", true}, + {"application/vnd.google-apps.script", true}, + {"application/vnd.google-apps.site", true}, + {"application/vnd.google-apps.jam", true}, + {"application/vnd.google-apps.map", true}, + {"application/vnd.google-apps.folder", false}, + {"application/pdf", false}, + {"image/png", false}, + {"application/vnd.google-apps.unknown_future_type", true}, + {"", false}, + } + for _, tt := range tests { + if got := isGoogleNativeMimeType(tt.mimeType); got != tt.want { + t.Errorf("isGoogleNativeMimeType(%q) = %v, want %v", tt.mimeType, got, tt.want) + } + } +} + +func TestNativeExportMimeType(t *testing.T) { + tests := []struct { + mimeType string + want string + }{ + {"application/vnd.google-apps.document", "application/vnd.openxmlformats-officedocument.wordprocessingml.document"}, + {"application/vnd.google-apps.spreadsheet", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"}, + {"application/vnd.google-apps.presentation", "application/vnd.openxmlformats-officedocument.presentationml.presentation"}, + {"application/vnd.google-apps.drawing", "image/svg+xml"}, + {"application/vnd.google-apps.script", "application/vnd.google-apps.script+json"}, + {"application/vnd.google-apps.form", "application/pdf"}, + {"application/vnd.google-apps.site", "text/plain"}, + {"application/vnd.google-apps.jam", "application/pdf"}, + {"application/vnd.google-apps.map", "application/pdf"}, + {"application/vnd.google-apps.unknown_future_type", "application/pdf"}, // fallback + } + for _, tt := range tests { + if got := nativeExportMimeType(tt.mimeType); got != tt.want { + t.Errorf("nativeExportMimeType(%q) = %q, want %q", tt.mimeType, got, tt.want) + } + } +} + +func TestNativeExportExtension(t *testing.T) { + tests := []struct { + mimeType string + want string + }{ + {"application/vnd.google-apps.document", ".docx"}, + {"application/vnd.google-apps.spreadsheet", ".xlsx"}, + {"application/vnd.google-apps.presentation", ".pptx"}, + {"application/vnd.google-apps.drawing", ".svg"}, + {"application/vnd.google-apps.script", ".json"}, + {"application/vnd.google-apps.form", ".pdf"}, + {"application/vnd.google-apps.site", ".txt"}, + {"application/vnd.google-apps.jam", ".pdf"}, + {"application/vnd.google-apps.map", ".pdf"}, + {"application/vnd.google-apps.unknown_future_type", ".pdf"}, // fallback + } + for _, tt := range tests { + if got := nativeExportExtension(tt.mimeType); got != tt.want { + t.Errorf("nativeExportExtension(%q) = %q, want %q", tt.mimeType, got, tt.want) + } + } +} diff --git a/pkg/source/gdrive_test.go b/pkg/source/gdrive_test.go index e489e25..decc342 100644 --- a/pkg/source/gdrive_test.go +++ b/pkg/source/gdrive_test.go @@ -2,14 +2,487 @@ package source import ( "testing" + + "github.com/cloudstic/cli/internal/core" + "google.golang.org/api/drive/v3" ) -func TestGDriveSource_SDK(t *testing.T) { - // Since we switched to the official SDK, the previous mock test using httptest.Server is invalid - // because the SDK doesn't expose the HTTP client BaseURL easily for overriding in NewService. - // We can inject a custom HTTP client into option.WithHTTPClient, but that requires more setup. - // Given the "use SDK" instruction, we rely on the SDK's correctness. - // We can skip integration tests that require real credentials or complex mocking of the entire Google API surface. +func TestToFileMeta_RegularFile(t *testing.T) { + s := &GDriveSource{exclude: NewExcludeMatcher(nil)} + f := &drive.File{ + Id: "file1", + Name: "photo.jpg", + MimeType: "image/jpeg", + Size: 1024, + ModifiedTime: "2024-01-15T10:30:00Z", + Sha256Checksum: "abc123", + Parents: []string{"folder1"}, + Owners: []*drive.User{{EmailAddress: "user@example.com"}}, + HeadRevisionId: "rev42", + } + + meta := s.toFileMeta(f) + + if meta.FileID != "file1" { + t.Errorf("FileID = %q, want %q", meta.FileID, "file1") + } + if meta.Name != "photo.jpg" { + t.Errorf("Name = %q, want %q", meta.Name, "photo.jpg") + } + if meta.Type != core.FileTypeFile { + t.Errorf("Type = %v, want FileTypeFile", meta.Type) + } + if meta.Size != 1024 { + t.Errorf("Size = %d, want 1024", meta.Size) + } + if meta.ContentHash != "abc123" { + t.Errorf("ContentHash = %q, want %q", meta.ContentHash, "abc123") + } + if meta.Owner != "user@example.com" { + t.Errorf("Owner = %q, want %q", meta.Owner, "user@example.com") + } + if meta.Extra["mimeType"] != "image/jpeg" { + t.Errorf("Extra[mimeType] = %v, want %q", meta.Extra["mimeType"], "image/jpeg") + } + // Regular files should not have exportMimeType. + if _, ok := meta.Extra["exportMimeType"]; ok { + t.Error("Regular file should not have exportMimeType in Extra") + } + if meta.Extra["headRevisionId"] != "rev42" { + t.Errorf("Extra[headRevisionId] = %v, want %q", meta.Extra["headRevisionId"], "rev42") + } +} + +func TestToFileMeta_NativeDocument(t *testing.T) { + s := &GDriveSource{exclude: NewExcludeMatcher(nil)} + f := &drive.File{ + Id: "doc1", + Name: "My Document", + MimeType: "application/vnd.google-apps.document", + HeadRevisionId: "rev5", + Parents: []string{"folder1"}, + } + + meta := s.toFileMeta(f) + + if meta.Name != "My Document.docx" { + t.Errorf("Name = %q, want %q (with .docx extension)", meta.Name, "My Document.docx") + } + if meta.Extra["exportMimeType"] != "application/vnd.openxmlformats-officedocument.wordprocessingml.document" { + t.Errorf("exportMimeType = %v, want docx MIME", meta.Extra["exportMimeType"]) + } + if meta.Extra["headRevisionId"] != "rev5" { + t.Errorf("headRevisionId = %v, want %q", meta.Extra["headRevisionId"], "rev5") + } +} + +func TestToFileMeta_NativeSpreadsheet(t *testing.T) { + s := &GDriveSource{exclude: NewExcludeMatcher(nil)} + f := &drive.File{ + Id: "sheet1", + Name: "Budget", + MimeType: "application/vnd.google-apps.spreadsheet", + } + + meta := s.toFileMeta(f) + + if meta.Name != "Budget.xlsx" { + t.Errorf("Name = %q, want %q", meta.Name, "Budget.xlsx") + } + if meta.Extra["exportMimeType"] != "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" { + t.Errorf("exportMimeType = %v, want xlsx MIME", meta.Extra["exportMimeType"]) + } +} + +func TestToFileMeta_NativePresentation(t *testing.T) { + s := &GDriveSource{exclude: NewExcludeMatcher(nil)} + f := &drive.File{ + Id: "slide1", + Name: "Deck", + MimeType: "application/vnd.google-apps.presentation", + } + + meta := s.toFileMeta(f) + + if meta.Name != "Deck.pptx" { + t.Errorf("Name = %q, want %q", meta.Name, "Deck.pptx") + } + if meta.Extra["exportMimeType"] != "application/vnd.openxmlformats-officedocument.presentationml.presentation" { + t.Errorf("exportMimeType = %v, want pptx MIME", meta.Extra["exportMimeType"]) + } +} + +func TestToFileMeta_Folder(t *testing.T) { + s := &GDriveSource{exclude: NewExcludeMatcher(nil)} + f := &drive.File{ + Id: "folder1", + Name: "My Folder", + MimeType: "application/vnd.google-apps.folder", + } + + meta := s.toFileMeta(f) + + if meta.Type != core.FileTypeFolder { + t.Errorf("Type = %v, want FileTypeFolder", meta.Type) + } + if meta.Name != "My Folder" { + t.Errorf("Name = %q, want %q (no extension for folders)", meta.Name, "My Folder") + } + if _, ok := meta.Extra["exportMimeType"]; ok { + t.Error("Folder should not have exportMimeType") + } +} + +func TestToFileMeta_NoHeadRevisionId(t *testing.T) { + s := &GDriveSource{exclude: NewExcludeMatcher(nil)} + f := &drive.File{ + Id: "file1", + Name: "test.txt", + MimeType: "text/plain", + } + + meta := s.toFileMeta(f) + + if _, ok := meta.Extra["headRevisionId"]; ok { + t.Error("Should not have headRevisionId when empty") + } +} + +func TestToFileMeta_NoOwners(t *testing.T) { + s := &GDriveSource{exclude: NewExcludeMatcher(nil)} + f := &drive.File{ + Id: "file1", + Name: "test.txt", + MimeType: "text/plain", + } + + meta := s.toFileMeta(f) + if meta.Owner != "" { + t.Errorf("Owner = %q, want empty", meta.Owner) + } +} + +func TestVisitEntryWithPath_SkipNativeFiles(t *testing.T) { + s := &GDriveSource{ + exclude: NewExcludeMatcher(nil), + skipNativeFiles: true, + mimeTypes: make(map[string]string), + } + + var visited []string + callback := func(meta core.FileMeta) error { + visited = append(visited, meta.FileID) + return nil + } + + pathMap := make(map[string]string) + excludedPaths := make(map[string]bool) + + // Native file should be skipped. + err := s.visitEntryWithPath(&drive.File{ + Id: "doc1", + Name: "Report", + MimeType: "application/vnd.google-apps.document", + }, pathMap, excludedPaths, callback) + if err != nil { + t.Fatal(err) + } + + // Regular file should be visited. + err = s.visitEntryWithPath(&drive.File{ + Id: "file1", + Name: "photo.jpg", + MimeType: "image/jpeg", + }, pathMap, excludedPaths, callback) + if err != nil { + t.Fatal(err) + } + + if len(visited) != 1 || visited[0] != "file1" { + t.Errorf("visited = %v, want [file1]", visited) + } +} + +func TestVisitEntryWithPath_NativeFilesIncludedByDefault(t *testing.T) { + s := &GDriveSource{ + exclude: NewExcludeMatcher(nil), + skipNativeFiles: false, + mimeTypes: make(map[string]string), + } + + var visited []string + callback := func(meta core.FileMeta) error { + visited = append(visited, meta.FileID) + return nil + } + + pathMap := make(map[string]string) + excludedPaths := make(map[string]bool) + + err := s.visitEntryWithPath(&drive.File{ + Id: "doc1", + Name: "Report", + MimeType: "application/vnd.google-apps.document", + }, pathMap, excludedPaths, callback) + if err != nil { + t.Fatal(err) + } + + if len(visited) != 1 || visited[0] != "doc1" { + t.Errorf("visited = %v, want [doc1]", visited) + } +} + +func TestVisitEntryWithPath_RecordsMimeType(t *testing.T) { + s := &GDriveSource{ + exclude: NewExcludeMatcher(nil), + mimeTypes: make(map[string]string), + } + + callback := func(meta core.FileMeta) error { return nil } + pathMap := make(map[string]string) + excludedPaths := make(map[string]bool) + + // Regular file should record mimeType. + _ = s.visitEntryWithPath(&drive.File{ + Id: "file1", + Name: "photo.jpg", + MimeType: "image/jpeg", + }, pathMap, excludedPaths, callback) + + if s.mimeTypes["file1"] != "image/jpeg" { + t.Errorf("mimeTypes[file1] = %q, want %q", s.mimeTypes["file1"], "image/jpeg") + } + + // Native file should also record mimeType. + _ = s.visitEntryWithPath(&drive.File{ + Id: "doc1", + Name: "Report", + MimeType: "application/vnd.google-apps.document", + }, pathMap, excludedPaths, callback) + + if s.mimeTypes["doc1"] != "application/vnd.google-apps.document" { + t.Errorf("mimeTypes[doc1] = %q, want native MIME", s.mimeTypes["doc1"]) + } + + // Folder should NOT record mimeType. + _ = s.visitEntryWithPath(&drive.File{ + Id: "folder1", + Name: "Stuff", + MimeType: "application/vnd.google-apps.folder", + }, pathMap, excludedPaths, callback) + + if _, ok := s.mimeTypes["folder1"]; ok { + t.Error("Folders should not be recorded in mimeTypes") + } +} + +func TestVisitEntryWithPath_SkipNativeStillRecordsMimeType(t *testing.T) { + s := &GDriveSource{ + exclude: NewExcludeMatcher(nil), + skipNativeFiles: true, + mimeTypes: make(map[string]string), + } + + callback := func(meta core.FileMeta) error { return nil } + pathMap := make(map[string]string) + excludedPaths := make(map[string]bool) + + // Even when skipping native files, mimeType should still be recorded + // (the recording happens before the skip check). + _ = s.visitEntryWithPath(&drive.File{ + Id: "doc1", + Name: "Report", + MimeType: "application/vnd.google-apps.document", + }, pathMap, excludedPaths, callback) + + if s.mimeTypes["doc1"] != "application/vnd.google-apps.document" { + t.Errorf("mimeTypes[doc1] = %q, want native MIME even when skipping", s.mimeTypes["doc1"]) + } +} + +func TestVisitEntryWithPath_PathComputation(t *testing.T) { + s := &GDriveSource{ + exclude: NewExcludeMatcher(nil), + mimeTypes: make(map[string]string), + } + + var paths []string + callback := func(meta core.FileMeta) error { + if len(meta.Paths) > 0 { + paths = append(paths, meta.Paths[0]) + } + return nil + } + + pathMap := map[string]string{"parentID": "Documents"} + excludedPaths := make(map[string]bool) + + // File with parent in pathMap. + _ = s.visitEntryWithPath(&drive.File{ + Id: "file1", + Name: "notes.txt", + MimeType: "text/plain", + Parents: []string{"parentID"}, + }, pathMap, excludedPaths, callback) + + // Native file with parent — name should have extension appended. + _ = s.visitEntryWithPath(&drive.File{ + Id: "doc1", + Name: "Report", + MimeType: "application/vnd.google-apps.document", + Parents: []string{"parentID"}, + }, pathMap, excludedPaths, callback) + + if len(paths) != 2 { + t.Fatalf("got %d paths, want 2", len(paths)) + } + if paths[0] != "Documents/notes.txt" { + t.Errorf("paths[0] = %q, want %q", paths[0], "Documents/notes.txt") + } + if paths[1] != "Documents/Report.docx" { + t.Errorf("paths[1] = %q, want %q", paths[1], "Documents/Report.docx") + } +} + +func TestChangeToFileChange_RecordsMimeType(t *testing.T) { + s := &GDriveChangeSource{ + GDriveSource: GDriveSource{ + exclude: NewExcludeMatcher(nil), + mimeTypes: make(map[string]string), + }, + } + + // Upsert of a regular file. + fc := s.changeToFileChange(&drive.Change{ + FileId: "file1", + File: &drive.File{ + Id: "file1", + Name: "photo.jpg", + MimeType: "image/jpeg", + }, + }) + + if fc.Type != ChangeUpsert { + t.Errorf("Type = %v, want ChangeUpsert", fc.Type) + } + if s.mimeTypes["file1"] != "image/jpeg" { + t.Errorf("mimeTypes[file1] = %q, want %q", s.mimeTypes["file1"], "image/jpeg") + } + + // Upsert of a native doc. + fc2 := s.changeToFileChange(&drive.Change{ + FileId: "doc1", + File: &drive.File{ + Id: "doc1", + Name: "Report", + MimeType: "application/vnd.google-apps.document", + }, + }) + + if fc2.Type != ChangeUpsert { + t.Errorf("Type = %v, want ChangeUpsert", fc2.Type) + } + if s.mimeTypes["doc1"] != "application/vnd.google-apps.document" { + t.Errorf("mimeTypes[doc1] = %q, want native MIME", s.mimeTypes["doc1"]) + } + if fc2.Meta.Name != "Report.docx" { + t.Errorf("Name = %q, want %q", fc2.Meta.Name, "Report.docx") + } + + // Folder should not be recorded in mimeTypes. + s.changeToFileChange(&drive.Change{ + FileId: "folder1", + File: &drive.File{ + Id: "folder1", + Name: "Stuff", + MimeType: "application/vnd.google-apps.folder", + }, + }) + if _, ok := s.mimeTypes["folder1"]; ok { + t.Error("Folders should not be recorded in mimeTypes") + } +} + +func TestChangeToFileChange_DeletedFile(t *testing.T) { + s := &GDriveChangeSource{ + GDriveSource: GDriveSource{ + exclude: NewExcludeMatcher(nil), + mimeTypes: make(map[string]string), + }, + } + + // Removed file. + fc := s.changeToFileChange(&drive.Change{ + FileId: "file1", + Removed: true, + }) + + if fc.Type != ChangeDelete { + t.Errorf("Type = %v, want ChangeDelete", fc.Type) + } + if fc.Meta.FileID != "file1" { + t.Errorf("FileID = %q, want %q", fc.Meta.FileID, "file1") + } +} + +func TestChangeToFileChange_TrashedFile(t *testing.T) { + s := &GDriveChangeSource{ + GDriveSource: GDriveSource{ + exclude: NewExcludeMatcher(nil), + mimeTypes: make(map[string]string), + }, + } + + fc := s.changeToFileChange(&drive.Change{ + FileId: "file1", + File: &drive.File{ + Id: "file1", + Name: "old.txt", + Trashed: true, + }, + }) + + if fc.Type != ChangeDelete { + t.Errorf("Type = %v, want ChangeDelete", fc.Type) + } +} + +func TestGetFileStream_MimeTypeRouting(t *testing.T) { + // Verify the mimeTypes map is used to determine export vs download. + // We can't do full HTTP calls without a mock server, but we verify + // the routing decision by checking what's in the map. + s := &GDriveSource{ + mimeTypes: map[string]string{ + "doc1": "application/vnd.google-apps.document", + "file1": "image/jpeg", + }, + } + + // Native file: mimeType present and is google-native → should export. + mimeType, ok := s.mimeTypes["doc1"] + if !ok || !isGoogleNativeMimeType(mimeType) { + t.Error("doc1 should be routed to export") + } + + // Regular file: mimeType present but not native → should download. + mimeType, ok = s.mimeTypes["file1"] + if !ok || isGoogleNativeMimeType(mimeType) { + t.Error("file1 should be routed to download") + } + + // Unknown file: not in mimeTypes → should download (default path). + _, ok = s.mimeTypes["unknown"] + if ok { + t.Error("unknown file should not be in mimeTypes") + } +} - t.Skip("Skipping GDrive SDK tests as they require real credentials or complex mocking of the Google API client") +func TestWithSkipNativeFiles(t *testing.T) { + var cfg gDriveOptions + opt := WithSkipNativeFiles() + opt(&cfg) + if !cfg.skipNativeFiles { + t.Error("WithSkipNativeFiles should set skipNativeFiles to true") + } } diff --git a/rfcs/0003-google-native-file-export.md b/rfcs/0003-google-native-file-export.md index b3dfe62..bd82946 100644 --- a/rfcs/0003-google-native-file-export.md +++ b/rfcs/0003-google-native-file-export.md @@ -1,6 +1,6 @@ # RFC 0003: Google Native File Export -* **Status:** Proposed +* **Status:** Implemented * **Date:** 2026-03-07 * **Affects:** `pkg/source/gdrive.go`, `pkg/source/gdrive_changes.go`, `internal/engine/backup_scan.go` @@ -209,7 +209,7 @@ When `skipNativeFiles` is false (the default), export is attempted for all nativ No changes needed to the query. The existing filter `mimeType != 'application/vnd.google-apps.folder'` correctly includes all native files. The `Size()` query also remains unchanged — native files contribute 0 bytes to the estimate, which is acceptable. -### 2.6 `mimeTypes` map population +### 2.7 `mimeTypes` map population In `Walk`: `toFileMeta` already has access to `f.MimeType`, so `visitEntryWithPath` (or `toFileMeta` itself) should populate `s.mimeTypes[f.Id] = f.MimeType` for every non-folder entry. `headRevisionId` must be added to the `fields` string in the `files.list` call so it is fetched alongside the existing fields at no extra cost: