From 50f5e23d623345c7b41e3f5f694e54e891688319 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Hermann?= Date: Sat, 14 Mar 2026 11:03:06 +0100 Subject: [PATCH] feat: cloud subdirectory backup --- .gitignore | 4 +- cmd/cloudstic/cmd_backup.go | 37 ++--- cmd/cloudstic/cmd_forget_test.go | 2 +- cmd/cloudstic/completion.go | 8 +- cmd/cloudstic/flags.go | 22 +-- cmd/cloudstic/store.go | 13 +- cmd/cloudstic/store_test.go | 11 +- cmd/cloudstic/usage.go | 1 - docs/sources.md | 2 +- docs/user-guide.md | 19 ++- e2e/portable_darwin.go | 76 +++++------ e2e/portable_stub.go | 9 +- pkg/source/gdrive.go | 124 +++++++++++++++-- pkg/source/gdrive_changes.go | 118 +++++++++++++--- pkg/source/gdrive_changes_test.go | 180 +++++++++++++++++++++++++ pkg/source/gdrive_test.go | 32 ++--- pkg/source/local_source_plist_test.go | 80 +++++------ pkg/source/onedrive.go | 37 ++++- pkg/source/onedrive_changes.go | 38 +++++- pkg/source/onedrive_test.go | 89 +++++++++++- pkg/source/onedrive_test.go.orig | 41 ++++++ pkg/source/onedrive_test.go.rej | 18 +++ rfcs/0007-cloud-subdirectory-backup.md | 51 +++++++ 23 files changed, 822 insertions(+), 190 deletions(-) create mode 100644 pkg/source/gdrive_changes_test.go create mode 100644 pkg/source/onedrive_test.go.orig create mode 100644 pkg/source/onedrive_test.go.rej create mode 100644 rfcs/0007-cloud-subdirectory-backup.md diff --git a/.gitignore b/.gitignore index 558f444..93da1ac 100644 --- a/.gitignore +++ b/.gitignore @@ -14,4 +14,6 @@ test-output.json *.test *.test.exe *.prof -.DS_Store \ No newline at end of file +.DS_Store +# opencode +.opencode/ diff --git a/cmd/cloudstic/cmd_backup.go b/cmd/cloudstic/cmd_backup.go index d86198f..170d523 100644 --- a/cmd/cloudstic/cmd_backup.go +++ b/cmd/cloudstic/cmd_backup.go @@ -16,29 +16,28 @@ import ( ) type backupArgs struct { - g *globalFlags - sourceURI string - driveID string - rootFolder string - dryRun bool - excludeFile string - skipNativeFiles bool - volumeUUID string - googleCreds string - 
googleTokenFile string - onedriveClientID string + g *globalFlags + sourceURI string + driveID string + rootFolder string + dryRun bool + excludeFile string + skipNativeFiles bool + volumeUUID string + googleCreds string + googleTokenFile string + onedriveClientID string onedriveTokenFile string - tags stringArrayFlags - excludes stringArrayFlags + tags stringArrayFlags + excludes stringArrayFlags } func parseBackupArgs() *backupArgs { fs := flag.NewFlagSet("backup", flag.ExitOnError) a := &backupArgs{} a.g = addGlobalFlags(fs) - sourceURI := fs.String("source", envDefault("CLOUDSTIC_SOURCE", "gdrive"), "Source URI: local:, sftp://[user@]host[:port]/, gdrive, gdrive-changes, onedrive, onedrive-changes") + sourceURI := fs.String("source", envDefault("CLOUDSTIC_SOURCE", "gdrive"), "Source URI: local:, sftp://[user@]host[:port]/, gdrive[:], gdrive-changes[:], onedrive[:], onedrive-changes[:]") driveID := fs.String("drive-id", envDefault("CLOUDSTIC_DRIVE_ID", ""), "Shared drive ID for gdrive source (omit for My Drive)") - rootFolder := fs.String("root-folder", envDefault("CLOUDSTIC_ROOT_FOLDER", ""), "Root folder ID for gdrive source (defaults to entire drive)") dryRun := fs.Bool("dry-run", false, "Scan source and report changes without writing to the store") skipNativeFiles := fs.Bool("skip-native-files", false, "Exclude Google-native files (Docs, Sheets, Slides, etc.) 
from the backup") excludeFile := fs.String("exclude-file", "", "Path to file with exclude patterns (one per line, gitignore syntax)") @@ -52,7 +51,7 @@ func parseBackupArgs() *backupArgs { mustParse(fs) a.sourceURI = *sourceURI a.driveID = *driveID - a.rootFolder = *rootFolder + a.rootFolder = "" a.dryRun = *dryRun a.skipNativeFiles = *skipNativeFiles a.excludeFile = *excludeFile @@ -172,7 +171,7 @@ func initSource(ctx context.Context, sourceURI, driveID, rootFolder string, skip source.WithCredsPath(googleCreds), source.WithTokenPath(tokenPath), source.WithDriveID(driveID), - source.WithRootFolderID(rootFolder), + source.WithRootPath(uri.path), source.WithGDriveExcludePatterns(excludePatterns), } if skipNativeFiles { @@ -188,7 +187,7 @@ func initSource(ctx context.Context, sourceURI, driveID, rootFolder string, skip source.WithCredsPath(googleCreds), source.WithTokenPath(tokenPath), source.WithDriveID(driveID), - source.WithRootFolderID(rootFolder), + source.WithRootPath(uri.path), source.WithGDriveExcludePatterns(excludePatterns), } if skipNativeFiles { @@ -203,6 +202,7 @@ func initSource(ctx context.Context, sourceURI, driveID, rootFolder string, skip return source.NewOneDriveSource(ctx, source.WithOneDriveClientID(onedriveClientID), source.WithOneDriveTokenPath(tokenPath), + source.WithOneDriveRootPath(uri.path), source.WithOneDriveExcludePatterns(excludePatterns), ) case "onedrive-changes": @@ -213,6 +213,7 @@ func initSource(ctx context.Context, sourceURI, driveID, rootFolder string, skip return source.NewOneDriveChangeSource(ctx, source.WithOneDriveClientID(onedriveClientID), source.WithOneDriveTokenPath(tokenPath), + source.WithOneDriveRootPath(uri.path), source.WithOneDriveExcludePatterns(excludePatterns), ) default: diff --git a/cmd/cloudstic/cmd_forget_test.go b/cmd/cloudstic/cmd_forget_test.go index 32ccb27..559ecf1 100644 --- a/cmd/cloudstic/cmd_forget_test.go +++ b/cmd/cloudstic/cmd_forget_test.go @@ -117,4 +117,4 @@ func TestRunForget_Policy_DryRun(t 
*testing.T) { if !strings.Contains(got, "dry run") { t.Errorf("expected 'dry run' in summary, got:\n%s", got) } -} \ No newline at end of file +} diff --git a/cmd/cloudstic/completion.go b/cmd/cloudstic/completion.go index fb35329..4d1c4de 100644 --- a/cmd/cloudstic/completion.go +++ b/cmd/cloudstic/completion.go @@ -53,7 +53,7 @@ _cloudstic() { -*) # skip flags and their values case "${words[i]}" in - -store|-s3-endpoint|-s3-region|-s3-access-key|-s3-secret-key|-source-sftp-password|-source-sftp-key|-store-sftp-password|-store-sftp-key|-encryption-key|-password|-recovery-key|-kms-key-arn|-kms-region|-kms-endpoint|-source|-drive-id|-root-folder|-google-credentials|-google-token-file|-onedrive-client-id|-onedrive-token-file|-tag|-output|-keep-last|-keep-hourly|-keep-daily|-keep-weekly|-keep-monthly|-keep-yearly|-group-by|-account|-json) + -store|-s3-endpoint|-s3-region|-s3-access-key|-s3-secret-key|-source-sftp-password|-source-sftp-key|-store-sftp-password|-store-sftp-key|-encryption-key|-password|-recovery-key|-kms-key-arn|-kms-region|-kms-endpoint|-source|-drive-id|-google-credentials|-google-token-file|-onedrive-client-id|-onedrive-token-file|-tag|-output|-keep-last|-keep-hourly|-keep-daily|-keep-weekly|-keep-monthly|-keep-yearly|-group-by|-account|-json) ((i++)) ;; esac ;; @@ -76,7 +76,7 @@ _cloudstic() { init) cmd_flags="-add-recovery-key -no-encryption -adopt-slots" ;; backup) - cmd_flags="-source -drive-id -root-folder -skip-native-files -google-credentials -google-token-file -onedrive-client-id -onedrive-token-file -tag -dry-run" ;; + cmd_flags="-source -drive-id -skip-native-files -google-credentials -google-token-file -onedrive-client-id -onedrive-token-file -tag -dry-run" ;; restore) cmd_flags="-output -dry-run" ;; prune) @@ -199,7 +199,7 @@ _cloudstic() { -*) # Skip flags with values case "${words[i]}" in - 
-store|-s3-endpoint|-s3-region|-s3-access-key|-s3-secret-key|-source-sftp-password|-source-sftp-key|-store-sftp-password|-store-sftp-key|-encryption-key|-password|-recovery-key|-kms-key-arn|-kms-region|-kms-endpoint|-source|-drive-id|-root-folder|-google-credentials|-google-token-file|-onedrive-client-id|-onedrive-token-file|-tag|-output|-keep-last|-keep-hourly|-keep-daily|-keep-weekly|-keep-monthly|-keep-yearly|-group-by|-account) + -store|-s3-endpoint|-s3-region|-s3-access-key|-s3-secret-key|-source-sftp-password|-source-sftp-key|-store-sftp-password|-store-sftp-key|-encryption-key|-password|-recovery-key|-kms-key-arn|-kms-region|-kms-endpoint|-source|-drive-id|-google-credentials|-google-token-file|-onedrive-client-id|-onedrive-token-file|-tag|-output|-keep-last|-keep-hourly|-keep-daily|-keep-weekly|-keep-monthly|-keep-yearly|-group-by|-account) (( i++ )) ;; esac ;; @@ -228,7 +228,6 @@ _cloudstic() { _arguments $global_flags \ '-source[Source URI]:uri:(local: sftp:// gdrive gdrive-changes onedrive onedrive-changes)' \ '-drive-id[Shared drive ID]:id:' \ - '-root-folder[Root folder ID]:id:' \ '-skip-native-files[Exclude Google-native files]' \ '-google-credentials[Google service account credentials JSON]:path:_files' \ '-google-token-file[Google OAuth token file]:path:_files' \ @@ -379,7 +378,6 @@ complete -c cloudstic -n '__fish_seen_subcommand_from init' -l adopt-slots -d 'A # backup complete -c cloudstic -n '__fish_seen_subcommand_from backup' -l source -x -a 'local: sftp:// gdrive gdrive-changes onedrive onedrive-changes' -d 'Source URI' complete -c cloudstic -n '__fish_seen_subcommand_from backup' -l drive-id -x -d 'Shared drive ID' -complete -c cloudstic -n '__fish_seen_subcommand_from backup' -l root-folder -x -d 'Root folder ID' complete -c cloudstic -n '__fish_seen_subcommand_from backup' -l skip-native-files -d 'Exclude Google-native files' complete -c cloudstic -n '__fish_seen_subcommand_from backup' -l google-credentials -r -F -d 'Google service 
account credentials JSON' complete -c cloudstic -n '__fish_seen_subcommand_from backup' -l google-token-file -r -F -d 'Google OAuth token file' diff --git a/cmd/cloudstic/flags.go b/cmd/cloudstic/flags.go index 9938730..24d0cbb 100644 --- a/cmd/cloudstic/flags.go +++ b/cmd/cloudstic/flags.go @@ -22,18 +22,18 @@ func envBool(key string) bool { } type globalFlags struct { - store *string - s3Endpoint, s3Region *string - s3AccessKey, s3SecretKey *string + store *string + s3Endpoint, s3Region *string + s3AccessKey, s3SecretKey *string sourceSFTPPassword, sourceSFTPKey *string - storeSFTPPassword, storeSFTPKey *string - encryptionKey *string - password *string - recoveryKey *string - kmsKeyARN, kmsRegion, kmsEndpoint *string - disablePackfile *bool - prompt, verbose, quiet, debug *bool - debugLog *ui.SafeLogWriter + storeSFTPPassword, storeSFTPKey *string + encryptionKey *string + password *string + recoveryKey *string + kmsKeyARN, kmsRegion, kmsEndpoint *string + disablePackfile *bool + prompt, verbose, quiet, debug *bool + debugLog *ui.SafeLogWriter } func addGlobalFlags(fs *flag.FlagSet) *globalFlags { diff --git a/cmd/cloudstic/store.go b/cmd/cloudstic/store.go index 3df5f05..02ebe78 100644 --- a/cmd/cloudstic/store.go +++ b/cmd/cloudstic/store.go @@ -327,6 +327,8 @@ func parseSourceURI(raw string) (*sourceURIParts, error) { return nil, fmt.Errorf("invalid source URI %q: local path cannot be empty", raw) } return &sourceURIParts{scheme: "local", path: rest}, nil + case "gdrive", "gdrive-changes", "onedrive", "onedrive-changes": + return &sourceURIParts{scheme: scheme, path: ensureLeadingSlash(rest)}, nil default: return nil, fmt.Errorf("unknown source scheme %q in %q: supported URI formats are local: and sftp://[user@]host[:port]/", scheme, raw) } @@ -335,12 +337,19 @@ func parseSourceURI(raw string) (*sourceURIParts, error) { // Bare keyword (cloud sources) switch raw { case "gdrive", "gdrive-changes", "onedrive", "onedrive-changes": - return 
&sourceURIParts{scheme: raw}, nil + return &sourceURIParts{scheme: raw, path: "/"}, nil default: - return nil, fmt.Errorf("unknown source %q: supported values are local:, sftp://[user@]host[:port]/, gdrive, gdrive-changes, onedrive, onedrive-changes", raw) + return nil, fmt.Errorf("unknown source %q: supported values are local:, sftp://[user@]host[:port]/, gdrive[:], gdrive-changes[:], onedrive[:], onedrive-changes[:]", raw) } } +func ensureLeadingSlash(s string) string { + if s == "" || !strings.HasPrefix(s, "/") { + return "/" + s + } + return s +} + func (g *globalFlags) buildSFTPSourceOpts(uri *sourceURIParts) []source.SFTPOption { opts := []source.SFTPOption{ source.WithSFTPSourceBasePath(uri.path), diff --git a/cmd/cloudstic/store_test.go b/cmd/cloudstic/store_test.go index ba1b033..ce00605 100644 --- a/cmd/cloudstic/store_test.go +++ b/cmd/cloudstic/store_test.go @@ -88,10 +88,13 @@ func TestParseSourceURI(t *testing.T) { {raw: "sftp:///no-host", wantErr: true}, // cloud keywords - {raw: "gdrive", want: sourceURIParts{scheme: "gdrive"}}, - {raw: "gdrive-changes", want: sourceURIParts{scheme: "gdrive-changes"}}, - {raw: "onedrive", want: sourceURIParts{scheme: "onedrive"}}, - {raw: "onedrive-changes", want: sourceURIParts{scheme: "onedrive-changes"}}, + {raw: "gdrive", want: sourceURIParts{scheme: "gdrive", path: "/"}}, + {raw: "gdrive-changes", want: sourceURIParts{scheme: "gdrive-changes", path: "/"}}, + {raw: "onedrive", want: sourceURIParts{scheme: "onedrive", path: "/"}}, + {raw: "onedrive-changes", want: sourceURIParts{scheme: "onedrive-changes", path: "/"}}, + {raw: "gdrive:/some/path", want: sourceURIParts{scheme: "gdrive", path: "/some/path"}}, + {raw: "gdrive:some/path", want: sourceURIParts{scheme: "gdrive", path: "/some/path"}}, + {raw: "onedrive:/documents", want: sourceURIParts{scheme: "onedrive", path: "/documents"}}, // invalid {raw: "sftp", wantErr: true}, diff --git a/cmd/cloudstic/usage.go b/cmd/cloudstic/usage.go index 33fb2ed..0a34654 
100644 --- a/cmd/cloudstic/usage.go +++ b/cmd/cloudstic/usage.go @@ -110,7 +110,6 @@ func printUsage() { t.Flags([][2]string{ {"-source ", ui.Env("Source URI: local:, sftp://[user@]host[:port]/, gdrive, gdrive-changes, onedrive, onedrive-changes", "CLOUDSTIC_SOURCE")}, {"-drive-id ", "Shared drive ID for gdrive (omit for My Drive)"}, - {"-root-folder ", "Root folder ID for gdrive (defaults to entire drive)"}, {"-skip-native-files", "Exclude Google-native files (Docs, Sheets, Slides, etc.)"}, {"-google-credentials ", ui.Env("Path to Google service account credentials JSON", "GOOGLE_APPLICATION_CREDENTIALS")}, {"-google-token-file ", ui.Env("Path to Google OAuth token file", "GOOGLE_TOKEN_FILE")}, diff --git a/docs/sources.md b/docs/sources.md index aabcbbf..9de4604 100644 --- a/docs/sources.md +++ b/docs/sources.md @@ -128,7 +128,7 @@ Walks the remote directory tree via SFTP. Supports password, SSH private key, an | **SourceInfo.Account** | Google account email | | **SourceInfo.Path** | `my-drive://` or `://` | -Lists all files and folders via `files.list`, then topologically sorts folders so parents are emitted before children. Supports My Drive and Shared Drives (via `-drive-id`), with optional folder scoping (via `-root-folder`). +Lists all files and folders via `files.list`, then topologically sorts folders so parents are emitted before children. Supports My Drive and Shared Drives (via `-drive-id`), with optional folder scoping (via `gdrive:/path/to/folder`). 
### `gdrive-changes` — Google Drive (Changes API) diff --git a/docs/user-guide.md b/docs/user-guide.md index 38e8899..8733e53 100644 --- a/docs/user-guide.md +++ b/docs/user-guide.md @@ -252,7 +252,7 @@ cloudstic backup -source local:~/Documents cloudstic backup -source gdrive # Back up a specific Google Drive shared drive and folder -cloudstic backup -source gdrive -drive-id -root-folder +cloudstic backup -source gdrive:/path/to/folder -drive-id # Back up with tags cloudstic backup -source local:~/Documents -tag daily -tag important @@ -268,9 +268,8 @@ cloudstic backup -source local:~/Documents -dry-run | Flag | Default | Description | |------|---------|-------------| -| `-source` | `gdrive` | Source type: `local:`, `sftp://[user@]host[:port]/`, `gdrive`, `gdrive-changes`, `onedrive`, `onedrive-changes` | +| `-source` | `gdrive` | Source type: `local:`, `sftp://[user@]host[:port]/`, `gdrive[:]`, `gdrive-changes[:]`, `onedrive[:]`, `onedrive-changes[:]` | | `-drive-id` | | Shared drive ID for Google Drive (omit for My Drive) | -| `-root-folder` | | Root folder ID for Google Drive (defaults to entire drive) | | `-tag` | | Tag to apply to the snapshot (repeatable) | | `-exclude` | | Exclude pattern using gitignore syntax (repeatable) | | `-exclude-file` | | Path to file containing exclude patterns, one per line | @@ -928,19 +927,18 @@ No configuration is required — Cloudstic ships with built-in OAuth credentials ```bash # Back up entire My Drive -cloudstic backup -source gdrive +cloudstic backup -source gdrive:/ # Back up a shared drive -cloudstic backup -source gdrive -drive-id +cloudstic backup -source gdrive:/ -drive-id -# Back up only a specific folder (by Google Drive folder ID) -cloudstic backup -source gdrive -root-folder +# Back up only a specific folder +cloudstic backup -source gdrive:/path/to/folder ``` | Flag | Description | |------|-------------| | `-drive-id` | Shared Drive ID (omit for personal My Drive) | -| `-root-folder` | Restrict backup to a 
specific folder by ID | **Environment variables (optional overrides):** @@ -965,7 +963,7 @@ cloudstic backup -source gdrive-changes cloudstic backup -source gdrive-changes ``` -Uses the same authentication and flags as [Google Drive](#google-drive) (`-drive-id`, `-root-folder`). No setup required — just run the command and authorize in the browser. +Uses the same authentication and flags as [Google Drive](#google-drive) (`-drive-id`). No setup required — just run the command and authorize in the browser. > **Tip:** You can use `-source gdrive-changes` from day one — the first run performs a full scan just like `gdrive`. Only fall back to `-source gdrive` if you need to force a complete rescan. @@ -1265,11 +1263,10 @@ cloudstic forget -keep-daily 7 -keep-monthly 12 -dry-run | `AWS_SECRET_ACCESS_KEY` | `-s3-secret-key` | S3 Secret Access Key | | `CLOUDSTIC_STORE_SFTP_PASSWORD` | `-store-sftp-password` | SFTP password for the store | | `CLOUDSTIC_STORE_SFTP_KEY` | `-store-sftp-key` | Path to SSH private key for the store | -| `CLOUDSTIC_SOURCE` | `-source` | Source URI: `local:`, `sftp://[user@]host[:port]/`, `gdrive`, `gdrive-changes`, `onedrive`, `onedrive-changes` | +| `CLOUDSTIC_SOURCE` | `-source` | Source URI: `local:`, `sftp://[user@]host[:port]/`, `gdrive[:]`, `gdrive-changes[:]`, `onedrive[:]`, `onedrive-changes[:]` | | `CLOUDSTIC_SOURCE_SFTP_PASSWORD` | `-source-sftp-password` | SFTP password for the source | | `CLOUDSTIC_SOURCE_SFTP_KEY` | `-source-sftp-key` | Path to SSH private key for the source | | `CLOUDSTIC_DRIVE_ID` | `-drive-id` | Shared drive ID for Google Drive | -| `CLOUDSTIC_ROOT_FOLDER` | `-root-folder` | Root folder ID for Google Drive | | `CLOUDSTIC_ENCRYPTION_KEY` | `-encryption-key` | Platform key (hex) | | `CLOUDSTIC_PASSWORD` | `-password` | Encryption password | | `CLOUDSTIC_RECOVERY_KEY` | `-recovery-key` | Recovery seed phrase | diff --git a/e2e/portable_darwin.go b/e2e/portable_darwin.go index 174db73..26851fc 100644 --- 
a/e2e/portable_darwin.go +++ b/e2e/portable_darwin.go @@ -3,22 +3,22 @@ package e2e import ( -"crypto/rand" -"fmt" -"os" -"os/exec" -"path/filepath" -"strings" -"testing" + "crypto/rand" + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" + "testing" ) // portableDriveSource is a TestSource backed by a real GPT-formatted RAM disk. // It exercises the full volume UUID auto-detection pipeline (Statfs → diskutil // info -plist → DiskUUID parsing) without any manual -volume-uuid flag. type portableDriveSource struct { -mountPoint string -rawDev string -volName string + mountPoint string + rawDev string + volName string } func newPortableDriveSource(_ *testing.T) *portableDriveSource { @@ -35,39 +35,39 @@ func (s *portableDriveSource) Env() TestEnv { return Hermetic } // Setup creates a GPT-formatted RAM disk. Called inside the subtest, so // t.Skip gracefully skips just this matrix entry if RAM disks aren't available. func (s *portableDriveSource) Setup(t *testing.T) []string { -t.Helper() + t.Helper() -// Create a 10 MB RAM disk (20480 × 512-byte sectors). -out, err := exec.Command("hdiutil", "attach", "-nomount", "ram://20480").CombinedOutput() -if err != nil { -t.Skipf("cannot create RAM disk (needs disk access): %v\n%s", err, out) -} -s.rawDev = strings.TrimSpace(string(out)) -t.Cleanup(func() { _ = exec.Command("diskutil", "eject", s.rawDev).Run() }) + // Create a 10 MB RAM disk (20480 × 512-byte sectors). + out, err := exec.Command("hdiutil", "attach", "-nomount", "ram://20480").CombinedOutput() + if err != nil { + t.Skipf("cannot create RAM disk (needs disk access): %v\n%s", err, out) + } + s.rawDev = strings.TrimSpace(string(out)) + t.Cleanup(func() { _ = exec.Command("diskutil", "eject", s.rawDev).Run() }) -// Partition as GPT with ExFAT — this gives us a real GPT partition UUID. 
-out, err = exec.Command("diskutil", "partitionDisk", s.rawDev, -"1", "GPT", "ExFAT", s.volName, "100%").CombinedOutput() -if err != nil { -_ = exec.Command("diskutil", "eject", s.rawDev).Run() -t.Skipf("cannot partition RAM disk: %v\n%s", err, out) -} + // Partition as GPT with ExFAT — this gives us a real GPT partition UUID. + out, err = exec.Command("diskutil", "partitionDisk", s.rawDev, + "1", "GPT", "ExFAT", s.volName, "100%").CombinedOutput() + if err != nil { + _ = exec.Command("diskutil", "eject", s.rawDev).Run() + t.Skipf("cannot partition RAM disk: %v\n%s", err, out) + } -s.mountPoint = "/Volumes/" + s.volName -if _, err := os.Stat(s.mountPoint); os.IsNotExist(err) { -t.Fatalf("expected mount point %s after partitioning", s.mountPoint) -} + s.mountPoint = "/Volumes/" + s.volName + if _, err := os.Stat(s.mountPoint); os.IsNotExist(err) { + t.Fatalf("expected mount point %s after partitioning", s.mountPoint) + } -return []string{"-source", "local:" + s.mountPoint} + return []string{"-source", "local:" + s.mountPoint} } func (s *portableDriveSource) WriteFile(t *testing.T, relPath, content string) { -t.Helper() -fullPath := filepath.Join(s.mountPoint, relPath) -if err := os.MkdirAll(filepath.Dir(fullPath), 0755); err != nil { -t.Fatal(err) -} -if err := os.WriteFile(fullPath, []byte(content), 0644); err != nil { -t.Fatal(err) -} + t.Helper() + fullPath := filepath.Join(s.mountPoint, relPath) + if err := os.MkdirAll(filepath.Dir(fullPath), 0755); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(fullPath, []byte(content), 0644); err != nil { + t.Fatal(err) + } } diff --git a/e2e/portable_stub.go b/e2e/portable_stub.go index 669dec9..e811517 100644 --- a/e2e/portable_stub.go +++ b/e2e/portable_stub.go @@ -10,7 +10,10 @@ type portableDriveSource struct{} func newPortableDriveSource(_ *testing.T) *portableDriveSource { return nil } -func (s *portableDriveSource) Name() string { return "portable" } -func (s *portableDriveSource) Env() TestEnv { return 
Hermetic } -func (s *portableDriveSource) Setup(t *testing.T) []string { t.Skip("portable drive not supported on this platform"); return nil } +func (s *portableDriveSource) Name() string { return "portable" } +func (s *portableDriveSource) Env() TestEnv { return Hermetic } +func (s *portableDriveSource) Setup(t *testing.T) []string { + t.Skip("portable drive not supported on this platform") + return nil +} func (s *portableDriveSource) WriteFile(t *testing.T, relPath, content string) {} diff --git a/pkg/source/gdrive.go b/pkg/source/gdrive.go index 2d3c528..de41bfd 100644 --- a/pkg/source/gdrive.go +++ b/pkg/source/gdrive.go @@ -7,6 +7,7 @@ import ( "io" "net/http" "os" + "strings" "time" "github.com/cloudstic/cli/internal/core" @@ -26,6 +27,7 @@ type gDriveOptions struct { tokenPath string driveID string rootFolderID string + rootPath string accountEmail string excludePatterns []string skipNativeFiles bool @@ -63,14 +65,21 @@ func WithDriveID(id string) GDriveOption { } } -// WithRootFolderID sets the root folder ID. -// If empty, defaults to the root of the specified drive. +// WithRootFolderID sets the root folder ID directly (for client API). func WithRootFolderID(id string) GDriveOption { return func(o *gDriveOptions) { o.rootFolderID = id } } +// WithRootPath sets the path to the root folder. +// This is resolved to a folder ID during initialization. +func WithRootPath(path string) GDriveOption { + return func(o *gDriveOptions) { + o.rootPath = path + } +} + // WithAccountEmail explicitly sets the account email instead of calling the API. 
func WithAccountEmail(email string) GDriveOption { return func(o *gDriveOptions) { @@ -101,6 +110,7 @@ type GDriveSource struct { service *drive.Service driveID string // shared drive ID; empty means "My Drive" rootFolderID string // if empty, defaults to "root" (entire drive) + rootPath string // The string path the user specified, or "/" account string // Google account email; populated automatically driveName string // shared drive name; populated during construction exclude *ExcludeMatcher @@ -164,6 +174,7 @@ func NewGDriveSource(ctx context.Context, opts ...GDriveOption) (*GDriveSource, service: srv, driveID: cfg.driveID, rootFolderID: cfg.rootFolderID, + rootPath: cfg.rootPath, account: cfg.accountEmail, exclude: NewExcludeMatcher(cfg.excludePatterns), skipNativeFiles: cfg.skipNativeFiles, @@ -176,6 +187,16 @@ func NewGDriveSource(ctx context.Context, opts ...GDriveOption) (*GDriveSource, } } + if src.rootPath != "" && src.rootPath != "/" { + id, err := src.resolvePathToFolderID(ctx, src.rootPath) + if err != nil { + return nil, fmt.Errorf("invalid source path %q: %w", src.rootPath, err) + } + src.rootFolderID = id + } else if src.rootPath == "" { + src.rootPath = "/" + } + return src, nil } @@ -188,15 +209,10 @@ func (s *GDriveSource) Info() core.SourceInfo { } } - path := "/" - if s.rootFolderID != "" { - path = s.rootFolderID - } - info := core.SourceInfo{ Type: "gdrive", Account: account, - Path: path, + Path: s.rootPath, } if s.isSharedDrive() { @@ -213,6 +229,47 @@ func (s *GDriveSource) isSharedDrive() bool { return s.driveID != "" } +// resolvePathToFolderID resolves a string path (e.g. "/foo/bar") to a Drive folder ID. 
+func (s *GDriveSource) resolvePathToFolderID(ctx context.Context, path string) (string, error) { + parts := strings.Split(strings.Trim(path, "/"), "/") + currentParent := "root" + if s.isSharedDrive() { + currentParent = s.driveID + } + + for _, part := range parts { + if part == "" { + continue + } + query := fmt.Sprintf("trashed = false and mimeType = 'application/vnd.google-apps.folder' and name = '%s' and '%s' in parents", + strings.ReplaceAll(part, "'", "\\'"), currentParent) + call := s.service.Files.List(). + Q(query). + Fields("files(id)"). + PageSize(2). + Context(ctx) + if s.isSharedDrive() { + call.DriveId(s.driveID). + Corpora("drive"). + SupportsAllDrives(true). + IncludeItemsFromAllDrives(true) + } + + r, err := driveCallWithRetry(ctx, func() (*drive.FileList, error) { return call.Do() }) + if err != nil { + return "", fmt.Errorf("resolve path segment %q: %w", part, err) + } + if len(r.Files) == 0 { + return "", fmt.Errorf("folder not found in Drive: %q", part) + } + if len(r.Files) > 1 { + return "", fmt.Errorf("ambiguous path: multiple folders named %q found", part) + } + currentParent = r.Files[0].Id + } + return currentParent, nil +} + // --------------------------------------------------------------------------- // OAuth helpers // --------------------------------------------------------------------------- @@ -321,6 +378,36 @@ func (s *GDriveSource) Walk(ctx context.Context, callback func(core.FileMeta) er } } + // If rootFolderID is set, we need to filter to only its descendants. + // We also don't want to yield the root folder itself. 
+ var filteredFolders []*drive.File + if s.rootFolderID != "" { + descendants := make(map[string]bool) + descendants[s.rootFolderID] = true + + // folders are topo-sorted, so parents always come before children + topoFolders := topoSortFolders(folders) + for _, f := range topoFolders { + if f.Id == s.rootFolderID { + continue // Skip yielding the root folder itself + } + isDescendant := false + for _, pid := range f.Parents { + if descendants[pid] { + isDescendant = true + break + } + } + if isDescendant { + descendants[f.Id] = true + filteredFolders = append(filteredFolders, f) + } + } + folders = filteredFolders + } else { + folders = topoSortFolders(folders) + } + // pathMap tracks fileID → full path for all emitted entries. // Folders are topo-sorted (parents before children) so the parent // path is always known when we compute the child path. @@ -329,8 +416,11 @@ func (s *GDriveSource) Walk(ctx context.Context, callback func(core.FileMeta) er // their children are also skipped. excludedPaths := make(map[string]bool) + if s.rootFolderID != "" { + pathMap[s.rootFolderID] = "" // Root folder has empty path relative to itself + } + // Emit folders first (topo-sorted so parents before children). 
- folders = topoSortFolders(folders) for _, f := range folders { if err := s.visitEntryWithPath(f, pathMap, excludedPaths, callback); err != nil { return err @@ -361,6 +451,22 @@ func (s *GDriveSource) Walk(ctx context.Context, callback func(core.FileMeta) er } for _, f := range r.Files { + // If rootFolderID is set, filter files + if s.rootFolderID != "" { + isDescendant := false + for _, pid := range f.Parents { + // A file's parent must be in pathMap if it's a descendant of rootFolderID + // because all descendant folders have been processed and added to pathMap + if _, ok := pathMap[pid]; ok { + isDescendant = true + break + } + } + if !isDescendant { + continue + } + } + if err := s.visitEntryWithPath(f, pathMap, excludedPaths, callback); err != nil { return err } diff --git a/pkg/source/gdrive_changes.go b/pkg/source/gdrive_changes.go index b07b71a..c1b9d82 100644 --- a/pkg/source/gdrive_changes.go +++ b/pkg/source/gdrive_changes.go @@ -95,26 +95,14 @@ func (s *GDriveChangeSource) WalkChanges(ctx context.Context, token string, call excludedIDs := make(map[string]bool) hasExclude := !s.exclude.Empty() - for i := range folderChanges { - fc := &folderChanges[i] - if fc.Type != ChangeUpsert { - continue - } - p, err := s.resolveChangePath(ctx, fc.Meta, pathMap) - if err == nil && p != "" { - fc.Meta.Paths = []string{p} - pathMap[fc.Meta.FileID] = p - } + var err error + folderChanges, err = s.processChanges(ctx, folderChanges, pathMap, true) + if err != nil { + return "", err } - for i := range fileChanges { - fc := &fileChanges[i] - if fc.Type != ChangeUpsert { - continue - } - p, err := s.resolveChangePath(ctx, fc.Meta, pathMap) - if err == nil && p != "" { - fc.Meta.Paths = []string{p} - } + fileChanges, err = s.processChanges(ctx, fileChanges, pathMap, false) + if err != nil { + return "", err } for _, fc := range folderChanges { @@ -138,6 +126,80 @@ func (s *GDriveChangeSource) WalkChanges(ctx context.Context, token string, call } } +func (s 
*GDriveChangeSource) processChanges(ctx context.Context, changes []FileChange, pathMap map[string]string, isFolder bool) ([]FileChange, error) { + // Fast-path: if no root folder is specified, we don't need to filter anything out, + // and we can update the slice in-place without allocating a new one. + if s.rootFolderID == "" { + for i := range changes { + fc := &changes[i] + if fc.Type != ChangeUpsert { + continue + } + p, err := s.resolveChangePath(ctx, fc.Meta, pathMap) + if err != nil { + return nil, err + } + if p != "" { + fc.Meta.Paths = []string{p} + if isFolder { + pathMap[fc.Meta.FileID] = p + } + } + } + return changes, nil + } + + var validChanges []FileChange + for i := range changes { + fc := changes[i] + if fc.Type != ChangeUpsert { + validChanges = append(validChanges, fc) + continue + } + // Skip changes strictly outside our root folder + if !s.isDescendantOfRoot(ctx, fc.Meta) { + continue + } + p, err := s.resolveChangePath(ctx, fc.Meta, pathMap) + if err != nil { + return nil, err + } + // If p == "" and we have a rootFolderID, it means resolveChangePath + // determined this is not a descendant of rootFolderID. + if p == "" { + continue + } + if p != "" { + fc.Meta.Paths = []string{p} + if isFolder { + pathMap[fc.Meta.FileID] = p + } + } + validChanges = append(validChanges, fc) + } + return validChanges, nil +} + +// isDescendantOfRoot checks if a changed file belongs to the rootFolderID tree. +func (s *GDriveChangeSource) isDescendantOfRoot(ctx context.Context, meta core.FileMeta) bool { + if s.rootFolderID == "" { + return true // No root folder specified, everything is a descendant + } + if len(meta.Parents) == 0 { + return false // It's in the drive root, not inside rootFolderID + } + for _, pid := range meta.Parents { + if pid == s.rootFolderID { + return true + } + // We need to resolve the path up to the root to verify. 
+ // s.resolveChangePath naturally stops at rootFolderID because + // we check it, and returns errNotDescendant if it goes past it. + // We'll rely on resolveChangePath for the full strict check. + } + return true // Optimistic check, rely on resolveChangePath for definitive answer +} + // topoSortFolderChanges orders folder upsert changes so that every parent // appears before its children, using raw Drive parent IDs in Meta.Parents. func topoSortFolderChanges(changes []FileChange) []FileChange { @@ -175,10 +237,16 @@ func topoSortFolderChanges(changes []FileChange) []FileChange { // Drive hierarchy via API calls and caches every resolved segment. func (s *GDriveChangeSource) resolveChangePath(ctx context.Context, meta core.FileMeta, pathMap map[string]string) (string, error) { if len(meta.Parents) == 0 { + if s.rootFolderID != "" { + return "", nil // Not in our root folder + } return meta.Name, nil } parentPath, err := s.resolveDrivePath(ctx, meta.Parents[0], pathMap) if err != nil { + if err == errNotDescendant { + return "", nil + } return "", err } if parentPath == "" { @@ -187,9 +255,14 @@ func (s *GDriveChangeSource) resolveChangePath(ctx context.Context, meta core.Fi return parentPath + "/" + meta.Name, nil } +var errNotDescendant = fmt.Errorf("not a descendant of root folder") + // resolveDrivePath resolves a Drive folder ID to its full path by walking // up the parent chain via the Files.Get API. Results are cached in pathMap. 
func (s *GDriveChangeSource) resolveDrivePath(ctx context.Context, folderID string, pathMap map[string]string) (string, error) { + if s.rootFolderID != "" && folderID == s.rootFolderID { + return "", nil // Base of the tree + } if p, ok := pathMap[folderID]; ok { return p, nil } @@ -204,6 +277,10 @@ func (s *GDriveChangeSource) resolveDrivePath(ctx context.Context, folderID stri p := f.Name if len(f.Parents) > 0 { + if s.rootFolderID != "" && f.Parents[0] == s.rootFolderID { + pathMap[folderID] = p + return p, nil + } parentPath, err := s.resolveDrivePath(ctx, f.Parents[0], pathMap) if err != nil { return "", err @@ -211,6 +288,9 @@ func (s *GDriveChangeSource) resolveDrivePath(ctx context.Context, folderID stri if parentPath != "" { p = parentPath + "/" + f.Name } + } else if s.rootFolderID != "" { + // Reached the root of the drive, but it wasn't our rootFolderID + return "", errNotDescendant } pathMap[folderID] = p return p, nil diff --git a/pkg/source/gdrive_changes_test.go b/pkg/source/gdrive_changes_test.go new file mode 100644 index 0000000..e21752a --- /dev/null +++ b/pkg/source/gdrive_changes_test.go @@ -0,0 +1,180 @@ +package source + +import ( + "context" + "testing" + + "github.com/cloudstic/cli/internal/core" +) + +func TestIsDescendantOfRoot(t *testing.T) { + ctx := context.Background() + s := &GDriveChangeSource{ + GDriveSource: GDriveSource{ + rootFolderID: "root123", + }, + } + + tests := []struct { + name string + meta core.FileMeta + expected bool + }{ + { + name: "no parents", + meta: core.FileMeta{ + Parents: nil, + }, + expected: false, + }, + { + name: "direct child of rootFolderID", + meta: core.FileMeta{ + Parents: []string{"root123"}, + }, + expected: true, + }, + { + name: "some other parent", + meta: core.FileMeta{ + Parents: []string{"other456"}, + }, + expected: true, // Optimistic true + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := s.isDescendantOfRoot(ctx, tt.meta) + if result != tt.expected 
{ + t.Errorf("expected %v, got %v", tt.expected, result) + } + }) + } +} + +func TestResolveChangePath_WithRootFolder(t *testing.T) { + ctx := context.Background() + s := &GDriveChangeSource{ + GDriveSource: GDriveSource{ + rootFolderID: "root123", + }, + } + + pathMap := map[string]string{ + "folder1": "folder1", + "folder2": "folder1/folder2", + } + + tests := []struct { + name string + meta core.FileMeta + expected string + expectError bool + }{ + { + name: "no parents", + meta: core.FileMeta{ + Name: "file.txt", + Parents: nil, + }, + expected: "", + expectError: false, // returns empty string to be skipped + }, + { + name: "direct child of root", + meta: core.FileMeta{ + Name: "file.txt", + Parents: []string{"root123"}, + }, + expected: "file.txt", + expectError: false, + }, + { + name: "child of known folder", + meta: core.FileMeta{ + Name: "file.txt", + Parents: []string{"folder2"}, + }, + expected: "folder1/folder2/file.txt", + expectError: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + p, err := s.resolveChangePath(ctx, tt.meta, pathMap) + if (err != nil) != tt.expectError { + t.Errorf("expected error %v, got %v", tt.expectError, err) + } + if p != tt.expected { + t.Errorf("expected path %q, got %q", tt.expected, p) + } + }) + } +} + +func TestProcessChanges(t *testing.T) { + ctx := context.Background() + s := &GDriveChangeSource{ + GDriveSource: GDriveSource{ + rootFolderID: "root123", + }, + } + + pathMap := map[string]string{ + "folder1": "folder1", + } + + changes := []FileChange{ + { + Type: ChangeUpsert, + Meta: core.FileMeta{ + FileID: "file1", + Name: "file1.txt", + Parents: []string{"root123"}, + }, + }, + { + Type: ChangeUpsert, + Meta: core.FileMeta{ + FileID: "file2", + Name: "file2.txt", + Parents: []string{"folder1"}, + }, + }, + { + Type: ChangeUpsert, + Meta: core.FileMeta{ + FileID: "file3", + Name: "file3.txt", + Parents: nil, // Should be skipped + }, + }, + { + Type: ChangeDelete, // Deletes 
should be passed through + Meta: core.FileMeta{ + FileID: "file4", + }, + }, + } + + valid, err := s.processChanges(ctx, changes, pathMap, false) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if len(valid) != 3 { + t.Fatalf("expected 3 valid changes, got %d", len(valid)) + } + + if valid[0].Meta.FileID != "file1" || len(valid[0].Meta.Paths) == 0 || valid[0].Meta.Paths[0] != "file1.txt" { + t.Errorf("unexpected valid[0]: %+v", valid[0]) + } + if valid[1].Meta.FileID != "file2" || len(valid[1].Meta.Paths) == 0 || valid[1].Meta.Paths[0] != "folder1/file2.txt" { + t.Errorf("unexpected valid[1]: %+v", valid[1]) + } + if valid[2].Meta.FileID != "file4" || valid[2].Type != ChangeDelete { + t.Errorf("unexpected valid[2]: %+v", valid[2]) + } +} diff --git a/pkg/source/gdrive_test.go b/pkg/source/gdrive_test.go index 87ae0e7..1faca34 100644 --- a/pkg/source/gdrive_test.go +++ b/pkg/source/gdrive_test.go @@ -10,15 +10,15 @@ import ( func TestToFileMeta_RegularFile(t *testing.T) { s := &GDriveSource{exclude: NewExcludeMatcher(nil)} f := &drive.File{ - Id: "file1", - Name: "photo.jpg", - MimeType: "image/jpeg", - Size: 1024, - ModifiedTime: "2024-01-15T10:30:00Z", - Sha256Checksum: "abc123", - Parents: []string{"folder1"}, - Owners: []*drive.User{{EmailAddress: "user@example.com"}}, - HeadRevisionId: "rev42", + Id: "file1", + Name: "photo.jpg", + MimeType: "image/jpeg", + Size: 1024, + ModifiedTime: "2024-01-15T10:30:00Z", + Sha256Checksum: "abc123", + Parents: []string{"folder1"}, + Owners: []*drive.User{{EmailAddress: "user@example.com"}}, + HeadRevisionId: "rev42", } meta := s.toFileMeta(f) @@ -488,7 +488,7 @@ func TestWithSkipNativeFiles(t *testing.T) { } func TestGDriveInfo_MyDrive_Root(t *testing.T) { - s := &GDriveSource{account: "user@gmail.com"} + s := &GDriveSource{account: "user@gmail.com", rootPath: "/"} info := s.Info() if info.Type != "gdrive" { @@ -509,11 +509,11 @@ func TestGDriveInfo_MyDrive_Root(t *testing.T) { } func 
TestGDriveInfo_MyDrive_Subfolder(t *testing.T) { - s := &GDriveSource{account: "user@gmail.com", rootFolderID: "folder123"} + s := &GDriveSource{account: "user@gmail.com", rootFolderID: "folder123", rootPath: "/myfolder"} info := s.Info() - if info.Path != "folder123" { - t.Errorf("Path = %q, want folder123", info.Path) + if info.Path != "/myfolder" { + t.Errorf("Path = %q, want /myfolder", info.Path) } if info.VolumeUUID != "" { t.Errorf("VolumeUUID = %q, want empty for My Drive", info.VolumeUUID) @@ -528,6 +528,7 @@ func TestGDriveInfo_SharedDrive_Root(t *testing.T) { account: "user@gmail.com", driveID: "shared-drive-abc", driveName: "Team Photos", + rootPath: "/", } info := s.Info() @@ -548,11 +549,12 @@ func TestGDriveInfo_SharedDrive_Subfolder(t *testing.T) { driveID: "shared-drive-abc", driveName: "Team Photos", rootFolderID: "folder456", + rootPath: "/team/folder456", } info := s.Info() - if info.Path != "folder456" { - t.Errorf("Path = %q, want folder456", info.Path) + if info.Path != "/team/folder456" { + t.Errorf("Path = %q, want /team/folder456", info.Path) } if info.VolumeUUID != "shared-drive-abc" { t.Errorf("VolumeUUID = %q, want shared-drive-abc", info.VolumeUUID) diff --git a/pkg/source/local_source_plist_test.go b/pkg/source/local_source_plist_test.go index 47031f2..8d398d9 100644 --- a/pkg/source/local_source_plist_test.go +++ b/pkg/source/local_source_plist_test.go @@ -5,7 +5,7 @@ package source import "testing" func TestExtractPlistValue(t *testing.T) { -plistXML := ` + plistXML := ` @@ -20,53 +20,53 @@ plistXML := ` ` -tests := []struct { -key string -want string -}{ -{"DiskUUID", "A1B2C3D4-E5F6-7890-ABCD-EF0123456789"}, -{"VolumeName", "MYDRIVE"}, -{"DeviceIdentifier", "DISK2S1"}, -{"NonExistentKey", ""}, -} + tests := []struct { + key string + want string + }{ + {"DiskUUID", "A1B2C3D4-E5F6-7890-ABCD-EF0123456789"}, + {"VolumeName", "MYDRIVE"}, + {"DeviceIdentifier", "DISK2S1"}, + {"NonExistentKey", ""}, + } -for _, tc := range tests { 
-t.Run(tc.key, func(t *testing.T) { -got := extractPlistValue([]byte(plistXML), tc.key) -if got != tc.want { -t.Errorf("extractPlistValue(%q) = %q, want %q", tc.key, got, tc.want) -} -}) -} + for _, tc := range tests { + t.Run(tc.key, func(t *testing.T) { + got := extractPlistValue([]byte(plistXML), tc.key) + if got != tc.want { + t.Errorf("extractPlistValue(%q) = %q, want %q", tc.key, got, tc.want) + } + }) + } } func TestExtractPlistValue_EmptyInput(t *testing.T) { -if got := extractPlistValue([]byte(""), "DiskUUID"); got != "" { -t.Errorf("expected empty for empty input, got %q", got) -} + if got := extractPlistValue([]byte(""), "DiskUUID"); got != "" { + t.Errorf("expected empty for empty input, got %q", got) + } } func TestExtractPlistValue_MalformedXML(t *testing.T) { -if got := extractPlistValue([]byte(" 0 { top := stack[len(stack)-1] @@ -274,7 +295,9 @@ func (s *OneDriveSource) Walk(ctx context.Context, callback func(core.FileMeta) p := meta.Name if item.ParentReference != nil && item.ParentReference.ID != "" { if parentPath, ok := pathMap[item.ParentReference.ID]; ok { - p = parentPath + "/" + meta.Name + if parentPath != "" { + p = parentPath + "/" + meta.Name + } } } meta.Paths = []string{p} diff --git a/pkg/source/onedrive_changes.go b/pkg/source/onedrive_changes.go index 972450d..6f76967 100644 --- a/pkg/source/onedrive_changes.go +++ b/pkg/source/onedrive_changes.go @@ -36,7 +36,11 @@ func (s *OneDriveChangeSource) Info() core.SourceInfo { // GetStartPageToken returns the current head of the OneDrive delta stream by // requesting a "latest" delta token. The returned string is a full deltaLink URL. 
func (s *OneDriveChangeSource) GetStartPageToken() (string, error) { - resp, err := s.fetchDeltaPage(context.Background(), "https://graph.microsoft.com/v1.0/me/drive/root/delta?token=latest") + url := "https://graph.microsoft.com/v1.0/me/drive/root/delta?token=latest" + if s.rootPath != "" && s.rootPath != "/" { + url = fmt.Sprintf("https://graph.microsoft.com/v1.0/me/drive/root:%s:/delta?token=latest", s.rootPath) + } + resp, err := s.fetchDeltaPage(context.Background(), url) if err != nil { return "", fmt.Errorf("get latest delta token: %w", err) } @@ -82,6 +86,9 @@ func (s *OneDriveChangeSource) WalkChanges(ctx context.Context, token string, ca hasExclude := !s.exclude.Empty() excludedIDs := make(map[string]bool) + folderChanges = s.filterChangesByRootPath(folderChanges) + fileChanges = s.filterChangesByRootPath(fileChanges) + for _, fc := range folderChanges { if hasExclude && fc.Type == ChangeUpsert && shouldExcludeOneDriveChange(s.exclude, fc, excludedIDs) { continue @@ -90,6 +97,7 @@ func (s *OneDriveChangeSource) WalkChanges(ctx context.Context, token string, ca return "", err } } + for _, fc := range fileChanges { if hasExclude && fc.Type == ChangeUpsert && shouldExcludeOneDriveChange(s.exclude, fc, excludedIDs) { continue @@ -102,6 +110,34 @@ func (s *OneDriveChangeSource) WalkChanges(ctx context.Context, token string, ca } } +func (s *OneDriveChangeSource) filterChangesByRootPath(changes []FileChange) []FileChange { + if s.rootPath == "" || s.rootPath == "/" { + return changes + } + var valid []FileChange + trimmedRoot := strings.TrimPrefix(s.rootPath, "/") + for _, fc := range changes { + if len(fc.Meta.Paths) > 0 { + p := fc.Meta.Paths[0] + if !strings.HasPrefix(p, trimmedRoot+"/") && p != trimmedRoot { + continue // Outside of root path + } + // Adjust path relative to root + stripped := strings.TrimPrefix(p, trimmedRoot+"/") + stripped = strings.TrimPrefix(stripped, trimmedRoot) + if stripped == "" { + fc.Meta.Paths = []string{fc.Meta.Name} + } else 
{ + fc.Meta.Paths = []string{stripped} + } + } else if fc.Type == ChangeUpsert { + continue + } + valid = append(valid, fc) + } + return valid +} + func (s *OneDriveChangeSource) itemToFileChange(item graphItem) FileChange { if item.Deleted != nil { return FileChange{ diff --git a/pkg/source/onedrive_test.go b/pkg/source/onedrive_test.go index cf41666..37050b8 100644 --- a/pkg/source/onedrive_test.go +++ b/pkg/source/onedrive_test.go @@ -1,9 +1,13 @@ package source -import "testing" +import ( + "testing" + + "github.com/cloudstic/cli/internal/core" +) func TestOneDriveInfo(t *testing.T) { - s := &OneDriveSource{account: "user@outlook.com"} + s := &OneDriveSource{account: "user@outlook.com", rootPath: "/"} info := s.Info() if info.Type != "onedrive" { @@ -25,7 +29,7 @@ func TestOneDriveInfo(t *testing.T) { func TestOneDriveChangesInfo_Type(t *testing.T) { s := &OneDriveChangeSource{ - OneDriveSource: OneDriveSource{account: "user@outlook.com"}, + OneDriveSource: OneDriveSource{account: "user@outlook.com", rootPath: "/"}, } info := s.Info() @@ -39,3 +43,82 @@ func TestOneDriveChangesInfo_Type(t *testing.T) { t.Errorf("Path = %q, want /", info.Path) } } + +func TestOneDriveFilterChangesByRootPath(t *testing.T) { + s := &OneDriveChangeSource{ + OneDriveSource: OneDriveSource{rootPath: "/my/root/path"}, + } + + changes := []FileChange{ + { + Type: ChangeUpsert, + Meta: core.FileMeta{ + Name: "file1.txt", + Paths: []string{"my/root/path/file1.txt"}, + }, + }, + { + Type: ChangeUpsert, + Meta: core.FileMeta{ + Name: "file2.txt", + Paths: []string{"my/root/path/sub/file2.txt"}, + }, + }, + { + Type: ChangeUpsert, + Meta: core.FileMeta{ + Name: "file3.txt", + Paths: []string{"other/path/file3.txt"}, // Should be filtered out + }, + }, + { + Type: ChangeUpsert, + Meta: core.FileMeta{ + Name: "my", + Paths: []string{"my/root/path"}, // The root path itself + }, + }, + { + Type: ChangeDelete, + Meta: core.FileMeta{ + Name: "file4.txt", + Paths: nil, // Deletes don't have 
paths, should not be filtered out + }, + }, + { + Type: ChangeUpsert, + Meta: core.FileMeta{ + Name: "file5.txt", + Paths: nil, // Upserts without paths are filtered out if not at root + }, + }, + } + + filtered := s.filterChangesByRootPath(changes) + + if len(filtered) != 4 { + t.Fatalf("expected 4 filtered changes, got %d", len(filtered)) + } + + if filtered[0].Meta.Paths[0] != "file1.txt" { + t.Errorf("expected stripped path file1.txt, got %s", filtered[0].Meta.Paths[0]) + } + if filtered[1].Meta.Paths[0] != "sub/file2.txt" { + t.Errorf("expected stripped path sub/file2.txt, got %s", filtered[1].Meta.Paths[0]) + } + if filtered[2].Meta.Paths[0] != "my" { + t.Errorf("expected stripped path 'my', got %s", filtered[2].Meta.Paths[0]) + } + if filtered[3].Type != ChangeDelete { + t.Errorf("expected delete change, got %v", filtered[3].Type) + } + + // Test with rootPath = "" + s2 := &OneDriveChangeSource{ + OneDriveSource: OneDriveSource{rootPath: ""}, + } + filtered2 := s2.filterChangesByRootPath(changes) + if len(filtered2) != len(changes) { + t.Errorf("expected %d changes with empty rootPath, got %d", len(changes), len(filtered2)) + } +} diff --git a/pkg/source/onedrive_test.go.orig b/pkg/source/onedrive_test.go.orig new file mode 100644 index 0000000..cf41666 --- /dev/null +++ b/pkg/source/onedrive_test.go.orig @@ -0,0 +1,41 @@ +package source + +import "testing" + +func TestOneDriveInfo(t *testing.T) { + s := &OneDriveSource{account: "user@outlook.com"} + info := s.Info() + + if info.Type != "onedrive" { + t.Errorf("Type = %q, want onedrive", info.Type) + } + if info.Account != "user@outlook.com" { + t.Errorf("Account = %q, want user@outlook.com", info.Account) + } + if info.Path != "/" { + t.Errorf("Path = %q, want /", info.Path) + } + if info.VolumeUUID != "" { + t.Errorf("VolumeUUID = %q, want empty", info.VolumeUUID) + } + if info.VolumeLabel != "My Drive" { + t.Errorf("VolumeLabel = %q, want My Drive", info.VolumeLabel) + } +} + +func 
TestOneDriveChangesInfo_Type(t *testing.T) { + s := &OneDriveChangeSource{ + OneDriveSource: OneDriveSource{account: "user@outlook.com"}, + } + info := s.Info() + + if info.Type != "onedrive-changes" { + t.Errorf("Type = %q, want onedrive-changes", info.Type) + } + if info.VolumeLabel != "My Drive" { + t.Errorf("VolumeLabel = %q, want My Drive", info.VolumeLabel) + } + if info.Path != "/" { + t.Errorf("Path = %q, want /", info.Path) + } +} diff --git a/pkg/source/onedrive_test.go.rej b/pkg/source/onedrive_test.go.rej new file mode 100644 index 0000000..fbad673 --- /dev/null +++ b/pkg/source/onedrive_test.go.rej @@ -0,0 +1,18 @@ +@@ -10,7 +10,7 @@ + ) + + func TestOneDriveInfo(t *testing.T) { +- s := &OneDriveSource{account: "user@domain.com"} ++ s := &OneDriveSource{account: "user@domain.com", rootPath: "/"} + info := s.Info() + + if info.Type != "onedrive" { +@@ -33,7 +33,7 @@ + + func TestOneDriveChangesInfo_Type(t *testing.T) { + s := &OneDriveChangeSource{ +- OneDriveSource: OneDriveSource{account: "user@domain.com"}, ++ OneDriveSource: OneDriveSource{account: "user@domain.com", rootPath: "/"}, + } + info := s.Info() + diff --git a/rfcs/0007-cloud-subdirectory-backup.md b/rfcs/0007-cloud-subdirectory-backup.md new file mode 100644 index 0000000..b22a926 --- /dev/null +++ b/rfcs/0007-cloud-subdirectory-backup.md @@ -0,0 +1,51 @@ +# RFC 0007: Cloud Subdirectory Backup + +- **Status:** Adopted +- **Date:** 2026-03-14 + +## Abstract + +This RFC proposes native support for backing up subdirectories of cloud sources (Google Drive, OneDrive) by extending the source URI syntax and removing the confusing `-root-folder` CLI flag. + +## Context + +Previously, users had to use the `-source gdrive` flag in combination with a separate `-root-folder ` flag to scope backups to a specific folder. This design had several flaws: + +1. **Discoverability**: Folder IDs are abstract, opaque strings (e.g. `1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs74OgVwQA`) that are hard to find and manage. 
+2. **Inconsistency**: Local sources support paths in the URI (`local:/path/to/folder`), but cloud sources required a distinct CLI argument. +3. **Display Issues**: The `cloudstic list` command displayed the raw folder ID instead of a human-readable path. + +## Proposal + +### 1. URI Syntax Enhancement + +Extend the `-source` flag to accept an optional path component for cloud sources: + +- `gdrive:/Projects/Code` +- `gdrive-changes:/Important` +- `onedrive:/Documents/Finance` +- `onedrive-changes:/Photos` + +The CLI will parse this URI into the `scheme` and `path` components automatically. + +### 2. Removal of `-root-folder` + +Remove the `-root-folder` flag from the CLI to eliminate redundancy and confusion. The underlying client library option (`WithRootFolderID`) will be retained for programmatic use where providing a raw ID might be more efficient. + +### 3. API Translation + +The provided string path will be translated dynamically during initialization: + +- **Google Drive**: The string path will be resolved layer-by-layer using `Files.List` queries into a canonical Drive Folder ID. +- **OneDrive**: Microsoft Graph API natively supports path-based addressing (`/me/drive/root:/path/to/folder`), so translation is trivial. + +### 4. Implementation Details + +- **Full Scan (`Walk`)**: The source iterators will only fetch or yield descendants of the resolved root path. +- **Incremental Scan (`WalkChanges`)**: Changes returned by delta endpoints that fall outside the configured tree will be filtered out internally: Google Drive resolves each change's parent chain up to the root folder, while OneDrive matches each change's path against the root path prefix. +- **Display**: `SourceInfo.Path` will store the human-readable path (e.g., `/Projects/Code`) instead of the opaque folder ID, improving the output of the `cloudstic list` command. + +## Trade-offs + +- **Google Drive API Load**: Resolving a string path layer-by-layer requires additional API calls (one per path segment). This is mitigated by only doing it once during initialization. 
+- **Path Changes**: If a user renames a folder in Google Drive or OneDrive but does not update their backup script, the backup will fail because the string path no longer resolves. This is standard behavior for path-based sources (such as local or SFTP backups), but it differs from ID-based addressing, which tracks the directory regardless of renames.