From da114c14a66ad6cb1fc07aba9c8c1b147a4fa0f0 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 24 Oct 2025 07:21:36 +0200 Subject: [PATCH 1/2] fix: refspec for shallow clones uses a single-branch (#2227) When doing shallow clones (depth != NoChange), the clone now uses a single-branch refspec instead of fetching all branches. This matches Git's behavior and significantly reduces the repository size for shallow clones. For shallow clones: - If ref_name is specified: uses that branch - Otherwise: attempts to detect the default branch from the Protocol V1 handshake, falling back to the init.defaultBranch config or "main" This addresses issue #2227 where `gix clone --depth 1` was creating repositories ~130MB vs Git's ~70MB due to fetching all branches. Co-authored-by: Byron <63622+Byron@users.noreply.github.com> --- gix/src/clone/fetch/mod.rs | 82 +++++++++++++++++++++++++++++++++++--- gix/tests/gix/clone.rs | 35 ++++++++++++++++ 2 files changed, 111 insertions(+), 6 deletions(-) diff --git a/gix/src/clone/fetch/mod.rs b/gix/src/clone/fetch/mod.rs index 34641a89fb1..c56f413061b 100644 --- a/gix/src/clone/fetch/mod.rs +++ b/gix/src/clone/fetch/mod.rs @@ -47,6 +47,8 @@ pub enum Error { }, #[error(transparent)] CommitterOrFallback(#[from] crate::config::time::Error), + #[error(transparent)] + RefMap(#[from] crate::remote::ref_map::Error), } /// Modification @@ -101,14 +103,81 @@ impl PrepareFetch { }; let mut remote = repo.remote_at(self.url.clone())?; + + // For shallow clones without custom configuration, we'll use a single-branch refspec + // to match git's behavior (matching git's single-branch behavior for shallow clones). 
+ let use_single_branch_for_shallow = self.shallow != remote::fetch::Shallow::NoChange + && self.configure_remote.is_none() + && remote.fetch_specs.is_empty(); + + let target_ref = if use_single_branch_for_shallow { + // Determine target branch from user-specified ref_name or default branch + if let Some(ref_name) = &self.ref_name { + // User specified a branch, use that + Some(format!("refs/heads/{}", ref_name.as_ref().as_bstr())) + } else { + // For shallow clones without a specified ref, we need to determine the default branch. + // We'll connect to get HEAD information. For Protocol V2, we need to explicitly list refs. + let mut connection = remote.connect(remote::Direction::Fetch).await?; + + // Perform handshake and try to get HEAD from it (works for Protocol V1) + let _ = connection.ref_map_by_ref(&mut progress, Default::default()).await?; + + let target = if let Some(handshake) = &connection.handshake { + // Protocol V1: refs are in handshake + handshake.refs.as_ref().and_then(|refs| { + refs.iter().find_map(|r| match r { + gix_protocol::handshake::Ref::Symbolic { + full_ref_name, target, .. 
+ } if full_ref_name == "HEAD" => Some(target.to_string()), + _ => None, + }) + }) + } else { + None + }; + + // For Protocol V2 or if we couldn't determine HEAD, use the configured default branch + let fallback_branch = target + .or_else(|| { + repo.config + .resolved + .string(crate::config::tree::Init::DEFAULT_BRANCH) + .and_then(|name| name.to_str().ok().map(|s| format!("refs/heads/{}", s))) + }) + .unwrap_or_else(|| "refs/heads/main".to_string()); + + // Drop the connection explicitly to release the borrow on remote + drop(connection); + + Some(fallback_branch) + } + } else { + None + }; + + // Set up refspec based on whether we're doing a single-branch shallow clone if remote.fetch_specs.is_empty() { - remote = remote - .with_refspecs( - Some(format!("+refs/heads/*:refs/remotes/{remote_name}/*").as_str()), - remote::Direction::Fetch, - ) - .expect("valid static spec"); + if let Some(target_ref) = &target_ref { + // Single-branch refspec for shallow clones + let short_name = target_ref.strip_prefix("refs/heads/").unwrap_or(target_ref.as_str()); + remote = remote + .with_refspecs( + Some(format!("+{target_ref}:refs/remotes/{remote_name}/{short_name}").as_str()), + remote::Direction::Fetch, + ) + .expect("valid refspec"); + } else { + // Wildcard refspec for non-shallow clones or when target couldn't be determined + remote = remote + .with_refspecs( + Some(format!("+refs/heads/*:refs/remotes/{remote_name}/*").as_str()), + remote::Direction::Fetch, + ) + .expect("valid static spec"); + } } + let mut clone_fetch_tags = None; if let Some(f) = self.configure_remote.as_mut() { remote = f(remote).map_err(Error::RemoteConfiguration)?; @@ -133,6 +202,7 @@ impl PrepareFetch { .expect("valid") .to_owned(); let pending_pack: remote::fetch::Prepare<'_, '_, _> = { + // For shallow clones, we already connected once, so we need to connect again let mut connection = remote.connect(remote::Direction::Fetch).await?; if let Some(f) = self.configure_connection.as_mut() { f(&mut 
connection).map_err(Error::RemoteConnection)?; diff --git a/gix/tests/gix/clone.rs b/gix/tests/gix/clone.rs index 573a8e3ddfe..3624afb7e55 100644 --- a/gix/tests/gix/clone.rs +++ b/gix/tests/gix/clone.rs @@ -83,6 +83,41 @@ mod blocking_io { Ok(()) } + #[test] + fn shallow_clone_uses_single_branch_refspec() -> crate::Result { + let tmp = gix_testtools::tempfile::TempDir::new()?; + let (repo, _out) = gix::prepare_clone_bare(remote::repo("base").path(), tmp.path())? + .with_shallow(Shallow::DepthAtRemote(1.try_into()?)) + .fetch_only(gix::progress::Discard, &std::sync::atomic::AtomicBool::default())?; + + assert!(repo.is_shallow(), "repository should be shallow"); + + // Verify that only a single-branch refspec was configured + let remote = repo.find_remote("origin")?; + let refspecs: Vec<_> = remote + .refspecs(Direction::Fetch) + .iter() + .map(|spec| spec.to_ref().to_bstring()) + .collect(); + + assert_eq!(refspecs.len(), 1, "shallow clone should have only one fetch refspec"); + + // The refspec should be for a single branch (main), not a wildcard + let refspec_str = refspecs[0].to_str().expect("valid utf8"); + assert!( + !refspec_str.contains("*"), + "shallow clone refspec should not use wildcard: {}", + refspec_str + ); + assert!( + refspec_str.contains("refs/heads/main"), + "shallow clone refspec should reference the main branch: {}", + refspec_str + ); + + Ok(()) + } + #[test] fn from_shallow_prohibited_with_option() -> crate::Result { let tmp = gix_testtools::tempfile::TempDir::new()?; From de7289a6f32462241f46e27871df1227f45f5a0a Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 24 Oct 2025 07:24:12 +0200 Subject: [PATCH 2/2] refactor --- gix/src/clone/fetch/mod.rs | 2 +- gix/tests/gix/clone.rs | 12 +++--------- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/gix/src/clone/fetch/mod.rs b/gix/src/clone/fetch/mod.rs index c56f413061b..e9c26f7075b 100644 --- a/gix/src/clone/fetch/mod.rs +++ b/gix/src/clone/fetch/mod.rs @@ -143,7 +143,7 @@ 
impl PrepareFetch { repo.config .resolved .string(crate::config::tree::Init::DEFAULT_BRANCH) - .and_then(|name| name.to_str().ok().map(|s| format!("refs/heads/{}", s))) + .and_then(|name| name.to_str().ok().map(|s| format!("refs/heads/{s}"))) }) .unwrap_or_else(|| "refs/heads/main".to_string()); diff --git a/gix/tests/gix/clone.rs b/gix/tests/gix/clone.rs index 3624afb7e55..f52f4a21a09 100644 --- a/gix/tests/gix/clone.rs +++ b/gix/tests/gix/clone.rs @@ -104,15 +104,9 @@ mod blocking_io { // The refspec should be for a single branch (main), not a wildcard let refspec_str = refspecs[0].to_str().expect("valid utf8"); - assert!( - !refspec_str.contains("*"), - "shallow clone refspec should not use wildcard: {}", - refspec_str - ); - assert!( - refspec_str.contains("refs/heads/main"), - "shallow clone refspec should reference the main branch: {}", - refspec_str + assert_eq!( + refspec_str, "+refs/heads/main:refs/remotes/origin/main", + "shallow clone refspec should not use wildcard and should be the main branch: {refspec_str}" ); Ok(())