From 2fc9dbe7e5ee7a0d122ee3ed9c94f7e9f8ee3afc Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 13 Sep 2025 19:02:55 +0200 Subject: [PATCH 1/2] fix: empty blob hashes are now automatically considered present. Co-authored-by: Byron <63622+Byron@users.noreply.github.com> --- gix/src/repository/object.rs | 30 ++++++++- gix/tests/gix/repository/object.rs | 103 +++++++++++++++++++++++++++++ 2 files changed, 132 insertions(+), 1 deletion(-) diff --git a/gix/src/repository/object.rs b/gix/src/repository/object.rs index 6cbd6372d78..a7c05ef77bb 100644 --- a/gix/src/repository/object.rs +++ b/gix/src/repository/object.rs @@ -49,6 +49,14 @@ impl crate::Repository { repo: self, }); } + if id == ObjectId::empty_blob(self.object_hash()) { + return Ok(Object { + id, + kind: gix_object::Kind::Blob, + data: Vec::new(), + repo: self, + }); + } let mut buf = self.free_buf(); let kind = self.objects.find(&id, &mut buf)?.kind; Ok(Object::from_data(id, kind, buf, self)) @@ -95,6 +103,12 @@ impl crate::Repository { size: 0, }); } + if id == ObjectId::empty_blob(self.object_hash()) { + return Ok(gix_odb::find::Header::Loose { + kind: gix_object::Kind::Blob, + size: 0, + }); + } self.objects.header(id) } @@ -109,7 +123,7 @@ impl crate::Repository { #[doc(alias = "exists", alias = "git2")] pub fn has_object(&self, id: impl AsRef) -> bool { let id = id.as_ref(); - if id.to_owned().is_empty_tree() { + if id.to_owned().is_empty_tree() || id.to_owned().is_empty_blob() { true } else { self.objects.exists(id) @@ -130,6 +144,12 @@ impl crate::Repository { size: 0, })); } + if id == ObjectId::empty_blob(self.object_hash()) { + return Ok(Some(gix_odb::find::Header::Loose { + kind: gix_object::Kind::Blob, + size: 0, + })); + } self.objects.try_header(&id).map_err(Into::into) } @@ -144,6 +164,14 @@ impl crate::Repository { repo: self, })); } + if id == ObjectId::empty_blob(self.object_hash()) { + return Ok(Some(Object { + id, + kind: gix_object::Kind::Blob, + data: Vec::new(), + repo: self, + })); + } let mut buf = self.free_buf(); match self.objects.try_find(&id, &mut buf)? { diff --git a/gix/tests/gix/repository/object.rs b/gix/tests/gix/repository/object.rs index 040238591ec..8c9877d0494 100644 --- a/gix/tests/gix/repository/object.rs +++ b/gix/tests/gix/repository/object.rs @@ -507,6 +507,109 @@ mod find { ); Ok(()) } + + #[test] + fn empty_blob_can_always_be_found() -> crate::Result { + let repo = basic_repo()?; + let empty_blob = gix::hash::ObjectId::empty_blob(repo.object_hash()); + assert_eq!(repo.find_object(empty_blob)?.into_blob().data.len(), 0); + assert!(repo.has_object(empty_blob)); + assert_eq!( + repo.find_header(empty_blob)?, + gix_odb::find::Header::Loose { + kind: gix_object::Kind::Blob, + size: 0, + }, + "empty blob is considered a loose object" + ); + assert_eq!( + repo.try_find_object(empty_blob)? + .expect("present") + .into_blob() + .data + .len(), + 0 + ); + assert_eq!( + repo.try_find_header(empty_blob)?, + Some(gix_odb::find::Header::Loose { + kind: gix_object::Kind::Blob, + size: 0, + }), + "empty blob is considered a loose object" + ); + + // Note: Unlike empty tree, empty blobs might actually exist in the repository. + // The key point is that has_object() should always return true for empty blobs, + // regardless of whether they are physically stored or not. + Ok(()) + } +} + +#[test] +fn empty_blob_is_always_considered_present() -> crate::Result { + use gix_object::Find; + + // Test with an empty in-memory repository to ensure empty blob is considered present + // even when it doesn't physically exist + let repo = empty_bare_in_memory_repo()?; + let empty_blob = gix::hash::ObjectId::empty_blob(repo.object_hash()); + + // The key behavior being tested: has_object should return true for empty blob + assert!(repo.has_object(empty_blob), "empty blob should always be considered present"); + + // Verify that the lower-level object database doesn't have it + let mut buf = Vec::new(); + let lower_level_result = repo.objects.try_find(&empty_blob, &mut buf)?; + + // Empty blob might or might not exist at the lower level - that's implementation dependent + // But has_object should always return true regardless + match lower_level_result { + Some(_) => { + // If it exists at the lower level, that's fine + } + None => { + // If it doesn't exist at the lower level, has_object should still return true + // thanks to our special handling + } + } + + Ok(()) +} + +#[test] +fn empty_blob_edge_cases() -> crate::Result { + let repo = empty_bare_in_memory_repo()?; + let empty_blob_id = gix::hash::ObjectId::empty_blob(repo.object_hash()); + + // Test all the related methods for empty blobs + assert!(repo.has_object(&empty_blob_id), "has_object should return true"); + + // Test find_header + let header = repo.find_header(empty_blob_id)?; + assert_eq!(header.kind(), gix_object::Kind::Blob); + assert_eq!(header.size(), 0); + + // Test try_find_header + let header = repo.try_find_header(empty_blob_id)?.expect("should find header"); + assert_eq!(header.kind(), gix_object::Kind::Blob); + assert_eq!(header.size(), 0); + + // Test find_object + let obj = repo.find_object(empty_blob_id)?; + assert_eq!(obj.kind, gix_object::Kind::Blob); + assert_eq!(obj.data.len(), 0); + + // Test try_find_object + let obj = repo.try_find_object(empty_blob_id)?.expect("should find object"); + assert_eq!(obj.kind, gix_object::Kind::Blob); + assert_eq!(obj.data.len(), 0); + + // Test that we can get a blob from the object + let blob = obj.into_blob(); + assert_eq!(blob.data.len(), 0); + + Ok(()) } mod tag { From 689d839230cff2cf4407c59d443db841ab846b98 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 13 Sep 2025 19:03:04 +0200 Subject: [PATCH 2/2] refactor --- gix/src/repository/object.rs | 2 + gix/tests/gix/repository/object.rs | 71 +++++++++--------------------- 2 files changed, 22 insertions(+), 51 deletions(-) diff --git a/gix/src/repository/object.rs b/gix/src/repository/object.rs index a7c05ef77bb..2078c3e98d2 100644 --- a/gix/src/repository/object.rs +++ b/gix/src/repository/object.rs @@ -94,6 +94,8 @@ impl crate::Repository { /// Obtain information about an object without fully decoding it, or fail if the object doesn't exist. /// /// Note that despite being cheaper than [`Self::find_object()`], there is still some effort traversing delta-chains. + /// Also note that for empty trees and blobs, it will always report it to exist in loose objects, even if they don't + /// exist or if they exist in a pack. #[doc(alias = "read_header", alias = "git2")] pub fn find_header(&self, id: impl Into) -> Result { let id = id.into(); diff --git a/gix/tests/gix/repository/object.rs b/gix/tests/gix/repository/object.rs index 8c9877d0494..70d7c3fbec6 100644 --- a/gix/tests/gix/repository/object.rs +++ b/gix/tests/gix/repository/object.rs @@ -1,3 +1,5 @@ +use gix_odb::Header; +use gix_pack::Find; use gix_testtools::tempfile; use crate::util::named_subrepo_opts; @@ -538,77 +540,44 @@ mod find { }), "empty blob is considered a loose object" ); - - // Note: Unlike empty tree, empty blobs might actually exist in the repository. - // The key point is that has_object() should always return true for empty blobs, - // regardless of whether they are physically stored or not. Ok(()) } } #[test] -fn empty_blob_is_always_considered_present() -> crate::Result { - use gix_object::Find; - - // Test with an empty in-memory repository to ensure empty blob is considered present - // even when it doesn't physically exist - let repo = empty_bare_in_memory_repo()?; - let empty_blob = gix::hash::ObjectId::empty_blob(repo.object_hash()); - - // The key behavior being tested: has_object should return true for empty blob - assert!(repo.has_object(empty_blob), "empty blob should always be considered present"); - - // Verify that the lower-level object database doesn't have it - let mut buf = Vec::new(); - let lower_level_result = repo.objects.try_find(&empty_blob, &mut buf)?; - - // Empty blob might or might not exist at the lower level - that's implementation dependent - // But has_object should always return true regardless - match lower_level_result { - Some(_) => { - // If it exists at the lower level, that's fine - } - None => { - // If it doesn't exist at the lower level, has_object should still return true - // thanks to our special handling - } - } - - Ok(()) -} - -#[test] -fn empty_blob_edge_cases() -> crate::Result { +fn empty_objects_are_always_present_but_not_in_plumbing() -> crate::Result { let repo = empty_bare_in_memory_repo()?; let empty_blob_id = gix::hash::ObjectId::empty_blob(repo.object_hash()); - - // Test all the related methods for empty blobs - assert!(repo.has_object(&empty_blob_id), "has_object should return true"); - - // Test find_header + + assert!( + repo.has_object(empty_blob_id), + "empty object is always present even if it's not" + ); + assert!(!repo.objects.contains(&empty_blob_id)); + let header = repo.find_header(empty_blob_id)?; assert_eq!(header.kind(), gix_object::Kind::Blob); assert_eq!(header.size(), 0); - - // Test try_find_header + assert_eq!(repo.objects.try_header(&empty_blob_id)?, None); + let header = repo.try_find_header(empty_blob_id)?.expect("should find header"); assert_eq!(header.kind(), gix_object::Kind::Blob); assert_eq!(header.size(), 0); - - // Test find_object + let obj = repo.find_object(empty_blob_id)?; assert_eq!(obj.kind, gix_object::Kind::Blob); assert_eq!(obj.data.len(), 0); - - // Test try_find_object + + let mut buf = Vec::new(); + assert_eq!(repo.objects.try_find(&empty_blob_id, &mut buf)?, None); + let obj = repo.try_find_object(empty_blob_id)?.expect("should find object"); assert_eq!(obj.kind, gix_object::Kind::Blob); assert_eq!(obj.data.len(), 0); - - // Test that we can get a blob from the object - let blob = obj.into_blob(); + + let blob = obj.try_into_blob()?; assert_eq!(blob.data.len(), 0); - + Ok(()) }