From fcd43fe7834cc9b246a9d3c2482e2c6714f54048 Mon Sep 17 00:00:00 2001 From: Anthony Volk Date: Thu, 15 Jan 2026 04:47:54 +0300 Subject: [PATCH] fix: Return newest blob when multiple match in metadata version lookup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, _get_blob_by_metadata_version returned the first blob that matched the requested version. When multiple blobs exist with the same metadata version (e.g., after re-uploading), this incorrectly returned the oldest blob instead of the newest. Now collects all matching blobs and returns the one with the highest generation number (the newest blob). 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- changelog_entry.yaml | 4 ++ .../data/version_aware_storage_client.py | 19 +++++--- .../data/test_version_aware_storage_client.py | 43 +++++++++++++++++++ 3 files changed, 60 insertions(+), 6 deletions(-) diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29b..5207c2eb 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1,4 @@ +- bump: patch + changes: + fixed: + - Fixed `_get_blob_by_metadata_version` to return the newest blob (highest generation) when multiple blobs match the requested version. diff --git a/policyengine/utils/data/version_aware_storage_client.py b/policyengine/utils/data/version_aware_storage_client.py index 0fdc144d..5f2e76a9 100644 --- a/policyengine/utils/data/version_aware_storage_client.py +++ b/policyengine/utils/data/version_aware_storage_client.py @@ -112,16 +112,23 @@ def _get_blob_by_metadata_version( f"Searching for blob {bucket.name}/{key} with metadata version '{version}'" ) versions = bucket.list_blobs(prefix=key, versions=True) + matching_blobs = [] for blob in versions: if blob.metadata and blob.metadata.get("version") == version: - logger.info( - f"Found blob {bucket.name}/{key} with metadata version '{version}'" - ) - return blob + matching_blobs.append(blob) - raise ValueError( - f"No blob found with version '{version}' for {bucket.name}/{key}" + if not matching_blobs: + raise ValueError( + f"No blob found with version '{version}' for {bucket.name}/{key}" + ) + + # Return the blob with the highest generation number (newest) + newest_blob = max(matching_blobs, key=lambda b: b.generation) + logger.info( + f"Found blob {bucket.name}/{key} with metadata version '{version}' " + f"(generation {newest_blob.generation})" ) + return newest_blob def crc32c( self, diff --git a/tests/utils/data/test_version_aware_storage_client.py b/tests/utils/data/test_version_aware_storage_client.py index 38b6ebac..0ae448e2 100644 --- a/tests/utils/data/test_version_aware_storage_client.py +++ b/tests/utils/data/test_version_aware_storage_client.py @@ -164,10 +164,13 @@ def test_get_blob__metadata_version_searches_blobs( # Create mock blobs with different metadata versions blob1 = MagicMock() blob1.metadata = {"version": "1.0.0"} + blob1.generation = 100 blob2 = MagicMock() blob2.metadata = {"version": "1.2.3"} + blob2.generation = 200 blob3 = MagicMock() blob3.metadata = None + blob3.generation = 150 bucket.list_blobs.return_value = [blob1, blob3, blob2] @@ -177,6 +180,45 @@ def test_get_blob__metadata_version_searches_blobs( assert result == blob2 bucket.list_blobs.assert_called_with(prefix="test_key", versions=True) + @patch( + "policyengine.utils.data.version_aware_storage_client.Client", + autospec=True, + ) + def test_get_blob__metadata_version_returns_newest_when_multiple_match( + self, mock_client_class + ): + """Test that when multiple blobs have the same version, the newest (highest generation) is returned.""" + mock_instance = mock_client_class.return_value + bucket = mock_instance.bucket.return_value + + # Create mock blobs with the SAME metadata version but different generations + oldest_blob = MagicMock() + oldest_blob.metadata = {"version": "1.2.3"} + oldest_blob.generation = 100 + + middle_blob = MagicMock() + middle_blob.metadata = {"version": "1.2.3"} + middle_blob.generation = 200 + + newest_blob = MagicMock() + newest_blob.metadata = {"version": "1.2.3"} + newest_blob.generation = 300 + + # Return blobs in non-sorted order to ensure we're not relying on order + bucket.list_blobs.return_value = [ + middle_blob, + oldest_blob, + newest_blob, + ] + + client = VersionAwareStorageClient() + result = client.get_blob("test_bucket", "test_key", version="1.2.3") + + # Should return the blob with the highest generation number + assert result == newest_blob + assert result.generation == 300 + bucket.list_blobs.assert_called_with(prefix="test_key", versions=True) + @patch( "policyengine.utils.data.version_aware_storage_client.Client", autospec=True, @@ -217,6 +259,7 @@ def test_get_blob__generation_fallback_to_metadata( # Set up metadata-based lookup to succeed metadata_blob = MagicMock() metadata_blob.metadata = {"version": "999"} + metadata_blob.generation = 100 bucket.list_blobs.return_value = [metadata_blob] client = VersionAwareStorageClient()