Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion datadog_sync/commands/sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@
# This product includes software developed at Datadog (https://www.datadoghq.com/).
# Copyright 2019 Datadog, Inc.

from click import command
from click import command, option

from datadog_sync.commands.shared.options import (
CustomOptionClass,
common_options,
destination_auth_options,
diffs_options,
Expand All @@ -24,6 +25,20 @@
@diffs_options
@sync_options
@storage_options
@option(
"--minimize-reads",
required=False,
is_flag=True,
default=False,
show_default=True,
help="Minimize cloud storage reads by loading only needed resources. "
"Uses ID-targeted fetching when filters specify exact IDs, "
"otherwise falls back to type-scoped loading. "
"Requires --resource-per-file and --resources. "
"Must not be combined with --cleanup. "
"Only available on the sync command.",
cls=CustomOptionClass,
)
def sync(**kwargs):
    """Sync Datadog resources to destination."""
    # Thin CLI entry point: every click option value (including the new
    # --minimize-reads flag) arrives in **kwargs and is forwarded unchanged
    # to the shared command runner, which performs validation and execution.
    run_cmd(Command.SYNC, **kwargs)
20 changes: 20 additions & 0 deletions datadog_sync/utils/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,15 +306,35 @@ def build_config(cmd: Command, **kwargs: Optional[Any]) -> Configuration:
)

resource_per_file = kwargs.get(RESOURCE_PER_FILE, False)
minimize_reads = kwargs.get("minimize_reads", False)

# Validate --minimize-reads constraints
if minimize_reads:
if not resource_per_file:
raise click.UsageError("--minimize-reads requires --resource-per-file")
if not kwargs.get("resources", None):
raise click.UsageError("--minimize-reads requires --resources")
# NOTE: a resolved review-comment thread (michael-richey) was anchored here in the PR view.
if kwargs.get("cleanup") and kwargs["cleanup"].lower() in ("true", "force"):
raise click.UsageError("--minimize-reads cannot be combined with --cleanup")

# Determine loading strategy for minimize-reads
_state_resource_types = None # None = full load (existing behavior)
if minimize_reads and (rs := kwargs.get("resources", None)):
_state_resource_types = [r.strip().lower() for r in rs.split(",") if r.strip()]

# Initialize state
state = State(
type_=storage_type,
source_resources_path=source_resources_path,
destination_resources_path=destination_resources_path,
config=config,
resource_per_file=resource_per_file,
resource_types=_state_resource_types, # None = full load
)

if _state_resource_types is not None:
logger.info(f"minimize-reads: type-scoped loading for {_state_resource_types}")

# Initialize Configuration
config = Configuration(
logger=logger,
Expand Down
5 changes: 4 additions & 1 deletion datadog_sync/utils/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@

class State:
def __init__(self, type_: StorageType = StorageType.LOCAL_FILE, **kwargs: object) -> None:
self._resource_types = kwargs.get("resource_types", None)
self._minimize_reads = self._resource_types is not None
resource_per_file = kwargs.get(RESOURCE_PER_FILE, False)
source_resources_path = kwargs.get(SOURCE_PATH_PARAM, SOURCE_PATH_DEFAULT)
destination_resources_path = kwargs.get(DESTINATION_PATH_PARAM, DESTINATION_PATH_DEFAULT)
Expand Down Expand Up @@ -76,7 +78,8 @@ def destination(self):
return self._data.destination

def load_state(self, origin: Origin = Origin.ALL) -> None:
    """Read state for *origin* from the storage backend into memory.

    A ``self._resource_types`` of ``None`` asks the backend for every
    resource type (the pre-existing full-load behavior); a non-None list
    restricts the read to just those types (minimize-reads mode).
    """
    requested_types = self._resource_types
    self._data = self._storage.get(origin, resource_types=requested_types)

def dump_state(self, origin: Origin = Origin.ALL) -> None:
    """Persist the in-memory state back to the storage backend for *origin*."""
    self._storage.put(origin, self._data)
Expand Down
219 changes: 219 additions & 0 deletions tests/unit/test_minimize_reads_type_scoped.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
# Unless explicitly stated otherwise all files in this repository are licensed
# under the 3-clause BSD style license (see LICENSE).
# This product includes software developed at Datadog (https://www.datadoghq.com/).
# Copyright 2019 Datadog, Inc.

"""Tests for PR 2: type-scoped loading via --minimize-reads flag (type-scoped strategy)."""

from pathlib import Path
from unittest.mock import MagicMock, patch

import pytest
from click.testing import CliRunner

from datadog_sync.cli import cli
from datadog_sync.constants import Origin
from datadog_sync.utils.storage._base_storage import StorageData
from datadog_sync.utils.storage.local_file import LocalFile
from datadog_sync.utils.storage.storage_types import StorageType


@pytest.fixture
def runner():
    """Click CLI runner for invoking commands in-process.

    NOTE(review): ``mix_stderr=True`` was the default in click < 8.2, and the
    keyword was removed entirely in click 8.2 (stderr is always captured
    separately there) — so passing it explicitly is either redundant or a
    ``TypeError`` depending on the pinned click version. Constructing the
    runner without it is behavior-identical on older click and
    forward-compatible with newer click.
    """
    return CliRunner()


class TestStateTypeScopedLoading:
    """State/storage behavior of type-scoped loading (--minimize-reads)."""

    @staticmethod
    def _make_dirs(tmp_path):
        """Create and return (source, destination) directories as str paths.

        Extracted because every test in this class repeated the same four
        lines of directory setup.
        """
        src = tmp_path / "source"
        dst = tmp_path / "dest"
        src.mkdir()
        dst.mkdir()
        return str(src), str(dst)

    def test_state_loads_only_requested_types_when_type_scoped(self, tmp_path):
        """State with resource_types only loads those types from storage."""
        src_path, dst_path = self._make_dirs(tmp_path)

        # Write two resource types to storage.
        backend = LocalFile(
            source_resources_path=src_path,
            destination_resources_path=dst_path,
            resource_per_file=True,
        )
        data = StorageData()
        data.source["roles"]["role-1"] = {"id": "role-1", "name": "Admin"}
        data.source["dashboards"]["dash-1"] = {"id": "dash-1", "title": "My Dash"}
        backend.put(Origin.SOURCE, data)

        # Load only roles.
        result = backend.get(Origin.SOURCE, resource_types=["roles"])
        assert "role-1" in result.source["roles"]
        # Use .get() so a completely absent "dashboards" bucket also counts as
        # "not loaded" (a plain-dict subscript would raise KeyError inside the
        # assertion instead of failing it).
        assert "dash-1" not in result.source.get("dashboards", {}), "dashboards should not be loaded"

    def test_state_loads_all_when_no_resource_types(self, tmp_path):
        """Regression: no resource_types loads all (existing behavior unchanged)."""
        src_path, dst_path = self._make_dirs(tmp_path)

        backend = LocalFile(
            source_resources_path=src_path,
            destination_resources_path=dst_path,
            resource_per_file=True,
        )
        data = StorageData()
        data.source["roles"]["role-1"] = {"id": "role-1"}
        data.source["dashboards"]["dash-1"] = {"id": "dash-1"}
        backend.put(Origin.SOURCE, data)

        result = backend.get(Origin.SOURCE, resource_types=None)
        assert "role-1" in result.source["roles"]
        assert "dash-1" in result.source["dashboards"]

    def test_state_minimize_reads_flag(self, tmp_path):
        """State._minimize_reads is True when resource_types is set."""
        from datadog_sync.utils.state import State

        src_path, dst_path = self._make_dirs(tmp_path)

        state = State(
            type_=StorageType.LOCAL_FILE,
            source_resources_path=src_path,
            destination_resources_path=dst_path,
            resource_types=["roles"],
        )
        assert state._minimize_reads is True

    def test_state_no_minimize_reads_by_default(self, tmp_path):
        """Regression: State._minimize_reads is False when resource_types is None."""
        from datadog_sync.utils.state import State

        src_path, dst_path = self._make_dirs(tmp_path)

        state = State(
            type_=StorageType.LOCAL_FILE,
            source_resources_path=src_path,
            destination_resources_path=dst_path,
        )
        assert state._minimize_reads is False


class TestMinimizeReadsCLIValidation:
    """CLI-level validation rules for the --minimize-reads flag."""

    def test_minimize_reads_requires_resource_per_file(self, runner):
        """--minimize-reads without --resource-per-file must fail with error."""
        args = [
            "sync",
            "--minimize-reads",
            "--resources=roles",
            "--source-api-key=x",
            "--destination-api-key=y",
        ]
        res = runner.invoke(cli, args)
        combined = (res.output + str(res.exception)).lower()
        assert res.exit_code != 0
        assert "resource-per-file" in combined

    def test_minimize_reads_requires_resources_flag(self, runner):
        """--minimize-reads without --resources must fail with error."""
        args = [
            "sync",
            "--minimize-reads",
            "--resource-per-file",
            "--source-api-key=x",
            "--destination-api-key=y",
        ]
        res = runner.invoke(cli, args)
        combined = (res.output + str(res.exception)).lower()
        assert res.exit_code != 0
        assert "resources" in combined

    def test_minimize_reads_not_available_on_diffs_command(self, runner):
        """--minimize-reads must not be accepted by the diffs command."""
        args = ["diffs", "--minimize-reads", "--source-api-key=x", "--destination-api-key=y"]
        res = runner.invoke(cli, args)
        assert res.exit_code != 0
        assert "no such option" in res.output.lower()

    def test_minimize_reads_not_available_on_import_command(self, runner):
        """--minimize-reads must not be accepted by the import command."""
        res = runner.invoke(cli, ["import", "--minimize-reads", "--source-api-key=x"])
        assert res.exit_code != 0
        assert "no such option" in res.output.lower()

    def test_minimize_reads_cannot_be_combined_with_cleanup(self, runner):
        """--minimize-reads + --cleanup must be rejected before any I/O."""
        args = [
            "sync",
            "--minimize-reads",
            "--resource-per-file",
            "--resources=roles",
            "--cleanup=Force",
            "--source-api-key=x",
            "--destination-api-key=y",
        ]
        res = runner.invoke(cli, args)
        combined = (res.output + str(res.exception)).lower()
        assert res.exit_code != 0
        assert "cleanup" in combined


# NOTE: a resolved review-comment thread (michael-richey) was anchored here in the PR view.
class TestS3TypeScopedGet:
    # Verifies that the S3 storage backend narrows its ListObjectsV2 Prefix
    # when resource_types is supplied, and keeps the broad path prefix when it
    # is None. boto3 is patched at the module under test, so no AWS network
    # calls are made; AWSS3Bucket is imported INSIDE the patch context so its
    # module-level boto3 reference resolves to the mock.

    def test_s3_get_type_scoped_uses_per_type_prefix(self):
        """S3 get() with resource_types uses type-specific prefix, not broad prefix."""
        with patch("datadog_sync.utils.storage.aws_s3_bucket.boto3") as mock_boto3:
            mock_client = MagicMock()
            mock_boto3.client.return_value = mock_client

            # Empty paginated response
            mock_client.list_objects_v2.return_value = {"IsTruncated": False}

            from datadog_sync.utils.storage.aws_s3_bucket import AWSS3Bucket

            backend = AWSS3Bucket(
                source_resources_path="resources/source",
                destination_resources_path="resources/destination",
                config={
                    "aws_bucket_name": "test-bucket",
                    "aws_region_name": "us-east-1",
                    "aws_access_key_id": "",
                    "aws_secret_access_key": "",
                    "aws_session_token": "",
                },
            )

            backend.get(Origin.SOURCE, resource_types=["roles"])

            # Should list with type-specific prefix, not broad prefix.
            # call_args_list[0][1] is the kwargs dict of the first call.
            call_kwargs = mock_client.list_objects_v2.call_args_list[0][1]
            # NOTE(review): the trailing "." implies per-type object keys look
            # like "<path>/<type>.<something>" — confirm against the backend.
            assert call_kwargs["Prefix"] == "resources/source/roles."
            assert "dashboard" not in call_kwargs["Prefix"]

    def test_s3_get_broad_prefix_when_no_resource_types(self):
        """S3 get() without resource_types uses the broad prefix (existing behavior)."""
        with patch("datadog_sync.utils.storage.aws_s3_bucket.boto3") as mock_boto3:
            mock_client = MagicMock()
            mock_boto3.client.return_value = mock_client
            mock_client.list_objects_v2.return_value = {"IsTruncated": False}

            from datadog_sync.utils.storage.aws_s3_bucket import AWSS3Bucket

            backend = AWSS3Bucket(
                source_resources_path="resources/source",
                destination_resources_path="resources/destination",
                config={
                    "aws_bucket_name": "test-bucket",
                    "aws_region_name": "us-east-1",
                    "aws_access_key_id": "",
                    "aws_secret_access_key": "",
                    "aws_session_token": "",
                },
            )

            backend.get(Origin.SOURCE, resource_types=None)

            call_kwargs = mock_client.list_objects_v2.call_args_list[0][1]
            assert call_kwargs["Prefix"] == "resources/source"