diff --git a/datadog_sync/utils/configuration.py b/datadog_sync/utils/configuration.py
index a85f4274..c4c95222 100644
--- a/datadog_sync/utils/configuration.py
+++ b/datadog_sync/utils/configuration.py
@@ -41,7 +41,7 @@
 from datadog_sync.utils.custom_client import CustomClient
 from datadog_sync.utils.base_resource import BaseResource
 from datadog_sync.utils.log import Log
-from datadog_sync.utils.filter import Filter, process_filters
+from datadog_sync.utils.filter import Filter, process_filters, EXACT_MATCH_OPERATOR
 from datadog_sync.utils.resource_utils import CustomClientHTTPError
 from datadog_sync.utils.state import State
 from datadog_sync.utils.storage.storage_types import StorageType
@@ -135,6 +135,52 @@ async def exit_async(self):
         await self.destination_client._end_session()
 
 
+def _unwrap_exact_match_pattern(pattern: str) -> str:
+    """Extract the raw ID value from an ExactMatch ^...$-wrapped regex pattern.
+
+    Defensive check: ExactMatch always produces ^...$, so ValueError should not
+    fire in practice. Raises ValueError so callers can detect unexpected patterns.
+    """
+    if not (pattern.startswith("^") and pattern.endswith("$")):
+        raise ValueError(f"Expected ExactMatch regex ^...$, got: {pattern!r}")
+    return pattern[1:-1]
+
+
+def extract_exact_id_filters(
+    filters: Dict[str, list],
+    filter_operator: str,
+    resource_types: list,
+) -> Optional[Dict[str, list]]:
+    """Return {type: [id1, id2, ...]} when all conditions allow ID-targeted loading.
+
+    Conditions (all must be true):
+    - filter_operator is OR (case-insensitive)
+    - Every resource type in resource_types has at least one filter
+    - All filters for each type use Name=id + ExactMatch operator
+
+    Returns None if any condition fails → caller falls back to type-scoped loading.
+    """
+    if filter_operator.lower() != "or":
+        return None
+    result = {}
+    for rt in resource_types:
+        rt_filters = filters.get(rt, [])
+        if not rt_filters:
+            return None  # No filters for this type — can't use ID-targeted
+        # All filters must be id-field ExactMatch
+        if not all(f.attr_name == ["id"] and f.operator == EXACT_MATCH_OPERATOR for f in rt_filters):
+            return None
+        # Extract raw IDs from ^...$-wrapped regex patterns.
+        # Defensive: ExactMatch guarantees ^...$, so ValueError should not fire.
+        # Kept as a safety net in case filter construction changes upstream.
+        try:
+            ids = [_unwrap_exact_match_pattern(f.attr_re.pattern) for f in rt_filters]
+        except ValueError:
+            return None  # Pattern wasn't ^...$-wrapped — fall back gracefully
+        result[rt] = ids
+    return result
+
+
 def build_config(cmd: Command, **kwargs: Optional[Any]) -> Configuration:
     # configure logger — in JSON mode, Log writes NDJSON to stdout and silences stderr
     emit_json = kwargs.get("emit_json", False)
@@ -318,9 +364,18 @@ def build_config(cmd: Command, **kwargs: Optional[Any]) -> Configuration:
         raise click.UsageError("--minimize-reads cannot be combined with --cleanup")
 
     # Determine loading strategy for minimize-reads
-    _state_resource_types = None  # None = full load (existing behavior)
+    _state_resource_types = None  # type-scoped; None = full load (existing behavior)
+    _state_exact_ids = None  # ID-targeted; None = not using ID-targeted
     if minimize_reads and (rs := kwargs.get("resources", None)):
-        _state_resource_types = [r.strip().lower() for r in rs.split(",") if r.strip()]
+        raw_types = [r.strip().lower() for r in rs.split(",") if r.strip()]
+        # Try ID-targeted strategy first (fast path: exact IDs from filters)
+        early_filters = process_filters(kwargs.get("filter"))
+        filter_operator = kwargs.get("filter_operator", "or")
+        _state_exact_ids = extract_exact_id_filters(early_filters, filter_operator, raw_types)
+        if _state_exact_ids is None:
+            # Fall back to type-scoped loading
+            logger.debug("minimize-reads: ID-targeted not eligible — filters are not all id+ExactMatch+OR")
+            _state_resource_types = raw_types
 
     # Initialize state
     state = State(
@@ -329,10 +384,14 @@ def build_config(cmd: Command, **kwargs: Optional[Any]) -> Configuration:
         destination_resources_path=destination_resources_path,
         config=config,
         resource_per_file=resource_per_file,
-        resource_types=_state_resource_types,  # None = full load
+        resource_types=_state_resource_types,  # None = full load or ID-targeted
+        exact_ids=_state_exact_ids,  # None = not using ID-targeted
     )
 
-    if _state_resource_types is not None:
+    if _state_exact_ids is not None:
+        total = sum(len(v) for v in _state_exact_ids.values())
+        logger.info(f"minimize-reads: ID-targeted loading for {total} resources across {list(_state_exact_ids.keys())}")
+    elif _state_resource_types is not None:
         logger.info(f"minimize-reads: type-scoped loading for {_state_resource_types}")
 
     # Initialize Configuration
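A quick, self-contained sanity check of the eligibility rules above, using the same `process_filters()` filter-string form that the unit tests at the bottom of this PR exercise. This is a sketch for reviewers, not part of the change:

```python
# Sketch: mirrors the eligibility rules in extract_exact_id_filters() above.
from datadog_sync.utils.configuration import extract_exact_id_filters
from datadog_sync.utils.filter import process_filters

filters = process_filters(
    [
        "Type=dashboards;Name=id;Value=abc-123;Operator=ExactMatch",
        "Type=monitors;Name=id;Value=42;Operator=ExactMatch",
    ]
)

# Eligible: every requested type carries only id+ExactMatch filters, OR semantics.
assert extract_exact_id_filters(filters, "or", ["dashboards", "monitors"]) == {
    "dashboards": ["abc-123"],
    "monitors": ["42"],
}

# Not eligible: "roles" was requested but has no filter, so the function returns
# None and build_config() falls back to type-scoped loading.
assert extract_exact_id_filters(filters, "or", ["dashboards", "monitors", "roles"]) is None
```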
diff --git a/datadog_sync/utils/resources_handler.py b/datadog_sync/utils/resources_handler.py
index d4659701..36cf1afe 100644
--- a/datadog_sync/utils/resources_handler.py
+++ b/datadog_sync/utils/resources_handler.py
@@ -625,7 +625,21 @@ def _resource_connections(self, resource_type: str, _id: str) -> Tuple[Set[Tuple
         # After retrieving all of the failed connections, we check if
         # the resources are imported. Otherwise append to missing with its type.
         for f_id in failed:
+            # With --minimize-reads, dependency types may not be in the
+            # initial scoped load. Lazily load this specific dependency
+            # (source+destination) so the source check below is accurate,
+            # and so connect_resources() in _apply_resource_cb() can
+            # successfully remap the ID in the destination.
+            self.config.state.ensure_resource_loaded(resource_to_connect, f_id)
             if f_id not in self.config.state.source[resource_to_connect]:
+                if self.config.state._minimize_reads:
+                    self.config.logger.warning(
+                        "minimize-reads: dependency %s.%s not found in storage; "
+                        "ID remapping may be incomplete",
+                        resource_to_connect,
+                        f_id,
+                    )
                 missing_resources.add((resource_to_connect, f_id))
                 failed_connections.add((resource_to_connect, f_id))
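To make the new invariant concrete, here is a runnable sketch of the loop above against a `State` in minimize-reads mode. The resource type, IDs, and paths are made up for illustration; `state` stands in for `self.config.state`:

```python
# Self-contained sketch of the dependency path above (hypothetical data).
import os

from datadog_sync.utils.state import State
from datadog_sync.utils.storage.storage_types import StorageType

for d in ("example/source", "example/destination"):
    os.makedirs(d, exist_ok=True)

state = State(
    type_=StorageType.LOCAL_FILE,
    source_resources_path="example/source",
    destination_resources_path="example/destination",
    exact_ids={"dashboards": []},  # minimize-reads mode, nothing preloaded
    resource_per_file=True,
)

missing_resources, failed_connections = set(), set()
resource_to_connect, failed = "monitors", {"mon-1"}

for f_id in failed:
    state.ensure_resource_loaded(resource_to_connect, f_id)  # at most one read per id
    if f_id not in state.source[resource_to_connect]:
        # A miss now means "absent from storage", not "outside the scoped load".
        missing_resources.add((resource_to_connect, f_id))
        failed_connections.add((resource_to_connect, f_id))

assert ("monitors", "mon-1") in missing_resources  # nothing was seeded on disk
```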
diff --git a/datadog_sync/utils/state.py b/datadog_sync/utils/state.py
index 8ae28df0..f3b31186 100644
--- a/datadog_sync/utils/state.py
+++ b/datadog_sync/utils/state.py
@@ -2,12 +2,14 @@
 # under the 3-clause BSD style license (see LICENSE).
 # This product includes software developed at Datadog (https://www.datadoghq.com/).
 # Copyright 2019 Datadog, Inc.
+import logging
 from typing import Any, Dict, List, Tuple
 
 from datadog_sync.constants import (
     Origin,
     DESTINATION_PATH_DEFAULT,
     DESTINATION_PATH_PARAM,
+    LOGGER_NAME,
     RESOURCE_PER_FILE,
     SOURCE_PATH_DEFAULT,
     SOURCE_PATH_PARAM,
@@ -19,11 +21,15 @@
 from datadog_sync.utils.storage.local_file import LocalFile
 from datadog_sync.utils.storage.storage_types import StorageType
 
+log = logging.getLogger(LOGGER_NAME)
+
 
 class State:
     def __init__(self, type_: StorageType = StorageType.LOCAL_FILE, **kwargs: object) -> None:
-        self._resource_types = kwargs.get("resource_types", None)
-        self._minimize_reads = self._resource_types is not None
+        self._resource_types = kwargs.get("resource_types", None)  # type-scoped loading
+        self._exact_ids = kwargs.get("exact_ids", None)  # ID-targeted loading
+        self._minimize_reads = self._resource_types is not None or self._exact_ids is not None
+        self._ensure_attempted: set = set()  # tracks IDs attempted by ensure_resource_loaded
         resource_per_file = kwargs.get(RESOURCE_PER_FILE, False)
         source_resources_path = kwargs.get(SOURCE_PATH_PARAM, SOURCE_PATH_DEFAULT)
         destination_resources_path = kwargs.get(DESTINATION_PATH_PARAM, DESTINATION_PATH_DEFAULT)
@@ -78,8 +84,44 @@ def destination(self):
         return self._data.destination
 
     def load_state(self, origin: Origin = Origin.ALL) -> None:
-        # resource_types=None → load all types (default behavior)
-        self._data = self._storage.get(origin, resource_types=self._resource_types)
+        if self._exact_ids is not None:
+            # ID-targeted: fetch only specified resources by constructing keys directly
+            self._data = self._storage.get_by_ids(origin, self._exact_ids)
+        else:
+            # Type-scoped (resource_types set) or full load (resource_types=None)
+            self._data = self._storage.get(origin, resource_types=self._resource_types)
+
+    def ensure_resource_loaded(self, resource_type: str, resource_id: str) -> None:
+        """Lazily load source+destination state for one dependency resource.
+
+        Called from _resource_connections() in resources_handler.py when a
+        cross-type dependency is encountered that may not be in the initial
+        (scoped) load. Loads both source and destination state so that
+        connect_id() in _apply_resource_cb() can remap IDs correctly.
+
+        Note: requires resource_per_file=True in the storage backend.
+        get_single constructs per-resource filenames; monolithic layout
+        will silently return (None, None) for every dependency.
+
+        Contract:
+        - Idempotent: no-op if (resource_type, resource_id) already attempted
+        - No-op when not in minimize-reads mode (_minimize_reads=False)
+        - Additive: loaded entries are merged in; a miss never clears existing state
+        - Missing file: (None, None) → resource stays absent (correct behavior)
+        - asyncio-safe: fully synchronous, no await points
+        """
+        if not self._minimize_reads:
+            return
+        key = (resource_type, resource_id)
+        if key in self._ensure_attempted:
+            return
+        self._ensure_attempted.add(key)
+        log.debug(f"minimize-reads: lazy-loading dep {resource_type}.{resource_id}")
+        src, dst = self._storage.get_single(resource_type, resource_id)
+        if src is not None:
+            self._data.source[resource_type][resource_id] = src
+        if dst is not None:
+            self._data.destination[resource_type][resource_id] = dst
 
     def dump_state(self, origin: Origin = Origin.ALL) -> None:
         self._storage.put(origin, self._data)
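A minimal end-to-end sketch of the new `State` surface, mirroring the unit tests below. It assumes the `LocalFile` backend and uses illustrative paths:

```python
# Sketch: seed per-resource state with LocalFile, then load it ID-targeted.
import os

from datadog_sync.constants import Origin
from datadog_sync.utils.state import State
from datadog_sync.utils.storage._base_storage import StorageData
from datadog_sync.utils.storage.local_file import LocalFile
from datadog_sync.utils.storage.storage_types import StorageType

for d in ("resources/source", "resources/destination"):
    os.makedirs(d, exist_ok=True)

backend = LocalFile(
    source_resources_path="resources/source",
    destination_resources_path="resources/destination",
    resource_per_file=True,  # per-resource files: required by get_single()
)
data = StorageData()
data.source["dashboards"]["dash-1"] = {"id": "dash-1"}
data.source["monitors"]["mon-1"] = {"id": "mon-1"}
backend.put(Origin.SOURCE, data)

state = State(
    type_=StorageType.LOCAL_FILE,
    source_resources_path="resources/source",
    destination_resources_path="resources/destination",
    exact_ids={"dashboards": ["dash-1"]},  # only dash-1 is read up front
    resource_per_file=True,
)
assert "dash-1" in state.source["dashboards"]
assert "mon-1" not in state.source["monitors"]

state.ensure_resource_loaded("monitors", "mon-1")  # lazy, memoized
assert state.source["monitors"]["mon-1"] == {"id": "mon-1"}
```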
diff --git a/datadog_sync/utils/storage/_base_storage.py b/datadog_sync/utils/storage/_base_storage.py
index 79ffaed8..551551a9 100644
--- a/datadog_sync/utils/storage/_base_storage.py
+++ b/datadog_sync/utils/storage/_base_storage.py
@@ -9,7 +9,7 @@
 from dataclasses import dataclass, field
 from typing import Any, Dict, List, Optional, Tuple
 
-from datadog_sync.constants import LOGGER_NAME
+from datadog_sync.constants import LOGGER_NAME, Origin
 
 log = logging.getLogger(LOGGER_NAME)
 
@@ -80,7 +80,6 @@ def get(self, origin, resource_types=None) -> StorageData:
         """
         pass
 
-    @abstractmethod
     def get_by_ids(self, origin, exact_ids: Dict[str, List[str]]) -> StorageData:
         """Load specific resources by ID, constructing keys directly. No listing needed.
 
@@ -91,7 +90,17 @@ def get_by_ids(self, origin, exact_ids: Dict[str, List[str]]) -> StorageData:
         Returns StorageData with only the requested resources. Missing resources
         are silently skipped (no exception raised for NotFound).
         """
-        pass
+        if not getattr(self, "resource_per_file", True):
+            raise ValueError("get_by_ids() requires --resource-per-file. Re-run with --resource-per-file enabled.")
+        data = StorageData()
+        for resource_type, ids in exact_ids.items():
+            for resource_id in ids:
+                src, dst = self.get_single(resource_type, resource_id)
+                if origin in [Origin.SOURCE, Origin.ALL] and src is not None:
+                    data.source[resource_type][resource_id] = src
+                if origin in [Origin.DESTINATION, Origin.ALL] and dst is not None:
+                    data.destination[resource_type][resource_id] = dst
+        return data
 
     @abstractmethod
     def get_single(self, resource_type: str, resource_id: str) -> Tuple[Optional[Dict], Optional[Dict]]:
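To illustrate the refactor (the per-backend `get_by_ids` overrides deleted below were identical copies of this new default), a toy duck-typed backend only needs `get_single()`. This sketch assumes the abstract base class in `_base_storage.py` is named `BaseStorage`; the class statement itself is outside the hunk, so treat that name as an assumption:

```python
# Sketch: the shared get_by_ids() only depends on get_single() and
# resource_per_file, so any object with that surface can reuse it.
from datadog_sync.constants import Origin
from datadog_sync.utils.storage._base_storage import BaseStorage  # name assumed


class _MemBackend:  # hypothetical stand-in, for illustration only
    resource_per_file = True  # the new guard rejects monolithic layouts

    def __init__(self, src, dst):
        self._src, self._dst = src, dst

    def get_single(self, resource_type, resource_id):
        return (
            self._src.get(resource_type, {}).get(resource_id),
            self._dst.get(resource_type, {}).get(resource_id),
        )


backend = _MemBackend(src={"dashboards": {"d1": {"id": "d1"}}}, dst={})
# Invoke the shared implementation unbound; real subclasses simply inherit it.
data = BaseStorage.get_by_ids(backend, Origin.SOURCE, {"dashboards": ["d1", "missing"]})
assert "d1" in data.source["dashboards"]
assert "missing" not in data.source["dashboards"]  # misses are skipped silently
```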
" "Re-run with --resource-per-file enabled.") - data = StorageData() - for resource_type, ids in exact_ids.items(): - for resource_id in ids: - src, dst = self.get_single(resource_type, resource_id) - if origin in [Origin.SOURCE, Origin.ALL] and src is not None: - data.source[resource_type][resource_id] = src - if origin in [Origin.DESTINATION, Origin.ALL] and dst is not None: - data.destination[resource_type][resource_id] = dst - return data - def get_single(self, resource_type: str, resource_id: str) -> Tuple[Optional[Dict], Optional[Dict]]: """Load one resource's source and destination state by ID. diff --git a/datadog_sync/utils/storage/azure_blob_container.py b/datadog_sync/utils/storage/azure_blob_container.py index 265d292d..c8715ad6 100644 --- a/datadog_sync/utils/storage/azure_blob_container.py +++ b/datadog_sync/utils/storage/azure_blob_container.py @@ -6,7 +6,7 @@ import json import logging from collections import defaultdict -from typing import Dict, List, Optional, Tuple +from typing import Dict, Optional, Tuple from azure.core.exceptions import ResourceNotFoundError from azure.storage.blob import BlobServiceClient, ContainerClient @@ -138,20 +138,6 @@ def _try_get_blob(self, key: str) -> Optional[Dict]: log.warning(f"invalid json in azure resource file: {key}") return None - def get_by_ids(self, origin: Origin, exact_ids: Dict[str, List[str]]) -> StorageData: - """Load specific resources by ID without listing. Constructs keys directly.""" - if not self.resource_per_file: - raise ValueError("get_by_ids() requires --resource-per-file. " "Re-run with --resource-per-file enabled.") - data = StorageData() - for resource_type, ids in exact_ids.items(): - for resource_id in ids: - src, dst = self.get_single(resource_type, resource_id) - if origin in [Origin.SOURCE, Origin.ALL] and src is not None: - data.source[resource_type][resource_id] = src - if origin in [Origin.DESTINATION, Origin.ALL] and dst is not None: - data.destination[resource_type][resource_id] = dst - return data - def get_single(self, resource_type: str, resource_id: str) -> Tuple[Optional[Dict], Optional[Dict]]: """Load one resource's source and destination state by ID.""" safe_id = self._sanitize_id_for_filename(resource_id) diff --git a/datadog_sync/utils/storage/gcs_bucket.py b/datadog_sync/utils/storage/gcs_bucket.py index c80930d6..7b70456c 100644 --- a/datadog_sync/utils/storage/gcs_bucket.py +++ b/datadog_sync/utils/storage/gcs_bucket.py @@ -6,7 +6,7 @@ import json import logging from collections import defaultdict -from typing import Dict, List, Optional, Tuple +from typing import Dict, Optional, Tuple from google.api_core.exceptions import NotFound from google.cloud import storage as gcs_storage @@ -130,20 +130,6 @@ def _try_get_blob(self, key: str) -> Optional[Dict]: log.warning(f"invalid json in gcs resource file: {key}") return None - def get_by_ids(self, origin: Origin, exact_ids: Dict[str, List[str]]) -> StorageData: - """Load specific resources by ID without listing. Constructs keys directly.""" - if not self.resource_per_file: - raise ValueError("get_by_ids() requires --resource-per-file. 
" "Re-run with --resource-per-file enabled.") - data = StorageData() - for resource_type, ids in exact_ids.items(): - for resource_id in ids: - src, dst = self.get_single(resource_type, resource_id) - if origin in [Origin.SOURCE, Origin.ALL] and src is not None: - data.source[resource_type][resource_id] = src - if origin in [Origin.DESTINATION, Origin.ALL] and dst is not None: - data.destination[resource_type][resource_id] = dst - return data - def get_single(self, resource_type: str, resource_id: str) -> Tuple[Optional[Dict], Optional[Dict]]: """Load one resource's source and destination state by ID.""" safe_id = self._sanitize_id_for_filename(resource_id) diff --git a/datadog_sync/utils/storage/local_file.py b/datadog_sync/utils/storage/local_file.py index 8982e837..eb694f6f 100644 --- a/datadog_sync/utils/storage/local_file.py +++ b/datadog_sync/utils/storage/local_file.py @@ -6,7 +6,7 @@ import json import logging import os -from typing import Dict, List, Optional, Tuple +from typing import Dict, Optional, Tuple from datadog_sync.constants import ( Origin, @@ -112,20 +112,6 @@ def write_resources_file(self, origin: Origin, data: StorageData) -> None: with open(filename, "w+", encoding="utf-8") as out_file: json.dump(value, out_file) - def get_by_ids(self, origin: Origin, exact_ids: Dict[str, List[str]]) -> StorageData: - """Load specific resources by ID. Constructs filenames directly without listing.""" - if not self.resource_per_file: - raise ValueError("get_by_ids() requires --resource-per-file. " "Re-run with --resource-per-file enabled.") - data = StorageData() - for resource_type, ids in exact_ids.items(): - for resource_id in ids: - src, dst = self.get_single(resource_type, resource_id) - if origin in [Origin.SOURCE, Origin.ALL] and src is not None: - data.source[resource_type][resource_id] = src - if origin in [Origin.DESTINATION, Origin.ALL] and dst is not None: - data.destination[resource_type][resource_id] = dst - return data - def get_single(self, resource_type: str, resource_id: str) -> Tuple[Optional[Dict], Optional[Dict]]: """Load one resource's source and destination state by ID. diff --git a/tests/unit/test_minimize_reads_id_targeted.py b/tests/unit/test_minimize_reads_id_targeted.py new file mode 100644 index 00000000..9c2cf48e --- /dev/null +++ b/tests/unit/test_minimize_reads_id_targeted.py @@ -0,0 +1,435 @@ +# Unless explicitly stated otherwise all files in this repository are licensed +# under the 3-clause BSD style license (see LICENSE). +# This product includes software developed at Datadog (https://www.datadoghq.com/). +# Copyright 2019 Datadog, Inc. + +"""Tests for PR 3: ID-targeted loading and targeted dependency loading.""" + +import json +from pathlib import Path +from unittest.mock import MagicMock, patch + +from botocore.exceptions import ClientError + +from datadog_sync.constants import Origin +from datadog_sync.utils.storage._base_storage import StorageData +from datadog_sync.utils.storage.local_file import LocalFile +from datadog_sync.utils.storage.storage_types import StorageType + + +# ─── Helpers ──────────────────────────────────────────────────────────────── + + +def make_exact_id_filter(resource_type: str, resource_id: str): + """Build a Filter object matching ExactMatch id=resource_id. + + Mirrors process_filters() behavior: ExactMatch wraps value as ^...$ + without re.escape, since IDs are literal strings passed directly. 
+ """ + import re + from datadog_sync.utils.filter import Filter + + return Filter( + resource_type=resource_type, + attr_name="id", + attr_re=re.compile(f"^{resource_id}$"), + operator="exactmatch", + ) + + +def make_title_filter(resource_type: str, title_value: str): + """Build a Filter object matching by title (non-ID field).""" + import re + from datadog_sync.utils.filter import Filter + + return Filter( + resource_type=resource_type, + attr_name="title", + attr_re=re.compile(f"^{title_value}$"), + operator="exactmatch", + ) + + +# ─── extract_exact_id_filters ─────────────────────────────────────────────── + + +class TestExtractExactIdFilters: + def test_happy_path_single_type(self): + from datadog_sync.utils.configuration import extract_exact_id_filters + + filters = { + "dashboards": [make_exact_id_filter("dashboards", "dash-1"), make_exact_id_filter("dashboards", "dash-2")] + } + result = extract_exact_id_filters(filters, "or", ["dashboards"]) + assert result == {"dashboards": ["dash-1", "dash-2"]} + + def test_non_id_field_returns_none(self): + from datadog_sync.utils.configuration import extract_exact_id_filters + + filters = {"dashboards": [make_title_filter("dashboards", "My Dashboard")]} + assert extract_exact_id_filters(filters, "or", ["dashboards"]) is None + + def test_and_operator_returns_none(self): + from datadog_sync.utils.configuration import extract_exact_id_filters + + filters = {"dashboards": [make_exact_id_filter("dashboards", "dash-1")]} + assert extract_exact_id_filters(filters, "and", ["dashboards"]) is None + + def test_no_filters_returns_none(self): + from datadog_sync.utils.configuration import extract_exact_id_filters + + assert extract_exact_id_filters({}, "or", ["dashboards"]) is None + + def test_missing_type_in_filters_returns_none(self): + """If --resources=dashboards,monitors but only dashboard filters → fallback.""" + from datadog_sync.utils.configuration import extract_exact_id_filters + + filters = {"dashboards": [make_exact_id_filter("dashboards", "dash-1")]} + # monitors has no filters → can't use ID-targeted for both types + assert extract_exact_id_filters(filters, "or", ["dashboards", "monitors"]) is None + + def test_or_operator_case_insensitive(self): + from datadog_sync.utils.configuration import extract_exact_id_filters + + filters = {"roles": [make_exact_id_filter("roles", "role-1")]} + result = extract_exact_id_filters(filters, "OR", ["roles"]) + assert result == {"roles": ["role-1"]} + + def test_end_to_end_through_process_filters(self): + """extract_exact_id_filters works end-to-end with process_filters output.""" + from datadog_sync.utils.configuration import extract_exact_id_filters + from datadog_sync.utils.filter import process_filters + + filters = process_filters( + [ + "Type=dashboards;Name=id;Value=dash-1;Operator=ExactMatch", + "Type=dashboards;Name=id;Value=dash-2;Operator=ExactMatch", + ] + ) + result = extract_exact_id_filters(filters, "or", ["dashboards"]) + assert result == {"dashboards": ["dash-1", "dash-2"]} + + +# ─── State with exact_ids ─────────────────────────────────────────────────── + + +class TestStateExactIdLoading: + def test_state_loads_exact_ids_without_listing(self, tmp_path): + """With exact_ids, only specific files are fetched — no directory listing.""" + src_path = str(tmp_path / "source") + dst_path = str(tmp_path / "dest") + Path(src_path).mkdir() + Path(dst_path).mkdir() + + backend = LocalFile(source_resources_path=src_path, destination_resources_path=dst_path, resource_per_file=True) + data = 
+
+
+# ─── State with exact_ids ───────────────────────────────────────────────────
+
+
+class TestStateExactIdLoading:
+    def test_state_loads_exact_ids_without_listing(self, tmp_path):
+        """With exact_ids, only specific files are fetched — no directory listing."""
+        src_path = str(tmp_path / "source")
+        dst_path = str(tmp_path / "dest")
+        Path(src_path).mkdir()
+        Path(dst_path).mkdir()
+
+        backend = LocalFile(
+            source_resources_path=src_path,
+            destination_resources_path=dst_path,
+            resource_per_file=True,
+        )
+        data = StorageData()
+        data.source["dashboards"]["dash-1"] = {"id": "dash-1"}
+        data.source["dashboards"]["dash-2"] = {"id": "dash-2"}
+        data.source["monitors"]["mon-1"] = {"id": "mon-1"}
+        backend.put(Origin.SOURCE, data)
+
+        # get_by_ids should only load dash-1, not dash-2 or mon-1
+        result = backend.get_by_ids(Origin.SOURCE, {"dashboards": ["dash-1"]})
+        assert "dash-1" in result.source["dashboards"]
+        assert "dash-2" not in result.source["dashboards"]
+        assert "mon-1" not in result.source["monitors"]
+
+    def test_state_minimize_reads_true_with_exact_ids(self, tmp_path):
+        """State._minimize_reads is True when exact_ids is set."""
+        from datadog_sync.utils.state import State
+
+        src_path = str(tmp_path / "source")
+        dst_path = str(tmp_path / "dest")
+        Path(src_path).mkdir()
+        Path(dst_path).mkdir()
+
+        state = State(
+            type_=StorageType.LOCAL_FILE,
+            source_resources_path=src_path,
+            destination_resources_path=dst_path,
+            exact_ids={"dashboards": ["dash-1"]},
+            resource_per_file=True,
+        )
+        assert state._minimize_reads is True
+
+    def test_state_uses_get_by_ids_when_exact_ids_set(self, tmp_path):
+        """State.load_state() calls get_by_ids (not get) when exact_ids is set."""
+        from datadog_sync.utils.state import State
+
+        src_path = str(tmp_path / "source")
+        dst_path = str(tmp_path / "dest")
+        Path(src_path).mkdir()
+        Path(dst_path).mkdir()
+
+        # Write a file
+        backend = LocalFile(
+            source_resources_path=src_path,
+            destination_resources_path=dst_path,
+            resource_per_file=True,
+        )
+        data = StorageData()
+        data.source["dashboards"]["dash-1"] = {"id": "dash-1"}
+        backend.put(Origin.SOURCE, data)
+
+        state = State(
+            type_=StorageType.LOCAL_FILE,
+            source_resources_path=src_path,
+            destination_resources_path=dst_path,
+            exact_ids={"dashboards": ["dash-1"]},
+            resource_per_file=True,
+        )
+        assert "dash-1" in state.source["dashboards"]
+
+
+# ─── ensure_resource_loaded ─────────────────────────────────────────────────
+
+
+class TestEnsureResourceLoaded:
+    def _make_state_with_exact_ids(self, tmp_path):
+        from datadog_sync.utils.state import State
+
+        src_path = str(tmp_path / "source")
+        dst_path = str(tmp_path / "dest")
+        Path(src_path).mkdir()
+        Path(dst_path).mkdir()
+        return State(
+            type_=StorageType.LOCAL_FILE,
+            source_resources_path=src_path,
+            destination_resources_path=dst_path,
+            exact_ids={"dashboards": ["dash-1"]},
+            resource_per_file=True,
+        )
+
+    def test_ensure_resource_loaded_fetches_both_src_and_dst(self, tmp_path):
+        """ensure_resource_loaded loads both source and destination state."""
+        from datadog_sync.utils.state import State
+
+        src_path = str(tmp_path / "source")
+        dst_path = str(tmp_path / "dest")
+        Path(src_path).mkdir()
+        Path(dst_path).mkdir()
+
+        # Write a monitor to both source and destination
+        backend = LocalFile(
+            source_resources_path=src_path,
+            destination_resources_path=dst_path,
+            resource_per_file=True,
+        )
+        data = StorageData()
+        data.source["monitors"]["mon-1"] = {"id": "mon-1", "name": "SrcMonitor"}
+        data.destination["monitors"]["mon-1"] = {"id": "mon-1", "name": "DstMonitor"}
+        backend.put(Origin.ALL, data)
+
+        state = State(
+            type_=StorageType.LOCAL_FILE,
+            source_resources_path=src_path,
+            destination_resources_path=dst_path,
+            exact_ids={"dashboards": []},
+            resource_per_file=True,
+        )
+
+        state.ensure_resource_loaded("monitors", "mon-1")
+        assert state.source["monitors"]["mon-1"] == {"id": "mon-1", "name": "SrcMonitor"}
+        assert state.destination["monitors"]["mon-1"] == {"id": "mon-1", "name": "DstMonitor"}
+
+    def test_ensure_resource_loaded_skips_if_already_present(self, tmp_path):
+        """An entry already in state survives: the on-disk lookup misses,
+        so nothing is overwritten."""
+        state = self._make_state_with_exact_ids(tmp_path)
+        sentinel = {"id": "mon-1", "already": "loaded"}
+        state._data.source["monitors"]["mon-1"] = sentinel
+        # Call ensure — should not overwrite
+        state.ensure_resource_loaded("monitors", "mon-1")
+        assert state._data.source["monitors"]["mon-1"] is sentinel
+
+    def test_ensure_resource_loaded_noop_when_not_minimize_reads(self, tmp_path):
+        """ensure_resource_loaded is a no-op when not in minimize-reads mode."""
+        from datadog_sync.utils.state import State
+
+        src_path = str(tmp_path / "source")
+        dst_path = str(tmp_path / "dest")
+        Path(src_path).mkdir()
+        Path(dst_path).mkdir()
+
+        state = State(
+            type_=StorageType.LOCAL_FILE,
+            source_resources_path=src_path,
+            destination_resources_path=dst_path,
+        )
+        # No-op: minimize_reads is False
+        state.ensure_resource_loaded("monitors", "nonexistent")
+        assert "nonexistent" not in state._data.source["monitors"]
+
+    def test_ensure_resource_loaded_handles_missing_gracefully(self, tmp_path):
+        """Missing resource: state is unchanged for that ID."""
+        state = self._make_state_with_exact_ids(tmp_path)
+        state.ensure_resource_loaded("monitors", "nonexistent-id")
+        assert "nonexistent-id" not in state._data.source["monitors"]
+
+    def test_ensure_resource_loaded_partial_backend_failure(self, tmp_path):
+        """Source loaded but destination missing: src populated, dst absent."""
+        from datadog_sync.utils.state import State
+
+        src_path = str(tmp_path / "source")
+        dst_path = str(tmp_path / "dest")
+        Path(src_path).mkdir()
+        Path(dst_path).mkdir()
+
+        # Only write source, not destination
+        backend = LocalFile(
+            source_resources_path=src_path,
+            destination_resources_path=dst_path,
+            resource_per_file=True,
+        )
+        data = StorageData()
+        data.source["monitors"]["mon-1"] = {"id": "mon-1"}
+        backend.put(Origin.SOURCE, data)  # only source
+
+        state = State(
+            type_=StorageType.LOCAL_FILE,
+            source_resources_path=src_path,
+            destination_resources_path=dst_path,
+            exact_ids={"dashboards": []},
+            resource_per_file=True,
+        )
+        state.ensure_resource_loaded("monitors", "mon-1")
+        assert "mon-1" in state.source["monitors"]
+        assert "mon-1" not in state.destination["monitors"]
+
+    def test_ensure_resource_loaded_repeated_miss_does_not_refetch(self, tmp_path):
+        """Missing dependency: get_single called only once despite repeated calls."""
+        from datadog_sync.utils.state import State
+
+        src_path = str(tmp_path / "source")
+        dst_path = str(tmp_path / "dest")
+        Path(src_path).mkdir()
+        Path(dst_path).mkdir()
+
+        state = State(
+            type_=StorageType.LOCAL_FILE,
+            source_resources_path=src_path,
+            destination_resources_path=dst_path,
+            exact_ids={"dashboards": []},
+            resource_per_file=True,
+        )
+        with patch.object(state._storage, "get_single", return_value=(None, None)) as mock:
+            state.ensure_resource_loaded("monitors", "never-exists")
+            state.ensure_resource_loaded("monitors", "never-exists")
+            state.ensure_resource_loaded("monitors", "never-exists")
+        assert mock.call_count == 1
+
+
+# ─── get_single NotFound handling ───────────────────────────────────────────
+
+
+class TestGetSingleNotFound:
+    def test_s3_get_single_returns_none_for_nosuchkey(self):
+        """S3 NoSuchKey → (None, None), not an exception."""
+        with patch("datadog_sync.utils.storage.aws_s3_bucket.boto3") as mock_boto3:
+            mock_client = MagicMock()
+            mock_boto3.client.return_value = mock_client
+            mock_client.get_object.side_effect = ClientError(
+                {"Error": {"Code": "NoSuchKey", "Message": "The key does not exist"}},
+                "GetObject",
+            )
+            from datadog_sync.utils.storage.aws_s3_bucket import AWSS3Bucket
+
+            backend = AWSS3Bucket(
+                config={
+                    "aws_bucket_name": "test-bucket",
+                    "aws_region_name": "us-east-1",
+                    "aws_access_key_id": "",
+                    "aws_secret_access_key": "",
+                    "aws_session_token": "",
+                },
+            )
+            src, dst = backend.get_single("dashboards", "nonexistent")
+            assert src is None
+            assert dst is None
+
+    def test_localfile_get_single_returns_none_for_missing_file(self, tmp_path):
+        """LocalFile get_single returns (None, None) when file doesn't exist."""
+        src_path = str(tmp_path / "source")
+        dst_path = str(tmp_path / "dest")
+        Path(src_path).mkdir()
+        Path(dst_path).mkdir()
+        backend = LocalFile(
+            source_resources_path=src_path,
+            destination_resources_path=dst_path,
+            resource_per_file=True,
+        )
+        src, dst = backend.get_single("dashboards", "nonexistent-id")
+        assert src is None
+        assert dst is None
+
+
+# ─── get_by_ids partial match ───────────────────────────────────────────────
+
+
+class TestGetByIdsPartialMatch:
+    def test_get_by_ids_partial_match_localfile(self, tmp_path):
+        """get_by_ids() returns only found IDs — missing IDs are silently skipped."""
+        src_path = str(tmp_path / "source")
+        dst_path = str(tmp_path / "dest")
+        Path(src_path).mkdir()
+        Path(dst_path).mkdir()
+
+        backend = LocalFile(
+            source_resources_path=src_path,
+            destination_resources_path=dst_path,
+            resource_per_file=True,
+        )
+        data = StorageData()
+        data.source["dashboards"]["dash-1"] = {"id": "dash-1"}
+        backend.put(Origin.SOURCE, data)
+
+        # dash-1 exists, dash-2 doesn't
+        result = backend.get_by_ids(Origin.SOURCE, {"dashboards": ["dash-1", "dash-2"]})
+        assert "dash-1" in result.source["dashboards"]
+        assert "dash-2" not in result.source["dashboards"]
+
+    def test_s3_get_by_ids_partial_match(self):
+        """S3 get_by_ids() gracefully skips IDs that produce NoSuchKey."""
+        with patch("datadog_sync.utils.storage.aws_s3_bucket.boto3") as mock_boto3:
+            mock_client = MagicMock()
+            mock_boto3.client.return_value = mock_client
+
+            # dash-1 exists, dash-2 → NoSuchKey
+            nosuchkey = ClientError({"Error": {"Code": "NoSuchKey", "Message": ""}}, "GetObject")
+
+            def get_object_side_effect(**kwargs):
+                key = kwargs["Key"]
+                if "dash-1" in key:
+                    return {"Body": io.BytesIO(json.dumps({"dash-1": {"id": "dash-1"}}).encode())}
+                raise nosuchkey
+
+            mock_client.get_object.side_effect = get_object_side_effect
+
+            from datadog_sync.utils.storage.aws_s3_bucket import AWSS3Bucket
+
+            backend = AWSS3Bucket(
+                config={
+                    "aws_bucket_name": "test-bucket",
+                    "aws_region_name": "us-east-1",
+                    "aws_access_key_id": "",
+                    "aws_secret_access_key": "",
+                    "aws_session_token": "",
+                },
+                resource_per_file=True,
+            )
+
+            result = backend.get_by_ids(Origin.SOURCE, {"dashboards": ["dash-1", "dash-2"]})
+            assert "dash-1" in result.source["dashboards"]
+            assert "dash-2" not in result.source["dashboards"]
+
+
+# ─── Backward compatibility ─────────────────────────────────────────────────
+
+
+class TestBackwardCompatibility:
+    def test_full_load_unchanged_without_minimize_reads(self, tmp_path):
+        """State without exact_ids or resource_types loads everything."""
+        from datadog_sync.utils.state import State
+
+        src_path = str(tmp_path / "source")
+        dst_path = str(tmp_path / "dest")
+        Path(src_path).mkdir()
+        Path(dst_path).mkdir()
+
+        backend = LocalFile(
+            source_resources_path=src_path,
+            destination_resources_path=dst_path,
+            resource_per_file=True,
+        )
+        data = StorageData()
+        data.source["dashboards"]["dash-1"] = {"id": "dash-1"}
+        data.source["monitors"]["mon-1"] = {"id": "mon-1"}
+        backend.put(Origin.SOURCE, data)
+
+        state = State(
+            type_=StorageType.LOCAL_FILE,
+            source_resources_path=src_path,
+            destination_resources_path=dst_path,
+        )
+        assert state._minimize_reads is False
+        assert "dash-1" in state.source["dashboards"]
+        assert "mon-1" in state.source["monitors"]
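For reference, the strategy selection in `build_config()` reduces to the helper below. The function name is hypothetical, written only to summarize the three loading modes this PR leaves in place (full, type-scoped, ID-targeted):

```python
# Hypothetical summary helper; not part of the change, derived from build_config().
from typing import Dict, List, Optional, Tuple

from datadog_sync.utils.configuration import extract_exact_id_filters


def choose_loading_strategy(
    minimize_reads: bool,
    resources_csv: Optional[str],
    filters: Dict[str, list],
    filter_operator: str,
) -> Tuple[Optional[List[str]], Optional[Dict[str, list]]]:
    """Return (resource_types, exact_ids); (None, None) means full load."""
    if not (minimize_reads and resources_csv):
        return None, None  # full load, the pre-existing behavior
    raw_types = [r.strip().lower() for r in resources_csv.split(",") if r.strip()]
    exact_ids = extract_exact_id_filters(filters, filter_operator, raw_types)
    if exact_ids is not None:
        return None, exact_ids  # ID-targeted fast path
    return raw_types, None  # type-scoped fallback
```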