diff --git a/cumulusci/core/datasets.py b/cumulusci/core/datasets.py
index 7ebca75e13..8414df1bcc 100644
--- a/cumulusci/core/datasets.py
+++ b/cumulusci/core/datasets.py
@@ -162,6 +162,7 @@ def extract(
         extraction_definition: T.Optional[Path] = None,
         opt_in_only: T.Sequence[str] = (),
         loading_rules_file: T.Optional[Path] = None,
+        drop_missing_schema: bool = False,
     ):
         options = options or {}
         logger = logger or DEFAULT_LOGGER
@@ -177,6 +178,7 @@ def extract(
             org_config=self.org_config,
             sql_path=self.data_file,
             mapping=str(extract_mapping),
+            drop_missing_schema=drop_missing_schema,
         )
         task()
         loading_rules = self._parse_loading_rules_file(loading_rules_file)
@@ -233,7 +235,6 @@ def load(
         )
 
     def _sql_dataload(self, options: T.Dict):
-
         task = _make_task(
             LoadData,
             project_config=self.project_config,
diff --git a/cumulusci/tasks/bulkdata/extract_dataset_utils/calculate_dependencies.py b/cumulusci/tasks/bulkdata/extract_dataset_utils/calculate_dependencies.py
index 19162e35b9..64e237a66f 100644
--- a/cumulusci/tasks/bulkdata/extract_dataset_utils/calculate_dependencies.py
+++ b/cumulusci/tasks/bulkdata/extract_dataset_utils/calculate_dependencies.py
@@ -1,4 +1,5 @@
 import typing as T
+from logging import Logger, getLogger
 
 from cumulusci.salesforce_api.filterable_objects import NOT_EXTRACTABLE
 from cumulusci.salesforce_api.org_schema import Schema
@@ -8,6 +9,8 @@
     synthesize_declaration_for_sobject,
 )
 
+DEFAULT_LOGGER = getLogger(__file__)
+
 
 class SObjDependency(T.NamedTuple):
     table_name_from: str
@@ -39,6 +42,7 @@ def _collect_dependencies_for_sobject(
     fields: T.List[str],
     schema: Schema,
     only_required_fields: bool,
+    logger: Logger = DEFAULT_LOGGER,
 ) -> T.Dict[str, T.List[SObjDependency]]:
     """Ensure that required lookups are fulfilled for a single SObject
 
@@ -46,27 +50,40 @@
 
     Module-internal helper function.
""" dependencies = {} + obj_info = schema[source_sfobject] + for field_name in fields: - field_info = schema[source_sfobject].fields[field_name] - if not field_info.createable: # pragma: no cover - continue - references = field_info.referenceTo - if len(references) == 1 and not references[0] == "RecordType": - target = references[0] - - target_disallowed = target in NOT_EXTRACTABLE - field_disallowed = target_disallowed or not field_info.createable - field_allowed = not (only_required_fields or field_disallowed) - if field_info.requiredOnCreate or field_allowed: - dependencies.setdefault(source_sfobject, []).append( - SObjDependency( - source_sfobject, target, field_name, field_info.requiredOnCreate - ) - ) + field_info = obj_info.fields.get(field_name) + if not field_info: + logger.warning(f"Cannot find field {field_name} in {obj_info.name}.") + if field_info and field_info.createable: + dep = dependency_for_field( + field_info, only_required_fields, source_sfobject, field_name + ) + if dep: + sobjdeps = dependencies.setdefault(source_sfobject, []) + sobjdeps.append(dep) return dependencies +def dependency_for_field( + field_info, only_required_fields, source_sfobject, field_name +) -> T.Optional[SObjDependency]: + references = field_info.referenceTo + if len(references) == 1 and not references[0] == "RecordType": + target = references[0] + + target_disallowed = target in NOT_EXTRACTABLE + field_disallowed = target_disallowed or not field_info.createable + field_allowed = not (only_required_fields or field_disallowed) + if field_info.requiredOnCreate or field_allowed: + return SObjDependency( + source_sfobject, target, field_name, field_info.requiredOnCreate + ) + return None + + def extend_declarations_to_include_referenced_tables( decl_list: T.Sequence[SimplifiedExtractDeclaration], schema: Schema ) -> T.Sequence[SimplifiedExtractDeclaration]: diff --git a/cumulusci/tasks/bulkdata/extract_dataset_utils/synthesize_extract_declarations.py b/cumulusci/tasks/bulkdata/extract_dataset_utils/synthesize_extract_declarations.py index bfec28ef54..019d272833 100644 --- a/cumulusci/tasks/bulkdata/extract_dataset_utils/synthesize_extract_declarations.py +++ b/cumulusci/tasks/bulkdata/extract_dataset_utils/synthesize_extract_declarations.py @@ -1,6 +1,7 @@ import collections import re import typing as T +from logging import Logger, getLogger from pydantic import validator @@ -10,6 +11,8 @@ from .extract_yml import ExtractDeclaration, SFFieldGroupTypes, SFObjectGroupTypes from .hardcoded_default_declarations import DEFAULT_DECLARATIONS +DEFAULT_LOGGER = getLogger(__file__) + class SimplifiedExtractDeclaration(ExtractDeclaration): # a model where sf_object references a single sf_object @@ -70,7 +73,10 @@ def flatten_declarations( def _simplify_sfobject_declarations( - declarations, schema: Schema, opt_in_only: T.Sequence[str] + declarations: T.Iterable[ExtractDeclaration], + schema: Schema, + opt_in_only: T.Sequence[str], + logger: T.Optional[Logger] = DEFAULT_LOGGER, ) -> T.List[SimplifiedExtractDeclaration]: """Generate a new list of declarations such that all sf_object patterns (like OBJECTS(CUSTOM)) have been expanded into many declarations @@ -78,6 +84,13 @@ def _simplify_sfobject_declarations( atomic_declarations, group_declarations = partition( lambda d: d.is_group, declarations ) + missing_objs, atomic_declarations = partition( + lambda d: d.sf_object in schema.keys(), declarations + ) + if missing_objs: + logger.warning( + f"Cannot find objects: {','.join(o.sf_object for o in missing_objs)}" + 
+        )
     atomic_declarations = list(atomic_declarations)
     normalized_atomic_declarations = _normalize_user_supplied_simple_declarations(
         atomic_declarations, DEFAULT_DECLARATIONS
diff --git a/cumulusci/tasks/bulkdata/extract_dataset_utils/tests/test_synthesize_extract_declarations.py b/cumulusci/tasks/bulkdata/extract_dataset_utils/tests/test_synthesize_extract_declarations.py
index 8fb4033cb6..9db602e707 100644
--- a/cumulusci/tasks/bulkdata/extract_dataset_utils/tests/test_synthesize_extract_declarations.py
+++ b/cumulusci/tasks/bulkdata/extract_dataset_utils/tests/test_synthesize_extract_declarations.py
@@ -315,7 +315,7 @@ def test_required_lookups__pulled_in(self, org_config):
             )
         )
 
-    def test_parse_real_file(self, cumulusci_test_repo_root, org_config):
+    def test_parse_real_file(self, cumulusci_test_repo_root, org_config, caplog):
         declarations = ExtractRulesFile.parse_extract(
             cumulusci_test_repo_root / "datasets/test_minimal.extract.yml"
         )
@@ -338,11 +338,15 @@ def test_parse_real_file(self, cumulusci_test_repo_root, org_config):
             include_counts=True,
         ) as schema:
             decls = flatten_declarations(declarations.values(), schema)
+            logs = str(caplog.record_tuples)
+            assert "MissingFieldShouldWarn" in logs
+            assert "MissingObjectShouldWarn__c" in logs
         decls = {decl.sf_object: decl for decl in decls}
         assert decls["Opportunity"].fields == [
             "Name",
             "ContactId",
             "AccountId",
+            "MissingFieldShouldWarn",
             "CloseDate",  # pull these in because they required
             "StageName",
         ]
diff --git a/cumulusci/tasks/bulkdata/generate_mapping_utils/generate_mapping_from_declarations.py b/cumulusci/tasks/bulkdata/generate_mapping_utils/generate_mapping_from_declarations.py
index 8912bb6e57..55a810fabe 100644
--- a/cumulusci/tasks/bulkdata/generate_mapping_utils/generate_mapping_from_declarations.py
+++ b/cumulusci/tasks/bulkdata/generate_mapping_utils/generate_mapping_from_declarations.py
@@ -98,8 +98,8 @@ def is_lookup(field_name):
         # Record types are not treated as lookup.
         if field_name == "RecordTypeId":
             return False
-        schema_info_for_field = sobject_schema_info.fields[field_name]
-        target = schema_info_for_field.referenceTo
+        schema_info_for_field = sobject_schema_info.fields.get(field_name)
+        target = schema_info_for_field.referenceTo if schema_info_for_field else None
         return target
 
     simple_fields, lookups = partition(is_lookup, decl.fields)
diff --git a/cumulusci/tasks/sample_data/capture_sample_data.py b/cumulusci/tasks/sample_data/capture_sample_data.py
index 33785263bc..5885020890 100644
--- a/cumulusci/tasks/sample_data/capture_sample_data.py
+++ b/cumulusci/tasks/sample_data/capture_sample_data.py
@@ -3,6 +3,7 @@
 from cumulusci.core.config.org_config import OrgConfig
 from cumulusci.core.datasets import Dataset
 from cumulusci.core.exceptions import TaskOptionsError
+from cumulusci.core.utils import process_bool_arg
 from cumulusci.salesforce_api.filterable_objects import OPT_IN_ONLY
 from cumulusci.salesforce_api.org_schema import Filters, get_org_schema
 from cumulusci.tasks.salesforce.BaseSalesforceApiTask import BaseSalesforceApiTask
@@ -30,6 +31,9 @@ class CaptureSampleData(BaseSalesforceApiTask):
                 "Multiple files can be comma separated."
             )
         },
+        "drop_missing_schema": {
+            "description": "Set to True to skip any missing objects or fields instead of stopping with an error."
+        },
     }
 
     org_config: OrgConfig
@@ -37,9 +41,13 @@ class CaptureSampleData(BaseSalesforceApiTask):
     def _init_options(self, kwargs):
         super()._init_options(kwargs)
         self.options.setdefault("dataset", "default")
+        self.options["drop_missing_schema"] = process_bool_arg(
+            self.options.get("drop_missing_schema") or False
+        )
 
     def _run_task(self):
         name = self.options["dataset"]
+        drop_missing_schema = self.options["drop_missing_schema"]
         if extraction_definition := self.options.get("extraction_definition"):
             extraction_definition = Path(extraction_definition)
             if not extraction_definition.exists():
@@ -71,7 +79,12 @@ def _run_task(self):
             opt_in_only += OPT_IN_ONLY
 
         self.return_values = dataset.extract(
-            {}, self.logger, extraction_definition, opt_in_only, loading_rules
+            {},
+            self.logger,
+            extraction_definition,
+            opt_in_only,
+            loading_rules,
+            drop_missing_schema,
         )
         self.logger.info(f"{verb} dataset '{name}' in 'datasets/{name}'")
         return self.return_values
diff --git a/cumulusci/tasks/sample_data/test_capture_sample_data.py b/cumulusci/tasks/sample_data/test_capture_sample_data.py
index d4819de50a..c8401e3226 100644
--- a/cumulusci/tasks/sample_data/test_capture_sample_data.py
+++ b/cumulusci/tasks/sample_data/test_capture_sample_data.py
@@ -59,8 +59,9 @@ def test_simple_extract(
         # default dataset should created
         Dataset.assert_any_call("default", mock.ANY, mock.ANY, org_config, mock.ANY)
         # and extracted
+        drop_missing_schema = False
         Dataset().__enter__().extract.assert_called_with(
-            {}, task.logger, None, mock.ANY, None
+            {}, task.logger, None, mock.ANY, None, drop_missing_schema
         )
 
     @mock.patch("cumulusci.tasks.sample_data.capture_sample_data.Dataset")
@@ -86,6 +87,7 @@ def test_named_extract(
                 "dataset": "mydataset",
                 "extraction_definition": extraction_definition,
                 "loading_rules": loading_rules,
+                "drop_missing_schema": True,
             },
         )
         task()
@@ -96,7 +98,12 @@ def test_named_extract(
         Dataset().__enter__().create.assert_called_with()
         # and extracted
         Dataset().__enter__().extract.assert_called_with(
-            {}, task.logger, extraction_definition, mock.ANY, loading_rules
+            {},
+            task.logger,
+            extraction_definition,
+            mock.ANY,
+            loading_rules,
+            task.options["drop_missing_schema"],
         )
 
     @mock.patch("cumulusci.tasks.sample_data.capture_sample_data.Dataset")
diff --git a/datasets/test_minimal.extract.yml b/datasets/test_minimal.extract.yml
index 8828b813bc..4fa9c2c5db 100644
--- a/datasets/test_minimal.extract.yml
+++ b/datasets/test_minimal.extract.yml
@@ -5,9 +5,12 @@ extract:
             - Name
            - ContactId
             - AccountId
+            - MissingFieldShouldWarn
     Contact: # Extract required Contact fields
         fields: FIELDS(REQUIRED)
     Account: # Extract required Account fields
         fields: FIELDS(REQUIRED)
     OBJECTS(CUSTOM): # Extract every field on Custom objects
         fields: FIELDS(ALL)
+    MissingObjectShouldWarn__c: # Filtered out because not in org
+        fields: FIELDS(ALL)
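
The recurring pattern across these files is the same: hard lookups such as schema[sf_object] or fields[field_name], which raise KeyError when the org lacks an object or field, are replaced by .get() plus a logged warning, so the extract degrades gracefully. The following is a minimal, self-contained sketch of that warn-and-skip behavior with illustrative names (present_fields and the toy schema mapping are hypothetical, not CumulusCI's real Schema API):

    import logging
    import typing as T

    logger = logging.getLogger(__name__)


    def present_fields(
        schema: T.Mapping[str, T.Mapping[str, dict]],
        sobject: str,
        field_names: T.Iterable[str],
    ) -> T.List[str]:
        """Keep only the fields that exist in the org schema; warn on the rest."""
        obj_fields = schema.get(sobject)
        if obj_fields is None:
            # Whole object is absent from the org: warn once and skip it.
            logger.warning(f"Cannot find object {sobject}.")
            return []
        kept = []
        for name in field_names:
            if name in obj_fields:
                kept.append(name)
            else:
                # Field is absent: warn and continue instead of raising KeyError.
                logger.warning(f"Cannot find field {name} in {sobject}.")
        return kept


    # Mirrors the test fixtures above: the missing field is warned about, not fatal.
    fake_schema = {"Opportunity": {"Name": {}, "AccountId": {}}}
    assert present_fields(
        fake_schema, "Opportunity", ["Name", "MissingFieldShouldWarn"]
    ) == ["Name"]
    assert present_fields(fake_schema, "MissingObjectShouldWarn__c", ["Name"]) == []

Note that materializing the partitioned iterators (missing_objs = list(missing_objs) above) matters: the tee-based partition recipe returns iterators, and an iterator is always truthy, so warning only "if missing_objs" requires a list first.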
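
For context, the new option would be enabled per project or ad hoc. The option name comes from this diff; the task registration name and exact invocation below are assumptions based on standard CumulusCI conventions (cci task run <task> -o option value, and task options under tasks: in cumulusci.yml):

    # cumulusci.yml -- assumed placement for the option added in this diff
    tasks:
        capture_sample_data:
            options:
                drop_missing_schema: True

or from the command line: cci task run capture_sample_data -o drop_missing_schema True.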