From 0cd7679ab7a173cfc8f91dda9dad257e29b7d46c Mon Sep 17 00:00:00 2001 From: Laveesh Rohra Date: Wed, 5 Jan 2022 22:49:46 +0530 Subject: [PATCH 01/84] Fix failing test for releases (#2458) --- tests/ga/test_update.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index 25fd599358..9ab3f4bfde 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -1345,7 +1345,7 @@ def test_run_latest_creates_only_one_signal_handler(self, mock_signal): self.assertEqual(0, mock_signal.call_count) def test_get_latest_agent_should_return_latest_agent_even_on_bad_error_json(self): - self.prepare_agents() + dst_ver = self.prepare_agents() # Add a malformed error.json file in all existing agents for agent_dir in self.agent_dirs(): error_file_path = os.path.join(agent_dir, AGENT_ERROR_FILE) @@ -1353,7 +1353,7 @@ def test_get_latest_agent_should_return_latest_agent_even_on_bad_error_json(self f.write("") latest_agent = self.update_handler.get_latest_agent() - self.assertEqual(latest_agent.name, 'WALinuxAgent-9.9.9.28', "Latest agent is invalid") + self.assertEqual(latest_agent.version, dst_ver, "Latest agent version is invalid") def _test_run(self, invocations=1, calls=1, enable_updates=False, sleep_interval=(6,)): conf.get_autoupdate_enabled = Mock(return_value=enable_updates) From 88f0501c07b6fca02a03e981b5a6d5f4accae4de Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Thu, 6 Jan 2022 09:46:36 -0800 Subject: [PATCH 02/84] Cgroup: Remove the path section under services. 
(#2459) * remove path from service section * fix test cases --- azurelinuxagent/common/cgroupconfigurator.py | 4 ++-- azurelinuxagent/ga/exthandlers.py | 6 ++---- tests/common/test_cgroupconfigurator.py | 15 +++++---------- 3 files changed, 9 insertions(+), 16 deletions(-) diff --git a/azurelinuxagent/common/cgroupconfigurator.py b/azurelinuxagent/common/cgroupconfigurator.py index b0bb012ef5..92abe442ce 100644 --- a/azurelinuxagent/common/cgroupconfigurator.py +++ b/azurelinuxagent/common/cgroupconfigurator.py @@ -780,7 +780,7 @@ def set_extension_services_cpu_memory_quota(self, services_list): if self.enabled() and services_list is not None: for service in services_list: service_name = service.get('name', None) - unit_file_path = service.get('path', None) + unit_file_path = systemd.get_unit_file_install_path() if service_name is not None and unit_file_path is not None: files_to_create = [] drop_in_path = os.path.join(unit_file_path, "{0}.d".format(service_name)) @@ -804,7 +804,7 @@ def remove_extension_services_drop_in_files(self, services_list): if services_list is not None: for service in services_list: service_name = service.get('name', None) - unit_file_path = service.get('path', None) + unit_file_path = systemd.get_unit_file_install_path() if service_name is not None and unit_file_path is not None: files_to_cleanup = [] drop_in_path = os.path.join(unit_file_path, "{0}.d".format(service_name)) diff --git a/azurelinuxagent/ga/exthandlers.py b/azurelinuxagent/ga/exthandlers.py index d1ca9a4fe8..a654806e60 100644 --- a/azurelinuxagent/ga/exthandlers.py +++ b/azurelinuxagent/ga/exthandlers.py @@ -2312,8 +2312,7 @@ def get_resource_limits(self, extension_name, str_version): "resourceLimits": { "services": [ { - "name": "mdsd.service", - "path": "/lib/systemd/system" + "name": "mdsd.service" } ] } @@ -2324,8 +2323,7 @@ def get_resource_limits(self, extension_name, str_version): "resourceLimits": { "services": [ { - "name": "azuremonitoragent.service", - "path": 
"/lib/systemd/system" + "name": "azuremonitoragent.service" } ] } diff --git a/tests/common/test_cgroupconfigurator.py b/tests/common/test_cgroupconfigurator.py index 1b6995c127..448c4b2c79 100644 --- a/tests/common/test_cgroupconfigurator.py +++ b/tests/common/test_cgroupconfigurator.py @@ -578,8 +578,7 @@ def mock_popen(command, *args, **kwargs): def test_it_should_set_extension_services_cpu_memory_quota(self): service_list = [ { - "name": "extension.service", - "path": "/lib/systemd/system" + "name": "extension.service" } ] with self._get_cgroup_configurator() as configurator: @@ -598,8 +597,7 @@ def test_it_should_set_extension_services_cpu_memory_quota(self): def test_it_should_set_extension_services_when_quotas_not_defined(self): service_list = [ { - "name": "extension.service", - "path": "/lib/systemd/system", + "name": "extension.service" } ] with self._get_cgroup_configurator() as configurator: @@ -617,8 +615,7 @@ def test_it_should_set_extension_services_when_quotas_not_defined(self): def test_it_should_start_tracking_extension_services_cgroups(self): service_list = [ { - "name": "extension.service", - "path": "/lib/systemd/system", + "name": "extension.service" } ] with self._get_cgroup_configurator() as configurator: @@ -633,8 +630,7 @@ def test_it_should_start_tracking_extension_services_cgroups(self): def test_it_should_stop_tracking_extension_services_cgroups(self): service_list = [ { - "name": "extension.service", - "path": "/lib/systemd/system", + "name": "extension.service" } ] @@ -653,8 +649,7 @@ def test_it_should_stop_tracking_extension_services_cgroups(self): def test_it_should_remove_extension_services_drop_in_files(self): service_list = [ { - "name": "extension.service", - "path": "/lib/systemd/system", + "name": "extension.service" } ] with self._get_cgroup_configurator() as configurator: From 99f7ec838b256148c9f0dddebe66ef1af456cade Mon Sep 17 00:00:00 2001 From: Kevin Clark Date: Fri, 7 Jan 2022 13:45:34 -0800 Subject: [PATCH 03/84] Add 
Rocky Linux support to Azure Linux Agent (#2463) Co-authored-by: nazunalika --- azurelinuxagent/common/osutil/factory.py | 3 ++- azurelinuxagent/pa/rdma/factory.py | 4 +++- setup.py | 2 +- tests/common/osutil/test_factory.py | 7 +++++++ 4 files changed, 13 insertions(+), 3 deletions(-) diff --git a/azurelinuxagent/common/osutil/factory.py b/azurelinuxagent/common/osutil/factory.py index b8f4291b3e..2ed4be78ba 100644 --- a/azurelinuxagent/common/osutil/factory.py +++ b/azurelinuxagent/common/osutil/factory.py @@ -102,7 +102,8 @@ def _get_osutil(distro_name, distro_code_name, distro_version, distro_full_name) return DebianOSBaseUtil() - if distro_name in ("redhat", "rhel", "centos", "oracle", "almalinux", "cloudlinux"): + if distro_name in ("redhat", "rhel", "centos", "oracle", "almalinux", + "cloudlinux", "rocky"): if Version(distro_version) < Version("7"): return Redhat6xOSUtil() diff --git a/azurelinuxagent/pa/rdma/factory.py b/azurelinuxagent/pa/rdma/factory.py index 8fb506655d..c114dc3803 100644 --- a/azurelinuxagent/pa/rdma/factory.py +++ b/azurelinuxagent/pa/rdma/factory.py @@ -38,7 +38,9 @@ def get_rdma_handler( ): return SUSERDMAHandler() - if distro_full_name in ('CentOS Linux', 'CentOS', 'Red Hat Enterprise Linux Server', 'AlmaLinux', 'CloudLinux'): + if distro_full_name in ('CentOS Linux', 'CentOS', + 'Red Hat Enterprise Linux Server', 'AlmaLinux', + 'CloudLinux', 'Rocky Linux'): return CentOSRDMAHandler(distro_version) if distro_full_name == 'Ubuntu': diff --git a/setup.py b/setup.py index 12c9e1d619..4c38585d87 100755 --- a/setup.py +++ b/setup.py @@ -96,7 +96,7 @@ def get_data_files(name, version, fullname): # pylint: disable=R0912 systemd_dir_path = osutil.get_systemd_unit_file_install_path() agent_bin_path = osutil.get_agent_bin_path() - if name in ('redhat', 'centos', 'almalinux', 'cloudlinux'): # pylint: disable=R1714 + if name in ('redhat', 'centos', 'almalinux', 'cloudlinux', 'rocky'): if version.startswith("8.2"): # redhat8+ default to py3 
set_bin_files(data_files, dest=agent_bin_path, diff --git a/tests/common/osutil/test_factory.py b/tests/common/osutil/test_factory.py index 9a76eb8237..5007242733 100644 --- a/tests/common/osutil/test_factory.py +++ b/tests/common/osutil/test_factory.py @@ -258,6 +258,13 @@ def test_get_osutil_it_should_return_redhat(self): self.assertTrue(isinstance(ret, RedhatOSUtil)) self.assertEqual(ret.get_service_name(), "waagent") + ret = _get_osutil(distro_name="rocky", + distro_code_name="", + distro_full_name="", + distro_version="8") + self.assertTrue(isinstance(ret, RedhatOSUtil)) + self.assertEqual(ret.get_service_name(), "waagent") + def test_get_osutil_it_should_return_euleros(self): ret = _get_osutil(distro_name="euleros", distro_code_name="", From c1f1dbdcb22c57988bbc78f809c77fe69641e256 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Fri, 7 Jan 2022 16:00:32 -0800 Subject: [PATCH 04/84] supprees memory cgroup warning (#2464) --- dcr/scenario_utils/check_waagent_log.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/dcr/scenario_utils/check_waagent_log.py b/dcr/scenario_utils/check_waagent_log.py index 1889082174..93f7c559eb 100644 --- a/dcr/scenario_utils/check_waagent_log.py +++ b/dcr/scenario_utils/check_waagent_log.py @@ -93,6 +93,12 @@ def check_waagent_log_for_errors(waagent_log=AGENT_LOG_FILE, ignore=None): { 'message': r"WARNING EnvHandler ExtHandler Dhcp client is not running." 
}, + # 2021-12-20T07:46:23.020197Z INFO ExtHandler ExtHandler [CGW] The agent's process is not within a memory cgroup + { + 'message': r"The agent's process is not within a memory cgroup", + 'if': lambda log_line: re.match(r"((centos7\.8)|(redhat7\.8)|(redhat8\.2))\D*", distro, + flags=re.IGNORECASE) + } ] if ignore is not None: From c964d70282de17ab439be21d7ab32373c3965f59 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Tue, 11 Jan 2022 17:18:34 -0800 Subject: [PATCH 05/84] Cent7.9 in DCR v2 (#2465) --- dcr/scenario_utils/check_waagent_log.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dcr/scenario_utils/check_waagent_log.py b/dcr/scenario_utils/check_waagent_log.py index 93f7c559eb..b3795fd730 100644 --- a/dcr/scenario_utils/check_waagent_log.py +++ b/dcr/scenario_utils/check_waagent_log.py @@ -96,7 +96,7 @@ def check_waagent_log_for_errors(waagent_log=AGENT_LOG_FILE, ignore=None): # 2021-12-20T07:46:23.020197Z INFO ExtHandler ExtHandler [CGW] The agent's process is not within a memory cgroup { 'message': r"The agent's process is not within a memory cgroup", - 'if': lambda log_line: re.match(r"((centos7\.8)|(redhat7\.8)|(redhat8\.2))\D*", distro, + 'if': lambda log_line: re.match(r"((centos7\.8)|(centos7\.9)|(redhat7\.8)|(redhat8\.2))\D*", distro, flags=re.IGNORECASE) } ] From 2f6b4467c717dc7d404a8e3cabdb2b8dccb8eb00 Mon Sep 17 00:00:00 2001 From: Laveesh Rohra Date: Fri, 14 Jan 2022 02:05:04 +0530 Subject: [PATCH 06/84] GA Versioning: Download required version if available (#2467) --- azurelinuxagent/ga/update.py | 139 ++++++++++---- .../ext_conf_missing_requested_version.xml | 38 ++++ tests/data/wire/ga_manifest.xml | 6 + tests/ga/test_update.py | 174 +++++++++++++++++- tests/protocol/mockwiredata.py | 1 + 5 files changed, 324 insertions(+), 34 deletions(-) create mode 100644 tests/data/wire/ext_conf_missing_requested_version.xml diff --git a/azurelinuxagent/ga/update.py 
b/azurelinuxagent/ga/update.py index 293dd2313a..83db4be7a1 100644 --- a/azurelinuxagent/ga/update.py +++ b/azurelinuxagent/ga/update.py @@ -46,7 +46,7 @@ from azurelinuxagent.common.exception import ResourceGoneError, UpdateError, ExitException, AgentUpgradeExitException from azurelinuxagent.common.future import ustr from azurelinuxagent.common.osutil import get_osutil, systemd -from azurelinuxagent.common.protocol.restapi import VMAgentUpdateStatus, VMAgentUpdateStatuses +from azurelinuxagent.common.protocol.restapi import VMAgentUpdateStatus, VMAgentUpdateStatuses, ExtHandlerPackageList from azurelinuxagent.common.protocol.util import get_protocol_util from azurelinuxagent.common.protocol.hostplugin import HostPluginProtocol from azurelinuxagent.common.utils import shellutil @@ -449,6 +449,15 @@ def _try_update_goal_state(self, protocol): return False return True + def __goal_state_updated(self, incarnation): + """ + This function returns if the Goal State updated. + We currently rely on the incarnation number to determine that; i.e. 
if it changed from the last processed GS + """ + # TODO: This check should be based on the ExtensionsGoalState.id property + # (this property abstracts incarnation/etag logic based on the delivery pipeline of the Goal State) + return incarnation != self.last_incarnation + def _process_goal_state(self, exthandlers_handler, remote_access_handler): protocol = exthandlers_handler.protocol if not self._try_update_goal_state(protocol): @@ -479,7 +488,7 @@ def _process_goal_state(self, exthandlers_handler, remote_access_handler): incarnation = protocol.get_incarnation() try: - if incarnation != self.last_incarnation: # TODO: This check should be based in the etag for the extensions goal state + if self.__goal_state_updated(incarnation): if not self._extensions_summary.converged: message = "A new goal state was received, but not all the extensions in the previous goal state have completed: {0}".format(self._extensions_summary) logger.warn(message) @@ -491,9 +500,9 @@ def _process_goal_state(self, exthandlers_handler, remote_access_handler): # report status always, even if the goal state did not change # do it before processing the remote access, since that operation can take a long time - self._report_status(exthandlers_handler, incarnation_changed=incarnation != self.last_incarnation) + self._report_status(exthandlers_handler, incarnation_changed=self.__goal_state_updated(incarnation)) - if incarnation != self.last_incarnation: + if self.__goal_state_updated(incarnation): remote_access_handler.run() finally: self.last_incarnation = incarnation @@ -903,7 +912,9 @@ def _shutdown(self): def _check_and_download_agent_if_upgrade_available(self, protocol, base_version=CURRENT_VERSION): """ - This function periodically (1hr by default) checks if new Agent upgrade is available and downloads it on filesystem if it is. + This function downloads the new agent if an update is available. 
+ If a requested version is available in goal state, then only that version is downloaded (new-update model) + Else, we periodically (1hr by default) checks if new Agent upgrade is available and download it on filesystem if available (old-update model) rtype: Boolean return: True if current agent is no longer available or an agent with a higher version number is available else False @@ -912,40 +923,103 @@ def _check_and_download_agent_if_upgrade_available(self, protocol, base_version= if not conf.get_autoupdate_enabled(): return False - now = time.time() - if self.last_attempt_time is not None: - next_attempt_time = self.last_attempt_time + conf.get_autoupdate_frequency() - else: - next_attempt_time = now - if next_attempt_time > now: - return False + def report_error(msg_, version=CURRENT_VERSION): + logger.warn(msg_) + add_event(AGENT_NAME, op=WALAEventOperation.Download, version=version, is_success=False, message=msg_) family = conf.get_autoupdate_gafamily() - logger.info("Checking for agent updates (family: {0})", family) - - self.last_attempt_time = now - + incarnation_changed = False try: - manifest_list, etag = protocol.get_vmagent_manifests() - + # Fetch the agent manifests from the latest Goal State + manifest_list, incarnation = protocol.get_vmagent_manifests() + incarnation_changed = self.__goal_state_updated(incarnation) manifests = [m for m in manifest_list if m.family == family and len(m.uris) > 0] if len(manifests) == 0: - logger.verbose(u"Incarnation {0} has no {1} agent updates", - etag, family) + logger.verbose( + u"No manifest links found for agent family: {0} for incarnation: {1}, skipping update check".format( + family, incarnation)) return False + except Exception as err: + # If there's some issues in fetching the agent manifests, report it only on incarnation change + if incarnation_changed: + msg = u"Exception retrieving agent manifests: {0}".format(textutil.format_exception(err)) + report_error(msg) + return False - pkg_list = 
protocol.get_vmagent_pkgs(manifests[0]) + requested_version = None + if conf.get_enable_ga_versioning() and manifests[0].is_requested_version_specified: + # If GA versioning is enabled and requested version present in GS, and it's a new GS, follow new logic + if incarnation_changed: + # With the new model, we will get a new GS when CRP wants us to auto-update using required version. + # If there's no new incarnation, don't proceed with anything + requested_version = manifests[0].requested_version + msg = "Found requested version in manifest: {0} for incarnation: {1}".format( + requested_version, incarnation) + logger.info(msg) + add_event(AGENT_NAME, op=WALAEventOperation.AgentUpgrade, is_success=True, message=msg) + else: + # If incarnation didn't change, don't process anything. + return False + else: + # If no requested version specified in the Goal State, follow the old auto-update logic + # Note: If the first Goal State contains a requested version, this timer won't start (i.e. self.last_attempt_time won't be updated). + # If any subsequent goal state does not contain requested version, this timer will start then, and we will + # download all versions available in PIR and auto-update to the highest available version on that goal state. + now = time.time() + if self.last_attempt_time is not None: + next_attempt_time = self.last_attempt_time + conf.get_autoupdate_frequency() + else: + next_attempt_time = now + if next_attempt_time > now: + return False - # Set the agents to those available for download at least as - # current as the existing agent and remove from disk any agent - # no longer reported to the VM. - # Note: - # The code leaves on disk available, but blacklisted, agents - # so as to preserve the state. Otherwise, those agents could be - # again downloaded and inappropriately retried. 
+ logger.info("No requested version specified, checking for all versions for agent update (family: {0})", + family) + + self.last_attempt_time = now + + try: + # If we make it to this point, then either there is a requested version in a new GS (new auto-update model), + # or the 1hr time limit has elapsed for us to check the agent manifest for updates (old auto-update model). + pkg_list = ExtHandlerPackageList() + + # If the requested version is the current version, don't download anything; + # the call to purge() below will delete all other agents from disk + # In this case, no need to even fetch the GA family manifest as we don't need to download any agent. + if requested_version is not None and requested_version == CURRENT_VERSION: + packages_to_download = [] + logger.info("The requested version is running as the current version: {0}".format(requested_version)) + else: + pkg_list = protocol.get_vmagent_pkgs(manifests[0]) + packages_to_download = pkg_list.versions + + # Verify the requested version is in GA family manifest (if specified) + if requested_version is not None and requested_version != CURRENT_VERSION: + package_found = False + for pkg in pkg_list.versions: + if FlexibleVersion(pkg.version) == requested_version: + # Found a matching package, only download that one + packages_to_download = [pkg] + package_found = True + break + + if not package_found: + msg = "No matching package found in the agent manifest for requested version: {0} in incarnation: {1}, skipping agent update".format( + requested_version, incarnation) + report_error(msg, version=requested_version) + return False + + # Set the agents to those available for download at least as current as the existing agent + # or to the requested version (if specified) host = self._get_host_plugin(protocol=protocol) - self._set_agents([GuestAgent(pkg=pkg, host=host) for pkg in pkg_list.versions]) + self._set_agents([GuestAgent(pkg=pkg, host=host) for pkg in packages_to_download]) + # Remove from disk any 
agent no longer needed in the VM. + # If requested version is provided, this would delete all other agents present on the VM except the + # current one and the requested one if they're different, and only the current one if same. + # Note: + # The code leaves on disk available, but blacklisted, agents to preserve the state. + # Otherwise, those agents could be downloaded again and inappropriately retried. self._purge_agents() self._filter_blacklisted_agents() @@ -954,10 +1028,9 @@ def _check_and_download_agent_if_upgrade_available(self, protocol, base_version= return not self._is_version_eligible(base_version) \ or (len(self.agents) > 0 and self.agents[0].version > base_version) - except Exception as e: # pylint: disable=W0612 - msg = u"Exception retrieving agent manifests: {0}".format(textutil.format_exception(e)) - add_event(AGENT_NAME, op=WALAEventOperation.Download, version=CURRENT_VERSION, is_success=False, - message=msg) + except Exception as err: + msg = u"Exception downloading agents for update: {0}".format(textutil.format_exception(err)) + report_error(msg) return False def _write_pid_file(self): diff --git a/tests/data/wire/ext_conf_missing_requested_version.xml b/tests/data/wire/ext_conf_missing_requested_version.xml new file mode 100644 index 0000000000..e68fcaf995 --- /dev/null +++ b/tests/data/wire/ext_conf_missing_requested_version.xml @@ -0,0 +1,38 @@ + + + + + Prod + 5.2.1.0 + + http://mock-goal-state/manifest_of_ga.xml + + + + Test + 5.2.1.0 + + http://mock-goal-state/manifest_of_ga.xml + + + + + + + + + + + {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"4037FBF5F1F3014F99B5D6C7799E9B20E6871CB3","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} + + + + + https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo + + + diff --git a/tests/data/wire/ga_manifest.xml 
b/tests/data/wire/ga_manifest.xml index 48a1f4f94c..e12f054916 100644 --- a/tests/data/wire/ga_manifest.xml +++ b/tests/data/wire/ga_manifest.xml @@ -25,6 +25,12 @@ 2.1.0http://mock-goal-state/ga-manifests/OSTCExtensions.WALinuxAgent__2.1.0 + + 9.9.9.10 + + http://mock-goal-state/ga-manifests/OSTCExtensions.WALinuxAgent__99999.0.0.0 + + 99999.0.0.0http://mock-goal-state/ga-manifests/OSTCExtensions.WALinuxAgent__99999.0.0.0 diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index 9ab3f4bfde..e40c05eaf3 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -1918,7 +1918,8 @@ def create_conf_mocks(self, hotfix_frequency, normal_frequency): return_value=hotfix_frequency): with patch("azurelinuxagent.common.conf.get_normal_upgrade_frequency", return_value=normal_frequency): - yield + with patch("azurelinuxagent.common.conf.get_autoupdate_gafamily", return_value="Prod"): + yield @contextlib.contextmanager def __get_update_handler(self, iterations=1, test_data=None, hotfix_frequency=1.0, normal_frequency=2.0, @@ -1934,12 +1935,14 @@ def get_handler(url, **kwargs): if HttpRequestPredicates.is_agent_package_request(url): agent_pkg = load_bin_data(self._get_agent_file_name(), self._agent_zip_dir) + protocol.mock_wire_data.call_counts['agentArtifact'] += 1 return ResponseMock(response=agent_pkg) return protocol.mock_wire_data.mock_http_get(url, **kwargs) protocol.set_http_handlers(http_get_handler=get_handler) with self.create_conf_mocks(hotfix_frequency, normal_frequency): with patch("azurelinuxagent.ga.update.add_event") as mock_telemetry: + update_handler._protocol = protocol yield update_handler, mock_telemetry def __assert_exit_code_successful(self, exit_mock): @@ -1958,6 +1961,13 @@ def __assert_agent_directories_available(self, versions): for version in versions: self.assertTrue(os.path.exists(self.agent_dir(version)), "Agent directory {0} not found".format(version)) + def __assert_agent_directories_exist_and_others_dont_exist(self, 
versions): + self.__assert_agent_directories_available(versions=versions) + other_agents = [agent_dir for agent_dir in self.agent_dirs() if + agent_dir not in [self.agent_dir(version) for version in versions]] + self.assertFalse(any(other_agents), + "All other agents should be purged from agent dir: {0}".format(other_agents)) + def __assert_no_agent_upgrade_telemetry(self, mock_telemetry): self.assertEqual(0, len([kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if "Agent upgrade discovered, updating to" in kwarg['message'] and kwarg[ @@ -2073,6 +2083,168 @@ def reload_conf(url, mock_wire_data): 'Discovered new {0} upgrade WALinuxAgent-99999.0.0.0; Will upgrade on or after'.format( AgentUpgradeType.Normal) in msg]), 0, "Error message not propagated properly") + def test_it_should_download_only_requested_version_if_available(self): + data_file = mockwiredata.DATA_FILE.copy() + data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + with self.__get_update_handler(test_data=data_file) as (update_handler, mock_telemetry): + with patch.object(conf, "get_enable_ga_versioning", return_value=True): + update_handler.run(debug=True) + + self.__assert_exit_code_successful(update_handler.exit_mock) + upgrade_event_msgs = [kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if + 'Agent upgrade discovered, updating to WALinuxAgent-9.9.9.10 -- exiting' in kwarg[ + 'message'] and kwarg['op'] == WALAEventOperation.AgentUpgrade] + self.assertEqual(1, len(upgrade_event_msgs), + "Did not find the event indicating that the agent was upgraded. 
Got: {0}".format( + mock_telemetry.call_args_list)) + self.__assert_agent_directories_exist_and_others_dont_exist(versions=["9.9.9.10"]) + + def test_it_should_cleanup_all_agents_except_requested_version_and_current_version(self): + data_file = mockwiredata.DATA_FILE.copy() + data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + + # Set the test environment by adding 20 random agents to the agent directory + self.prepare_agents() + self.assertEqual(20, self.agent_count(), "Agent directories not set properly") + + with self.__get_update_handler(test_data=data_file) as (update_handler, mock_telemetry): + with patch.object(conf, "get_enable_ga_versioning", return_value=True): + update_handler.run(debug=True) + + self.__assert_exit_code_successful(update_handler.exit_mock) + upgrade_event_msgs = [kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if + 'Agent upgrade discovered, updating to WALinuxAgent-9.9.9.10 -- exiting' in kwarg[ + 'message'] and kwarg['op'] == WALAEventOperation.AgentUpgrade] + self.assertEqual(1, len(upgrade_event_msgs), "Agent not upgraded properly") + self.__assert_agent_directories_exist_and_others_dont_exist(versions=["9.9.9.10", str(CURRENT_VERSION)]) + + def test_it_should_not_update_if_requested_version_not_found_in_manifest(self): + data_file = mockwiredata.DATA_FILE.copy() + data_file["ext_conf"] = "wire/ext_conf_missing_requested_version.xml" + with self.__get_update_handler(test_data=data_file) as (update_handler, mock_telemetry): + with patch.object(conf, "get_enable_ga_versioning", return_value=True): + update_handler.run(debug=True) + + self.__assert_exit_code_successful(update_handler.exit_mock) + self.__assert_no_agent_upgrade_telemetry(mock_telemetry) + agent_msgs = [kwarg for _, kwarg in mock_telemetry.call_args_list if + kwarg['op'] in (WALAEventOperation.AgentUpgrade, WALAEventOperation.Download)] + # This will throw if corresponding message not found so not asserting on that + requested_version_found = 
next(kwarg for kwarg in agent_msgs if + "Found requested version in manifest: 5.2.1.0 for incarnation: 1" in kwarg['message']) + self.assertTrue(requested_version_found['is_success'], + "The requested version found op should be reported as a success") + + skipping_update = next(kwarg for kwarg in agent_msgs if + "No matching package found in the agent manifest for requested version: 5.2.1.0 in incarnation: 1, skipping agent update" in kwarg['message']) + self.assertEqual(skipping_update['version'], FlexibleVersion("5.2.1.0"), + "The not found message should be reported from requested agent version") + self.assertFalse(skipping_update['is_success'], "The not found op should be reported as a failure") + + def test_it_should_only_try_downloading_requested_version_on_new_incarnation(self): + no_of_iterations = 1000 + + # Set the test environment by adding 20 random agents to the agent directory + self.prepare_agents() + self.assertEqual(20, self.agent_count(), "Agent directories not set properly") + + def reload_conf(url, mock_wire_data): + # This function reloads the conf mid-run to mimic an actual customer scenario + + if HttpRequestPredicates.is_goal_state_request(url) and mock_wire_data.call_counts[ + "goalstate"] >= 10 and mock_wire_data.call_counts["goalstate"] < 15: + + # Ensure we didn't try to download any agents except during the incarnation change + self.__assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION)]) + + # Update the requested version to "99999.0.0.0" + update_handler._protocol.mock_wire_data.set_extension_config_requested_version("99999.0.0.0") + reload_conf.call_count += 1 + self._add_write_permission_to_goal_state_files() + reload_conf.incarnation += 1 + mock_wire_data.set_incarnation(reload_conf.incarnation) + + reload_conf.call_count = 0 + reload_conf.incarnation = 2 + + data_file = mockwiredata.DATA_FILE.copy() + data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + with 
self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, reload_conf=reload_conf, + normal_frequency=0.01, hotfix_frequency=0.01) as (update_handler, mock_telemetry): + with patch.object(conf, "get_enable_ga_versioning", return_value=True): + update_handler._protocol.mock_wire_data.set_extension_config_requested_version(str(CURRENT_VERSION)) + update_handler._protocol.mock_wire_data.set_incarnation(2) + update_handler.run(debug=True) + + self.assertGreaterEqual(reload_conf.call_count, 1, "Reload conf not updated as expected") + self.__assert_exit_code_successful(update_handler.exit_mock) + self.__assert_upgrade_telemetry_emitted(mock_telemetry) + self.__assert_agent_directories_exist_and_others_dont_exist(versions=["99999.0.0.0", str(CURRENT_VERSION)]) + self.assertEqual(update_handler._protocol.mock_wire_data.call_counts['agentArtifact'], 1, + "only 1 agent should've been downloaded - 1 per incarnation") + self.assertEqual(update_handler._protocol.mock_wire_data.call_counts["manifest_of_ga.xml"], 1, + "only 1 agent manifest call should've been made - 1 per incarnation") + + def test_it_should_fallback_to_old_update_logic_if_requested_version_not_available(self): + no_of_iterations = 100 + + # Set the test environment by adding 20 random agents to the agent directory + self.prepare_agents() + self.assertEqual(20, self.agent_count(), "Agent directories not set properly") + + def reload_conf(url, mock_wire_data): + # This function reloads the conf mid-run to mimic an actual customer scenario + if HttpRequestPredicates.is_goal_state_request(url) and mock_wire_data.call_counts[ + "goalstate"] >= 5: + reload_conf.call_count += 1 + + # By this point, the GS with requested version should've been executed. 
Verify that + self.__assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION)]) + + # Update the ext-conf and incarnation and remove requested versions from GS, + # this should download all versions requested in config + mock_wire_data.data_files["ext_conf"] = "wire/ext_conf.xml" + mock_wire_data.reload() + self._add_write_permission_to_goal_state_files() + reload_conf.incarnation += 1 + mock_wire_data.set_incarnation(reload_conf.incarnation) + + reload_conf.call_count = 0 + reload_conf.incarnation = 2 + + data_file = mockwiredata.DATA_FILE.copy() + data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, reload_conf=reload_conf, + normal_frequency=0.001) as (update_handler, mock_telemetry): + with patch.object(conf, "get_enable_ga_versioning", return_value=True): + update_handler._protocol.mock_wire_data.set_extension_config_requested_version(str(CURRENT_VERSION)) + update_handler._protocol.mock_wire_data.set_incarnation(2) + update_handler.run(debug=True) + + self.assertGreater(reload_conf.call_count, 0, "Reload conf not updated") + self.__assert_exit_code_successful(update_handler.exit_mock) + self.__assert_upgrade_telemetry_emitted(mock_telemetry) + self.__assert_agent_directories_exist_and_others_dont_exist( + versions=["1.0.0", "1.1.0", "1.2.0", "2.0.0", "2.1.0", "9.9.9.10", "99999.0.0.0", str(CURRENT_VERSION)]) + + def test_it_should_not_download_anything_if_requested_version_is_current_version_and_delete_all_agents(self): + data_file = mockwiredata.DATA_FILE.copy() + data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + + # Set the test environment by adding 20 random agents to the agent directory + self.prepare_agents() + self.assertEqual(20, self.agent_count(), "Agent directories not set properly") + + with self.__get_update_handler(test_data=data_file) as (update_handler, mock_telemetry): + with patch.object(conf, 
"get_enable_ga_versioning", return_value=True): + update_handler._protocol.mock_wire_data.set_extension_config_requested_version(str(CURRENT_VERSION)) + update_handler._protocol.mock_wire_data.set_incarnation(2) + update_handler.run(debug=True) + + self.__assert_exit_code_successful(update_handler.exit_mock) + self.__assert_no_agent_upgrade_telemetry(mock_telemetry) + self.__assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION)]) + @patch('azurelinuxagent.ga.update.get_collect_telemetry_events_handler') @patch('azurelinuxagent.ga.update.get_send_telemetry_events_handler') diff --git a/tests/protocol/mockwiredata.py b/tests/protocol/mockwiredata.py index b49f91569b..d7d6a09391 100644 --- a/tests/protocol/mockwiredata.py +++ b/tests/protocol/mockwiredata.py @@ -140,6 +140,7 @@ def __init__(self, data_files=None): "extensionsconfiguri": 0, "remoteaccessinfouri": 0, "extensionArtifact": 0, + "agentArtifact": 0, "manifest.xml": 0, "manifest_of_ga.xml": 0, "ExampleHandlerLinux": 0, From 2f4b33e8c76260ad7a0f52f4e52852ec043e7a95 Mon Sep 17 00:00:00 2001 From: Laveesh Rohra Date: Fri, 14 Jan 2022 02:10:48 +0530 Subject: [PATCH 07/84] Don't test for agent log in between tests ETP (#2471) --- dcr/scenario_utils/agent_log_parser.py | 8 +++++ dcr/scenario_utils/check_waagent_log.py | 10 ++---- .../etp_helpers.py | 4 +++ .../extension-telemetry-pipeline/run.py | 31 ++++++++++++++++--- 4 files changed, 41 insertions(+), 12 deletions(-) diff --git a/dcr/scenario_utils/agent_log_parser.py b/dcr/scenario_utils/agent_log_parser.py index ceca589ba6..b316b0cdb0 100644 --- a/dcr/scenario_utils/agent_log_parser.py +++ b/dcr/scenario_utils/agent_log_parser.py @@ -1,3 +1,5 @@ +from __future__ import print_function + import os import re from datetime import datetime @@ -37,6 +39,8 @@ class AgentLogRecord: + __ERROR_TAGS = ['Exception', 'Traceback', '[CGW]'] + def __init__(self, match): self.text = match.string self.when = match.group("when") @@ -48,6 +52,10 @@ 
def __init__(self, match): def get_timestamp(self): return datetime.strptime(self.when, u'%Y-%m-%dT%H:%M:%S.%fZ') + @property + def is_error(self): + return self.level in ('ERROR', 'WARNING') or any(err in self.text for err in self.__ERROR_TAGS) + def parse_agent_log_file(waagent_log_path=AGENT_LOG_FILE): if not os.path.exists(waagent_log_path): diff --git a/dcr/scenario_utils/check_waagent_log.py b/dcr/scenario_utils/check_waagent_log.py index b3795fd730..0dcb8972a4 100644 --- a/dcr/scenario_utils/check_waagent_log.py +++ b/dcr/scenario_utils/check_waagent_log.py @@ -16,8 +16,6 @@ def check_waagent_log_for_errors(waagent_log=AGENT_LOG_FILE, ignore=None): distro = "".join(get_distro()) systemd_enabled = is_systemd_distro() - error_tags = ['ERROR', 'Exception', 'Traceback', 'WARNING', '[CGW]'] - # # NOTES: # * 'message' is matched using re.search; be sure to escape any regex metacharacters @@ -80,7 +78,8 @@ def check_waagent_log_for_errors(waagent_log=AGENT_LOG_FILE, ignore=None): # Ignoring this error for Deb 8 as its not a blocker and since Deb 8 is old and not widely used { 'message': r"journalctl: unrecognized option '--utc'", - 'if': lambda log_line: re.match(r"(debian8\.11)\D*", distro, flags=re.IGNORECASE) is not None and log_line.level == "WARNING" + 'if': lambda log_line: re.match(r"(debian8\.11)\D*", distro, + flags=re.IGNORECASE) is not None and log_line.level == "WARNING" }, # 2021-07-09T01:46:53.307959Z INFO MonitorHandler ExtHandler [CGW] Disabling resource usage monitoring. 
Reason: Check on cgroups failed: # [CGroupsException] The agent's cgroup includes unexpected processes: ['[PID: 2367] UNKNOWN'] @@ -104,16 +103,13 @@ def check_waagent_log_for_errors(waagent_log=AGENT_LOG_FILE, ignore=None): if ignore is not None: ignore_list.extend(ignore) - def is_error(log_line): - return any(err in log_line.text for err in error_tags) - def can_be_ignored(log_line): return any(re.search(msg['message'], log_line.text) is not None and ('if' not in msg or msg['if'](log_line)) for msg in ignore_list) errors = [] for agent_log_line in parse_agent_log_file(waagent_log): - if is_error(agent_log_line) and not can_be_ignored(agent_log_line): + if agent_log_line.is_error and not can_be_ignored(agent_log_line): # Handle "/proc/net/route contains no routes" as a special case since it can take time for the # primary interface to come up and we don't want to report transient errors as actual errors if "/proc/net/route contains no routes" in agent_log_line.text: diff --git a/dcr/scenarios/extension-telemetry-pipeline/etp_helpers.py b/dcr/scenarios/extension-telemetry-pipeline/etp_helpers.py index 282526ed57..1e5ad6e1dc 100644 --- a/dcr/scenarios/extension-telemetry-pipeline/etp_helpers.py +++ b/dcr/scenarios/extension-telemetry-pipeline/etp_helpers.py @@ -10,6 +10,10 @@ from random import choice +def get_collect_telemetry_thread_name(): + return "TelemetryEventsCollector" + + def wait_for_extension_events_dir_empty(timeout=timedelta(minutes=2)): # By ensuring events dir to be empty, we verify that the telemetry events collector has completed its run event_dirs = glob.glob(os.path.join("/var/log/azure/", "*", "events")) diff --git a/dcr/scenarios/extension-telemetry-pipeline/run.py b/dcr/scenarios/extension-telemetry-pipeline/run.py index 61c1c895d3..9ae7d63b46 100644 --- a/dcr/scenarios/extension-telemetry-pipeline/run.py +++ b/dcr/scenarios/extension-telemetry-pipeline/run.py @@ -3,13 +3,12 @@ import random import time -from dotenv import load_dotenv - +from 
dcr.scenario_utils.agent_log_parser import parse_agent_log_file from dcr.scenario_utils.check_waagent_log import is_data_in_waagent_log, check_waagent_log_for_errors -from dcr.scenario_utils.models import get_vm_data_from_env from dcr.scenario_utils.test_orchestrator import TestFuncObj from dcr.scenario_utils.test_orchestrator import TestOrchestrator -from etp_helpers import add_extension_events_and_get_count, wait_for_extension_events_dir_empty +from etp_helpers import add_extension_events_and_get_count, wait_for_extension_events_dir_empty, \ + get_collect_telemetry_thread_name def add_good_extension_events_and_verify(): @@ -22,7 +21,15 @@ def add_good_extension_events_and_verify(): # Sleep for a min to ensure that the TelemetryService has enough time to send events and report errors if any time.sleep(60) - check_waagent_log_for_errors() + telemetry_event_collector_name = get_collect_telemetry_thread_name() + errors_reported = False + for agent_log_line in parse_agent_log_file(): + if agent_log_line.thread == telemetry_event_collector_name and agent_log_line.is_error: + if not errors_reported: + print( + f"waagent.log contains the following errors emitted by the {telemetry_event_collector_name} thread (none expected):") + errors_reported = True + print(agent_log_line.text.rstrip()) for ext_name in ext_event_count: good_count = ext_event_count[ext_name]['good'] @@ -65,12 +72,26 @@ def verify_etp_enabled(): raise AssertionError("Event directory not found for all extensions!") +def check_agent_log(): + # Since we're injecting bad events in the add_bad_events_and_verify_count() function test, + # we expect some warnings to be emitted by the agent. + # We're already verifying if these warnings are being emitted properly in the specified test, so ignoring those here. 
+ ignore = [ + { + 'message': r"Dropped events for Extension: Microsoft\.(OSTCExtensions.VMAccessForLinux|Azure.Extensions.CustomScript); Details:", + 'if': lambda log_line: log_line.level == "WARNING" and log_line.thread == get_collect_telemetry_thread_name() + } + ] + check_waagent_log_for_errors(ignore=ignore) + + if __name__ == '__main__': tests = [ TestFuncObj("Verify ETP enabled", verify_etp_enabled, raise_on_error=True, retry=3), TestFuncObj("Add Good extension events and verify", add_good_extension_events_and_verify), TestFuncObj("Add Bad extension events and verify", add_bad_events_and_verify_count), TestFuncObj("Verify all events processed", wait_for_extension_events_dir_empty), + TestFuncObj("Check Agent log", check_agent_log), ] test_orchestrator = TestOrchestrator("ETPTests-VM", tests=tests) From 0535686bdb16262ddaf60bd77a3e18ff6cda2870 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Fri, 14 Jan 2022 07:10:38 -0800 Subject: [PATCH 08/84] Simplify the logic to update the extensions goal state (#2466) (#2472) * Simplify the logic to update the extensions goal state * Added telemetry for NotSupported * Added comments * Do not support old hostgaplugin Co-authored-by: narrieta --- azurelinuxagent/common/protocol/wire.py | 116 ++++++++++--------- tests/protocol/test_extensions_goal_state.py | 8 +- 2 files changed, 68 insertions(+), 56 deletions(-) diff --git a/azurelinuxagent/common/protocol/wire.py b/azurelinuxagent/common/protocol/wire.py index 401f9abd0f..6f35abd062 100644 --- a/azurelinuxagent/common/protocol/wire.py +++ b/azurelinuxagent/common/protocol/wire.py @@ -577,7 +577,7 @@ def __init__(self, endpoint): self._endpoint = endpoint self._goal_state = None self._extensions_goal_state = None # The goal state to use for extensions; can be an ExtensionsGoalStateFromVmSettings or ExtensionsGoalStateFromExtensionsConfig - self._vm_settings_goal_state = None # Cached value of the most recent ExtensionsGoalStateFromVmSettings + 
self._cached_vm_settings = None # Cached value of the most recent ExtensionsGoalStateFromVmSettings self._host_plugin = None self._host_plugin_version = FlexibleVersion("0.0.0.0") # Version 0 means "unknown" self._host_plugin_supports_vm_settings = False @@ -785,6 +785,13 @@ def update_goal_state(self, force_update=False): Updates the goal state if the incarnation or etag changed or if 'force_update' is True """ try: + # + # The entire goal state needs to be retrieved from the WireServer (via the GoalState class), and the HostGAPlugin + # (via the self._fetch_vm_settings_goal_state method.) + # + # Start by fetching the goal state from the WireServer; if fetch_full_goal_state becomes True this will also + # update the ExtensionsConfig instance in GoalState. + # goal_state = GoalState(self) # Always update the hostgaplugin, since the agent may issue requests to it even if there are no other changes @@ -806,42 +813,35 @@ def update_goal_state(self, force_update=False): self._goal_state = goal_state goal_state_updated = True - vm_settings_goal_state_updated = False - if not conf.get_enable_fast_track(): - # if Fast Track is not enabled use extensionsConfig - self._extensions_goal_state = self._goal_state.extensions_config + # + # Now fetch the vmSettings (which contain the extensions goal state) from the HostGAPlugin + # + vm_settings_goal_state, vm_settings_goal_state_updated = (None, False) + vm_settings_goal_state_is_valid = False + + if conf.get_enable_fast_track(): + try: + vm_settings_goal_state, vm_settings_goal_state_updated = self._fetch_vm_settings_goal_state(force_update=force_update) + + if goal_state_updated or vm_settings_goal_state_updated: + # compare() raises a GoalStateMismatchError if the goal states don't match + ExtensionsGoalState.compare(self._goal_state.extensions_config, vm_settings_goal_state) + + vm_settings_goal_state_is_valid = True + + except Exception as error: + # _fetch_vm_settings_goal_state() does its own detailed error reporting and 
raises ProtocolError; do not report those + if not isinstance(error, ProtocolError): + self._vm_settings_error_reporter.report_error(format_exception(error)) + self._vm_settings_error_reporter.report_summary() + + # + # If we fetched a valid vmSettings, use that for the extensions goal state. Otherwise use ExtensionsConfig + # + if vm_settings_goal_state_is_valid: + self._extensions_goal_state = vm_settings_goal_state else: - if not self._host_plugin_supports_vm_settings and self._host_plugin_supports_vm_settings_next_check > datetime.now(): - # if vmSettings are not supported use extensionsConfig - self._extensions_goal_state = self._goal_state.extensions_config - else: - try: - vm_settings_goal_state = self._fetch_vm_settings_goal_state(force_update=force_update) - - self._host_plugin_supports_vm_settings = True - - if self._vm_settings_goal_state is None or self._vm_settings_goal_state.etag != vm_settings_goal_state.etag: - self._vm_settings_goal_state = vm_settings_goal_state - self._extensions_goal_state = vm_settings_goal_state - vm_settings_goal_state_updated = True - - if goal_state_updated or vm_settings_goal_state_updated: - # compare() raises a GoalStateMismatchError if the goal states don't match - ExtensionsGoalState.compare(self._goal_state.extensions_config, self._vm_settings_goal_state) - - except _VmSettingsNotSupportedError: - # if vmSettings are not supported use extensionsConfig - self._extensions_goal_state = self._goal_state.extensions_config - # mark vmSettings as not supported for the next 6 hours - self._host_plugin_supports_vm_settings = False - self._host_plugin_supports_vm_settings_next_check = datetime.now() + timedelta(hours=6) - except Exception as error: - # if there is any errors on vmSettings then use extensionsConfig - self._extensions_goal_state = self._goal_state.extensions_config - # _fetch_vm_settings_goal_state() does its own detailed error reporting and raises ProtocolError; do not report those - if not isinstance(error, 
ProtocolError): - self._vm_settings_error_reporter.report_error(format_exception(error)) - self._vm_settings_error_reporter.report_summary() + self._extensions_goal_state = self._goal_state.extensions_config # If either goal state changed (goal_state or vm_settings_goal_state) save them if goal_state_updated or vm_settings_goal_state_updated: @@ -854,10 +854,25 @@ def update_goal_state(self, force_update=False): def _fetch_vm_settings_goal_state(self, force_update): """ - Queries the vmSettings from the HostGAPlugin and returns an instance of ExtensionsGoalStateFromVmSettings. - Raises _VmSettingsNotSupportedError if the HostGAPlugin does not support FastTrack or ProtocolError on other kinds of errors. + Queries the vmSettings from the HostGAPlugin and returns an (ExtensionsGoalStateFromVmSettings, bool) tuple with the vmSettings and + a boolean indicating if they are an updated (True) or a cached value (False). + + Raises ProtocolError if the request fails for any reason (e.g. not supported, time out, server error) """ - etag = None if force_update or self._vm_settings_goal_state is None else self._vm_settings_goal_state.etag + def raise_not_supported(reset_state=False): + if reset_state: + self._host_plugin_supports_vm_settings = False + self._host_plugin_supports_vm_settings_next_check = datetime.now() + timedelta(hours=6) # check again in 6 hours + # "Not supported" is not considered an error, so don't use self._vm_settings_error_reporter to report it + logger.info("vmSettings is not supported") + add_event(op=WALAEventOperation.HostPlugin, message="vmSettings is not supported", is_success=True) + raise ProtocolError("VmSettings not supported") + + # Raise if VmSettings are not supported but check for periodically since the HostGAPlugin could have been updated since the last check + if not self._host_plugin_supports_vm_settings and self._host_plugin_supports_vm_settings_next_check > datetime.now(): + raise_not_supported() + + etag = None if force_update or 
self._cached_vm_settings is None else self._cached_vm_settings.etag correlation_id = str(uuid.uuid4()) def format_message(msg): @@ -879,10 +894,10 @@ def get_vm_settings(): response = get_vm_settings() if response.status == httpclient.NOT_FOUND: # the HostGAPlugin does not support FastTrack - raise _VmSettingsNotSupportedError() + raise_not_supported(reset_state=True) if response.status == httpclient.NOT_MODIFIED: # The goal state hasn't changed, return the current instance - return self._vm_settings_goal_state + return self._cached_vm_settings, False if response.status != httpclient.OK: error_description = restutil.read_response_error(response) @@ -923,11 +938,12 @@ def get_vm_settings(): # Don't support HostGAPlugin versions older than 115 if vm_settings.host_ga_plugin_version < FlexibleVersion("1.0.8.115"): - raise _VmSettingsNotSupportedError() + raise_not_supported(reset_state=True) logger.info("Fetched new vmSettings [correlation ID: {0} New eTag: {1}]", correlation_id, vm_settings.etag) - - return vm_settings + self._host_plugin_supports_vm_settings = True + self._cached_vm_settings = vm_settings + return vm_settings, True except ProtocolError: raise @@ -966,11 +982,11 @@ def save_if_not_none(goal_state_property, file_name): text = self._goal_state.extensions_config.get_redacted_text() if text != '': self._save_cache(text, EXT_CONF_FILE_NAME.format(self._goal_state.extensions_config.incarnation)) - # TODO: When Fast Track is fully enabled self._vm_settings_goal_state will go away and this can be deleted - if self._vm_settings_goal_state is not None: - text = self._vm_settings_goal_state.get_redacted_text() + # TODO: When Fast Track is fully enabled self._cached_vm_settings will go away and this can be deleted + if self._cached_vm_settings is not None: + text = self._cached_vm_settings.get_redacted_text() if text != '': - self._save_cache(text, VM_SETTINGS_FILE_NAME.format(self._vm_settings_goal_state.id)) + self._save_cache(text, 
VM_SETTINGS_FILE_NAME.format(self._cached_vm_settings.id)) # END TODO except Exception as e: @@ -1554,7 +1570,3 @@ def report_summary(self): logger.info("[VmSettingsSummary] {0}", message) self._reset() - - -class _VmSettingsNotSupportedError(ProtocolError): - pass diff --git a/tests/protocol/test_extensions_goal_state.py b/tests/protocol/test_extensions_goal_state.py index eac60f8119..235e553fa3 100644 --- a/tests/protocol/test_extensions_goal_state.py +++ b/tests/protocol/test_extensions_goal_state.py @@ -19,7 +19,7 @@ class ExtensionsGoalStateTestCase(AgentTestCase): def test_compare_should_succeed_when_extensions_config_and_vm_settings_are_equal(self): with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: from_extensions_config = protocol.client.get_extensions_goal_state() - from_vm_settings = protocol.client._vm_settings_goal_state + from_vm_settings = protocol.client._cached_vm_settings try: ExtensionsGoalState.compare(from_extensions_config, from_vm_settings) @@ -29,7 +29,7 @@ def test_compare_should_succeed_when_extensions_config_and_vm_settings_are_equal def test_compare_should_report_mismatches_between_extensions_config_and_vm_settings(self): with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: from_extensions_config = protocol.client.get_extensions_goal_state() - from_vm_settings = protocol.client._vm_settings_goal_state + from_vm_settings = protocol.client._cached_vm_settings def assert_compare_raises(setup_copy, failing_attribute): from_vm_settings_copy = copy.deepcopy(from_vm_settings) @@ -80,7 +80,7 @@ def test_extension_goal_state_should_parse_requested_version_properly(self): for manifest in fabric_manifests: self.assertEqual(manifest.requested_version_string, "0.0.0.0", "Version should be None") - vm_settings_ga_manifests = protocol.client._vm_settings_goal_state.agent_manifests + vm_settings_ga_manifests = protocol.client._cached_vm_settings.agent_manifests for manifest in vm_settings_ga_manifests: 
self.assertEqual(manifest.requested_version_string, "0.0.0.0", "Version should be None") @@ -92,6 +92,6 @@ def test_extension_goal_state_should_parse_requested_version_properly(self): for manifest in fabric_manifests: self.assertEqual(manifest.requested_version_string, "9.9.9.10", "Version should be 9.9.9.10") - vm_settings_ga_manifests = protocol.client._vm_settings_goal_state.agent_manifests + vm_settings_ga_manifests = protocol.client._cached_vm_settings.agent_manifests for manifest in vm_settings_ga_manifests: self.assertEqual(manifest.requested_version_string, "9.9.9.9", "Version should be 9.9.9.9") From ec0609d673fa31ee4ad803885a02dbb55411c77c Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Fri, 14 Jan 2022 11:29:40 -0800 Subject: [PATCH 09/84] Retry update_goal_state on GoalStateMismatchError (#2470) (#2474) * Retry update_goal_state on GoalStateMismatchError * Add sleep before retry * Enable test * Enable test * Update test * Add unit test * Add data file * pylint warning * Add comment; fix typos * fix typo Co-authored-by: narrieta --- .../common/protocol/extensions_goal_state.py | 20 +-- azurelinuxagent/common/protocol/wire.py | 82 ++++++---- .../ext_conf-requested_version.xml | 148 ++++++++++++++++++ .../hostgaplugin/vm_settings-out-of-sync.json | 65 ++++++++ tests/protocol/mockwiredata.py | 19 ++- tests/protocol/test_extensions_goal_state.py | 6 +- tests/protocol/test_wire.py | 27 +++- 7 files changed, 315 insertions(+), 52 deletions(-) create mode 100644 tests/data/hostgaplugin/ext_conf-requested_version.xml create mode 100644 tests/data/hostgaplugin/vm_settings-out-of-sync.json diff --git a/azurelinuxagent/common/protocol/extensions_goal_state.py b/azurelinuxagent/common/protocol/extensions_goal_state.py index 6ce44e2c8f..68ce74dc77 100644 --- a/azurelinuxagent/common/protocol/extensions_goal_state.py +++ b/azurelinuxagent/common/protocol/extensions_goal_state.py @@ -18,15 +18,15 @@ import datetime import azurelinuxagent.common.logger as logger 
-from azurelinuxagent.common.future import ustr from azurelinuxagent.common.AgentGlobals import AgentGlobals from azurelinuxagent.common.exception import AgentError from azurelinuxagent.common.utils import textutil class GoalStateMismatchError(AgentError): - def __init__(self, msg): - super(GoalStateMismatchError, self).__init__(msg) + def __init__(self, message, attribute): + super(GoalStateMismatchError, self).__init__(message) + self.attribute = attribute class ExtensionsGoalState(object): @@ -100,9 +100,12 @@ def compare(from_extensions_config, from_vm_settings): context = [] # used to keep track of the attribute that is being compared def compare_goal_states(first, second): + # A mismatch on the timestamp or the activity ID (and maybe also on the correlation ID) most likely indicate that we are comparing two + # different goal states so we check them first (we raise an exception as soon as a mismatch is detected). A mismatch on the other + # attributes likely indicates an actual issue on vmSettings or extensionsConfig). 
+ compare_attributes(first, second, "created_on_timestamp") compare_attributes(first, second, "activity_id") compare_attributes(first, second, "correlation_id") - compare_attributes(first, second, "created_on_timestamp") compare_attributes(first, second, "status_upload_blob") compare_attributes(first, second, "status_upload_blob_type") compare_attributes(first, second, "required_features") @@ -154,14 +157,13 @@ def compare_attributes(first, second, attribute, ignore_order=False): second_value.sort() if first_value != second_value: - raise Exception("[{0}] != [{1}] (Attribute: {2})".format(first_value, second_value, ".".join(context))) + mistmatch = "[{0}] != [{1}] (Attribute: {2})".format(first_value, second_value, ".".join(context)) + message = "Mismatch in Goal States [Incarnation {0}] != [Etag: {1}]: {2}".format(from_extensions_config.id, from_vm_settings.id, mistmatch) + raise GoalStateMismatchError(message, attribute) finally: context.pop() - try: - compare_goal_states(from_extensions_config, from_vm_settings) - except Exception as exception: - raise GoalStateMismatchError("Mismatch in Goal States [Incarnation {0}] != [Etag: {1}]: {2}".format(from_extensions_config.id, from_vm_settings.id, ustr(exception))) + compare_goal_states(from_extensions_config, from_vm_settings) def _do_common_validations(self): """ diff --git a/azurelinuxagent/common/protocol/wire.py b/azurelinuxagent/common/protocol/wire.py index 6f35abd062..abf8b4188d 100644 --- a/azurelinuxagent/common/protocol/wire.py +++ b/azurelinuxagent/common/protocol/wire.py @@ -35,7 +35,7 @@ from azurelinuxagent.common.exception import ProtocolNotFoundError, \ ResourceGoneError, ExtensionDownloadError, InvalidContainerError, ProtocolError, HttpError from azurelinuxagent.common.future import httpclient, bytebuffer, ustr -from azurelinuxagent.common.protocol.extensions_goal_state import ExtensionsGoalState +from azurelinuxagent.common.protocol.extensions_goal_state import ExtensionsGoalState, 
GoalStateMismatchError from azurelinuxagent.common.protocol.extensions_goal_state_factory import ExtensionsGoalStateFactory from azurelinuxagent.common.protocol.goal_state import GoalState, TRANSPORT_CERT_FILE_NAME, TRANSPORT_PRV_FILE_NAME from azurelinuxagent.common.protocol.hostplugin import HostPluginProtocol @@ -780,55 +780,36 @@ def update_host_plugin_from_goal_state(self): goal_state = GoalState(self) self._update_host_plugin(goal_state.container_id, goal_state.role_config_name) - def update_goal_state(self, force_update=False): + def update_goal_state(self, force_update=False, is_retry=False): """ Updates the goal state if the incarnation or etag changed or if 'force_update' is True """ try: # # The entire goal state needs to be retrieved from the WireServer (via the GoalState class), and the HostGAPlugin - # (via the self._fetch_vm_settings_goal_state method.) + # (via the self._fetch_vm_settings_goal_state method). # - # Start by fetching the goal state from the WireServer; if fetch_full_goal_state becomes True this will also - # update the ExtensionsConfig instance in GoalState. + # We fetch it in 3 parts: + # + # 1) The "main" goal state from the WireServer, which includes the incarnation, container ID, role config, and URLs + # to the rest of the goal state (certificates, remote users, extensions config, etc). We do this first because + # we need to initialize the HostGAPlugin with the container ID and role config. # goal_state = GoalState(self) - # Always update the hostgaplugin, since the agent may issue requests to it even if there are no other changes - # in the goal state (e.g. 
report status) self._update_host_plugin(goal_state.container_id, goal_state.role_config_name) - # Fetch the full goal state if needed - if force_update: - logger.info("Forcing an update of the goal state..") - - fetch_full_goal_state = force_update or \ - self._goal_state is None or self._extensions_goal_state is None or \ - self._goal_state.incarnation != goal_state.incarnation - - if not fetch_full_goal_state: - goal_state_updated = False - else: - goal_state.fetch_full_goal_state(self) - self._goal_state = goal_state - goal_state_updated = True - # - # Now fetch the vmSettings (which contain the extensions goal state) from the HostGAPlugin + # 2) Then we fetch the vmSettings from the HostGAPlugin. We do this before fetching the rest of the goal state from the + # WireServer to minimize the time between the initial call to the WireServer and the call to the HostGAPlugin (and hence + # reduce the window in which a new goal state may arrive in-between the 2 calls) # vm_settings_goal_state, vm_settings_goal_state_updated = (None, False) - vm_settings_goal_state_is_valid = False if conf.get_enable_fast_track(): try: vm_settings_goal_state, vm_settings_goal_state_updated = self._fetch_vm_settings_goal_state(force_update=force_update) - if goal_state_updated or vm_settings_goal_state_updated: - # compare() raises a GoalStateMismatchError if the goal states don't match - ExtensionsGoalState.compare(self._goal_state.extensions_config, vm_settings_goal_state) - - vm_settings_goal_state_is_valid = True - except Exception as error: # _fetch_vm_settings_goal_state() does its own detailed error reporting and raises ProtocolError; do not report those if not isinstance(error, ProtocolError): @@ -836,14 +817,51 @@ def update_goal_state(self, force_update=False): self._vm_settings_error_reporter.report_summary() # - # If we fetched a valid vmSettings, use that for the extensions goal state. 
Otherwise use ExtensionsConfig + # 3) Lastly we, fetch the rest of the goal state from the WireServer (but ony if needed: initialization, a "forced" update, or + # a change in the incarnation). Note that if we fetch the full goal state we also update self._goal_state. + # + if force_update: + logger.info("Forcing an update of the goal state..") + + fetch_full_goal_state = force_update or self._goal_state is None or self._goal_state.incarnation != goal_state.incarnation + + if not fetch_full_goal_state: + goal_state_updated = False + else: + goal_state.fetch_full_goal_state(self) + self._goal_state = goal_state + goal_state_updated = True + + # + # If we fetched the vmSettings then compare them against extensionsConfig and use them for the extensions goal state if + # everything matches, otherwise use extensionsConfig. # - if vm_settings_goal_state_is_valid: + use_vm_settings = False + if vm_settings_goal_state is not None: + if not goal_state_updated and not vm_settings_goal_state_updated: # no need to compare them, just use vmSettings + use_vm_settings = True + else: + try: + ExtensionsGoalState.compare(self._goal_state.extensions_config, vm_settings_goal_state) + use_vm_settings = True + except GoalStateMismatchError as mismatch: + if not is_retry and mismatch.attribute in ("created_on_timestamp", "activity_id"): + # this may be OK; a new goal state may have arrived in-between the calls to the HostGAPlugin and the WireServer; + # retry one time after a delay and then report the error if it happens again. 
+ time.sleep(conf.get_goal_state_period()) + self.update_goal_state(is_retry=True) + return + self._vm_settings_error_reporter.report_error(ustr(mismatch)) + self._vm_settings_error_reporter.report_summary() + + if use_vm_settings: self._extensions_goal_state = vm_settings_goal_state else: self._extensions_goal_state = self._goal_state.extensions_config + # # If either goal state changed (goal_state or vm_settings_goal_state) save them + # if goal_state_updated or vm_settings_goal_state_updated: self._save_goal_state() diff --git a/tests/data/hostgaplugin/ext_conf-requested_version.xml b/tests/data/hostgaplugin/ext_conf-requested_version.xml new file mode 100644 index 0000000000..c3bd928236 --- /dev/null +++ b/tests/data/hostgaplugin/ext_conf-requested_version.xml @@ -0,0 +1,148 @@ + + + + + Prod + 9.9.9.10 + + https://zrdfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Prod_uscentraleuap_manifest.xml + https://ardfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Prod_uscentraleuap_manifest.xml + + + + Test + 9.9.9.10 + + https://zrdfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_uscentraleuap_manifest.xml + https://ardfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_uscentraleuap_manifest.xml + + + + CentralUSEUAP + CRP + + + + MultipleExtensionsPerHandler + + +https://dcrcl3a0xs.blob.core.windows.net/$system/edp0plkw2b.86f4ae0a-61f8-48ae-9199-40f402d56864.status?sv=2018-03-28&sr=b&sk=system-1&sig=KNWgC2%3d&se=9999-01-01T00%3a00%3a00Z&sp=w + + + + https://zrdfepirv2cbn09pr02a.blob.core.windows.net/a47f0806d764480a8d989d009c75007d/Microsoft.Azure.Monitor_AzureMonitorLinuxAgent_useast2euap_manifest.xml + + + + + 
https://zrdfepirv2cbn06prdstr01a.blob.core.windows.net/4ef06ad957494df49c807a5334f2b5d2/Microsoft.Azure.Security.Monitoring_AzureSecurityLinuxAgent_useast2euap_manifest.xml + + + + + https://umsanh4b5rfz0q0p4pwm.blob.core.windows.net/5237dd14-0aad-f051-0fad-1e33e1b63091/5237dd14-0aad-f051-0fad-1e33e1b63091_manifest.xml + + + + + https://umsawqtlsshtn5v2nfgh.blob.core.windows.net/f4086d41-69f9-3103-78e0-8a2c7e789d0f/f4086d41-69f9-3103-78e0-8a2c7e789d0f_manifest.xml + + + + + https://umsah3cwjlctnmhsvzqv.blob.core.windows.net/2bbece4f-0283-d415-b034-cc0adc6997a1/2bbece4f-0283-d415-b034-cc0adc6997a1_manifest.xml + + + + + + { + "runtimeSettings": [ + { + "handlerSettings": { + "protectedSettingsCertThumbprint": "4C4F304667711036E64AF4894B76EB208A863BD4", + "protectedSettings": "MIIBsAYJKoZIhvcNAQcDoIIBoTCCAZ0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/IsZAEZFidXaW5kb3dzIEF6dXJlIENSUCBDZXJ0aWZpY2F0ZSBHZW5lcmF0b3ICEFpB/HKM/7evRk+DBz754wUwDQYJKoZIhvcNAQEBBQAEggEADPJwniDeIUXzxNrZCloitFdscQ59Bz1dj9DLBREAiM8jmxM0LLicTJDUv272Qm/4ZQgdqpFYBFjGab/9MX+Ih2x47FkVY1woBkckMaC/QOFv84gbboeQCmJYZC/rZJdh8rCMS+CEPq3uH1PVrvtSdZ9uxnaJ+E4exTPPviIiLIPtqWafNlzdbBt8HZjYaVw+SSe+CGzD2pAQeNttq3Rt/6NjCzrjG8ufKwvRoqnrInMs4x6nnN5/xvobKIBSv4/726usfk8Ug+9Q6Benvfpmre2+1M5PnGTfq78cO3o6mI3cPoBUjp5M0iJjAMGeMt81tyHkimZrEZm6pLa4NQMOEjArBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECC5nVaiJaWt+gAhgeYvxUOYHXw==", + "publicSettings": {"GCS_AUTO_CONFIG":true} + } + } + ] +} + + + { + "runtimeSettings": [ + { + "handlerSettings": { + "protectedSettingsCertThumbprint": "4C4F304667711036E64AF4894B76EB208A863BD4", + "protectedSettings": 
"MIIBsAYJKoZIhvcNAQcDoIIBoTCCAZ0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/IsZAEZFidXaW5kb3dzIEF6dXJlIENSUCBDZXJ0aWZpY2F0ZSBHZW5lcmF0b3ICEFpB/HKM/7evRk+DBz754wUwDQYJKoZIhvcNAQEBBQAEggEADPJwniDeIUXzxNrZCloitFdscQ59Bz1dj9DLBREAiM8jmxM0LLicTJDUv272Qm/4ZQgdqpFYBFjGab/9MX+Ih2x47FkVY1woBkckMaC/QOFv84gbboeQCmJYZC/rZJdh8rCMS+CEPq3uH1PVrvtSdZ9uxnaJ+E4exTPPviIiLIPtqWafNlzdbBt8HZjYaVw+SSe+CGzD2pAQeNttq3Rt/6NjCzrjG8ufKwvRoqnrInMs4x6nnN5/xvobKIBSv4/726usfk8Ug+9Q6Benvfpmre2+1M5PnGTfq78cO3o6mI3cPoBUjp5M0iJjAMGeMt81tyHkimZrEZm6pLa4NQMOEjArBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECC5nVaiJaWt+gAhgeYvxUOYHXw==", + "publicSettings": {"enableGenevaUpload":true} + } + } + ] +} + + + + + + { + "runtimeSettings": [ + { + "handlerSettings": { + "publicSettings": {"commandToExecute":"echo 'cee174d4-4daa-4b07-9958-53b9649445c2'"} + } + } + ] +} + + + + + + + + + + { + "runtimeSettings": [ + { + "handlerSettings": { + "publicSettings": {"source":{"script":"echo '4abb1e88-f349-41f8-8442-247d9fdfcac5'"}} + } + } + ] +} + { + "runtimeSettings": [ + { + "handlerSettings": { + "publicSettings": {"source":{"script":"echo 'e865c9bc-a7b3-42c6-9a79-cfa98a1ee8b3'"}} + } + } + ] +} + { + "runtimeSettings": [ + { + "handlerSettings": { + "publicSettings": {"source":{"script":"echo 'f923e416-0340-485c-9243-8b84fb9930c6'"}} + } + } + ] +} + + + { + "runtimeSettings": [ + { + "handlerSettings": { + "protectedSettingsCertThumbprint": "59A10F50FFE2A0408D3F03FE336C8FD5716CF25C", + "protectedSettings": "*** REDACTED ***" + } + } + ] +} + + +https://dcrcl3a0xs.blob.core.windows.net/$system/edp0plkw2b.86f4ae0a-61f8-48ae-9199-40f402d56864.vmSettings?sv=2018-03-28&sr=b&sk=system-1&sig=PaiLic%3d&se=9999-01-01T00%3a00%3a00Z&sp=r + diff --git a/tests/data/hostgaplugin/vm_settings-out-of-sync.json b/tests/data/hostgaplugin/vm_settings-out-of-sync.json new file mode 100644 index 0000000000..db755d249d --- /dev/null +++ b/tests/data/hostgaplugin/vm_settings-out-of-sync.json @@ -0,0 +1,65 @@ +{ + "hostGAPluginVersion": "1.0.8.115", + 
"vmSettingsSchemaVersion": "0.0", + "activityId": "AAAAAAA-BBBB-CCCC-DDDD-EEEEEEEEEEEE", + "correlationId": "EEEEEEEE-DDDD-CCCC-BBBB-AAAAAAAAAAAA", + "extensionsLastModifiedTickCount": 637726657000000000, + "extensionGoalStatesSource": "Fabric", + "onHold": true, + "statusUploadBlob": { + "statusBlobType": "BlockBlob", + "value": "https://dcrcl3a0xs.blob.core.windows.net/$system/edp0plkw2b.86f4ae0a-61f8-48ae-9199-40f402d56864.status?sv=2018-03-28&sr=b&sk=system-1&sig=KNWgC2%3d&se=9999-01-01T00%3a00%3a00Z&sp=w" + }, + "inVMMetadata": { + "subscriptionId": "8e037ad4-618f-4466-8bc8-5099d41ac15b", + "resourceGroupName": "rg-dc-86fjzhp", + "vmName": "edp0plkw2b", + "location": "CentralUSEUAP", + "vmId": "86f4ae0a-61f8-48ae-9199-40f402d56864", + "vmSize": "Standard_B2s", + "osType": "Linux" + }, + "requiredFeatures": [ + { + "name": "MultipleExtensionsPerHandler" + } + ], + "gaFamilies": [ + { + "name": "Prod", + "uris": [ + "https://zrdfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Prod_uscentraleuap_manifest.xml", + "https://ardfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Prod_uscentraleuap_manifest.xml" + ] + }, + { + "name": "Test", + "uris": [ + "https://zrdfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_uscentraleuap_manifest.xml", + "https://ardfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_uscentraleuap_manifest.xml" + ] + } + ], + "extensionGoalStates": [ + { + "name": "Microsoft.Azure.Monitor.AzureMonitorLinuxAgent", + "version": "1.9.1", + "location": "https://zrdfepirv2cbn04prdstr01a.blob.core.windows.net/a47f0806d764480a8d989d009c75007d/Microsoft.Azure.Monitor_AzureMonitorLinuxAgent_useast2euap_manifest.xml", + "failoverlocation": 
"https://zrdfepirv2cbn06prdstr01a.blob.core.windows.net/a47f0806d764480a8d989d009c75007d/Microsoft.Azure.Monitor_AzureMonitorLinuxAgent_useast2euap_manifest.xml", + "additionalLocations": ["https://zrdfepirv2cbn09pr02a.blob.core.windows.net/a47f0806d764480a8d989d009c75007d/Microsoft.Azure.Monitor_AzureMonitorLinuxAgent_useast2euap_manifest.xml"], + "state": "enabled", + "autoUpgrade": true, + "runAsStartupTask": false, + "isJson": true, + "useExactVersion": true, + "settingsSeqNo": 0, + "settings": [ + { + "protectedSettingsCertThumbprint": "4C4F304667711036E64AF4894B76EB208A863BD4", + "protectedSettings": "MIIBsAYJKoZIhvcNAQcDoIIBoTCCAZ0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/IsZAEZFidXaW5kb3dzIEF6dXJlIENSUCBDZXJ0aWZpY2F0ZSBHZW5lcmF0b3ICEFpB/HKM/7evRk+DBz754wUwDQYJKoZIhvcNAQEBBQAEggEADPJwniDeIUXzxNrZCloitFdscQ59Bz1dj9DLBREAiM8jmxM0LLicTJDUv272Qm/4ZQgdqpFYBFjGab/9MX+Ih2x47FkVY1woBkckMaC/QOFv84gbboeQCmJYZC/rZJdh8rCMS+CEPq3uH1PVrvtSdZ9uxnaJ+E4exTPPviIiLIPtqWafNlzdbBt8HZjYaVw+SSe+CGzD2pAQeNttq3Rt/6NjCzrjG8ufKwvRoqnrInMs4x6nnN5/xvobKIBSv4/726usfk8Ug+9Q6Benvfpmre2+1M5PnGTfq78cO3o6mI3cPoBUjp5M0iJjAMGeMt81tyHkimZrEZm6pLa4NQMOEjArBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECC5nVaiJaWt+gAhgeYvxUOYHXw==", + "publicSettings": "{\"GCS_AUTO_CONFIG\":true}" + } + ] + } + ] +} diff --git a/tests/protocol/mockwiredata.py b/tests/protocol/mockwiredata.py index d7d6a09391..3fd6c75613 100644 --- a/tests/protocol/mockwiredata.py +++ b/tests/protocol/mockwiredata.py @@ -38,8 +38,8 @@ "test_ext": "ext/sample_ext-1.3.0.zip", "remote_access": None, "in_vm_artifacts_profile": None, - "vm_settings": "hostgaplugin/vm_settings.json", - "ETag": "1" + "vm_settings": None, + "ETag": None } DATA_FILE_IN_VM_ARTIFACTS_PROFILE = DATA_FILE.copy() @@ -114,6 +114,7 @@ DATA_FILE_VM_SETTINGS = DATA_FILE.copy() DATA_FILE_VM_SETTINGS["vm_settings"] = "hostgaplugin/vm_settings.json" +DATA_FILE_VM_SETTINGS["ETag"] ="1" DATA_FILE_VM_SETTINGS["ext_conf"] = "hostgaplugin/ext_conf.xml" 
DATA_FILE_VM_SETTINGS["in_vm_artifacts_profile"] = "hostgaplugin/in_vm_artifacts_profile.json" @@ -181,8 +182,11 @@ def reload(self): self.trans_prv = load_data(self.data_files.get("trans_prv")) self.trans_cert = load_data(self.data_files.get("trans_cert")) self.ext = load_bin_data(self.data_files.get("test_ext")) - self.vm_settings = load_data(self.data_files.get("vm_settings")) - self.etag = self.data_files.get("ETag") + + vm_settings = self.data_files.get("vm_settings") + if vm_settings is not None: + self.vm_settings = load_data(self.data_files.get("vm_settings")) + self.etag = self.data_files.get("ETag") remote_access_data_file = self.data_files.get("remote_access") if remote_access_data_file is not None: @@ -230,8 +234,11 @@ def mock_http_get(self, url, *_, **kwargs): content = self.in_vm_artifacts_profile self.call_counts["in_vm_artifacts_profile"] += 1 elif "/vmSettings" in url: - content = self.vm_settings - response_headers = [('ETag', self.etag)] + if self.vm_settings is None: + resp.status = httpclient.NOT_FOUND + else: + content = self.vm_settings + response_headers = [('ETag', self.etag)] self.call_counts["vm_settings"] += 1 else: diff --git a/tests/protocol/test_extensions_goal_state.py b/tests/protocol/test_extensions_goal_state.py index 235e553fa3..1138df3a43 100644 --- a/tests/protocol/test_extensions_goal_state.py +++ b/tests/protocol/test_extensions_goal_state.py @@ -75,7 +75,7 @@ def test_create_from_vm_settings_should_assume_block_when_blob_type_is_not_valid self.assertEqual("BlockBlob", extensions_goal_state.status_upload_blob_type, 'Expected BlockBob for an invalid statusBlobType') def test_extension_goal_state_should_parse_requested_version_properly(self): - with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: + with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: fabric_manifests, _ = protocol.get_vmagent_manifests() for manifest in fabric_manifests: self.assertEqual(manifest.requested_version_string, "0.0.0.0", 
"Version should be None") @@ -84,9 +84,9 @@ def test_extension_goal_state_should_parse_requested_version_properly(self): for manifest in vm_settings_ga_manifests: self.assertEqual(manifest.requested_version_string, "0.0.0.0", "Version should be None") - data_file = mockwiredata.DATA_FILE.copy() + data_file = mockwiredata.DATA_FILE_VM_SETTINGS.copy() data_file["vm_settings"] = "hostgaplugin/vm_settings-requested_version.json" - data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + data_file["ext_conf"] = "hostgaplugin/ext_conf-requested_version.xml" with mock_wire_protocol(data_file) as protocol: fabric_manifests, _ = protocol.get_vmagent_manifests() for manifest in fabric_manifests: diff --git a/tests/protocol/test_wire.py b/tests/protocol/test_wire.py index 0ac5fc135d..e84ea2a658 100644 --- a/tests/protocol/test_wire.py +++ b/tests/protocol/test_wire.py @@ -52,7 +52,7 @@ from tests.protocol.HttpRequestPredicates import HttpRequestPredicates from tests.protocol.mockwiredata import DATA_FILE_NO_EXT, DATA_FILE from tests.protocol.mockwiredata import WireProtocolData -from tests.tools import Mock, PropertyMock, patch, AgentTestCase +from tests.tools import Mock, PropertyMock, patch, AgentTestCase, load_bin_data, mock_sleep data_with_bom = b'\xef\xbb\xbfhehe' testurl = 'http://foo' @@ -1276,7 +1276,7 @@ def http_get_handler(url, *_, **__): # Lastly, test the goal state comparison def fail_compare(): - error = GoalStateMismatchError("TEST COMPARE FAILED") + error = GoalStateMismatchError("TEST COMPARE FAILED", "dummy_attribute") with patch("azurelinuxagent.common.protocol.extensions_goal_state.ExtensionsGoalState.compare", side_effect=error): with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: protocol.client.update_goal_state() @@ -1371,6 +1371,29 @@ def test_it_should_use_extensions_config_when_vm_settings_do_not_match_extension reported = [kwargs for _, kwargs in add_event_patcher.call_args_list if kwargs['op'] == "VmSettings" and 
"GoalStateMismatchError" in kwargs['message']] self.assertEqual(1, len(reported), "The goal state mismatch should have been reported exactly once; got: {0}".format([kwargs['message'] for _, kwargs in add_event_patcher.call_args_list])) + def test_it_should_retry_vm_settings_and_extensions_config_do_not_match(self): + def http_get_handler(url, *_, **__): + if self.is_host_plugin_vm_settings_request(url): + response = MockHttpResponse(httpclient.OK) + response.body = load_bin_data("hostgaplugin/vm_settings-out-of-sync.json") + response.headers = [('ETag', "0123456789")] + return response + return None + + data_file = mockwiredata.DATA_FILE_VM_SETTINGS.copy() + with mock_wire_protocol(data_file) as protocol: + protocol.set_http_handlers(http_get_handler=http_get_handler) + + with patch('time.sleep', side_effect=lambda _: mock_sleep()): # avoid the sleep during retry + with patch("azurelinuxagent.common.protocol.wire.add_event") as add_event: + protocol.client.update_goal_state() + + vm_settings_call_count = len([url for url in protocol.get_tracked_urls() if "vmSettings" in url]) + self.assertEqual(2, vm_settings_call_count, "Expected 2 calls to vmSettings (original and retry)") + + errors = [kwargs["message"] for _, kwargs in add_event.call_args_list if kwargs["op"] == "VmSettings"] + self.assertTrue(any("[GoalStateMismatchError]" in e for e in errors), "Expected GoalStateMismatchError to have been reported. 
Got: {0}".format(errors)) + def test_it_should_compare_goal_states_when_vm_settings_change(self): with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: protocol.mock_wire_data.set_etag("aNewEtag") From 034c8683bd4198473eb62e8634c3bdafb7539269 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Tue, 18 Jan 2022 15:37:17 -0800 Subject: [PATCH 10/84] Set agent version to 8.8.8.8 on Fast Track branch (#2476) Co-authored-by: narrieta --- azurelinuxagent/common/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azurelinuxagent/common/version.py b/azurelinuxagent/common/version.py index e35e28b94a..af01a9861a 100644 --- a/azurelinuxagent/common/version.py +++ b/azurelinuxagent/common/version.py @@ -209,7 +209,7 @@ def has_logrotate(): # # When doing a release, be sure to use the actual agent version. Current agent version: 2.4.0.0 # -AGENT_VERSION = '9.9.9.9' +AGENT_VERSION = '8.8.8.8' AGENT_LONG_VERSION = "{0}-{1}".format(AGENT_NAME, AGENT_VERSION) AGENT_DESCRIPTION = """ The Azure Linux Agent supports the provisioning and running of Linux From ea0043677093cc7167285ed16f88b2fabe87d1c2 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Thu, 20 Jan 2022 09:59:12 -0800 Subject: [PATCH 11/84] Set vmSettings as primary channel for extensions goal state (#2475) * Remove goal state comparison * Remove fallback to extensionsConfig * pylint warnings * Remove double-logging * Update comments Co-authored-by: narrieta --- .../common/protocol/extensions_goal_state.py | 82 ----------- azurelinuxagent/common/protocol/wire.py | 85 +++++------ tests/protocol/mockwiredata.py | 2 +- tests/protocol/test_extensions_goal_state.py | 53 +------ tests/protocol/test_wire.py | 139 +++--------------- 5 files changed, 61 insertions(+), 300 deletions(-) diff --git a/azurelinuxagent/common/protocol/extensions_goal_state.py b/azurelinuxagent/common/protocol/extensions_goal_state.py index 68ce74dc77..d2d8421201 100644 --- 
a/azurelinuxagent/common/protocol/extensions_goal_state.py +++ b/azurelinuxagent/common/protocol/extensions_goal_state.py @@ -19,16 +19,9 @@ import azurelinuxagent.common.logger as logger from azurelinuxagent.common.AgentGlobals import AgentGlobals -from azurelinuxagent.common.exception import AgentError from azurelinuxagent.common.utils import textutil -class GoalStateMismatchError(AgentError): - def __init__(self, message, attribute): - super(GoalStateMismatchError, self).__init__(message) - self.attribute = attribute - - class ExtensionsGoalState(object): """ ExtensionsGoalState represents the extensions information in the goal state; that information can originate from @@ -90,81 +83,6 @@ def get_redacted_text(self): """ raise NotImplementedError() - @staticmethod - def compare(from_extensions_config, from_vm_settings): - """ - Compares the two instances given as argument and logs a GoalStateMismatch message if they are different. - - NOTE: The order of the two instances is important for the debug info to be logged correctly (ExtensionsConfig first, vmSettings second) - """ - context = [] # used to keep track of the attribute that is being compared - - def compare_goal_states(first, second): - # A mismatch on the timestamp or the activity ID (and maybe also on the correlation ID) most likely indicate that we are comparing two - # different goal states so we check them first (we raise an exception as soon as a mismatch is detected). A mismatch on the other - # attributes likely indicates an actual issue on vmSettings or extensionsConfig). 
- compare_attributes(first, second, "created_on_timestamp") - compare_attributes(first, second, "activity_id") - compare_attributes(first, second, "correlation_id") - compare_attributes(first, second, "status_upload_blob") - compare_attributes(first, second, "status_upload_blob_type") - compare_attributes(first, second, "required_features") - compare_attributes(first, second, "on_hold") - compare_array(first.agent_manifests, second.agent_manifests, compare_agent_manifests, "agent_manifests") - compare_array(first.extensions, second.extensions, compare_extensions, "extensions") - - def compare_agent_manifests(first, second): - compare_attributes(first, second, "family") - compare_attributes(first, second, "requested_version_string") - compare_attributes(first, second, "uris", ignore_order=True) - - def compare_extensions(first, second): - compare_attributes(first, second, "name") - compare_attributes(first, second, "version") - compare_attributes(first, second, "state") - compare_attributes(first, second, "supports_multi_config") - compare_attributes(first, second, "manifest_uris", ignore_order=True) - compare_array(first.settings, second.settings, compare_settings, "settings") - - def compare_settings(first, second): - compare_attributes(first, second, "name") - compare_attributes(first, second, "sequenceNumber") - compare_attributes(first, second, "publicSettings") - compare_attributes(first, second, "protectedSettings") - compare_attributes(first, second, "certificateThumbprint") - compare_attributes(first, second, "dependencyLevel") - compare_attributes(first, second, "state") - - def compare_array(first, second, comparer, name): - if len(first) != len(second): - raise Exception("Number of items in {0} mismatch: {1} != {2}".format(name, len(first), len(second))) - for i in range(len(first)): - context.append("{0}[{1}]".format(name, i)) - try: - comparer(first[i], second[i]) - finally: - context.pop() - - def compare_attributes(first, second, attribute, 
ignore_order=False): - context.append(attribute) - try: - first_value = getattr(first, attribute) - second_value = getattr(second, attribute) - if ignore_order: - first_value = first_value[:] - first_value.sort() - second_value = second_value[:] - second_value.sort() - - if first_value != second_value: - mistmatch = "[{0}] != [{1}] (Attribute: {2})".format(first_value, second_value, ".".join(context)) - message = "Mismatch in Goal States [Incarnation {0}] != [Etag: {1}]: {2}".format(from_extensions_config.id, from_vm_settings.id, mistmatch) - raise GoalStateMismatchError(message, attribute) - finally: - context.pop() - - compare_goal_states(from_extensions_config, from_vm_settings) - def _do_common_validations(self): """ Does validations common to vmSettings and ExtensionsConfig diff --git a/azurelinuxagent/common/protocol/wire.py b/azurelinuxagent/common/protocol/wire.py index abf8b4188d..c5c19a8f60 100644 --- a/azurelinuxagent/common/protocol/wire.py +++ b/azurelinuxagent/common/protocol/wire.py @@ -35,7 +35,6 @@ from azurelinuxagent.common.exception import ProtocolNotFoundError, \ ResourceGoneError, ExtensionDownloadError, InvalidContainerError, ProtocolError, HttpError from azurelinuxagent.common.future import httpclient, bytebuffer, ustr -from azurelinuxagent.common.protocol.extensions_goal_state import ExtensionsGoalState, GoalStateMismatchError from azurelinuxagent.common.protocol.extensions_goal_state_factory import ExtensionsGoalStateFactory from azurelinuxagent.common.protocol.goal_state import GoalState, TRANSPORT_CERT_FILE_NAME, TRANSPORT_PRV_FILE_NAME from azurelinuxagent.common.protocol.hostplugin import HostPluginProtocol @@ -47,7 +46,7 @@ from azurelinuxagent.common.utils.cryptutil import CryptUtil from azurelinuxagent.common.utils.flexible_version import FlexibleVersion from azurelinuxagent.common.utils.textutil import parse_doc, findall, find, \ - findtext, gettext, remove_bom, get_bytes_from_pem, parse_json, format_exception + findtext, gettext, 
remove_bom, get_bytes_from_pem, parse_json from azurelinuxagent.common.version import AGENT_NAME, CURRENT_VERSION VERSION_INFO_URI = "http://{0}/?comp=versions" @@ -780,45 +779,42 @@ def update_host_plugin_from_goal_state(self): goal_state = GoalState(self) self._update_host_plugin(goal_state.container_id, goal_state.role_config_name) - def update_goal_state(self, force_update=False, is_retry=False): + def update_goal_state(self, force_update=False): """ Updates the goal state if the incarnation or etag changed or if 'force_update' is True """ try: # - # The entire goal state needs to be retrieved from the WireServer (via the GoalState class), and the HostGAPlugin + # The goal state needs to be retrieved using both the WireServer (via the GoalState class) and the HostGAPlugin # (via the self._fetch_vm_settings_goal_state method). # - # We fetch it in 3 parts: + # We always need at least 2 queries: one to the WireServer (to check for incarnation changes) and one to the HostGAPlugin + # (to check for extension updates). Note that vmSettings are not a full goal state; they include only the extension information + # (minus certificates). The check on incarnation (which is also not included in the vmSettings) is needed to check for changes + # in, for example, the remote users for JIT access. # - # 1) The "main" goal state from the WireServer, which includes the incarnation, container ID, role config, and URLs - # to the rest of the goal state (certificates, remote users, extensions config, etc). We do this first because - # we need to initialize the HostGAPlugin with the container ID and role config. + # We start by fetching the goal state from the WireServer. The response to this initial query will include the incarnation, + # container ID, role config, and URLs to the rest of the goal state (certificates, remote users, extensions config, etc). We + # do this first because we need to initialize the HostGAPlugin with the container ID and role config. 
# goal_state = GoalState(self) self._update_host_plugin(goal_state.container_id, goal_state.role_config_name) # - # 2) Then we fetch the vmSettings from the HostGAPlugin. We do this before fetching the rest of the goal state from the - # WireServer to minimize the time between the initial call to the WireServer and the call to the HostGAPlugin (and hence - # reduce the window in which a new goal state may arrive in-between the 2 calls) + # Then we fetch the vmSettings from the HostGAPlugin; the response will include the goal state for extensions. # vm_settings_goal_state, vm_settings_goal_state_updated = (None, False) if conf.get_enable_fast_track(): try: vm_settings_goal_state, vm_settings_goal_state_updated = self._fetch_vm_settings_goal_state(force_update=force_update) - - except Exception as error: - # _fetch_vm_settings_goal_state() does its own detailed error reporting and raises ProtocolError; do not report those - if not isinstance(error, ProtocolError): - self._vm_settings_error_reporter.report_error(format_exception(error)) - self._vm_settings_error_reporter.report_summary() + except VmSettingsNotSupported: + pass # if vmSettings are not supported we use extensionsConfig below # - # 3) Lastly we, fetch the rest of the goal state from the WireServer (but ony if needed: initialization, a "forced" update, or - # a change in the incarnation). Note that if we fetch the full goal state we also update self._goal_state. + # Now we fetch the rest of the goal state from the WireServer (but ony if needed: initialization, a "forced" update, or + # a change in the incarnation). Note that if we fetch the full goal state we also update self._goal_state. 
# if force_update: logger.info("Forcing an update of the goal state..") @@ -833,28 +829,9 @@ def update_goal_state(self, force_update=False, is_retry=False): goal_state_updated = True # - # If we fetched the vmSettings then compare them against extensionsConfig and use them for the extensions goal state if - # everything matches, otherwise use extensionsConfig. + # And, lastly, we use extensionsConfig if we don't have the vmSettings (Fast Track may be disabled or not supported). # - use_vm_settings = False if vm_settings_goal_state is not None: - if not goal_state_updated and not vm_settings_goal_state_updated: # no need to compare them, just use vmSettings - use_vm_settings = True - else: - try: - ExtensionsGoalState.compare(self._goal_state.extensions_config, vm_settings_goal_state) - use_vm_settings = True - except GoalStateMismatchError as mismatch: - if not is_retry and mismatch.attribute in ("created_on_timestamp", "activity_id"): - # this may be OK; a new goal state may have arrived in-between the calls to the HostGAPlugin and the WireServer; - # retry one time after a delay and then report the error if it happens again. - time.sleep(conf.get_goal_state_period()) - self.update_goal_state(is_retry=True) - return - self._vm_settings_error_reporter.report_error(ustr(mismatch)) - self._vm_settings_error_reporter.report_summary() - - if use_vm_settings: self._extensions_goal_state = vm_settings_goal_state else: self._extensions_goal_state = self._goal_state.extensions_config @@ -875,7 +852,8 @@ def _fetch_vm_settings_goal_state(self, force_update): Queries the vmSettings from the HostGAPlugin and returns an (ExtensionsGoalStateFromVmSettings, bool) tuple with the vmSettings and a boolean indicating if they are an updated (True) or a cached value (False). - Raises ProtocolError if the request fails for any reason (e.g. 
not supported, time out, server error) + Raises VmSettingsNotSupported if the HostGAPlugin does not support the vmSettings API, or ProtocolError if the request fails for any other reason + (e.g. not supported, time out, server error). """ def raise_not_supported(reset_state=False): if reset_state: @@ -884,19 +862,19 @@ def raise_not_supported(reset_state=False): # "Not supported" is not considered an error, so don't use self._vm_settings_error_reporter to report it logger.info("vmSettings is not supported") add_event(op=WALAEventOperation.HostPlugin, message="vmSettings is not supported", is_success=True) - raise ProtocolError("VmSettings not supported") + raise VmSettingsNotSupported() - # Raise if VmSettings are not supported but check for periodically since the HostGAPlugin could have been updated since the last check - if not self._host_plugin_supports_vm_settings and self._host_plugin_supports_vm_settings_next_check > datetime.now(): - raise_not_supported() + try: + # Raise if VmSettings are not supported but check for periodically since the HostGAPlugin could have been updated since the last check + if not self._host_plugin_supports_vm_settings and self._host_plugin_supports_vm_settings_next_check > datetime.now(): + raise_not_supported() - etag = None if force_update or self._cached_vm_settings is None else self._cached_vm_settings.etag - correlation_id = str(uuid.uuid4()) + etag = None if force_update or self._cached_vm_settings is None else self._cached_vm_settings.etag + correlation_id = str(uuid.uuid4()) - def format_message(msg): - return "GET vmSettings [correlation ID: {0} eTag: {1}]: {2}".format(correlation_id, etag, msg) + def format_message(msg): + return "GET vmSettings [correlation ID: {0} eTag: {1}]: {2}".format(correlation_id, etag, msg) - try: def get_vm_settings(): url, headers = self.get_host_plugin().get_vm_settings_request(correlation_id) if etag is not None: @@ -963,7 +941,7 @@ def get_vm_settings(): self._cached_vm_settings = vm_settings 
return vm_settings, True - except ProtocolError: + except (ProtocolError, VmSettingsNotSupported): raise except Exception as exception: if isinstance(exception, IOError) and "timed out" in ustr(exception): @@ -973,6 +951,8 @@ def get_vm_settings(): message = format_message("Request failed: {0}".format(textutil.format_exception(exception))) self._vm_settings_error_reporter.report_error(message, _VmSettingsError.RequestFailed) raise ProtocolError(message) + finally: + self._vm_settings_error_reporter.report_summary() def _update_host_plugin(self, container_id, role_config_name): if self._host_plugin is not None: @@ -1559,7 +1539,6 @@ def report_error(self, error, category=None): self._error_count += 1 if self._error_count <= _VmSettingsErrorReporter._MaxErrors: - logger.info("[VmSettings] [Informational only, the Agent will continue normal operation] {0}", error) add_event(op=WALAEventOperation.VmSettings, message=error, is_success=False, log_event=False) if category == _VmSettingsError.ServerError: @@ -1588,3 +1567,7 @@ def report_summary(self): logger.info("[VmSettingsSummary] {0}", message) self._reset() + + +class VmSettingsNotSupported(TypeError): + pass diff --git a/tests/protocol/mockwiredata.py b/tests/protocol/mockwiredata.py index 3fd6c75613..40c3633c4a 100644 --- a/tests/protocol/mockwiredata.py +++ b/tests/protocol/mockwiredata.py @@ -197,7 +197,7 @@ def reload(self): self.in_vm_artifacts_profile = load_data(in_vm_artifacts_profile_file) def mock_http_get(self, url, *_, **kwargs): - content = None + content = '' response_headers = [] resp = MagicMock() diff --git a/tests/protocol/test_extensions_goal_state.py b/tests/protocol/test_extensions_goal_state.py index 1138df3a43..dcc0c2c79a 100644 --- a/tests/protocol/test_extensions_goal_state.py +++ b/tests/protocol/test_extensions_goal_state.py @@ -4,9 +4,7 @@ import re import sys -from azurelinuxagent.common.protocol.extensions_goal_state import ExtensionsGoalState, GoalStateMismatchError from 
azurelinuxagent.common.protocol.extensions_goal_state_factory import ExtensionsGoalStateFactory -from azurelinuxagent.common.utils import textutil from tests.protocol.mocks import mockwiredata, mock_wire_protocol from tests.tools import AgentTestCase, load_data @@ -16,56 +14,9 @@ class ExtensionsGoalStateTestCase(AgentTestCase): - def test_compare_should_succeed_when_extensions_config_and_vm_settings_are_equal(self): - with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: - from_extensions_config = protocol.client.get_extensions_goal_state() - from_vm_settings = protocol.client._cached_vm_settings - - try: - ExtensionsGoalState.compare(from_extensions_config, from_vm_settings) - except Exception as exception: - self.fail("Compare goal state failed: {0}".format(textutil.format_exception(exception))) - - def test_compare_should_report_mismatches_between_extensions_config_and_vm_settings(self): - with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: - from_extensions_config = protocol.client.get_extensions_goal_state() - from_vm_settings = protocol.client._cached_vm_settings - - def assert_compare_raises(setup_copy, failing_attribute): - from_vm_settings_copy = copy.deepcopy(from_vm_settings) - setup_copy(from_vm_settings_copy) - - with self.assertRaisesRegexCM(GoalStateMismatchError, re.escape("(Attribute: {0})".format(failing_attribute)), re.DOTALL): - ExtensionsGoalState.compare(from_extensions_config, from_vm_settings_copy) - - assert_compare_raises(lambda c: setattr(c, "_activity_id", 'MOCK_ACTIVITY_ID'), "activity_id") - assert_compare_raises(lambda c: setattr(c, "_correlation_id", 'MOCK_CORRELATION_ID'), "correlation_id") - assert_compare_raises(lambda c: setattr(c, "_created_on_timestamp", 'MOCK_TIMESTAMP'), "created_on_timestamp") - assert_compare_raises(lambda c: setattr(c, "_status_upload_blob", 'MOCK_UPLOAD_BLOB'), "status_upload_blob") - assert_compare_raises(lambda c: setattr(c, "_status_upload_blob_type", 
'MOCK_UPLOAD_BLOB_TYPE'), "status_upload_blob_type") - assert_compare_raises(lambda c: setattr(c, "_required_features", ['MOCK_REQUIRED_FEATURE']), "required_features") - assert_compare_raises(lambda c: setattr(c, "_on_hold", False), "on_hold") - - assert_compare_raises(lambda c: setattr(c.agent_manifests[0], "family", 'MOCK_FAMILY'), r"agent_manifests[0].family") - assert_compare_raises(lambda c: setattr(c.agent_manifests[0], "requested_version_string", 'MOCK_VERSION'), r"agent_manifests[0].requested_version_string") - assert_compare_raises(lambda c: setattr(c.agent_manifests[0], "uris", ['MOCK_URI']), r"agent_manifests[0].uris") - - assert_compare_raises(lambda c: setattr(c.extensions[0], "version", 'MOCK_NAME'), r"extensions[0].version") - assert_compare_raises(lambda c: setattr(c.extensions[0], "state", 'MOCK_STATE'), r"extensions[0].state") - assert_compare_raises(lambda c: setattr(c.extensions[0], "manifest_uris", ['MOCK_URI']), r"extensions[0].manifest_uris") - assert_compare_raises(lambda c: setattr(c.extensions[0], "supports_multi_config", True), r"extensions[0].supports_multi_config") - - assert_compare_raises(lambda c: setattr(c.extensions[0].settings[0], "name", 'MOCK_NAME'), r"extensions[0].settings[0].name") - assert_compare_raises(lambda c: setattr(c.extensions[0].settings[0], "sequenceNumber", 98765), r"extensions[0].settings[0].sequenceNumber") - assert_compare_raises(lambda c: setattr(c.extensions[0].settings[0], "publicSettings", {'MOCK_NAME': 'MOCK_VALUE'}), r"extensions[0].settings[0].publicSettings") - assert_compare_raises(lambda c: setattr(c.extensions[0].settings[0], "protectedSettings", 'MOCK_SETTINGS'), r"extensions[0].settings[0].protectedSettings") - assert_compare_raises(lambda c: setattr(c.extensions[0].settings[0], "certificateThumbprint", 'MOCK_CERT'), r"extensions[0].settings[0].certificateThumbprint") - assert_compare_raises(lambda c: setattr(c.extensions[0].settings[0], "dependencyLevel", 56789), 
r"extensions[0].settings[0].dependencyLevel") - assert_compare_raises(lambda c: setattr(c.extensions[0].settings[0], "state", 'MOCK_STATE'), r"extensions[0].settings[0].state") - def test_create_from_extensions_config_should_assume_block_when_blob_type_is_not_valid(self): data_file = mockwiredata.DATA_FILE.copy() - data_file["vm_settings"] = "hostgaplugin/ext_conf-invalid_blob_type.xml" + data_file["ext_conf"] = "hostgaplugin/ext_conf-invalid_blob_type.xml" with mock_wire_protocol(data_file) as protocol: extensions_goal_state = ExtensionsGoalStateFactory.create_from_extensions_config(123, load_data("hostgaplugin/ext_conf-invalid_blob_type.xml"), protocol) self.assertEqual("BlockBlob", extensions_goal_state.status_upload_blob_type, 'Expected BlockBob for an invalid statusBlobType') @@ -88,7 +39,7 @@ def test_extension_goal_state_should_parse_requested_version_properly(self): data_file["vm_settings"] = "hostgaplugin/vm_settings-requested_version.json" data_file["ext_conf"] = "hostgaplugin/ext_conf-requested_version.xml" with mock_wire_protocol(data_file) as protocol: - fabric_manifests, _ = protocol.get_vmagent_manifests() + fabric_manifests = protocol.client.get_goal_state().extensions_config.agent_manifests for manifest in fabric_manifests: self.assertEqual(manifest.requested_version_string, "9.9.9.10", "Version should be 9.9.9.10") diff --git a/tests/protocol/test_wire.py b/tests/protocol/test_wire.py index e84ea2a658..0d505710f0 100644 --- a/tests/protocol/test_wire.py +++ b/tests/protocol/test_wire.py @@ -34,7 +34,6 @@ from azurelinuxagent.common.exception import ResourceGoneError, ProtocolError, \ ExtensionDownloadError, HttpError from azurelinuxagent.common.protocol import hostplugin -from azurelinuxagent.common.protocol.extensions_goal_state import GoalStateMismatchError from azurelinuxagent.common.protocol.extensions_goal_state_factory import ExtensionsGoalStateFactory from azurelinuxagent.common.protocol.extensions_goal_state_from_extensions_config import 
ExtensionsGoalStateFromExtensionsConfig from azurelinuxagent.common.protocol.extensions_goal_state_from_vm_settings import ExtensionsGoalStateFromVmSettings @@ -43,7 +42,7 @@ StatusBlob, VMStatus, EXT_CONF_FILE_NAME, _VmSettingsErrorReporter from azurelinuxagent.common.telemetryevent import GuestAgentExtensionEventsSchema, \ TelemetryEventParam, TelemetryEvent -from azurelinuxagent.common.utils import restutil, textutil +from azurelinuxagent.common.utils import restutil from azurelinuxagent.common.version import CURRENT_VERSION, DISTRO_NAME, DISTRO_VERSION from azurelinuxagent.ga.exthandlers import get_exthandlers_handler from tests.ga.test_monitor import random_generator @@ -52,7 +51,7 @@ from tests.protocol.HttpRequestPredicates import HttpRequestPredicates from tests.protocol.mockwiredata import DATA_FILE_NO_EXT, DATA_FILE from tests.protocol.mockwiredata import WireProtocolData -from tests.tools import Mock, PropertyMock, patch, AgentTestCase, load_bin_data, mock_sleep +from tests.tools import Mock, PropertyMock, patch, AgentTestCase data_with_bom = b'\xef\xbb\xbfhehe' testurl = 'http://foo' @@ -1242,46 +1241,13 @@ def http_get_vm_settings(_method, _host, _relative_url, **kwargs): self.assertEqual("GET_VM_SETTINGS_TEST_CONTAINER_ID", request_headers[1][hostplugin._HEADER_CONTAINER_ID], "The retry request did not include the expected header for the ContainerId") self.assertEqual("GET_VM_SETTINGS_TEST_ROLE_CONFIG_NAME", request_headers[1][hostplugin._HEADER_HOST_CONFIG_NAME], "The retry request did not include the expected header for the RoleConfigName") - def test_it_should_not_be_interrupted_by_errors_on_vm_settings(self): - def assert_no_exception(test_case, test_function, expected_error): - try: - with patch("azurelinuxagent.common.protocol.wire.add_event") as add_event: - test_function() - messages = [kwargs["message"] for _, kwargs in add_event.call_args_list] - self.assertTrue(any(expected_error in m for m in messages), "The expected error [{0}] did not 
occur. Got: {1}".format(expected_error, messages)) - except Exception as e: - self.fail("Error [{0}] produced an unexpected exception: {1}".format(test_case, textutil.format_exception(e))) - - def test_error_in_http_request(test_case, mock_response, expected_error): - def do_mock_request(): - def http_get_handler(url, *_, **__): - if self.is_host_plugin_vm_settings_request(url): - if isinstance(mock_response, Exception): - raise mock_response - return mock_response - return None - - with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS, http_get_handler=http_get_handler) as protocol: - protocol.client.update_goal_state() - - assert_no_exception(test_case, do_mock_request, expected_error) - # - # We test errors different kind of errors; none of them should make update_protocol raise an exception, but all of them should be reported - # - test_error_in_http_request("Internal error in the HostGAPlugin", MockHttpResponse(httpclient.BAD_GATEWAY), "[Internal error in HostGAPlugin] [HTTP Failed] [502: None]") - test_error_in_http_request("Arbitrary error in the request (BAD_REQUEST)", MockHttpResponse(httpclient.BAD_REQUEST), "[HTTP Failed] [400: None]") - test_error_in_http_request("Generic error in the request", Exception("GENERIC REQUEST ERROR"), "GENERIC REQUEST ERROR") - test_error_in_http_request("Response headers with no Etag", MockHttpResponse(200, b""), "The vmSettings response does not include an Etag header") - test_error_in_http_request("Invalid response (bad json)", MockHttpResponse(200, b"{ INVALID JSON ]", headers=[("Etag", 123)]), "Error parsing vmSettings") - - # Lastly, test the goal state comparison - def fail_compare(): - error = GoalStateMismatchError("TEST COMPARE FAILED", "dummy_attribute") - with patch("azurelinuxagent.common.protocol.extensions_goal_state.ExtensionsGoalState.compare", side_effect=error): - with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: - protocol.client.update_goal_state() - assert_no_exception("Goal 
state mismatch", fail_compare, "TEST COMPARE FAILED") + @staticmethod + def _update_goal_state_ignoring_errors(protocol): + try: + protocol.client.update_goal_state() + except ProtocolError: + pass def test_it_should_limit_the_number_of_errors_it_reports(self): with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: @@ -1293,7 +1259,7 @@ def http_get_handler(url, *_, **__): with patch("azurelinuxagent.common.protocol.wire.add_event") as add_event: for _ in range(_VmSettingsErrorReporter._MaxErrors + 3): - protocol.client.update_goal_state() + self._update_goal_state_ignoring_errors(protocol) messages = [kwargs["message"] for _, kwargs in add_event.call_args_list if kwargs["op"] == "VmSettings"] @@ -1301,11 +1267,11 @@ def http_get_handler(url, *_, **__): # Reset the error reporter and verify that additional errors are reported protocol.client._vm_settings_error_reporter._next_period = datetime.now() - protocol.client.update_goal_state() # this triggers the reset + self._update_goal_state_ignoring_errors(protocol) # this triggers the reset with patch("azurelinuxagent.common.protocol.wire.add_event") as add_event: for _ in range(3): - protocol.client.update_goal_state() + self._update_goal_state_ignoring_errors(protocol) messages = [kwargs["message"] for _, kwargs in add_event.call_args_list if kwargs["op"] == "VmSettings"] @@ -1337,14 +1303,16 @@ def http_get_handler(url, *_, **__): with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS, http_get_handler=http_get_handler) as protocol: self._assert_is_extensions_goal_state_from_extensions_config(protocol.get_extensions_goal_state()) - def test_it_should_use_extensions_config_when_the_vm_settings_request_fails(self): + def test_it_should_raise_protocol_error_when_the_vm_settings_request_fails(self): def http_get_handler(url, *_, **__): if self.is_host_plugin_vm_settings_request(url): - return MockHttpResponse(httpclient.INTERNAL_SERVER_ERROR) + return 
MockHttpResponse(httpclient.INTERNAL_SERVER_ERROR, body="TEST ERROR") return None - with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS, http_get_handler=http_get_handler) as protocol: - self._assert_is_extensions_goal_state_from_extensions_config(protocol.get_extensions_goal_state()) + with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: + protocol.set_http_handlers(http_get_handler=http_get_handler) + with self.assertRaisesRegexCM(ProtocolError, r'GET vmSettings \[correlation ID: .* eTag: .*\]: \[HTTP Failed\] \[500: None].*TEST ERROR.*'): + protocol.update_goal_state() def test_it_should_use_extensions_config_when_the_host_ga_plugin_version_is_not_supported(self): data_file = mockwiredata.DATA_FILE_VM_SETTINGS.copy() @@ -1353,65 +1321,6 @@ def test_it_should_use_extensions_config_when_the_host_ga_plugin_version_is_not_ with mock_wire_protocol(data_file) as protocol: self._assert_is_extensions_goal_state_from_extensions_config(protocol.get_extensions_goal_state()) - def test_it_should_use_extensions_config_when_vm_settings_can_not_be_parsed(self): - data_file = mockwiredata.DATA_FILE_VM_SETTINGS.copy() - data_file["vm_settings"] = "hostgaplugin/vm_settings-parse_error.json" - - with mock_wire_protocol(data_file) as protocol: - self._assert_is_extensions_goal_state_from_extensions_config(protocol.get_extensions_goal_state()) - - def test_it_should_use_extensions_config_when_vm_settings_do_not_match_extensions_config(self): - data_file = mockwiredata.DATA_FILE_VM_SETTINGS.copy() - data_file["vm_settings"] = "hostgaplugin/vm_settings-difference_in_required_features.json" - - with patch('azurelinuxagent.common.event.EventLogger.add_event') as add_event_patcher: - with mock_wire_protocol(data_file) as protocol: - self._assert_is_extensions_goal_state_from_extensions_config(protocol.get_extensions_goal_state()) - - reported = [kwargs for _, kwargs in add_event_patcher.call_args_list if kwargs['op'] == "VmSettings" and "GoalStateMismatchError" 
in kwargs['message']] - self.assertEqual(1, len(reported), "The goal state mismatch should have been reported exactly once; got: {0}".format([kwargs['message'] for _, kwargs in add_event_patcher.call_args_list])) - - def test_it_should_retry_vm_settings_and_extensions_config_do_not_match(self): - def http_get_handler(url, *_, **__): - if self.is_host_plugin_vm_settings_request(url): - response = MockHttpResponse(httpclient.OK) - response.body = load_bin_data("hostgaplugin/vm_settings-out-of-sync.json") - response.headers = [('ETag', "0123456789")] - return response - return None - - data_file = mockwiredata.DATA_FILE_VM_SETTINGS.copy() - with mock_wire_protocol(data_file) as protocol: - protocol.set_http_handlers(http_get_handler=http_get_handler) - - with patch('time.sleep', side_effect=lambda _: mock_sleep()): # avoid the sleep during retry - with patch("azurelinuxagent.common.protocol.wire.add_event") as add_event: - protocol.client.update_goal_state() - - vm_settings_call_count = len([url for url in protocol.get_tracked_urls() if "vmSettings" in url]) - self.assertEqual(2, vm_settings_call_count, "Expected 2 calls to vmSettings (original and retry)") - - errors = [kwargs["message"] for _, kwargs in add_event.call_args_list if kwargs["op"] == "VmSettings"] - self.assertTrue(any("[GoalStateMismatchError]" in e for e in errors), "Expected GoalStateMismatchError to have been reported. 
Got: {0}".format(errors)) - - def test_it_should_compare_goal_states_when_vm_settings_change(self): - with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: - protocol.mock_wire_data.set_etag("aNewEtag") - - with patch('azurelinuxagent.common.protocol.extensions_goal_state.ExtensionsGoalState.compare') as compare_patcher: - protocol.update_goal_state() - - self.assertEqual(1, compare_patcher.call_count, "ExtensionsGoalState.compare() should have been called exactly once") - - def test_it_should_compare_goal_states_when_extensions_config_change(self): - with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: - protocol.mock_wire_data.set_incarnation(468753) - - with patch('azurelinuxagent.common.protocol.extensions_goal_state.ExtensionsGoalState.compare') as compare_patcher: - protocol.update_goal_state() - - self.assertEqual(1, compare_patcher.call_count, "ExtensionsGoalState.compare() should have been called exactly once") - def test_it_should_keep_track_of_errors_in_vm_settings_requests(self): mock_response = None @@ -1425,24 +1334,24 @@ def http_get_handler(url, *_, **__): with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS, http_get_handler=http_get_handler) as protocol: mock_response = MockHttpResponse(httpclient.INTERNAL_SERVER_ERROR) - protocol.client.update_goal_state() + self._update_goal_state_ignoring_errors(protocol) mock_response = MockHttpResponse(httpclient.BAD_REQUEST) - protocol.client.update_goal_state() - protocol.client.update_goal_state() + self._update_goal_state_ignoring_errors(protocol) + self._update_goal_state_ignoring_errors(protocol) mock_response = IOError("timed out") - protocol.client.update_goal_state() + self._update_goal_state_ignoring_errors(protocol) mock_response = httpclient.HTTPException() - protocol.client.update_goal_state() - protocol.client.update_goal_state() + self._update_goal_state_ignoring_errors(protocol) + self._update_goal_state_ignoring_errors(protocol) # force the summary 
by resetting its period and calling update_goal_state with patch("azurelinuxagent.common.protocol.wire.add_event") as add_event: mock_response = None # stop producing errors protocol.client._vm_settings_error_reporter._next_period = datetime.now() - protocol.client.update_goal_state() + self._update_goal_state_ignoring_errors(protocol) summary_text = [kwargs["message"] for _, kwargs in add_event.call_args_list if kwargs["op"] == "VmSettingsSummary"] self.assertEqual(1, len(summary_text), "Exactly 1 summary should have been produced. Got: {0} ".format(summary_text)) From 483dfe383341b1182b1f53d359d93843320469ed Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Mon, 24 Jan 2022 11:25:48 -0800 Subject: [PATCH 12/84] Redact settings from mismatch message (#2477) (#2484) Co-authored-by: narrieta --- .../common/protocol/extensions_goal_state.py | 5 ++++- tests/protocol/test_extensions_goal_state.py | 17 ++++++++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/azurelinuxagent/common/protocol/extensions_goal_state.py b/azurelinuxagent/common/protocol/extensions_goal_state.py index 68ce74dc77..fcc7e25061 100644 --- a/azurelinuxagent/common/protocol/extensions_goal_state.py +++ b/azurelinuxagent/common/protocol/extensions_goal_state.py @@ -157,7 +157,10 @@ def compare_attributes(first, second, attribute, ignore_order=False): second_value.sort() if first_value != second_value: - mistmatch = "[{0}] != [{1}] (Attribute: {2})".format(first_value, second_value, ".".join(context)) + if attribute.lower() in ('protectedsettings', 'publicsettings'): + mistmatch = "[REDACTED] != [REDACTED] (Attribute: {0})".format(".".join(context)) + else: + mistmatch = "[{0}] != [{1}] (Attribute: {2})".format(first_value, second_value, ".".join(context)) message = "Mismatch in Goal States [Incarnation {0}] != [Etag: {1}]: {2}".format(from_extensions_config.id, from_vm_settings.id, mistmatch) raise GoalStateMismatchError(message, attribute) finally: diff --git 
a/tests/protocol/test_extensions_goal_state.py b/tests/protocol/test_extensions_goal_state.py index 1138df3a43..90b4136d06 100644 --- a/tests/protocol/test_extensions_goal_state.py +++ b/tests/protocol/test_extensions_goal_state.py @@ -31,12 +31,21 @@ def test_compare_should_report_mismatches_between_extensions_config_and_vm_setti from_extensions_config = protocol.client.get_extensions_goal_state() from_vm_settings = protocol.client._cached_vm_settings + mismatch_messages = { + 'publicSettings': None, + 'protectedSettings': None + } + def assert_compare_raises(setup_copy, failing_attribute): from_vm_settings_copy = copy.deepcopy(from_vm_settings) setup_copy(from_vm_settings_copy) - with self.assertRaisesRegexCM(GoalStateMismatchError, re.escape("(Attribute: {0})".format(failing_attribute)), re.DOTALL): + with self.assertRaisesRegexCM(GoalStateMismatchError, re.escape("(Attribute: {0})".format(failing_attribute)), re.DOTALL) as context_manager: ExtensionsGoalState.compare(from_extensions_config, from_vm_settings_copy) + if context_manager.exception.attribute == 'publicSettings': + mismatch_messages['publicSettings'] = str(context_manager.exception) + elif context_manager.exception.attribute == 'protectedSettings': + mismatch_messages['protectedSettings'] = str(context_manager.exception) assert_compare_raises(lambda c: setattr(c, "_activity_id", 'MOCK_ACTIVITY_ID'), "activity_id") assert_compare_raises(lambda c: setattr(c, "_correlation_id", 'MOCK_CORRELATION_ID'), "correlation_id") @@ -63,6 +72,12 @@ def assert_compare_raises(setup_copy, failing_attribute): assert_compare_raises(lambda c: setattr(c.extensions[0].settings[0], "dependencyLevel", 56789), r"extensions[0].settings[0].dependencyLevel") assert_compare_raises(lambda c: setattr(c.extensions[0].settings[0], "state", 'MOCK_STATE'), r"extensions[0].settings[0].state") + expected = r'^\[GoalStateMismatchError\] Mismatch in Goal States \[Incarnation 1\] != \[Etag: 1\]: \[REDACTED\] != \[REDACTED\] \(Attribute: 
.*\.publicSettings\)$' + self.assertRegex(mismatch_messages['publicSettings'], expected, 'Expected the protected settings to be redacted. Got: "{0}"'.format(mismatch_messages['publicSettings'])) + + expected = r'^\[GoalStateMismatchError\] Mismatch in Goal States \[Incarnation 1\] != \[Etag: 1\]: \[REDACTED\] != \[REDACTED\] \(Attribute: .*\.protectedSettings\)$' + self.assertRegex(mismatch_messages['protectedSettings'], expected, 'Expected the protected settings to be redacted. Got: "{0}"'.format(mismatch_messages['protectedSettings'])) + def test_create_from_extensions_config_should_assume_block_when_blob_type_is_not_valid(self): data_file = mockwiredata.DATA_FILE.copy() data_file["vm_settings"] = "hostgaplugin/ext_conf-invalid_blob_type.xml" From 2b6dc403b9cf1e5267aa3ed503a63d05d6bd9bf6 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Mon, 24 Jan 2022 11:36:52 -0800 Subject: [PATCH 13/84] Improvements in telemetry for vmSettings (#2482) (#2485) Co-authored-by: narrieta --- .../common/protocol/extensions_goal_state.py | 5 +++-- .../extensions_goal_state_from_vm_settings.py | 10 ++++++---- azurelinuxagent/common/protocol/wire.py | 2 +- azurelinuxagent/ga/exthandlers.py | 5 +++-- tests/protocol/test_extensions_goal_state.py | 16 ++++------------ 5 files changed, 17 insertions(+), 21 deletions(-) diff --git a/azurelinuxagent/common/protocol/extensions_goal_state.py b/azurelinuxagent/common/protocol/extensions_goal_state.py index fcc7e25061..ee7079277d 100644 --- a/azurelinuxagent/common/protocol/extensions_goal_state.py +++ b/azurelinuxagent/common/protocol/extensions_goal_state.py @@ -127,10 +127,11 @@ def compare_extensions(first, second): compare_array(first.settings, second.settings, compare_settings, "settings") def compare_settings(first, second): + # Note that we do not compare protectedSettings since the same settings can be re-encrypted, resulting + # on different encrypted text for the same plain text. 
compare_attributes(first, second, "name") compare_attributes(first, second, "sequenceNumber") compare_attributes(first, second, "publicSettings") - compare_attributes(first, second, "protectedSettings") compare_attributes(first, second, "certificateThumbprint") compare_attributes(first, second, "dependencyLevel") compare_attributes(first, second, "state") @@ -157,7 +158,7 @@ def compare_attributes(first, second, attribute, ignore_order=False): second_value.sort() if first_value != second_value: - if attribute.lower() in ('protectedsettings', 'publicsettings'): + if attribute.lower() == 'publicsettings': mistmatch = "[REDACTED] != [REDACTED] (Attribute: {0})".format(".".join(context)) else: mistmatch = "[{0}] != [{1}] (Attribute: {2})".format(first_value, second_value, ".".join(context)) diff --git a/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py b/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py index efc374beca..dcf6c2c2d9 100644 --- a/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py +++ b/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py @@ -53,7 +53,7 @@ def __init__(self, etag, json_text): self._parse_vm_settings(json_text) self._do_common_validations() except Exception as e: - raise VmSettingsError("Error parsing vmSettings (etag: {0}): {1}\n{2}".format(etag, format_exception(e), self.get_redacted_text())) + raise VmSettingsError("Error parsing vmSettings (etag: {0} HGAP: {1}): {2}\n{3}".format(etag, self._host_ga_plugin_version, format_exception(e), self.get_redacted_text())) @property def id(self): @@ -139,14 +139,16 @@ def _parse_simple_attributes(self, vm_settings): # "extensionGoalStatesSource": "Fabric", # ... 
# } - self._activity_id = self._string_to_id(vm_settings.get("activityId")) - self._correlation_id = self._string_to_id(vm_settings.get("correlationId")) - self._created_on_timestamp = self._ticks_to_utc_timestamp(vm_settings.get("extensionsLastModifiedTickCount")) + # The HGAP version is included in some messages, so parse it first host_ga_plugin_version = vm_settings.get("hostGAPluginVersion") if host_ga_plugin_version is not None: self._host_ga_plugin_version = FlexibleVersion(host_ga_plugin_version) + self._activity_id = self._string_to_id(vm_settings.get("activityId")) + self._correlation_id = self._string_to_id(vm_settings.get("correlationId")) + self._created_on_timestamp = self._ticks_to_utc_timestamp(vm_settings.get("extensionsLastModifiedTickCount")) + schema_version = vm_settings.get("vmSettingsSchemaVersion") if schema_version is not None: self._schema_version = FlexibleVersion(schema_version) diff --git a/azurelinuxagent/common/protocol/wire.py b/azurelinuxagent/common/protocol/wire.py index abf8b4188d..2f80ac212f 100644 --- a/azurelinuxagent/common/protocol/wire.py +++ b/azurelinuxagent/common/protocol/wire.py @@ -894,7 +894,7 @@ def raise_not_supported(reset_state=False): correlation_id = str(uuid.uuid4()) def format_message(msg): - return "GET vmSettings [correlation ID: {0} eTag: {1}]: {2}".format(correlation_id, etag, msg) + return "GET vmSettings [correlation ID: {0} eTag: {1} HGAP: {2}]: {3}".format(correlation_id, etag, self._host_plugin_version, msg) try: def get_vm_settings(): diff --git a/azurelinuxagent/ga/exthandlers.py b/azurelinuxagent/ga/exthandlers.py index a654806e60..29133c59c3 100644 --- a/azurelinuxagent/ga/exthandlers.py +++ b/azurelinuxagent/ga/exthandlers.py @@ -300,8 +300,9 @@ def run(self): try: extensions_goal_state = self.protocol.get_extensions_goal_state() - # self.ext_handlers and etag need to be initialized first, since status reporting depends on them - self.ext_handlers = extensions_goal_state.extensions + # 
self.ext_handlers and etag need to be initialized first, since status reporting depends on them; also + # we make a deep copy of the extensions, since changes are made to self.ext_handlers while processing the extensions + self.ext_handlers = copy.deepcopy(extensions_goal_state.extensions) etag = self.protocol.client.get_goal_state().incarnation if not self._extension_processing_allowed(): diff --git a/tests/protocol/test_extensions_goal_state.py b/tests/protocol/test_extensions_goal_state.py index 90b4136d06..15c3f605a0 100644 --- a/tests/protocol/test_extensions_goal_state.py +++ b/tests/protocol/test_extensions_goal_state.py @@ -31,10 +31,7 @@ def test_compare_should_report_mismatches_between_extensions_config_and_vm_setti from_extensions_config = protocol.client.get_extensions_goal_state() from_vm_settings = protocol.client._cached_vm_settings - mismatch_messages = { - 'publicSettings': None, - 'protectedSettings': None - } + public_settings_mismatch = [""] def assert_compare_raises(setup_copy, failing_attribute): from_vm_settings_copy = copy.deepcopy(from_vm_settings) @@ -43,9 +40,7 @@ def assert_compare_raises(setup_copy, failing_attribute): with self.assertRaisesRegexCM(GoalStateMismatchError, re.escape("(Attribute: {0})".format(failing_attribute)), re.DOTALL) as context_manager: ExtensionsGoalState.compare(from_extensions_config, from_vm_settings_copy) if context_manager.exception.attribute == 'publicSettings': - mismatch_messages['publicSettings'] = str(context_manager.exception) - elif context_manager.exception.attribute == 'protectedSettings': - mismatch_messages['protectedSettings'] = str(context_manager.exception) + public_settings_mismatch[0] = str(context_manager.exception) assert_compare_raises(lambda c: setattr(c, "_activity_id", 'MOCK_ACTIVITY_ID'), "activity_id") assert_compare_raises(lambda c: setattr(c, "_correlation_id", 'MOCK_CORRELATION_ID'), "correlation_id") @@ -64,19 +59,16 @@ def assert_compare_raises(setup_copy, failing_attribute): 
assert_compare_raises(lambda c: setattr(c.extensions[0], "manifest_uris", ['MOCK_URI']), r"extensions[0].manifest_uris") assert_compare_raises(lambda c: setattr(c.extensions[0], "supports_multi_config", True), r"extensions[0].supports_multi_config") + # NOTE: protectedSettings are not compared, so we skip them below assert_compare_raises(lambda c: setattr(c.extensions[0].settings[0], "name", 'MOCK_NAME'), r"extensions[0].settings[0].name") assert_compare_raises(lambda c: setattr(c.extensions[0].settings[0], "sequenceNumber", 98765), r"extensions[0].settings[0].sequenceNumber") assert_compare_raises(lambda c: setattr(c.extensions[0].settings[0], "publicSettings", {'MOCK_NAME': 'MOCK_VALUE'}), r"extensions[0].settings[0].publicSettings") - assert_compare_raises(lambda c: setattr(c.extensions[0].settings[0], "protectedSettings", 'MOCK_SETTINGS'), r"extensions[0].settings[0].protectedSettings") assert_compare_raises(lambda c: setattr(c.extensions[0].settings[0], "certificateThumbprint", 'MOCK_CERT'), r"extensions[0].settings[0].certificateThumbprint") assert_compare_raises(lambda c: setattr(c.extensions[0].settings[0], "dependencyLevel", 56789), r"extensions[0].settings[0].dependencyLevel") assert_compare_raises(lambda c: setattr(c.extensions[0].settings[0], "state", 'MOCK_STATE'), r"extensions[0].settings[0].state") expected = r'^\[GoalStateMismatchError\] Mismatch in Goal States \[Incarnation 1\] != \[Etag: 1\]: \[REDACTED\] != \[REDACTED\] \(Attribute: .*\.publicSettings\)$' - self.assertRegex(mismatch_messages['publicSettings'], expected, 'Expected the protected settings to be redacted. Got: "{0}"'.format(mismatch_messages['publicSettings'])) - - expected = r'^\[GoalStateMismatchError\] Mismatch in Goal States \[Incarnation 1\] != \[Etag: 1\]: \[REDACTED\] != \[REDACTED\] \(Attribute: .*\.protectedSettings\)$' - self.assertRegex(mismatch_messages['protectedSettings'], expected, 'Expected the protected settings to be redacted. 
Got: "{0}"'.format(mismatch_messages['protectedSettings'])) + self.assertRegex(public_settings_mismatch[0], expected, 'Expected the protected settings to be redacted. Got: "{0}"'.format(public_settings_mismatch[0])) def test_create_from_extensions_config_should_assume_block_when_blob_type_is_not_valid(self): data_file = mockwiredata.DATA_FILE.copy() From 951908d8b14029fdd15b9506a8103e53af716012 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Wed, 26 Jan 2022 13:44:53 -0800 Subject: [PATCH 14/84] Move fetch_vm_settings() to HostPluginProtocol (#2486) * Move get_vm_settings() to HostPluginProtocol * remove comment * Remove comment * fix variable * fix comment Co-authored-by: narrieta --- azurelinuxagent/common/protocol/hostplugin.py | 181 ++++++++++++++- azurelinuxagent/common/protocol/wire.py | 210 ++---------------- tests/protocol/test_extensions_goal_state.py | 4 +- tests/protocol/test_hostplugin.py | 125 ++++++++++- tests/protocol/test_wire.py | 120 +--------- 5 files changed, 321 insertions(+), 319 deletions(-) diff --git a/azurelinuxagent/common/protocol/hostplugin.py b/azurelinuxagent/common/protocol/hostplugin.py index 3198161275..9b6ab0762e 100644 --- a/azurelinuxagent/common/protocol/hostplugin.py +++ b/azurelinuxagent/common/protocol/hostplugin.py @@ -25,9 +25,11 @@ from azurelinuxagent.common import logger from azurelinuxagent.common.errorstate import ErrorState, ERROR_STATE_HOST_PLUGIN_FAILURE from azurelinuxagent.common.event import WALAEventOperation, add_event -from azurelinuxagent.common.exception import HttpError, ProtocolError -from azurelinuxagent.common.future import ustr +from azurelinuxagent.common.exception import HttpError, ProtocolError, ResourceGoneError +from azurelinuxagent.common.utils.flexible_version import FlexibleVersion +from azurelinuxagent.common.future import ustr, httpclient from azurelinuxagent.common.protocol.healthservice import HealthService +from azurelinuxagent.common.protocol.extensions_goal_state_factory import 
ExtensionsGoalStateFactory from azurelinuxagent.common.utils import restutil from azurelinuxagent.common.utils import textutil from azurelinuxagent.common.utils.textutil import remove_bom @@ -79,6 +81,11 @@ def __init__(self, endpoint, container_id, role_config_name): self.status_error_state = ErrorState(min_timedelta=ERROR_STATE_HOST_PLUGIN_FAILURE) self.fetch_last_timestamp = None self.status_last_timestamp = None + self._host_plugin_version = FlexibleVersion("0.0.0.0") # Version 0 means "unknown" + self._host_plugin_supports_vm_settings = False + self._host_plugin_supports_vm_settings_next_check = datetime.datetime.now() + self._vm_settings_error_reporter = _VmSettingsErrorReporter() + self._cached_vm_settings = None # Cached value of the most recent ExtensionsGoalStateFromVmSettings @staticmethod def _extract_deployment_id(role_config_name): @@ -383,3 +390,173 @@ def _base64_encode(self, data): if PY_VERSION_MAJOR > 2: return s.decode('utf-8') return s + + def fetch_vm_settings(self, force_update): + """ + Queries the vmSettings from the HostGAPlugin and returns an (ExtensionsGoalStateFromVmSettings, bool) tuple with the vmSettings and + a boolean indicating if they are an updated (True) or a cached value (False). + + Raises VmSettingsNotSupported if the HostGAPlugin does not support the vmSettings API, ResourceGoneError if the container ID and roleconfig name + need to be refreshed, or ProtocolError if the request fails for any other reason (e.g. not supported, time out, server error). 
+ """ + def raise_not_supported(reset_state=False): + if reset_state: + self._host_plugin_supports_vm_settings = False + self._host_plugin_supports_vm_settings_next_check = datetime.datetime.now() + datetime.timedelta(hours=6) # check again in 6 hours + # "Not supported" is not considered an error, so don't use self._vm_settings_error_reporter to report it + logger.info("vmSettings is not supported") + add_event(op=WALAEventOperation.HostPlugin, message="vmSettings is not supported", is_success=True) + raise VmSettingsNotSupported() + + try: + # Raise if VmSettings are not supported but check for periodically since the HostGAPlugin could have been updated since the last check + if not self._host_plugin_supports_vm_settings and self._host_plugin_supports_vm_settings_next_check > datetime.datetime.now(): + raise_not_supported() + + etag = None if force_update or self._cached_vm_settings is None else self._cached_vm_settings.etag + correlation_id = str(uuid.uuid4()) + + def format_message(msg): + return "GET vmSettings [correlation ID: {0} eTag: {1}]: {2}".format(correlation_id, etag, msg) + + def get_vm_settings(): + url, headers = self.get_vm_settings_request(correlation_id) + if etag is not None: + headers['if-none-match'] = etag + return restutil.http_get(url, headers=headers, use_proxy=False, max_retry=1, return_raw_response=True) + + self._vm_settings_error_reporter.report_request() + + response = get_vm_settings() + + if response.status == httpclient.GONE: + raise ResourceGoneError() + + if response.status == httpclient.NOT_FOUND: # the HostGAPlugin does not support FastTrack + raise_not_supported(reset_state=True) + + if response.status == httpclient.NOT_MODIFIED: # The goal state hasn't changed, return the current instance + return self._cached_vm_settings, False + + if response.status != httpclient.OK: + error_description = restutil.read_response_error(response) + # For historical reasons the HostGAPlugin returns 502 (BAD_GATEWAY) for internal errors instead 
of using + # 500 (INTERNAL_SERVER_ERROR). We add a short prefix to the error message in the hope that it will help + # clear any confusion produced by the poor choice of status code. + if response.status == httpclient.BAD_GATEWAY: + error_description = "[Internal error in HostGAPlugin] {0}".format(error_description) + error_description = format_message(error_description) + + if 400 <= response.status <= 499: + self._vm_settings_error_reporter.report_error(error_description, _VmSettingsError.ClientError) + elif 500 <= response.status <= 599: + self._vm_settings_error_reporter.report_error(error_description, _VmSettingsError.ServerError) + else: + self._vm_settings_error_reporter.report_error(error_description) + + raise ProtocolError(error_description) + + for h in response.getheaders(): + if h[0].lower() == 'etag': + response_etag = h[1] + break + else: # since the vmSettings were updated, the response must include an etag + message = format_message("The vmSettings response does not include an Etag header") + self._vm_settings_error_reporter.report_error(message) + raise ProtocolError(message) + + response_content = ustr(response.read(), encoding='utf-8') + + vm_settings = ExtensionsGoalStateFactory.create_from_vm_settings(response_etag, response_content) + + # log the HostGAPlugin version + if vm_settings.host_ga_plugin_version != self._host_plugin_version: + self._host_plugin_version = vm_settings.host_ga_plugin_version + message = "HostGAPlugin version: {0}".format(vm_settings.host_ga_plugin_version) + logger.info(message) + add_event(op=WALAEventOperation.HostPlugin, message=message, is_success=True) + + # Don't support HostGAPlugin versions older than 115 + if vm_settings.host_ga_plugin_version < FlexibleVersion("1.0.8.115"): + raise_not_supported(reset_state=True) + + logger.info("Fetched new vmSettings [correlation ID: {0} New eTag: {1}]", correlation_id, vm_settings.etag) + self._host_plugin_supports_vm_settings = True + self._cached_vm_settings = 
vm_settings + return vm_settings, True + + except (ProtocolError, ResourceGoneError, VmSettingsNotSupported): + raise + except Exception as exception: + if isinstance(exception, IOError) and "timed out" in ustr(exception): + message = format_message("Timeout") + self._vm_settings_error_reporter.report_error(message, _VmSettingsError.Timeout) + else: + message = format_message("Request failed: {0}".format(textutil.format_exception(exception))) + self._vm_settings_error_reporter.report_error(message, _VmSettingsError.RequestFailed) + raise ProtocolError(message) + finally: + self._vm_settings_error_reporter.report_summary() + + +class VmSettingsNotSupported(TypeError): + pass + + +class _VmSettingsError(object): + ServerError = 'ServerError' + ClientError = 'ClientError' + Timeout = 'Timeout' + RequestFailed = 'RequestFailed' + + +class _VmSettingsErrorReporter(object): + _MaxErrors = 5 # Max number of error reported by period + _Period = datetime.timedelta(hours=1) # How often to report the summary + + def __init__(self): + self._reset() + + def _reset(self): + self._request_count = 0 # Total number of vmSettings HTTP requests + self._error_count = 0 # Total number of errors issuing vmSettings requests (includes all kinds of errors) + self._server_error_count = 0 # Count of server side errors (HTTP status in the 500s) + self._client_error_count = 0 # Count of client side errors (HTTP status in the 400s) + self._timeout_count = 0 # Count of timeouts on vmSettings requests + self._request_failure_count = 0 # Total count of requests that could not be issued (does not include timeouts or requests that were actually issued and failed, for example, with 500 or 400 statuses) + self._next_period = datetime.datetime.now() + _VmSettingsErrorReporter._Period + + def report_request(self): + self._request_count += 1 + + def report_error(self, error, category=None): + self._error_count += 1 + + if self._error_count <= _VmSettingsErrorReporter._MaxErrors: + 
add_event(op=WALAEventOperation.VmSettings, message=error, is_success=False, log_event=False) + + if category == _VmSettingsError.ServerError: + self._server_error_count += 1 + elif category == _VmSettingsError.ClientError: + self._client_error_count += 1 + elif category == _VmSettingsError.Timeout: + self._timeout_count += 1 + elif category == _VmSettingsError.RequestFailed: + self._request_failure_count += 1 + + def report_summary(self): + if datetime.datetime.now() >= self._next_period: + summary = { + "requests": self._request_count, + "errors": self._error_count, + "serverErrors": self._server_error_count, + "clientErrors": self._client_error_count, + "timeouts": self._timeout_count, + "failedRequests": self._request_failure_count + } + message = json.dumps(summary) + add_event(op=WALAEventOperation.VmSettingsSummary, message=message, is_success=False, log_event=False) + if self._error_count > 0: + logger.info("[VmSettingsSummary] {0}", message) + + self._reset() diff --git a/azurelinuxagent/common/protocol/wire.py b/azurelinuxagent/common/protocol/wire.py index c5c19a8f60..efce4e6d7c 100644 --- a/azurelinuxagent/common/protocol/wire.py +++ b/azurelinuxagent/common/protocol/wire.py @@ -20,7 +20,6 @@ import os import random import time -import uuid import xml.sax.saxutils as saxutils from collections import defaultdict from datetime import datetime, timedelta @@ -35,16 +34,14 @@ from azurelinuxagent.common.exception import ProtocolNotFoundError, \ ResourceGoneError, ExtensionDownloadError, InvalidContainerError, ProtocolError, HttpError from azurelinuxagent.common.future import httpclient, bytebuffer, ustr -from azurelinuxagent.common.protocol.extensions_goal_state_factory import ExtensionsGoalStateFactory from azurelinuxagent.common.protocol.goal_state import GoalState, TRANSPORT_CERT_FILE_NAME, TRANSPORT_PRV_FILE_NAME -from azurelinuxagent.common.protocol.hostplugin import HostPluginProtocol +from azurelinuxagent.common.protocol.hostplugin import 
HostPluginProtocol, VmSettingsNotSupported from azurelinuxagent.common.protocol.restapi import DataContract, ExtHandlerPackage, \ ExtHandlerPackageList, ProvisionStatus, VMInfo, VMStatus from azurelinuxagent.common.telemetryevent import GuestAgentExtensionEventsSchema from azurelinuxagent.common.utils import fileutil, restutil from azurelinuxagent.common.utils.archive import StateFlusher from azurelinuxagent.common.utils.cryptutil import CryptUtil -from azurelinuxagent.common.utils.flexible_version import FlexibleVersion from azurelinuxagent.common.utils.textutil import parse_doc, findall, find, \ findtext, gettext, remove_bom, get_bytes_from_pem, parse_json from azurelinuxagent.common.version import AGENT_NAME, CURRENT_VERSION @@ -576,14 +573,9 @@ def __init__(self, endpoint): self._endpoint = endpoint self._goal_state = None self._extensions_goal_state = None # The goal state to use for extensions; can be an ExtensionsGoalStateFromVmSettings or ExtensionsGoalStateFromExtensionsConfig - self._cached_vm_settings = None # Cached value of the most recent ExtensionsGoalStateFromVmSettings self._host_plugin = None - self._host_plugin_version = FlexibleVersion("0.0.0.0") # Version 0 means "unknown" - self._host_plugin_supports_vm_settings = False - self._host_plugin_supports_vm_settings_next_check = datetime.now() self.status_blob = StatusBlob(self) self.goal_state_flusher = StateFlusher(conf.get_lib_dir()) - self._vm_settings_error_reporter = _VmSettingsErrorReporter() def get_endpoint(self): return self._endpoint @@ -799,18 +791,23 @@ def update_goal_state(self, force_update=False): # goal_state = GoalState(self) - self._update_host_plugin(goal_state.container_id, goal_state.role_config_name) + host_ga_plugin = self.get_host_plugin() + host_ga_plugin.update_container_id(goal_state.container_id) + host_ga_plugin.update_role_config_name(goal_state.role_config_name) # # Then we fetch the vmSettings from the HostGAPlugin; the response will include the goal state for 
extensions. # - vm_settings_goal_state, vm_settings_goal_state_updated = (None, False) + vm_settings, vm_settings_updated = (None, False) if conf.get_enable_fast_track(): try: - vm_settings_goal_state, vm_settings_goal_state_updated = self._fetch_vm_settings_goal_state(force_update=force_update) + vm_settings, vm_settings_updated = host_ga_plugin.fetch_vm_settings(force_update=force_update) except VmSettingsNotSupported: pass # if vmSettings are not supported we use extensionsConfig below + except ResourceGoneError: + self.update_host_plugin_from_goal_state() + vm_settings, vm_settings_updated = host_ga_plugin.fetch_vm_settings(force_update=force_update) # # Now we fetch the rest of the goal state from the WireServer (but ony if needed: initialization, a "forced" update, or @@ -831,135 +828,28 @@ def update_goal_state(self, force_update=False): # # And, lastly, we use extensionsConfig if we don't have the vmSettings (Fast Track may be disabled or not supported). # - if vm_settings_goal_state is not None: - self._extensions_goal_state = vm_settings_goal_state + if vm_settings is not None: + self._extensions_goal_state = vm_settings else: self._extensions_goal_state = self._goal_state.extensions_config # # If either goal state changed (goal_state or vm_settings_goal_state) save them # - if goal_state_updated or vm_settings_goal_state_updated: - self._save_goal_state() + if goal_state_updated or vm_settings_updated: + self._save_goal_state(vm_settings) except ProtocolError: raise except Exception as exception: raise ProtocolError("Error fetching goal state: {0}".format(ustr(exception))) - def _fetch_vm_settings_goal_state(self, force_update): - """ - Queries the vmSettings from the HostGAPlugin and returns an (ExtensionsGoalStateFromVmSettings, bool) tuple with the vmSettings and - a boolean indicating if they are an updated (True) or a cached value (False). 
- - Raises VmSettingsNotSupported if the HostGAPlugin does not support the vmSettings API, or ProtocolError if the request fails for any other reason - (e.g. not supported, time out, server error). - """ - def raise_not_supported(reset_state=False): - if reset_state: - self._host_plugin_supports_vm_settings = False - self._host_plugin_supports_vm_settings_next_check = datetime.now() + timedelta(hours=6) # check again in 6 hours - # "Not supported" is not considered an error, so don't use self._vm_settings_error_reporter to report it - logger.info("vmSettings is not supported") - add_event(op=WALAEventOperation.HostPlugin, message="vmSettings is not supported", is_success=True) - raise VmSettingsNotSupported() - - try: - # Raise if VmSettings are not supported but check for periodically since the HostGAPlugin could have been updated since the last check - if not self._host_plugin_supports_vm_settings and self._host_plugin_supports_vm_settings_next_check > datetime.now(): - raise_not_supported() - - etag = None if force_update or self._cached_vm_settings is None else self._cached_vm_settings.etag - correlation_id = str(uuid.uuid4()) - - def format_message(msg): - return "GET vmSettings [correlation ID: {0} eTag: {1}]: {2}".format(correlation_id, etag, msg) - - def get_vm_settings(): - url, headers = self.get_host_plugin().get_vm_settings_request(correlation_id) - if etag is not None: - headers['if-none-match'] = etag - return restutil.http_get(url, headers=headers, use_proxy=False, max_retry=1, return_raw_response=True) - - self._vm_settings_error_reporter.report_request() - - response = get_vm_settings() - - if response.status == httpclient.GONE: # retry after refreshing the HostGAPlugin - self.update_host_plugin_from_goal_state() - response = get_vm_settings() - - if response.status == httpclient.NOT_FOUND: # the HostGAPlugin does not support FastTrack - raise_not_supported(reset_state=True) - - if response.status == httpclient.NOT_MODIFIED: # The goal state hasn't 
changed, return the current instance - return self._cached_vm_settings, False - - if response.status != httpclient.OK: - error_description = restutil.read_response_error(response) - # For historical reasons the HostGAPlugin returns 502 (BAD_GATEWAY) for internal errors instead of using - # 500 (INTERNAL_SERVER_ERROR). We add a short prefix to the error message in the hope that it will help - # clear any confusion produced by the poor choice of status code. - if response.status == httpclient.BAD_GATEWAY: - error_description = "[Internal error in HostGAPlugin] {0}".format(error_description) - error_description = format_message(error_description) - - if 400 <= response.status <= 499: - self._vm_settings_error_reporter.report_error(error_description, _VmSettingsError.ClientError) - elif 500 <= response.status <= 599: - self._vm_settings_error_reporter.report_error(error_description, _VmSettingsError.ServerError) - else: - self._vm_settings_error_reporter.report_error(error_description) - - raise ProtocolError(error_description) - - for h in response.getheaders(): - if h[0].lower() == 'etag': - response_etag = h[1] - break - else: # since the vmSettings were updated, the response must include an etag - message = format_message("The vmSettings response does not include an Etag header") - self._vm_settings_error_reporter.report_error(message) - raise ProtocolError(message) - - response_content = self.decode_config(response.read()) - vm_settings = ExtensionsGoalStateFactory.create_from_vm_settings(response_etag, response_content) - - # log the HostGAPlugin version - if vm_settings.host_ga_plugin_version != self._host_plugin_version: - self._host_plugin_version = vm_settings.host_ga_plugin_version - message = "HostGAPlugin version: {0}".format(vm_settings.host_ga_plugin_version) - logger.info(message) - add_event(op=WALAEventOperation.HostPlugin, message=message, is_success=True) - - # Don't support HostGAPlugin versions older than 115 - if 
vm_settings.host_ga_plugin_version < FlexibleVersion("1.0.8.115"): - raise_not_supported(reset_state=True) - - logger.info("Fetched new vmSettings [correlation ID: {0} New eTag: {1}]", correlation_id, vm_settings.etag) - self._host_plugin_supports_vm_settings = True - self._cached_vm_settings = vm_settings - return vm_settings, True - - except (ProtocolError, VmSettingsNotSupported): - raise - except Exception as exception: - if isinstance(exception, IOError) and "timed out" in ustr(exception): - message = format_message("Timeout") - self._vm_settings_error_reporter.report_error(message, _VmSettingsError.Timeout) - else: - message = format_message("Request failed: {0}".format(textutil.format_exception(exception))) - self._vm_settings_error_reporter.report_error(message, _VmSettingsError.RequestFailed) - raise ProtocolError(message) - finally: - self._vm_settings_error_reporter.report_summary() - def _update_host_plugin(self, container_id, role_config_name): if self._host_plugin is not None: self._host_plugin.update_container_id(container_id) self._host_plugin.update_role_config_name(role_config_name) - def _save_goal_state(self): + def _save_goal_state(self, vm_settings): try: self.goal_state_flusher.flush() except Exception as e: @@ -980,12 +870,10 @@ def save_if_not_none(goal_state_property, file_name): text = self._goal_state.extensions_config.get_redacted_text() if text != '': self._save_cache(text, EXT_CONF_FILE_NAME.format(self._goal_state.extensions_config.incarnation)) - # TODO: When Fast Track is fully enabled self._cached_vm_settings will go away and this can be deleted - if self._cached_vm_settings is not None: - text = self._cached_vm_settings.get_redacted_text() + if vm_settings is not None: + text = vm_settings.get_redacted_text() if text != '': - self._save_cache(text, VM_SETTINGS_FILE_NAME.format(self._cached_vm_settings.id)) - # END TODO + self._save_cache(text, VM_SETTINGS_FILE_NAME.format(vm_settings.id)) except Exception as e: 
logger.warn("Failed to save the goal state to disk: {0}", ustr(e)) @@ -1507,67 +1395,3 @@ def is_on_hold(self): if 'onHold' in self.__dict__: return str(self.onHold).lower() == 'true' # pylint: disable=E1101 return False - - -class _VmSettingsError(object): - ServerError = 'ServerError' - ClientError = 'ClientError' - Timeout = 'Timeout' - RequestFailed = 'RequestFailed' - - -class _VmSettingsErrorReporter(object): - _MaxErrors = 5 # Max number of error reported by period - _Period = timedelta(hours=1) # How often to report the summary - - def __init__(self): - self._reset() - - def _reset(self): - self._request_count = 0 # Total number of vmSettings HTTP requests - self._error_count = 0 # Total number of errors issuing vmSettings requests (includes all kinds of errors) - self._server_error_count = 0 # Count of server side errors (HTTP status in the 500s) - self._client_error_count = 0 # Count of client side errors (HTTP status in the 400s) - self._timeout_count = 0 # Count of timeouts on vmSettings requests - self._request_failure_count = 0 # Total count of requests that could not be issued (does not include timeouts or requests that were actually issued and failed, for example, with 500 or 400 statuses) - self._next_period = datetime.now() + _VmSettingsErrorReporter._Period - - def report_request(self): - self._request_count += 1 - - def report_error(self, error, category=None): - self._error_count += 1 - - if self._error_count <= _VmSettingsErrorReporter._MaxErrors: - add_event(op=WALAEventOperation.VmSettings, message=error, is_success=False, log_event=False) - - if category == _VmSettingsError.ServerError: - self._server_error_count += 1 - elif category == _VmSettingsError.ClientError: - self._client_error_count += 1 - elif category == _VmSettingsError.Timeout: - self._timeout_count += 1 - elif category == _VmSettingsError.RequestFailed: - self._request_failure_count += 1 - - def report_summary(self): - if datetime.now() >= self._next_period: - summary = { - 
"requests": self._request_count, - "errors": self._error_count, - "serverErrors": self._server_error_count, - "clientErrors": self._client_error_count, - "timeouts": self._timeout_count, - "failedRequests": self._request_failure_count - } - # always send telemetry, but log errors only - message = json.dumps(summary) - add_event(op=WALAEventOperation.VmSettingsSummary, message=message, is_success=False, log_event=False) - if self._error_count > 0: - logger.info("[VmSettingsSummary] {0}", message) - - self._reset() - - -class VmSettingsNotSupported(TypeError): - pass diff --git a/tests/protocol/test_extensions_goal_state.py b/tests/protocol/test_extensions_goal_state.py index dcc0c2c79a..279d4193fa 100644 --- a/tests/protocol/test_extensions_goal_state.py +++ b/tests/protocol/test_extensions_goal_state.py @@ -31,7 +31,7 @@ def test_extension_goal_state_should_parse_requested_version_properly(self): for manifest in fabric_manifests: self.assertEqual(manifest.requested_version_string, "0.0.0.0", "Version should be None") - vm_settings_ga_manifests = protocol.client._cached_vm_settings.agent_manifests + vm_settings_ga_manifests = protocol.client._host_plugin._cached_vm_settings.agent_manifests for manifest in vm_settings_ga_manifests: self.assertEqual(manifest.requested_version_string, "0.0.0.0", "Version should be None") @@ -43,6 +43,6 @@ def test_extension_goal_state_should_parse_requested_version_properly(self): for manifest in fabric_manifests: self.assertEqual(manifest.requested_version_string, "9.9.9.10", "Version should be 9.9.9.10") - vm_settings_ga_manifests = protocol.client._cached_vm_settings.agent_manifests + vm_settings_ga_manifests = protocol.client._host_plugin._cached_vm_settings.agent_manifests for manifest in vm_settings_ga_manifests: self.assertEqual(manifest.requested_version_string, "9.9.9.9", "Version should be 9.9.9.9") diff --git a/tests/protocol/test_hostplugin.py b/tests/protocol/test_hostplugin.py index 2a18e48a27..a55ae41bb7 100644 --- 
a/tests/protocol/test_hostplugin.py +++ b/tests/protocol/test_hostplugin.py @@ -26,13 +26,13 @@ import azurelinuxagent.common.protocol.restapi as restapi import azurelinuxagent.common.protocol.wire as wire from azurelinuxagent.common.errorstate import ErrorState -from azurelinuxagent.common.exception import HttpError, ResourceGoneError +from azurelinuxagent.common.exception import HttpError, ResourceGoneError, ProtocolError from azurelinuxagent.common.future import ustr, httpclient from azurelinuxagent.common.osutil.default import UUID_PATTERN -from azurelinuxagent.common.protocol.hostplugin import API_VERSION +from azurelinuxagent.common.protocol.hostplugin import API_VERSION, _VmSettingsErrorReporter, VmSettingsNotSupported from azurelinuxagent.common.utils import restutil from azurelinuxagent.common.version import AGENT_VERSION, AGENT_NAME -from tests.protocol.mocks import mock_wire_protocol, MockHttpResponse +from tests.protocol.mocks import mock_wire_protocol, mockwiredata, MockHttpResponse from tests.protocol.HttpRequestPredicates import HttpRequestPredicates from tests.protocol.mockwiredata import DATA_FILE, DATA_FILE_NO_EXT from tests.tools import AgentTestCase, PY_VERSION_MAJOR, Mock, PropertyMock, patch @@ -862,6 +862,125 @@ def test_should_report(self): self.assertEqual(False, actual) +class TestHostPluginVmSettings(HttpRequestPredicates, AgentTestCase): + def test_it_should_raise_protocol_error_when_the_vm_settings_request_fails(self): + def http_get_handler(url, *_, **__): + if self.is_host_plugin_vm_settings_request(url): + return MockHttpResponse(httpclient.INTERNAL_SERVER_ERROR, body="TEST ERROR") + return None + + with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: + protocol.set_http_handlers(http_get_handler=http_get_handler) + with self.assertRaisesRegexCM(ProtocolError, r'GET vmSettings \[correlation ID: .* eTag: .*\]: \[HTTP Failed\] \[500: None].*TEST ERROR.*'): + protocol.client.get_host_plugin().fetch_vm_settings(False) 
+ + @staticmethod + def _fetch_vm_settings_ignoring_errors(protocol): + try: + protocol.client.get_host_plugin().fetch_vm_settings(False) + except (ProtocolError, VmSettingsNotSupported): + pass + + def test_it_should_keep_track_of_errors_in_vm_settings_requests(self): + mock_response = None + + def http_get_handler(url, *_, **__): + if self.is_host_plugin_vm_settings_request(url): + if isinstance(mock_response, Exception): + # E0702: Raising NoneType while only classes or instances are allowed (raising-bad-type) - Disabled: we never raise None + raise mock_response # pylint: disable=raising-bad-type + return mock_response + return None + + with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS, http_get_handler=http_get_handler) as protocol: + mock_response = MockHttpResponse(httpclient.INTERNAL_SERVER_ERROR) + self._fetch_vm_settings_ignoring_errors(protocol) + + mock_response = MockHttpResponse(httpclient.BAD_REQUEST) + self._fetch_vm_settings_ignoring_errors(protocol) + self._fetch_vm_settings_ignoring_errors(protocol) + + mock_response = IOError("timed out") + self._fetch_vm_settings_ignoring_errors(protocol) + + mock_response = httpclient.HTTPException() + self._fetch_vm_settings_ignoring_errors(protocol) + self._fetch_vm_settings_ignoring_errors(protocol) + + # force the summary by resetting its period and calling update_goal_state + with patch("azurelinuxagent.common.protocol.hostplugin.add_event") as add_event: + mock_response = None # stop producing errors + protocol.client._host_plugin._vm_settings_error_reporter._next_period = datetime.datetime.now() + self._fetch_vm_settings_ignoring_errors(protocol) + summary_text = [kwargs["message"] for _, kwargs in add_event.call_args_list if kwargs["op"] == "VmSettingsSummary"] + + self.assertEqual(1, len(summary_text), "Exactly 1 summary should have been produced. 
Got: {0} ".format(summary_text)) + + summary = json.loads(summary_text[0]) + + expected = { + "requests": 6 + 2, # two extra calls to update_goal_state (when creating the mock protocol and when forcing the summary) + "errors": 6, + "serverErrors": 1, + "clientErrors": 2, + "timeouts": 1, + "failedRequests": 2 + } + + self.assertEqual(expected, summary, "The count of errors is incorrect") + + def test_it_should_limit_the_number_of_errors_it_reports(self): + with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: + def http_get_handler(url, *_, **__): + if self.is_host_plugin_vm_settings_request(url): + return MockHttpResponse(httpclient.BAD_GATEWAY) # HostGAPlugin returns 502 for internal errors + return None + protocol.set_http_handlers(http_get_handler=http_get_handler) + + with patch("azurelinuxagent.common.protocol.hostplugin.add_event") as add_event: + for _ in range(_VmSettingsErrorReporter._MaxErrors + 3): + self._fetch_vm_settings_ignoring_errors(protocol) + + messages = [kwargs["message"] for _, kwargs in add_event.call_args_list if kwargs["op"] == "VmSettings"] + + self.assertEqual(_VmSettingsErrorReporter._MaxErrors, len(messages), "The number of errors reported is not the max allowed (got: {0})".format(messages)) + + # Reset the error reporter and verify that additional errors are reported + protocol.client._host_plugin._vm_settings_error_reporter._next_period = datetime.datetime.now() + self._fetch_vm_settings_ignoring_errors(protocol) # this triggers the reset + + with patch("azurelinuxagent.common.protocol.hostplugin.add_event") as add_event: + for _ in range(3): + self._fetch_vm_settings_ignoring_errors(protocol) + + messages = [kwargs["message"] for _, kwargs in add_event.call_args_list if kwargs["op"] == "VmSettings"] + + self.assertEqual(3, len(messages), "Expected additional errors to be reported in the next period (got: {0})".format(messages)) + + def 
test_it_should_stop_issuing_vm_settings_requests_when_api_is_not_supported(self): + with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: + def http_get_handler(url, *_, **__): + if self.is_host_plugin_vm_settings_request(url): + return MockHttpResponse(httpclient.NOT_FOUND) # HostGAPlugin returns 404 if the API is not supported + return None + protocol.set_http_handlers(http_get_handler=http_get_handler) + + def get_vm_settings_call_count(): + return len([url for url in protocol.get_tracked_urls() if "vmSettings" in url]) + + self._fetch_vm_settings_ignoring_errors(protocol) + self.assertEqual(1, get_vm_settings_call_count(), "There should have been an initial call to vmSettings.") + + protocol.client.update_goal_state() + protocol.client.update_goal_state() + self.assertEqual(1, get_vm_settings_call_count(), "Additional calls to update_goal_state should not have produced extra calls to vmSettings.") + + # reset the vmSettings check period; this should restart the calls to the API + protocol.client._host_plugin._host_plugin_supports_vm_settings_next_check = datetime.datetime.now() + protocol.client.update_goal_state() + self.assertEqual(2, get_vm_settings_call_count(), "A second call to vmSettings was expecting after the check period has elapsed.") + + class MockResponse: def __init__(self, body, status_code, reason=''): self.body = body diff --git a/tests/protocol/test_wire.py b/tests/protocol/test_wire.py index 0d505710f0..198d5c3375 100644 --- a/tests/protocol/test_wire.py +++ b/tests/protocol/test_wire.py @@ -39,7 +39,7 @@ from azurelinuxagent.common.protocol.extensions_goal_state_from_vm_settings import ExtensionsGoalStateFromVmSettings from azurelinuxagent.common.protocol.hostplugin import HostPluginProtocol from azurelinuxagent.common.protocol.wire import WireProtocol, WireClient, \ - StatusBlob, VMStatus, EXT_CONF_FILE_NAME, _VmSettingsErrorReporter + StatusBlob, VMStatus, EXT_CONF_FILE_NAME from azurelinuxagent.common.telemetryevent 
import GuestAgentExtensionEventsSchema, \ TelemetryEventParam, TelemetryEvent from azurelinuxagent.common.utils import restutil @@ -1241,42 +1241,6 @@ def http_get_vm_settings(_method, _host, _relative_url, **kwargs): self.assertEqual("GET_VM_SETTINGS_TEST_CONTAINER_ID", request_headers[1][hostplugin._HEADER_CONTAINER_ID], "The retry request did not include the expected header for the ContainerId") self.assertEqual("GET_VM_SETTINGS_TEST_ROLE_CONFIG_NAME", request_headers[1][hostplugin._HEADER_HOST_CONFIG_NAME], "The retry request did not include the expected header for the RoleConfigName") - - @staticmethod - def _update_goal_state_ignoring_errors(protocol): - try: - protocol.client.update_goal_state() - except ProtocolError: - pass - - def test_it_should_limit_the_number_of_errors_it_reports(self): - with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: - def http_get_handler(url, *_, **__): - if self.is_host_plugin_vm_settings_request(url): - return MockHttpResponse(httpclient.BAD_GATEWAY) # HostGAPlugin returns 502 for internal errors - return None - protocol.set_http_handlers(http_get_handler=http_get_handler) - - with patch("azurelinuxagent.common.protocol.wire.add_event") as add_event: - for _ in range(_VmSettingsErrorReporter._MaxErrors + 3): - self._update_goal_state_ignoring_errors(protocol) - - messages = [kwargs["message"] for _, kwargs in add_event.call_args_list if kwargs["op"] == "VmSettings"] - - self.assertEqual(_VmSettingsErrorReporter._MaxErrors, len(messages), "The number of errors reported is not the max allowed (got: {0})".format(messages)) - - # Reset the error reporter and verify that additional errors are reported - protocol.client._vm_settings_error_reporter._next_period = datetime.now() - self._update_goal_state_ignoring_errors(protocol) # this triggers the reset - - with patch("azurelinuxagent.common.protocol.wire.add_event") as add_event: - for _ in range(3): - self._update_goal_state_ignoring_errors(protocol) - - 
messages = [kwargs["message"] for _, kwargs in add_event.call_args_list if kwargs["op"] == "VmSettings"] - - self.assertEqual(3, len(messages), "Expected additional errors to be reported in the next period (got: {0})".format(messages)) - def test_it_should_use_vm_settings_by_default(self): with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: extensions_goal_state = protocol.get_extensions_goal_state() @@ -1303,17 +1267,6 @@ def http_get_handler(url, *_, **__): with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS, http_get_handler=http_get_handler) as protocol: self._assert_is_extensions_goal_state_from_extensions_config(protocol.get_extensions_goal_state()) - def test_it_should_raise_protocol_error_when_the_vm_settings_request_fails(self): - def http_get_handler(url, *_, **__): - if self.is_host_plugin_vm_settings_request(url): - return MockHttpResponse(httpclient.INTERNAL_SERVER_ERROR, body="TEST ERROR") - return None - - with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: - protocol.set_http_handlers(http_get_handler=http_get_handler) - with self.assertRaisesRegexCM(ProtocolError, r'GET vmSettings \[correlation ID: .* eTag: .*\]: \[HTTP Failed\] \[500: None].*TEST ERROR.*'): - protocol.update_goal_state() - def test_it_should_use_extensions_config_when_the_host_ga_plugin_version_is_not_supported(self): data_file = mockwiredata.DATA_FILE_VM_SETTINGS.copy() data_file["vm_settings"] = "hostgaplugin/vm_settings-unsupported_version.json" @@ -1321,77 +1274,6 @@ def test_it_should_use_extensions_config_when_the_host_ga_plugin_version_is_not_ with mock_wire_protocol(data_file) as protocol: self._assert_is_extensions_goal_state_from_extensions_config(protocol.get_extensions_goal_state()) - def test_it_should_keep_track_of_errors_in_vm_settings_requests(self): - mock_response = None - - def http_get_handler(url, *_, **__): - if self.is_host_plugin_vm_settings_request(url): - if isinstance(mock_response, Exception): - # E0702: 
Raising NoneType while only classes or instances are allowed (raising-bad-type) - Disabled: we never raise None - raise mock_response # pylint: disable=raising-bad-type - return mock_response - return None - - with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS, http_get_handler=http_get_handler) as protocol: - mock_response = MockHttpResponse(httpclient.INTERNAL_SERVER_ERROR) - self._update_goal_state_ignoring_errors(protocol) - - mock_response = MockHttpResponse(httpclient.BAD_REQUEST) - self._update_goal_state_ignoring_errors(protocol) - self._update_goal_state_ignoring_errors(protocol) - - mock_response = IOError("timed out") - self._update_goal_state_ignoring_errors(protocol) - - mock_response = httpclient.HTTPException() - self._update_goal_state_ignoring_errors(protocol) - self._update_goal_state_ignoring_errors(protocol) - - # force the summary by resetting its period and calling update_goal_state - with patch("azurelinuxagent.common.protocol.wire.add_event") as add_event: - mock_response = None # stop producing errors - protocol.client._vm_settings_error_reporter._next_period = datetime.now() - self._update_goal_state_ignoring_errors(protocol) - summary_text = [kwargs["message"] for _, kwargs in add_event.call_args_list if kwargs["op"] == "VmSettingsSummary"] - - self.assertEqual(1, len(summary_text), "Exactly 1 summary should have been produced. 
Got: {0} ".format(summary_text)) - - summary = json.loads(summary_text[0]) - - expected = { - "requests": 6 + 2, # two extra calls to update_goal_state (when creating the mock protocol and when forcing the summary) - "errors": 6, - "serverErrors": 1, - "clientErrors": 2, - "timeouts": 1, - "failedRequests": 2 - } - - self.assertEqual(expected, summary, "The count of errors is incorrect") - - def test_it_should_stop_issuing_vm_settings_requests_when_api_is_not_supported(self): - with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: - def http_get_handler(url, *_, **__): - if self.is_host_plugin_vm_settings_request(url): - return MockHttpResponse(httpclient.NOT_FOUND) # HostGAPlugin returns 404 if the API is not supported - return None - protocol.set_http_handlers(http_get_handler=http_get_handler) - - def get_vm_settings_call_count(): - return len([url for url in protocol.get_tracked_urls() if "vmSettings" in url]) - - protocol.client.update_goal_state() - self.assertEqual(1, get_vm_settings_call_count(), "There should have been an initial call to vmSettings.") - - protocol.client.update_goal_state() - protocol.client.update_goal_state() - self.assertEqual(1, get_vm_settings_call_count(), "Additional calls to update_goal_state should not have produced extra calls to vmSettings.") - - # reset the vmSettings check period; this should restart the calls to the API - protocol.client._host_plugin_supports_vm_settings_next_check = datetime.now() - protocol.client.update_goal_state() - self.assertEqual(2, get_vm_settings_call_count(), "A second call to vmSettings was expecting after the check period has elapsed.") - class UpdateHostPluginFromGoalStateTestCase(AgentTestCase): """ From b27e475aeefaf8e29cdb531eae71f98c93563fb3 Mon Sep 17 00:00:00 2001 From: Laveesh Rohra Date: Fri, 28 Jan 2022 14:48:58 +0530 Subject: [PATCH 15/84] Upgrade/Downgrade agent if Requested Version available (#2479) --- azurelinuxagent/common/version.py | 3 - 
azurelinuxagent/ga/update.py | 325 +++++++++++++++++++----------- tests/ga/test_update.py | 255 +++++++++++++++-------- 3 files changed, 377 insertions(+), 206 deletions(-) diff --git a/azurelinuxagent/common/version.py b/azurelinuxagent/common/version.py index e35e28b94a..ff9c903b93 100644 --- a/azurelinuxagent/common/version.py +++ b/azurelinuxagent/common/version.py @@ -298,6 +298,3 @@ def set_goal_state_agent(): GOAL_STATE_AGENT_VERSION = set_goal_state_agent() - -def is_current_agent_installed(): - return CURRENT_AGENT == AGENT_LONG_VERSION diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py index 83db4be7a1..d795d26bb5 100644 --- a/azurelinuxagent/ga/update.py +++ b/azurelinuxagent/ga/update.py @@ -29,7 +29,6 @@ import time import uuid import zipfile - from datetime import datetime, timedelta import azurelinuxagent.common.conf as conf @@ -38,34 +37,33 @@ import azurelinuxagent.common.utils.restutil as restutil import azurelinuxagent.common.utils.textutil as textutil from azurelinuxagent.common.agent_supported_feature import get_supported_feature_by_name, SupportedFeatureNames -from azurelinuxagent.common.persist_firewall_rules import PersistFirewallRulesHandler from azurelinuxagent.common.cgroupconfigurator import CGroupConfigurator - from azurelinuxagent.common.event import add_event, initialize_event_logger_vminfo_common_parameters, \ WALAEventOperation, EVENTS_DIRECTORY from azurelinuxagent.common.exception import ResourceGoneError, UpdateError, ExitException, AgentUpgradeExitException from azurelinuxagent.common.future import ustr from azurelinuxagent.common.osutil import get_osutil, systemd -from azurelinuxagent.common.protocol.restapi import VMAgentUpdateStatus, VMAgentUpdateStatuses, ExtHandlerPackageList -from azurelinuxagent.common.protocol.util import get_protocol_util +from azurelinuxagent.common.osutil.default import get_firewall_drop_command, \ + get_accept_tcp_rule +from azurelinuxagent.common.persist_firewall_rules import 
PersistFirewallRulesHandler from azurelinuxagent.common.protocol.hostplugin import HostPluginProtocol +from azurelinuxagent.common.protocol.restapi import VMAgentUpdateStatus, VMAgentUpdateStatuses, ExtHandlerPackageList, \ + VERSION_0 +from azurelinuxagent.common.protocol.util import get_protocol_util from azurelinuxagent.common.utils import shellutil from azurelinuxagent.common.utils.flexible_version import FlexibleVersion from azurelinuxagent.common.utils.networkutil import AddFirewallRules from azurelinuxagent.common.utils.shellutil import CommandError -from azurelinuxagent.common.version import AGENT_NAME, AGENT_VERSION, AGENT_DIR_PATTERN, CURRENT_AGENT,\ - CURRENT_VERSION, DISTRO_NAME, DISTRO_VERSION, is_current_agent_installed, get_lis_version, \ - has_logrotate, PY_VERSION_MAJOR, PY_VERSION_MINOR, PY_VERSION_MICRO +from azurelinuxagent.common.version import AGENT_NAME, AGENT_DIR_PATTERN, CURRENT_AGENT, \ + CURRENT_VERSION, DISTRO_NAME, DISTRO_VERSION, get_lis_version, \ + has_logrotate, PY_VERSION_MAJOR, PY_VERSION_MINOR, PY_VERSION_MICRO, get_daemon_version from azurelinuxagent.ga.collect_logs import get_collect_logs_handler, is_log_collection_allowed -from azurelinuxagent.ga.env import get_env_handler from azurelinuxagent.ga.collect_telemetry_events import get_collect_telemetry_events_handler - -from azurelinuxagent.ga.exthandlers import HandlerManifest, ExtHandlersHandler, list_agent_lib_directory, ExtensionStatusValue, ExtHandlerStatusValue +from azurelinuxagent.ga.env import get_env_handler +from azurelinuxagent.ga.exthandlers import HandlerManifest, ExtHandlersHandler, list_agent_lib_directory, \ + ExtensionStatusValue, ExtHandlerStatusValue from azurelinuxagent.ga.monitor import get_monitor_handler - from azurelinuxagent.ga.send_telemetry_events import get_send_telemetry_events_handler -from azurelinuxagent.common.osutil.default import get_firewall_drop_command, \ - get_accept_tcp_rule AGENT_ERROR_FILE = "error.json" # File name for agent error 
record AGENT_MANIFEST_FILE = "HandlerManifest.json" @@ -199,7 +197,8 @@ def run_latest(self, child_args=None): if self.signal_handler is None: self.signal_handler = signal.signal(signal.SIGTERM, self.forward_signal) - latest_agent = self.get_latest_agent() + latest_agent = None if not conf.get_autoupdate_enabled() else self.get_latest_agent_greater_than_daemon( + daemon_version=CURRENT_VERSION) if latest_agent is None: logger.info(u"Installed Agent {0} is the most current agent", CURRENT_AGENT) agent_cmd = "python -u {0} -run-exthandlers".format(sys.argv[0]) @@ -396,8 +395,7 @@ def run(self, debug=False): time.sleep(self._goal_state_period) except AgentUpgradeExitException as exitException: - add_event(AGENT_NAME, op=WALAEventOperation.AgentUpgrade, version=CURRENT_VERSION, is_success=True, - message=exitException.reason, log_event=False) + add_event(op=WALAEventOperation.AgentUpgrade, message=exitException.reason, log_event=False) logger.info(exitException.reason) except ExitException as exitException: logger.info(exitException.reason) @@ -449,6 +447,80 @@ def _try_update_goal_state(self, protocol): return False return True + def __update_guest_agent(self, protocol): + """ + This function checks for new Agent updates and raises AgentUpgradeExitException if available. + There are 2 different ways the agent checks for an update - + 1) Requested Version is specified in the Goal State. + - In this case, the Agent will download the requested version and upgrade/downgrade instantly. + 2) No requested version. + - In this case, the agent will periodically check (1 hr) for new agent versions in GA Manifest. + - If available, it will download all versions > CURRENT_VERSION. 
+ - Depending on the highest version > CURRENT_VERSION, + the agent will update within 4 hrs (for a Hotfix update) or 24 hrs (for a Normal update) + """ + + def log_next_update_time(): + next_normal_time, next_hotfix_time = self.__get_next_upgrade_times() + upgrade_type = self.__get_agent_upgrade_type(available_agent) + next_time = next_hotfix_time if upgrade_type == AgentUpgradeType.Hotfix else next_normal_time + message_ = "Discovered new {0} upgrade {1}; Will upgrade on or after {2}".format( + upgrade_type, available_agent.name, + datetime.utcfromtimestamp(next_time).strftime(logger.Logger.LogTimeFormatInUTC)) + add_event(AGENT_NAME, op=WALAEventOperation.AgentUpgrade, version=CURRENT_VERSION, is_success=True, + message=message_, log_event=False) + logger.info(message_) + + def handle_updates_for_requested_version(): + if requested_version < CURRENT_VERSION: + prefix = "downgrade" + # In case of a downgrade, we blacklist the current agent to avoid starting it back up ever again + # (the expectation here being that if RSM is asking us to a downgrade, + # there's a good reason for not wanting the current version). 
+ try: + # We should always have an agent directory for the CURRENT_VERSION + # (unless the CURRENT_VERSION == daemon version, but since we don't support downgrading + # below daemon version, we will never reach this code path if that's the scenario) + current_agent = next(agent for agent in self.agents if agent.version == CURRENT_VERSION) + logger.info( + "Blacklisting the agent {0} since a downgrade was requested in the GoalState, " + "suggesting that we really don't want to execute any extensions using this version".format( + CURRENT_VERSION)) + current_agent.mark_failure(is_fatal=True) + except StopIteration: + logger.warn( + "Could not find a matching agent with current version {0} to blacklist, skipping it".format( + CURRENT_VERSION)) + else: + # In case of an upgrade, we don't need to blacklist anything as the daemon will automatically + # start the next available highest version which would be the requested version + prefix = "upgrade" + raise AgentUpgradeExitException( + "Exiting current process to {0} to the request Agent version {1}".format(prefix, requested_version)) + + # Ignore new agents if updating is disabled + if not conf.get_autoupdate_enabled(): + return False + + if self._download_agent_if_upgrade_available(protocol): + # The call to get_latest_agent_greater_than_daemon() also finds all agents in directory and sets the self.agents property. + # This state is used to find the GuestAgent object with the current version later if requested version is available in last GS. 
+ available_agent = self.get_latest_agent_greater_than_daemon() + requested_version, _ = self.__get_requested_version_and_manifest_from_last_gs(protocol) + if requested_version is not None: + # If requested version specified, upgrade/downgrade to the specified version instantly as this is + # driven by the goal state (as compared to the agent periodically checking for new upgrades every hour) + handle_updates_for_requested_version() + elif available_agent is None: + # Legacy behavior: The current agent can become unavailable and needs to be reverted. + # In that case, self._upgrade_available() returns True and available_agent would be None. Handling it here. + raise AgentUpgradeExitException( + "Agent {0} is reverting to the installed agent -- exiting".format(CURRENT_AGENT)) + else: + log_next_update_time() + + self.__upgrade_agent_if_permitted() + def __goal_state_updated(self, incarnation): """ This function returns if the Goal State updated. @@ -459,6 +531,7 @@ def __goal_state_updated(self, incarnation): return incarnation != self.last_incarnation def _process_goal_state(self, exthandlers_handler, remote_access_handler): + protocol = exthandlers_handler.protocol if not self._try_update_goal_state(protocol): self._heartbeat_update_goal_state_error_count += 1 @@ -466,25 +539,8 @@ def _process_goal_state(self, exthandlers_handler, remote_access_handler): self._report_status(exthandlers_handler, incarnation_changed=False) return - if self._check_and_download_agent_if_upgrade_available(protocol): - available_agent = self.get_latest_agent() - # Legacy behavior: The current agent can become unavailable and needs to be reverted. - # In that case, self._upgrade_available() returns True and available_agent would be None. Handling it here. 
- if available_agent is None: - raise AgentUpgradeExitException("Agent {0} is reverting to the installed agent -- exiting".format(CURRENT_AGENT)) - else: - next_normal_time, next_hotfix_time = self.__get_next_upgrade_times() - upgrade_type = self.__get_agent_upgrade_type(available_agent) - next_time = next_hotfix_time if upgrade_type == AgentUpgradeType.Hotfix else next_normal_time - message = "Discovered new {0} upgrade {1}; Will upgrade on or after {2}".format( - upgrade_type, available_agent.name, - datetime.utcfromtimestamp(next_time).strftime(logger.Logger.LogTimeFormatInUTC)) - add_event(AGENT_NAME, op=WALAEventOperation.AgentUpgrade, version=CURRENT_VERSION, is_success=True, - message=message, log_event=False) - logger.info(message) - - self.__upgrade_agent_if_permitted() - + # Update the Guest Agent if a new version is available + self.__update_guest_agent(protocol) incarnation = protocol.get_incarnation() try: @@ -507,8 +563,7 @@ def _process_goal_state(self, exthandlers_handler, remote_access_handler): finally: self.last_incarnation = incarnation - @staticmethod - def __get_vmagent_update_status(protocol, incarnation_changed): + def __get_vmagent_update_status(self, protocol, incarnation_changed): """ This function gets the VMAgent update status as per the last GoalState. Returns: None if the last GS does not ask for requested version else VMAgentUpdateStatus @@ -519,20 +574,14 @@ def __get_vmagent_update_status(protocol, incarnation_changed): update_status = None try: - agent_manifests, _ = protocol.get_vmagent_manifests() - - try: - # Expectation here is that there will only be one manifest per family passed down from CRP - # (already verified during validations), we pick the first matching one here. 
- manifest = next(m for m in agent_manifests if m.family == conf.get_autoupdate_gafamily()) - except StopIteration: - if incarnation_changed: - logger.info("Unable to report update status as no matching manifest found for family: {0}".format( - conf.get_autoupdate_gafamily())) + requested_version, manifest = self.__get_requested_version_and_manifest_from_last_gs(protocol) + if manifest is None and incarnation_changed: + logger.info("Unable to report update status as no matching manifest found for family: {0}".format( + conf.get_autoupdate_gafamily())) return None - if manifest.is_requested_version_specified: - if CURRENT_VERSION == manifest.requested_version: + if requested_version is not None: + if CURRENT_VERSION == requested_version: status = VMAgentUpdateStatuses.Success code = 0 else: @@ -609,20 +658,30 @@ def forward_signal(self, signum, frame): sys.exit(0) return - def get_latest_agent(self): + @staticmethod + def __get_daemon_version_for_update(): + daemon_version = get_daemon_version() + if daemon_version != FlexibleVersion(VERSION_0): + return daemon_version + # We return 0.0.0.0 if daemon version is not specified. In that case, + # use the min version as 2.2.53 as we started setting the daemon version starting 2.2.53. + return FlexibleVersion("2.2.53") + + def get_latest_agent_greater_than_daemon(self, daemon_version=None): """ If autoupdate is enabled, return the most current, downloaded, - non-blacklisted agent which is not the current version (if any). + non-blacklisted agent which is not the current version (if any) and is greater than the `daemon_version`. Otherwise, return None (implying to use the installed agent). 
+ If `daemon_version` is None, we fetch it from the environment variable set by the DaemonHandler """ - if not conf.get_autoupdate_enabled(): - return None - self._find_agents() + daemon_version = self.__get_daemon_version_for_update() if daemon_version is None else daemon_version + + # Fetch the downloaded agents that are different from the current version and greater than the daemon version available_agents = [agent for agent in self.agents if agent.is_available - and agent.version > FlexibleVersion(AGENT_VERSION)] + and agent.version != CURRENT_VERSION and agent.version > daemon_version] return available_agents[0] if len(available_agents) >= 1 else None @@ -770,7 +829,7 @@ def _find_agents(self): Load all non-blacklisted agents currently on disk. """ try: - self._set_agents(self._load_agents()) + self._set_and_sort_agents(self._load_agents()) self._filter_blacklisted_agents() except Exception as e: logger.warn(u"Exception occurred loading available agents: {0}", ustr(e)) @@ -815,17 +874,6 @@ def _is_orphaned(self): return fileutil.read_file(conf.get_agent_pid_file_path()) != ustr(parent_pid) - def _is_version_eligible(self, version): - # Ensure the installed version is always eligible - if version == CURRENT_VERSION and is_current_agent_installed(): - return True - - for agent in self.agents: - if agent.version == version: - return agent.is_available - - return False - def _load_agents(self): path = os.path.join(conf.get_lib_dir(), "{0}-*".format(AGENT_NAME)) return [GuestAgent(path=agent_dir) @@ -867,7 +915,7 @@ def _purge_agents(self): logger.warn(u"Purging {0} raised exception: {1}", agent_path, ustr(e)) return - def _set_agents(self, agents=None): + def _set_and_sort_agents(self, agents=None): if agents is None: agents = [] self.agents = agents @@ -910,7 +958,24 @@ def _shutdown(self): str(e)) return - def _check_and_download_agent_if_upgrade_available(self, protocol, base_version=CURRENT_VERSION): + @staticmethod + def 
__get_requested_version_and_manifest_from_last_gs(protocol): + """ + Get the requested version and corresponding manifests from last GS if supported + Returns: (Requested Version, Manifest) if supported and available + (None, None) if no manifests found in the last GS + (None, manifest) if not supported or not specified in GS + """ + family = conf.get_autoupdate_gafamily() + manifest_list, _ = protocol.get_vmagent_manifests() + manifests = [m for m in manifest_list if m.family == family and len(m.uris) > 0] + if len(manifests) == 0: + return None, None + if conf.get_enable_ga_versioning() and manifests[0].is_requested_version_specified: + return manifests[0].requested_version, manifests[0] + return None, manifests[0] + + def _download_agent_if_upgrade_available(self, protocol, base_version=CURRENT_VERSION): """ This function downloads the new agent if an update is available. If a requested version is available in goal state, then only that version is downloaded (new-update model) @@ -919,46 +984,67 @@ def _check_and_download_agent_if_upgrade_available(self, protocol, base_version= return: True if current agent is no longer available or an agent with a higher version number is available else False """ - # Ignore new agents if updating is disabled - if not conf.get_autoupdate_enabled(): - return False - def report_error(msg_, version=CURRENT_VERSION): + def report_error(msg_, version_=CURRENT_VERSION, op=WALAEventOperation.Download): logger.warn(msg_) - add_event(AGENT_NAME, op=WALAEventOperation.Download, version=version, is_success=False, message=msg_) + add_event(AGENT_NAME, op=op, version=version_, is_success=False, message=msg_, log_event=False) + + def can_proceed_with_requested_version(): + if not gs_updated: + # If incarnation didn't change, don't process anything. + return False + + # With the new model, we will get a new GS when CRP wants us to auto-update using required version. 
+ # If there's no new incarnation, don't proceed with anything + msg_ = "Found requested version in manifest: {0} for incarnation: {1}".format( + requested_version, incarnation) + logger.info(msg_) + add_event(AGENT_NAME, op=WALAEventOperation.AgentUpgrade, is_success=True, message=msg_, log_event=False) + + if requested_version < daemon_version: + # Don't process the update if the requested version is lesser than daemon version, + # as we don't support downgrades below daemon versions. + report_error( + "Can't process the upgrade as the requested version: {0} is < current daemon version: {1}".format( + requested_version, daemon_version), op=WALAEventOperation.AgentUpgrade) + return False + + return True + + def agent_upgrade_time_elapsed(now_): + if self.last_attempt_time is not None: + next_attempt_time = self.last_attempt_time + conf.get_autoupdate_frequency() + else: + next_attempt_time = now_ + if next_attempt_time > now_: + return False + return True family = conf.get_autoupdate_gafamily() - incarnation_changed = False + gs_updated = False + daemon_version = self.__get_daemon_version_for_update() try: # Fetch the agent manifests from the latest Goal State - manifest_list, incarnation = protocol.get_vmagent_manifests() - incarnation_changed = self.__goal_state_updated(incarnation) - manifests = [m for m in manifest_list if m.family == family and len(m.uris) > 0] - if len(manifests) == 0: + incarnation = protocol.get_incarnation() + gs_updated = self.__goal_state_updated(incarnation) + requested_version, manifest = self.__get_requested_version_and_manifest_from_last_gs(protocol) + if manifest is None: logger.verbose( u"No manifest links found for agent family: {0} for incarnation: {1}, skipping update check".format( family, incarnation)) return False except Exception as err: # If there's some issues in fetching the agent manifests, report it only on incarnation change - if incarnation_changed: - msg = u"Exception retrieving agent manifests: 
{0}".format(textutil.format_exception(err)) + msg = u"Exception retrieving agent manifests: {0}".format(textutil.format_exception(err)) + if gs_updated: report_error(msg) + else: + logger.verbose(msg) return False - requested_version = None - if conf.get_enable_ga_versioning() and manifests[0].is_requested_version_specified: + if requested_version is not None: # If GA versioning is enabled and requested version present in GS, and it's a new GS, follow new logic - if incarnation_changed: - # With the new model, we will get a new GS when CRP wants us to auto-update using required version. - # If there's no new incarnation, don't proceed with anything - requested_version = manifests[0].requested_version - msg = "Found requested version in manifest: {0} for incarnation: {1}".format( - requested_version, incarnation) - logger.info(msg) - add_event(AGENT_NAME, op=WALAEventOperation.AgentUpgrade, is_success=True, message=msg) - else: - # If incarnation didn't change, don't process anything. + if not can_proceed_with_requested_version(): return False else: # If no requested version specified in the Goal State, follow the old auto-update logic @@ -966,16 +1052,11 @@ def report_error(msg_, version=CURRENT_VERSION): # If any subsequent goal state does not contain requested version, this timer will start then, and we will # download all versions available in PIR and auto-update to the highest available version on that goal state. 
now = time.time() - if self.last_attempt_time is not None: - next_attempt_time = self.last_attempt_time + conf.get_autoupdate_frequency() - else: - next_attempt_time = now - if next_attempt_time > now: + if not agent_upgrade_time_elapsed(now): return False logger.info("No requested version specified, checking for all versions for agent update (family: {0})", family) - self.last_attempt_time = now try: @@ -988,45 +1069,52 @@ def report_error(msg_, version=CURRENT_VERSION): # In this case, no need to even fetch the GA family manifest as we don't need to download any agent. if requested_version is not None and requested_version == CURRENT_VERSION: packages_to_download = [] - logger.info("The requested version is running as the current version: {0}".format(requested_version)) + msg = "The requested version is running as the current version: {0}".format(requested_version) + logger.info(msg) + add_event(AGENT_NAME, op=WALAEventOperation.AgentUpgrade, is_success=True, message=msg) else: - pkg_list = protocol.get_vmagent_pkgs(manifests[0]) + pkg_list = protocol.get_vmagent_pkgs(manifest) packages_to_download = pkg_list.versions # Verify the requested version is in GA family manifest (if specified) if requested_version is not None and requested_version != CURRENT_VERSION: - package_found = False for pkg in pkg_list.versions: if FlexibleVersion(pkg.version) == requested_version: # Found a matching package, only download that one packages_to_download = [pkg] - package_found = True break - - if not package_found: + else: msg = "No matching package found in the agent manifest for requested version: {0} in incarnation: {1}, skipping agent update".format( requested_version, incarnation) - report_error(msg, version=requested_version) + report_error(msg, version_=requested_version) return False # Set the agents to those available for download at least as current as the existing agent # or to the requested version (if specified) host = self._get_host_plugin(protocol=protocol) - 
self._set_agents([GuestAgent(pkg=pkg, host=host) for pkg in packages_to_download]) + self._set_and_sort_agents([GuestAgent(pkg=pkg, host=host) for pkg in packages_to_download]) # Remove from disk any agent no longer needed in the VM. - # If requested version is provided, this would delete all other agents present on the VM except the - # current one and the requested one if they're different, and only the current one if same. + # If requested version is provided, this would delete all other agents present on the VM except - + # - the current version and the requested version if requested version != current version + # - only the current version if requested version == current version # Note: # The code leaves on disk available, but blacklisted, agents to preserve the state. # Otherwise, those agents could be downloaded again and inappropriately retried. self._purge_agents() self._filter_blacklisted_agents() - # Return True if current agent is no longer available or an - # agent with a higher version number is available - return not self._is_version_eligible(base_version) \ - or (len(self.agents) > 0 and self.agents[0].version > base_version) + # If there are no agents available to upgrade/downgrade to, return False + if len(self.agents) == 0: + return False + + if requested_version is not None: + # In case of requested version, return True if an agent with a different version number than the + # current version is available that is higher than the current daemon version + return self.agents[0].version != base_version and self.agents[0].version > daemon_version + else: + # Else, return True if the highest agent is > base_version (CURRENT_VERSION) + return self.agents[0].version > base_version except Exception as err: msg = u"Exception downloading agents for update: {0}".format(textutil.format_exception(err)) @@ -1232,8 +1320,8 @@ def __upgrade_agent_if_permitted(self): self._last_hotfix_upgrade_time = now if next_hotfix_time <= now else 
self._last_hotfix_upgrade_time self._last_normal_upgrade_time = now if next_normal_time <= now else self._last_normal_upgrade_time - available_agent = self.get_latest_agent() - if available_agent is None: + available_agent = self.get_latest_agent_greater_than_daemon() + if available_agent is None or available_agent.version <= CURRENT_VERSION: logger.verbose("No agent upgrade discovered") return @@ -1341,7 +1429,10 @@ def mark_failure(self, is_fatal=False): self.error.mark_failure(is_fatal=is_fatal) self.error.save() if self.error.is_blacklisted: - logger.warn(u"Agent {0} is permanently blacklisted", self.name) + msg = u"Agent {0} is permanently blacklisted".format(self.name) + logger.warn(msg) + add_event(op=WALAEventOperation.AgentBlacklisted, is_success=False, message=msg, log_event=False, + version=self.version) except Exception as e: logger.warn(u"Agent {0} failed recording error state: {1}", self.name, ustr(e)) diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index e40c05eaf3..e1dbe55bd6 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -42,7 +42,8 @@ from azurelinuxagent.common.utils.flexible_version import FlexibleVersion from azurelinuxagent.common.utils.networkutil import FirewallCmdDirectCommands, AddFirewallRules from azurelinuxagent.common.version import AGENT_PKG_GLOB, AGENT_DIR_GLOB, AGENT_NAME, AGENT_DIR_PATTERN, \ - AGENT_VERSION, CURRENT_AGENT, CURRENT_VERSION + AGENT_VERSION, CURRENT_AGENT, CURRENT_VERSION, set_daemon_version, \ + __DAEMON_VERSION_ENV_VARIABLE as DAEMON_VERSION_ENV_VARIABLE from azurelinuxagent.ga.exthandlers import ExtHandlersHandler, ExtHandlerInstance, HandlerEnvironment, ExtensionStatusValue from azurelinuxagent.ga.update import GuestAgent, GuestAgentError, MAX_FAILURE, AGENT_MANIFEST_FILE, \ get_update_handler, ORPHAN_POLL_INTERVAL, AGENT_PARTITION_FILE, AGENT_ERROR_FILE, ORPHAN_WAIT_INTERVAL, \ @@ -943,7 +944,7 @@ def test_ensure_readonly_leaves_unmodified(self): def 
_test_evaluate_agent_health(self, child_agent_index=0): self.prepare_agents() - latest_agent = self.update_handler.get_latest_agent() + latest_agent = self.update_handler.get_latest_agent_greater_than_daemon() self.assertTrue(latest_agent.is_available) self.assertFalse(latest_agent.is_blacklisted) self.assertTrue(len(self.update_handler.agents) > 1) @@ -982,7 +983,7 @@ def test_evaluate_agent_health_resets_with_new_agent(self): def test_filter_blacklisted_agents(self): self.prepare_agents() - self.update_handler._set_agents([GuestAgent(path=path) for path in self.agent_dirs()]) + self.update_handler._set_and_sort_agents([GuestAgent(path=path) for path in self.agent_dirs()]) self.assertEqual(len(self.agent_dirs()), len(self.update_handler.agents)) kept_agents = self.update_handler.agents[::2] @@ -1029,7 +1030,7 @@ def test_get_host_plugin_returns_host_for_wireserver(self, mock_get_host): def test_get_latest_agent(self): latest_version = self.prepare_agents() - latest_agent = self.update_handler.get_latest_agent() + latest_agent = self.update_handler.get_latest_agent_greater_than_daemon() self.assertEqual(len(self._get_agents(self.tmp_dir)), len(self.update_handler.agents)) self.assertEqual(latest_version, latest_agent.version) @@ -1038,24 +1039,24 @@ def test_get_latest_agent_excluded(self): self.assertFalse(self._test_upgrade_available( versions=self.agent_versions(), count=1)) - self.assertEqual(None, self.update_handler.get_latest_agent()) + self.assertEqual(None, self.update_handler.get_latest_agent_greater_than_daemon()) def test_get_latest_agent_no_updates(self): - self.assertEqual(None, self.update_handler.get_latest_agent()) + self.assertEqual(None, self.update_handler.get_latest_agent_greater_than_daemon()) def test_get_latest_agent_skip_updates(self): conf.get_autoupdate_enabled = Mock(return_value=False) - self.assertEqual(None, self.update_handler.get_latest_agent()) + self.assertEqual(None, self.update_handler.get_latest_agent_greater_than_daemon()) def 
test_get_latest_agent_skips_unavailable(self): self.prepare_agents() - prior_agent = self.update_handler.get_latest_agent() + prior_agent = self.update_handler.get_latest_agent_greater_than_daemon() latest_version = self.prepare_agents(count=self.agent_count() + 1, is_available=False) latest_path = os.path.join(self.tmp_dir, "{0}-{1}".format(AGENT_NAME, latest_version)) self.assertFalse(GuestAgent(latest_path).is_available) - latest_agent = self.update_handler.get_latest_agent() + latest_agent = self.update_handler.get_latest_agent_greater_than_daemon() self.assertTrue(latest_agent.version < latest_version) self.assertEqual(latest_agent.version, prior_agent.version) @@ -1100,34 +1101,6 @@ def test_is_orphaned_returns_true_if_parent_does_not_exist(self): with patch('os.getppid', return_value=42): self.assertTrue(self.update_handler._is_orphaned) - def test_is_version_available(self): - self.prepare_agents(is_available=True) - self.update_handler.agents = self.agents() - - for agent in self.agents(): - self.assertTrue(self.update_handler._is_version_eligible(agent.version)) - - @patch("azurelinuxagent.ga.update.is_current_agent_installed", return_value=False) - def test_is_version_available_rejects(self, mock_current): # pylint: disable=unused-argument - self.prepare_agents(is_available=True) - self.update_handler.agents = self.agents() - - self.update_handler.agents[0].mark_failure(is_fatal=True) - self.assertFalse(self.update_handler._is_version_eligible(self.agents()[0].version)) - - @patch("azurelinuxagent.ga.update.is_current_agent_installed", return_value=True) - def test_is_version_available_accepts_current(self, mock_current): # pylint: disable=unused-argument - self.update_handler.agents = [] - self.assertTrue(self.update_handler._is_version_eligible(CURRENT_VERSION)) - - @patch("azurelinuxagent.ga.update.is_current_agent_installed", return_value=False) - def test_is_version_available_rejects_by_default(self, mock_current): # pylint: disable=unused-argument 
- self.prepare_agents() - self.update_handler.agents = [] - - v = self.agents()[0].version - self.assertFalse(self.update_handler._is_version_eligible(v)) - def test_purge_agents(self): self.prepare_agents() self.update_handler._find_agents() @@ -1191,7 +1164,7 @@ def _test_run_latest(self, mock_child=None, mock_time=None, child_args=None): def test_run_latest(self): self.prepare_agents() - agent = self.update_handler.get_latest_agent() + agent = self.update_handler.get_latest_agent_greater_than_daemon() args, kwargs = self._test_run_latest() args = args[0] cmds = textutil.safe_shlex_split(agent.get_agent_cmd()) @@ -1209,8 +1182,8 @@ def test_run_latest(self): def test_run_latest_passes_child_args(self): self.prepare_agents() - agent = self.update_handler.get_latest_agent() # pylint: disable=unused-variable - args, kwargs = self._test_run_latest(child_args="AnArgument") # pylint: disable=unused-variable + self.update_handler.get_latest_agent_greater_than_daemon() + args, _ = self._test_run_latest(child_args="AnArgument") args = args[0] self.assertTrue(len(args) > 1) @@ -1252,7 +1225,7 @@ def test_run_latest_polls_every_second_if_installed_not_latest(self): self.assertEqual(1, mock_time.sleep_interval) def test_run_latest_defaults_to_current(self): - self.assertEqual(None, self.update_handler.get_latest_agent()) + self.assertEqual(None, self.update_handler.get_latest_agent_greater_than_daemon()) args, kwargs = self._test_run_latest() @@ -1287,12 +1260,12 @@ def test_run_latest_nonzero_code_marks_failures(self): # logger.add_logger_appender(logger.AppenderType.STDOUT) self.prepare_agents() - latest_agent = self.update_handler.get_latest_agent() + latest_agent = self.update_handler.get_latest_agent_greater_than_daemon() self.assertTrue(latest_agent.is_available) self.assertEqual(0.0, latest_agent.error.last_failure) self.assertEqual(0, latest_agent.error.failure_count) - with patch('azurelinuxagent.ga.update.UpdateHandler.get_latest_agent', return_value=latest_agent): 
+ with patch('azurelinuxagent.ga.update.UpdateHandler.get_latest_agent_greater_than_daemon', return_value=latest_agent): self._test_run_latest(mock_child=ChildMock(return_value=1)) self.assertTrue(latest_agent.is_blacklisted) @@ -1303,12 +1276,12 @@ def test_run_latest_nonzero_code_marks_failures(self): def test_run_latest_exception_blacklists(self): self.prepare_agents() - latest_agent = self.update_handler.get_latest_agent() + latest_agent = self.update_handler.get_latest_agent_greater_than_daemon() self.assertTrue(latest_agent.is_available) self.assertEqual(0.0, latest_agent.error.last_failure) self.assertEqual(0, latest_agent.error.failure_count) - with patch('azurelinuxagent.ga.update.UpdateHandler.get_latest_agent', return_value=latest_agent): + with patch('azurelinuxagent.ga.update.UpdateHandler.get_latest_agent_greater_than_daemon', return_value=latest_agent): self._test_run_latest(mock_child=ChildMock(side_effect=Exception("Force blacklisting"))) self.assertFalse(latest_agent.is_available) @@ -1319,12 +1292,12 @@ def test_run_latest_exception_blacklists(self): def test_run_latest_exception_does_not_blacklist_if_terminating(self): self.prepare_agents() - latest_agent = self.update_handler.get_latest_agent() + latest_agent = self.update_handler.get_latest_agent_greater_than_daemon() self.assertTrue(latest_agent.is_available) self.assertEqual(0.0, latest_agent.error.last_failure) self.assertEqual(0, latest_agent.error.failure_count) - with patch('azurelinuxagent.ga.update.UpdateHandler.get_latest_agent', return_value=latest_agent): + with patch('azurelinuxagent.ga.update.UpdateHandler.get_latest_agent_greater_than_daemon', return_value=latest_agent): self.update_handler.is_running = False self._test_run_latest(mock_child=ChildMock(side_effect=Exception("Attempt blacklisting"))) @@ -1352,7 +1325,7 @@ def test_get_latest_agent_should_return_latest_agent_even_on_bad_error_json(self with open(error_file_path, 'w') as f: f.write("") - latest_agent = 
self.update_handler.get_latest_agent() + latest_agent = self.update_handler.get_latest_agent_greater_than_daemon() self.assertEqual(latest_agent.version, dst_ver, "Latest agent version is invalid") def _test_run(self, invocations=1, calls=1, enable_updates=False, sleep_interval=(6,)): @@ -1404,7 +1377,7 @@ def test_run(self): self._test_run() def test_run_stops_if_update_available(self): - self.update_handler._check_and_download_agent_if_upgrade_available = Mock(return_value=True) + self.update_handler._download_agent_if_upgrade_available = Mock(return_value=True) self._test_run(invocations=0, calls=0, enable_updates=True) def test_run_stops_if_orphaned(self): @@ -1416,7 +1389,7 @@ def test_run_clears_sentinel_on_successful_exit(self): self.assertFalse(os.path.isfile(self.update_handler._sentinel_file_path())) def test_run_leaves_sentinel_on_unsuccessful_exit(self): - self.update_handler._check_and_download_agent_if_upgrade_available = Mock(side_effect=Exception) + self.update_handler._download_agent_if_upgrade_available = Mock(side_effect=Exception) self._test_run(invocations=1, calls=0, enable_updates=True) self.assertTrue(os.path.isfile(self.update_handler._sentinel_file_path())) @@ -1428,14 +1401,14 @@ def test_run_emits_restart_event(self): def test_set_agents_sets_agents(self): self.prepare_agents() - self.update_handler._set_agents([GuestAgent(path=path) for path in self.agent_dirs()]) + self.update_handler._set_and_sort_agents([GuestAgent(path=path) for path in self.agent_dirs()]) self.assertTrue(len(self.update_handler.agents) > 0) self.assertEqual(len(self.agent_dirs()), len(self.update_handler.agents)) def test_set_agents_sorts_agents(self): self.prepare_agents() - self.update_handler._set_agents([GuestAgent(path=path) for path in self.agent_dirs()]) + self.update_handler._set_and_sort_agents([GuestAgent(path=path) for path in self.agent_dirs()]) v = FlexibleVersion("100000") for a in self.update_handler.agents: @@ -1487,7 +1460,7 @@ def 
_test_upgrade_available( self.update_handler.protocol_util = protocol conf.get_autoupdate_gafamily = Mock(return_value=protocol.family) - return self.update_handler._check_and_download_agent_if_upgrade_available(protocol, base_version=base_version) + return self.update_handler._download_agent_if_upgrade_available(protocol, base_version=base_version) def test_upgrade_available_returns_true_on_first_use(self): self.assertTrue(self._test_upgrade_available()) @@ -1500,7 +1473,7 @@ def test_upgrade_available_handles_missing_family(self): self.update_handler.protocol_util = protocol with patch('azurelinuxagent.common.logger.warn') as mock_logger: with patch('tests.ga.test_update.ProtocolMock.get_vmagent_pkgs', side_effect=ProtocolError): - self.assertFalse(self.update_handler._check_and_download_agent_if_upgrade_available(protocol, base_version=CURRENT_VERSION)) + self.assertFalse(self.update_handler._download_agent_if_upgrade_available(protocol, base_version=CURRENT_VERSION)) self.assertEqual(0, mock_logger.call_count) def test_upgrade_available_includes_old_agents(self): @@ -1529,28 +1502,19 @@ def test_upgrade_available_purges_old_agents(self): agent_versions.append(CURRENT_VERSION) self.assertEqual(agent_versions, self.agent_versions()) - def test_update_available_returns_true_if_current_gets_blacklisted(self): - self.update_handler._is_version_eligible = Mock(return_value=False) - self.assertTrue(self._test_upgrade_available()) - def test_upgrade_available_skips_if_too_frequent(self): conf.get_autoupdate_frequency = Mock(return_value=10000) self.update_handler.last_attempt_time = time.time() self.assertFalse(self._test_upgrade_available()) - def test_upgrade_available_skips_if_when_no_new_versions(self): + def test_upgrade_available_skips_when_no_new_versions(self): self.prepare_agents() base_version = self.agent_versions()[0] + 1 - self.update_handler._is_version_eligible = lambda x: x == base_version 
self.assertFalse(self._test_upgrade_available(base_version=base_version)) def test_upgrade_available_skips_when_no_versions(self): self.assertFalse(self._test_upgrade_available(protocol=ProtocolMock())) - def test_upgrade_available_skips_when_updates_are_disabled(self): - conf.get_autoupdate_enabled = Mock(return_value=False) - self.assertFalse(self._test_upgrade_available()) - def test_upgrade_available_sorts(self): self.prepare_agents() self._test_upgrade_available() @@ -1584,7 +1548,7 @@ def test_update_happens_when_extensions_disabled(self, _): before an update is found, this test attempts to ensure that behavior never changes. """ - self.update_handler._check_and_download_agent_if_upgrade_available = Mock(return_value=True) + self.update_handler._download_agent_if_upgrade_available = Mock(return_value=True) self._test_run(invocations=0, calls=0, enable_updates=True, sleep_interval=(300,)) @patch("azurelinuxagent.common.logger.info") @@ -1929,9 +1893,11 @@ def __get_update_handler(self, iterations=1, test_data=None, hotfix_frequency=1. 
with _get_update_handler(iterations, test_data) as (update_handler, protocol): + protocol.aggregate_status = None + def get_handler(url, **kwargs): if reload_conf is not None: - reload_conf(url, protocol.mock_wire_data) + reload_conf(url, protocol) if HttpRequestPredicates.is_agent_package_request(url): agent_pkg = load_bin_data(self._get_agent_file_name(), self._agent_zip_dir) @@ -1939,7 +1905,14 @@ def get_handler(url, **kwargs): return ResponseMock(response=agent_pkg) return protocol.mock_wire_data.mock_http_get(url, **kwargs) - protocol.set_http_handlers(http_get_handler=get_handler) + def put_handler(url, *args, **_): + if HttpRequestPredicates.is_host_plugin_status_request(url): + # Skip reading the HostGA request data as its encoded + return MockHttpResponse(status=500) + protocol.aggregate_status = json.loads(args[0]) + return MockHttpResponse(status=201) + + protocol.set_http_handlers(http_get_handler=get_handler, http_put_handler=put_handler) with self.create_conf_mocks(hotfix_frequency, normal_frequency): with patch("azurelinuxagent.ga.update.add_event") as mock_telemetry: update_handler._protocol = protocol @@ -1950,12 +1923,23 @@ def __assert_exit_code_successful(self, exit_mock): exit_args, _ = exit_mock.call_args self.assertEqual(exit_args[0], 0, "Exit code should be 0") + def __assert_upgrade_telemetry_emitted_for_requested_version(self, mock_telemetry, upgrade=True, version="99999.0.0.0"): + upgrade_event_msgs = [kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if + 'Exiting current process to {0} to the request Agent version {1}'.format( + "upgrade" if upgrade else "downgrade", version) in kwarg['message'] and kwarg[ + 'op'] == WALAEventOperation.AgentUpgrade] + self.assertEqual(1, len(upgrade_event_msgs), + "Did not find the event indicating that the agent was upgraded. 
Got: {0}".format( + mock_telemetry.call_args_list)) + def __assert_upgrade_telemetry_emitted(self, mock_telemetry, upgrade_type=AgentUpgradeType.Normal): upgrade_event_msgs = [kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if '{0} Agent upgrade discovered, updating to WALinuxAgent-99999.0.0.0 -- exiting'.format( upgrade_type) in kwarg['message'] and kwarg[ 'op'] == WALAEventOperation.AgentUpgrade] - self.assertEqual(1, len(upgrade_event_msgs), "Agent not upgraded properly") + self.assertEqual(1, len(upgrade_event_msgs), + "Did not find the event indicating that the agent was upgraded. Got: {0}".format( + mock_telemetry.call_args_list)) def __assert_agent_directories_available(self, versions): for version in versions: @@ -1973,6 +1957,13 @@ def __assert_no_agent_upgrade_telemetry(self, mock_telemetry): "Agent upgrade discovered, updating to" in kwarg['message'] and kwarg[ 'op'] == WALAEventOperation.AgentUpgrade]), "Unwanted upgrade") + def __assert_ga_version_in_status(self, aggregate_status, version=str(CURRENT_VERSION)): + self.assertIsNotNone(aggregate_status, "Status should be reported") + self.assertEqual(aggregate_status['aggregateStatus']['guestAgentStatus']['version'], version, + "Status should be reported from the Current version") + self.assertEqual(aggregate_status['aggregateStatus']['guestAgentStatus']['status'], 'Ready', + "Guest Agent should be reported as Ready") + def test_it_should_upgrade_agent_on_process_start_if_auto_upgrade_enabled(self): with self.__get_update_handler(iterations=10) as (update_handler, mock_telemetry): @@ -1988,7 +1979,8 @@ def test_it_should_download_new_agents_and_not_auto_upgrade_if_not_permitted(sel data_file = DATA_FILE.copy() data_file['ga_manifest'] = "wire/ga_manifest_no_upgrade.xml" - def reload_conf(url, mock_wire_data): + def reload_conf(url, protocol): + mock_wire_data = protocol.mock_wire_data # This function reloads the conf mid-run to mimic an actual customer scenario if 
HttpRequestPredicates.is_ga_manifest_request(url) and mock_wire_data.call_counts["manifest_of_ga.xml"] >= no_of_iterations/2: reload_conf.call_count += 1 @@ -2015,7 +2007,8 @@ def test_it_should_upgrade_agent_in_given_time_window_if_permitted(self): data_file = DATA_FILE.copy() data_file['ga_manifest'] = "wire/ga_manifest_no_upgrade.xml" - def reload_conf(url, mock_wire_data): + def reload_conf(url, protocol): + mock_wire_data = protocol.mock_wire_data # This function reloads the conf mid-run to mimic an actual customer scenario if HttpRequestPredicates.is_ga_manifest_request(url) and mock_wire_data.call_counts["manifest_of_ga.xml"] >= 2: reload_conf.call_count += 1 @@ -2059,7 +2052,8 @@ def test_it_should_not_auto_upgrade_if_corresponding_time_not_elapsed(self): data_file = DATA_FILE.copy() data_file['ga_manifest'] = "wire/ga_manifest_no_upgrade.xml" - def reload_conf(url, mock_wire_data): + def reload_conf(url, protocol): + mock_wire_data = protocol.mock_wire_data # This function reloads the conf mid-run to mimic an actual customer scenario if HttpRequestPredicates.is_ga_manifest_request(url) and mock_wire_data.call_counts["manifest_of_ga.xml"] >= no_of_iterations / 2: reload_conf.call_count += 1 @@ -2091,12 +2085,7 @@ def test_it_should_download_only_requested_version_if_available(self): update_handler.run(debug=True) self.__assert_exit_code_successful(update_handler.exit_mock) - upgrade_event_msgs = [kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if - 'Agent upgrade discovered, updating to WALinuxAgent-9.9.9.10 -- exiting' in kwarg[ - 'message'] and kwarg['op'] == WALAEventOperation.AgentUpgrade] - self.assertEqual(1, len(upgrade_event_msgs), - "Did not find the event indicating that the agent was upgraded. 
Got: {0}".format( - mock_telemetry.call_args_list)) + self.__assert_upgrade_telemetry_emitted_for_requested_version(mock_telemetry, version="9.9.9.10") self.__assert_agent_directories_exist_and_others_dont_exist(versions=["9.9.9.10"]) def test_it_should_cleanup_all_agents_except_requested_version_and_current_version(self): @@ -2112,10 +2101,7 @@ def test_it_should_cleanup_all_agents_except_requested_version_and_current_versi update_handler.run(debug=True) self.__assert_exit_code_successful(update_handler.exit_mock) - upgrade_event_msgs = [kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if - 'Agent upgrade discovered, updating to WALinuxAgent-9.9.9.10 -- exiting' in kwarg[ - 'message'] and kwarg['op'] == WALAEventOperation.AgentUpgrade] - self.assertEqual(1, len(upgrade_event_msgs), "Agent not upgraded properly") + self.__assert_upgrade_telemetry_emitted_for_requested_version(mock_telemetry, version="9.9.9.10") self.__assert_agent_directories_exist_and_others_dont_exist(versions=["9.9.9.10", str(CURRENT_VERSION)]) def test_it_should_not_update_if_requested_version_not_found_in_manifest(self): @@ -2148,9 +2134,10 @@ def test_it_should_only_try_downloading_requested_version_on_new_incarnation(sel self.prepare_agents() self.assertEqual(20, self.agent_count(), "Agent directories not set properly") - def reload_conf(url, mock_wire_data): - # This function reloads the conf mid-run to mimic an actual customer scenario + def reload_conf(url, protocol): + mock_wire_data = protocol.mock_wire_data + # This function reloads the conf mid-run to mimic an actual customer scenario if HttpRequestPredicates.is_goal_state_request(url) and mock_wire_data.call_counts[ "goalstate"] >= 10 and mock_wire_data.call_counts["goalstate"] < 15: @@ -2178,7 +2165,7 @@ def reload_conf(url, mock_wire_data): self.assertGreaterEqual(reload_conf.call_count, 1, "Reload conf not updated as expected") self.__assert_exit_code_successful(update_handler.exit_mock) - 
self.__assert_upgrade_telemetry_emitted(mock_telemetry) + self.__assert_upgrade_telemetry_emitted_for_requested_version(mock_telemetry) self.__assert_agent_directories_exist_and_others_dont_exist(versions=["99999.0.0.0", str(CURRENT_VERSION)]) self.assertEqual(update_handler._protocol.mock_wire_data.call_counts['agentArtifact'], 1, "only 1 agent should've been downloaded - 1 per incarnation") @@ -2192,7 +2179,9 @@ def test_it_should_fallback_to_old_update_logic_if_requested_version_not_availab self.prepare_agents() self.assertEqual(20, self.agent_count(), "Agent directories not set properly") - def reload_conf(url, mock_wire_data): + def reload_conf(url, protocol): + mock_wire_data = protocol.mock_wire_data + # This function reloads the conf mid-run to mimic an actual customer scenario if HttpRequestPredicates.is_goal_state_request(url) and mock_wire_data.call_counts[ "goalstate"] >= 5: @@ -2245,6 +2234,97 @@ def test_it_should_not_download_anything_if_requested_version_is_current_version self.__assert_no_agent_upgrade_telemetry(mock_telemetry) self.__assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION)]) + def test_it_should_skip_wait_to_update_if_requested_version_available(self): + no_of_iterations = 100 + + def reload_conf(url, protocol): + mock_wire_data = protocol.mock_wire_data + + # This function reloads the conf mid-run to mimic an actual customer scenario + if HttpRequestPredicates.is_goal_state_request(url) and mock_wire_data.call_counts["goalstate"] >= 5: + reload_conf.call_count += 1 + + # Assert GA version from status to ensure agent is running fine from the current version + self.__assert_ga_version_in_status(protocol.aggregate_status) + + # Update the ext-conf and incarnation and add requested version from GS + mock_wire_data.data_files["ext_conf"] = "wire/ext_conf_requested_version.xml" + data_file['ga_manifest'] = "wire/ga_manifest.xml" + mock_wire_data.reload() + self._add_write_permission_to_goal_state_files() + 
mock_wire_data.set_incarnation(2) + + reload_conf.call_count = 0 + + data_file = mockwiredata.DATA_FILE.copy() + data_file['ga_manifest'] = "wire/ga_manifest_no_upgrade.xml" + with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, reload_conf=reload_conf, + normal_frequency=10, hotfix_frequency=10) as (update_handler, mock_telemetry): + with patch.object(conf, "get_enable_ga_versioning", return_value=True): + update_handler.run(debug=True) + + self.assertGreater(reload_conf.call_count, 0, "Reload conf not updated") + self.assertLess(update_handler.get_iterations(), no_of_iterations, + "The code should've exited as soon as requested version was found") + self.__assert_exit_code_successful(update_handler.exit_mock) + self.__assert_upgrade_telemetry_emitted_for_requested_version(mock_telemetry, version="9.9.9.10") + + def test_it_should_blacklist_current_agent_on_downgrade(self): + # Create Agent directory for current agent + self.prepare_agents(count=1) + self.assertTrue(os.path.exists(self.agent_dir(CURRENT_VERSION))) + self.assertFalse(next(agent for agent in self.agents() if agent.version == CURRENT_VERSION).is_blacklisted, + "The current agent should not be blacklisted") + downgraded_version = "1.2.0" + + data_file = mockwiredata.DATA_FILE.copy() + data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + with self.__get_update_handler(test_data=data_file) as (update_handler, mock_telemetry): + with patch.object(conf, "get_enable_ga_versioning", return_value=True): + update_handler._protocol.mock_wire_data.set_extension_config_requested_version(downgraded_version) + update_handler._protocol.mock_wire_data.set_incarnation(2) + try: + set_daemon_version("1.0.0.0") + update_handler.run(debug=True) + finally: + os.environ.pop(DAEMON_VERSION_ENV_VARIABLE) + + self.__assert_exit_code_successful(update_handler.exit_mock) + self.__assert_upgrade_telemetry_emitted_for_requested_version(mock_telemetry, upgrade=False, + 
version=downgraded_version) + self.assertTrue(next(agent for agent in self.agents() if agent.version == CURRENT_VERSION).is_blacklisted, + "The current agent should be blacklisted") + + def test_it_should_not_downgrade_below_daemon_version(self): + data_file = mockwiredata.DATA_FILE.copy() + data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + with self.__get_update_handler(test_data=data_file) as (update_handler, mock_telemetry): + with patch.object(conf, "get_enable_ga_versioning", return_value=True): + update_handler._protocol.mock_wire_data.set_extension_config_requested_version("1.0.0.0") + update_handler._protocol.mock_wire_data.set_incarnation(2) + + try: + set_daemon_version("1.2.3.4") + update_handler.run(debug=True) + finally: + os.environ.pop(DAEMON_VERSION_ENV_VARIABLE) + + self.__assert_exit_code_successful(update_handler.exit_mock) + upgrade_msgs = [kwarg for _, kwarg in mock_telemetry.call_args_list if + kwarg['op'] == WALAEventOperation.AgentUpgrade] + # This will throw if corresponding message not found so not asserting on that + requested_version_found = next(kwarg for kwarg in upgrade_msgs if + "Found requested version in manifest: 1.0.0.0 for incarnation: 2" in kwarg[ + 'message']) + self.assertTrue(requested_version_found['is_success'], + "The requested version found op should be reported as a success") + + skipping_update = next(kwarg for kwarg in upgrade_msgs if + "Can't process the upgrade as the requested version: 1.0.0.0 is < current daemon version: 1.2.3.4" in + kwarg['message']) + self.assertFalse(skipping_update['is_success'], "Failed Event should be reported as a failure") + self.__assert_ga_version_in_status(update_handler._protocol.aggregate_status) + @patch('azurelinuxagent.ga.update.get_collect_telemetry_events_handler') @patch('azurelinuxagent.ga.update.get_send_telemetry_events_handler') @@ -2406,6 +2486,9 @@ def create_packages(self): def get_protocol(self): return self + def get_incarnation(self): + return self.etag 
+ def get_vmagent_manifests(self): self.call_counts["get_vmagent_manifests"] += 1 if self.goal_state_is_stale: @@ -2584,7 +2667,7 @@ def _create_update_handler(): Creates an UpdateHandler in which agent updates are mocked as a no-op. """ update_handler = get_update_handler() - update_handler._check_and_download_agent_if_upgrade_available = Mock(return_value=False) + update_handler._download_agent_if_upgrade_available = Mock(return_value=False) return update_handler From 55be32686d1c9be79d833c2e2209d4a11bc793df Mon Sep 17 00:00:00 2001 From: Laveesh Rohra Date: Sat, 29 Jan 2022 01:38:33 +0530 Subject: [PATCH 16/84] Modify deny-listing logic of the Agent (#2487) --- azurelinuxagent/ga/update.py | 102 ++++++++++++++++++++++------------- tests/ga/test_update.py | 84 +++++++++++++++++++---------- 2 files changed, 120 insertions(+), 66 deletions(-) diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py index d795d26bb5..3c7b6f0a74 100644 --- a/azurelinuxagent/ga/update.py +++ b/azurelinuxagent/ga/update.py @@ -265,7 +265,13 @@ def run_latest(self, child_args=None): log_event=False) if ret is None: - ret = self.child_process.wait() + # Wait for the process to exit + if self.child_process.wait() > 0: + msg = u"ExtHandler process {0} launched with command '{1}' exited with return code: {2}".format( + agent_name, + agent_cmd, + ret) + logger.warn(msg) else: msg = u"Agent {0} launched with command '{1}' failed with return code: {2}".format( @@ -280,15 +286,6 @@ def run_latest(self, child_args=None): is_success=False, message=msg) - if ret is not None and ret > 0: - msg = u"Agent {0} launched with command '{1}' returned code: {2}".format( - agent_name, - agent_cmd, - ret) - logger.warn(msg) - if latest_agent is not None: - latest_agent.mark_failure(is_fatal=True) - except Exception as e: # Ignore child errors during termination if self.is_running: @@ -304,7 +301,7 @@ def run_latest(self, child_args=None): is_success=False, message=detailed_message) if 
latest_agent is not None: - latest_agent.mark_failure(is_fatal=True) + latest_agent.mark_failure(is_fatal=True, reason=detailed_message) self.child_process = None return @@ -368,6 +365,7 @@ def run(self, debug=False): self._ensure_extension_telemetry_state_configured_properly(protocol) self._ensure_firewall_rules_persisted(dst_ip=protocol.get_endpoint()) self._add_accept_tcp_firewall_rule_if_not_enabled(dst_ip=protocol.get_endpoint()) + self._reset_legacy_blacklisted_agents() # Get all thread handlers telemetry_handler = get_send_telemetry_events_handler(self.protocol_util) @@ -482,11 +480,11 @@ def handle_updates_for_requested_version(): # (unless the CURRENT_VERSION == daemon version, but since we don't support downgrading # below daemon version, we will never reach this code path if that's the scenario) current_agent = next(agent for agent in self.agents if agent.version == CURRENT_VERSION) - logger.info( - "Blacklisting the agent {0} since a downgrade was requested in the GoalState, " - "suggesting that we really don't want to execute any extensions using this version".format( - CURRENT_VERSION)) - current_agent.mark_failure(is_fatal=True) + msg = "Blacklisting the agent {0} since a downgrade was requested in the GoalState, " \ + "suggesting that we really don't want to execute any extensions using this version".format( + CURRENT_VERSION) + logger.info(msg) + current_agent.mark_failure(is_fatal=True, reason=msg) except StopIteration: logger.warn( "Could not find a matching agent with current version {0} to blacklist, skipping it".format( @@ -1092,7 +1090,11 @@ def agent_upgrade_time_elapsed(now_): # Set the agents to those available for download at least as current as the existing agent # or to the requested version (if specified) host = self._get_host_plugin(protocol=protocol) - self._set_and_sort_agents([GuestAgent(pkg=pkg, host=host) for pkg in packages_to_download]) + agents_to_download = [GuestAgent(pkg=pkg, host=host) for pkg in packages_to_download] + + 
# Filter out the agents that were downloaded/extracted successfully. If the agent was not installed properly, + # we delete the directory and the zip package from the filesystem + self._set_and_sort_agents([agent for agent in agents_to_download if agent.is_available]) # Remove from disk any agent no longer needed in the VM. # If requested version is provided, this would delete all other agents present on the VM except - @@ -1333,6 +1335,19 @@ def __upgrade_agent_if_permitted(self): upgrade_type == AgentUpgradeType.Normal and next_normal_time <= now): raise AgentUpgradeExitException(upgrade_message) + def _reset_legacy_blacklisted_agents(self): + # Reset the state of all blacklisted agents that were blacklisted by legacy agents (i.e. not during auto-update) + + # Filter legacy agents which are blacklisted but do not contain a `reason` in their error.json files + # (this flag signifies that this agent was blacklisted by the newer agents). + try: + legacy_blacklisted_agents = [agent for agent in self._load_agents() if + agent.is_blacklisted and agent.error.reason == ''] + for agent in legacy_blacklisted_agents: + agent.clear_error() + except Exception as err: + logger.warn("Unable to reset legacy blacklisted agents due to: {0}".format(err)) + class GuestAgent(object): def __init__(self, path=None, pkg=None, host=None): @@ -1341,13 +1356,13 @@ def __init__(self, path=None, pkg=None, host=None): version = None if path is not None: m = AGENT_DIR_PATTERN.match(path) - if m == None: + if m is None: raise UpdateError(u"Illegal agent directory: {0}".format(path)) version = m.group(1) elif self.pkg is not None: version = pkg.version - if version == None: + if version is None: raise UpdateError(u"Illegal agent version: {0}".format(version)) self.version = FlexibleVersion(version) @@ -1371,10 +1386,15 @@ def __init__(self, path=None, pkg=None, host=None): if isinstance(e, IOError): raise - # Note the failure, blacklist the agent if the package downloaded - # - An exception with 
a downloaded package indicates the package - # is corrupt (e.g., missing the HandlerManifest.json file) - self.mark_failure(is_fatal=os.path.isfile(self.get_agent_pkg_path())) + # If we're unable to download/unpack the agent, delete the Agent directory and the zip file (if exists) to + # ensure we try downloading again in the next round. + try: + if os.path.isdir(self.get_agent_dir()): + shutil.rmtree(self.get_agent_dir(), ignore_errors=True) + if os.path.isfile(self.get_agent_pkg_path()): + os.remove(self.get_agent_pkg_path()) + except Exception as err: + logger.warn("Unable to delete Agent files: {0}".format(err)) msg = u"Agent {0} install failed with exception:".format( self.name) @@ -1422,11 +1442,11 @@ def is_downloaded(self): return self.is_blacklisted or \ os.path.isfile(self.get_agent_manifest_path()) - def mark_failure(self, is_fatal=False): + def mark_failure(self, is_fatal=False, reason=''): try: if not os.path.isdir(self.get_agent_dir()): os.makedirs(self.get_agent_dir()) - self.error.mark_failure(is_fatal=is_fatal) + self.error.mark_failure(is_fatal=is_fatal, reason=reason) self.error.save() if self.error.is_blacklisted: msg = u"Agent {0} is permanently blacklisted".format(self.name) @@ -1617,23 +1637,29 @@ def _unpack(self): class GuestAgentError(object): def __init__(self, path): + self.last_failure = 0.0 + self.was_fatal = False if path is None: raise UpdateError(u"GuestAgentError requires a path") self.path = path + self.failure_count = 0 + self.reason = '' self.clear() return - def mark_failure(self, is_fatal=False): - self.last_failure = time.time() # pylint: disable=W0201 + def mark_failure(self, is_fatal=False, reason=''): + self.last_failure = time.time() self.failure_count += 1 - self.was_fatal = is_fatal # pylint: disable=W0201 + self.was_fatal = is_fatal + self.reason = reason return def clear(self): self.last_failure = 0.0 self.failure_count = 0 self.was_fatal = False + self.reason = '' return @property @@ -1665,25 +1691,25 @@ def 
save(self): return def from_json(self, data): - self.last_failure = max( # pylint: disable=W0201 - self.last_failure, - data.get(u"last_failure", 0.0)) - self.failure_count = max( # pylint: disable=W0201 - self.failure_count, - data.get(u"failure_count", 0)) - self.was_fatal = self.was_fatal or data.get(u"was_fatal", False) # pylint: disable=W0201 + self.last_failure = max(self.last_failure, data.get(u"last_failure", 0.0)) + self.failure_count = max(self.failure_count, data.get(u"failure_count", 0)) + self.was_fatal = self.was_fatal or data.get(u"was_fatal", False) + reason = data.get(u"reason", '') + self.reason = reason if reason != '' else self.reason return def to_json(self): data = { u"last_failure": self.last_failure, u"failure_count": self.failure_count, - u"was_fatal": self.was_fatal + u"was_fatal": self.was_fatal, + u"reason": ustr(self.reason) } return data def __str__(self): - return "Last Failure: {0}, Total Failures: {1}, Fatal: {2}".format( + return "Last Failure: {0}, Total Failures: {1}, Fatal: {2}, Reason: {3}".format( self.last_failure, self.failure_count, - self.was_fatal) + self.was_fatal, + self.reason) diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index e1dbe55bd6..9cc5bdb1a6 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -15,6 +15,7 @@ import tempfile import time import unittest +import uuid import zipfile from datetime import datetime, timedelta @@ -59,19 +60,22 @@ NO_ERROR = { "last_failure": 0.0, "failure_count": 0, - "was_fatal": False + "was_fatal": False, + "reason": '' } FATAL_ERROR = { "last_failure": 42.42, "failure_count": 2, - "was_fatal": True + "was_fatal": True, + "reason": "Test failure" } WITH_ERROR = { "last_failure": 42.42, "failure_count": 2, - "was_fatal": False + "was_fatal": False, + "reason": "Test failure" } EMPTY_MANIFEST = { @@ -413,17 +417,19 @@ def test_mark_failure_permanent(self): def test_str(self): err = self.create_error(error_data=NO_ERROR) - s = "Last Failure: {0}, 
Total Failures: {1}, Fatal: {2}".format( + s = "Last Failure: {0}, Total Failures: {1}, Fatal: {2}, Reason: {3}".format( NO_ERROR["last_failure"], NO_ERROR["failure_count"], - NO_ERROR["was_fatal"]) + NO_ERROR["was_fatal"], + NO_ERROR["reason"]) self.assertEqual(s, str(err)) err = self.create_error(error_data=WITH_ERROR) - s = "Last Failure: {0}, Total Failures: {1}, Fatal: {2}".format( + s = "Last Failure: {0}, Total Failures: {1}, Fatal: {2}, Reason: {3}".format( WITH_ERROR["last_failure"], WITH_ERROR["failure_count"], - WITH_ERROR["was_fatal"]) + WITH_ERROR["was_fatal"], + WITH_ERROR["reason"]) self.assertEqual(s, str(err)) return @@ -727,7 +733,7 @@ def test_ensure_downloaded(self, mock_http_get): self.assertTrue(agent.is_downloaded) @patch("azurelinuxagent.ga.update.GuestAgent._download", side_effect=UpdateError) - def test_ensure_downloaded_download_fails(self, mock_download): # pylint: disable=unused-argument + def test_ensure_failure_in_download_cleans_up_filesystem(self, _): self.remove_agents() self.assertFalse(os.path.isdir(self.agent_path)) @@ -735,36 +741,36 @@ def test_ensure_downloaded_download_fails(self, mock_download): # pylint: disab pkg.uris.append(None) agent = GuestAgent(pkg=pkg) - self.assertEqual(1, agent.error.failure_count) - self.assertFalse(agent.error.was_fatal) - self.assertFalse(agent.is_blacklisted) + self.assertFalse(agent.is_blacklisted, "The agent should not be blacklisted if unable to unpack/download") + self.assertFalse(os.path.exists(agent.get_agent_dir()), "Agent directory should be cleaned up") + self.assertFalse(os.path.exists(agent.get_agent_pkg_path()), "Agent package should be cleaned up") @patch("azurelinuxagent.ga.update.GuestAgent._download") @patch("azurelinuxagent.ga.update.GuestAgent._unpack", side_effect=UpdateError) - def test_ensure_downloaded_unpack_fails(self, mock_unpack, mock_download): # pylint: disable=unused-argument + def test_ensure_downloaded_unpack_failure_cleans_file_system(self, *_): 
self.assertFalse(os.path.isdir(self.agent_path)) pkg = ExtHandlerPackage(version=str(self._get_agent_version())) pkg.uris.append(None) agent = GuestAgent(pkg=pkg) - self.assertEqual(1, agent.error.failure_count) - self.assertTrue(agent.error.was_fatal) - self.assertTrue(agent.is_blacklisted) + self.assertFalse(agent.is_blacklisted, "The agent should not be blacklisted if unable to unpack/download") + self.assertFalse(os.path.exists(agent.get_agent_dir()), "Agent directory should be cleaned up") + self.assertFalse(os.path.exists(agent.get_agent_pkg_path()), "Agent package should be cleaned up") @patch("azurelinuxagent.ga.update.GuestAgent._download") @patch("azurelinuxagent.ga.update.GuestAgent._unpack") @patch("azurelinuxagent.ga.update.GuestAgent._load_manifest", side_effect=UpdateError) - def test_ensure_downloaded_load_manifest_fails(self, mock_manifest, mock_unpack, mock_download): # pylint: disable=unused-argument + def test_ensure_downloaded_load_manifest_cleans_up_agent_directories(self, *_): self.assertFalse(os.path.isdir(self.agent_path)) pkg = ExtHandlerPackage(version=str(self._get_agent_version())) pkg.uris.append(None) agent = GuestAgent(pkg=pkg) - self.assertEqual(1, agent.error.failure_count) - self.assertTrue(agent.error.was_fatal) - self.assertTrue(agent.is_blacklisted) + self.assertFalse(agent.is_blacklisted, "The agent should not be blacklisted if unable to unpack/download") + self.assertFalse(os.path.exists(agent.get_agent_dir()), "Agent directory should be cleaned up") + self.assertFalse(os.path.exists(agent.get_agent_pkg_path()), "Agent package should be cleaned up") @patch("azurelinuxagent.ga.update.GuestAgent._download") @patch("azurelinuxagent.ga.update.GuestAgent._unpack") @@ -1256,8 +1262,7 @@ def test_run_latest_forwards_output(self): finally: shutil.rmtree(tempdir, True) - def test_run_latest_nonzero_code_marks_failures(self): - # logger.add_logger_appender(logger.AppenderType.STDOUT) + def 
test_run_latest_nonzero_code_does_not_mark_failure(self): self.prepare_agents() latest_agent = self.update_handler.get_latest_agent_greater_than_daemon() @@ -1268,10 +1273,7 @@ def test_run_latest_nonzero_code_marks_failures(self): with patch('azurelinuxagent.ga.update.UpdateHandler.get_latest_agent_greater_than_daemon', return_value=latest_agent): self._test_run_latest(mock_child=ChildMock(return_value=1)) - self.assertTrue(latest_agent.is_blacklisted) - self.assertFalse(latest_agent.is_available) - self.assertNotEqual(0.0, latest_agent.error.last_failure) - self.assertEqual(1, latest_agent.error.failure_count) + self.assertFalse(latest_agent.is_blacklisted, "Agent should not be blacklisted") def test_run_latest_exception_blacklists(self): self.prepare_agents() @@ -1280,14 +1282,16 @@ def test_run_latest_exception_blacklists(self): self.assertTrue(latest_agent.is_available) self.assertEqual(0.0, latest_agent.error.last_failure) self.assertEqual(0, latest_agent.error.failure_count) + verify_string = "Force blacklisting: {0}".format(str(uuid.uuid4())) with patch('azurelinuxagent.ga.update.UpdateHandler.get_latest_agent_greater_than_daemon', return_value=latest_agent): - self._test_run_latest(mock_child=ChildMock(side_effect=Exception("Force blacklisting"))) + self._test_run_latest(mock_child=ChildMock(side_effect=Exception(verify_string))) self.assertFalse(latest_agent.is_available) self.assertTrue(latest_agent.error.is_blacklisted) self.assertNotEqual(0.0, latest_agent.error.last_failure) self.assertEqual(1, latest_agent.error.failure_count) + self.assertIn(verify_string, latest_agent.error.reason, "Error reason not found while blacklisting") def test_run_latest_exception_does_not_blacklist_if_terminating(self): self.prepare_agents() @@ -1869,6 +1873,27 @@ def get_handler(url, **kwargs): "Retrieving the goal state recovered from previous errors" in args[0]] self.assertTrue(len(info_msgs) > 0, "Agent should've logged a message when recovered from GS errors") + def 
test_it_should_reset_legacy_blacklisted_agents_on_process_start(self): + # Add some good agents + self.prepare_agents(count=10) + good_agents = [agent.name for agent in self.agents()] + + # Add a set of blacklisted agents + self.prepare_agents(count=20, is_available=False) + for agent in self.agents(): + # Assert the test environment is correctly set + if agent.name not in good_agents: + self.assertTrue(agent.is_blacklisted, "Agent {0} should be blacklisted".format(agent.name)) + else: + self.assertFalse(agent.is_blacklisted, "Agent {0} should not be blacklisted".format(agent.name)) + + with _get_update_handler() as (update_handler, _): + update_handler.run(debug=True) + self.assertEqual(20, self.agent_count(), "All agents should be available on disk") + # Ensure none of the agents are blacklisted + for agent in self.agents(): + self.assertFalse(agent.is_blacklisted, "Legacy Agent should not be blacklisted") + class TestAgentUpgrade(UpdateTestCase): @@ -2292,8 +2317,11 @@ def test_it_should_blacklist_current_agent_on_downgrade(self): self.__assert_exit_code_successful(update_handler.exit_mock) self.__assert_upgrade_telemetry_emitted_for_requested_version(mock_telemetry, upgrade=False, version=downgraded_version) - self.assertTrue(next(agent for agent in self.agents() if agent.version == CURRENT_VERSION).is_blacklisted, - "The current agent should be blacklisted") + current_agent = next(agent for agent in self.agents() if agent.version == CURRENT_VERSION) + self.assertTrue(current_agent.is_blacklisted, "The current agent should be blacklisted") + self.assertEqual(current_agent.error.reason, "Blacklisting the agent {0} since a downgrade was requested in the GoalState, " + "suggesting that we really don't want to execute any extensions using this version".format(CURRENT_VERSION), + "Invalid reason specified for blacklisting agent") def test_it_should_not_downgrade_below_daemon_version(self): data_file = mockwiredata.DATA_FILE.copy() From 
40d98de87566245956f5d056a84e32f83a1eaf9d Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Tue, 1 Feb 2022 16:05:45 -0800 Subject: [PATCH 17/84] Add check for all Firewall rules periodically (#2480) * Add check for all Firewall rules periodically * fix unit tests * insert command * add logging * verbose check * log first time * refactor * updated log * added documentation * address comments * fix test cases * append to the bottom * fix tests * tests * wait command * address rewrite * fix tests * added comments * address new comments * fix tests * change config flag and comments * fix tests * fix protocal tests * update comments * fixed comments * import * undo add_event * removed extra line * updated msg --- azurelinuxagent/common/conf.py | 12 +- azurelinuxagent/common/osutil/default.py | 66 ++++---- azurelinuxagent/common/osutil/nsbsd.py | 21 +-- .../common/persist_firewall_rules.py | 10 +- .../metadata_server_migration_util.py | 4 +- azurelinuxagent/common/utils/networkutil.py | 124 ++++++++------- azurelinuxagent/ga/env.py | 26 +++- azurelinuxagent/ga/update.py | 8 +- tests/common/osutil/test_default.py | 142 ++++++++++++------ tests/common/test_persist_firewall_rules.py | 57 +++++-- tests/ga/test_update.py | 42 ++++-- .../test_metadata_server_migration_util.py | 5 +- tests/protocol/test_protocol_util.py | 2 + tests/test_agent.py | 1 + 14 files changed, 325 insertions(+), 195 deletions(-) diff --git a/azurelinuxagent/common/conf.py b/azurelinuxagent/common/conf.py index 65d0703094..a666e53ebb 100644 --- a/azurelinuxagent/common/conf.py +++ b/azurelinuxagent/common/conf.py @@ -188,7 +188,8 @@ def load_conf_from_file(conf_file_path, conf=__conf__): "Debug.AgentCpuQuota": 75, "Debug.EtpCollectionPeriod": 300, "Debug.AutoUpdateHotfixFrequency": 14400, - "Debug.AutoUpdateNormalFrequency": 86400 + "Debug.AutoUpdateNormalFrequency": 86400, + "Debug.FirewallRulesLogPeriod": 86400 } @@ -605,3 +606,12 @@ def 
get_enable_ga_versioning(conf=__conf__): NOTE: This option is experimental and may be removed in later versions of the Agent. """ return conf.get_switch("Debug.EnableGAVersioning", False) + + +def get_firewall_rules_log_period(conf=__conf__): + """ + Determine the frequency to perform the periodic operation of logging firewall rules. + + NOTE: This option is experimental and may be removed in later versions of the Agent. + """ + return conf.get_int("Debug.FirewallRulesLogPeriod", 86400) diff --git a/azurelinuxagent/common/osutil/default.py b/azurelinuxagent/common/osutil/default.py index 1c6042165c..f18ff9048c 100644 --- a/azurelinuxagent/common/osutil/default.py +++ b/azurelinuxagent/common/osutil/default.py @@ -75,18 +75,6 @@ def get_iptables_version_command(): return ["iptables", "--version"] -def get_accept_tcp_rule(wait, command, destination): - return AddFirewallRules.get_accept_tcp_rule(command, destination, wait=wait) - - -def get_firewall_accept_command(wait, command, destination, owner_uid): - return AddFirewallRules.get_iptables_accept_command(wait, command, destination, owner_uid) - - -def get_firewall_drop_command(wait, command, destination): - return AddFirewallRules.get_iptables_drop_command(wait, command, destination) - - def get_firewall_list_command(wait): return _add_wait(wait, ["iptables", "-t", "security", "-L", "-nxv"]) @@ -228,15 +216,13 @@ def _delete_rule(self, rule): if e.returncode == 2: raise Exception("invalid firewall deletion rule '{0}'".format(rule)) - def remove_firewall(self, dst_ip, uid): + def remove_firewall(self, dst_ip, uid, wait): # If a previous attempt failed, do not retry global _enable_firewall # pylint: disable=W0603 if not _enable_firewall: return False try: - wait = self.get_firewall_will_wait() - # This rule was <= 2.2.25 only, and may still exist on some VMs. Until 2.2.25 # has aged out, keep this cleanup in place. 
self._delete_rule(get_firewall_delete_conntrack_accept_command(wait, dst_ip)) @@ -269,50 +255,56 @@ def remove_legacy_firewall_rule(self, dst_ip): "Unable to remove legacy firewall rule, won't try removing it again. Error: {0}".format(ustr(error))) def enable_firewall(self, dst_ip, uid): + """ + It checks if every iptable rule exists and add them if not present. It returns a tuple(enable firewall success status, update rules flag) + enable firewall success status: Returns True if every firewall rule exists otherwise False + update rules flag: Returns True if rules are updated otherwise False + """ + # This is to send telemetry when iptable rules updated + is_firewall_rules_updated = False # If a previous attempt failed, do not retry global _enable_firewall # pylint: disable=W0603 if not _enable_firewall: - return False + return False, is_firewall_rules_updated try: wait = self.get_firewall_will_wait() - # If the DROP rule exists, make no changes + # check every iptable rule and delete others if any rule is missing + # and append every iptable rule to the end of the chain. 
try: - drop_rule = get_firewall_drop_command(wait, AddFirewallRules.CHECK_COMMAND, dst_ip) - shellutil.run_command(drop_rule) - logger.verbose("Firewall appears established") - return True + if not AddFirewallRules.verify_iptables_rules_exist(wait, dst_ip, uid): + self.remove_firewall(dst_ip, uid, wait) + AddFirewallRules.add_iptables_rules(wait, dst_ip, uid) + is_firewall_rules_updated = True except CommandError as e: if e.returncode == 2: - self.remove_firewall(dst_ip, uid) + self.remove_firewall(dst_ip, uid, wait) msg = "please upgrade iptables to a version that supports the -C option" logger.warn(msg) - raise Exception(msg) - - # Otherwise, append all rules - try: - AddFirewallRules.add_iptables_rules(wait, dst_ip, uid) + raise except Exception as error: logger.warn(ustr(error)) raise - logger.info("Successfully added Azure fabric firewall rules") - - try: - output = shellutil.run_command(get_firewall_list_command(wait)) - logger.info("Firewall rules:\n{0}".format(output)) - except Exception as e: - logger.warn("Listing firewall rules failed: {0}".format(ustr(e))) - - return True + return True, is_firewall_rules_updated except Exception as e: _enable_firewall = False logger.info("Unable to establish firewall -- " "no further attempts will be made: " "{0}".format(ustr(e))) - return False + return False, is_firewall_rules_updated + + def get_firewall_list(self, wait=None): + try: + if wait is None: + wait = self.get_firewall_will_wait() + output = shellutil.run_command(get_firewall_list_command(wait)) + return output + except Exception as e: + logger.warn("Listing firewall rules failed: {0}".format(ustr(e))) + return "" @staticmethod def _correct_instance_id(instance_id): diff --git a/azurelinuxagent/common/osutil/nsbsd.py b/azurelinuxagent/common/osutil/nsbsd.py index 31b8a34372..016f506f0d 100644 --- a/azurelinuxagent/common/osutil/nsbsd.py +++ b/azurelinuxagent/common/osutil/nsbsd.py @@ -24,7 +24,6 @@ class NSBSDOSUtil(FreeBSDOSUtil): - resolver = None def 
__init__(self): @@ -45,7 +44,7 @@ def __init__(self): for server in output.split("\n"): if server == '': break - server = server[:-1] # remove last '=' + server = server[:-1] # remove last '=' cmd = "grep '{}' /etc/hosts".format(server) + " | awk '{print $1}'" ret, ip = shellutil.run_get_output(cmd) servers.append(ip) @@ -53,7 +52,8 @@ def __init__(self): dns.resolver.override_system_resolver(self.resolver) def set_hostname(self, hostname): - self._run_command_without_raising(['/usr/Firewall/sbin/setconf', '/usr/Firewall/System/global', 'SystemName', hostname]) + self._run_command_without_raising( + ['/usr/Firewall/sbin/setconf', '/usr/Firewall/System/global', 'SystemName', hostname]) self._run_command_without_raising(["/usr/Firewall/sbin/enlog"]) self._run_command_without_raising(["/usr/Firewall/sbin/enproxy", "-u"]) self._run_command_without_raising(["/usr/Firewall/sbin/ensl", "-u"]) @@ -94,16 +94,15 @@ def chpasswd(self, username, password, crypt_id=6, salt_len=10): commands = [['setconf', '/usr/Firewall/ConfigFiles/webadmin', 'ACL', 'any'], ['ensl']] self._run_multiple_commands_without_raising(commands, log_error=False, continue_on_error=False) - def deploy_ssh_pubkey(self, username, pubkey): """ Deploy authorized_key """ path, thumbprint, value = pubkey # pylint: disable=W0612 - #overide parameters + # overide parameters super(NSBSDOSUtil, self).deploy_ssh_pubkey('admin', - ["/usr/Firewall/.ssh/authorized_keys", thumbprint, value]) + ["/usr/Firewall/.ssh/authorized_keys", thumbprint, value]) def del_root_password(self): logger.warn("Root password deletion disabled") @@ -130,7 +129,7 @@ def restart_if(self, ifname=None, retries=None, wait=None): shellutil.run("ennetwork", chk_err=False) def set_dhcp_hostname(self, hostname): - #already done by the dhcp client + # already done by the dhcp client pass def get_firewall_dropped_packets(self, dst_ip=None): @@ -145,10 +144,14 @@ def _delete_rule(self, rule): # disable iptables methods return - def 
remove_firewall(self, dst_ip=None, uid=None): + def remove_firewall(self, dst_ip=None, uid=None, wait=""): # disable iptables methods return True def enable_firewall(self, dst_ip=None, uid=None): # disable iptables methods - return True + return True, True + + def get_firewall_list(self, wait=""): + # disable iptables methods + return "" diff --git a/azurelinuxagent/common/persist_firewall_rules.py b/azurelinuxagent/common/persist_firewall_rules.py index bd143454ab..55b19c41ef 100644 --- a/azurelinuxagent/common/persist_firewall_rules.py +++ b/azurelinuxagent/common/persist_firewall_rules.py @@ -143,12 +143,21 @@ def __verify_firewall_rules_enabled(self): return True + def __remove_firewalld_rules(self): + try: + AddFirewallRules.remove_firewalld_rules(self._dst_ip, self._uid) + except Exception as error: + logger.warn( + "failed to remove rule using firewalld.service: {0}".format(ustr(error))) + def _setup_permanent_firewalld_rules(self): if self.__verify_firewall_rules_enabled(): logger.info("Firewall rules already set. 
No change needed.") return logger.info("Firewall rules not added yet, adding them now using firewalld.service") + # Remove first if partial list present + self.__remove_firewalld_rules() # Add rules if not already set AddFirewallRules.add_firewalld_rules(self._dst_ip, self._uid) logger.info("Successfully added the firewall commands using firewalld.service") @@ -327,4 +336,3 @@ def __unit_file_version_modified(self): logger.info( "Unit file version matches with expected version: {0}, not overwriting unit file".format(unit_file_version)) return False - diff --git a/azurelinuxagent/common/protocol/metadata_server_migration_util.py b/azurelinuxagent/common/protocol/metadata_server_migration_util.py index ab3a6bb5be..8a3b9b1c64 100644 --- a/azurelinuxagent/common/protocol/metadata_server_migration_util.py +++ b/azurelinuxagent/common/protocol/metadata_server_migration_util.py @@ -60,9 +60,9 @@ def _reset_firewall_rules(osutil): Removes MetadataServer firewall rule so IMDS can be used. Enables WireServer firewall rule based on if firewall is configured to be on. """ - osutil.remove_firewall(dst_ip=_KNOWN_METADATASERVER_IP, uid=os.getuid()) + osutil.remove_firewall(dst_ip=_KNOWN_METADATASERVER_IP, uid=os.getuid(), wait=osutil.get_firewall_will_wait()) if conf.enable_firewall(): - success = osutil.enable_firewall(dst_ip=KNOWN_WIRESERVER_IP, uid=os.getuid()) + success, _ = osutil.enable_firewall(dst_ip=KNOWN_WIRESERVER_IP, uid=os.getuid()) add_event( AGENT_NAME, version=CURRENT_VERSION, diff --git a/azurelinuxagent/common/utils/networkutil.py b/azurelinuxagent/common/utils/networkutil.py index 8908d841cc..160f17514b 100644 --- a/azurelinuxagent/common/utils/networkutil.py +++ b/azurelinuxagent/common/utils/networkutil.py @@ -26,6 +26,7 @@ class RouteEntry(object): Represents a single route. The destination, gateway, and mask members are hex representations of the IPv4 address in network byte order. 
""" + def __init__(self, interface, destination, gateway, mask, flags, metric): self.interface = interface self.destination = destination @@ -40,7 +41,7 @@ def _net_hex_to_dotted_quad(value): raise Exception("String to dotted quad conversion must be 8 characters") octets = [] for idx in range(6, -2, -2): - octets.append(str(int(value[idx:idx+2], 16))) + octets.append(str(int(value[idx:idx + 2], 16))) return ".".join(octets) def destination_quad(self): @@ -55,7 +56,7 @@ def mask_quad(self): def to_json(self): f = '{{"Iface": "{0}", "Destination": "{1}", "Gateway": "{2}", "Mask": "{3}", "Flags": "{4:#06x}", "Metric": "{5}"}}' return f.format(self.interface, self.destination_quad(), self.gateway_quad(), self.mask_quad(), - self.flags, self.metric) + self.flags, self.metric) def __str__(self): f = "Iface: {0}\tDestination: {1}\tGateway: {2}\tMask: {3}\tFlags: {4:#06x}\tMetric: {5}" @@ -63,7 +64,7 @@ def __str__(self): self.flags, self.metric) def __repr__(self): - return 'RouteEntry("{0}", "{1}", "{2}", "{3}", "{4:#04x}", "{5}")'\ + return 'RouteEntry("{0}", "{1}", "{2}", "{3}", "{4:#04x}", "{5}")' \ .format(self.interface, self.destination, self.gateway, self.mask, self.flags, self.metric) @@ -112,6 +113,11 @@ class FirewallCmdDirectCommands(object): # checks if the firewalld rule is present or not QueryPassThrough = "--query-passthrough" + # firewall-cmd --permanent --direct --remove-passthrough ipv4 -t security -A OUTPUT -d 168.63.129.16 -p tcp -m owner --uid-owner 0 -j ACCEPT + # success + # remove the firewalld rule + RemovePassThrough = "--remove-passthrough" + class AddFirewallRules(object): """ @@ -136,16 +142,10 @@ class AddFirewallRules(object): CHECK_COMMAND = "-C" @staticmethod - def _add_wait(wait, command): + def __get_iptables_base_command(wait=""): """ If 'wait' is True, adds the wait option (-w) to the given iptables command line """ - if wait: - command.append("-w") - return command - - @staticmethod - def __get_iptables_base_command(wait=""): if wait 
!= "": return ["iptables", "-w"] return ["iptables"] @@ -157,56 +157,35 @@ def __get_firewalld_base_command(command): @staticmethod def __get_common_command_params(command, destination): - return ["-t", "security", command, "OUTPUT", "-d", destination, "-p", "tcp", "-m"] - - @staticmethod - def __get_common_accept_command_params(wait, command, destination, uid): - cmd = AddFirewallRules.__get_common_command_params(command, destination) - cmd.extend(["owner", "--uid-owner", str(uid), "-j", "ACCEPT"]) - return AddFirewallRules._add_wait(wait, cmd) + return ["-t", "security", command, "OUTPUT", "-d", destination, "-p", "tcp"] @staticmethod - def __get_common_drop_command_params(wait, command, destination): - cmd = AddFirewallRules.__get_common_command_params(command, destination) - cmd.extend(["conntrack", "--ctstate", "INVALID,NEW", "-j", "DROP"]) - return AddFirewallRules._add_wait(wait, cmd) - - @staticmethod - def get_accept_tcp_rule(command, destination, firewalld_command="", wait=""): - # This rule allows DNS TCP request to wireserver ip for non root users - + def __get_firewall_base_command(command, destination, firewalld_command="", wait=""): + # Firewalld.service fails if we set `-w` in the iptables command, so not adding it at all for firewalld commands if firewalld_command != "": cmd = AddFirewallRules.__get_firewalld_base_command(firewalld_command) else: cmd = AddFirewallRules.__get_iptables_base_command(wait) - cmd = cmd + ['-t', 'security', command, 'OUTPUT', '-d', destination, '-p', 'tcp', '--destination-port', '53', '-j', 'ACCEPT'] + cmd.extend(AddFirewallRules.__get_common_command_params(command, destination)) return cmd @staticmethod - def get_iptables_accept_command(wait, command, destination, owner_uid): - cmd = AddFirewallRules.__get_iptables_base_command() - cmd.extend(AddFirewallRules.__get_common_accept_command_params(wait, command, destination, owner_uid)) - return cmd - - @staticmethod - def get_iptables_drop_command(wait, command, 
destination): - cmd = AddFirewallRules.__get_iptables_base_command() - cmd.extend(AddFirewallRules.__get_common_drop_command_params(wait, command, destination)) + def get_accept_tcp_rule(command, destination, firewalld_command="", wait=""): + # This rule allows DNS TCP request to wireserver ip for non root users + cmd = AddFirewallRules.__get_firewall_base_command(command, destination, firewalld_command, wait) + cmd.extend(['--destination-port', '53', '-j', 'ACCEPT']) return cmd @staticmethod - def get_firewalld_accept_command(command, destination, uid, wait=""): - cmd = AddFirewallRules.__get_firewalld_base_command(command) - cmd.extend( - AddFirewallRules.__get_common_accept_command_params(wait, AddFirewallRules.APPEND_COMMAND, destination, - uid)) + def get_wire_root_accept_rule(command, destination, owner_uid, firewalld_command="", wait=""): + cmd = AddFirewallRules.__get_firewall_base_command(command, destination, firewalld_command, wait) + cmd.extend(["-m", "owner", "--uid-owner", str(owner_uid), "-j", "ACCEPT"]) return cmd @staticmethod - def get_firewalld_drop_command(command, destination, wait=""): - cmd = AddFirewallRules.__get_firewalld_base_command(command) - cmd.extend( - AddFirewallRules.__get_common_drop_command_params(wait, AddFirewallRules.APPEND_COMMAND, destination)) + def get_wire_non_root_drop_rule(command, destination, firewalld_command="", wait=""): + cmd = AddFirewallRules.__get_firewall_base_command(command, destination, firewalld_command, wait) + cmd.extend(["-m", "conntrack", "--ctstate", "INVALID,NEW", "-j", "DROP"]) return cmd @staticmethod @@ -226,43 +205,60 @@ def __execute_cmd(cmd): raise Exception(msg) @staticmethod - def add_iptables_rules(wait, dst_ip, uid): - # The order in which the below rules are added matters for the ip table rules to work as expected - AddFirewallRules.__raise_if_empty(dst_ip, "Destination IP") - AddFirewallRules.__raise_if_empty(uid, "User ID") - - accept_tcp_rule = 
AddFirewallRules.get_accept_tcp_rule(AddFirewallRules.APPEND_COMMAND, dst_ip, wait=wait) - AddFirewallRules.__execute_cmd(accept_tcp_rule) + def __execute_check_command(cmd): + # Here we primarily check if an iptable rule exist. True if it exits , false if not + try: + shellutil.run_command(cmd) + return True + except CommandError as err: + # return code 1 is expected while using the check command. Raise if encounter any other return code + if err.returncode != 1: + raise + return False - accept_rule = AddFirewallRules.get_iptables_accept_command(wait, AddFirewallRules.APPEND_COMMAND, dst_ip, uid) - AddFirewallRules.__execute_cmd(accept_rule) + @staticmethod + def verify_iptables_rules_exist(wait, dst_ip, uid): + check_cmd_tcp_rule = AddFirewallRules.get_accept_tcp_rule(AddFirewallRules.CHECK_COMMAND, dst_ip, wait=wait) + check_cmd_accept_rule = AddFirewallRules.get_wire_root_accept_rule(AddFirewallRules.CHECK_COMMAND, dst_ip, uid, + wait=wait) + check_cmd_drop_rule = AddFirewallRules.get_wire_non_root_drop_rule(AddFirewallRules.CHECK_COMMAND, dst_ip, wait=wait) - drop_rule = AddFirewallRules.get_iptables_drop_command(wait, AddFirewallRules.APPEND_COMMAND, dst_ip) - AddFirewallRules.__execute_cmd(drop_rule) + return AddFirewallRules.__execute_check_command(check_cmd_tcp_rule) and AddFirewallRules.__execute_check_command(check_cmd_accept_rule) \ + and AddFirewallRules.__execute_check_command(check_cmd_drop_rule) @staticmethod - def __execute_firewalld_commands(command, dst_ip, uid): + def __execute_firewall_commands(dst_ip, uid, command=APPEND_COMMAND, firewalld_command="", wait=""): + # The order in which the below rules are added matters for the ip table rules to work as expected + AddFirewallRules.__raise_if_empty(dst_ip, "Destination IP") AddFirewallRules.__raise_if_empty(uid, "User ID") - # Firewalld.service fails if we set `-w` in the iptables command, so not adding it at all for firewalld commands - - accept_tcp_rule = 
AddFirewallRules.get_accept_tcp_rule(AddFirewallRules.INSERT_COMMAND, dst_ip, firewalld_command=command) + accept_tcp_rule = AddFirewallRules.get_accept_tcp_rule(command, dst_ip, + firewalld_command=firewalld_command, wait=wait) AddFirewallRules.__execute_cmd(accept_tcp_rule) - accept_cmd = AddFirewallRules.get_firewalld_accept_command(command, dst_ip, uid) + accept_cmd = AddFirewallRules.get_wire_root_accept_rule(command, dst_ip, uid, + firewalld_command=firewalld_command, wait=wait) AddFirewallRules.__execute_cmd(accept_cmd) - drop_cmd = AddFirewallRules.get_firewalld_drop_command(command, dst_ip) + drop_cmd = AddFirewallRules.get_wire_non_root_drop_rule(command, dst_ip, firewalld_command=firewalld_command, wait=wait) AddFirewallRules.__execute_cmd(drop_cmd) + @staticmethod + def add_iptables_rules(wait, dst_ip, uid): + AddFirewallRules.__execute_firewall_commands(dst_ip, uid, command=AddFirewallRules.APPEND_COMMAND, wait=wait) + @staticmethod def add_firewalld_rules(dst_ip, uid): # Firewalld.service fails if we set `-w` in the iptables command, so not adding it at all for firewalld commands # Firewalld.service with the "--permanent --passthrough" parameter ensures that a firewall rule is set only once even if command is executed multiple times - AddFirewallRules.__execute_firewalld_commands(FirewallCmdDirectCommands.PassThrough, dst_ip, uid) + AddFirewallRules.__execute_firewall_commands(dst_ip, uid, firewalld_command=FirewallCmdDirectCommands.PassThrough) @staticmethod def check_firewalld_rule_applied(dst_ip, uid): - AddFirewallRules.__execute_firewalld_commands(FirewallCmdDirectCommands.QueryPassThrough, dst_ip, uid) + AddFirewallRules.__execute_firewall_commands(dst_ip, uid, firewalld_command=FirewallCmdDirectCommands.QueryPassThrough) + + @staticmethod + def remove_firewalld_rules(dst_ip, uid): + AddFirewallRules.__execute_firewall_commands(dst_ip, uid, firewalld_command=FirewallCmdDirectCommands.RemovePassThrough) diff --git a/azurelinuxagent/ga/env.py 
b/azurelinuxagent/ga/env.py index e8780cbe6f..771f47657c 100644 --- a/azurelinuxagent/ga/env.py +++ b/azurelinuxagent/ga/env.py @@ -26,7 +26,7 @@ import azurelinuxagent.common.logger as logger from azurelinuxagent.common.dhcp import get_dhcp_handler -from azurelinuxagent.common.event import add_periodic, WALAEventOperation +from azurelinuxagent.common.event import add_periodic, WALAEventOperation, add_event from azurelinuxagent.common.future import ustr from azurelinuxagent.common.interfaces import ThreadHandlerInterface from azurelinuxagent.common.osutil import get_osutil @@ -131,7 +131,13 @@ def _operation(self): self._osutil.remove_legacy_firewall_rule(dst_ip=self._protocol.get_endpoint()) self._try_remove_legacy_firewall_rule = True - success = self._osutil.enable_firewall(dst_ip=self._protocol.get_endpoint(), uid=os.getuid()) + success, is_firewall_rules_updated = self._osutil.enable_firewall(dst_ip=self._protocol.get_endpoint(), + uid=os.getuid()) + + if is_firewall_rules_updated: + msg = "Successfully added Azure fabric firewall rules. Current Firewall rules:\n{0}".format(self._osutil.get_firewall_list()) + logger.info(msg) + add_event(AGENT_NAME, version=CURRENT_VERSION, op=WALAEventOperation.Firewall, message=msg, log_event=False) add_periodic( logger.EVERY_HOUR, @@ -142,6 +148,21 @@ def _operation(self): log_event=False) +class LogFirewallRules(PeriodicOperation): + """ + Log firewall rules state once a day. + Goal is to capture the firewall state when the agent service startup, + in addition to add more debug data and would be more useful long term. 
+ """ + def __init__(self, osutil): + super(LogFirewallRules, self).__init__(conf.get_firewall_rules_log_period()) + self._osutil = osutil + + def _operation(self): + # Log firewall rules state once a day + logger.info("Current Firewall rules:\n{0}".format(self._osutil.get_firewall_list())) + + class SetRootDeviceScsiTimeout(PeriodicOperation): def __init__(self, osutil): super(SetRootDeviceScsiTimeout, self).__init__(conf.get_root_device_scsi_timeout_period()) @@ -223,6 +244,7 @@ def daemon(self): if conf.enable_firewall(): periodic_operations.append(EnableFirewall(osutil, protocol)) + periodic_operations.append(LogFirewallRules(osutil)) if conf.get_root_device_scsi_timeout() is not None: periodic_operations.append(SetRootDeviceScsiTimeout(osutil)) if conf.get_monitor_hostname(): diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py index 3c7b6f0a74..8852575250 100644 --- a/azurelinuxagent/ga/update.py +++ b/azurelinuxagent/ga/update.py @@ -43,8 +43,6 @@ from azurelinuxagent.common.exception import ResourceGoneError, UpdateError, ExitException, AgentUpgradeExitException from azurelinuxagent.common.future import ustr from azurelinuxagent.common.osutil import get_osutil, systemd -from azurelinuxagent.common.osutil.default import get_firewall_drop_command, \ - get_accept_tcp_rule from azurelinuxagent.common.persist_firewall_rules import PersistFirewallRulesHandler from azurelinuxagent.common.protocol.hostplugin import HostPluginProtocol from azurelinuxagent.common.protocol.restapi import VMAgentUpdateStatus, VMAgentUpdateStatuses, ExtHandlerPackageList, \ @@ -1254,7 +1252,7 @@ def _execute_run_command(command): wait = self.osutil.get_firewall_will_wait() # "-C" checks if the iptable rule is available in the chain. 
It throws an exception with return code 1 if the ip table rule doesnt exist - drop_rule = get_firewall_drop_command(wait, AddFirewallRules.CHECK_COMMAND, dst_ip) + drop_rule = AddFirewallRules.get_wire_non_root_drop_rule(AddFirewallRules.CHECK_COMMAND, dst_ip, wait=wait) if not _execute_run_command(drop_rule): # DROP command doesn't exist indicates then none of the firewall rules are set yet # exiting here as the environment thread will set up all firewall rules @@ -1262,12 +1260,12 @@ def _execute_run_command(command): return else: # DROP rule exists in the ip table chain. Hence checking if the DNS TCP to wireserver rule exists. If not we add it. - accept_tcp_rule = get_accept_tcp_rule(wait, AddFirewallRules.CHECK_COMMAND, dst_ip) + accept_tcp_rule = AddFirewallRules.get_accept_tcp_rule(AddFirewallRules.CHECK_COMMAND, dst_ip, wait=wait) if not _execute_run_command(accept_tcp_rule): try: logger.info( "Firewall rule to allow DNS TCP request to wireserver for a non root user unavailable. Setting it now.") - accept_tcp_rule = get_accept_tcp_rule(wait, AddFirewallRules.INSERT_COMMAND, dst_ip) + accept_tcp_rule = AddFirewallRules.get_accept_tcp_rule(AddFirewallRules.INSERT_COMMAND, dst_ip, wait=wait) shellutil.run_command(accept_tcp_rule) logger.info( "Succesfully added firewall rule to allow non root users to do a DNS TCP request to wireserver") diff --git a/tests/common/osutil/test_default.py b/tests/common/osutil/test_default.py index e34e3d4f47..0f7d445f73 100644 --- a/tests/common/osutil/test_default.py +++ b/tests/common/osutil/test_default.py @@ -661,12 +661,13 @@ def set_command(command, output='', exit_code=0): version_command = set_command(osutil.get_iptables_version_command(), output=str(version)) list_command = set_command(osutil.get_firewall_list_command(wait), output="Mock Output") set_command(osutil.get_firewall_packets_command(wait)) - set_command(osutil.get_firewall_drop_command(wait, AddFirewallRules.CHECK_COMMAND, destination)) - 
set_command(osutil.get_firewall_drop_command(wait, AddFirewallRules.APPEND_COMMAND, destination)) - set_command(osutil.get_firewall_accept_command(wait, AddFirewallRules.APPEND_COMMAND, destination, uid)) - set_command(osutil.get_accept_tcp_rule(wait, AddFirewallRules.APPEND_COMMAND, destination)) - set_command(osutil.get_accept_tcp_rule(wait, AddFirewallRules.INSERT_COMMAND, destination)) - set_command(osutil.get_accept_tcp_rule(wait, AddFirewallRules.CHECK_COMMAND, destination)) + set_command(AddFirewallRules.get_wire_non_root_drop_rule(AddFirewallRules.CHECK_COMMAND, destination, wait=wait)) + set_command(AddFirewallRules.get_wire_non_root_drop_rule(AddFirewallRules.APPEND_COMMAND, destination, wait=wait)) + set_command(AddFirewallRules.get_wire_root_accept_rule(AddFirewallRules.CHECK_COMMAND, destination, uid, wait=wait)) + set_command(AddFirewallRules.get_wire_root_accept_rule(AddFirewallRules.APPEND_COMMAND, destination, uid, wait=wait)) + set_command(AddFirewallRules.get_accept_tcp_rule(AddFirewallRules.APPEND_COMMAND, destination, wait=wait)) + set_command(AddFirewallRules.get_accept_tcp_rule(AddFirewallRules.INSERT_COMMAND, destination, wait=wait)) + set_command(AddFirewallRules.get_accept_tcp_rule(AddFirewallRules.CHECK_COMMAND, destination, wait=wait)) # the agent assumes the rules have been deleted when these commands return 1 set_command(osutil.get_firewall_delete_conntrack_accept_command(wait, destination), exit_code=1) set_command(osutil.get_delete_accept_tcp_rule(wait, destination), exit_code=1) @@ -737,24 +738,52 @@ def test_enable_firewall_should_set_up_the_firewall(self): with TestOSUtil._mock_iptables() as mock_iptables: with patch.object(osutil, '_enable_firewall', True): # fail the rule check to force enable of the firewall - mock_iptables.set_command(osutil.get_firewall_drop_command(mock_iptables.wait, AddFirewallRules.CHECK_COMMAND, mock_iptables.destination), exit_code=1) - - success = 
osutil.DefaultOSUtil().enable_firewall(dst_ip=mock_iptables.destination, uid=mock_iptables.uid) - - drop_check_command = TestOSUtil._command_to_string(osutil.get_firewall_drop_command(mock_iptables.wait, AddFirewallRules.CHECK_COMMAND, mock_iptables.destination)) - accept_tcp_append_rule = TestOSUtil._command_to_string(osutil.get_accept_tcp_rule(mock_iptables.wait, AddFirewallRules.APPEND_COMMAND, mock_iptables.destination)) - accept_command = TestOSUtil._command_to_string(osutil.get_firewall_accept_command(mock_iptables.wait, AddFirewallRules.APPEND_COMMAND, mock_iptables.destination, mock_iptables.uid)) - drop_add_command = TestOSUtil._command_to_string(osutil.get_firewall_drop_command(mock_iptables.wait, AddFirewallRules.APPEND_COMMAND, mock_iptables.destination)) + mock_iptables.set_command(AddFirewallRules.get_accept_tcp_rule(AddFirewallRules.CHECK_COMMAND, mock_iptables.destination, + wait=mock_iptables.wait), exit_code=0) + mock_iptables.set_command(AddFirewallRules.get_wire_root_accept_rule(AddFirewallRules.CHECK_COMMAND, mock_iptables.destination, mock_iptables.uid, + wait=mock_iptables.wait), exit_code=0) + mock_iptables.set_command(AddFirewallRules.get_wire_non_root_drop_rule(AddFirewallRules.CHECK_COMMAND, mock_iptables.destination, + wait=mock_iptables.wait), exit_code=1) + + success, _ = osutil.DefaultOSUtil().enable_firewall(dst_ip=mock_iptables.destination, uid=mock_iptables.uid) + + tcp_check_command = TestOSUtil._command_to_string(AddFirewallRules.get_accept_tcp_rule(AddFirewallRules.CHECK_COMMAND, mock_iptables.destination, + wait=mock_iptables.wait)) + accept_check_command = TestOSUtil._command_to_string(AddFirewallRules.get_wire_root_accept_rule(AddFirewallRules.CHECK_COMMAND, mock_iptables.destination, mock_iptables.uid, + wait=mock_iptables.wait)) + drop_check_command = TestOSUtil._command_to_string(AddFirewallRules.get_wire_non_root_drop_rule(AddFirewallRules.CHECK_COMMAND, mock_iptables.destination, + wait=mock_iptables.wait)) + 
delete_conntrack_accept_command = TestOSUtil._command_to_string(osutil.get_firewall_delete_conntrack_accept_command(mock_iptables.wait, mock_iptables.destination)) + delete_accept_tcp_rule = TestOSUtil._command_to_string(osutil.get_delete_accept_tcp_rule(mock_iptables.wait, mock_iptables.destination)) + delete_owner_accept_command = TestOSUtil._command_to_string(osutil.get_firewall_delete_owner_accept_command(mock_iptables.wait, mock_iptables.destination, mock_iptables.uid)) + delete_conntrack_drop_command = TestOSUtil._command_to_string(osutil.get_firewall_delete_conntrack_drop_command(mock_iptables.wait, mock_iptables.destination)) + accept_tcp_command = TestOSUtil._command_to_string(AddFirewallRules.get_accept_tcp_rule(AddFirewallRules.APPEND_COMMAND, mock_iptables.destination, wait=mock_iptables.wait)) + accept_command = TestOSUtil._command_to_string(AddFirewallRules.get_wire_root_accept_rule(AddFirewallRules.APPEND_COMMAND, mock_iptables.destination, mock_iptables.uid, wait=mock_iptables.wait)) + drop_add_command = TestOSUtil._command_to_string(AddFirewallRules.get_wire_non_root_drop_rule(AddFirewallRules.APPEND_COMMAND, mock_iptables.destination, wait=mock_iptables.wait)) self.assertTrue(success, "Enabling the firewall was not successful") - # Exactly 4 calls have to be made. - # First is the DROP rule check which was mocked to fail, Second, third and forth to Append the three IPTable rules - self.assertEqual(len(mock_iptables.command_calls), 4, "Incorrect number of calls to iptables: [{0}]". 
format(mock_iptables.command_calls)) - self.assertEqual(mock_iptables.command_calls[0], drop_check_command, "The first command should check the drop rule") - self.assertEqual(mock_iptables.command_calls[1], accept_tcp_append_rule, - "The second command should add the dns tcp accept rule") - self.assertEqual(mock_iptables.command_calls[2], accept_command, "The third command should add the accept rule") - self.assertEqual(mock_iptables.command_calls[3], drop_add_command, "The fourth command should add the drop rule") + # Exactly 10 calls have to be made. + # First is the check rule check which was mocked to fail, and delete call and then append calls + self.assertEqual(len(mock_iptables.command_calls), 10, "Incorrect number of calls to iptables: [{0}]". format(mock_iptables.command_calls)) + self.assertEqual(mock_iptables.command_calls[0], tcp_check_command, "The first command should check the tcp rule") + self.assertEqual(mock_iptables.command_calls[1], accept_check_command, "The second command should check the accept rule") + self.assertEqual(mock_iptables.command_calls[2], drop_check_command, "The third command should check the drop rule") + self.assertEqual(mock_iptables.command_calls[3], delete_conntrack_accept_command, + "The fourth command should delete the conntrack accept rule: {0}".format( + mock_iptables.command_calls[3])) + self.assertEqual(mock_iptables.command_calls[4], delete_accept_tcp_rule, + "The fifth command should delete the dns tcp accept rule: {0}".format( + mock_iptables.command_calls[4])) + self.assertEqual(mock_iptables.command_calls[5], delete_owner_accept_command, + "The sixth command should delete the owner accept rule: {0}".format( + mock_iptables.command_calls[5])) + self.assertEqual(mock_iptables.command_calls[6], delete_conntrack_drop_command, + "The seventh command should delete the conntrack accept rule : {0}".format( + mock_iptables.command_calls[6])) + self.assertEqual(mock_iptables.command_calls[7], accept_tcp_command, + "The 
eighth command should add the dns tcp accept rule") + self.assertEqual(mock_iptables.command_calls[8], accept_command, "The ninth command should add the accept rule") + self.assertEqual(mock_iptables.command_calls[9], drop_add_command, "The tenth command should add the drop rule") self.assertTrue(osutil._enable_firewall, "The firewall should not have been disabled") @@ -762,34 +791,44 @@ def test_enable_firewall_should_not_use_wait_when_iptables_does_not_support_it(s with TestOSUtil._mock_iptables(version=osutil._IPTABLES_LOCKING_VERSION - 1) as mock_iptables: with patch.object(osutil, '_enable_firewall', True): # fail the rule check to force enable of the firewall - mock_iptables.set_command( - osutil.get_firewall_drop_command(mock_iptables.wait, AddFirewallRules.CHECK_COMMAND, - mock_iptables.destination), exit_code=1) + mock_iptables.set_command(AddFirewallRules.get_accept_tcp_rule(AddFirewallRules.CHECK_COMMAND, mock_iptables.destination, + wait=mock_iptables.wait), exit_code=1) + mock_iptables.set_command(AddFirewallRules.get_wire_root_accept_rule(AddFirewallRules.CHECK_COMMAND, mock_iptables.destination, mock_iptables.uid, + wait=mock_iptables.wait), exit_code=1) + mock_iptables.set_command(AddFirewallRules.get_wire_non_root_drop_rule(AddFirewallRules.CHECK_COMMAND, mock_iptables.destination, + wait=mock_iptables.wait), exit_code=1) - success = osutil.DefaultOSUtil().enable_firewall(dst_ip=mock_iptables.destination, uid=mock_iptables.uid) + success, _ = osutil.DefaultOSUtil().enable_firewall(dst_ip=mock_iptables.destination, uid=mock_iptables.uid) self.assertTrue(success, "Enabling the firewall was not successful") - # Exactly 4 calls have to be made. - # First is the DROP rule check which was mocked to fail, Second, third and forth - # to Append the three IPTable rules - self.assertEqual(len(mock_iptables.command_calls), 4, + # Exactly 8 calls have to be made. + # First check rule, delete 4 rules, + # and Append the IPTable 3 rules. 
+ self.assertEqual(len(mock_iptables.command_calls), 8, "Incorrect number of calls to iptables: [{0}]".format(mock_iptables.command_calls)) for command in mock_iptables.command_calls: self.assertNotIn("-w", command, "The -w option should have been used in {0}".format(command)) self.assertTrue(osutil._enable_firewall, "The firewall should not have been disabled") - def test_enable_firewall_should_not_set_firewall_if_the_drop_rule_exists(self): + def test_enable_firewall_should_not_set_firewall_if_the_all_the_rules_exists(self): with TestOSUtil._mock_iptables() as mock_iptables: with patch.object(osutil, '_enable_firewall', True): - drop_check_command = mock_iptables.set_command(osutil.get_firewall_drop_command(mock_iptables.wait, AddFirewallRules.CHECK_COMMAND, mock_iptables.destination), exit_code=0) + tcp_check_command = mock_iptables.set_command(AddFirewallRules.get_accept_tcp_rule(AddFirewallRules.CHECK_COMMAND, mock_iptables.destination, + wait=mock_iptables.wait), exit_code=0) + accept_check_command = mock_iptables.set_command(AddFirewallRules.get_wire_root_accept_rule(AddFirewallRules.CHECK_COMMAND, mock_iptables.destination, mock_iptables.uid, + wait=mock_iptables.wait), exit_code=0) + drop_check_command = mock_iptables.set_command(AddFirewallRules.get_wire_non_root_drop_rule(AddFirewallRules.CHECK_COMMAND, mock_iptables.destination, + wait=mock_iptables.wait), exit_code=0) - success = osutil.DefaultOSUtil().enable_firewall(dst_ip=mock_iptables.destination, uid=mock_iptables.uid) + success, _ = osutil.DefaultOSUtil().enable_firewall(dst_ip=mock_iptables.destination, uid=mock_iptables.uid) self.assertTrue(success, "Enabling the firewall was not successful") - self.assertEqual(len(mock_iptables.command_calls), 1, "Incorrect number of calls to iptables: [{0}]". 
format(mock_iptables.command_calls)) - self.assertEqual(mock_iptables.command_calls[0], drop_check_command, "Unexpected command: {0}".format(mock_iptables.command_calls[0])) + self.assertEqual(len(mock_iptables.command_calls), 3, "Incorrect number of calls to iptables: [{0}]". format(mock_iptables.command_calls)) + self.assertEqual(mock_iptables.command_calls[0], tcp_check_command, "Unexpected command: {0}".format(mock_iptables.command_calls[0])) + self.assertEqual(mock_iptables.command_calls[1], accept_check_command, "Unexpected command: {0}".format(mock_iptables.command_calls[1])) + self.assertEqual(mock_iptables.command_calls[2], drop_check_command, "Unexpected command: {0}".format(mock_iptables.command_calls[2])) self.assertTrue(osutil._enable_firewall) @@ -802,9 +841,14 @@ def test_enable_firewall_should_check_for_invalid_iptables_options(self): # 1 - other errors # 2 - errors which appear to be caused by invalid or abused command # line parameters - drop_check_command = mock_iptables.set_command(osutil.get_firewall_drop_command(mock_iptables.wait, AddFirewallRules.CHECK_COMMAND, mock_iptables.destination), exit_code=2) + tcp_check_command = mock_iptables.set_command(AddFirewallRules.get_accept_tcp_rule(AddFirewallRules.CHECK_COMMAND, mock_iptables.destination, + wait=mock_iptables.wait), exit_code=0) + accept_check_command = mock_iptables.set_command(AddFirewallRules.get_wire_root_accept_rule(AddFirewallRules.CHECK_COMMAND, mock_iptables.destination, mock_iptables.uid, + wait=mock_iptables.wait), exit_code=0) + drop_check_command = mock_iptables.set_command(AddFirewallRules.get_wire_non_root_drop_rule(AddFirewallRules.CHECK_COMMAND, mock_iptables.destination, + wait=mock_iptables.wait), exit_code=2) - success = osutil.DefaultOSUtil().enable_firewall(dst_ip=mock_iptables.destination, uid=mock_iptables.uid) + success, _ = osutil.DefaultOSUtil().enable_firewall(dst_ip=mock_iptables.destination, uid=mock_iptables.uid) delete_conntrack_accept_command = 
TestOSUtil._command_to_string(osutil.get_firewall_delete_conntrack_accept_command(mock_iptables.wait, mock_iptables.destination)) delete_accept_tcp_rule = TestOSUtil._command_to_string(osutil.get_delete_accept_tcp_rule(mock_iptables.wait, mock_iptables.destination)) @@ -812,14 +856,16 @@ def test_enable_firewall_should_check_for_invalid_iptables_options(self): delete_conntrack_drop_command = TestOSUtil._command_to_string(osutil.get_firewall_delete_conntrack_drop_command(mock_iptables.wait, mock_iptables.destination)) self.assertFalse(success, "Enable firewall should have failed") - self.assertEqual(len(mock_iptables.command_calls), 5, "Incorrect number of calls to iptables: [{0}]". format(mock_iptables.command_calls)) - self.assertEqual(mock_iptables.command_calls[0], drop_check_command, "The first command should check the drop rule: {0}".format(mock_iptables.command_calls[0])) - self.assertEqual(mock_iptables.command_calls[1], delete_conntrack_accept_command, "The second command should delete the conntrack accept rule: {0}".format(mock_iptables.command_calls[1])) - self.assertEqual(mock_iptables.command_calls[2], delete_accept_tcp_rule, - "The third command should delete the dns tcp accept rule: {0}".format( - mock_iptables.command_calls[2])) - self.assertEqual(mock_iptables.command_calls[3], delete_owner_accept_command, "The forth command should delete the owner accept rule: {0}".format(mock_iptables.command_calls[3])) - self.assertEqual(mock_iptables.command_calls[4], delete_conntrack_drop_command, "The fifth command should delete the conntrack accept rule : {0}".format(mock_iptables.command_calls[4])) + self.assertEqual(len(mock_iptables.command_calls), 7, "Incorrect number of calls to iptables: [{0}]". 
format(mock_iptables.command_calls)) + self.assertEqual(mock_iptables.command_calls[0], tcp_check_command, "The first command should check the tcp rule: {0}".format(mock_iptables.command_calls[0])) + self.assertEqual(mock_iptables.command_calls[1], accept_check_command, "The second command should check the accept rule: {0}".format(mock_iptables.command_calls[1])) + self.assertEqual(mock_iptables.command_calls[2], drop_check_command, "The third command should check the drop rule: {0}".format(mock_iptables.command_calls[2])) + self.assertEqual(mock_iptables.command_calls[3], delete_conntrack_accept_command, "The fourth command should delete the conntrack accept rule: {0}".format(mock_iptables.command_calls[3])) + self.assertEqual(mock_iptables.command_calls[4], delete_accept_tcp_rule, + "The fifth command should delete the dns tcp accept rule: {0}".format( + mock_iptables.command_calls[4])) + self.assertEqual(mock_iptables.command_calls[5], delete_owner_accept_command, "The sixth command should delete the owner accept rule: {0}".format(mock_iptables.command_calls[5])) + self.assertEqual(mock_iptables.command_calls[6], delete_conntrack_drop_command, "The seventh command should delete the conntrack accept rule : {0}".format(mock_iptables.command_calls[6])) self.assertFalse(osutil._enable_firewall) @@ -827,7 +873,7 @@ def test_enable_firewall_skips_if_disabled(self): with TestOSUtil._mock_iptables() as mock_iptables: with patch.object(osutil, '_enable_firewall', False): - success = osutil.DefaultOSUtil().enable_firewall(dst_ip=mock_iptables.destination, uid=mock_iptables.uid) + success, _ = osutil.DefaultOSUtil().enable_firewall(dst_ip=mock_iptables.destination, uid=mock_iptables.uid) self.assertFalse(success, "The firewall should not have been disabled") self.assertEqual(len(mock_iptables.command_calls), 0, "iptables should not have been invoked: [{0}]". 
format(mock_iptables.command_calls)) @@ -858,7 +904,7 @@ def mock_popen(command, *args, **kwargs): mock_popen.original = subprocess.Popen with patch("azurelinuxagent.common.cgroupapi.subprocess.Popen", side_effect=mock_popen): - success = osutil.DefaultOSUtil().remove_firewall(mock_iptables.destination, mock_iptables.uid) + success = osutil.DefaultOSUtil().remove_firewall(mock_iptables.destination, mock_iptables.uid, mock_iptables.wait) delete_conntrack_accept_command = TestOSUtil._command_to_string(osutil.get_firewall_delete_conntrack_accept_command(mock_iptables.wait, mock_iptables.destination)) delete_accept_tcp_rule = TestOSUtil._command_to_string( @@ -889,7 +935,7 @@ def test_remove_firewall_should_not_retry_invalid_rule(self): # Note that the command is actually a valid rule, but we use the mock to report it as invalid (exit code 2) delete_conntrack_accept_command = mock_iptables.set_command(command, exit_code=2) - success = osutil.DefaultOSUtil().remove_firewall(mock_iptables.destination, mock_iptables.uid) + success = osutil.DefaultOSUtil().remove_firewall(mock_iptables.destination, mock_iptables.uid, mock_iptables.wait) self.assertFalse(success, "Removing the firewall should not have succeeded") self.assertEqual(len(mock_iptables.command_calls), 1, "Expected a single call to iptables: [{0}]". 
format(mock_iptables.command_calls)) diff --git a/tests/common/test_persist_firewall_rules.py b/tests/common/test_persist_firewall_rules.py index 29d845d2bb..307c8536e7 100644 --- a/tests/common/test_persist_firewall_rules.py +++ b/tests/common/test_persist_firewall_rules.py @@ -99,21 +99,25 @@ def _get_persist_firewall_rules_handler(self, systemd=True): def __assert_firewall_called(self, cmd, validate_command_called=True): if validate_command_called: - self.assertIn(AddFirewallRules.get_firewalld_accept_command(command=cmd, - destination=self.__test_dst_ip, - uid=self.__test_uid), + self.assertIn(AddFirewallRules.get_wire_root_accept_rule(command=AddFirewallRules.APPEND_COMMAND, + destination=self.__test_dst_ip, + owner_uid=self.__test_uid, + firewalld_command=cmd), self.__executed_commands, "Firewall {0} command not found".format(cmd)) - self.assertIn(AddFirewallRules.get_firewalld_drop_command(command=cmd, - destination=self.__test_dst_ip), + self.assertIn(AddFirewallRules.get_wire_non_root_drop_rule(command=AddFirewallRules.APPEND_COMMAND, + destination=self.__test_dst_ip, + firewalld_command=cmd), self.__executed_commands, "Firewall {0} command not found".format(cmd)) else: - self.assertNotIn(AddFirewallRules.get_firewalld_accept_command(command=cmd, - destination=self.__test_dst_ip, - uid=self.__test_uid), + self.assertNotIn(AddFirewallRules.get_wire_root_accept_rule(command=AddFirewallRules.APPEND_COMMAND, + destination=self.__test_dst_ip, + owner_uid=self.__test_uid, + firewalld_command=cmd), self.__executed_commands, "Firewall {0} command found".format(cmd)) - self.assertNotIn(AddFirewallRules.get_firewalld_drop_command(command=cmd, - destination=self.__test_dst_ip), + self.assertNotIn(AddFirewallRules.get_wire_non_root_drop_rule(command=AddFirewallRules.APPEND_COMMAND, + destination=self.__test_dst_ip, + firewalld_command=cmd), self.__executed_commands, "Firewall {0} command found".format(cmd)) def __assert_systemctl_called(self, cmd="enable", 
validate_command_called=True): @@ -173,6 +177,22 @@ def __mock_firewalld_running_and_not_applied(cmd): return True, ["echo", "enabled"] return False, [] + @staticmethod + def __mock_firewalld_running_and_remove_not_successful(cmd): + if cmd == PersistFirewallRulesHandler._FIREWALLD_RUNNING_CMD: + return True, ["echo", "running"] + # This is to fail the check if firewalld-rules are already applied + cmds_to_fail = ["firewall-cmd", FirewallCmdDirectCommands.QueryPassThrough, "conntrack"] + if all(cmd_to_fail in cmd for cmd_to_fail in cmds_to_fail): + return True, ["exit", "1"] + # This is to fail the remove if firewalld-rules fails to remove rule + cmds_to_fail = ["firewall-cmd", FirewallCmdDirectCommands.RemovePassThrough, "conntrack"] + if all(cmd_to_fail in cmd for cmd_to_fail in cmds_to_fail): + return True, ["exit", "2"] + if "firewall-cmd" in cmd: + return True, ["echo", "enabled"] + return False, [] + def __setup_and_assert_network_service_setup_scenario(self, handler, mock_popen=None): mock_popen = TestPersistFirewallRulesHandler.__mock_network_setup_service_disabled if mock_popen is None else mock_popen self.__replace_popen_cmd = mock_popen @@ -183,6 +203,7 @@ def __setup_and_assert_network_service_setup_scenario(self, handler, mock_popen= self.__assert_systemctl_reloaded(validate_command_called=True) self.__assert_firewall_cmd_running_called(validate_command_called=True) self.__assert_firewall_called(cmd=FirewallCmdDirectCommands.QueryPassThrough, validate_command_called=False) + self.__assert_firewall_called(cmd=FirewallCmdDirectCommands.RemovePassThrough, validate_command_called=False) self.__assert_firewall_called(cmd=FirewallCmdDirectCommands.PassThrough, validate_command_called=False) self.assertTrue(os.path.exists(handler.get_service_file_path()), "Service unit file not found") @@ -194,6 +215,7 @@ def test_it_should_skip_setup_if_firewalld_already_enabled(self): # Assert we verified that rules were set using firewall-cmd 
self.__assert_firewall_called(cmd=FirewallCmdDirectCommands.QueryPassThrough, validate_command_called=True) # Assert no commands for adding rules using firewall-cmd were called + self.__assert_firewall_called(cmd=FirewallCmdDirectCommands.RemovePassThrough, validate_command_called=False) self.__assert_firewall_called(cmd=FirewallCmdDirectCommands.PassThrough, validate_command_called=False) # Assert no commands for systemctl were called self.assertFalse(any("systemctl" in cmd for cmd in self.__executed_commands), "Systemctl shouldn't be called") @@ -215,6 +237,7 @@ def test_it_should_skip_setup_if_agent_network_setup_service_already_enabled_and self.__assert_systemctl_reloaded(validate_command_called=False) self.__assert_firewall_cmd_running_called(validate_command_called=True) self.__assert_firewall_called(cmd=FirewallCmdDirectCommands.QueryPassThrough, validate_command_called=False) + self.__assert_firewall_called(cmd=FirewallCmdDirectCommands.RemovePassThrough, validate_command_called=False) self.__assert_firewall_called(cmd=FirewallCmdDirectCommands.PassThrough, validate_command_called=False) self.assertTrue(os.path.exists(handler.get_service_file_path()), "Service unit file not found") @@ -280,6 +303,19 @@ def test_it_should_use_firewalld_if_available(self): self.__assert_firewall_cmd_running_called(validate_command_called=True) self.__assert_firewall_called(cmd=FirewallCmdDirectCommands.QueryPassThrough, validate_command_called=True) + self.__assert_firewall_called(cmd=FirewallCmdDirectCommands.RemovePassThrough, validate_command_called=True) + self.__assert_firewall_called(cmd=FirewallCmdDirectCommands.PassThrough, validate_command_called=True) + self.assertFalse(any("systemctl" in cmd for cmd in self.__executed_commands), "Systemctl shouldn't be called") + + def test_it_should_add_firewalld_rules_if_remove_raises_exception(self): + + self.__replace_popen_cmd = self.__mock_firewalld_running_and_remove_not_successful + with 
self._get_persist_firewall_rules_handler() as handler: + handler.setup() + + self.__assert_firewall_cmd_running_called(validate_command_called=True) + self.__assert_firewall_called(cmd=FirewallCmdDirectCommands.QueryPassThrough, validate_command_called=True) + self.__assert_firewall_called(cmd=FirewallCmdDirectCommands.RemovePassThrough, validate_command_called=True) self.__assert_firewall_called(cmd=FirewallCmdDirectCommands.PassThrough, validate_command_called=True) self.assertFalse(any("systemctl" in cmd for cmd in self.__executed_commands), "Systemctl shouldn't be called") @@ -356,6 +392,7 @@ def test_it_should_delete_custom_service_files_if_firewalld_enabled(self): self.__assert_firewall_cmd_running_called(validate_command_called=True) self.__assert_firewall_called(cmd=FirewallCmdDirectCommands.QueryPassThrough, validate_command_called=True) + self.__assert_firewall_called(cmd=FirewallCmdDirectCommands.RemovePassThrough, validate_command_called=True) self.__assert_firewall_called(cmd=FirewallCmdDirectCommands.PassThrough, validate_command_called=True) self.__assert_systemctl_called(cmd="is-enabled", validate_command_called=False) self.__assert_systemctl_called(cmd="enable", validate_command_called=False) diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index 9cc5bdb1a6..b0f15b8035 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -1668,14 +1668,19 @@ def test_it_should_set_dns_tcp_iptable_if_drop_available_accept_unavailable(self with _get_update_handler(test_data=DATA_FILE) as (update_handler, _): with patch.object(osutil, '_enable_firewall', True): # drop rule is present - mock_iptables.set_command(osutil.get_firewall_drop_command(mock_iptables.wait, AddFirewallRules.CHECK_COMMAND, mock_iptables.destination), exit_code=0) + mock_iptables.set_command(AddFirewallRules.get_wire_non_root_drop_rule(AddFirewallRules.CHECK_COMMAND, mock_iptables.destination, + wait=mock_iptables.wait), exit_code=0) # non root tcp iptable rule is 
absent - mock_iptables.set_command(osutil.get_accept_tcp_rule(mock_iptables.wait, AddFirewallRules.CHECK_COMMAND, mock_iptables.destination), exit_code=1) + mock_iptables.set_command(AddFirewallRules.get_accept_tcp_rule(AddFirewallRules.CHECK_COMMAND, mock_iptables.destination, + wait=mock_iptables.wait), exit_code=1) update_handler.run(debug=True) - drop_check_command = TestOSUtil._command_to_string(osutil.get_firewall_drop_command(mock_iptables.wait, AddFirewallRules.CHECK_COMMAND, mock_iptables.destination)) - accept_tcp_check_rule = TestOSUtil._command_to_string(osutil.get_accept_tcp_rule(mock_iptables.wait, AddFirewallRules.CHECK_COMMAND, mock_iptables.destination)) - accept_tcp_insert_rule = TestOSUtil._command_to_string(osutil.get_accept_tcp_rule(mock_iptables.wait, AddFirewallRules.INSERT_COMMAND, mock_iptables.destination)) + drop_check_command = TestOSUtil._command_to_string(AddFirewallRules.get_wire_non_root_drop_rule(AddFirewallRules.CHECK_COMMAND, mock_iptables.destination, + wait=mock_iptables.wait)) + accept_tcp_check_rule = TestOSUtil._command_to_string(AddFirewallRules.get_accept_tcp_rule(AddFirewallRules.CHECK_COMMAND, mock_iptables.destination, + wait=mock_iptables.wait)) + accept_tcp_insert_rule = TestOSUtil._command_to_string(AddFirewallRules.get_accept_tcp_rule(AddFirewallRules.INSERT_COMMAND, mock_iptables.destination, + wait=mock_iptables.wait)) # Filtering the mock iptable command calls with only the once related to this test. 
filtered_mock_iptable_calls = [cmd for cmd in mock_iptables.command_calls if cmd in [drop_check_command, accept_tcp_check_rule, accept_tcp_insert_rule]] @@ -1694,13 +1699,17 @@ def test_it_should_not_set_dns_tcp_iptable_if_drop_unavailable(self): with _get_update_handler(test_data=DATA_FILE) as (update_handler, _): with patch.object(osutil, '_enable_firewall', True): # drop rule is not available - mock_iptables.set_command(osutil.get_firewall_drop_command(mock_iptables.wait, AddFirewallRules.CHECK_COMMAND, mock_iptables.destination), exit_code=1) + mock_iptables.set_command(AddFirewallRules.get_wire_non_root_drop_rule(AddFirewallRules.CHECK_COMMAND, mock_iptables.destination, + wait=mock_iptables.wait), exit_code=1) update_handler.run(debug=True) - drop_check_command = TestOSUtil._command_to_string(osutil.get_firewall_drop_command(mock_iptables.wait, AddFirewallRules.CHECK_COMMAND, mock_iptables.destination)) - accept_tcp_check_rule = TestOSUtil._command_to_string(osutil.get_accept_tcp_rule(mock_iptables.wait, AddFirewallRules.CHECK_COMMAND, mock_iptables.destination)) - accept_tcp_insert_rule = TestOSUtil._command_to_string(osutil.get_accept_tcp_rule(mock_iptables.wait, AddFirewallRules.INSERT_COMMAND, mock_iptables.destination)) + drop_check_command = TestOSUtil._command_to_string(AddFirewallRules.get_wire_non_root_drop_rule(AddFirewallRules.CHECK_COMMAND, mock_iptables.destination, + wait=mock_iptables.wait)) + accept_tcp_check_rule = TestOSUtil._command_to_string(AddFirewallRules.get_accept_tcp_rule(AddFirewallRules.CHECK_COMMAND, mock_iptables.destination, + wait=mock_iptables.wait)) + accept_tcp_insert_rule = TestOSUtil._command_to_string(AddFirewallRules.get_accept_tcp_rule(AddFirewallRules.INSERT_COMMAND, mock_iptables.destination, + wait=mock_iptables.wait)) # Filtering the mock iptable command calls with only the once related to this test. 
filtered_mock_iptable_calls = [cmd for cmd in mock_iptables.command_calls if cmd in [drop_check_command, accept_tcp_check_rule, accept_tcp_insert_rule]] @@ -1715,15 +1724,20 @@ def test_it_should_not_set_dns_tcp_iptable_if_drop_and_accept_available(self): with _get_update_handler(test_data=DATA_FILE) as (update_handler, _): with patch.object(osutil, '_enable_firewall', True): # drop rule is available - mock_iptables.set_command(osutil.get_firewall_drop_command(mock_iptables.wait, AddFirewallRules.CHECK_COMMAND, mock_iptables.destination), exit_code=0) + mock_iptables.set_command(AddFirewallRules.get_wire_non_root_drop_rule(AddFirewallRules.CHECK_COMMAND, mock_iptables.destination, + wait=mock_iptables.wait), exit_code=0) # non root tcp iptable rule is available - mock_iptables.set_command(osutil.get_accept_tcp_rule(mock_iptables.wait, AddFirewallRules.CHECK_COMMAND, mock_iptables.destination), exit_code=0) + mock_iptables.set_command(AddFirewallRules.get_accept_tcp_rule(AddFirewallRules.CHECK_COMMAND, mock_iptables.destination, + wait=mock_iptables.wait), exit_code=0) update_handler.run(debug=True) - drop_check_command = TestOSUtil._command_to_string(osutil.get_firewall_drop_command(mock_iptables.wait, AddFirewallRules.CHECK_COMMAND, mock_iptables.destination)) - accept_tcp_check_rule = TestOSUtil._command_to_string(osutil.get_accept_tcp_rule(mock_iptables.wait, AddFirewallRules.CHECK_COMMAND, mock_iptables.destination)) - accept_tcp_insert_rule = TestOSUtil._command_to_string(osutil.get_accept_tcp_rule(mock_iptables.wait, AddFirewallRules.INSERT_COMMAND, mock_iptables.destination)) + drop_check_command = TestOSUtil._command_to_string(AddFirewallRules.get_wire_non_root_drop_rule(AddFirewallRules.CHECK_COMMAND, mock_iptables.destination, + wait=mock_iptables.wait)) + accept_tcp_check_rule = TestOSUtil._command_to_string(AddFirewallRules.get_accept_tcp_rule(AddFirewallRules.CHECK_COMMAND, mock_iptables.destination, + wait=mock_iptables.wait)) + accept_tcp_insert_rule 
= TestOSUtil._command_to_string(AddFirewallRules.get_accept_tcp_rule(AddFirewallRules.INSERT_COMMAND, mock_iptables.destination, + wait=mock_iptables.wait)) # Filtering the mock iptable command calls with only the once related to this test. filtered_mock_iptable_calls = [cmd for cmd in mock_iptables.command_calls if cmd in [drop_check_command, accept_tcp_check_rule, accept_tcp_insert_rule]] diff --git a/tests/protocol/test_metadata_server_migration_util.py b/tests/protocol/test_metadata_server_migration_util.py index 3ac690305e..3f5c2cf9c9 100644 --- a/tests/protocol/test_metadata_server_migration_util.py +++ b/tests/protocol/test_metadata_server_migration_util.py @@ -70,6 +70,7 @@ def test_cleanup_metadata_server_artifacts_firewall_enabled(self, mock_os_getuid fixed_uid = 0 mock_os_getuid.return_value = fixed_uid osutil = MagicMock() # pylint: disable=redefined-outer-name + osutil.enable_firewall.return_value = (MagicMock(), MagicMock()) # Run migration_util.cleanup_metadata_server_artifacts(osutil) @@ -80,7 +81,7 @@ def test_cleanup_metadata_server_artifacts_firewall_enabled(self, mock_os_getuid self.assertFalse(os.path.exists(metadata_server_p7b_file)) # Assert Firewall rule calls - osutil.remove_firewall.assert_called_once_with(dst_ip=_KNOWN_METADATASERVER_IP, uid=fixed_uid) + osutil.remove_firewall.assert_called_once_with(dst_ip=_KNOWN_METADATASERVER_IP, uid=fixed_uid, wait=osutil.get_firewall_will_wait()) osutil.enable_firewall.assert_called_once_with(dst_ip=KNOWN_WIRESERVER_IP, uid=fixed_uid) @patch('azurelinuxagent.common.conf.enable_firewall') @@ -112,7 +113,7 @@ def test_cleanup_metadata_server_artifacts_firewall_disabled(self, mock_os_getui self.assertFalse(os.path.exists(metadata_server_p7b_file)) # Assert Firewall rule calls - osutil.remove_firewall.assert_called_once_with(dst_ip=_KNOWN_METADATASERVER_IP, uid=fixed_uid) + osutil.remove_firewall.assert_called_once_with(dst_ip=_KNOWN_METADATASERVER_IP, uid=fixed_uid, wait=osutil.get_firewall_will_wait()) 
osutil.enable_firewall.assert_not_called() # Cleanup certificate files diff --git a/tests/protocol/test_protocol_util.py b/tests/protocol/test_protocol_util.py index 216c6cb45c..6af6dbf843 100644 --- a/tests/protocol/test_protocol_util.py +++ b/tests/protocol/test_protocol_util.py @@ -174,6 +174,7 @@ def test_get_protocol_wireserver_to_wireserver_update_removes_metadataserver_art mock_enable_firewall.return_value = True protocol_util = get_protocol_util() protocol_util.osutil = MagicMock() + protocol_util.osutil.enable_firewall.return_value = (MagicMock(), MagicMock()) protocol_util.dhcp_handler = MagicMock() protocol_util.dhcp_handler.endpoint = KNOWN_WIRESERVER_IP @@ -213,6 +214,7 @@ def test_get_protocol_metadataserver_to_wireserver_update_removes_metadataserver mock_enable_firewall.return_value = True protocol_util = get_protocol_util() protocol_util.osutil = MagicMock() + protocol_util.osutil.enable_firewall.return_value = (MagicMock(), MagicMock()) mock_wire_client.return_value = MagicMock() protocol_util.dhcp_handler = MagicMock() protocol_util.dhcp_handler.endpoint = KNOWN_WIRESERVER_IP diff --git a/tests/test_agent.py b/tests/test_agent.py index 1ce321290c..386fab61c1 100644 --- a/tests/test_agent.py +++ b/tests/test_agent.py @@ -41,6 +41,7 @@ Debug.EnableFastTrack = True Debug.EnableGAVersioning = False Debug.EtpCollectionPeriod = 300 +Debug.FirewallRulesLogPeriod = 86400 DetectScvmmEnv = False EnableOverProvisioning = True Extension.LogDir = /var/log/azure From ff8b46e8e2fee1da2369512c8bafef593bb30359 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Mon, 7 Feb 2022 13:57:27 -0800 Subject: [PATCH 18/84] Merge ExtensionsGoalState into GoalState (#2490) * Merge ExtensionsGoalState into GoalState * Add data files * Begin remove get_extensions_goal_state() * Rename extensions to extensions_goal_state * Remove get_extensions_goal_state() * Simplify API * fix timestamp * fix timestamp * Cleanup manifests * Cleanup legacy files * Save agent status to 
history * Delete obsolete test * improvements in waagent_status.json * Do not compare the guestOSInfo proerty * Code review feedback Co-authored-by: narrieta --- azurelinuxagent/common/exception.py | 6 - azurelinuxagent/common/logcollector.py | 7 +- .../common/logcollector_manifests.py | 15 +- .../extensions_goal_state_from_vm_settings.py | 3 + azurelinuxagent/common/protocol/goal_state.py | 265 ++++++++++++++---- azurelinuxagent/common/protocol/hostplugin.py | 29 +- azurelinuxagent/common/protocol/wire.py | 163 ++--------- azurelinuxagent/common/utils/archive.py | 179 ++++++------ azurelinuxagent/ga/exthandlers.py | 71 +++-- dcr/scenario_utils/check_waagent_log.py | 3 +- dcr/scenarios/agent-bvt/get_blob_content.py | 50 ---- dcr/scenarios/agent-bvt/run2.py | 3 - tests/data/hostgaplugin/ext_conf.xml | 6 +- tests/data/hostgaplugin/vm_settings.json | 6 +- .../ext_conf_no_extensions-block_blob.xml | 12 + .../ext_conf_no_extensions-no_status_blob.xml | 11 + .../wire/ext_conf_no_extensions-page_blob.xml | 24 ++ tests/ga/test_extension.py | 180 +++++------- tests/ga/test_multi_config_extension.py | 2 +- tests/ga/test_update.py | 4 +- tests/protocol/mockwiredata.py | 3 +- tests/protocol/test_extensions_goal_state.py | 21 -- ...sions_goal_state_from_extensions_config.py | 19 +- ..._extensions_goal_state_from_vm_settings.py | 15 + tests/protocol/test_goal_state.py | 69 ++++- tests/protocol/test_hostplugin.py | 52 ++-- tests/protocol/test_wire.py | 179 ++---------- tests/utils/test_archive.py | 73 ++--- 28 files changed, 673 insertions(+), 797 deletions(-) delete mode 100644 dcr/scenarios/agent-bvt/get_blob_content.py create mode 100644 tests/data/wire/ext_conf_no_extensions-block_blob.xml create mode 100644 tests/data/wire/ext_conf_no_extensions-no_status_blob.xml create mode 100644 tests/data/wire/ext_conf_no_extensions-page_blob.xml diff --git a/azurelinuxagent/common/exception.py b/azurelinuxagent/common/exception.py index d39b1b959c..bfeb039639 100644 --- 
a/azurelinuxagent/common/exception.py +++ b/azurelinuxagent/common/exception.py @@ -184,12 +184,6 @@ class ProtocolNotFoundError(ProtocolError): """ -class IncompleteGoalStateError(ProtocolError): - """ - Goal state is returned incomplete. - """ - - class HttpError(AgentError): """ Http request failure diff --git a/azurelinuxagent/common/logcollector.py b/azurelinuxagent/common/logcollector.py index 84055777b8..a462c5e206 100644 --- a/azurelinuxagent/common/logcollector.py +++ b/azurelinuxagent/common/logcollector.py @@ -49,12 +49,7 @@ _MUST_COLLECT_FILES = [ _AGENT_LOG, - os.path.join(_AGENT_LIB_DIR, "GoalState.*.xml"), - os.path.join(_AGENT_LIB_DIR, "ExtensionsConfig.*.xml"), - os.path.join(_AGENT_LIB_DIR, "HostingEnvironmentConfig.*.xml"), - os.path.join(_AGENT_LIB_DIR, "SharedConfig.*.xml"), - os.path.join(_AGENT_LIB_DIR, "*manifest.xml"), - os.path.join(_AGENT_LIB_DIR, "waagent_status.*.json"), + os.path.join(_AGENT_LIB_DIR, "waagent_status.json"), os.path.join(_AGENT_LIB_DIR, "history", "*.zip"), os.path.join(_EXTENSION_LOG_DIR, "*", "*"), os.path.join(_EXTENSION_LOG_DIR, "*", "*", "*"), diff --git a/azurelinuxagent/common/logcollector_manifests.py b/azurelinuxagent/common/logcollector_manifests.py index cdeed984f7..e77da3d47f 100644 --- a/azurelinuxagent/common/logcollector_manifests.py +++ b/azurelinuxagent/common/logcollector_manifests.py @@ -39,16 +39,13 @@ echo, echo,### Gathering Extension Files ### -copy,$LIB_DIR/*.xml -copy,$LIB_DIR/VmSettings.*.json -copy,$LIB_DIR/waagent_status.*.json +copy,$LIB_DIR/ovf-env.xml +copy,$LIB_DIR/waagent_status.json copy,$LIB_DIR/*/status/*.status copy,$LIB_DIR/*/config/*.settings copy,$LIB_DIR/*/config/HandlerState copy,$LIB_DIR/*/config/HandlerStatus -copy,$LIB_DIR/*.agentsManifest copy,$LIB_DIR/error.json -copy,$LIB_DIR/Incarnation copy,$LIB_DIR/history/*.zip echo, """ @@ -108,19 +105,15 @@ echo, echo,### Gathering Extension Files ### -copy,$LIB_DIR/ExtensionsConfig.*.xml +copy,$LIB_DIR/ovf-env.xml 
copy,$LIB_DIR/*/status/*.status copy,$LIB_DIR/*/config/*.settings copy,$LIB_DIR/*/config/HandlerState copy,$LIB_DIR/*/config/HandlerStatus -copy,$LIB_DIR/GoalState.*.xml -copy,$LIB_DIR/HostingEnvironmentConfig.xml -copy,$LIB_DIR/*.manifest.xml copy,$LIB_DIR/SharedConfig.xml copy,$LIB_DIR/ManagedIdentity-*.json copy,$LIB_DIR/*/error.json -copy,$LIB_DIR/Incarnation -copy,$LIB_DIR/waagent_status.*.json +copy,$LIB_DIR/waagent_status.json copy,$LIB_DIR/history/*.zip echo, diff --git a/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py b/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py index efc374beca..d2216b17b8 100644 --- a/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py +++ b/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py @@ -81,6 +81,9 @@ def correlation_id(self): @property def created_on_timestamp(self): + """ + Timestamp assigned by the CRP (time at which the Fast Track goal state was created) + """ return self._created_on_timestamp @property diff --git a/azurelinuxagent/common/protocol/goal_state.py b/azurelinuxagent/common/protocol/goal_state.py index b92e0f9652..f4c9604335 100644 --- a/azurelinuxagent/common/protocol/goal_state.py +++ b/azurelinuxagent/common/protocol/goal_state.py @@ -15,7 +15,7 @@ # limitations under the License. 
# # Requires Python 2.6+ and Openssl 1.0+ - +import datetime import os import re import time @@ -24,12 +24,13 @@ import azurelinuxagent.common.logger as logger from azurelinuxagent.common.AgentGlobals import AgentGlobals from azurelinuxagent.common.datacontract import set_properties -from azurelinuxagent.common.exception import IncompleteGoalStateError -from azurelinuxagent.common.exception import ProtocolError +from azurelinuxagent.common.exception import ProtocolError, ResourceGoneError from azurelinuxagent.common.future import ustr from azurelinuxagent.common.protocol.extensions_goal_state_factory import ExtensionsGoalStateFactory +from azurelinuxagent.common.protocol.hostplugin import VmSettingsNotSupported from azurelinuxagent.common.protocol.restapi import Cert, CertList, RemoteAccessUser, RemoteAccessUsersList from azurelinuxagent.common.utils import fileutil +from azurelinuxagent.common.utils.archive import GoalStateHistory from azurelinuxagent.common.utils.cryptutil import CryptUtil from azurelinuxagent.common.utils.textutil import parse_doc, findall, find, findtext, getattrib @@ -40,7 +41,7 @@ TRANSPORT_CERT_FILE_NAME = "TransportCert.pem" TRANSPORT_PRV_FILE_NAME = "TransportPrivate.pem" -_NUM_GS_FETCH_RETRIES = 6 +_GET_GOAL_STATE_MAX_ATTEMPTS = 6 class GoalState(object): @@ -48,73 +49,231 @@ def __init__(self, wire_client): """ Fetches the goal state using the given wire client. - __init__ fetches only the goal state itself, not including inner properties such as ExtensionsConfig; to fetch the entire goal state - use the fetch_full_goal_state(). + Fetching the goal state involves several HTTP requests to the WireServer and the HostGAPlugin. There is an initial request to WireServer's goalstate API, + which response includes the incarnation, role instance, container ID, role config, and URIs to the rest of the goal state (ExtensionsConfig, Certificates, + Remote Access users, etc.). 
Additional requests are done using those URIs (all of them point to APIs in the WireServer). Additionally, there is a + request to the HostGAPlugin for the vmSettings, which determines the goal state for extensions when using the Fast Track pipeline. + + To reduce the number of requests, when possible, create a single instance of GoalState and use the update() method to keep it up to date. + """ + try: + self._wire_client = wire_client + + # These "basic" properties come from the initial request to WireServer's goalstate API + self._timestamp = None + self._incarnation = None + self._role_instance_id = None + self._role_config_name = None + self._container_id = None + + xml_text, xml_doc = GoalState._fetch_goal_state(self._wire_client) + + self._initialize_basic_properties(xml_doc) + + # The goal state for extensions can come from vmSettings when using FastTrack or from extensionsConfig otherwise, self._fetch_extended_goal_state + # populates the '_extensions_goal_state' property. + self._extensions_goal_state = None + vm_settings = self._fetch_vm_settings() + + # These "extended" properties come from additional HTTP requests to the URIs included in the basic goal state + self._hosting_env = None + self._shared_conf = None + self._certs = None + self._remote_access = None + + self._fetch_extended_goal_state(xml_text, xml_doc, vm_settings) + + except Exception as exception: + # We don't log the error here since fetching the goal state is done every few seconds + raise ProtocolError(msg="Error fetching goal state", inner=exception) + + @property + def timestamp(self): + return self._timestamp + + @property + def incarnation(self): + return self._incarnation + + @property + def container_id(self): + return self._container_id + + @property + def role_instance_id(self): + return self._role_instance_id + + @property + def role_config_name(self): + return self._role_config_name + + @property + def extensions_goal_state(self): + return self._extensions_goal_state + + 
@property + def certs(self): + return self._certs + + @property + def hosting_env(self): + return self._hosting_env + + @property + def shared_conf(self): + return self._shared_conf + + @property + def remote_access(self): + return self._remote_access + + @staticmethod + def update_host_plugin_headers(wire_client): + """ + Updates the container ID and role config name that are send in the headers of HTTP requests to the HostGAPlugin + """ + # Fetching the goal state updates the HostGAPlugin so simply trigger the request + GoalState._fetch_goal_state(wire_client) + + def update(self, force_update=False): + """ + Updates the current GoalState instance fetching values from the WireServer/HostGAPlugin as needed + """ + xml_text, xml_doc = GoalState._fetch_goal_state(self._wire_client) + + vm_settings = self._fetch_vm_settings(force_update=force_update) + + if force_update or self._incarnation != findtext(xml_doc, "Incarnation"): + # update the extended goal state, using vm_settings for the extensions (unless they are None, then use extensionsConfig) + self._initialize_basic_properties(xml_doc) + self._fetch_extended_goal_state(xml_text, xml_doc, vm_settings) + else: + # else just ensure the extensions are using the latest vm_settings + if vm_settings is not None: + self._extensions_goal_state = vm_settings + + def save_to_history(self, data, file_name): + self._history.save(data, file_name) + + def _initialize_basic_properties(self, xml_doc): + self._timestamp = datetime.datetime.utcnow().isoformat() + self._incarnation = findtext(xml_doc, "Incarnation") + self._history = GoalStateHistory(self._timestamp, self._incarnation) # history for the WireServer goal state; vmSettings are separate + role_instance = find(xml_doc, "RoleInstance") + self._role_instance_id = findtext(role_instance, "InstanceId") + role_config = find(role_instance, "Configuration") + self._role_config_name = findtext(role_config, "ConfigName") + container = find(xml_doc, "Container") + 
self._container_id = findtext(container, "ContainerId") + + @staticmethod + def _fetch_goal_state(wire_client): + """ + Issues an HTTP request for the goal state (WireServer) and returns a tuple containing the response as text and as an XML Document """ uri = GOAL_STATE_URI.format(wire_client.get_endpoint()) - for _ in range(0, _NUM_GS_FETCH_RETRIES): - self.xml_text = wire_client.fetch_config(uri, wire_client.get_header()) - xml_doc = parse_doc(self.xml_text) - self.incarnation = findtext(xml_doc, "Incarnation") + # In some environments a few goal state requests return a missing RoleInstance; these retries are used to work around that issue + # TODO: Consider retrying on 410 (ResourceGone) as well + for _ in range(0, _GET_GOAL_STATE_MAX_ATTEMPTS): + xml_text = wire_client.fetch_config(uri, wire_client.get_header()) + xml_doc = parse_doc(xml_text) role_instance = find(xml_doc, "RoleInstance") if role_instance: break time.sleep(0.5) else: - raise IncompleteGoalStateError("Fetched goal state without a RoleInstance [incarnation {inc}]".format(inc=self.incarnation)) + incarnation = findtext(xml_doc, "Incarnation") + raise ProtocolError("Fetched goal state without a RoleInstance [incarnation {inc}]".format(inc=incarnation)) - try: - self.role_instance_id = findtext(role_instance, "InstanceId") - role_config = find(role_instance, "Configuration") - self.role_config_name = findtext(role_config, "ConfigName") - container = find(xml_doc, "Container") - self.container_id = findtext(container, "ContainerId") - - AgentGlobals.update_container_id(self.container_id) - - # these properties are populated by fetch_full_goal_state() - self._hosting_env_uri = findtext(xml_doc, "HostingEnvironmentConfig") - self.hosting_env = None - self._shared_conf_uri = findtext(xml_doc, "SharedConfig") - self.shared_conf = None - self._certs_uri = findtext(xml_doc, "Certificates") - self.certs = None - self._remote_access_uri = findtext(container, "RemoteAccessInfo") - self.remote_access = None - 
# TODO: extensions_config is an instance member only temporarily. Once we stop comparing extensionsConfig with - # vmSettings, it will be replaced with the extensions goal state - self.extensions_config = None - self._extensions_config_uri = findtext(xml_doc, "ExtensionsConfig") + # Telemetry and the HostGAPlugin depend on the container id/role config; keep them up-to-date each time we fetch the goal state + # (note that these elements can change even if the incarnation of the goal state does not change) + container = find(xml_doc, "Container") + container_id = findtext(container, "ContainerId") + role_config = find(role_instance, "Configuration") + role_config_name = findtext(role_config, "ConfigName") - except Exception as exception: - # We don't log the error here since fetching the goal state is done every few seconds - raise ProtocolError(msg="Error fetching goal state", inner=exception) + AgentGlobals.update_container_id(container_id) # Telemetry uses this global to pick up the container id - def fetch_full_goal_state(self, wire_client): - try: - logger.info('Fetching goal state [incarnation {0}]', self.incarnation) + wire_client.update_host_plugin(container_id, role_config_name) + + return xml_text, xml_doc + + def _fetch_vm_settings(self, force_update=False): + """ + Issues an HTTP request (HostGAPlugin) for the vm settings and returns the response as an ExtensionsGoalStateFromVmSettings. 
+ """ + vm_settings, vm_settings_updated = (None, False) + + if conf.get_enable_fast_track(): + try: + vm_settings, vm_settings_updated = self._wire_client.get_host_plugin().fetch_vm_settings(force_update=force_update) + + except VmSettingsNotSupported: + pass + except ResourceGoneError: + # retry after refreshing the HostGAPlugin + GoalState.update_host_plugin_headers(self._wire_client) + vm_settings, vm_settings_updated = self._wire_client.get_host_plugin().fetch_vm_settings(force_update=force_update) + + if vm_settings_updated: + # The vmSettings are updated independently of the WireServer goal state and they are saved to a separate directory + history = GoalStateHistory(datetime.datetime.utcnow().isoformat(), vm_settings.etag) + history.save_vm_settings(vm_settings.get_redacted_text()) - xml_text = wire_client.fetch_config(self._hosting_env_uri, wire_client.get_header()) - self.hosting_env = HostingEnv(xml_text) + return vm_settings - xml_text = wire_client.fetch_config(self._shared_conf_uri, wire_client.get_header()) - self.shared_conf = SharedConfig(xml_text) + def _fetch_extended_goal_state(self, xml_text, xml_doc, vm_settings): + """ + Issues HTTP requests (WireServer) for each of the URIs in the goal state (ExtensionsConfig, Certificate, Remote Access users, etc) + and populates the corresponding properties. If the given 'vm_settings' are not None they are used for the extensions goal state, + otherwise extensionsConfig is used instead. 
+ """ + try: + logger.info('Fetching goal state [incarnation {0}]', self._incarnation) - if self._certs_uri is not None: - xml_text = wire_client.fetch_config(self._certs_uri, wire_client.get_header_for_cert()) - self.certs = Certificates(xml_text) + self._history.save_goal_state(xml_text) - if self._remote_access_uri is not None: - xml_text = wire_client.fetch_config(self._remote_access_uri, wire_client.get_header_for_cert()) - self.remote_access = RemoteAccess(xml_text) + # TODO: at this point we always fetch the extensionsConfig, even if it is not needed, and save it for debugging purposes. Once + # FastTrack is stable this code can be updated to fetch it only when actually needed. + extensions_config_uri = findtext(xml_doc, "ExtensionsConfig") - if self._extensions_config_uri is None: - self.extensions_config = ExtensionsGoalStateFactory.create_empty() + if extensions_config_uri is None: + extensions_config = ExtensionsGoalStateFactory.create_empty() else: - xml_text = wire_client.fetch_config(self._extensions_config_uri, wire_client.get_header()) - self.extensions_config = ExtensionsGoalStateFactory.create_from_extensions_config(self.incarnation, xml_text, wire_client) + xml_text = self._wire_client.fetch_config(extensions_config_uri, self._wire_client.get_header()) + extensions_config = ExtensionsGoalStateFactory.create_from_extensions_config(self._incarnation, xml_text, self._wire_client) + self._history.save_extensions_config(extensions_config.get_redacted_text()) + + if vm_settings is not None: + self._extensions_goal_state = vm_settings + else: + self._extensions_goal_state = extensions_config + + hosting_env_uri = findtext(xml_doc, "HostingEnvironmentConfig") + xml_text = self._wire_client.fetch_config(hosting_env_uri, self._wire_client.get_header()) + self._hosting_env = HostingEnv(xml_text) + self._history.save_hosting_env(xml_text) + + shared_conf_uri = findtext(xml_doc, "SharedConfig") + xml_text = self._wire_client.fetch_config(shared_conf_uri, 
self._wire_client.get_header()) + self._shared_conf = SharedConfig(xml_text) + self._history.save_shared_conf(xml_text) + + certs_uri = findtext(xml_doc, "Certificates") + if certs_uri is not None: + # Note that we do not save the certificates to the goal state history + xml_text = self._wire_client.fetch_config(certs_uri, self._wire_client.get_header_for_cert()) + self._certs = Certificates(xml_text) + + container = find(xml_doc, "Container") + remote_access_uri = findtext(container, "RemoteAccessInfo") + if remote_access_uri is not None: + xml_text = self._wire_client.fetch_config(remote_access_uri, self._wire_client.get_header_for_cert()) + self._remote_access = RemoteAccess(xml_text) + self._history.save_remote_access(xml_text) + except Exception as exception: logger.warn("Fetching the goal state failed: {0}", ustr(exception)) raise ProtocolError(msg="Error fetching goal state", inner=exception) diff --git a/azurelinuxagent/common/protocol/hostplugin.py b/azurelinuxagent/common/protocol/hostplugin.py index 9b6ab0762e..40ed8fda9d 100644 --- a/azurelinuxagent/common/protocol/hostplugin.py +++ b/azurelinuxagent/common/protocol/hostplugin.py @@ -65,16 +65,20 @@ class HostPluginProtocol(object): FETCH_REPORTING_PERIOD = datetime.timedelta(minutes=1) STATUS_REPORTING_PERIOD = datetime.timedelta(minutes=1) - def __init__(self, endpoint, container_id, role_config_name): + def __init__(self, endpoint): + """ + NOTE: Before using the HostGAPlugin be sure to invoke GoalState.update_host_plugin_headers() to initialize + the container id and role config name + """ if endpoint is None: raise ProtocolError("HostGAPlugin: Endpoint not provided") self.is_initialized = False self.is_available = False self.api_versions = None self.endpoint = endpoint - self.container_id = container_id - self.deployment_id = self._extract_deployment_id(role_config_name) - self.role_config_name = role_config_name + self.container_id = None + self.deployment_id = None + self.role_config_name = None 
self.manifest_uri = None self.health_service = HealthService(endpoint) self.fetch_error_state = ErrorState(min_timedelta=ERROR_STATE_HOST_PLUGIN_FAILURE) @@ -408,6 +412,9 @@ def raise_not_supported(reset_state=False): add_event(op=WALAEventOperation.HostPlugin, message="vmSettings is not supported", is_success=True) raise VmSettingsNotSupported() + def format_message(msg): + return "GET vmSettings [correlation ID: {0} eTag: {1}]: {2}".format(correlation_id, etag, msg) + try: # Raise if VmSettings are not supported but check for periodically since the HostGAPlugin could have been updated since the last check if not self._host_plugin_supports_vm_settings and self._host_plugin_supports_vm_settings_next_check > datetime.datetime.now(): @@ -416,18 +423,12 @@ def raise_not_supported(reset_state=False): etag = None if force_update or self._cached_vm_settings is None else self._cached_vm_settings.etag correlation_id = str(uuid.uuid4()) - def format_message(msg): - return "GET vmSettings [correlation ID: {0} eTag: {1}]: {2}".format(correlation_id, etag, msg) - - def get_vm_settings(): - url, headers = self.get_vm_settings_request(correlation_id) - if etag is not None: - headers['if-none-match'] = etag - return restutil.http_get(url, headers=headers, use_proxy=False, max_retry=1, return_raw_response=True) - self._vm_settings_error_reporter.report_request() - response = get_vm_settings() + url, headers = self.get_vm_settings_request(correlation_id) + if etag is not None: + headers['if-none-match'] = etag + response = restutil.http_get(url, headers=headers, use_proxy=False, max_retry=1, return_raw_response=True) if response.status == httpclient.GONE: raise ResourceGoneError() diff --git a/azurelinuxagent/common/protocol/wire.py b/azurelinuxagent/common/protocol/wire.py index efce4e6d7c..7ee8e3721d 100644 --- a/azurelinuxagent/common/protocol/wire.py +++ b/azurelinuxagent/common/protocol/wire.py @@ -35,12 +35,12 @@ ResourceGoneError, ExtensionDownloadError, 
InvalidContainerError, ProtocolError, HttpError from azurelinuxagent.common.future import httpclient, bytebuffer, ustr from azurelinuxagent.common.protocol.goal_state import GoalState, TRANSPORT_CERT_FILE_NAME, TRANSPORT_PRV_FILE_NAME -from azurelinuxagent.common.protocol.hostplugin import HostPluginProtocol, VmSettingsNotSupported +from azurelinuxagent.common.protocol.hostplugin import HostPluginProtocol from azurelinuxagent.common.protocol.restapi import DataContract, ExtHandlerPackage, \ ExtHandlerPackageList, ProvisionStatus, VMInfo, VMStatus from azurelinuxagent.common.telemetryevent import GuestAgentExtensionEventsSchema from azurelinuxagent.common.utils import fileutil, restutil -from azurelinuxagent.common.utils.archive import StateFlusher +from azurelinuxagent.common.utils.archive import _MANIFEST_FILE_NAME from azurelinuxagent.common.utils.cryptutil import CryptUtil from azurelinuxagent.common.utils.textutil import parse_doc, findall, find, \ findtext, gettext, remove_bom, get_bytes_from_pem, parse_json @@ -51,16 +51,6 @@ ROLE_PROP_URI = "http://{0}/machine?comp=roleProperties" TELEMETRY_URI = "http://{0}/machine?comp=telemetrydata" -WIRE_SERVER_ADDR_FILE_NAME = "WireServer" -INCARNATION_FILE_NAME = "Incarnation" -GOAL_STATE_FILE_NAME = "GoalState.{0}.xml" -VM_SETTINGS_FILE_NAME = "VmSettings.{0}.json" -HOSTING_ENV_FILE_NAME = "HostingEnvironmentConfig.xml" -SHARED_CONF_FILE_NAME = "SharedConfig.xml" -REMOTE_ACCESS_FILE_NAME = "RemoteAccess.{0}.xml" -EXT_CONF_FILE_NAME = "ExtensionsConfig.{0}.xml" -MANIFEST_FILE_NAME = "{0}.{1}.manifest.xml" - PROTOCOL_VERSION = "2012-11-30" ENDPOINT_FINE_NAME = "WireServer" @@ -123,7 +113,7 @@ def get_incarnation(self): def get_vmagent_manifests(self): goal_state = self.client.get_goal_state() - ext_conf = self.client.get_extensions_goal_state() + ext_conf = goal_state.extensions_goal_state return ext_conf.agent_manifests, goal_state.incarnation def get_vmagent_pkgs(self, vmagent_manifest): @@ -137,8 +127,8 @@ def 
get_ext_handler_pkgs(self, ext_handler): man = self.client.get_ext_manifest(ext_handler) return man.pkg_list - def get_extensions_goal_state(self): - return self.client.get_extensions_goal_state() + def get_goal_state(self): + return self.client.get_goal_state() def _download_ext_handler_pkg_through_host(self, uri, destination): host = self.client.get_host_plugin() @@ -572,10 +562,8 @@ def __init__(self, endpoint): logger.info("Wire server endpoint:{0}", endpoint) self._endpoint = endpoint self._goal_state = None - self._extensions_goal_state = None # The goal state to use for extensions; can be an ExtensionsGoalStateFromVmSettings or ExtensionsGoalStateFromExtensionsConfig self._host_plugin = None self.status_blob = StatusBlob(self) - self.goal_state_flusher = StateFlusher(conf.get_lib_dir()) def get_endpoint(self): return self._endpoint @@ -623,15 +611,6 @@ def fetch_cache(self, local_file): except IOError as e: raise ProtocolError("Failed to read cache: {0}".format(e)) - @staticmethod - def _save_cache(data, file_name): - try: - file_path = os.path.join(conf.get_lib_dir(), file_name) - fileutil.write_file(file_path, data) - except IOError as e: - fileutil.clean_ioerror(e, paths=[file_name]) - raise ProtocolError("Failed to write cache: {0}".format(e)) - @staticmethod def call_storage_service(http_req, *args, **kwargs): # Default to use the configured HTTP proxy @@ -768,121 +747,32 @@ def update_host_plugin_from_goal_state(self): """ Fetches a new goal state and updates the Container ID and Role Config Name of the host plugin client """ - goal_state = GoalState(self) - self._update_host_plugin(goal_state.container_id, goal_state.role_config_name) + if self._host_plugin is not None: + GoalState.update_host_plugin_headers(self) + + def update_host_plugin(self, container_id, role_config_name): + if self._host_plugin is not None: + self._host_plugin.update_container_id(container_id) + self._host_plugin.update_role_config_name(role_config_name) def 
update_goal_state(self, force_update=False): """ Updates the goal state if the incarnation or etag changed or if 'force_update' is True """ try: - # - # The goal state needs to be retrieved using both the WireServer (via the GoalState class) and the HostGAPlugin - # (via the self._fetch_vm_settings_goal_state method). - # - # We always need at least 2 queries: one to the WireServer (to check for incarnation changes) and one to the HostGAPlugin - # (to check for extension updates). Note that vmSettings are not a full goal state; they include only the extension information - # (minus certificates). The check on incarnation (which is also not included in the vmSettings) is needed to check for changes - # in, for example, the remote users for JIT access. - # - # We start by fetching the goal state from the WireServer. The response to this initial query will include the incarnation, - # container ID, role config, and URLs to the rest of the goal state (certificates, remote users, extensions config, etc). We - # do this first because we need to initialize the HostGAPlugin with the container ID and role config. - # - goal_state = GoalState(self) - - host_ga_plugin = self.get_host_plugin() - host_ga_plugin.update_container_id(goal_state.container_id) - host_ga_plugin.update_role_config_name(goal_state.role_config_name) - - # - # Then we fetch the vmSettings from the HostGAPlugin; the response will include the goal state for extensions. 
- # - vm_settings, vm_settings_updated = (None, False) - - if conf.get_enable_fast_track(): - try: - vm_settings, vm_settings_updated = host_ga_plugin.fetch_vm_settings(force_update=force_update) - except VmSettingsNotSupported: - pass # if vmSettings are not supported we use extensionsConfig below - except ResourceGoneError: - self.update_host_plugin_from_goal_state() - vm_settings, vm_settings_updated = host_ga_plugin.fetch_vm_settings(force_update=force_update) - - # - # Now we fetch the rest of the goal state from the WireServer (but ony if needed: initialization, a "forced" update, or - # a change in the incarnation). Note that if we fetch the full goal state we also update self._goal_state. - # if force_update: logger.info("Forcing an update of the goal state..") - fetch_full_goal_state = force_update or self._goal_state is None or self._goal_state.incarnation != goal_state.incarnation - - if not fetch_full_goal_state: - goal_state_updated = False + if self._goal_state is None or force_update: + self._goal_state = GoalState(self) else: - goal_state.fetch_full_goal_state(self) - self._goal_state = goal_state - goal_state_updated = True - - # - # And, lastly, we use extensionsConfig if we don't have the vmSettings (Fast Track may be disabled or not supported). 
- # - if vm_settings is not None: - self._extensions_goal_state = vm_settings - else: - self._extensions_goal_state = self._goal_state.extensions_config - - # - # If either goal state changed (goal_state or vm_settings_goal_state) save them - # - if goal_state_updated or vm_settings_updated: - self._save_goal_state(vm_settings) + self._goal_state.update() except ProtocolError: raise except Exception as exception: raise ProtocolError("Error fetching goal state: {0}".format(ustr(exception))) - def _update_host_plugin(self, container_id, role_config_name): - if self._host_plugin is not None: - self._host_plugin.update_container_id(container_id) - self._host_plugin.update_role_config_name(role_config_name) - - def _save_goal_state(self, vm_settings): - try: - self.goal_state_flusher.flush() - except Exception as e: - logger.warn("Failed to save the previous goal state to the history folder: {0}", ustr(e)) - - try: - def save_if_not_none(goal_state_property, file_name): - if goal_state_property is not None and goal_state_property.xml_text is not None: - self._save_cache(goal_state_property.xml_text, file_name) - - # NOTE: Certificates are saved in Certificate.__init__ - self._save_cache(self._goal_state.incarnation, INCARNATION_FILE_NAME) - save_if_not_none(self._goal_state, GOAL_STATE_FILE_NAME.format(self._goal_state.incarnation)) - save_if_not_none(self._goal_state.hosting_env, HOSTING_ENV_FILE_NAME) - save_if_not_none(self._goal_state.shared_conf, SHARED_CONF_FILE_NAME) - save_if_not_none(self._goal_state.remote_access, REMOTE_ACCESS_FILE_NAME.format(self._goal_state.incarnation)) - if self._goal_state.extensions_config is not None: - text = self._goal_state.extensions_config.get_redacted_text() - if text != '': - self._save_cache(text, EXT_CONF_FILE_NAME.format(self._goal_state.extensions_config.incarnation)) - if vm_settings is not None: - text = vm_settings.get_redacted_text() - if text != '': - self._save_cache(text, VM_SETTINGS_FILE_NAME.format(vm_settings.id)) 
- - except Exception as e: - logger.warn("Failed to save the goal state to disk: {0}", ustr(e)) - - def _set_host_plugin(self, new_host_plugin): - if new_host_plugin is None: - logger.warn("Setting empty Host Plugin object!") - self._host_plugin = new_host_plugin - def get_goal_state(self): if self._goal_state is None: raise ProtocolError("Trying to fetch goal state before initialization!") @@ -903,19 +793,13 @@ def get_certs(self): raise ProtocolError("Trying to fetch Certificates before initialization!") return self._goal_state.certs - def get_extensions_goal_state(self): - if self._extensions_goal_state is None: - raise ProtocolError("Trying to fetch ExtensionsGoalState before initialization!") - - return self._extensions_goal_state - def get_ext_manifest(self, ext_handler): if self._goal_state is None: raise ProtocolError("Trying to fetch Extension Manifest before initialization!") try: xml_text = self.fetch_manifest(ext_handler.manifest_uris) - self._save_cache(xml_text, MANIFEST_FILE_NAME.format(ext_handler.name, self.get_goal_state().incarnation)) + self._goal_state.save_to_history(xml_text, _MANIFEST_FILE_NAME.format(ext_handler.name)) return ExtensionManifest(xml_text) except Exception as e: raise ExtensionDownloadError("Failed to retrieve extension manifest. Error: {0}".format(ustr(e))) @@ -926,12 +810,9 @@ def get_remote_access(self): return self._goal_state.remote_access def fetch_gafamily_manifest(self, vmagent_manifest, goal_state): - local_file = MANIFEST_FILE_NAME.format(vmagent_manifest.family, goal_state.incarnation) - local_file = os.path.join(conf.get_lib_dir(), local_file) - try: xml_text = self.fetch_manifest(vmagent_manifest.uris) - fileutil.write_file(local_file, xml_text) + goal_state.save_to_history(xml_text, _MANIFEST_FILE_NAME.format(vmagent_manifest.family)) return ExtensionManifest(xml_text) except Exception as e: raise ProtocolError("Failed to retrieve GAFamily manifest. 
Error: {0}".format(ustr(e))) @@ -1078,12 +959,12 @@ def send_request_using_appropriate_channel(self, direct_func, host_func): return ret def upload_status_blob(self): - extensions_goal_state = self.get_extensions_goal_state() + extensions_goal_state = self.get_goal_state().extensions_goal_state if extensions_goal_state.status_upload_blob is None: # the status upload blob is in ExtensionsConfig so force a full goal state refresh self.update_goal_state(force_update=True) - extensions_goal_state = self.get_extensions_goal_state() + extensions_goal_state = self.get_goal_state().extensions_goal_state if extensions_goal_state.status_upload_blob is None: raise ProtocolNotFoundError("Status upload uri is missing") @@ -1283,12 +1164,12 @@ def get_header_for_cert(self): def get_host_plugin(self): if self._host_plugin is None: - goal_state = GoalState(self) - self._set_host_plugin(HostPluginProtocol(self.get_endpoint(), goal_state.container_id, goal_state.role_config_name)) + self._host_plugin = HostPluginProtocol(self.get_endpoint()) + GoalState.update_host_plugin_headers(self) return self._host_plugin def get_on_hold(self): - return self.get_extensions_goal_state().on_hold + return self.get_goal_state().extensions_goal_state.on_hold def upload_logs(self, content): host_func = lambda: self._upload_logs_through_host(content) diff --git a/azurelinuxagent/common/utils/archive.py b/azurelinuxagent/common/utils/archive.py index 8e48457793..9b60e32ab9 100644 --- a/azurelinuxagent/common/utils/archive.py +++ b/azurelinuxagent/common/utils/archive.py @@ -5,9 +5,9 @@ import re import shutil import zipfile -from datetime import datetime import azurelinuxagent.common.logger as logger +import azurelinuxagent.common.conf as conf from azurelinuxagent.common.utils import fileutil # pylint: disable=W0105 @@ -38,110 +38,39 @@ ARCHIVE_DIRECTORY_NAME = 'history' -_MAX_ARCHIVED_STATES = 50 +_MAX_ARCHIVED_STATES = 100 _CACHE_PATTERNS = [ re.compile(r"^VmSettings.\d+\.json$"), 
re.compile(r"^(.*)\.(\d+)\.(agentsManifest)$", re.IGNORECASE), re.compile(r"^(.*)\.(\d+)\.(manifest\.xml)$", re.IGNORECASE), - re.compile(r"^(.*)\.(\d+)\.(xml)$", re.IGNORECASE), - re.compile(r"waagent_status\.(\d+)\.json$") + re.compile(r"^(.*)\.(\d+)\.(xml)$", re.IGNORECASE) ] -_GOAL_STATE_PATTERN = re.compile(r"^(.*)/GoalState\.(\d+)\.xml$", re.IGNORECASE) - -# Old names didn't have incarnation, new ones do. Ensure the regex captures both cases. -# 2018-04-06T08:21:37.142697_incarnation_N -# 2018-04-06T08:21:37.142697_incarnation_N.zip -_ARCHIVE_PATTERNS_DIRECTORY = re.compile(r"^\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d+(_incarnation_(\d+))?$$") -_ARCHIVE_PATTERNS_ZIP = re.compile(r"^\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d+(_incarnation_(\d+))?\.zip$") - - -class StateFlusher(object): - def __init__(self, lib_dir): - self._source = lib_dir - - directory = os.path.join(self._source, ARCHIVE_DIRECTORY_NAME) - if not os.path.exists(directory): - try: - fileutil.mkdir(directory) - except OSError as exception: - if exception.errno != errno.EEXIST: - logger.error("{0} : {1}", self._source, exception.strerror) - - def flush(self): - files = self._get_files_to_archive() - if not files: - return - - archive_name = self._get_archive_name(files) - if archive_name is None: - return - - if self._mkdir(archive_name): - self._archive(files, archive_name) - else: - self._purge(files) - - def history_dir(self, name): - return os.path.join(self._source, ARCHIVE_DIRECTORY_NAME, name) - - @staticmethod - def _get_archive_name(files): - """ - Gets the most recently modified GoalState.*.xml and uses that timestamp and incarnation for the archive name. - In a normal workflow, we expect there to be only one GoalState.*.xml at a time, but if the previous one - wasn't purged for whatever reason, we take the most recently modified goal state file. - If there are no GoalState.*.xml files, we return None. 
- """ - latest_timestamp_ms = None - incarnation = None - - for current_file in files: - match = _GOAL_STATE_PATTERN.match(current_file) - if not match: - continue - - modification_time_ms = os.path.getmtime(current_file) - if latest_timestamp_ms is None or latest_timestamp_ms < modification_time_ms: - latest_timestamp_ms = modification_time_ms - incarnation = match.groups()[1] - - if latest_timestamp_ms is not None and incarnation is not None: - return datetime.utcfromtimestamp(latest_timestamp_ms).isoformat() + "_incarnation_{0}".format(incarnation) - return None - - def _get_files_to_archive(self): - files = [] - for current_file in os.listdir(self._source): - full_path = os.path.join(self._source, current_file) - for pattern in _CACHE_PATTERNS: - match = pattern.match(current_file) - if match is not None: - files.append(full_path) - break - - return files - - def _archive(self, files, timestamp): - for current_file in files: - dst = os.path.join(self.history_dir(timestamp), os.path.basename(current_file)) - shutil.move(current_file, dst) - - def _purge(self, files): - for current_file in files: - os.remove(current_file) - - def _mkdir(self, name): - directory = self.history_dir(name) - - try: - fileutil.mkdir(directory, mode=0o700) - return True - except IOError as exception: - logger.error("{0} : {1}".format(directory, exception.strerror)) - return False - +# +# Legacy names +# 2018-04-06T08:21:37.142697_incarnation_N +# 2018-04-06T08:21:37.142697_incarnation_N.zip +# +# Current names +# +# 2018-04-06T08:21:37.142697 +# 2018-04-06T08:21:37.142697.zip +# 2018-04-06T08:21:37.142697_N +# 2018-04-06T08:21:37.142697_N.zip +# +_ARCHIVE_PATTERNS_DIRECTORY = re.compile(r"^\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d+((_incarnation)?_(\d+))?$") +_ARCHIVE_PATTERNS_ZIP = re.compile(r"^\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d+((_incarnation)?_(\d+))?\.zip$") + +_GOAL_STATE_FILE_NAME = "GoalState.xml" +_VM_SETTINGS_FILE_NAME = "VmSettings.json" +_HOSTING_ENV_FILE_NAME = 
"HostingEnvironmentConfig.xml" +_SHARED_CONF_FILE_NAME = "SharedConfig.xml" +_REMOTE_ACCESS_FILE_NAME = "RemoteAccess.xml" +_EXT_CONF_FILE_NAME = "ExtensionsConfig.xml" +_MANIFEST_FILE_NAME = "{0}.manifest.xml" + +AGENT_STATUS_FILE = "waagent_status.json" # TODO: use @total_ordering once RHEL/CentOS and SLES 11 are EOL. # @total_ordering first appeared in Python 2.7 and 3.2 @@ -218,13 +147,13 @@ def __init__(self, lib_dir): fileutil.mkdir(self._source, mode=0o700) except IOError as exception: if exception.errno != errno.EEXIST: - logger.error("{0} : {1}", self._source, exception.strerror) + logger.warn("{0} : {1}", self._source, exception.strerror) def purge(self): """ Delete "old" archive directories and .zip archives. Old is defined as any directories or files older than the X - newest ones. + newest ones. Also, clean up any legacy history files. """ states = self._get_archive_states() states.sort(reverse=True) @@ -232,6 +161,18 @@ def purge(self): for state in states[_MAX_ARCHIVED_STATES:]: state.delete() + # legacy history files + for current_file in os.listdir(self._source): + full_path = os.path.join(self._source, current_file) + for pattern in _CACHE_PATTERNS: + match = pattern.match(current_file) + if match is not None: + try: + os.remove(full_path) + except Exception as e: + logger.warn("Cannot delete legacy history file '{0}': {1}".format(full_path, e)) + break + def archive(self): states = self._get_archive_states() for state in states: @@ -250,3 +191,41 @@ def _get_archive_states(self): states.append(StateZip(full_path, match.group(0))) return states + + +class GoalStateHistory(object): + def __init__(self, timestamp, tag=None): + self._errors = False + self._root = os.path.join(conf.get_lib_dir(), ARCHIVE_DIRECTORY_NAME, "{0}_{1}".format(timestamp, tag) if tag is not None else timestamp) + + def save(self, data, file_name): + try: + if not os.path.exists(self._root): + fileutil.mkdir(self._root, mode=0o700) + full_file_name = os.path.join(self._root, 
file_name) + fileutil.write_file(full_file_name, data) + except IOError as e: + if not self._errors: # report only 1 error per directory + self._errors = True + logger.warn("Failed to save goal state file {0}: {1} [no additional errors saving the goal state will be reported]".format(file_name, e)) + + def save_goal_state(self, text): + self.save(text, _GOAL_STATE_FILE_NAME) + + def save_extensions_config(self, text): + self.save(text, _EXT_CONF_FILE_NAME) + + def save_vm_settings(self, text): + self.save(text, _VM_SETTINGS_FILE_NAME) + + def save_remote_access(self, text): + self.save(text, _REMOTE_ACCESS_FILE_NAME) + + def save_hosting_env(self, text): + self.save(text, _HOSTING_ENV_FILE_NAME) + + def save_shared_conf(self, text): + self.save(text, _SHARED_CONF_FILE_NAME) + + def save_status(self, text): + self.save(text, AGENT_STATUS_FILE) diff --git a/azurelinuxagent/ga/exthandlers.py b/azurelinuxagent/ga/exthandlers.py index a654806e60..0d4d986379 100644 --- a/azurelinuxagent/ga/exthandlers.py +++ b/azurelinuxagent/ga/exthandlers.py @@ -50,7 +50,7 @@ from azurelinuxagent.common.protocol.restapi import ExtensionStatus, ExtensionSubStatus, Extension, ExtHandlerStatus, \ VMStatus, GoalStateAggregateStatus, ExtensionState, ExtensionRequestedState, ExtensionSettings from azurelinuxagent.common.utils import textutil -from azurelinuxagent.common.utils.archive import ARCHIVE_DIRECTORY_NAME +from azurelinuxagent.common.utils.archive import ARCHIVE_DIRECTORY_NAME, AGENT_STATUS_FILE, GoalStateHistory from azurelinuxagent.common.utils.flexible_version import FlexibleVersion from azurelinuxagent.common.version import AGENT_NAME, CURRENT_VERSION, \ PY_VERSION_MAJOR, PY_VERSION_MICRO, PY_VERSION_MINOR @@ -67,7 +67,6 @@ HANDLER_COMPLETE_NAME_PATTERN = re.compile(_HANDLER_PATTERN + r'$', re.IGNORECASE) HANDLER_PKG_EXT = ".zip" -AGENT_STATUS_FILE = "waagent_status.{0}.json" NUMBER_OF_DOWNLOAD_RETRIES = 2 # This is the default value for the env variables, whenever we call a 
command which is not an update scenario, we @@ -298,7 +297,7 @@ def run(self): etag, activity_id, correlation_id, gs_creation_time = None, None, None, None try: - extensions_goal_state = self.protocol.get_extensions_goal_state() + extensions_goal_state = self.protocol.get_goal_state().extensions_goal_state # self.ext_handlers and etag need to be initialized first, since status reporting depends on them self.ext_handlers = extensions_goal_state.extensions @@ -343,7 +342,7 @@ def goal_state_debug_info(duration=None): add_event(op=WALAEventOperation.ExtensionProcessing, is_success=(error is None), message=message, log_event=False, duration=duration) def __get_unsupported_features(self): - required_features = self.protocol.client.get_extensions_goal_state().required_features + required_features = self.protocol.get_goal_state().extensions_goal_state.required_features supported_features = get_agent_supported_features_list_for_crp() return [feature for feature in required_features if feature not in supported_features] @@ -452,7 +451,7 @@ def _extension_processing_allowed(self): return False if conf.get_enable_overprovisioning(): - if self.protocol.get_extensions_goal_state().on_hold: + if self.protocol.get_goal_state().extensions_goal_state.on_hold: logger.info("Extension handling is on hold") return False @@ -944,7 +943,7 @@ def report_ext_handlers_status(self, incarnation_changed=False, vm_agent_update_ self.report_status_error_state.reset() - self.write_ext_handlers_status_to_info_file(vm_status) + self.write_ext_handlers_status_to_info_file(vm_status, incarnation_changed) return vm_status @@ -958,42 +957,52 @@ def report_ext_handlers_status(self, incarnation_changed=False, vm_agent_update_ message=msg) return None - def write_ext_handlers_status_to_info_file(self, vm_status): - status_path = os.path.join(conf.get_lib_dir(), AGENT_STATUS_FILE.format(self.protocol.get_incarnation())) - status_blob_data = self.protocol.get_status_blob_data() - data = dict() - if 
status_blob_data is not None: - data = json.loads(status_blob_data) + def write_ext_handlers_status_to_info_file(self, vm_status, incarnation_changed): + status_file = os.path.join(conf.get_lib_dir(), AGENT_STATUS_FILE) - # Populating the fields that does not come from vm_status or status_blob_data - _metadataNotSentToCRP = { - "agentName": AGENT_NAME, - "daemonVersion": str(version.get_daemon_version()), - "pythonVersion": "Python: {0}.{1}.{2}".format(PY_VERSION_MAJOR, PY_VERSION_MINOR, PY_VERSION_MICRO), - "extensionSupportedFeatures": [name for name, _ in - get_agent_supported_features_list_for_extensions().items()] - } - data["_metadataNotSentToCRP"] = _metadataNotSentToCRP + if os.path.exists(status_file) and incarnation_changed: + # On new goal state, move the last status report for the previous goal state to the history folder + last_modified = os.path.getmtime(status_file) + timestamp = datetime.datetime.utcfromtimestamp(last_modified).isoformat() + GoalStateHistory(timestamp).save_status(status_file) + + # Now create/overwrite the status file; this file is kept for debugging purposes only + status_blob_text = self.protocol.get_status_blob_data() + if status_blob_text is None: + status_blob_text = "" + + debug_info = ExtHandlersHandler._get_status_debug_info(vm_status) + + status_file_text = \ +'''{{ + "__comment__": "The __status__ property is the actual status reported to CRP", + "__status__": {0}, + "__debug__": {1} +}} +'''.format(status_blob_text, debug_info) + + fileutil.write_file(status_file, status_file_text) - # Consuming supports_multi_config info from vm_status. creating a dict out of it for easy lookup in the next step. + @staticmethod + def _get_status_debug_info(vm_status): support_multi_config = dict() + if vm_status is not None: - # Convert VMStatus class to Dict. 
vm_status_data = get_properties(vm_status) vm_handler_statuses = vm_status_data.get('vmAgent', dict()).get('extensionHandlers') for handler_status in vm_handler_statuses: if handler_status.get('name') is not None: support_multi_config[handler_status.get('name')] = handler_status.get('supports_multi_config') - handler_aggregate_status = data.get('aggregateStatus', dict()).get('handlerAggregateStatus', dict()) - - for handler_status in handler_aggregate_status: - handler_status['supportsMultiConfig'] = support_multi_config.get(handler_status.get('handlerName')) - status = handler_status.get('runtimeSettingsStatus', dict()).get('settingsStatus', dict()).get('status', dict()) - status.pop('formattedMessage', None) - status.pop('substatus', None) + debug_info = { + "agentName": AGENT_NAME, + "daemonVersion": str(version.get_daemon_version()), + "pythonVersion": "Python: {0}.{1}.{2}".format(PY_VERSION_MAJOR, PY_VERSION_MINOR, PY_VERSION_MICRO), + "extensionSupportedFeatures": [name for name, _ in get_agent_supported_features_list_for_extensions().items()], + "supportsMultiConfig": support_multi_config + } - fileutil.write_file(status_path, json.dumps(data)) + return json.dumps(debug_info) def report_ext_handler_status(self, vm_status, ext_handler, incarnation_changed): ext_handler_i = ExtHandlerInstance(ext_handler, self.protocol) diff --git a/dcr/scenario_utils/check_waagent_log.py b/dcr/scenario_utils/check_waagent_log.py index 0dcb8972a4..be539ae092 100644 --- a/dcr/scenario_utils/check_waagent_log.py +++ b/dcr/scenario_utils/check_waagent_log.py @@ -41,9 +41,8 @@ def check_waagent_log_for_errors(waagent_log=AGENT_LOG_FILE, ignore=None): 'if': lambda _: re.match(r"((sles15\.2)|suse12)\D*", distro, flags=re.IGNORECASE) is not None }, # This warning is expected on when WireServer gives us the incomplete goalstate without roleinstance data - # raise IncompleteGoalStateError("Fetched goal state without a RoleInstance [incarnation {inc}]".format(inc=self.incarnation)) { - 
'message': r"\[IncompleteGoalStateError\] Fetched goal state without a RoleInstance", + 'message': r"\[ProtocolError\] Fetched goal state without a RoleInstance", }, # The following message is expected to log an error if systemd is not enabled on it { diff --git a/dcr/scenarios/agent-bvt/get_blob_content.py b/dcr/scenarios/agent-bvt/get_blob_content.py deleted file mode 100644 index 91fcce1d22..0000000000 --- a/dcr/scenarios/agent-bvt/get_blob_content.py +++ /dev/null @@ -1,50 +0,0 @@ -import glob -import re -from html.parser import HTMLParser -from time import sleep -from urllib.parse import unquote_plus -from urllib.request import urlopen - - -def show_blob_content(description, key): - config_files = glob.glob('/var/lib/waagent/ExtensionsConfig*.xml') - if len(config_files) == 0: - raise Exception('no extension config files found') - - config_files.sort() - with open(config_files[-1], 'r') as fh: - config = fh.readlines() - - status_line = list(filter(lambda s: key in s, config))[0] - status_pattern = '<{0}.*>(.*\?)(.*)<.*'.format(key) - match = re.match(status_pattern, status_line) - - if not match: - raise Exception(description + ' not found') - - decoded_url = match.groups()[0] - encoded_params = match.groups()[1].split('&') - for param in encoded_params: - kvp = param.split('=') - name = kvp[0] - skip = name == 'sig' - val = HTMLParser().unescape(unquote_plus(kvp[1])) if not skip else kvp[1] - decoded_param = '&{0}={1}'.format(name, val) - decoded_url += decoded_param - - print("\n{0} uri: {1}\n".format(description, decoded_url)) - status = None - retries = 3 - while status is None: - try: - status = urlopen(decoded_url).read() - except Exception as e: - if retries > 0: - retries -= 1 - sleep(60) - else: - # we are only collecting information, so do not fail the test - status = 'Error reading {0}: {1}'.format(description, e) - - return "\n{0} content: {1}\n".format(description, status) - diff --git a/dcr/scenarios/agent-bvt/run2.py 
b/dcr/scenarios/agent-bvt/run2.py index c2cf87e919..a563b62c3a 100644 --- a/dcr/scenarios/agent-bvt/run2.py +++ b/dcr/scenarios/agent-bvt/run2.py @@ -3,7 +3,6 @@ from dcr.scenario_utils.check_waagent_log import check_waagent_log_for_errors from dcr.scenario_utils.models import get_vm_data_from_env from dcr.scenario_utils.test_orchestrator import TestFuncObj, TestOrchestrator -from get_blob_content import show_blob_content from test_agent_basics import check_agent_processes, check_sudoers if __name__ == '__main__': @@ -11,8 +10,6 @@ tests = [ TestFuncObj("check agent processes", check_agent_processes), TestFuncObj("check agent log", check_waagent_log_for_errors), - TestFuncObj("Verify status blob", lambda: show_blob_content('Status', 'StatusUploadBlob')), - TestFuncObj("Verify status blob", lambda: show_blob_content('InVMArtifacts', 'InVMArtifactsProfileBlob')), TestFuncObj("verify extension timing", verify_extension_timing), TestFuncObj("Check Firewall", lambda: check_firewall(admin_username)), TestFuncObj("Check Sudoers", lambda: check_sudoers(admin_username)) diff --git a/tests/data/hostgaplugin/ext_conf.xml b/tests/data/hostgaplugin/ext_conf.xml index 166e4a7169..eac5d63647 100644 --- a/tests/data/hostgaplugin/ext_conf.xml +++ b/tests/data/hostgaplugin/ext_conf.xml @@ -59,7 +59,7 @@ { "handlerSettings": { "protectedSettingsCertThumbprint": "4C4F304667711036E64AF4894B76EB208A863BD4", - "protectedSettings": 
"MIIBsAYJKoZIhvcNAQcDoIIBoTCCAZ0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/IsZAEZFidXaW5kb3dzIEF6dXJlIENSUCBDZXJ0aWZpY2F0ZSBHZW5lcmF0b3ICEFpB/HKM/7evRk+DBz754wUwDQYJKoZIhvcNAQEBBQAEggEADPJwniDeIUXzxNrZCloitFdscQ59Bz1dj9DLBREAiM8jmxM0LLicTJDUv272Qm/4ZQgdqpFYBFjGab/9MX+Ih2x47FkVY1woBkckMaC/QOFv84gbboeQCmJYZC/rZJdh8rCMS+CEPq3uH1PVrvtSdZ9uxnaJ+E4exTPPviIiLIPtqWafNlzdbBt8HZjYaVw+SSe+CGzD2pAQeNttq3Rt/6NjCzrjG8ufKwvRoqnrInMs4x6nnN5/xvobKIBSv4/726usfk8Ug+9Q6Benvfpmre2+1M5PnGTfq78cO3o6mI3cPoBUjp5M0iJjAMGeMt81tyHkimZrEZm6pLa4NQMOEjArBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECC5nVaiJaWt+gAhgeYvxUOYHXw==", + "protectedSettings": "MIIBsAYJKoZIhvcNAQcDoIIBoTCCAZ0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/Microsoft.Azure.Monitor.AzureMonitorLinuxAgent==", "publicSettings": {"GCS_AUTO_CONFIG":true} } } @@ -72,7 +72,7 @@ { "handlerSettings": { "protectedSettingsCertThumbprint": "4C4F304667711036E64AF4894B76EB208A863BD4", - "protectedSettings": "MIIBsAYJKoZIhvcNAQcDoIIBoTCCAZ0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/IsZAEZFidXaW5kb3dzIEF6dXJlIENSUCBDZXJ0aWZpY2F0ZSBHZW5lcmF0b3ICEFpB/HKM/7evRk+DBz754wUwDQYJKoZIhvcNAQEBBQAEggEADPJwniDeIUXzxNrZCloitFdscQ59Bz1dj9DLBREAiM8jmxM0LLicTJDUv272Qm/4ZQgdqpFYBFjGab/9MX+Ih2x47FkVY1woBkckMaC/QOFv84gbboeQCmJYZC/rZJdh8rCMS+CEPq3uH1PVrvtSdZ9uxnaJ+E4exTPPviIiLIPtqWafNlzdbBt8HZjYaVw+SSe+CGzD2pAQeNttq3Rt/6NjCzrjG8ufKwvRoqnrInMs4x6nnN5/xvobKIBSv4/726usfk8Ug+9Q6Benvfpmre2+1M5PnGTfq78cO3o6mI3cPoBUjp5M0iJjAMGeMt81tyHkimZrEZm6pLa4NQMOEjArBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECC5nVaiJaWt+gAhgeYvxUOYHXw==", + "protectedSettings": "MIIBsAYJKoZIhvcNAQcDoIIBoTCCAZ0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/Microsoft.Azure.Security.Monitoring.AzureSecurityLinuxAgent==", "publicSettings": {"enableGenevaUpload":true} } } @@ -135,7 +135,7 @@ { "handlerSettings": { "protectedSettingsCertThumbprint": "59A10F50FFE2A0408D3F03FE336C8FD5716CF25C", - "protectedSettings": "*** REDACTED ***" + "protectedSettings": 
"MIIBsAYJKoZIhvcNAQcDoIIBoTCCAZ0CAQAxggFpddesZQewdDBgegkxNzA1BgoJkgergres/Microsoft.OSTCExtensions.VMAccessForLinux==" } } ] diff --git a/tests/data/hostgaplugin/vm_settings.json b/tests/data/hostgaplugin/vm_settings.json index 630976bc33..7b402720ab 100644 --- a/tests/data/hostgaplugin/vm_settings.json +++ b/tests/data/hostgaplugin/vm_settings.json @@ -56,7 +56,7 @@ "settings": [ { "protectedSettingsCertThumbprint": "4C4F304667711036E64AF4894B76EB208A863BD4", - "protectedSettings": "MIIBsAYJKoZIhvcNAQcDoIIBoTCCAZ0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/IsZAEZFidXaW5kb3dzIEF6dXJlIENSUCBDZXJ0aWZpY2F0ZSBHZW5lcmF0b3ICEFpB/HKM/7evRk+DBz754wUwDQYJKoZIhvcNAQEBBQAEggEADPJwniDeIUXzxNrZCloitFdscQ59Bz1dj9DLBREAiM8jmxM0LLicTJDUv272Qm/4ZQgdqpFYBFjGab/9MX+Ih2x47FkVY1woBkckMaC/QOFv84gbboeQCmJYZC/rZJdh8rCMS+CEPq3uH1PVrvtSdZ9uxnaJ+E4exTPPviIiLIPtqWafNlzdbBt8HZjYaVw+SSe+CGzD2pAQeNttq3Rt/6NjCzrjG8ufKwvRoqnrInMs4x6nnN5/xvobKIBSv4/726usfk8Ug+9Q6Benvfpmre2+1M5PnGTfq78cO3o6mI3cPoBUjp5M0iJjAMGeMt81tyHkimZrEZm6pLa4NQMOEjArBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECC5nVaiJaWt+gAhgeYvxUOYHXw==", + "protectedSettings": "MIIBsAYJKoZIhvcNAQcDoIIBoTCCAZ0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/Microsoft.Azure.Monitor.AzureMonitorLinuxAgent==", "publicSettings": "{\"GCS_AUTO_CONFIG\":true}" } ] @@ -76,7 +76,7 @@ "settings": [ { "protectedSettingsCertThumbprint": "4C4F304667711036E64AF4894B76EB208A863BD4", - "protectedSettings": "MIIBsAYJKoZIhvcNAQcDoIIBoTCCAZ0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/IsZAEZFidXaW5kb3dzIEF6dXJlIENSUCBDZXJ0aWZpY2F0ZSBHZW5lcmF0b3ICEFpB/HKM/7evRk+DBz754wUwDQYJKoZIhvcNAQEBBQAEggEADPJwniDeIUXzxNrZCloitFdscQ59Bz1dj9DLBREAiM8jmxM0LLicTJDUv272Qm/4ZQgdqpFYBFjGab/9MX+Ih2x47FkVY1woBkckMaC/QOFv84gbboeQCmJYZC/rZJdh8rCMS+CEPq3uH1PVrvtSdZ9uxnaJ+E4exTPPviIiLIPtqWafNlzdbBt8HZjYaVw+SSe+CGzD2pAQeNttq3Rt/6NjCzrjG8ufKwvRoqnrInMs4x6nnN5/xvobKIBSv4/726usfk8Ug+9Q6Benvfpmre2+1M5PnGTfq78cO3o6mI3cPoBUjp5M0iJjAMGeMt81tyHkimZrEZm6pLa4NQMOEjArBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECC5nVaiJaWt+gAhgeYvxUOYHXw==", + 
"protectedSettings": "MIIBsAYJKoZIhvcNAQcDoIIBoTCCAZ0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/Microsoft.Azure.Security.Monitoring.AzureSecurityLinuxAgent==", "publicSettings": "{\"enableGenevaUpload\":true}" } ] @@ -192,7 +192,7 @@ "settings": [ { "protectedSettingsCertThumbprint": "59A10F50FFE2A0408D3F03FE336C8FD5716CF25C", - "protectedSettings": "*** REDACTED ***" + "protectedSettings": "MIIBsAYJKoZIhvcNAQcDoIIBoTCCAZ0CAQAxggFpddesZQewdDBgegkxNzA1BgoJkgergres/Microsoft.OSTCExtensions.VMAccessForLinux==" } ] } diff --git a/tests/data/wire/ext_conf_no_extensions-block_blob.xml b/tests/data/wire/ext_conf_no_extensions-block_blob.xml new file mode 100644 index 0000000000..3395b17a95 --- /dev/null +++ b/tests/data/wire/ext_conf_no_extensions-block_blob.xml @@ -0,0 +1,12 @@ + + + + + + + + + + http://foo + + diff --git a/tests/data/wire/ext_conf_no_extensions-no_status_blob.xml b/tests/data/wire/ext_conf_no_extensions-no_status_blob.xml new file mode 100644 index 0000000000..6632f352c2 --- /dev/null +++ b/tests/data/wire/ext_conf_no_extensions-no_status_blob.xml @@ -0,0 +1,11 @@ + + + + + + + + + + + diff --git a/tests/data/wire/ext_conf_no_extensions-page_blob.xml b/tests/data/wire/ext_conf_no_extensions-page_blob.xml new file mode 100644 index 0000000000..57724789cd --- /dev/null +++ b/tests/data/wire/ext_conf_no_extensions-page_blob.xml @@ -0,0 +1,24 @@ + + + + + Prod + + http://mock-goal-state/manifest_of_ga.xml + + + + Test + + http://mock-goal-state/manifest_of_ga.xml + + + + + + + + + http://sas_url + + diff --git a/tests/ga/test_extension.py b/tests/ga/test_extension.py index 754bcba241..9ad8297b06 100644 --- a/tests/ga/test_extension.py +++ b/tests/ga/test_extension.py @@ -33,7 +33,7 @@ from azurelinuxagent.common.datacontract import get_properties from azurelinuxagent.common.event import WALAEventOperation from azurelinuxagent.common.utils import fileutil -from azurelinuxagent.common.utils.archive import StateArchiver +from azurelinuxagent.common.utils.archive 
import AGENT_STATUS_FILE from azurelinuxagent.common.utils.fileutil import read_file from azurelinuxagent.common.utils.flexible_version import FlexibleVersion from azurelinuxagent.common.version import PY_VERSION_MAJOR, PY_VERSION_MINOR, PY_VERSION_MICRO, AGENT_NAME, \ @@ -46,7 +46,7 @@ from azurelinuxagent.common.utils.restutil import KNOWN_WIRESERVER_IP from azurelinuxagent.ga.exthandlers import ExtHandlerInstance, migrate_handler_state, \ - get_exthandlers_handler, AGENT_STATUS_FILE, ExtCommandEnvVariable, HandlerManifest, NOT_RUN, \ + get_exthandlers_handler, ExtCommandEnvVariable, HandlerManifest, NOT_RUN, \ ExtensionStatusValue, HANDLER_COMPLETE_NAME_PATTERN, HandlerEnvironment, GoalStateStatus from tests.protocol import mockwiredata @@ -56,7 +56,6 @@ from tests.tools import AgentTestCase, data_dir, MagicMock, Mock, patch, mock_sleep from tests.ga.extension_emulator import Actions, ExtensionCommandNames, extension_emulator, \ enable_invocations, generate_put_handler -from tests.utils.test_archive import TestArchive # Mocking the original sleep to reduce test execution time SLEEP = time.sleep @@ -170,8 +169,11 @@ def test_cleanup_removes_uninstalled_extensions(self): self.assertEqual(0, TestExtensionCleanup._count_extension_directories(), "All extension directories should be removed") def test_cleanup_removes_orphaned_packages(self): + data_file = mockwiredata.DATA_FILE_NO_EXT.copy() + data_file["ext_conf"] = "wire/ext_conf_no_extensions-no_status_blob.xml" + no_of_orphaned_packages = 5 - with self._setup_test_env(mockwiredata.DATA_FILE_NO_EXT) as (exthandlers_handler, protocol, no_of_exts): + with self._setup_test_env(data_file) as (exthandlers_handler, protocol, no_of_exts): self.assertEqual(no_of_exts, 0, "Test setup error - Extensions found in ExtConfig") # Create random extension directories @@ -1704,7 +1706,7 @@ def test_ext_handler_version_decide_autoupgrade_internalversion(self, *args): datafile = mockwiredata.DATA_FILE _, protocol = 
self._create_mock(mockwiredata.WireProtocolData(datafile), *args) # pylint: disable=no-value-for-parameter - ext_handlers = protocol.client.get_extensions_goal_state().extensions + ext_handlers = protocol.get_goal_state().extensions_goal_state.extensions self.assertEqual(1, len(ext_handlers)) ext_handler = ext_handlers[0] self.assertEqual('OSTCExtensions.ExampleHandlerLinux', ext_handler.name) @@ -2448,7 +2450,7 @@ def test_it_should_parse_required_features_properly(self, mock_get, mock_crypt_u test_data = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE_REQUIRED_FEATURES) _, protocol = self._create_mock(test_data, mock_get, mock_crypt_util, *args) - required_features = protocol.get_extensions_goal_state().required_features + required_features = protocol.get_goal_state().extensions_goal_state.required_features self.assertEqual(3, len(required_features), "Incorrect features parsed") for i, feature in enumerate(required_features): self.assertEqual(feature, "TestRequiredFeature{0}".format(i+1), "Name mismatch") @@ -2508,8 +2510,8 @@ def _set_dependency_levels(self, dependency_levels, exthandlers_handler): for ext in handler.settings: ext.dependencyLevel = level - exthandlers_handler.protocol.client.get_extensions_goal_state()._extensions *= 0 - exthandlers_handler.protocol.client.get_extensions_goal_state().extensions.extend(all_handlers) + exthandlers_handler.protocol.get_goal_state().extensions_goal_state._extensions *= 0 + exthandlers_handler.protocol.get_goal_state().extensions_goal_state.extensions.extend(all_handlers) def _validate_extension_sequence(self, expected_sequence, exthandlers_handler): installed_extensions = [a[0].ext_handler.name for a, _ in exthandlers_handler.handle_ext_handler.call_args_list] @@ -3299,7 +3301,7 @@ def manifest_location_handler(url, **kwargs): manifest_location_handler.num_times_called = 0 with mock_wire_protocol(self.test_data, http_get_handler=manifest_location_handler) as protocol: - ext_handlers = 
protocol.client.get_extensions_goal_state().extensions + ext_handlers = protocol.get_goal_state().extensions_goal_state.extensions with self.assertRaises(ExtensionDownloadError): protocol.client.fetch_manifest(ext_handlers[0].manifest_uris, @@ -3350,124 +3352,94 @@ def mock_http_put(url, *args, **_): ) expected_status = { - "version": "1.1", - "timestampUTC": "1970-01-01T00:00:00Z", - "aggregateStatus": { - "guestAgentStatus": { - "version": AGENT_VERSION, - "status": "Ready", - "formattedMessage": { - "lang": "en-US", - "message": "Guest Agent is running" - } - }, - "handlerAggregateStatus": [ - { - "handlerVersion": "1.0.0", - "handlerName": "OSTCExtensions.ExampleHandlerLinux", + "__comment__": "The __status__ property is the actual status reported to CRP", + "__status__": { + "version": "1.1", + "timestampUTC": "1970-01-01T00:00:00Z", + "aggregateStatus": { + "guestAgentStatus": { + "version": AGENT_VERSION, "status": "Ready", - "code": 0, - "useExactVersion": True, "formattedMessage": { "lang": "en-US", - "message": "Plugin enabled" - }, - "runtimeSettingsStatus": { - "settingsStatus": { - "status": { - "name": "OSTCExtensions.ExampleHandlerLinux", - "configurationAppliedTime": None, - "operation": None, - "status": "success", - "code": 0 + "message": "Guest Agent is running" + } + }, + "handlerAggregateStatus": [ + { + "handlerVersion": "1.0.0", + "handlerName": "OSTCExtensions.ExampleHandlerLinux", + "status": "Ready", + "code": 0, + "useExactVersion": True, + "formattedMessage": { + "lang": "en-US", + "message": "Plugin enabled" + }, + "runtimeSettingsStatus": { + "settingsStatus": { + "status": { + "name": "OSTCExtensions.ExampleHandlerLinux", + "configurationAppliedTime": None, + "operation": None, + "status": "success", + "code": 0, + "formattedMessage": { + "lang": "en-US", + "message": None + } + }, + "version": 1.0, + "timestampUTC": "1970-01-01T00:00:00Z" }, - "version": 1, - "timestampUTC": "1970-01-01T00:00:00Z" + "sequenceNumber": 0 + } + } + ], + 
"vmArtifactsAggregateStatus": { + "goalStateAggregateStatus": { + "formattedMessage": { + "lang": "en-US", + "message": "GoalState executed successfully" }, - "sequenceNumber": 0 - }, - "supportsMultiConfig": False + "timestampUTC": "1970-01-01T00:00:00Z", + "inSvdSeqNo": "1", + "status": "Success", + "code": 0 + } } - ], - "vmArtifactsAggregateStatus": { - "goalStateAggregateStatus": { - "formattedMessage": { - "lang": "en-US", - "message": "GoalState executed successfully" - }, - "timestampUTC": "1970-01-01T00:00:00Z", - "inSvdSeqNo": "1", - "status": "Success", - "code": 0 - } - } + }, + "guestOSInfo": None, + "supportedFeatures": supported_features }, - "supportedFeatures": supported_features, - "_metadataNotSentToCRP": { + "__debug__": { "agentName": AGENT_NAME, "daemonVersion": "0.0.0.0", "pythonVersion": "Python: {0}.{1}.{2}".format(PY_VERSION_MAJOR, PY_VERSION_MINOR, PY_VERSION_MICRO), - "extensionSupportedFeatures": [name for name, _ in - get_agent_supported_features_list_for_extensions().items()] - + "extensionSupportedFeatures": [name for name, _ in get_agent_supported_features_list_for_extensions().items()], + "supportsMultiConfig": { + "OSTCExtensions.ExampleHandlerLinux": False + } } - } + exthandlers_handler.run() exthandlers_handler.report_ext_handlers_status() status_path = os.path.join(conf.get_lib_dir(), AGENT_STATUS_FILE.format(1)) actual_status_json = json.loads(fileutil.read_file(status_path)) - # Popping run time attributes + # Don't compare the guestOSInfo + status_property = actual_status_json.get("__status__") + self.assertIsNotNone(status_property, "The status file is missing the __status__ property") + self.assertIsNotNone(status_property.get("guestOSInfo"), "The status file is missing the guestOSInfo property") + status_property["guestOSInfo"] = None + actual_status_json.pop('guestOSInfo', None) self.assertEqual(expected_status, actual_status_json) - def test_it_should_zip_waagent_status_when_incarnation_changes(self): - with 
mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: - - # This test checks when the incarnation changes the waagent_status file for the previous incarnation - # is added into the history folder for the previous incarnation and gets zipped - - exthandlers_handler = get_exthandlers_handler(protocol) - - temp_files = [ - 'ExtensionsConfig.1.xml', - 'GoalState.1.xml', - 'OSTCExtensions.ExampleHandlerLinux.1.manifest.xml', - 'waagent_status.1.json' - ] - - exthandlers_handler.run() - exthandlers_handler.report_ext_handlers_status() - - # Updating incarnation to 2 , hence the history folder should have waaagent_status.1.json added under - # incarnation 1 - protocol.mock_wire_data.set_incarnation(2) - protocol.update_goal_state() - - test_subject = StateArchiver(self.tmp_dir) - test_subject.archive() - - timestamp_zips = os.listdir(os.path.join(self.tmp_dir, "history")) - self.assertEqual(1, len(timestamp_zips), "Expected number of zips in history is 1 for" - " incarnation 1(previous incarnation)") - - zip_fn = timestamp_zips[0] - zip_fullname = os.path.join(self.tmp_dir, "history", zip_fn) - self.assertEqual(TestArchive.assert_zip_contains(zip_fullname, temp_files), None) - exthandlers_handler.run() - exthandlers_handler.report_ext_handlers_status() - - # Updating incarnation to 3 , hence the history folder should have 2 zips files corresponding to incarnation - # 1 and 2 - protocol.mock_wire_data.set_incarnation(3) - protocol.update_goal_state() - test_subject.archive() - self.assertEqual(2, len(os.listdir(os.path.join(self.tmp_dir, "history")))) - def test_it_should_process_extensions_only_if_allowed(self): def assert_extensions_called(exthandlers_handler, expected_call_count=0): extension_name = 'OSTCExtensions.ExampleHandlerLinux' diff --git a/tests/ga/test_multi_config_extension.py b/tests/ga/test_multi_config_extension.py index 2e22affb44..643fb718f0 100644 --- a/tests/ga/test_multi_config_extension.py +++ b/tests/ga/test_multi_config_extension.py @@ -51,7 
+51,7 @@ def __init__(self, name, seq_no, dependency_level="0", state="enabled"): def _mock_and_assert_ext_handlers(self, expected_handlers): with mock_wire_protocol(self.test_data) as protocol: - ext_handlers = protocol.client.get_extensions_goal_state().extensions + ext_handlers = protocol.get_goal_state().extensions_goal_state.extensions for ext_handler in ext_handlers: if ext_handler.name not in expected_handlers: continue diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index e40c05eaf3..c3dbfdfb81 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -665,9 +665,7 @@ def test_download_fallback(self, mock_http_post, mock_http_get, mock_loaded, moc host_uri = 'host_uri' api_uri = URI_FORMAT_GET_API_VERSIONS.format(host_uri, HOST_PLUGIN_PORT) art_uri = URI_FORMAT_GET_EXTENSION_ARTIFACT.format(host_uri, HOST_PLUGIN_PORT) - mock_host = HostPluginProtocol(host_uri, - 'container_id', - 'role_config') + mock_host = HostPluginProtocol(host_uri) pkg = ExtHandlerPackage(version=str(self._get_agent_version())) pkg.uris.append(ext_uri) diff --git a/tests/protocol/mockwiredata.py b/tests/protocol/mockwiredata.py index 40c3633c4a..0267db2174 100644 --- a/tests/protocol/mockwiredata.py +++ b/tests/protocol/mockwiredata.py @@ -53,8 +53,7 @@ DATA_FILE_INVALID_VM_META_DATA["ext_conf"] = "wire/ext_conf_invalid_vm_metadata.xml" DATA_FILE_NO_EXT = DATA_FILE.copy() -DATA_FILE_NO_EXT["goal_state"] = "wire/goal_state_no_ext.xml" -DATA_FILE_NO_EXT["ext_conf"] = None +DATA_FILE_NO_EXT["ext_conf"] = "wire/ext_conf_no_extensions-block_blob.xml" DATA_FILE_NOOP_GS = DATA_FILE.copy() DATA_FILE_NOOP_GS["goal_state"] = "wire/goal_state_noop.xml" diff --git a/tests/protocol/test_extensions_goal_state.py b/tests/protocol/test_extensions_goal_state.py index 279d4193fa..cc929938ff 100644 --- a/tests/protocol/test_extensions_goal_state.py +++ b/tests/protocol/test_extensions_goal_state.py @@ -25,24 +25,3 @@ def 
test_create_from_vm_settings_should_assume_block_when_blob_type_is_not_valid extensions_goal_state = ExtensionsGoalStateFactory.create_from_vm_settings(1234567890, load_data("hostgaplugin/vm_settings-invalid_blob_type.json")) self.assertEqual("BlockBlob", extensions_goal_state.status_upload_blob_type, 'Expected BlockBob for an invalid statusBlobType') - def test_extension_goal_state_should_parse_requested_version_properly(self): - with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: - fabric_manifests, _ = protocol.get_vmagent_manifests() - for manifest in fabric_manifests: - self.assertEqual(manifest.requested_version_string, "0.0.0.0", "Version should be None") - - vm_settings_ga_manifests = protocol.client._host_plugin._cached_vm_settings.agent_manifests - for manifest in vm_settings_ga_manifests: - self.assertEqual(manifest.requested_version_string, "0.0.0.0", "Version should be None") - - data_file = mockwiredata.DATA_FILE_VM_SETTINGS.copy() - data_file["vm_settings"] = "hostgaplugin/vm_settings-requested_version.json" - data_file["ext_conf"] = "hostgaplugin/ext_conf-requested_version.xml" - with mock_wire_protocol(data_file) as protocol: - fabric_manifests = protocol.client.get_goal_state().extensions_config.agent_manifests - for manifest in fabric_manifests: - self.assertEqual(manifest.requested_version_string, "9.9.9.10", "Version should be 9.9.9.10") - - vm_settings_ga_manifests = protocol.client._host_plugin._cached_vm_settings.agent_manifests - for manifest in vm_settings_ga_manifests: - self.assertEqual(manifest.requested_version_string, "9.9.9.9", "Version should be 9.9.9.9") diff --git a/tests/protocol/test_extensions_goal_state_from_extensions_config.py b/tests/protocol/test_extensions_goal_state_from_extensions_config.py index d270f7adc0..1d1f5cb6ae 100644 --- a/tests/protocol/test_extensions_goal_state_from_extensions_config.py +++ b/tests/protocol/test_extensions_goal_state_from_extensions_config.py @@ -8,21 +8,34 @@ class 
ExtensionsGoalStateFromExtensionsConfigTestCase(AgentTestCase): def test_it_should_parse_in_vm_metadata(self): with mock_wire_protocol(mockwiredata.DATA_FILE_IN_VM_META_DATA) as protocol: - extensions_goal_state = protocol.get_extensions_goal_state() + extensions_goal_state = protocol.get_goal_state().extensions_goal_state self.assertEqual("555e551c-600e-4fb4-90ba-8ab8ec28eccc", extensions_goal_state.activity_id, "Incorrect activity Id") self.assertEqual("400de90b-522e-491f-9d89-ec944661f531", extensions_goal_state.correlation_id, "Incorrect correlation Id") self.assertEqual('2020-11-09T17:48:50.412125Z', extensions_goal_state.created_on_timestamp, "Incorrect GS Creation time") def test_it_should_use_default_values_when_in_vm_metadata_is_missing(self): with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: - extensions_goal_state = protocol.get_extensions_goal_state() + extensions_goal_state = protocol.get_goal_state().extensions_goal_state self.assertEqual(AgentGlobals.GUID_ZERO, extensions_goal_state.activity_id, "Incorrect activity Id") self.assertEqual(AgentGlobals.GUID_ZERO, extensions_goal_state.correlation_id, "Incorrect correlation Id") self.assertEqual('1900-01-01T00:00:00.000000Z', extensions_goal_state.created_on_timestamp, "Incorrect GS Creation time") def test_it_should_use_default_values_when_in_vm_metadata_is_invalid(self): with mock_wire_protocol(mockwiredata.DATA_FILE_INVALID_VM_META_DATA) as protocol: - extensions_goal_state = protocol.get_extensions_goal_state() + extensions_goal_state = protocol.get_goal_state().extensions_goal_state self.assertEqual(AgentGlobals.GUID_ZERO, extensions_goal_state.activity_id, "Incorrect activity Id") self.assertEqual(AgentGlobals.GUID_ZERO, extensions_goal_state.correlation_id, "Incorrect correlation Id") self.assertEqual('1900-01-01T00:00:00.000000Z', extensions_goal_state.created_on_timestamp, "Incorrect GS Creation time") + + def test_extension_goal_state_should_parse_requested_version_properly(self): + 
with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: + manifests, _ = protocol.get_vmagent_manifests() + for manifest in manifests: + self.assertEqual(manifest.requested_version_string, "0.0.0.0", "Version should be None") + + data_file = mockwiredata.DATA_FILE.copy() + data_file["ext_conf"] = "hostgaplugin/ext_conf-requested_version.xml" + with mock_wire_protocol(data_file) as protocol: + manifests, _ = protocol.get_vmagent_manifests() + for manifest in manifests: + self.assertEqual(manifest.requested_version_string, "9.9.9.10", "Version should be 9.9.9.10") diff --git a/tests/protocol/test_extensions_goal_state_from_vm_settings.py b/tests/protocol/test_extensions_goal_state_from_vm_settings.py index b8f4deb6e8..0256e18f53 100644 --- a/tests/protocol/test_extensions_goal_state_from_vm_settings.py +++ b/tests/protocol/test_extensions_goal_state_from_vm_settings.py @@ -6,6 +6,7 @@ from azurelinuxagent.common.protocol.extensions_goal_state_factory import ExtensionsGoalStateFactory from azurelinuxagent.common.protocol.extensions_goal_state_from_vm_settings import _CaseFoldedDict from azurelinuxagent.common.utils import fileutil +from tests.protocol.mocks import mockwiredata, mock_wire_protocol from tests.tools import AgentTestCase, data_dir @@ -47,6 +48,20 @@ def assert_property(name, value): # dependency level (multi-config) self.assertEqual(1, vm_settings.extensions[3].settings[1].dependencyLevel, "Incorrect dependency level (multi-config)") + def test_extension_goal_state_should_parse_requested_version_properly(self): + with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: + manifests, _ = protocol.get_vmagent_manifests() + for manifest in manifests: + self.assertEqual(manifest.requested_version_string, "0.0.0.0", "Version should be None") + + data_file = mockwiredata.DATA_FILE_VM_SETTINGS.copy() + data_file["vm_settings"] = "hostgaplugin/vm_settings-requested_version.json" + with mock_wire_protocol(data_file) as protocol: + manifests, _ 
= protocol.get_vmagent_manifests() + for manifest in manifests: + self.assertEqual(manifest.requested_version_string, "9.9.9.9", "Version should be 9.9.9.9") + + class CaseFoldedDictionaryTestCase(AgentTestCase): def test_it_should_retrieve_items_ignoring_case(self): dictionary = json.loads('''{ diff --git a/tests/protocol/test_goal_state.py b/tests/protocol/test_goal_state.py index 492fddcc9f..1d01e263fb 100644 --- a/tests/protocol/test_goal_state.py +++ b/tests/protocol/test_goal_state.py @@ -1,8 +1,13 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the Apache License. -from azurelinuxagent.common.exception import IncompleteGoalStateError -from azurelinuxagent.common.protocol.goal_state import GoalState, _NUM_GS_FETCH_RETRIES +import glob +import os +import re + +from azurelinuxagent.common.protocol.goal_state import GoalState, _GET_GOAL_STATE_MAX_ATTEMPTS +from azurelinuxagent.common.exception import ProtocolError +from azurelinuxagent.common.utils.archive import ARCHIVE_DIRECTORY_NAME from tests.protocol.mocks import mock_wire_protocol from tests.protocol import mockwiredata from tests.tools import AgentTestCase, patch @@ -16,7 +21,63 @@ def test_fetch_goal_state_should_raise_on_incomplete_goal_state(self): protocol.mock_wire_data.set_incarnation(2) with patch('time.sleep') as mock_sleep: - with self.assertRaises(IncompleteGoalStateError): + with self.assertRaises(ProtocolError): GoalState(protocol.client) - self.assertEqual(_NUM_GS_FETCH_RETRIES, mock_sleep.call_count, "Unexpected number of retries") + self.assertEqual(_GET_GOAL_STATE_MAX_ATTEMPTS, mock_sleep.call_count, "Unexpected number of retries") + + def test_fetch_full_goal_state_should_save_goal_state_to_history_directory(self): + with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: + # use a new goal state with a specific test incarnation and etag + protocol.mock_wire_data.set_incarnation(999) + protocol.mock_wire_data.set_etag(888) + goal_state = 
GoalState(protocol.client) + + matches = glob.glob(os.path.join(self.tmp_dir, ARCHIVE_DIRECTORY_NAME, "*_999")) + self.assertTrue(len(matches) == 1, "Expected one history directory for incarnation 999. Got: {0}".format(matches)) + + history_directory = matches[0] + extensions_config_file = os.path.join(history_directory, "ExtensionsConfig.xml") + expected_files = [ + os.path.join(history_directory, "GoalState.xml"), + os.path.join(history_directory, "SharedConfig.xml"), + os.path.join(history_directory, "HostingEnvironmentConfig.xml"), + extensions_config_file, + ] + + matches = glob.glob(os.path.join(self.tmp_dir, ARCHIVE_DIRECTORY_NAME, "*_888")) + self.assertTrue(len(matches) == 1, "Expected one history directory for etag 888. Got: {0}".format(matches)) + + history_directory = matches[0] + vm_settings_file = os.path.join(history_directory, "VmSettings.json") + expected_files.append(vm_settings_file) + + for f in expected_files: + self.assertTrue(os.path.exists(f), "{0} was not saved".format(f)) + + extensions_goal_state = goal_state.extensions_goal_state + protected_settings = [] + for ext_handler in extensions_goal_state.extensions: + for extension in ext_handler.settings: + if extension.protectedSettings is not None: + protected_settings.append(extension.protectedSettings) + if len(protected_settings) == 0: + raise Exception("The test goal state does not include any protected settings") + + for file_name in extensions_config_file, vm_settings_file: + with open(file_name, "r") as stream: + file_contents = stream.read() + + for settings in protected_settings: + self.assertNotIn( + settings, + file_contents, + "The protectedSettings should not have been saved to {0}".format(file_name)) + + matches = re.findall(r'"protectedSettings"\s*:\s*"\*\*\* REDACTED \*\*\*"', file_contents) + self.assertEqual( + len(matches), + len(protected_settings), + "Could not find the expected number of redacted settings in {0}.\nExpected {1}.\n{2}".format(file_name, 
len(protected_settings), file_contents)) + + diff --git a/tests/protocol/test_hostplugin.py b/tests/protocol/test_hostplugin.py index a55ae41bb7..a63d389d51 100644 --- a/tests/protocol/test_hostplugin.py +++ b/tests/protocol/test_hostplugin.py @@ -30,12 +30,13 @@ from azurelinuxagent.common.future import ustr, httpclient from azurelinuxagent.common.osutil.default import UUID_PATTERN from azurelinuxagent.common.protocol.hostplugin import API_VERSION, _VmSettingsErrorReporter, VmSettingsNotSupported +from azurelinuxagent.common.protocol.goal_state import GoalState from azurelinuxagent.common.utils import restutil from azurelinuxagent.common.version import AGENT_VERSION, AGENT_NAME from tests.protocol.mocks import mock_wire_protocol, mockwiredata, MockHttpResponse from tests.protocol.HttpRequestPredicates import HttpRequestPredicates from tests.protocol.mockwiredata import DATA_FILE, DATA_FILE_NO_EXT -from tests.tools import AgentTestCase, PY_VERSION_MAJOR, Mock, PropertyMock, patch +from tests.tools import AgentTestCase, PY_VERSION_MAJOR, Mock, patch hostplugin_status_url = "http://168.63.129.16:32526/status" @@ -61,10 +62,8 @@ class TestHostPlugin(HttpRequestPredicates, AgentTestCase): def _init_host(self): with mock_wire_protocol(DATA_FILE) as protocol: - test_goal_state = protocol.client.get_goal_state() - host_plugin = wire.HostPluginProtocol(wireserver_url, - test_goal_state.container_id, - test_goal_state.role_config_name) + host_plugin = wire.HostPluginProtocol(wireserver_url) + GoalState.update_host_plugin_headers(protocol.client) self.assertTrue(host_plugin.health_service is not None) return host_plugin @@ -150,13 +149,10 @@ def _validate_hostplugin_args(self, args, goal_state, exp_method, exp_url, exp_d @staticmethod @contextlib.contextmanager def create_mock_protocol(): - with mock_wire_protocol(DATA_FILE_NO_EXT) as protocol: - # These tests use mock wire data that don't have any extensions (extension config will be empty). 
- # Populate the upload blob and set an initial empty status before returning the protocol. - protocol.client._extensions_goal_state = Mock(wraps=protocol.client._extensions_goal_state) - type(protocol.client._extensions_goal_state).status_upload_blob = PropertyMock(return_value=sas_url) - type(protocol.client._extensions_goal_state).status_upload_blob_type = PropertyMock(return_value=page_blob_type) + data_file = DATA_FILE_NO_EXT.copy() + data_file["ext_conf"] = "wire/ext_conf_no_extensions-page_blob.xml" + with mock_wire_protocol(data_file) as protocol: status = restapi.VMStatus(status="Ready", message="Guest Agent is running") protocol.client.status_blob.set_vm_status(status) @@ -174,7 +170,7 @@ def assert_ensure_initialized(self, patch_event, patch_http_get, patch_report_he should_initialize, should_report_healthy): - host = hostplugin.HostPluginProtocol(endpoint='ws', container_id='cid', role_config_name='rcf') + host = hostplugin.HostPluginProtocol(endpoint='ws') host.is_initialized = False patch_http_get.return_value = MockResponse(body=response_body, @@ -439,11 +435,8 @@ def test_validate_http_request_when_uploading_status(self): def test_validate_block_blob(self): with mock_wire_protocol(DATA_FILE) as protocol: - test_goal_state = protocol.client._goal_state + host_client = protocol.client.get_host_plugin() - host_client = wire.HostPluginProtocol(wireserver_url, - test_goal_state.container_id, - test_goal_state.role_config_name) self.assertFalse(host_client.is_initialized) self.assertTrue(host_client.api_versions is None) self.assertTrue(host_client.health_service is not None) @@ -472,7 +465,7 @@ def test_validate_block_blob(self): # first call is to host plugin self._validate_hostplugin_args( patch_http.call_args_list[0], - test_goal_state, + protocol.get_goal_state(), exp_method, exp_url, exp_data) # second call is to health service @@ -482,11 +475,9 @@ def test_validate_block_blob(self): def test_validate_page_blobs(self): """Validate correct set of 
data is sent for page blobs""" with mock_wire_protocol(DATA_FILE) as protocol: - test_goal_state = protocol.client._goal_state + test_goal_state = protocol.get_goal_state() - host_client = wire.HostPluginProtocol(wireserver_url, - test_goal_state.container_id, - test_goal_state.role_config_name) + host_client = protocol.client.get_host_plugin() self.assertFalse(host_client.is_initialized) self.assertTrue(host_client.api_versions is None) @@ -548,7 +539,7 @@ def http_put_handler(url, *args, **kwargs): # pylint: disable=inconsistent-retu http_put_handler.args, http_put_handler.kwargs = [], {} with mock_wire_protocol(DATA_FILE, http_put_handler=http_put_handler) as protocol: - test_goal_state = protocol.client.get_goal_state() + test_goal_state = protocol.get_goal_state() expected_url = hostplugin.URI_FORMAT_PUT_LOG.format(wireserver_url, hostplugin.HOST_PLUGIN_PORT) expected_headers = {'x-ms-version': '2015-09-01', @@ -557,9 +548,7 @@ def http_put_handler(url, *args, **kwargs): # pylint: disable=inconsistent-retu "x-ms-client-name": AGENT_NAME, "x-ms-client-version": AGENT_VERSION} - host_client = wire.HostPluginProtocol(wireserver_url, - test_goal_state.container_id, - test_goal_state.role_config_name) + host_client = protocol.client.get_host_plugin() self.assertFalse(host_client.is_initialized, "Host plugin should not be initialized!") @@ -590,11 +579,9 @@ def http_put_handler(url, *args, **kwargs): # pylint: disable=inconsistent-retu http_put_handler.args, http_put_handler.kwargs = [], {} with mock_wire_protocol(DATA_FILE, http_put_handler=http_put_handler) as protocol: - test_goal_state = protocol.client.get_goal_state() - host_client = wire.HostPluginProtocol(wireserver_url, - test_goal_state.container_id, - test_goal_state.role_config_name) + host_client = wire.HostPluginProtocol(wireserver_url) + GoalState.update_host_plugin_headers(protocol.client) self.assertFalse(host_client.is_initialized, "Host plugin should not be initialized!") @@ -608,7 +595,7 @@ def 
http_put_handler(url, *args, **kwargs): # pylint: disable=inconsistent-retu def test_validate_get_extension_artifacts(self): with mock_wire_protocol(DATA_FILE) as protocol: - test_goal_state = protocol.client._goal_state + test_goal_state = protocol.get_goal_state() expected_url = hostplugin.URI_FORMAT_GET_EXTENSION_ARTIFACT.format(wireserver_url, hostplugin.HOST_PLUGIN_PORT) expected_headers = {'x-ms-version': '2015-09-01', @@ -616,9 +603,8 @@ def test_validate_get_extension_artifacts(self): "x-ms-host-config-name": test_goal_state.role_config_name, "x-ms-artifact-location": sas_url} - host_client = wire.HostPluginProtocol(wireserver_url, - test_goal_state.container_id, - test_goal_state.role_config_name) + host_client = protocol.client.get_host_plugin() + self.assertFalse(host_client.is_initialized) self.assertTrue(host_client.api_versions is None) self.assertTrue(host_client.health_service is not None) diff --git a/tests/protocol/test_wire.py b/tests/protocol/test_wire.py index 198d5c3375..6d9458c917 100644 --- a/tests/protocol/test_wire.py +++ b/tests/protocol/test_wire.py @@ -19,14 +19,11 @@ import contextlib import json import os -import re import socket import time import unittest import uuid -from datetime import datetime, timedelta -from azurelinuxagent.common import conf from azurelinuxagent.common.agent_supported_feature import SupportedFeatureNames, get_supported_feature_by_name, \ get_agent_supported_features_list_for_crp from azurelinuxagent.common.future import httpclient @@ -34,12 +31,11 @@ from azurelinuxagent.common.exception import ResourceGoneError, ProtocolError, \ ExtensionDownloadError, HttpError from azurelinuxagent.common.protocol import hostplugin -from azurelinuxagent.common.protocol.extensions_goal_state_factory import ExtensionsGoalStateFactory from azurelinuxagent.common.protocol.extensions_goal_state_from_extensions_config import ExtensionsGoalStateFromExtensionsConfig from 
azurelinuxagent.common.protocol.extensions_goal_state_from_vm_settings import ExtensionsGoalStateFromVmSettings from azurelinuxagent.common.protocol.hostplugin import HostPluginProtocol from azurelinuxagent.common.protocol.wire import WireProtocol, WireClient, \ - StatusBlob, VMStatus, EXT_CONF_FILE_NAME + StatusBlob, VMStatus from azurelinuxagent.common.telemetryevent import GuestAgentExtensionEventsSchema, \ TelemetryEventParam, TelemetryEvent from azurelinuxagent.common.utils import restutil @@ -51,7 +47,7 @@ from tests.protocol.HttpRequestPredicates import HttpRequestPredicates from tests.protocol.mockwiredata import DATA_FILE_NO_EXT, DATA_FILE from tests.protocol.mockwiredata import WireProtocolData -from tests.tools import Mock, PropertyMock, patch, AgentTestCase +from tests.tools import patch, AgentTestCase data_with_bom = b'\xef\xbb\xbfhehe' testurl = 'http://foo' @@ -74,14 +70,8 @@ def get_event(message, duration=30000, evt_type="", is_internal=False, is_succes @contextlib.contextmanager -def create_mock_protocol(status_upload_blob=None, status_upload_blob_type=None): +def create_mock_protocol(): with mock_wire_protocol(DATA_FILE_NO_EXT) as protocol: - # These tests use mock wire data that dont have any extensions (extension config will be empty). - # Mock the upload blob and artifacts profile blob. 
- protocol.client._extensions_goal_state = Mock(wraps=protocol.client._extensions_goal_state) - type(protocol.client._extensions_goal_state).status_upload_blob = PropertyMock(return_value=status_upload_blob) - type(protocol.client._extensions_goal_state).status_upload_blob_type = PropertyMock(return_value=status_upload_blob_type) - yield protocol @@ -102,7 +92,7 @@ def _test_getters(self, test_data, certsMustBePresent, __, MockCryptUtil, _): protocol.detect() protocol.get_vminfo() protocol.get_certs() - ext_handlers = protocol.client.get_extensions_goal_state().extensions + ext_handlers = protocol.get_goal_state().extensions_goal_state.extensions for ext_handler in ext_handlers: protocol.get_ext_handler_pkgs(ext_handler) @@ -215,13 +205,13 @@ def test_call_storage_kwargs(self, *args): # pylint: disable=unused-argument def test_status_blob_parsing(self, *args): # pylint: disable=unused-argument with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: - extensions_goal_state = protocol.client.get_extensions_goal_state() + extensions_goal_state = protocol.get_goal_state().extensions_goal_state self.assertIsInstance(extensions_goal_state, ExtensionsGoalStateFromExtensionsConfig) self.assertEqual(extensions_goal_state.status_upload_blob, 'https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?' 
'sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&' 'sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo') - self.assertEqual(protocol.client.get_extensions_goal_state().status_upload_blob_type, u'BlockBlob') + self.assertEqual(protocol.get_goal_state().extensions_goal_state.status_upload_blob_type, u'BlockBlob') def test_get_host_ga_plugin(self, *args): # pylint: disable=unused-argument with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: @@ -245,7 +235,7 @@ def http_put_handler(url, *_, **__): # pylint: disable=inconsistent-return-stat self.assertEqual(len(urls), 1, 'Expected one post request to the host: [{0}]'.format(urls)) def test_upload_status_blob_host_ga_plugin(self, *_): - with create_mock_protocol(status_upload_blob=testurl, status_upload_blob_type=testtype) as protocol: + with create_mock_protocol() as protocol: protocol.client.status_blob.vm_status = VMStatus(message="Ready", status="Ready") with patch.object(HostPluginProtocol, "ensure_initialized", return_value=True): @@ -258,7 +248,7 @@ def test_upload_status_blob_host_ga_plugin(self, *_): self.assertFalse(HostPluginProtocol.is_default_channel) def test_upload_status_blob_reports_prepare_error(self, *_): - with create_mock_protocol(status_upload_blob=testurl, status_upload_blob_type=testtype) as protocol: + with create_mock_protocol() as protocol: protocol.client.status_blob.vm_status = VMStatus(message="Ready", status="Ready") with patch.object(StatusBlob, "prepare", side_effect=Exception) as mock_prepare: @@ -270,11 +260,11 @@ def test_get_in_vm_artifacts_profile_blob_not_available(self, *_): data_file["ext_conf"] = "wire/ext_conf_in_vm_empty_artifacts_profile.xml" with mock_wire_protocol(data_file) as protocol: - self.assertFalse(protocol.get_extensions_goal_state().on_hold) + self.assertFalse(protocol.get_goal_state().extensions_goal_state.on_hold) def test_it_should_set_on_hold_to_false_when_the_in_vm_artifacts_profile_is_not_valid(self, *_): with 
mock_wire_protocol(mockwiredata.DATA_FILE_IN_VM_ARTIFACTS_PROFILE) as protocol: - extensions_on_hold = protocol.get_extensions_goal_state().on_hold + extensions_on_hold = protocol.get_goal_state().extensions_goal_state.on_hold self.assertTrue(extensions_on_hold, "Extensions should be on hold in the test data") def http_get_handler(url, *_, **kwargs): @@ -285,24 +275,24 @@ def http_get_handler(url, *_, **kwargs): mock_response = MockHttpResponse(200, body=None) protocol.client.update_goal_state(force_update=True) - extensions_on_hold = protocol.get_extensions_goal_state().on_hold + extensions_on_hold = protocol.get_goal_state().extensions_goal_state.on_hold self.assertFalse(extensions_on_hold, "Extensions should not be on hold when the in-vm artifacts profile response body is None") mock_response = MockHttpResponse(200, ' '.encode('utf-8')) protocol.client.update_goal_state(force_update=True) - extensions_on_hold = protocol.get_extensions_goal_state().on_hold + extensions_on_hold = protocol.get_goal_state().extensions_goal_state.on_hold self.assertFalse(extensions_on_hold, "Extensions should not be on hold when the in-vm artifacts profile response is an empty string") mock_response = MockHttpResponse(200, '{ }'.encode('utf-8')) protocol.client.update_goal_state(force_update=True) - extensions_on_hold = protocol.get_extensions_goal_state().on_hold + extensions_on_hold = protocol.get_goal_state().extensions_goal_state.on_hold self.assertFalse(extensions_on_hold, "Extensions should not be on hold when the in-vm artifacts profile response is an empty json object") with patch("azurelinuxagent.common.protocol.extensions_goal_state_from_extensions_config.add_event") as add_event: mock_response = MockHttpResponse(200, 'invalid json'.encode('utf-8')) protocol.client.update_goal_state(force_update=True) - extensions_on_hold = protocol.get_extensions_goal_state().on_hold + extensions_on_hold = protocol.get_goal_state().extensions_goal_state.on_hold 
self.assertFalse(extensions_on_hold, "Extensions should not be on hold when the in-vm artifacts profile response is not valid json") events = [kwargs for _, kwargs in add_event.call_args_list if kwargs['op'] == WALAEventOperation.ArtifactsProfileBlob] @@ -475,11 +465,11 @@ def test_report_event_large_event(self, patch_send_event, *args): # pylint: dis class TestWireClient(HttpRequestPredicates, AgentTestCase): def test_get_ext_conf_without_extensions_should_retrieve_vmagent_manifests_info(self, *args): # pylint: disable=unused-argument - # Basic test for get_extensions_goal_state() when extensions are not present in the config. The test verifies that - # get_extensions_goal_state() fetches the correct data by comparing the returned data with the test data provided the + # Basic test for extensions_goal_state when extensions are not present in the config. The test verifies that + # extensions_goal_state fetches the correct data by comparing the returned data with the test data provided the # mock_wire_protocol. 
with mock_wire_protocol(mockwiredata.DATA_FILE_NO_EXT) as protocol: - extensions_goal_state = protocol.client.get_extensions_goal_state() + extensions_goal_state = protocol.get_goal_state().extensions_goal_state ext_handlers_names = [ext_handler.name for ext_handler in extensions_goal_state.extensions] self.assertEqual(0, len(extensions_goal_state.extensions), @@ -487,19 +477,14 @@ def test_get_ext_conf_without_extensions_should_retrieve_vmagent_manifests_info( vmagent_manifests = [manifest.family for manifest in extensions_goal_state.agent_manifests] self.assertEqual(0, len(extensions_goal_state.agent_manifests), "Unexpected number of vmagent manifests in the extension config: [{0}]".format(vmagent_manifests)) - self.assertIsNone(extensions_goal_state.status_upload_blob, - "Status upload blob in the extension config is expected to be None") - self.assertIsNone(extensions_goal_state.status_upload_blob_type, - "Type of status upload blob in the extension config is expected to be None") self.assertFalse(extensions_goal_state.on_hold, "Extensions On Hold is expected to be False") def test_get_ext_conf_with_extensions_should_retrieve_ext_handlers_and_vmagent_manifests_info(self): - # Basic test for get_extensions_goal_state() when extensions are present in the config. The test verifies that get_extensions_goal_state() + # Basic test for extensions_goal_state when extensions are present in the config. The test verifies that extensions_goal_state # fetches the correct data by comparing the returned data with the test data provided the mock_wire_protocol. 
with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: - wire_protocol_client = protocol.client - extensions_goal_state = wire_protocol_client.get_extensions_goal_state() + extensions_goal_state = protocol.get_goal_state().extensions_goal_state ext_handlers_names = [ext_handler.name for ext_handler in extensions_goal_state.extensions] self.assertEqual(1, len(extensions_goal_state.extensions), @@ -1054,7 +1039,7 @@ def test_it_should_update_the_goal_state_and_the_host_plugin_when_the_incarnatio else: protocol.client.update_goal_state() - sequence_number = protocol.client.get_extensions_goal_state().extensions[0].settings[0].sequenceNumber + sequence_number = protocol.get_goal_state().extensions_goal_state.extensions[0].settings[0].sequenceNumber self.assertEqual(protocol.client.get_goal_state().incarnation, new_incarnation) self.assertEqual(protocol.client.get_hosting_env().deployment_name, new_hosting_env_deployment_name) @@ -1071,8 +1056,8 @@ def test_non_forced_update_should_not_update_the_goal_state_but_should_update_th # The container id, role config name and shared config can change without the incarnation changing; capture the initial # goal state and then change those fields. 
- goal_state = protocol.client.get_goal_state().xml_text - shared_conf = protocol.client.get_shared_conf().xml_text + container_id = protocol.client.get_goal_state().container_id + role_config_name = protocol.client.get_goal_state().role_config_name new_container_id = str(uuid.uuid4()) new_role_config_name = str(uuid.uuid4()) @@ -1083,8 +1068,8 @@ def test_non_forced_update_should_not_update_the_goal_state_but_should_update_th protocol.client.update_goal_state() - self.assertEqual(protocol.client.get_goal_state().xml_text, goal_state) - self.assertEqual(protocol.client.get_shared_conf().xml_text, shared_conf) + self.assertEqual(protocol.client.get_goal_state().container_id, container_id) + self.assertEqual(protocol.client.get_goal_state().role_config_name, role_config_name) self.assertEqual(protocol.client.get_host_plugin().container_id, new_container_id) self.assertEqual(protocol.client.get_host_plugin().role_config_name, new_role_config_name) @@ -1112,107 +1097,6 @@ def test_forced_update_should_update_the_goal_state_and_the_host_plugin_when_the self.assertEqual(protocol.client.get_host_plugin().container_id, new_container_id) self.assertEqual(protocol.client.get_host_plugin().role_config_name, new_role_config_name) - def test_update_goal_state_should_archive_last_goal_state(self): - # We use the last modified timestamp of the goal state to be archived to determine the archive's name. 
- mock_mtime = os.path.getmtime(self.tmp_dir) - with patch("azurelinuxagent.common.utils.archive.os.path.getmtime") as patch_mtime: - first_gs_ms = mock_mtime + timedelta(minutes=5).seconds - second_gs_ms = mock_mtime + timedelta(minutes=10).seconds - third_gs_ms = mock_mtime + timedelta(minutes=15).seconds - - patch_mtime.side_effect = [first_gs_ms, second_gs_ms, third_gs_ms] - - # The first goal state is created when we instantiate the protocol - with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: - history_dir = os.path.join(conf.get_lib_dir(), "history") - archives = os.listdir(history_dir) - self.assertEqual(len(archives), 0, "The goal state archive should have been empty since this is the first goal state") - - # Create the second new goal state, so the initial one should be archived - protocol.mock_wire_data.set_incarnation("2") - protocol.client.update_goal_state() - - # The initial goal state should be in the archive - first_archive_name = datetime.utcfromtimestamp(first_gs_ms).isoformat() + "_incarnation_1" - archives = os.listdir(history_dir) - self.assertEqual(len(archives), 1, "Only one goal state should have been archived") - self.assertEqual(archives[0], first_archive_name, "The name of goal state archive should match the first goal state timestamp and incarnation") - - # Create the third goal state, so the second one should be archived too - protocol.mock_wire_data.set_incarnation("3") - protocol.client.update_goal_state() - - # The second goal state should be in the archive - second_archive_name = datetime.utcfromtimestamp(second_gs_ms).isoformat() + "_incarnation_2" - archives = os.listdir(history_dir) - archives.sort() - self.assertEqual(len(archives), 2, "Two goal states should have been archived") - self.assertEqual(archives[1], second_archive_name, "The name of goal state archive should match the second goal state timestamp and incarnation") - - def test_update_goal_state_should_not_persist_the_protected_settings(self): - with 
mock_wire_protocol(mockwiredata.DATA_FILE_MULTIPLE_EXT) as protocol: - # instantiating the protocol fetches the goal state, so there is no need to do another call to update_goal_state() - goal_state = protocol.client.get_goal_state() - extensions_goal_state = protocol.client.get_extensions_goal_state() - - protected_settings = [] - for ext_handler in extensions_goal_state.extensions: - for extension in ext_handler.settings: - if extension.protectedSettings is not None: - protected_settings.append(extension.protectedSettings) - if len(protected_settings) == 0: - raise Exception("The test goal state does not include any protected settings") - - extensions_config_file = os.path.join(conf.get_lib_dir(), EXT_CONF_FILE_NAME.format(goal_state.incarnation)) - if not os.path.exists(extensions_config_file): - raise Exception("Cannot find {0}".format(extensions_config_file)) - - with open(extensions_config_file, "r") as stream: - extensions_config = stream.read() - - for settings in protected_settings: - self.assertNotIn(settings, extensions_config, "The protectedSettings should not have been saved to {0}".format(extensions_config_file)) - - matches = re.findall(r'"protectedSettings"\s*:\s*"\*\*\* REDACTED \*\*\*"', extensions_config) - self.assertEqual( - len(matches), - len(protected_settings), - "Could not find the expected number of redacted settings. 
Expected {0}.\n{1}".format(len(protected_settings), extensions_config)) - - def test_update_goal_state_should_save_goal_state(self): - with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: - protocol.mock_wire_data.set_incarnation(999) - protocol.mock_wire_data.set_etag(888) - protocol.update_goal_state() - - extensions_config_file = os.path.join(conf.get_lib_dir(), "ExtensionsConfig.999.xml") - vm_settings_file = os.path.join(conf.get_lib_dir(), "VmSettings.888.json") - expected_files = [ - os.path.join(conf.get_lib_dir(), "GoalState.999.xml"), - os.path.join(conf.get_lib_dir(), "SharedConfig.xml"), - os.path.join(conf.get_lib_dir(), "Certificates.xml"), - os.path.join(conf.get_lib_dir(), "HostingEnvironmentConfig.xml"), - extensions_config_file, - vm_settings_file - ] - - for f in expected_files: - self.assertTrue(os.path.exists(f), "{0} was not saved".format(f)) - - with open(extensions_config_file, "r") as file_: - extensions_goal_state = ExtensionsGoalStateFactory.create_from_extensions_config(123, file_.read(), protocol) - self.assertEqual(5, len(extensions_goal_state.extensions), "Incorrect number of extensions in ExtensionsConfig") - for e in extensions_goal_state.extensions: - if e.name in ("Microsoft.Azure.Monitor.AzureMonitorLinuxAgent", "Microsoft.Azure.Security.Monitoring.AzureSecurityLinuxAgent"): - self.assertEqual(e.settings[0].protectedSettings, "*** REDACTED ***", "The protected settings for {0} were not redacted".format(e.name)) - - with open(vm_settings_file, "r") as file_: - extensions_goal_state = ExtensionsGoalStateFactory.create_from_vm_settings(None, file_.read()) - self.assertEqual(5, len(extensions_goal_state.extensions), "Incorrect number of extensions in vmSettings") - for e in extensions_goal_state.extensions: - if e.name in ("Microsoft.Azure.Monitor.AzureMonitorLinuxAgent", "Microsoft.Azure.Security.Monitoring.AzureSecurityLinuxAgent"): - self.assertEqual(e.settings[0].protectedSettings, "*** REDACTED ***", "The 
protected settings for {0} were not redacted".format(e.name)) - def test_it_should_retry_get_vm_settings_on_resource_gone_error(self): # Requests to the hostgaplugin incude the Container ID and the RoleConfigName as headers; when the hostgaplugin returns GONE (HTTP status 410) the agent # needs to get a new goal state and retry the request with updated values for the Container ID and RoleConfigName headers. @@ -1243,7 +1127,7 @@ def http_get_vm_settings(_method, _host, _relative_url, **kwargs): def test_it_should_use_vm_settings_by_default(self): with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: - extensions_goal_state = protocol.get_extensions_goal_state() + extensions_goal_state = protocol.get_goal_state().extensions_goal_state self.assertTrue( isinstance(extensions_goal_state, ExtensionsGoalStateFromVmSettings), 'The extensions goal state should have been created from the vmSettings (got: {0})'.format(type(extensions_goal_state))) @@ -1256,7 +1140,7 @@ def _assert_is_extensions_goal_state_from_extensions_config(self, extensions_goa def test_it_should_use_extensions_config_when_fast_track_is_disabled(self): with patch("azurelinuxagent.common.conf.get_enable_fast_track", return_value=False): with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: - self._assert_is_extensions_goal_state_from_extensions_config(protocol.get_extensions_goal_state()) + self._assert_is_extensions_goal_state_from_extensions_config(protocol.get_goal_state().extensions_goal_state) def test_it_should_use_extensions_config_when_fast_track_is_not_supported(self): def http_get_handler(url, *_, **__): @@ -1265,14 +1149,14 @@ def http_get_handler(url, *_, **__): return None with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS, http_get_handler=http_get_handler) as protocol: - self._assert_is_extensions_goal_state_from_extensions_config(protocol.get_extensions_goal_state()) + 
self._assert_is_extensions_goal_state_from_extensions_config(protocol.get_goal_state().extensions_goal_state) def test_it_should_use_extensions_config_when_the_host_ga_plugin_version_is_not_supported(self): data_file = mockwiredata.DATA_FILE_VM_SETTINGS.copy() data_file["vm_settings"] = "hostgaplugin/vm_settings-unsupported_version.json" with mock_wire_protocol(data_file) as protocol: - self._assert_is_extensions_goal_state_from_extensions_config(protocol.get_extensions_goal_state()) + self._assert_is_extensions_goal_state_from_extensions_config(protocol.get_goal_state().extensions_goal_state) class UpdateHostPluginFromGoalStateTestCase(AgentTestCase): @@ -1290,9 +1174,6 @@ def test_it_should_update_the_host_plugin_with_or_without_incarnation_changes(se new_container_id = str(uuid.uuid4()) new_role_config_name = str(uuid.uuid4()) - goal_state_xml_text = protocol.mock_wire_data.goal_state - shared_conf_xml_text = protocol.mock_wire_data.shared_config - if incarnation_change: protocol.mock_wire_data.set_incarnation(str(uuid.uuid4())) @@ -1306,10 +1187,6 @@ def test_it_should_update_the_host_plugin_with_or_without_incarnation_changes(se self.assertEqual(protocol.client.get_host_plugin().container_id, new_container_id) self.assertEqual(protocol.client.get_host_plugin().role_config_name, new_role_config_name) - # it should not update the goal state - self.assertEqual(protocol.client.get_goal_state().xml_text, goal_state_xml_text) - self.assertEqual(protocol.client.get_shared_conf().xml_text, shared_conf_xml_text) - if __name__ == '__main__': unittest.main() diff --git a/tests/utils/test_archive.py b/tests/utils/test_archive.py index 2e46848778..7834a7d25b 100644 --- a/tests/utils/test_archive.py +++ b/tests/utils/test_archive.py @@ -8,7 +8,7 @@ import azurelinuxagent.common.logger as logger from azurelinuxagent.common.utils import fileutil -from azurelinuxagent.common.utils.archive import StateFlusher, StateArchiver, _MAX_ARCHIVED_STATES +from 
azurelinuxagent.common.utils.archive import StateArchiver, _MAX_ARCHIVED_STATES from tests.tools import AgentTestCase, patch debug = False @@ -53,41 +53,6 @@ def _parse_archive_name(name): incarnation_no_ext = os.path.splitext(incarnation_ext)[0] return timestamp_str, incarnation_no_ext - def test_archive00(self): - """ - StateFlusher should move all 'goal state' files to a new directory - under the history folder that is timestamped. - """ - temp_files = [ - 'GoalState.0.xml', - 'Prod.0.manifest.xml', - 'Prod.0.agentsManifest', - 'Microsoft.Azure.Extensions.CustomScript.0.xml' - ] - - for temp_file in temp_files: - self._write_file(temp_file) - - test_subject = StateFlusher(self.tmp_dir) - test_subject.flush() - - self.assertTrue(os.path.exists(self.history_dir)) - self.assertTrue(os.path.isdir(self.history_dir)) - - timestamp_dirs = os.listdir(self.history_dir) - self.assertEqual(1, len(timestamp_dirs)) - - timestamp_str, incarnation = self._parse_archive_name(timestamp_dirs[0]) - self.assert_is_iso8601(timestamp_str) - timestamp = self.parse_isoformat(timestamp_str) - self.assert_datetime_close_to(timestamp, datetime.utcnow(), timedelta(seconds=30)) - self.assertEqual("0", incarnation) - - for temp_file in temp_files: - history_path = os.path.join(self.history_dir, timestamp_dirs[0], temp_file) - msg = "expected the temp file {0} to exist".format(history_path) - self.assertTrue(os.path.exists(history_path), msg) - def test_archive01(self): """ StateArchiver should archive all history directories by @@ -97,17 +62,17 @@ def test_archive01(self): 2. 
Deleting the timestamped directory """ temp_files = [ - 'GoalState.0.xml', - 'Prod.0.manifest.xml', - 'Prod.0.agentsManifest', - 'Microsoft.Azure.Extensions.CustomScript.0.xml' + 'GoalState.xml', + 'Prod.manifest.xml', + 'Prod.agentsManifest', + 'Microsoft.Azure.Extensions.CustomScript.xml' ] - for current_file in temp_files: - self._write_file(current_file) + # this directory matches the pattern that StateArchiver.archive() searches for + temp_directory = os.path.join(self.history_dir, datetime.utcnow().isoformat() + "_incarnation_0") - flusher = StateFlusher(self.tmp_dir) - flusher.flush() + for current_file in temp_files: + self._write_file(os.path.join(temp_directory, current_file)) test_subject = StateArchiver(self.tmp_dir) test_subject.archive() @@ -147,9 +112,9 @@ def test_archive02(self): timestamps.append(timestamp) if i % 2 == 0: - filename = os.path.join('history', "{0}_incarnation_0".format(timestamp.isoformat()), 'Prod.0.manifest.xml') + filename = os.path.join('history', "{0}_0".format(timestamp.isoformat()), 'Prod.manifest.xml') else: - filename = os.path.join('history', "{0}_incarnation_0.zip".format(timestamp.isoformat())) + filename = os.path.join('history', "{0}_0.zip".format(timestamp.isoformat())) self._write_file(filename) @@ -166,18 +131,19 @@ def test_archive02(self): for i in range(0, _MAX_ARCHIVED_STATES): timestamp = timestamps[i + count].isoformat() if i % 2 == 0: - filename = "{0}_incarnation_0".format(timestamp) + filename = "{0}_0".format(timestamp) else: - filename = "{0}_incarnation_0.zip".format(timestamp) + filename = "{0}_0.zip".format(timestamp) self.assertTrue(filename in archived_entries, "'{0}' is not in the list of unpurged entires".format(filename)) def test_archive03(self): """ - All archives should be purged, both with the new naming (with incarnation number) and with the old naming. + All archives should be purged, both with the legacy naming (with incarnation number) and with the new naming. 
""" start = datetime.now() timestamp1 = start + timedelta(seconds=5) timestamp2 = start + timedelta(seconds=10) + timestamp3 = start + timedelta(seconds=10) dir_old = timestamp1.isoformat() dir_new = "{0}_incarnation_1".format(timestamp2.isoformat()) @@ -185,12 +151,15 @@ def test_archive03(self): archive_old = "{0}.zip".format(timestamp1.isoformat()) archive_new = "{0}_incarnation_1.zip".format(timestamp2.isoformat()) - self._write_file(os.path.join("history", dir_old, "Prod.0.manifest.xml")) - self._write_file(os.path.join("history", dir_new, "Prod.1.manifest.xml")) + status = "{0}.zip".format(timestamp3.isoformat()) + + self._write_file(os.path.join("history", dir_old, "Prod.manifest.xml")) + self._write_file(os.path.join("history", dir_new, "Prod.manifest.xml")) self._write_file(os.path.join("history", archive_old)) self._write_file(os.path.join("history", archive_new)) + self._write_file(os.path.join("history", status)) - self.assertEqual(4, len(os.listdir(self.history_dir)), "Not all entries were archived!") + self.assertEqual(5, len(os.listdir(self.history_dir)), "Not all entries were archived!") test_subject = StateArchiver(self.tmp_dir) with patch("azurelinuxagent.common.utils.archive._MAX_ARCHIVED_STATES", 0): From 3215b6cb835806f0ff2ba3c27a6f46817964184d Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Mon, 7 Feb 2022 15:58:44 -0800 Subject: [PATCH 19/84] Set agent version to 9.9.9.9; disable fast track (#2493) Co-authored-by: narrieta --- azurelinuxagent/common/conf.py | 4 ++-- azurelinuxagent/common/version.py | 2 +- tests/ga/test_update.py | 3 ++- .../protocol/test_extensions_goal_state_from_vm_settings.py | 5 +++-- tests/protocol/test_goal_state.py | 3 ++- tests/protocol/test_hostplugin.py | 6 ++++-- tests/protocol/test_wire.py | 6 ++++-- tests/test_agent.py | 2 +- 8 files changed, 19 insertions(+), 12 deletions(-) diff --git a/azurelinuxagent/common/conf.py b/azurelinuxagent/common/conf.py index 65d0703094..a23c5e9280 100644 --- 
a/azurelinuxagent/common/conf.py +++ b/azurelinuxagent/common/conf.py @@ -136,7 +136,7 @@ def load_conf_from_file(conf_file_path, conf=__conf__): "Debug.CgroupLogMetrics": False, "Debug.CgroupDisableOnProcessCheckFailure": True, "Debug.CgroupDisableOnQuotaCheckFailure": True, - "Debug.EnableFastTrack": True, + "Debug.EnableFastTrack": False, "Debug.EnableGAVersioning": False } @@ -571,7 +571,7 @@ def get_enable_fast_track(conf=__conf__): NOTE: This option is experimental and may be removed in later versions of the Agent. """ - return conf.get_switch("Debug.EnableFastTrack", True) + return conf.get_switch("Debug.EnableFastTrack", False) def get_etp_collection_period(conf=__conf__): diff --git a/azurelinuxagent/common/version.py b/azurelinuxagent/common/version.py index af01a9861a..e35e28b94a 100644 --- a/azurelinuxagent/common/version.py +++ b/azurelinuxagent/common/version.py @@ -209,7 +209,7 @@ def has_logrotate(): # # When doing a release, be sure to use the actual agent version. Current agent version: 2.4.0.0 # -AGENT_VERSION = '8.8.8.8' +AGENT_VERSION = '9.9.9.9' AGENT_LONG_VERSION = "{0}-{1}".format(AGENT_NAME, AGENT_VERSION) AGENT_DESCRIPTION = """ The Azure Linux Agent supports the provisioning and running of Linux diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index c3dbfdfb81..8cec8f90a4 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -2726,7 +2726,8 @@ def _fail_goal_state_fetch(url, **_): except IndexError: raise HttpError() - def test_update_handler_should_report_status_even_on_failed_goal_state_fetch(self): + @patch("azurelinuxagent.common.conf.get_enable_fast_track", return_value=True) + def test_update_handler_should_report_status_even_on_failed_goal_state_fetch(self, _): try: # Returning None forces the mock wire data to return the contents in the static diff --git a/tests/protocol/test_extensions_goal_state_from_vm_settings.py b/tests/protocol/test_extensions_goal_state_from_vm_settings.py index 
0256e18f53..515293976e 100644 --- a/tests/protocol/test_extensions_goal_state_from_vm_settings.py +++ b/tests/protocol/test_extensions_goal_state_from_vm_settings.py @@ -7,7 +7,7 @@ from azurelinuxagent.common.protocol.extensions_goal_state_from_vm_settings import _CaseFoldedDict from azurelinuxagent.common.utils import fileutil from tests.protocol.mocks import mockwiredata, mock_wire_protocol -from tests.tools import AgentTestCase, data_dir +from tests.tools import AgentTestCase, data_dir, patch class ExtensionsGoalStateFromVmSettingsTestCase(AgentTestCase): @@ -48,7 +48,8 @@ def assert_property(name, value): # dependency level (multi-config) self.assertEqual(1, vm_settings.extensions[3].settings[1].dependencyLevel, "Incorrect dependency level (multi-config)") - def test_extension_goal_state_should_parse_requested_version_properly(self): + @patch("azurelinuxagent.common.conf.get_enable_fast_track", return_value=True) + def test_extension_goal_state_should_parse_requested_version_properly(self, _): with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: manifests, _ = protocol.get_vmagent_manifests() for manifest in manifests: diff --git a/tests/protocol/test_goal_state.py b/tests/protocol/test_goal_state.py index 1d01e263fb..fdd41303bf 100644 --- a/tests/protocol/test_goal_state.py +++ b/tests/protocol/test_goal_state.py @@ -25,7 +25,8 @@ def test_fetch_goal_state_should_raise_on_incomplete_goal_state(self): GoalState(protocol.client) self.assertEqual(_GET_GOAL_STATE_MAX_ATTEMPTS, mock_sleep.call_count, "Unexpected number of retries") - def test_fetch_full_goal_state_should_save_goal_state_to_history_directory(self): + @patch("azurelinuxagent.common.conf.get_enable_fast_track", return_value=True) + def test_fetch_full_goal_state_should_save_goal_state_to_history_directory(self, _): with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: # use a new goal state with a specific test incarnation and etag 
protocol.mock_wire_data.set_incarnation(999) diff --git a/tests/protocol/test_hostplugin.py b/tests/protocol/test_hostplugin.py index a63d389d51..4fac06b3a5 100644 --- a/tests/protocol/test_hostplugin.py +++ b/tests/protocol/test_hostplugin.py @@ -867,7 +867,8 @@ def _fetch_vm_settings_ignoring_errors(protocol): except (ProtocolError, VmSettingsNotSupported): pass - def test_it_should_keep_track_of_errors_in_vm_settings_requests(self): + @patch("azurelinuxagent.common.conf.get_enable_fast_track", return_value=True) + def test_it_should_keep_track_of_errors_in_vm_settings_requests(self, _): mock_response = None def http_get_handler(url, *_, **__): @@ -943,7 +944,8 @@ def http_get_handler(url, *_, **__): self.assertEqual(3, len(messages), "Expected additional errors to be reported in the next period (got: {0})".format(messages)) - def test_it_should_stop_issuing_vm_settings_requests_when_api_is_not_supported(self): + @patch("azurelinuxagent.common.conf.get_enable_fast_track", return_value=True) + def test_it_should_stop_issuing_vm_settings_requests_when_api_is_not_supported(self, _): with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: def http_get_handler(url, *_, **__): if self.is_host_plugin_vm_settings_request(url): diff --git a/tests/protocol/test_wire.py b/tests/protocol/test_wire.py index 6d9458c917..442e14de8e 100644 --- a/tests/protocol/test_wire.py +++ b/tests/protocol/test_wire.py @@ -1097,7 +1097,8 @@ def test_forced_update_should_update_the_goal_state_and_the_host_plugin_when_the self.assertEqual(protocol.client.get_host_plugin().container_id, new_container_id) self.assertEqual(protocol.client.get_host_plugin().role_config_name, new_role_config_name) - def test_it_should_retry_get_vm_settings_on_resource_gone_error(self): + @patch("azurelinuxagent.common.conf.get_enable_fast_track", return_value=True) + def test_it_should_retry_get_vm_settings_on_resource_gone_error(self, _): # Requests to the hostgaplugin incude the Container ID and 
the RoleConfigName as headers; when the hostgaplugin returns GONE (HTTP status 410) the agent # needs to get a new goal state and retry the request with updated values for the Container ID and RoleConfigName headers. with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: @@ -1125,7 +1126,8 @@ def http_get_vm_settings(_method, _host, _relative_url, **kwargs): self.assertEqual("GET_VM_SETTINGS_TEST_CONTAINER_ID", request_headers[1][hostplugin._HEADER_CONTAINER_ID], "The retry request did not include the expected header for the ContainerId") self.assertEqual("GET_VM_SETTINGS_TEST_ROLE_CONFIG_NAME", request_headers[1][hostplugin._HEADER_HOST_CONFIG_NAME], "The retry request did not include the expected header for the RoleConfigName") - def test_it_should_use_vm_settings_by_default(self): + @patch("azurelinuxagent.common.conf.get_enable_fast_track", return_value=True) + def test_it_should_use_vm_settings_by_default(self, _): with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: extensions_goal_state = protocol.get_goal_state().extensions_goal_state self.assertTrue( diff --git a/tests/test_agent.py b/tests/test_agent.py index 1ce321290c..0f2d253428 100644 --- a/tests/test_agent.py +++ b/tests/test_agent.py @@ -38,7 +38,7 @@ Debug.CgroupLogMetrics = False Debug.CgroupMonitorExpiryTime = 2022-01-31 Debug.CgroupMonitorExtensionName = Microsoft.Azure.Monitor.AzureMonitorLinuxAgent -Debug.EnableFastTrack = True +Debug.EnableFastTrack = False Debug.EnableGAVersioning = False Debug.EtpCollectionPeriod = 300 DetectScvmmEnv = False From 3db2ca9ac45c42edae1a554519d7bfcf64d24855 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Mon, 7 Feb 2022 16:52:26 -0800 Subject: [PATCH 20/84] enable firewall frequency to 5 mins (#2491) --- azurelinuxagent/common/conf.py | 4 ++-- tests/test_agent.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/azurelinuxagent/common/conf.py 
b/azurelinuxagent/common/conf.py index a666e53ebb..32cda350ce 100644 --- a/azurelinuxagent/common/conf.py +++ b/azurelinuxagent/common/conf.py @@ -169,7 +169,7 @@ def load_conf_from_file(conf_file_path, conf=__conf__): "Extensions.GoalStatePeriod": 6, "Extensions.InitialGoalStatePeriod": 6, "Extensions.GoalStateHistoryCleanupPeriod": 1800, - "OS.EnableFirewallPeriod": 30, + "OS.EnableFirewallPeriod": 300, "OS.RemovePersistentNetRulesPeriod": 30, "OS.RootDeviceScsiTimeoutPeriod": 30, "OS.MonitorDhcpClientRestartPeriod": 30, @@ -230,7 +230,7 @@ def enable_firewall(conf=__conf__): def get_enable_firewall_period(conf=__conf__): - return conf.get_int("OS.EnableFirewallPeriod", 30) + return conf.get_int("OS.EnableFirewallPeriod", 300) def get_remove_persistent_net_rules_period(conf=__conf__): diff --git a/tests/test_agent.py b/tests/test_agent.py index 386fab61c1..e5a37dfd0d 100644 --- a/tests/test_agent.py +++ b/tests/test_agent.py @@ -60,7 +60,7 @@ OS.CheckRdmaDriver = False OS.EnableFIPS = True OS.EnableFirewall = False -OS.EnableFirewallPeriod = 30 +OS.EnableFirewallPeriod = 300 OS.EnableRDMA = False OS.HomeDir = /home OS.MonitorDhcpClientRestartPeriod = 30 From 45e8d04fd5c5d7061934e2f5f3b6ea920233f7a7 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Mon, 7 Feb 2022 16:57:26 -0800 Subject: [PATCH 21/84] =?UTF-8?q?Do=20not=20raise=20on=20missing=20status?= =?UTF-8?q?=20blob;=20reduce=20amount=20of=20logging=20for=20vms=E2=80=A6?= =?UTF-8?q?=20(#2492)=20(#2496)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Do not raise on missing status blob; reduce amount of logging for vmsettings * remove extra file; fix typo Co-authored-by: narrieta (cherry picked from commit 7cce03b271cce9bee8d3c09a6eea3ea75cc3526c) --- .../extensions_goal_state_from_vm_settings.py | 20 +++--- azurelinuxagent/common/protocol/wire.py | 7 +- .../ext_conf-no_status_upload_blob.xml | 39 +++++++++++ .../vm_settings-no_status_upload_blob.json | 65 
+++++++++++++++++++ ...sions_goal_state_from_extensions_config.py | 10 +++ ..._extensions_goal_state_from_vm_settings.py | 8 +++ tests/protocol/test_wire.py | 26 +++++--- 7 files changed, 156 insertions(+), 19 deletions(-) create mode 100644 tests/data/hostgaplugin/ext_conf-no_status_upload_blob.xml create mode 100644 tests/data/hostgaplugin/vm_settings-no_status_upload_blob.json diff --git a/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py b/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py index dcf6c2c2d9..12a231732a 100644 --- a/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py +++ b/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py @@ -22,10 +22,10 @@ from azurelinuxagent.common.AgentGlobals import AgentGlobals from azurelinuxagent.common.exception import VmSettingsError +from azurelinuxagent.common.future import ustr from azurelinuxagent.common.protocol.extensions_goal_state import ExtensionsGoalState from azurelinuxagent.common.protocol.restapi import VMAgentManifest, Extension, ExtensionRequestedState, ExtensionSettings from azurelinuxagent.common.utils.flexible_version import FlexibleVersion -from azurelinuxagent.common.utils.textutil import format_exception class ExtensionsGoalStateFromVmSettings(ExtensionsGoalState): @@ -53,7 +53,7 @@ def __init__(self, etag, json_text): self._parse_vm_settings(json_text) self._do_common_validations() except Exception as e: - raise VmSettingsError("Error parsing vmSettings (etag: {0} HGAP: {1}): {2}\n{3}".format(etag, self._host_ga_plugin_version, format_exception(e), self.get_redacted_text())) + raise VmSettingsError("Error parsing vmSettings [HGAP: {0}]: {1}".format(self._host_ga_plugin_version, ustr(e))) @property def id(self): @@ -173,13 +173,15 @@ def _parse_status_upload_blob(self, vm_settings): # } status_upload_blob = vm_settings.get("statusUploadBlob") if status_upload_blob is None: - raise Exception("Missing 
statusUploadBlob") - self._status_upload_blob = status_upload_blob.get("value") - if self._status_upload_blob is None: - raise Exception("Missing statusUploadBlob.value") - self._status_upload_blob_type = status_upload_blob.get("statusBlobType") - if self._status_upload_blob is None: - raise Exception("Missing statusUploadBlob.statusBlobType") + self._status_upload_blob = None + self._status_upload_blob_type = "BlockBlob" + else: + self._status_upload_blob = status_upload_blob.get("value") + if self._status_upload_blob is None: + raise Exception("Missing statusUploadBlob.value") + self._status_upload_blob_type = status_upload_blob.get("statusBlobType") + if self._status_upload_blob_type is None: + self._status_upload_blob_type = "BlockBlob" def _parse_required_features(self, vm_settings): # Sample: diff --git a/azurelinuxagent/common/protocol/wire.py b/azurelinuxagent/common/protocol/wire.py index 2f80ac212f..cda6381f1e 100644 --- a/azurelinuxagent/common/protocol/wire.py +++ b/azurelinuxagent/common/protocol/wire.py @@ -1537,7 +1537,8 @@ class _VmSettingsError(object): class _VmSettingsErrorReporter(object): - _MaxErrors = 5 # Max number of error reported by period + _MaxLogErrors = 1 # Max number of errors by period reported to the local log + _MaxTelemetryErrors = 3 # Max number of errors by period reported to telemetry _Period = timedelta(hours=1) # How often to report the summary def __init__(self): @@ -1558,8 +1559,10 @@ def report_request(self): def report_error(self, error, category=None): self._error_count += 1 - if self._error_count <= _VmSettingsErrorReporter._MaxErrors: + if self._error_count <= _VmSettingsErrorReporter._MaxLogErrors: logger.info("[VmSettings] [Informational only, the Agent will continue normal operation] {0}", error) + + if self._error_count <= _VmSettingsErrorReporter._MaxTelemetryErrors: add_event(op=WALAEventOperation.VmSettings, message=error, is_success=False, log_event=False) if category == _VmSettingsError.ServerError: diff 
--git a/tests/data/hostgaplugin/ext_conf-no_status_upload_blob.xml b/tests/data/hostgaplugin/ext_conf-no_status_upload_blob.xml new file mode 100644 index 0000000000..5141c0dca9 --- /dev/null +++ b/tests/data/hostgaplugin/ext_conf-no_status_upload_blob.xml @@ -0,0 +1,39 @@ + + + + + Prod + + https://zrdfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Prod_uscentraleuap_manifest.xml + https://ardfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Prod_uscentraleuap_manifest.xml + + + + CentralUSEUAP + CRP + + + + + https://umsanh4b5rfz0q0p4pwm.blob.core.windows.net/5237dd14-0aad-f051-0fad-1e33e1b63091/5237dd14-0aad-f051-0fad-1e33e1b63091_manifest.xml + + + + + + + + + { + "runtimeSettings": [ + { + "handlerSettings": { + "publicSettings": {"commandToExecute":"echo 'cee174d4-4daa-4b07-9958-53b9649445c2'"} + } + } + ] +} + + +https://dcrcl3a0xs.blob.core.windows.net/$system/edp0plkw2b.86f4ae0a-61f8-48ae-9199-40f402d56864.vmSettings?sv=2018-03-28&sr=b&sk=system-1&sig=PaiLic%3d&se=9999-01-01T00%3a00%3a00Z&sp=r + diff --git a/tests/data/hostgaplugin/vm_settings-no_status_upload_blob.json b/tests/data/hostgaplugin/vm_settings-no_status_upload_blob.json new file mode 100644 index 0000000000..c184454a65 --- /dev/null +++ b/tests/data/hostgaplugin/vm_settings-no_status_upload_blob.json @@ -0,0 +1,65 @@ +{ + "hostGAPluginVersion": "1.0.8.115", + "vmSettingsSchemaVersion": "0.0", + "activityId": "a33f6f53-43d6-4625-b322-1a39651a00c9", + "correlationId": "9a47a2a2-e740-4bfc-b11b-4f2f7cfe7d2e", + "extensionsLastModifiedTickCount": 637726657706205217, + "extensionGoalStatesSource": "Fabric", + "onHold": true, + "inVMMetadata": { + "subscriptionId": "8e037ad4-618f-4466-8bc8-5099d41ac15b", + "resourceGroupName": "rg-dc-86fjzhp", + "vmName": "edp0plkw2b", + "location": "CentralUSEUAP", + "vmId": "86f4ae0a-61f8-48ae-9199-40f402d56864", + "vmSize": "Standard_B2s", + "osType": "Linux" 
+ }, + "requiredFeatures": [ + { + "name": "MultipleExtensionsPerHandler" + } + ], + "gaFamilies": [ + { + "name": "Prod", + "uris": [ + "https://zrdfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Prod_uscentraleuap_manifest.xml", + "https://ardfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Prod_uscentraleuap_manifest.xml" + ] + } + ], + "extensionGoalStates": [ + { + "name": "Microsoft.Azure.Extensions.CustomScript", + "version": "2.1.6", + "location": "https://umsavwggj2v40kvqhc0w.blob.core.windows.net/5237dd14-0aad-f051-0fad-1e33e1b63091/5237dd14-0aad-f051-0fad-1e33e1b63091_manifest.xml", + "failoverlocation": "https://umsafwzhkbm1rfrhl0ws.blob.core.windows.net/5237dd14-0aad-f051-0fad-1e33e1b63091/5237dd14-0aad-f051-0fad-1e33e1b63091_manifest.xml", + "additionalLocations": [ + "https://umsanh4b5rfz0q0p4pwm.blob.core.windows.net/5237dd14-0aad-f051-0fad-1e33e1b63091/5237dd14-0aad-f051-0fad-1e33e1b63091_manifest.xml" + ], + "state": "enabled", + "autoUpgrade": true, + "runAsStartupTask": false, + "isJson": true, + "useExactVersion": true, + "settingsSeqNo": 0, + "isMultiConfig": false, + "settings": [ + { + "publicSettings": "{\"commandToExecute\":\"echo 'cee174d4-4daa-4b07-9958-53b9649445c2'\"}" + } + ], + "dependsOn": [ + { + "DependsOnExtension": [ + { + "handler": "Microsoft.Azure.Security.Monitoring.AzureSecurityLinuxAgent" + } + ], + "dependencyLevel": 1 + } + ] + } + ] +} diff --git a/tests/protocol/test_extensions_goal_state_from_extensions_config.py b/tests/protocol/test_extensions_goal_state_from_extensions_config.py index d270f7adc0..0fb1ef1385 100644 --- a/tests/protocol/test_extensions_goal_state_from_extensions_config.py +++ b/tests/protocol/test_extensions_goal_state_from_extensions_config.py @@ -26,3 +26,13 @@ def test_it_should_use_default_values_when_in_vm_metadata_is_invalid(self): self.assertEqual(AgentGlobals.GUID_ZERO, 
extensions_goal_state.activity_id, "Incorrect activity Id") self.assertEqual(AgentGlobals.GUID_ZERO, extensions_goal_state.correlation_id, "Incorrect correlation Id") self.assertEqual('1900-01-01T00:00:00.000000Z', extensions_goal_state.created_on_timestamp, "Incorrect GS Creation time") + + def test_it_should_parse_missing_status_upload_blob_as_none(self): + data_file = mockwiredata.DATA_FILE_VM_SETTINGS.copy() + data_file["ext_conf"] = "hostgaplugin/ext_conf-no_status_upload_blob.xml" + with mock_wire_protocol(data_file) as protocol: + extensions_goal_state = protocol.get_extensions_goal_state() + + self.assertIsNone(extensions_goal_state.status_upload_blob, "Expected status upload blob to be None") + self.assertEqual("BlockBlob", extensions_goal_state.status_upload_blob_type, "Expected status upload blob to be Block") + diff --git a/tests/protocol/test_extensions_goal_state_from_vm_settings.py b/tests/protocol/test_extensions_goal_state_from_vm_settings.py index b8f4deb6e8..fe30eff0cf 100644 --- a/tests/protocol/test_extensions_goal_state_from_vm_settings.py +++ b/tests/protocol/test_extensions_goal_state_from_vm_settings.py @@ -47,6 +47,14 @@ def assert_property(name, value): # dependency level (multi-config) self.assertEqual(1, vm_settings.extensions[3].settings[1].dependencyLevel, "Incorrect dependency level (multi-config)") + def test_create_from_vm_settings_should_parse_missing_status_upload_blob_as_none(self): + vm_settings_text = fileutil.read_file(os.path.join(data_dir, "hostgaplugin/vm_settings-no_status_upload_blob.json")) + vm_settings = ExtensionsGoalStateFactory.create_from_vm_settings("123", vm_settings_text) + + self.assertIsNone(vm_settings.status_upload_blob, "Expected status upload blob to be None") + self.assertEqual("BlockBlob", vm_settings.status_upload_blob_type, "Expected status upload blob to be Block") + + class CaseFoldedDictionaryTestCase(AgentTestCase): def test_it_should_retrieve_items_ignoring_case(self): dictionary = 
json.loads('''{ diff --git a/tests/protocol/test_wire.py b/tests/protocol/test_wire.py index e84ea2a658..48e6c9b43a 100644 --- a/tests/protocol/test_wire.py +++ b/tests/protocol/test_wire.py @@ -1291,25 +1291,35 @@ def http_get_handler(url, *_, **__): return None protocol.set_http_handlers(http_get_handler=http_get_handler) + def get_telemetry_messages(): + return [kwargs["message"] for _, kwargs in add_event.call_args_list if kwargs["op"] == "VmSettings"] + + def get_log_messages(): + return [arg[0][0] for arg in logger_info.call_args_list if "[VmSettings]" in arg[0][0]] + with patch("azurelinuxagent.common.protocol.wire.add_event") as add_event: - for _ in range(_VmSettingsErrorReporter._MaxErrors + 3): - protocol.client.update_goal_state() + with patch('azurelinuxagent.common.logger.info') as logger_info: + for _ in range(_VmSettingsErrorReporter._MaxTelemetryErrors + 3): + protocol.client.update_goal_state() - messages = [kwargs["message"] for _, kwargs in add_event.call_args_list if kwargs["op"] == "VmSettings"] + telemetry_messages = get_telemetry_messages() + self.assertEqual(_VmSettingsErrorReporter._MaxTelemetryErrors, len(telemetry_messages), "The number of errors reported to telemetry is not the max allowed (got: {0})".format(telemetry_messages)) - self.assertEqual(_VmSettingsErrorReporter._MaxErrors, len(messages), "The number of errors reported is not the max allowed (got: {0})".format(messages)) + log_messages = get_log_messages() + self.assertEqual(_VmSettingsErrorReporter._MaxLogErrors, len(log_messages), "The number of errors reported to the local log is not the max allowed (got: {0})".format(telemetry_messages)) # Reset the error reporter and verify that additional errors are reported protocol.client._vm_settings_error_reporter._next_period = datetime.now() protocol.client.update_goal_state() # this triggers the reset with patch("azurelinuxagent.common.protocol.wire.add_event") as add_event: - for _ in range(3): - 
protocol.client.update_goal_state() + protocol.client.update_goal_state() - messages = [kwargs["message"] for _, kwargs in add_event.call_args_list if kwargs["op"] == "VmSettings"] + telemetry_messages = get_telemetry_messages() + self.assertEqual(1, len(telemetry_messages), "Expected additional errors to be reported to telemetry in the next period (got: {0})".format(telemetry_messages)) - self.assertEqual(3, len(messages), "Expected additional errors to be reported in the next period (got: {0})".format(messages)) + log_messages = get_log_messages() + self.assertEqual(1, len(log_messages), "Expected additional errors to be reported to the local log in the next period (got: {0})".format(telemetry_messages)) def test_it_should_use_vm_settings_by_default(self): with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: From b2c4abc3418c9f668de39b80454d874ae2228cc1 Mon Sep 17 00:00:00 2001 From: narrieta Date: Tue, 8 Feb 2022 09:55:34 -0800 Subject: [PATCH 22/84] fix merge issues --- azurelinuxagent/common/protocol/hostplugin.py | 8 +++- ...sions_goal_state_from_extensions_config.py | 2 +- tests/protocol/test_hostplugin.py | 40 ++++++++++++++----- 3 files changed, 36 insertions(+), 14 deletions(-) diff --git a/azurelinuxagent/common/protocol/hostplugin.py b/azurelinuxagent/common/protocol/hostplugin.py index 40ed8fda9d..81d9b2f0c2 100644 --- a/azurelinuxagent/common/protocol/hostplugin.py +++ b/azurelinuxagent/common/protocol/hostplugin.py @@ -512,7 +512,8 @@ class _VmSettingsError(object): class _VmSettingsErrorReporter(object): - _MaxErrors = 5 # Max number of error reported by period + _MaxLogErrors = 1 # Max number of errors by period reported to the local log + _MaxTelemetryErrors = 3 # Max number of errors by period reported to telemetry _Period = datetime.timedelta(hours=1) # How often to report the summary def __init__(self): @@ -533,7 +534,10 @@ def report_request(self): def report_error(self, error, category=None): self._error_count += 1 - if 
self._error_count <= _VmSettingsErrorReporter._MaxErrors: + if self._error_count <= _VmSettingsErrorReporter._MaxLogErrors: + logger.info("[VmSettings] [Informational only, the Agent will continue normal operation] {0}", error) + + if self._error_count <= _VmSettingsErrorReporter._MaxTelemetryErrors: add_event(op=WALAEventOperation.VmSettings, message=error, is_success=False, log_event=False) if category == _VmSettingsError.ServerError: diff --git a/tests/protocol/test_extensions_goal_state_from_extensions_config.py b/tests/protocol/test_extensions_goal_state_from_extensions_config.py index 9d82a70640..3f2c4d0b3a 100644 --- a/tests/protocol/test_extensions_goal_state_from_extensions_config.py +++ b/tests/protocol/test_extensions_goal_state_from_extensions_config.py @@ -31,7 +31,7 @@ def test_it_should_parse_missing_status_upload_blob_as_none(self): data_file = mockwiredata.DATA_FILE_VM_SETTINGS.copy() data_file["ext_conf"] = "hostgaplugin/ext_conf-no_status_upload_blob.xml" with mock_wire_protocol(data_file) as protocol: - extensions_goal_state = protocol.get_extensions_goal_state() + extensions_goal_state = protocol.get_goal_state().extensions_goal_state self.assertIsNone(extensions_goal_state.status_upload_blob, "Expected status upload blob to be None") self.assertEqual("BlockBlob", extensions_goal_state.status_upload_blob_type, "Expected status upload blob to be Block") diff --git a/tests/protocol/test_hostplugin.py b/tests/protocol/test_hostplugin.py index 4fac06b3a5..c7d32c3355 100644 --- a/tests/protocol/test_hostplugin.py +++ b/tests/protocol/test_hostplugin.py @@ -916,7 +916,8 @@ def http_get_handler(url, *_, **__): self.assertEqual(expected, summary, "The count of errors is incorrect") - def test_it_should_limit_the_number_of_errors_it_reports(self): + @patch("azurelinuxagent.common.conf.get_enable_fast_track", return_value=True) + def test_it_should_limit_the_number_of_errors_it_reports(self, _): with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) 
as protocol: def http_get_handler(url, *_, **__): if self.is_host_plugin_vm_settings_request(url): @@ -924,25 +925,42 @@ def http_get_handler(url, *_, **__): return None protocol.set_http_handlers(http_get_handler=http_get_handler) + def get_telemetry_messages(): + return [kwargs["message"] for _, kwargs in add_event.call_args_list if kwargs["op"] == "VmSettings"] + + def get_log_messages(): + return [arg[0][0] for arg in logger_info.call_args_list if "[VmSettings]" in arg[0][0]] + + def fetch_vm_settings(): + try: + host_plugin.fetch_vm_settings(True) + except: + pass # All calls produce an error; ignore it + with patch("azurelinuxagent.common.protocol.hostplugin.add_event") as add_event: - for _ in range(_VmSettingsErrorReporter._MaxErrors + 3): - self._fetch_vm_settings_ignoring_errors(protocol) + with patch('azurelinuxagent.common.logger.info') as logger_info: + host_plugin = protocol.client.get_host_plugin() + for _ in range(_VmSettingsErrorReporter._MaxTelemetryErrors + 3): + fetch_vm_settings() - messages = [kwargs["message"] for _, kwargs in add_event.call_args_list if kwargs["op"] == "VmSettings"] + telemetry_messages = get_telemetry_messages() + self.assertEqual(_VmSettingsErrorReporter._MaxTelemetryErrors, len(telemetry_messages), "The number of errors reported to telemetry is not the max allowed (got: {0})".format(telemetry_messages)) - self.assertEqual(_VmSettingsErrorReporter._MaxErrors, len(messages), "The number of errors reported is not the max allowed (got: {0})".format(messages)) + log_messages = get_log_messages() + self.assertEqual(_VmSettingsErrorReporter._MaxLogErrors, len(log_messages), "The number of errors reported to the local log is not the max allowed (got: {0})".format(telemetry_messages)) # Reset the error reporter and verify that additional errors are reported - protocol.client._host_plugin._vm_settings_error_reporter._next_period = datetime.datetime.now() - self._fetch_vm_settings_ignoring_errors(protocol) # this triggers the reset 
+ host_plugin._vm_settings_error_reporter._next_period = datetime.datetime.now() + fetch_vm_settings() # this triggers the reset with patch("azurelinuxagent.common.protocol.hostplugin.add_event") as add_event: - for _ in range(3): - self._fetch_vm_settings_ignoring_errors(protocol) + fetch_vm_settings() - messages = [kwargs["message"] for _, kwargs in add_event.call_args_list if kwargs["op"] == "VmSettings"] + telemetry_messages = get_telemetry_messages() + self.assertEqual(1, len(telemetry_messages), "Expected additional errors to be reported to telemetry in the next period (got: {0})".format(telemetry_messages)) - self.assertEqual(3, len(messages), "Expected additional errors to be reported in the next period (got: {0})".format(messages)) + log_messages = get_log_messages() + self.assertEqual(1, len(log_messages), "Expected additional errors to be reported to the local log in the next period (got: {0})".format(telemetry_messages)) @patch("azurelinuxagent.common.conf.get_enable_fast_track", return_value=True) def test_it_should_stop_issuing_vm_settings_requests_when_api_is_not_supported(self, _): From 4aacb3b7f293ac745aa22bbb8e1bb1284b8ccac8 Mon Sep 17 00:00:00 2001 From: narrieta Date: Tue, 8 Feb 2022 10:05:58 -0800 Subject: [PATCH 23/84] pylint issue --- tests/protocol/test_hostplugin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/protocol/test_hostplugin.py b/tests/protocol/test_hostplugin.py index c7d32c3355..c18627a0a5 100644 --- a/tests/protocol/test_hostplugin.py +++ b/tests/protocol/test_hostplugin.py @@ -934,7 +934,7 @@ def get_log_messages(): def fetch_vm_settings(): try: host_plugin.fetch_vm_settings(True) - except: + except ProtocolError: pass # All calls produce an error; ignore it with patch("azurelinuxagent.common.protocol.hostplugin.add_event") as add_event: From 459f0598d2fcbccd457ae16bd5f4dea1f4b57c82 Mon Sep 17 00:00:00 2001 From: Kevin Clark Date: Wed, 9 Feb 2022 13:06:38 -0800 Subject: [PATCH 24/84] Report VMSize 
via Heartbeat telemetry event (#2462) --- azurelinuxagent/ga/update.py | 30 +++++++++++++++++++++++-- tests/ga/test_update.py | 43 ++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 2 deletions(-) diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py index 8852575250..e6414a7f4a 100644 --- a/azurelinuxagent/ga/update.py +++ b/azurelinuxagent/ga/update.py @@ -33,6 +33,7 @@ import azurelinuxagent.common.conf as conf import azurelinuxagent.common.logger as logger +from azurelinuxagent.common.protocol.imds import get_imds_client import azurelinuxagent.common.utils.fileutil as fileutil import azurelinuxagent.common.utils.restutil as restutil import azurelinuxagent.common.utils.textutil as textutil @@ -161,6 +162,9 @@ def __init__(self): self._heartbeat_id = str(uuid.uuid4()).upper() self._heartbeat_counter = 0 + # VM Size is reported via the heartbeat, default it here. + self._vm_size = None + # these members are used to avoid reporting errors too frequently self._heartbeat_update_goal_state_error_count = 0 self._last_try_update_goal_state_failed = False @@ -407,6 +411,25 @@ def run(self, debug=False): self._shutdown() sys.exit(0) + def _get_vm_size(self, protocol): + """ + Including VMSize is meant to capture the architecture of the VM (i.e. arm64 VMs will + have arm64 included in their vmsize field and amd64 will have no architecture indicated). 
+ """ + if self._vm_size is None: + + imds_client = get_imds_client(protocol.get_endpoint()) + + try: + imds_info = imds_client.get_compute() + self._vm_size = imds_info.vmSize + except Exception as e: + err_msg = "Attempts to retrieve VM size information from IMDS are failing: {0}".format(textutil.format_exception(e)) + logger.periodic_warn(logger.EVERY_SIX_HOURS, "[PERIODIC] {0}".format(err_msg)) + return "unknown" + + return self._vm_size + def _check_daemon_running(self, debug): # Check that the parent process (the agent's daemon) is still running if not debug and self._is_orphaned: @@ -1152,10 +1175,13 @@ def _send_heartbeat_telemetry(self, protocol): if datetime.utcnow() >= (self._last_telemetry_heartbeat + UpdateHandler.TELEMETRY_HEARTBEAT_PERIOD): dropped_packets = self.osutil.get_firewall_dropped_packets(protocol.get_endpoint()) auto_update_enabled = 1 if conf.get_autoupdate_enabled() else 0 + # Include VMSize in the heartbeat message because the kusto table does not have + # a separate column for it (or architecture). 
+ vmsize = self._get_vm_size(protocol) - telemetry_msg = "{0};{1};{2};{3};{4}".format(self._heartbeat_counter, self._heartbeat_id, dropped_packets, + telemetry_msg = "{0};{1};{2};{3};{4};{5}".format(self._heartbeat_counter, self._heartbeat_id, dropped_packets, self._heartbeat_update_goal_state_error_count, - auto_update_enabled) + auto_update_enabled, vmsize) debug_log_msg = "[DEBUG HeartbeatCounter: {0};HeartbeatId: {1};DroppedPackets: {2};" \ "UpdateGSErrors: {3};AutoUpdate: {4}]".format(self._heartbeat_counter, self._heartbeat_id, dropped_packets, diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index 1d0fffc240..4e429eaf54 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -20,6 +20,7 @@ from datetime import datetime, timedelta from threading import currentThread +from azurelinuxagent.common.protocol.imds import ComputeInfo from tests.common.osutil.test_default import TestOSUtil import azurelinuxagent.common.osutil.default as osutil @@ -1565,6 +1566,48 @@ def test_telemetry_heartbeat_creates_event(self, patch_add_event, patch_info, *_ self.assertTrue(any(call_args[0] == "[HEARTBEAT] Agent {0} is running as the goal state agent {1}" for call_args in patch_info.call_args), "The heartbeat was not written to the agent's log") + @patch("azurelinuxagent.ga.update.add_event") + @patch("azurelinuxagent.common.protocol.imds.ImdsClient") + def test_telemetry_heartbeat_retries_failed_vm_size_fetch(self, mock_imds_factory, patch_add_event, *_): + + def validate_single_heartbeat_event_matches_vm_size(vm_size): + heartbeat_event_kwargs = [ + kwargs for _, kwargs in patch_add_event.call_args_list + if kwargs.get('op', None) == WALAEventOperation.HeartBeat + ] + + self.assertEqual(1, len(heartbeat_event_kwargs), "Expected exactly one HeartBeat event, got {0}"\ + .format(heartbeat_event_kwargs)) + + telemetry_message = heartbeat_event_kwargs[0].get("message", "") + self.assertTrue(telemetry_message.endswith(vm_size), + "Expected HeartBeat 
message ('{0}') to end with the test vmSize value, {1}."\ + .format(telemetry_message, vm_size)) + + with mock_wire_protocol(mockwiredata.DATA_FILE) as mock_protocol: + update_handler = get_update_handler() + update_handler.protocol_util.get_protocol = Mock(return_value=mock_protocol) + + # Zero out the _vm_size parameter for test resiliency + update_handler._vm_size = None + + mock_imds_client = mock_imds_factory.return_value = Mock() + + # First force a vmSize retrieval failure + mock_imds_client.get_compute.side_effect = HttpError(msg="HTTP Test Failure") + update_handler._last_telemetry_heartbeat = datetime.utcnow() - timedelta(hours=1) + update_handler._send_heartbeat_telemetry(mock_protocol) + + validate_single_heartbeat_event_matches_vm_size("unknown") + patch_add_event.reset_mock() + + # Now provide a vmSize + mock_imds_client.get_compute = lambda: ComputeInfo(vmSize="TestVmSizeValue") + update_handler._last_telemetry_heartbeat = datetime.utcnow() - timedelta(hours=1) + update_handler._send_heartbeat_telemetry(mock_protocol) + + validate_single_heartbeat_event_matches_vm_size("TestVmSizeValue") + @staticmethod def _get_test_ext_handler_instance(protocol, name="OSTCExtensions.ExampleHandlerLinux", version="1.0.0"): eh = Extension(name=name) From 25c9b55714171ea9444b1ff1d5c56bb58b6dd6ff Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Fri, 11 Feb 2022 07:41:34 -0800 Subject: [PATCH 25/84] Save agent status file to history (#2500) * Save agent status file to history * rename variables * rename variables Co-authored-by: narrieta --- azurelinuxagent/common/utils/archive.py | 22 ++++++++++++++++------ azurelinuxagent/ga/exthandlers.py | 2 +- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/azurelinuxagent/common/utils/archive.py b/azurelinuxagent/common/utils/archive.py index 9b60e32ab9..7c081c819e 100644 --- a/azurelinuxagent/common/utils/archive.py +++ b/azurelinuxagent/common/utils/archive.py @@ -199,15 +199,25 @@ def __init__(self, 
timestamp, tag=None): self._root = os.path.join(conf.get_lib_dir(), ARCHIVE_DIRECTORY_NAME, "{0}_{1}".format(timestamp, tag) if tag is not None else timestamp) def save(self, data, file_name): + def write_to_file(d, f): + with open(f, "w") as h: + h.write(d) + + self._save(write_to_file, data, file_name) + + def _save_file(self, source_file, target_name): + self._save(shutil.move, source_file, target_name) + + def _save(self, function, source, target_name): try: if not os.path.exists(self._root): fileutil.mkdir(self._root, mode=0o700) - full_file_name = os.path.join(self._root, file_name) - fileutil.write_file(full_file_name, data) - except IOError as e: + target = os.path.join(self._root, target_name) + function(source, target) + except Exception as e: if not self._errors: # report only 1 error per directory self._errors = True - logger.warn("Failed to save goal state file {0}: {1} [no additional errors saving the goal state will be reported]".format(file_name, e)) + logger.warn("Failed to save goal state file {0}: {1} [no additional errors saving the goal state will be reported]".format(target_name, e)) def save_goal_state(self, text): self.save(text, _GOAL_STATE_FILE_NAME) @@ -227,5 +237,5 @@ def save_hosting_env(self, text): def save_shared_conf(self, text): self.save(text, _SHARED_CONF_FILE_NAME) - def save_status(self, text): - self.save(text, AGENT_STATUS_FILE) + def save_status_file(self, status_file): + self._save_file(status_file, AGENT_STATUS_FILE) diff --git a/azurelinuxagent/ga/exthandlers.py b/azurelinuxagent/ga/exthandlers.py index cd87225b10..49eb4cf0cf 100644 --- a/azurelinuxagent/ga/exthandlers.py +++ b/azurelinuxagent/ga/exthandlers.py @@ -965,7 +965,7 @@ def write_ext_handlers_status_to_info_file(self, vm_status, incarnation_changed) # On new goal state, move the last status report for the previous goal state to the history folder last_modified = os.path.getmtime(status_file) timestamp = 
datetime.datetime.utcfromtimestamp(last_modified).isoformat() - GoalStateHistory(timestamp).save_status(status_file) + GoalStateHistory(timestamp).save_status_file(status_file) # Now create/overwrite the status file; this file is kept for debugging purposes only status_blob_text = self.protocol.get_status_blob_data() From daad9fb4486f26b8729d7333c134eb810c501f0b Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Fri, 11 Feb 2022 12:27:09 -0800 Subject: [PATCH 26/84] Use ExtensionsGoalState.id when checking for changes in extensions (#2499) * Use ExtensionsGoalState.id when checking for changes in extensions * Fix typo Co-authored-by: narrieta --- ...sions_goal_state_from_extensions_config.py | 2 +- .../extensions_goal_state_from_vm_settings.py | 2 +- azurelinuxagent/common/protocol/wire.py | 3 - azurelinuxagent/ga/update.py | 71 +++++++++++-------- tests/ga/test_remoteaccess_handler.py | 3 +- tests/ga/test_update.py | 33 ++++++--- tests/protocol/test_wire.py | 2 +- 7 files changed, 70 insertions(+), 46 deletions(-) diff --git a/azurelinuxagent/common/protocol/extensions_goal_state_from_extensions_config.py b/azurelinuxagent/common/protocol/extensions_goal_state_from_extensions_config.py index 7f8aeea3d8..d7d80f4c10 100644 --- a/azurelinuxagent/common/protocol/extensions_goal_state_from_extensions_config.py +++ b/azurelinuxagent/common/protocol/extensions_goal_state_from_extensions_config.py @@ -32,7 +32,7 @@ class ExtensionsGoalStateFromExtensionsConfig(ExtensionsGoalState): def __init__(self, incarnation, xml_text, wire_client): super(ExtensionsGoalStateFromExtensionsConfig, self).__init__() - self._id = incarnation + self._id = "incarnation_{0}".format(incarnation) self._incarnation = incarnation self._text = xml_text self._status_upload_blob = None diff --git a/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py b/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py index aabf188997..d559254c67 100644 --- 
a/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py +++ b/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py @@ -33,7 +33,7 @@ class ExtensionsGoalStateFromVmSettings(ExtensionsGoalState): def __init__(self, etag, json_text): super(ExtensionsGoalStateFromVmSettings, self).__init__() - self._id = etag + self._id = "etag_{0}".format(etag) self._etag = etag self._text = json_text self._host_ga_plugin_version = FlexibleVersion('0.0.0.0') diff --git a/azurelinuxagent/common/protocol/wire.py b/azurelinuxagent/common/protocol/wire.py index 7ee8e3721d..ba06f7b1e8 100644 --- a/azurelinuxagent/common/protocol/wire.py +++ b/azurelinuxagent/common/protocol/wire.py @@ -108,9 +108,6 @@ def get_certs(self): certificates = self.client.get_certs() return certificates.cert_list - def get_incarnation(self): - return self.client.get_goal_state().incarnation - def get_vmagent_manifests(self): goal_state = self.client.get_goal_state() ext_conf = goal_state.extensions_goal_state diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py index e6414a7f4a..f389c5cfcf 100644 --- a/azurelinuxagent/ga/update.py +++ b/azurelinuxagent/ga/update.py @@ -170,7 +170,15 @@ def __init__(self): self._last_try_update_goal_state_failed = False self._report_status_last_failed_incarnation = -1 - self.last_incarnation = None + # incarnation of the last goal state that has been fully processed + # (None if no goal state has been processed) + self._last_incarnation = None + # ID of the last extensions goal state that has been fully processed (incarnation for WireServer goal states or etag for HostGAPlugin goal states) + # (None if no extensions goal state has been processed) + self._last_extensions_gs_id = None + # Goal state that is currently been processed + # (None if no goal state is being processed) + self._goal_state = None self._extensions_summary = ExtensionsSummary() @@ -449,6 +457,8 @@ def _try_update_goal_state(self, protocol): try: 
protocol.update_goal_state() + self._goal_state = protocol.get_goal_state() + if self._last_try_update_goal_state_failed: self._last_try_update_goal_state_failed = False message = u"Retrieving the goal state recovered from previous errors" @@ -463,6 +473,7 @@ def _try_update_goal_state(self, protocol): add_event(AGENT_NAME, op=WALAEventOperation.FetchGoalState, version=CURRENT_VERSION, is_success=False, message=message, log_event=False) message = u"Attempts to retrieve the goal state are failing: {0}".format(ustr(e)) logger.periodic_warn(logger.EVERY_SIX_HOURS, "[PERIODIC] {0}".format(message)) + self._heartbeat_update_goal_state_error_count += 1 return False return True @@ -540,30 +551,30 @@ def handle_updates_for_requested_version(): self.__upgrade_agent_if_permitted() - def __goal_state_updated(self, incarnation): + def _processing_new_incarnation(self): + """ + True if we are currently processing a new incarnation (i.e. WireServer goal state) + """ + return self._goal_state is not None and self._goal_state.incarnation != self._last_incarnation + + def _processing_new_extensions_goal_state(self): """ - This function returns if the Goal State updated. - We currently rely on the incarnation number to determine that; i.e. 
if it changed from the last processed GS + True if we are currently processing a new extensions goal state """ - # TODO: This check should be based on the ExtensionsGoalState.id property - # (this property abstracts incarnation/etag logic based on the delivery pipeline of the Goal State) - return incarnation != self.last_incarnation + return self._goal_state is not None and self._goal_state.extensions_goal_state.id != self._last_extensions_gs_id def _process_goal_state(self, exthandlers_handler, remote_access_handler): + try: + protocol = exthandlers_handler.protocol - protocol = exthandlers_handler.protocol - if not self._try_update_goal_state(protocol): - self._heartbeat_update_goal_state_error_count += 1 - # We should have a cached goal state here, go ahead and report status for that. - self._report_status(exthandlers_handler, incarnation_changed=False) - return + # update self._goal_state + self._try_update_goal_state(protocol) - # Update the Guest Agent if a new version is available - self.__update_guest_agent(protocol) - incarnation = protocol.get_incarnation() + # Update the Guest Agent if a new version is available + if self._goal_state is not None: + self.__update_guest_agent(protocol) - try: - if self.__goal_state_updated(incarnation): + if self._processing_new_extensions_goal_state(): if not self._extensions_summary.converged: message = "A new goal state was received, but not all the extensions in the previous goal state have completed: {0}".format(self._extensions_summary) logger.warn(message) @@ -575,12 +586,14 @@ def _process_goal_state(self, exthandlers_handler, remote_access_handler): # report status always, even if the goal state did not change # do it before processing the remote access, since that operation can take a long time - self._report_status(exthandlers_handler, incarnation_changed=self.__goal_state_updated(incarnation)) + self._report_status(exthandlers_handler) - if self.__goal_state_updated(incarnation): + if 
self._processing_new_incarnation(): remote_access_handler.run() finally: - self.last_incarnation = incarnation + if self._goal_state is not None: + self._last_incarnation = self._goal_state.incarnation + self._last_extensions_gs_id = self._goal_state.extensions_goal_state.id def __get_vmagent_update_status(self, protocol, incarnation_changed): """ @@ -618,10 +631,11 @@ def __get_vmagent_update_status(self, protocol, incarnation_changed): return update_status - def _report_status(self, exthandlers_handler, incarnation_changed): - vm_agent_update_status = self.__get_vmagent_update_status(exthandlers_handler.protocol, incarnation_changed) + def _report_status(self, exthandlers_handler): + vm_agent_update_status = self.__get_vmagent_update_status(exthandlers_handler.protocol, self._processing_new_extensions_goal_state()) # report_ext_handlers_status does its own error handling and returns None if an error occurred - vm_status = exthandlers_handler.report_ext_handlers_status(incarnation_changed=incarnation_changed, + # TODO: Review the use of incarnation when reporting status... what should be the behavior for Fast Track goal states (i.e. no incarnation)? 
+ vm_status = exthandlers_handler.report_ext_handlers_status(incarnation_changed=self._processing_new_extensions_goal_state(), vm_agent_update_status=vm_agent_update_status) if vm_status is None: return @@ -641,8 +655,9 @@ def _report_status(self, exthandlers_handler, incarnation_changed): self._on_initial_goal_state_completed(self._extensions_summary) except Exception as error: # report errors only once per incarnation - if self._report_status_last_failed_incarnation != exthandlers_handler.protocol.get_incarnation(): - self._report_status_last_failed_incarnation = exthandlers_handler.protocol.get_incarnation() + goal_state = exthandlers_handler.protocol.get_goal_state() + if self._report_status_last_failed_incarnation != goal_state.incarnation: + self._report_status_last_failed_incarnation = goal_state.incarnation msg = u"Error logging the goal state summary: {0}".format(textutil.format_exception(error)) logger.warn(msg) add_event(op=WALAEventOperation.GoalState, is_success=False, message=msg) @@ -1044,8 +1059,8 @@ def agent_upgrade_time_elapsed(now_): daemon_version = self.__get_daemon_version_for_update() try: # Fetch the agent manifests from the latest Goal State - incarnation = protocol.get_incarnation() - gs_updated = self.__goal_state_updated(incarnation) + incarnation = self._goal_state.incarnation + gs_updated = self._processing_new_extensions_goal_state() requested_version, manifest = self.__get_requested_version_and_manifest_from_last_gs(protocol) if manifest is None: logger.verbose( diff --git a/tests/ga/test_remoteaccess_handler.py b/tests/ga/test_remoteaccess_handler.py index 1e8e76e216..37187702e3 100644 --- a/tests/ga/test_remoteaccess_handler.py +++ b/tests/ga/test_remoteaccess_handler.py @@ -343,9 +343,8 @@ def test_handle_remote_access_deleted_user_readded(self, _): @patch('azurelinuxagent.common.utils.cryptutil.CryptUtil.decrypt_secret', return_value="]aPPEv}uNg1FPnl?") @patch('azurelinuxagent.common.osutil.get_osutil', 
return_value=MockOSUtil()) @patch('azurelinuxagent.common.protocol.util.ProtocolUtil.get_protocol', return_value=WireProtocol("12.34.56.78")) - @patch('azurelinuxagent.common.protocol.wire.WireProtocol.get_incarnation', return_value="1") @patch('azurelinuxagent.common.protocol.wire.WireClient.get_remote_access', return_value="asdf") - def test_remote_access_handler_run_bad_data(self, _1, _2, _3, _4, _5): + def test_remote_access_handler_run_bad_data(self, _1, _2, _3, _4): with patch("azurelinuxagent.ga.remoteaccess.get_osutil", return_value=MockOSUtil()): rah = RemoteAccessHandler(Mock()) tstpassword = "]aPPEv}uNg1FPnl?" diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index 4e429eaf54..013c9223a9 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -1461,6 +1461,7 @@ def _test_upgrade_available( protocol = self._create_protocol(count=count, versions=versions) self.update_handler.protocol_util = protocol + self.update_handler._goal_state = protocol.get_goal_state() conf.get_autoupdate_gafamily = Mock(return_value=protocol.family) return self.update_handler._download_agent_if_upgrade_available(protocol, base_version=base_version) @@ -2525,6 +2526,17 @@ def __init__(self, return_value=0, side_effect=None): self.wait = Mock(return_value=return_value, side_effect=side_effect) +class ExtensionsGoalStateMock(object): + def __init__(self, identifier): + self.id = identifier + + +class GoalStateMock(object): + def __init__(self, incarnation): + self.incarnation = incarnation + self.extensions_goal_state = ExtensionsGoalStateMock(incarnation) + + class ProtocolMock(object): def __init__(self, family="TestAgent", etag=42, versions=None, client=None): self.family = family @@ -2534,6 +2546,7 @@ def __init__(self, family="TestAgent", etag=42, versions=None, client=None): "get_vmagent_pkgs": 0, "update_goal_state": 0 } + self._goal_state = GoalStateMock(etag) self.goal_state_is_stale = False self.etag = etag self.versions = versions if versions is 
not None else [] @@ -2569,8 +2582,8 @@ def create_packages(self): def get_protocol(self): return self - def get_incarnation(self): - return self.etag + def get_goal_state(self): + return self._goal_state def get_vmagent_manifests(self): self.call_counts["get_vmagent_manifests"] += 1 @@ -2647,16 +2660,16 @@ def test_it_should_update_the_goal_state(self): # the first goal state should produce an update update_handler._try_update_goal_state(protocol) - self.assertEqual(protocol.get_incarnation(), '12345', "The goal state was not updated (received unexpected incarnation)") + self.assertEqual(update_handler._goal_state.incarnation, '12345', "The goal state was not updated (received unexpected incarnation)") # no changes in the goal state should not produce an update update_handler._try_update_goal_state(protocol) - self.assertEqual(protocol.get_incarnation(), '12345', "The goal state should not be updated (received unexpected incarnation)") + self.assertEqual(update_handler._goal_state.incarnation, '12345', "The goal state should not be updated (received unexpected incarnation)") # a new goal state should produce an update protocol.mock_wire_data.set_incarnation(6789) update_handler._try_update_goal_state(protocol) - self.assertEqual(protocol.get_incarnation(), '6789', "The goal state was not updated (received unexpected incarnation)") + self.assertEqual(update_handler._goal_state.incarnation, '6789', "The goal state was not updated (received unexpected incarnation)") def test_it_should_log_errors_only_when_the_error_state_changes(self): with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: @@ -2961,18 +2974,18 @@ def test_report_status_should_log_errors_only_once_per_goal_state(self): update_handler = _create_update_handler() with _mock_exthandlers_handler() as exthandlers_handler: with patch("azurelinuxagent.ga.update.logger.warn") as logger_warn: - update_handler._report_status(exthandlers_handler, False) + update_handler._report_status(exthandlers_handler) 
self.assertEqual(0, logger_warn.call_count, "UpdateHandler._report_status() should not report WARNINGS when there are no errors") with patch("azurelinuxagent.ga.update.ExtensionsSummary.__init__", return_value=Exception("TEST EXCEPTION")): # simulate an error during _report_status() - update_handler._report_status(exthandlers_handler, False) - update_handler._report_status(exthandlers_handler, False) - update_handler._report_status(exthandlers_handler, False) + update_handler._report_status(exthandlers_handler) + update_handler._report_status(exthandlers_handler) + update_handler._report_status(exthandlers_handler) self.assertEqual(1, logger_warn.call_count, "UpdateHandler._report_status() should report only 1 WARNING when there are multiple errors within the same goal state") exthandlers_handler.protocol.mock_wire_data.set_incarnation(999) update_handler._try_update_goal_state(exthandlers_handler.protocol) - update_handler._report_status(exthandlers_handler, True) + update_handler._report_status(exthandlers_handler) self.assertEqual(2, logger_warn.call_count, "UpdateHandler._report_status() should continue reporting errors after a new goal state") diff --git a/tests/protocol/test_wire.py b/tests/protocol/test_wire.py index 442e14de8e..b182ca356e 100644 --- a/tests/protocol/test_wire.py +++ b/tests/protocol/test_wire.py @@ -110,7 +110,7 @@ def _test_getters(self, test_data, certsMustBePresent, __, MockCryptUtil, _): self.assertFalse(os.path.isfile(crt1)) self.assertFalse(os.path.isfile(crt2)) self.assertFalse(os.path.isfile(prv2)) - self.assertEqual("1", protocol.get_incarnation()) + self.assertEqual("1", protocol.get_goal_state().incarnation) @staticmethod def _get_telemetry_events_generator(event_list): From 438260a751ac382ff55489215bd5e25e03688d00 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Tue, 15 Feb 2022 10:30:54 -0800 Subject: [PATCH 27/84] Save vmSettings on parse errors; improve messages in parse errors (#2503) (#2507) * Save vmSettings on parse 
errors; improve messages in parse errors * pylint warnings * pylint warnings Co-authored-by: narrieta (cherry picked from commit 97906d483ba215e12eae1435dd0a83e8af1ce31c) --- azurelinuxagent/common/exception.py | 4 ++ .../extensions_goal_state_from_vm_settings.py | 26 ++++--- azurelinuxagent/common/protocol/goal_state.py | 13 ++-- azurelinuxagent/common/protocol/hostplugin.py | 13 +++- tests/common/test_exception.py | 46 ------------- .../ext_conf-empty_depends_on.xml | 56 +++++++++++++++ .../vm_settings-empty_depends_on.json | 68 +++++++++++++++++++ ...sions_goal_state_from_extensions_config.py | 10 +++ tests/protocol/test_goal_state.py | 22 +++++- 9 files changed, 194 insertions(+), 64 deletions(-) delete mode 100644 tests/common/test_exception.py create mode 100644 tests/data/hostgaplugin/ext_conf-empty_depends_on.xml create mode 100644 tests/data/hostgaplugin/vm_settings-empty_depends_on.json diff --git a/azurelinuxagent/common/exception.py b/azurelinuxagent/common/exception.py index bfeb039639..2c2fe19816 100644 --- a/azurelinuxagent/common/exception.py +++ b/azurelinuxagent/common/exception.py @@ -125,6 +125,10 @@ class VmSettingsError(ExtensionsGoalStateError): """ Error raised when the VmSettings are malformed """ + def __init__(self, message, etag, vm_settings_text, inner=None): + super(VmSettingsError, self).__init__(message, inner) + self.etag = etag + self.vm_settings_text = vm_settings_text class MultiConfigExtensionEnableError(ExtensionError): diff --git a/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py b/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py index d559254c67..afe7fa0a7e 100644 --- a/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py +++ b/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py @@ -23,6 +23,7 @@ from azurelinuxagent.common.AgentGlobals import AgentGlobals from azurelinuxagent.common.exception import VmSettingsError from 
azurelinuxagent.common.future import ustr +import azurelinuxagent.common.logger as logger from azurelinuxagent.common.protocol.extensions_goal_state import ExtensionsGoalState from azurelinuxagent.common.protocol.restapi import VMAgentManifest, Extension, ExtensionRequestedState, ExtensionSettings from azurelinuxagent.common.utils.flexible_version import FlexibleVersion @@ -53,7 +54,7 @@ def __init__(self, etag, json_text): self._parse_vm_settings(json_text) self._do_common_validations() except Exception as e: - raise VmSettingsError("Error parsing vmSettings [HGAP: {0}]: {1}".format(self._host_ga_plugin_version, ustr(e))) + raise VmSettingsError("Error parsing vmSettings [HGAP: {0}]: {1}".format(self._host_ga_plugin_version, ustr(e)), etag, self.get_redacted_text()) @property def id(self): @@ -200,13 +201,13 @@ def _parse_required_features(self, vm_settings): required_features = vm_settings.get("requiredFeatures") if required_features is not None: if not isinstance(required_features, list): - raise Exception("requiredFeatures should be an array") + raise Exception("requiredFeatures should be an array (got {0})".format(required_features)) def get_required_features_names(): for feature in required_features: name = feature.get("name") if name is None: - raise Exception("A required feature is missing the 'name' property") + raise Exception("A required feature is missing the 'name' property (got {0})".format(feature)) yield name self._required_features.extend(get_required_features_names()) @@ -238,7 +239,7 @@ def _parse_agent_manifests(self, vm_settings): if families is None: return if not isinstance(families, list): - raise Exception("gaFamilies should be an array") + raise Exception("gaFamilies should be an array (got {0})".format(families)) for family in families: name = family["name"] @@ -321,7 +322,7 @@ def _parse_extensions(self, vm_settings): extension_goal_states = vm_settings.get("extensionGoalStates") if extension_goal_states is not None: if not 
isinstance(extension_goal_states, list): - raise Exception("extension_goal_states should be an array") + raise Exception("extension_goal_states should be an array (got {0})".format(type(extension_goal_states))) # report only the type, since the value may contain secrets for extension_gs in extension_goal_states: extension = Extension() @@ -340,7 +341,7 @@ def _parse_extensions(self, vm_settings): additional_locations = extension_gs.get('additionalLocations') if additional_locations is not None: if not isinstance(additional_locations, list): - raise Exception('additionalLocations should be an array') + raise Exception('additionalLocations should be an array (got {0})'.format(additional_locations)) extension.manifest_uris.extend(additional_locations) # @@ -458,13 +459,18 @@ def _parse_dependency_level(depends_on, extension): # ... # } if not isinstance(depends_on, list): - raise Exception('dependsOn should be an array') + raise Exception('dependsOn should be an array ({0}) (got {1})'.format(extension.name, depends_on)) if not extension.supports_multi_config: # single-config - if len(depends_on) != 1: - raise Exception('dependsOn should be an array with exactly one item for single-config extensions') - extension.settings[0].dependencyLevel = depends_on[0]['dependencyLevel'] + length = len(depends_on) + if length > 1: + raise Exception('dependsOn should be an array with exactly one item for single-config extensions ({0}) (got {1})'.format(extension.name, depends_on)) + elif length == 0: + logger.warn('dependsOn is an empty array for extension {0}; setting the dependency level to 0'.format(extension.name)) + extension.settings[0].dependencyLevel = 0 + else: + extension.settings[0].dependencyLevel = depends_on[0]['dependencyLevel'] else: # multi-config settings_by_name = {} diff --git a/azurelinuxagent/common/protocol/goal_state.py b/azurelinuxagent/common/protocol/goal_state.py index f4c9604335..5dc4c1462b 100644 --- a/azurelinuxagent/common/protocol/goal_state.py +++ 
b/azurelinuxagent/common/protocol/goal_state.py @@ -24,7 +24,7 @@ import azurelinuxagent.common.logger as logger from azurelinuxagent.common.AgentGlobals import AgentGlobals from azurelinuxagent.common.datacontract import set_properties -from azurelinuxagent.common.exception import ProtocolError, ResourceGoneError +from azurelinuxagent.common.exception import ProtocolError, ResourceGoneError, VmSettingsError from azurelinuxagent.common.future import ustr from azurelinuxagent.common.protocol.extensions_goal_state_factory import ExtensionsGoalStateFactory from azurelinuxagent.common.protocol.hostplugin import VmSettingsNotSupported @@ -207,20 +207,25 @@ def _fetch_vm_settings(self, force_update=False): vm_settings, vm_settings_updated = (None, False) if conf.get_enable_fast_track(): + def save_to_history(etag, text): + # The vmSettings are updated independently of the WireServer goal state and they are saved to a separate directory + history = GoalStateHistory(datetime.datetime.utcnow().isoformat(), etag) + history.save_vm_settings(text) + try: vm_settings, vm_settings_updated = self._wire_client.get_host_plugin().fetch_vm_settings(force_update=force_update) except VmSettingsNotSupported: pass + except VmSettingsError as exception: + save_to_history(exception.etag, exception.vm_settings_text) except ResourceGoneError: # retry after refreshing the HostGAPlugin GoalState.update_host_plugin_headers(self._wire_client) vm_settings, vm_settings_updated = self._wire_client.get_host_plugin().fetch_vm_settings(force_update=force_update) if vm_settings_updated: - # The vmSettings are updated independently of the WireServer goal state and they are saved to a separate directory - history = GoalStateHistory(datetime.datetime.utcnow().isoformat(), vm_settings.etag) - history.save_vm_settings(vm_settings.get_redacted_text()) + save_to_history(vm_settings.etag, vm_settings.get_redacted_text()) return vm_settings diff --git a/azurelinuxagent/common/protocol/hostplugin.py 
b/azurelinuxagent/common/protocol/hostplugin.py index 81d9b2f0c2..74da755422 100644 --- a/azurelinuxagent/common/protocol/hostplugin.py +++ b/azurelinuxagent/common/protocol/hostplugin.py @@ -25,7 +25,7 @@ from azurelinuxagent.common import logger from azurelinuxagent.common.errorstate import ErrorState, ERROR_STATE_HOST_PLUGIN_FAILURE from azurelinuxagent.common.event import WALAEventOperation, add_event -from azurelinuxagent.common.exception import HttpError, ProtocolError, ResourceGoneError +from azurelinuxagent.common.exception import HttpError, ProtocolError, ResourceGoneError, VmSettingsError from azurelinuxagent.common.utils.flexible_version import FlexibleVersion from azurelinuxagent.common.future import ustr, httpclient from azurelinuxagent.common.protocol.healthservice import HealthService @@ -400,8 +400,11 @@ def fetch_vm_settings(self, force_update): Queries the vmSettings from the HostGAPlugin and returns an (ExtensionsGoalStateFromVmSettings, bool) tuple with the vmSettings and a boolean indicating if they are an updated (True) or a cached value (False). - Raises VmSettingsNotSupported if the HostGAPlugin does not support the vmSettings API, ResourceGoneError if the container ID and roleconfig name - need to be refreshed, or ProtocolError if the request fails for any other reason (e.g. not supported, time out, server error). + Raises + * VmSettingsNotSupported if the HostGAPlugin does not support the vmSettings API + * VmSettingsError if the HostGAPlugin returned invalid vmSettings (e.g. syntax error) + * ResourceGoneError if the container ID and roleconfig name need to be refreshed + * ProtocolError if the request fails for any other reason (e.g. 
not supported, time out, server error) """ def raise_not_supported(reset_state=False): if reset_state: @@ -488,6 +491,10 @@ def format_message(msg): except (ProtocolError, ResourceGoneError, VmSettingsNotSupported): raise + except VmSettingsError as vmSettingsError: + message = format_message(ustr(vmSettingsError)) + self._vm_settings_error_reporter.report_error(message) + raise except Exception as exception: if isinstance(exception, IOError) and "timed out" in ustr(exception): message = format_message("Timeout") diff --git a/tests/common/test_exception.py b/tests/common/test_exception.py deleted file mode 100644 index a7acc5d077..0000000000 --- a/tests/common/test_exception.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright 2019 Microsoft Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -# Requires Python 2.6+ and Openssl 1.0+ -# - -import inspect -import sys - -from azurelinuxagent.common.exception import AgentError -from tests.tools import AgentTestCase - - -class TestAgentError(AgentTestCase): - @classmethod - def setUpClass(cls): - AgentTestCase.setUpClass() - - cls.agent_exceptions = inspect.getmembers( - sys.modules["azurelinuxagent.common.exception"], - lambda member: inspect.isclass(member) and issubclass(member, AgentError)) - - def test_agent_exceptions_should_set_their_error_message(self): - for exception_name, exception_class in TestAgentError.agent_exceptions: - exception_instance = exception_class("A test Message") - - self.assertEqual("[{0}] A test Message".format(exception_name), str(exception_instance)) - - def test_agent_exceptions_should_include_the_inner_exception_in_their_error_message(self): - inner_exception = Exception("The inner exception") - - for exception_name, exception_class in TestAgentError.agent_exceptions: - exception_instance = exception_class("A test Message", inner_exception) - - self.assertEqual("[{0}] A test Message\nInner error: The inner exception".format(exception_name), str(exception_instance)) diff --git a/tests/data/hostgaplugin/ext_conf-empty_depends_on.xml b/tests/data/hostgaplugin/ext_conf-empty_depends_on.xml new file mode 100644 index 0000000000..a29424ced1 --- /dev/null +++ b/tests/data/hostgaplugin/ext_conf-empty_depends_on.xml @@ -0,0 +1,56 @@ + + + + + + Prod + + https://zrdfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Prod_uscentraleuap_manifest.xml + https://ardfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Prod_uscentraleuap_manifest.xml + + 2.5.0.2 + + + Test + + https://zrdfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_uscentraleuap_manifest.xml + 
https://ardfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_uscentraleuap_manifest.xml + + 2.5.0.2 + + + CentralUSEUAP + CRP + + + + MultipleExtensionsPerHandler + + + https://dcrcqabsr1.blob.core.windows.net/$system/edpxmal5j1.058b176d-445b-4e75-bd97-4911511b7d96.status?sv=2018-03-28&sr=b&sk=system-1&sig=U4KaLxlyYfgQ%2fie8RCwgMBSXa3E4vlW0ozPYOEHikoc%3d&se=9999-01-01T00%3a00%3a00Z&sp=w + + + + https://umsafwzhkbm1rfrhl0ws.blob.core.windows.net/5237dd14-0aad-f051-0fad-1e33e1b63091/5237dd14-0aad-f051-0fad-1e33e1b63091_manifest.xml + + + + + + + + { + "runtimeSettings": [ + { + "handlerSettings": { + "publicSettings": {"commandToExecute":"echo '09cd27e9-fbd6-48ad-be86-55f3783e0a23'"} + } + } + ] + } + + + + https://dcrcqabsr1.blob.core.windows.net/$system/edpxmal5j1.058b176d-445b-4e75-bd97-4911511b7d96.vmSettings?sv=2018-03-28&sr=b&sk=system-1&sig=mOMtcUyao4oNPMtcVhQjzMK%2bmGSJS3Y1MIKOJPjqzus%3d&se=9999-01-01T00%3a00%3a00Z&sp=r + + \ No newline at end of file diff --git a/tests/data/hostgaplugin/vm_settings-empty_depends_on.json b/tests/data/hostgaplugin/vm_settings-empty_depends_on.json new file mode 100644 index 0000000000..3f31b6f809 --- /dev/null +++ b/tests/data/hostgaplugin/vm_settings-empty_depends_on.json @@ -0,0 +1,68 @@ +{ + "hostGAPluginVersion": "1.0.8.115", + "vmSettingsSchemaVersion": "0.0", + "activityId": "2e7f8b5d-f637-4721-b757-cb190d49b4e9", + "correlationId": "1bef4c48-044e-4225-8f42-1d1eac1eb158", + "extensionsLastModifiedTickCount": 637693267431616449, + "extensionGoalStatesSource": "Fabric", + "StatusUploadBlob": { + "statusBlobType": "BlockBlob", + "value": "https://dcrcqabsr1.blob.core.windows.net/$system/edpxmal5j1.058b176d-445b-4e75-bd97-4911511b7d96.status?sv=2018-03-28&sr=b&sk=system-1&sig=U4KaLxlyYfgQ%2fie8RCwgMBSXa3E4vlW0ozPYOEHikoc%3d&se=9999-01-01T00%3a00%3a00Z&sp=w" + }, + "inVMMetadata": { + "subscriptionId": "8e037ad4-618f-4466-8bc8-5099d41ac15b", + "resourceGroupName": 
"rg-dc-qphvx25", + "vmName": "edpxmal5j1", + "location": "CentralUSEUAP", + "vmId": "058b176d-445b-4e75-bd97-4911511b7d96", + "vmSize": "Standard_D2s_v3", + "osType": "Linux" + }, + "requiredFeatures": [ + { + "name": "MultipleExtensionsPerHandler" + } + ], + "gaFamilies": [ + { + "Name": "Prod", + "Version": "2.5.0.2", + "Uris": [ + "https://zrdfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Prod_uscentraleuap_manifest.xml", + "https://ardfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Prod_uscentraleuap_manifest.xml" + ] + }, + { + "Name": "Test", + "Version": "2.5.0.2", + "Uris": [ + "https://zrdfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_uscentraleuap_manifest.xml", + "https://ardfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_uscentraleuap_manifest.xml" + ] + } + ], + "extensionGoalStates": [ + { + "name": "Microsoft.Azure.Extensions.CustomScript", + "version": "2.1.6", + "location": "https://umsavwggj2v40kvqhc0w.blob.core.windows.net/5237dd14-0aad-f051-0fad-1e33e1b63091/5237dd14-0aad-f051-0fad-1e33e1b63091_manifest.xml", + "failoverlocation": "https://umsanh4b5rfz0q0p4pwm.blob.core.windows.net/5237dd14-0aad-f051-0fad-1e33e1b63091/5237dd14-0aad-f051-0fad-1e33e1b63091_manifest.xml", + "additionalLocations": [ + "https://umsafwzhkbm1rfrhl0ws.blob.core.windows.net/5237dd14-0aad-f051-0fad-1e33e1b63091/5237dd14-0aad-f051-0fad-1e33e1b63091_manifest.xml" + ], + "state": "enabled", + "autoUpgrade": true, + "runAsStartupTask": false, + "isJson": true, + "useExactVersion": true, + "settingsSeqNo": 0, + "isMultiConfig": false, + "settings": [ + { + "publicSettings": "{\"commandToExecute\":\"echo '09cd27e9-fbd6-48ad-be86-55f3783e0a23'\"}" + } + ], + "dependsOn": [] + } + ] +} \ No newline at end of file diff --git 
a/tests/protocol/test_extensions_goal_state_from_extensions_config.py b/tests/protocol/test_extensions_goal_state_from_extensions_config.py index 3f2c4d0b3a..08a028e4e5 100644 --- a/tests/protocol/test_extensions_goal_state_from_extensions_config.py +++ b/tests/protocol/test_extensions_goal_state_from_extensions_config.py @@ -35,3 +35,13 @@ def test_it_should_parse_missing_status_upload_blob_as_none(self): self.assertIsNone(extensions_goal_state.status_upload_blob, "Expected status upload blob to be None") self.assertEqual("BlockBlob", extensions_goal_state.status_upload_blob_type, "Expected status upload blob to be Block") + + def test_it_should_parse_empty_depends_on_as_dependency_level_0(self): + data_file = mockwiredata.DATA_FILE_VM_SETTINGS.copy() + data_file["vm_settings"] = "hostgaplugin/vm_settings-empty_depends_on.json" + data_file["ext_conf"] = "hostgaplugin/ext_conf-empty_depends_on.xml" + with mock_wire_protocol(data_file) as protocol: + extensions = protocol.get_goal_state().extensions_goal_state.extensions + + self.assertEqual(0, extensions[0].settings[0].dependencyLevel, "Incorrect dependencyLevel}") + diff --git a/tests/protocol/test_goal_state.py b/tests/protocol/test_goal_state.py index fdd41303bf..331ea0598f 100644 --- a/tests/protocol/test_goal_state.py +++ b/tests/protocol/test_goal_state.py @@ -7,10 +7,11 @@ from azurelinuxagent.common.protocol.goal_state import GoalState, _GET_GOAL_STATE_MAX_ATTEMPTS from azurelinuxagent.common.exception import ProtocolError +from azurelinuxagent.common.utils import fileutil from azurelinuxagent.common.utils.archive import ARCHIVE_DIRECTORY_NAME from tests.protocol.mocks import mock_wire_protocol from tests.protocol import mockwiredata -from tests.tools import AgentTestCase, patch +from tests.tools import AgentTestCase, patch, load_data class GoalStateTestCase(AgentTestCase): @@ -80,5 +81,24 @@ def test_fetch_full_goal_state_should_save_goal_state_to_history_directory(self, len(protected_settings), "Could not 
find the expected number of redacted settings in {0}.\nExpected {1}.\n{2}".format(file_name, len(protected_settings), file_contents)) + @patch("azurelinuxagent.common.conf.get_enable_fast_track", return_value=True) + def test_it_should_save_vm_settings_on_parse_errors(self, _): + invalid_vm_settings_file = "hostgaplugin/vm_settings-parse_error.json" + data_file = mockwiredata.DATA_FILE_VM_SETTINGS.copy() + data_file["vm_settings"] = invalid_vm_settings_file + with mock_wire_protocol(data_file) as protocol: + protocol.mock_wire_data.set_etag(888) + + GoalState(protocol.client) + + matches = glob.glob(os.path.join(self.tmp_dir, ARCHIVE_DIRECTORY_NAME, "*_888")) + self.assertTrue(len(matches) == 1, "Expected one history directory for etag 888. Got: {0}".format(matches)) + + history_directory = matches[0] + vm_settings_file = os.path.join(history_directory, "VmSettings.json") + self.assertTrue(os.path.exists(vm_settings_file), "{0} was not saved".format(vm_settings_file)) + expected = load_data(invalid_vm_settings_file) + actual = fileutil.read_file(vm_settings_file) + self.assertEqual(expected, actual, "The vmSettings were not saved correctly") \ No newline at end of file From 2939a14728ec99d0d7bf4cd469d672227257f21d Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Tue, 15 Feb 2022 11:13:45 -0800 Subject: [PATCH 28/84] Cleanup history directory after every goal state (#2508) * Cleanup history directory after every goal state * pylint * pylint * fix index * remove extra change Co-authored-by: narrieta --- README.md | 10 ---- azurelinuxagent/common/conf.py | 5 -- azurelinuxagent/common/utils/archive.py | 25 ++++++---- azurelinuxagent/ga/env.py | 15 ------ azurelinuxagent/ga/exthandlers.py | 2 +- azurelinuxagent/ga/update.py | 23 +++++++++ config/waagent.conf | 3 -- tests/test_agent.py | 1 - tests/utils/test_archive.py | 62 +++++++++++++++---------- 9 files changed, 79 insertions(+), 67 deletions(-) diff --git a/README.md b/README.md index c69f43e6a8..a1bfab5a58 
100644 --- a/README.md +++ b/README.md @@ -177,7 +177,6 @@ A sample configuration file is shown below: ```yml Extensions.Enabled=y Extensions.GoalStatePeriod=6 -Extensions.GoalStateHistoryCleanupPeriod=1800 Provisioning.Agent=auto Provisioning.DeleteRootPassword=n Provisioning.RegenerateSshHostKeyPair=y @@ -243,15 +242,6 @@ _Note_: setting up this parameter to more than a few minutes can make the state the VM be reported as unresponsive/unavailable on the Azure portal. Also, this setting affects how fast the agent starts executing extensions. -#### __Extensions.GoalStateHistoryCleanupPeriod__ - -_Type: Integer_ -_Default: 1800 (30 minutes)_ - -How often to clean up the history folder of the agent. The agent keeps past goal -states on this folder, each goal state represented with a set of small files. The -history is useful to debug issues in the agent or extensions. - #### __AutoUpdate.Enabled__ _Type: Boolean_ diff --git a/azurelinuxagent/common/conf.py b/azurelinuxagent/common/conf.py index f733663d50..daf9d0e567 100644 --- a/azurelinuxagent/common/conf.py +++ b/azurelinuxagent/common/conf.py @@ -168,7 +168,6 @@ def load_conf_from_file(conf_file_path, conf=__conf__): __INTEGER_OPTIONS__ = { "Extensions.GoalStatePeriod": 6, "Extensions.InitialGoalStatePeriod": 6, - "Extensions.GoalStateHistoryCleanupPeriod": 1800, "OS.EnableFirewallPeriod": 300, "OS.RemovePersistentNetRulesPeriod": 30, "OS.RootDeviceScsiTimeoutPeriod": 30, @@ -377,10 +376,6 @@ def get_initial_goal_state_period(conf=__conf__): return conf.get_int("Extensions.InitialGoalStatePeriod", default_value=lambda: get_goal_state_period(conf=conf)) -def get_goal_state_history_cleanup_period(conf=__conf__): - return conf.get_int("Extensions.GoalStateHistoryCleanupPeriod", 1800) - - def get_allow_reset_sys_user(conf=__conf__): return conf.get_switch("Provisioning.AllowResetSysUser", False) diff --git a/azurelinuxagent/common/utils/archive.py b/azurelinuxagent/common/utils/archive.py index 7c081c819e..b40da7964d 
100644 --- a/azurelinuxagent/common/utils/archive.py +++ b/azurelinuxagent/common/utils/archive.py @@ -44,7 +44,10 @@ re.compile(r"^VmSettings.\d+\.json$"), re.compile(r"^(.*)\.(\d+)\.(agentsManifest)$", re.IGNORECASE), re.compile(r"^(.*)\.(\d+)\.(manifest\.xml)$", re.IGNORECASE), - re.compile(r"^(.*)\.(\d+)\.(xml)$", re.IGNORECASE) + re.compile(r"^(.*)\.(\d+)\.(xml)$", re.IGNORECASE), + re.compile(r"^SharedConfig\.xml$", re.IGNORECASE), + re.compile(r"^HostingEnvironmentConfig\.xml$", re.IGNORECASE), + re.compile(r"^RemoteAccess\.xml$", re.IGNORECASE) ] # @@ -59,8 +62,8 @@ # 2018-04-06T08:21:37.142697_N # 2018-04-06T08:21:37.142697_N.zip # -_ARCHIVE_PATTERNS_DIRECTORY = re.compile(r"^\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d+((_incarnation)?_(\d+))?$") -_ARCHIVE_PATTERNS_ZIP = re.compile(r"^\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d+((_incarnation)?_(\d+))?\.zip$") +_ARCHIVE_PATTERNS_DIRECTORY = re.compile(r"^\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d+((_incarnation)?_(\d+|status))?$") +_ARCHIVE_PATTERNS_ZIP = re.compile(r"^\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d+((_incarnation)?_(\d+|status))?\.zip$") _GOAL_STATE_FILE_NAME = "GoalState.xml" _VM_SETTINGS_FILE_NAME = "VmSettings.json" @@ -161,9 +164,11 @@ def purge(self): for state in states[_MAX_ARCHIVED_STATES:]: state.delete() - # legacy history files - for current_file in os.listdir(self._source): - full_path = os.path.join(self._source, current_file) + @staticmethod + def purge_legacy_goal_state_history(): + lib_dir = conf.get_lib_dir() + for current_file in os.listdir(lib_dir): + full_path = os.path.join(lib_dir, current_file) for pattern in _CACHE_PATTERNS: match = pattern.match(current_file) if match is not None: @@ -175,8 +180,12 @@ def purge(self): def archive(self): states = self._get_archive_states() - for state in states: - state.archive() + states.sort(reverse=True) + + if len(states) > 0: + # Skip the most recent goal state, since it may still be in use + for state in states[1:]: + state.archive() 
def _get_archive_states(self): states = [] diff --git a/azurelinuxagent/ga/env.py b/azurelinuxagent/ga/env.py index 771f47657c..5e17059345 100644 --- a/azurelinuxagent/ga/env.py +++ b/azurelinuxagent/ga/env.py @@ -31,7 +31,6 @@ from azurelinuxagent.common.interfaces import ThreadHandlerInterface from azurelinuxagent.common.osutil import get_osutil from azurelinuxagent.common.protocol.util import get_protocol_util -from azurelinuxagent.common.utils.archive import StateArchiver from azurelinuxagent.common.version import AGENT_NAME, CURRENT_VERSION from azurelinuxagent.ga.periodic_operation import PeriodicOperation @@ -99,19 +98,6 @@ def _get_dhcp_client_pid(self): return pid -class CleanupGoalStateHistory(PeriodicOperation): - def __init__(self): - super(CleanupGoalStateHistory, self).__init__(conf.get_goal_state_history_cleanup_period()) - self.archiver = StateArchiver(conf.get_lib_dir()) - - def _operation(self): - """ - Purge history and create a .zip of the history that has been preserved. 
- """ - self.archiver.purge() - self.archiver.archive() - - class EnableFirewall(PeriodicOperation): def __init__(self, osutil, protocol): super(EnableFirewall, self).__init__(conf.get_enable_firewall_period()) @@ -239,7 +225,6 @@ def daemon(self): periodic_operations = [ RemovePersistentNetworkRules(osutil), MonitorDhcpClientRestart(osutil), - CleanupGoalStateHistory() ] if conf.enable_firewall(): diff --git a/azurelinuxagent/ga/exthandlers.py b/azurelinuxagent/ga/exthandlers.py index 49eb4cf0cf..8e35bf58c5 100644 --- a/azurelinuxagent/ga/exthandlers.py +++ b/azurelinuxagent/ga/exthandlers.py @@ -965,7 +965,7 @@ def write_ext_handlers_status_to_info_file(self, vm_status, incarnation_changed) # On new goal state, move the last status report for the previous goal state to the history folder last_modified = os.path.getmtime(status_file) timestamp = datetime.datetime.utcfromtimestamp(last_modified).isoformat() - GoalStateHistory(timestamp).save_status_file(status_file) + GoalStateHistory(timestamp, "status").save_status_file(status_file) # Now create/overwrite the status file; this file is kept for debugging purposes only status_blob_text = self.protocol.get_status_blob_data() diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py index f389c5cfcf..f5785bc4d2 100644 --- a/azurelinuxagent/ga/update.py +++ b/azurelinuxagent/ga/update.py @@ -50,6 +50,7 @@ VERSION_0 from azurelinuxagent.common.protocol.util import get_protocol_util from azurelinuxagent.common.utils import shellutil +from azurelinuxagent.common.utils.archive import StateArchiver from azurelinuxagent.common.utils.flexible_version import FlexibleVersion from azurelinuxagent.common.utils.networkutil import AddFirewallRules from azurelinuxagent.common.utils.shellutil import CommandError @@ -395,6 +396,8 @@ def run(self, debug=False): logger.info("Goal State Period: {0} sec. 
This indicates how often the agent checks for new goal states and reports status.", self._goal_state_period) + self._cleanup_legacy_goal_state_history() + while self.is_running: self._check_daemon_running(debug) self._check_threads_running(all_thread_handlers) @@ -590,11 +593,31 @@ def _process_goal_state(self, exthandlers_handler, remote_access_handler): if self._processing_new_incarnation(): remote_access_handler.run() + + if self._processing_new_extensions_goal_state(): + try: + UpdateHandler._cleanup_goal_state_history() + except Exception as exception: + logger.warn("Error cleaning up the goal state history: {0}", ustr(exception)) + finally: if self._goal_state is not None: self._last_incarnation = self._goal_state.incarnation self._last_extensions_gs_id = self._goal_state.extensions_goal_state.id + @staticmethod + def _cleanup_goal_state_history(): + archiver = StateArchiver(conf.get_lib_dir()) + archiver.purge() + archiver.archive() + + @staticmethod + def _cleanup_legacy_goal_state_history(): + try: + StateArchiver.purge_legacy_goal_state_history() + except Exception as exception: + logger.warn("Error removing legacy history files: {0}", ustr(exception)) + def __get_vmagent_update_status(self, protocol, incarnation_changed): """ This function gets the VMAgent update status as per the last GoalState. diff --git a/config/waagent.conf b/config/waagent.conf index c7d10e2f19..0dd988840b 100644 --- a/config/waagent.conf +++ b/config/waagent.conf @@ -9,9 +9,6 @@ Extensions.Enabled=y # How often (in seconds) to poll for new goal states Extensions.GoalStatePeriod=6 -# How often (in seconds) to clean up the goal state history. The default value is 30 min -Extensions.GoalStateHistoryCleanupPeriod=1800 - # Which provisioning agent to use. Supported values are "auto" (default), "waagent", # "cloud-init", or "disabled". 
Provisioning.Agent=auto diff --git a/tests/test_agent.py b/tests/test_agent.py index b35b142470..56ac1604d2 100644 --- a/tests/test_agent.py +++ b/tests/test_agent.py @@ -46,7 +46,6 @@ EnableOverProvisioning = True Extension.LogDir = /var/log/azure Extensions.Enabled = True -Extensions.GoalStateHistoryCleanupPeriod = 1800 Extensions.GoalStatePeriod = 6 Extensions.InitialGoalStatePeriod = 6 HttpProxy.Host = None diff --git a/tests/utils/test_archive.py b/tests/utils/test_archive.py index 7834a7d25b..9cf31b9ebc 100644 --- a/tests/utils/test_archive.py +++ b/tests/utils/test_archive.py @@ -53,43 +53,36 @@ def _parse_archive_name(name): incarnation_no_ext = os.path.splitext(incarnation_ext)[0] return timestamp_str, incarnation_no_ext - def test_archive01(self): - """ - StateArchiver should archive all history directories by - - 1. Creating a .zip of a timestamped directory's files - 2. Saving the .zip to /var/lib/waagent/history/ - 2. Deleting the timestamped directory - """ - temp_files = [ + def test_archive_should_zip_all_but_the_latest_goal_state_in_the_history_folder(self): + test_files = [ 'GoalState.xml', 'Prod.manifest.xml', 'Prod.agentsManifest', 'Microsoft.Azure.Extensions.CustomScript.xml' ] - # this directory matches the pattern that StateArchiver.archive() searches for - temp_directory = os.path.join(self.history_dir, datetime.utcnow().isoformat() + "_incarnation_0") - - for current_file in temp_files: - self._write_file(os.path.join(temp_directory, current_file)) + # these directories match the pattern that StateArchiver.archive() searches for + test_directories = [] + for i in range(0, 3): + timestamp = (datetime.utcnow() + timedelta(minutes=i)).isoformat() + directory = os.path.join(self.history_dir, "{0}_incarnation_{1}".format(timestamp, i)) + for current_file in test_files: + self._write_file(os.path.join(directory, current_file)) + test_directories.append(directory) test_subject = StateArchiver(self.tmp_dir) test_subject.archive() - timestamp_zips = 
os.listdir(self.history_dir) - self.assertEqual(1, len(timestamp_zips)) + for directory in test_directories[0:2]: + zip_file = directory + ".zip" + self.assertTrue(os.path.exists(zip_file), "{0} was not archived (could not find {1})".format(directory, zip_file)) - zip_fn = timestamp_zips[0] # 2000-01-01T00:00:00.000000_incarnation_N.zip - timestamp_str, incarnation = self._parse_archive_name(zip_fn) + missing_file = self.assert_zip_contains(zip_file, test_files) + self.assertEqual(None, missing_file, missing_file) - self.assert_is_iso8601(timestamp_str) - timestamp = self.parse_isoformat(timestamp_str) - self.assert_datetime_close_to(timestamp, datetime.utcnow(), timedelta(seconds=30)) - self.assertEqual("0", incarnation) + self.assertFalse(os.path.exists(directory), "{0} was not removed after being archived ".format(directory)) - zip_full = os.path.join(self.history_dir, zip_fn) - self.assertEqual(self.assert_zip_contains(zip_full, temp_files), None) + self.assertTrue(os.path.exists(test_directories[2]), "{0}, the latest goal state, should not have being removed".format(test_directories[2])) def test_archive02(self): """ @@ -136,6 +129,27 @@ def test_archive02(self): filename = "{0}_0.zip".format(timestamp) self.assertTrue(filename in archived_entries, "'{0}' is not in the list of unpurged entires".format(filename)) + def test_purge_legacy_goal_state_history(self): + with patch("azurelinuxagent.common.conf.get_lib_dir", return_value=self.tmp_dir): + legacy_files = [ + 'GoalState.1.xml', + 'VmSettings.1.json', + 'Prod.1.manifest.xml', + 'ExtensionsConfig.1.xml', + 'Microsoft.Azure.Extensions.CustomScript.1.xml', + 'SharedConfig.xml', + 'HostingEnvironmentConfig.xml', + 'RemoteAccess.xml' + ] + legacy_files = [os.path.join(self.tmp_dir, f) for f in legacy_files] + for f in legacy_files: + self._write_file(f) + + StateArchiver.purge_legacy_goal_state_history() + + for f in legacy_files: + self.assertFalse(os.path.exists(f), "Legacy file {0} was not 
removed".format(f)) + def test_archive03(self): """ All archives should be purged, both with the legacy naming (with incarnation number) and with the new naming. From 9a11fa997b3edc8aa474badc115822faf081b771 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Tue, 15 Feb 2022 13:38:15 -0800 Subject: [PATCH 29/84] ignore firewall packets reset error, check enable firewall config flag and extend cgroup extension monitoring expiry time (#2509) * ignore firewall packets reset error, check enable firewall config flag and extend cgroup extension monitoring expiry time (cherry picked from commit 0ac5dc17704daee941873a19312e70b40f259a5e) * fix test cases * fixed typo --- azurelinuxagent/common/conf.py | 6 +- azurelinuxagent/common/osutil/default.py | 5 +- azurelinuxagent/ga/update.py | 3 + tests/common/osutil/test_default.py | 7 + tests/ga/test_update.py | 186 ++++++++++++++--------- tests/test_agent.py | 2 +- 6 files changed, 132 insertions(+), 77 deletions(-) diff --git a/azurelinuxagent/common/conf.py b/azurelinuxagent/common/conf.py index daf9d0e567..af23a04c45 100644 --- a/azurelinuxagent/common/conf.py +++ b/azurelinuxagent/common/conf.py @@ -160,7 +160,7 @@ def load_conf_from_file(conf_file_path, conf=__conf__): "ResourceDisk.MountOptions": None, "ResourceDisk.Filesystem": "ext3", "AutoUpdate.GAFamily": "Prod", - "Debug.CgroupMonitorExpiryTime": "2022-01-31", + "Debug.CgroupMonitorExpiryTime": "2022-03-31", "Debug.CgroupMonitorExtensionName": "Microsoft.Azure.Monitor.AzureMonitorLinuxAgent", } @@ -546,11 +546,11 @@ def get_agent_cpu_quota(conf=__conf__): def get_cgroup_monitor_expiry_time (conf=__conf__): """ - cgroups monitoring disabled after expiry time + cgroups monitoring for pilot extensions disabled after expiry time NOTE: This option is experimental and may be removed in later versions of the Agent. 
""" - return conf.get("Debug.CgroupMonitorExpiryTime", "2022-01-31") + return conf.get("Debug.CgroupMonitorExpiryTime", "2022-03-31") def get_cgroup_monitor_extension_name (conf=__conf__): """ diff --git a/azurelinuxagent/common/osutil/default.py b/azurelinuxagent/common/osutil/default.py index f18ff9048c..d2ae36e834 100644 --- a/azurelinuxagent/common/osutil/default.py +++ b/azurelinuxagent/common/osutil/default.py @@ -167,9 +167,10 @@ def get_firewall_dropped_packets(self, dst_ip=None): return int(m.group(1)) except Exception as e: - if isinstance(e, CommandError) and e.returncode == 3: # pylint: disable=E1101 - # Transient error that we ignore. This code fires every loop + if isinstance(e, CommandError) and (e.returncode == 3 or e.returncode == 4): # pylint: disable=E1101 + # Transient error that we ignore returncode 3. This code fires every loop # of the daemon (60m), so we will get the value eventually. + # ignore returncode 4 as temporary fix (RULE_REPLACE failed (Invalid argument)) return 0 logger.warn("Failed to get firewall packets: {0}", ustr(e)) return -1 diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py index f5785bc4d2..8d4f0a24d6 100644 --- a/azurelinuxagent/ga/update.py +++ b/azurelinuxagent/ga/update.py @@ -1300,6 +1300,9 @@ def _ensure_firewall_rules_persisted(dst_ip): def _add_accept_tcp_firewall_rule_if_not_enabled(self, dst_ip): + if not conf.enable_firewall(): + return + def _execute_run_command(command): # Helper to execute a run command, returns True if no exception # Here we primarily check if an iptable rule exist. 
True if it exits , false if not diff --git a/tests/common/osutil/test_default.py b/tests/common/osutil/test_default.py index 0f7d445f73..2817a63481 100644 --- a/tests/common/osutil/test_default.py +++ b/tests/common/osutil/test_default.py @@ -716,6 +716,13 @@ def test_get_firewall_dropped_packets_should_ignore_transient_errors(self): mock_iptables.set_command(osutil.get_firewall_packets_command(mock_iptables.wait), exit_code=3, output="can't initialize iptables table `security': iptables who? (do you need to insmod?)") self.assertEqual(0, osutil.DefaultOSUtil().get_firewall_dropped_packets()) + def test_get_firewall_dropped_packets_should_ignore_returncode_4(self): + + with TestOSUtil._mock_iptables() as mock_iptables: + with patch.object(osutil, '_enable_firewall', True): + mock_iptables.set_command(osutil.get_firewall_packets_command(mock_iptables.wait), exit_code=4, output="iptables v1.8.2 (nf_tables): RULE_REPLACE failed (Invalid argument): rule in chain OUTPUT") + self.assertEqual(0, osutil.DefaultOSUtil().get_firewall_dropped_packets()) + def test_get_firewall_dropped_packets(self): destination = '168.63.129.16' diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index 013c9223a9..f9f1034a1d 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -1708,87 +1708,131 @@ def test_it_should_set_dns_tcp_iptable_if_drop_available_accept_unavailable(self with TestOSUtil._mock_iptables() as mock_iptables: with _get_update_handler(test_data=DATA_FILE) as (update_handler, _): - with patch.object(osutil, '_enable_firewall', True): - # drop rule is present - mock_iptables.set_command(AddFirewallRules.get_wire_non_root_drop_rule(AddFirewallRules.CHECK_COMMAND, mock_iptables.destination, - wait=mock_iptables.wait), exit_code=0) - # non root tcp iptable rule is absent - mock_iptables.set_command(AddFirewallRules.get_accept_tcp_rule(AddFirewallRules.CHECK_COMMAND, mock_iptables.destination, - wait=mock_iptables.wait), exit_code=1) - 
update_handler.run(debug=True) - - drop_check_command = TestOSUtil._command_to_string(AddFirewallRules.get_wire_non_root_drop_rule(AddFirewallRules.CHECK_COMMAND, mock_iptables.destination, - wait=mock_iptables.wait)) - accept_tcp_check_rule = TestOSUtil._command_to_string(AddFirewallRules.get_accept_tcp_rule(AddFirewallRules.CHECK_COMMAND, mock_iptables.destination, - wait=mock_iptables.wait)) - accept_tcp_insert_rule = TestOSUtil._command_to_string(AddFirewallRules.get_accept_tcp_rule(AddFirewallRules.INSERT_COMMAND, mock_iptables.destination, - wait=mock_iptables.wait)) - - # Filtering the mock iptable command calls with only the once related to this test. - filtered_mock_iptable_calls = [cmd for cmd in mock_iptables.command_calls if cmd in [drop_check_command, accept_tcp_check_rule, accept_tcp_insert_rule]] - - self.assertEqual(len(filtered_mock_iptable_calls), 3, "Incorrect number of calls to iptables: [{0}]".format(mock_iptables.command_calls)) - self.assertEqual(filtered_mock_iptable_calls[0], drop_check_command, - "The first command should check the drop rule") - self.assertEqual(filtered_mock_iptable_calls[1], accept_tcp_check_rule, - "The second command should check the accept rule") - self.assertEqual(filtered_mock_iptable_calls[2], accept_tcp_insert_rule, - "The third command should add the accept rule") + with patch('azurelinuxagent.common.conf.enable_firewall', return_value=True): + with patch.object(osutil, '_enable_firewall', True): + # drop rule is present + mock_iptables.set_command( + AddFirewallRules.get_wire_non_root_drop_rule(AddFirewallRules.CHECK_COMMAND, + mock_iptables.destination, + wait=mock_iptables.wait), exit_code=0) + # non root tcp iptable rule is absent + mock_iptables.set_command(AddFirewallRules.get_accept_tcp_rule(AddFirewallRules.CHECK_COMMAND, + mock_iptables.destination, + wait=mock_iptables.wait), + exit_code=1) + + update_handler._add_accept_tcp_firewall_rule_if_not_enabled(mock_iptables.destination) + + drop_check_command 
= TestOSUtil._command_to_string( + AddFirewallRules.get_wire_non_root_drop_rule(AddFirewallRules.CHECK_COMMAND, + mock_iptables.destination, + wait=mock_iptables.wait)) + accept_tcp_check_rule = TestOSUtil._command_to_string( + AddFirewallRules.get_accept_tcp_rule(AddFirewallRules.CHECK_COMMAND, + mock_iptables.destination, + wait=mock_iptables.wait)) + accept_tcp_insert_rule = TestOSUtil._command_to_string( + AddFirewallRules.get_accept_tcp_rule(AddFirewallRules.INSERT_COMMAND, + mock_iptables.destination, + wait=mock_iptables.wait)) + + # Filtering the mock iptable command calls with only the ones related to this test. + filtered_mock_iptable_calls = [cmd for cmd in mock_iptables.command_calls if + cmd in [drop_check_command, accept_tcp_check_rule, + accept_tcp_insert_rule]] + + self.assertEqual(len(filtered_mock_iptable_calls), 3, + "Incorrect number of calls to iptables: [{0}]".format( + mock_iptables.command_calls)) + self.assertEqual(filtered_mock_iptable_calls[0], drop_check_command, + "The first command should check the drop rule") + self.assertEqual(filtered_mock_iptable_calls[1], accept_tcp_check_rule, + "The second command should check the accept rule") + self.assertEqual(filtered_mock_iptable_calls[2], accept_tcp_insert_rule, + "The third command should add the accept rule") def test_it_should_not_set_dns_tcp_iptable_if_drop_unavailable(self): with TestOSUtil._mock_iptables() as mock_iptables: with _get_update_handler(test_data=DATA_FILE) as (update_handler, _): - with patch.object(osutil, '_enable_firewall', True): - # drop rule is not available - mock_iptables.set_command(AddFirewallRules.get_wire_non_root_drop_rule(AddFirewallRules.CHECK_COMMAND, mock_iptables.destination, - wait=mock_iptables.wait), exit_code=1) - - update_handler.run(debug=True) - - drop_check_command = TestOSUtil._command_to_string(AddFirewallRules.get_wire_non_root_drop_rule(AddFirewallRules.CHECK_COMMAND, mock_iptables.destination, - wait=mock_iptables.wait)) - 
accept_tcp_check_rule = TestOSUtil._command_to_string(AddFirewallRules.get_accept_tcp_rule(AddFirewallRules.CHECK_COMMAND, mock_iptables.destination, - wait=mock_iptables.wait)) - accept_tcp_insert_rule = TestOSUtil._command_to_string(AddFirewallRules.get_accept_tcp_rule(AddFirewallRules.INSERT_COMMAND, mock_iptables.destination, - wait=mock_iptables.wait)) - - # Filtering the mock iptable command calls with only the once related to this test. - filtered_mock_iptable_calls = [cmd for cmd in mock_iptables.command_calls if cmd in [drop_check_command, accept_tcp_check_rule, accept_tcp_insert_rule]] - - self.assertEqual(len(filtered_mock_iptable_calls), 1, "Incorrect number of calls to iptables: [{0}]".format(mock_iptables.command_calls)) - self.assertEqual(filtered_mock_iptable_calls[0], drop_check_command, - "The first command should check the drop rule") + with patch('azurelinuxagent.common.conf.enable_firewall', return_value=True): + with patch.object(osutil, '_enable_firewall', True): + # drop rule is not available + mock_iptables.set_command( + AddFirewallRules.get_wire_non_root_drop_rule(AddFirewallRules.CHECK_COMMAND, + mock_iptables.destination, + wait=mock_iptables.wait), exit_code=1) + + update_handler._add_accept_tcp_firewall_rule_if_not_enabled(mock_iptables.destination) + + drop_check_command = TestOSUtil._command_to_string( + AddFirewallRules.get_wire_non_root_drop_rule(AddFirewallRules.CHECK_COMMAND, + mock_iptables.destination, + wait=mock_iptables.wait)) + accept_tcp_check_rule = TestOSUtil._command_to_string( + AddFirewallRules.get_accept_tcp_rule(AddFirewallRules.CHECK_COMMAND, + mock_iptables.destination, + wait=mock_iptables.wait)) + accept_tcp_insert_rule = TestOSUtil._command_to_string( + AddFirewallRules.get_accept_tcp_rule(AddFirewallRules.INSERT_COMMAND, + mock_iptables.destination, + wait=mock_iptables.wait)) + + # Filtering the mock iptable command calls with only the ones related to this test. 
+ filtered_mock_iptable_calls = [cmd for cmd in mock_iptables.command_calls if + cmd in [drop_check_command, accept_tcp_check_rule, + accept_tcp_insert_rule]] + + self.assertEqual(len(filtered_mock_iptable_calls), 1, + "Incorrect number of calls to iptables: [{0}]".format( + mock_iptables.command_calls)) + self.assertEqual(filtered_mock_iptable_calls[0], drop_check_command, + "The first command should check the drop rule") def test_it_should_not_set_dns_tcp_iptable_if_drop_and_accept_available(self): with TestOSUtil._mock_iptables() as mock_iptables: with _get_update_handler(test_data=DATA_FILE) as (update_handler, _): - with patch.object(osutil, '_enable_firewall', True): - # drop rule is available - mock_iptables.set_command(AddFirewallRules.get_wire_non_root_drop_rule(AddFirewallRules.CHECK_COMMAND, mock_iptables.destination, - wait=mock_iptables.wait), exit_code=0) - # non root tcp iptable rule is available - mock_iptables.set_command(AddFirewallRules.get_accept_tcp_rule(AddFirewallRules.CHECK_COMMAND, mock_iptables.destination, - wait=mock_iptables.wait), exit_code=0) - - update_handler.run(debug=True) - - drop_check_command = TestOSUtil._command_to_string(AddFirewallRules.get_wire_non_root_drop_rule(AddFirewallRules.CHECK_COMMAND, mock_iptables.destination, - wait=mock_iptables.wait)) - accept_tcp_check_rule = TestOSUtil._command_to_string(AddFirewallRules.get_accept_tcp_rule(AddFirewallRules.CHECK_COMMAND, mock_iptables.destination, - wait=mock_iptables.wait)) - accept_tcp_insert_rule = TestOSUtil._command_to_string(AddFirewallRules.get_accept_tcp_rule(AddFirewallRules.INSERT_COMMAND, mock_iptables.destination, - wait=mock_iptables.wait)) - - # Filtering the mock iptable command calls with only the once related to this test. 
- filtered_mock_iptable_calls = [cmd for cmd in mock_iptables.command_calls if cmd in [drop_check_command, accept_tcp_check_rule, accept_tcp_insert_rule]] - - self.assertEqual(len(filtered_mock_iptable_calls), 2, "Incorrect number of calls to iptables: [{0}]".format(mock_iptables.command_calls)) - self.assertEqual(filtered_mock_iptable_calls[0], drop_check_command, - "The first command should check the drop rule") - self.assertEqual(filtered_mock_iptable_calls[1], accept_tcp_check_rule, - "The second command should check the accept rule") + with patch('azurelinuxagent.common.conf.enable_firewall', return_value=True): + with patch.object(osutil, '_enable_firewall', True): + # drop rule is available + mock_iptables.set_command( + AddFirewallRules.get_wire_non_root_drop_rule(AddFirewallRules.CHECK_COMMAND, + mock_iptables.destination, + wait=mock_iptables.wait), exit_code=0) + # non root tcp iptable rule is available + mock_iptables.set_command(AddFirewallRules.get_accept_tcp_rule(AddFirewallRules.CHECK_COMMAND, + mock_iptables.destination, + wait=mock_iptables.wait), + exit_code=0) + + update_handler._add_accept_tcp_firewall_rule_if_not_enabled(mock_iptables.destination) + + drop_check_command = TestOSUtil._command_to_string( + AddFirewallRules.get_wire_non_root_drop_rule(AddFirewallRules.CHECK_COMMAND, + mock_iptables.destination, + wait=mock_iptables.wait)) + accept_tcp_check_rule = TestOSUtil._command_to_string( + AddFirewallRules.get_accept_tcp_rule(AddFirewallRules.CHECK_COMMAND, + mock_iptables.destination, + wait=mock_iptables.wait)) + accept_tcp_insert_rule = TestOSUtil._command_to_string( + AddFirewallRules.get_accept_tcp_rule(AddFirewallRules.INSERT_COMMAND, + mock_iptables.destination, + wait=mock_iptables.wait)) + + # Filtering the mock iptable command calls with only the ones related to this test. 
+ filtered_mock_iptable_calls = [cmd for cmd in mock_iptables.command_calls if + cmd in [drop_check_command, accept_tcp_check_rule, + accept_tcp_insert_rule]] + + self.assertEqual(len(filtered_mock_iptable_calls), 2, + "Incorrect number of calls to iptables: [{0}]".format( + mock_iptables.command_calls)) + self.assertEqual(filtered_mock_iptable_calls[0], drop_check_command, + "The first command should check the drop rule") + self.assertEqual(filtered_mock_iptable_calls[1], accept_tcp_check_rule, + "The second command should check the accept rule") @contextlib.contextmanager def _setup_test_for_ext_event_dirs_retention(self): diff --git a/tests/test_agent.py b/tests/test_agent.py index 56ac1604d2..f5a3b90bb0 100644 --- a/tests/test_agent.py +++ b/tests/test_agent.py @@ -36,7 +36,7 @@ Debug.CgroupDisableOnProcessCheckFailure = True Debug.CgroupDisableOnQuotaCheckFailure = True Debug.CgroupLogMetrics = False -Debug.CgroupMonitorExpiryTime = 2022-01-31 +Debug.CgroupMonitorExpiryTime = 2022-03-31 Debug.CgroupMonitorExtensionName = Microsoft.Azure.Monitor.AzureMonitorLinuxAgent Debug.EnableFastTrack = False Debug.EnableGAVersioning = False From 45a005e495c827084c4989454878f30c2a869698 Mon Sep 17 00:00:00 2001 From: Kevin Clark Date: Thu, 17 Feb 2022 12:12:36 -0800 Subject: [PATCH 30/84] Temporarily disable get vmsize while UTs are hanging (#2515) --- azurelinuxagent/ga/update.py | 4 +++- tests/ga/test_update.py | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py index 8d4f0a24d6..91c7745696 100644 --- a/azurelinuxagent/ga/update.py +++ b/azurelinuxagent/ga/update.py @@ -1215,7 +1215,9 @@ def _send_heartbeat_telemetry(self, protocol): auto_update_enabled = 1 if conf.get_autoupdate_enabled() else 0 # Include VMSize in the heartbeat message because the kusto table does not have # a separate column for it (or architecture). 
- vmsize = self._get_vm_size(protocol) + # Temporarily disable vmsize because it is breaking UTs. TODO: Re-enable when this is fixed. + # vmsize = self._get_vm_size(protocol) + vmsize = "unknown" telemetry_msg = "{0};{1};{2};{3};{4};{5}".format(self._heartbeat_counter, self._heartbeat_id, dropped_packets, self._heartbeat_update_goal_state_error_count, diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index f9f1034a1d..3dc84e7756 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -1567,6 +1567,8 @@ def test_telemetry_heartbeat_creates_event(self, patch_add_event, patch_info, *_ self.assertTrue(any(call_args[0] == "[HEARTBEAT] Agent {0} is running as the goal state agent {1}" for call_args in patch_info.call_args), "The heartbeat was not written to the agent's log") + + @skip_if_predicate_true(lambda: True, "Enable this test when VMSize bug hanging Uts is fixed.") @patch("azurelinuxagent.ga.update.add_event") @patch("azurelinuxagent.common.protocol.imds.ImdsClient") def test_telemetry_heartbeat_retries_failed_vm_size_fetch(self, mock_imds_factory, patch_add_event, *_): From 231b22ff0c2240bc105787b70d316c885d081d4b Mon Sep 17 00:00:00 2001 From: Kevin Clark Date: Thu, 17 Feb 2022 13:51:28 -0800 Subject: [PATCH 31/84] Handle OOM errors by stopping the periodic log collector (#2510) (#2516) (cherry picked from commit 9305e7f2811e24faf9198a93e397ce8023e3e3fb) --- azurelinuxagent/common/logcollector.py | 1 + azurelinuxagent/ga/collect_logs.py | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/azurelinuxagent/common/logcollector.py b/azurelinuxagent/common/logcollector.py index a462c5e206..9b88681fa1 100644 --- a/azurelinuxagent/common/logcollector.py +++ b/azurelinuxagent/common/logcollector.py @@ -45,6 +45,7 @@ CGROUPS_UNIT = "collect-logs.scope" +FORCE_KILLED_ERRCODE = -9 INVALID_CGROUPS_ERRCODE = 2 _MUST_COLLECT_FILES = [ diff --git a/azurelinuxagent/ga/collect_logs.py b/azurelinuxagent/ga/collect_logs.py index 
e494d03d13..b26e260ca3 100644 --- a/azurelinuxagent/ga/collect_logs.py +++ b/azurelinuxagent/ga/collect_logs.py @@ -203,6 +203,12 @@ def exec_command(output_file): if e.returncode == logcollector.INVALID_CGROUPS_ERRCODE: # pylint: disable=no-member logger.info("Disabling periodic log collection until service restart due to process error.") self.stop() + + # When the OOM killer is invoked on the log collector process, this error code is + # returned. Stop the periodic operation because it seems to be persistent. + elif e.returncode == logcollector.FORCE_KILLED_ERRCODE: # pylint: disable=no-member + logger.info("Disabling periodic log collection until service restart due to OOM error.") + self.stop() else: logger.info(err_msg) From acd63016eb38985e2e06a778dcbddc15aa03cc4c Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Fri, 18 Feb 2022 11:54:13 -0800 Subject: [PATCH 32/84] Implement FastTrack handshake with CRP (#2514) * Implement FastTrack handshake with CRP * Added unit tests * Set minimum hostgaplugin version to 117 * typos * fix comment, merge Co-authored-by: narrieta --- .../common/agent_supported_feature.py | 1 + .../common/protocol/extensions_goal_state.py | 14 ++ ...sions_goal_state_from_extensions_config.py | 6 +- .../extensions_goal_state_from_vm_settings.py | 6 +- azurelinuxagent/common/protocol/goal_state.py | 1 + azurelinuxagent/common/protocol/hostplugin.py | 5 +- azurelinuxagent/common/protocol/restapi.py | 8 + azurelinuxagent/common/protocol/wire.py | 9 +- azurelinuxagent/ga/exthandlers.py | 3 +- azurelinuxagent/ga/update.py | 18 +- ...tings-difference_in_required_features.json | 2 +- .../vm_settings-empty_depends_on.json | 2 +- .../vm_settings-invalid_blob_type.json | 2 +- .../vm_settings-no_status_upload_blob.json | 2 +- .../hostgaplugin/vm_settings-out-of-sync.json | 2 +- .../hostgaplugin/vm_settings-parse_error.json | 2 +- .../vm_settings-requested_version.json | 2 +- .../vm_settings-unsupported_version.json | 2 +- 
tests/data/hostgaplugin/vm_settings.json | 2 +- tests/data/wire/ext_conf_mock_status_blob.xml | 26 --- tests/ga/extension_emulator.py | 9 +- tests/ga/test_report_status.py | 149 ++++++++++++++++ tests/ga/test_update.py | 165 ------------------ tests/protocol/HttpRequestPredicates.py | 9 + tests/protocol/mockwiredata.py | 20 ++- tests/protocol/test_extensions_goal_state.py | 27 --- ...sions_goal_state_from_extensions_config.py | 17 +- ..._extensions_goal_state_from_vm_settings.py | 111 +++++++----- tests/protocol/test_goal_state.py | 12 +- 29 files changed, 332 insertions(+), 302 deletions(-) delete mode 100644 tests/data/wire/ext_conf_mock_status_blob.xml create mode 100644 tests/ga/test_report_status.py delete mode 100644 tests/protocol/test_extensions_goal_state.py diff --git a/azurelinuxagent/common/agent_supported_feature.py b/azurelinuxagent/common/agent_supported_feature.py index e10abfecb0..d7f93e2245 100644 --- a/azurelinuxagent/common/agent_supported_feature.py +++ b/azurelinuxagent/common/agent_supported_feature.py @@ -22,6 +22,7 @@ class SupportedFeatureNames(object): """ MultiConfig = "MultipleExtensionsPerHandler" ExtensionTelemetryPipeline = "ExtensionTelemetryPipeline" + FastTrack = "FastTrack" class AgentSupportedFeature(object): diff --git a/azurelinuxagent/common/protocol/extensions_goal_state.py b/azurelinuxagent/common/protocol/extensions_goal_state.py index d2d8421201..1a628ab078 100644 --- a/azurelinuxagent/common/protocol/extensions_goal_state.py +++ b/azurelinuxagent/common/protocol/extensions_goal_state.py @@ -22,6 +22,12 @@ from azurelinuxagent.common.utils import textutil +class GoalStateChannel(object): + WireServer = "WireServer" + HostGAPlugin = "HostGAPlugin" + Empty = "Empty" + + class ExtensionsGoalState(object): """ ExtensionsGoalState represents the extensions information in the goal state; that information can originate from @@ -50,6 +56,10 @@ def correlation_id(self): def created_on_timestamp(self): raise NotImplementedError() + 
@property + def source_channel(self): + raise NotImplementedError() + @property def status_upload_blob(self): raise NotImplementedError() @@ -136,6 +146,10 @@ def correlation_id(self): def created_on_timestamp(self): return datetime.datetime.min + @property + def source_channel(self): + return GoalStateChannel.Empty + @property def status_upload_blob(self): return None diff --git a/azurelinuxagent/common/protocol/extensions_goal_state_from_extensions_config.py b/azurelinuxagent/common/protocol/extensions_goal_state_from_extensions_config.py index d7d80f4c10..2f3eaa1b31 100644 --- a/azurelinuxagent/common/protocol/extensions_goal_state_from_extensions_config.py +++ b/azurelinuxagent/common/protocol/extensions_goal_state_from_extensions_config.py @@ -23,7 +23,7 @@ from azurelinuxagent.common.event import add_event, WALAEventOperation from azurelinuxagent.common.exception import ExtensionsConfigError from azurelinuxagent.common.future import ustr -from azurelinuxagent.common.protocol.extensions_goal_state import ExtensionsGoalState +from azurelinuxagent.common.protocol.extensions_goal_state import ExtensionsGoalState, GoalStateChannel from azurelinuxagent.common.protocol.restapi import ExtensionSettings, Extension, VMAgentManifest, ExtensionState, InVMGoalStateMetaData from azurelinuxagent.common.utils.textutil import parse_doc, parse_json, findall, find, findtext, getattrib, gettext, format_exception, \ is_str_none_or_whitespace, is_str_empty @@ -148,6 +148,10 @@ def correlation_id(self): def created_on_timestamp(self): return self._created_on_timestamp + @property + def source_channel(self): + return GoalStateChannel.WireServer + @property def status_upload_blob(self): return self._status_upload_blob diff --git a/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py b/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py index afe7fa0a7e..c107aa14fc 100644 --- 
a/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py +++ b/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py @@ -24,7 +24,7 @@ from azurelinuxagent.common.exception import VmSettingsError from azurelinuxagent.common.future import ustr import azurelinuxagent.common.logger as logger -from azurelinuxagent.common.protocol.extensions_goal_state import ExtensionsGoalState +from azurelinuxagent.common.protocol.extensions_goal_state import ExtensionsGoalState, GoalStateChannel from azurelinuxagent.common.protocol.restapi import VMAgentManifest, Extension, ExtensionRequestedState, ExtensionSettings from azurelinuxagent.common.utils.flexible_version import FlexibleVersion @@ -87,6 +87,10 @@ def created_on_timestamp(self): """ return self._created_on_timestamp + @property + def source_channel(self): + return GoalStateChannel.HostGAPlugin + @property def source(self): """ diff --git a/azurelinuxagent/common/protocol/goal_state.py b/azurelinuxagent/common/protocol/goal_state.py index 5dc4c1462b..ee8b15fcd2 100644 --- a/azurelinuxagent/common/protocol/goal_state.py +++ b/azurelinuxagent/common/protocol/goal_state.py @@ -219,6 +219,7 @@ def save_to_history(etag, text): pass except VmSettingsError as exception: save_to_history(exception.etag, exception.vm_settings_text) + raise except ResourceGoneError: # retry after refreshing the HostGAPlugin GoalState.update_host_plugin_headers(self._wire_client) diff --git a/azurelinuxagent/common/protocol/hostplugin.py b/azurelinuxagent/common/protocol/hostplugin.py index 74da755422..375969f0fb 100644 --- a/azurelinuxagent/common/protocol/hostplugin.py +++ b/azurelinuxagent/common/protocol/hostplugin.py @@ -480,8 +480,9 @@ def format_message(msg): logger.info(message) add_event(op=WALAEventOperation.HostPlugin, message=message, is_success=True) - # Don't support HostGAPlugin versions older than 115 - if vm_settings.host_ga_plugin_version < FlexibleVersion("1.0.8.115"): + # Don't support 
HostGAPlugin versions older than 123 + # TODO: update the minimum version to 1.0.8.123 before release + if vm_settings.host_ga_plugin_version < FlexibleVersion("1.0.8.117"): raise_not_supported(reset_state=True) logger.info("Fetched new vmSettings [correlation ID: {0} New eTag: {1}]", correlation_id, vm_settings.etag) diff --git a/azurelinuxagent/common/protocol/restapi.py b/azurelinuxagent/common/protocol/restapi.py index 82a9abc101..0edd4623ab 100644 --- a/azurelinuxagent/common/protocol/restapi.py +++ b/azurelinuxagent/common/protocol/restapi.py @@ -293,6 +293,14 @@ def __init__(self, status=None, message=None, gs_aggregate_status=None, update_s self.extensionHandlers = DataContractList(ExtHandlerStatus) self.vm_artifacts_aggregate_status = VMArtifactsAggregateStatus(gs_aggregate_status) self.update_status = update_status + self._supports_fast_track = False + + @property + def supports_fast_track(self): + return self._supports_fast_track + + def set_supports_fast_track(self, value): + self._supports_fast_track = value class VMStatus(DataContract): diff --git a/azurelinuxagent/common/protocol/wire.py b/azurelinuxagent/common/protocol/wire.py index ba06f7b1e8..40e58cc0f5 100644 --- a/azurelinuxagent/common/protocol/wire.py +++ b/azurelinuxagent/common/protocol/wire.py @@ -27,7 +27,7 @@ import azurelinuxagent.common.conf as conf import azurelinuxagent.common.logger as logger import azurelinuxagent.common.utils.textutil as textutil -from azurelinuxagent.common.agent_supported_feature import get_agent_supported_features_list_for_crp +from azurelinuxagent.common.agent_supported_feature import get_agent_supported_features_list_for_crp, SupportedFeatureNames from azurelinuxagent.common.datacontract import validate_param from azurelinuxagent.common.event import add_event, WALAEventOperation, report_event, \ CollectOrReportEventDebugInfo, add_periodic @@ -406,6 +406,13 @@ def vm_status_to_v1(vm_status): "Value": feature.version } ) + if 
vm_status.vmAgent.supports_fast_track: + supported_features.append( + { + "Key": SupportedFeatureNames.FastTrack, + "Value": "1.0" # This is a dummy version; CRP ignores it + } + ) if supported_features: v1_vm_status["supportedFeatures"] = supported_features diff --git a/azurelinuxagent/ga/exthandlers.py b/azurelinuxagent/ga/exthandlers.py index 8e35bf58c5..f848d2d329 100644 --- a/azurelinuxagent/ga/exthandlers.py +++ b/azurelinuxagent/ga/exthandlers.py @@ -889,7 +889,7 @@ def __get_handlers_on_file_system(self, incarnation_changed): return handlers_to_report - def report_ext_handlers_status(self, incarnation_changed=False, vm_agent_update_status=None): + def report_ext_handlers_status(self, incarnation_changed=False, vm_agent_update_status=None, vm_agent_supports_fast_track=False): """ Go through handler_state dir, collect and report status. Returns the status it reported, or None if an error occurred. @@ -898,6 +898,7 @@ def report_ext_handlers_status(self, incarnation_changed=False, vm_agent_update_ vm_status = VMStatus(status="Ready", message="Guest Agent is running", gs_aggregate_status=self.__gs_aggregate_status, vm_agent_update_status=vm_agent_update_status) + vm_status.vmAgent.set_supports_fast_track(vm_agent_supports_fast_track) handlers_to_report = [] # In case of Unsupported error, report the status of the handlers in the VM diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py index 91c7745696..879f8bf91b 100644 --- a/azurelinuxagent/ga/update.py +++ b/azurelinuxagent/ga/update.py @@ -45,6 +45,7 @@ from azurelinuxagent.common.future import ustr from azurelinuxagent.common.osutil import get_osutil, systemd from azurelinuxagent.common.persist_firewall_rules import PersistFirewallRulesHandler +from azurelinuxagent.common.protocol.extensions_goal_state import GoalStateChannel from azurelinuxagent.common.protocol.hostplugin import HostPluginProtocol from azurelinuxagent.common.protocol.restapi import VMAgentUpdateStatus, 
VMAgentUpdateStatuses, ExtHandlerPackageList, \ VERSION_0 @@ -430,7 +431,7 @@ def _get_vm_size(self, protocol): if self._vm_size is None: imds_client = get_imds_client(protocol.get_endpoint()) - + try: imds_info = imds_client.get_compute() self._vm_size = imds_info.vmSize @@ -438,7 +439,7 @@ def _get_vm_size(self, protocol): err_msg = "Attempts to retrieve VM size information from IMDS are failing: {0}".format(textutil.format_exception(e)) logger.periodic_warn(logger.EVERY_SIX_HOURS, "[PERIODIC] {0}".format(err_msg)) return "unknown" - + return self._vm_size def _check_daemon_running(self, debug): @@ -657,9 +658,18 @@ def __get_vmagent_update_status(self, protocol, incarnation_changed): def _report_status(self, exthandlers_handler): vm_agent_update_status = self.__get_vmagent_update_status(exthandlers_handler.protocol, self._processing_new_extensions_goal_state()) # report_ext_handlers_status does its own error handling and returns None if an error occurred + # # TODO: Review the use of incarnation when reporting status... what should be the behavior for Fast Track goal states (i.e. no incarnation)? - vm_status = exthandlers_handler.report_ext_handlers_status(incarnation_changed=self._processing_new_extensions_goal_state(), - vm_agent_update_status=vm_agent_update_status) + # TODO: How to handle the case when the HostGAPlugin goes from supporting vmSettings to not supporting it? 
+ # + if self._goal_state is None: + supports_fast_track = False + else: + supports_fast_track = self._goal_state.extensions_goal_state.source_channel == GoalStateChannel.HostGAPlugin + vm_status = exthandlers_handler.report_ext_handlers_status( + incarnation_changed=self._processing_new_extensions_goal_state(), + vm_agent_update_status=vm_agent_update_status, + vm_agent_supports_fast_track=supports_fast_track) if vm_status is None: return diff --git a/tests/data/hostgaplugin/vm_settings-difference_in_required_features.json b/tests/data/hostgaplugin/vm_settings-difference_in_required_features.json index 8e3de91f7a..3113c49231 100644 --- a/tests/data/hostgaplugin/vm_settings-difference_in_required_features.json +++ b/tests/data/hostgaplugin/vm_settings-difference_in_required_features.json @@ -1,5 +1,5 @@ { - "hostGAPluginVersion": "1.0.8.115", + "hostGAPluginVersion": "1.0.8.123", "vmSettingsSchemaVersion": "0.0", "activityId": "a33f6f53-43d6-4625-b322-1a39651a00c9", "correlationId": "9a47a2a2-e740-4bfc-b11b-4f2f7cfe7d2e", diff --git a/tests/data/hostgaplugin/vm_settings-empty_depends_on.json b/tests/data/hostgaplugin/vm_settings-empty_depends_on.json index 3f31b6f809..8442cd399f 100644 --- a/tests/data/hostgaplugin/vm_settings-empty_depends_on.json +++ b/tests/data/hostgaplugin/vm_settings-empty_depends_on.json @@ -1,5 +1,5 @@ { - "hostGAPluginVersion": "1.0.8.115", + "hostGAPluginVersion": "1.0.8.123", "vmSettingsSchemaVersion": "0.0", "activityId": "2e7f8b5d-f637-4721-b757-cb190d49b4e9", "correlationId": "1bef4c48-044e-4225-8f42-1d1eac1eb158", diff --git a/tests/data/hostgaplugin/vm_settings-invalid_blob_type.json b/tests/data/hostgaplugin/vm_settings-invalid_blob_type.json index df0b91fab5..e792f423d1 100644 --- a/tests/data/hostgaplugin/vm_settings-invalid_blob_type.json +++ b/tests/data/hostgaplugin/vm_settings-invalid_blob_type.json @@ -1,5 +1,5 @@ { - "hostGAPluginVersion": "1.0.8.115", + "hostGAPluginVersion": "1.0.8.123", "vmSettingsSchemaVersion": "0.0", 
"activityId": "2e7f8b5d-f637-4721-b757-cb190d49b4e9", "correlationId": "1bef4c48-044e-4225-8f42-1d1eac1eb158", diff --git a/tests/data/hostgaplugin/vm_settings-no_status_upload_blob.json b/tests/data/hostgaplugin/vm_settings-no_status_upload_blob.json index c184454a65..364d6ced3b 100644 --- a/tests/data/hostgaplugin/vm_settings-no_status_upload_blob.json +++ b/tests/data/hostgaplugin/vm_settings-no_status_upload_blob.json @@ -1,5 +1,5 @@ { - "hostGAPluginVersion": "1.0.8.115", + "hostGAPluginVersion": "1.0.8.123", "vmSettingsSchemaVersion": "0.0", "activityId": "a33f6f53-43d6-4625-b322-1a39651a00c9", "correlationId": "9a47a2a2-e740-4bfc-b11b-4f2f7cfe7d2e", diff --git a/tests/data/hostgaplugin/vm_settings-out-of-sync.json b/tests/data/hostgaplugin/vm_settings-out-of-sync.json index db755d249d..ef26d75962 100644 --- a/tests/data/hostgaplugin/vm_settings-out-of-sync.json +++ b/tests/data/hostgaplugin/vm_settings-out-of-sync.json @@ -1,5 +1,5 @@ { - "hostGAPluginVersion": "1.0.8.115", + "hostGAPluginVersion": "1.0.8.123", "vmSettingsSchemaVersion": "0.0", "activityId": "AAAAAAA-BBBB-CCCC-DDDD-EEEEEEEEEEEE", "correlationId": "EEEEEEEE-DDDD-CCCC-BBBB-AAAAAAAAAAAA", diff --git a/tests/data/hostgaplugin/vm_settings-parse_error.json b/tests/data/hostgaplugin/vm_settings-parse_error.json index 274ba5568b..1712bbda10 100644 --- a/tests/data/hostgaplugin/vm_settings-parse_error.json +++ b/tests/data/hostgaplugin/vm_settings-parse_error.json @@ -1,5 +1,5 @@ { - "hostGAPluginVersion": "1.0.8.115", + "hostGAPluginVersion": "1.0.8.123", "vmSettingsSchemaVersion": THIS_IS_A_SYNTAX_ERROR, "activityId": "a33f6f53-43d6-4625-b322-1a39651a00c9", "correlationId": "9a47a2a2-e740-4bfc-b11b-4f2f7cfe7d2e", diff --git a/tests/data/hostgaplugin/vm_settings-requested_version.json b/tests/data/hostgaplugin/vm_settings-requested_version.json index 0760a3fe91..a033776384 100644 --- a/tests/data/hostgaplugin/vm_settings-requested_version.json +++ 
b/tests/data/hostgaplugin/vm_settings-requested_version.json @@ -1,5 +1,5 @@ { - "hostGAPluginVersion": "1.0.8.115", + "hostGAPluginVersion": "1.0.8.123", "vmSettingsSchemaVersion": "0.0", "activityId": "a33f6f53-43d6-4625-b322-1a39651a00c9", "correlationId": "9a47a2a2-e740-4bfc-b11b-4f2f7cfe7d2e", diff --git a/tests/data/hostgaplugin/vm_settings-unsupported_version.json b/tests/data/hostgaplugin/vm_settings-unsupported_version.json index abc9248a87..a32d5389de 100644 --- a/tests/data/hostgaplugin/vm_settings-unsupported_version.json +++ b/tests/data/hostgaplugin/vm_settings-unsupported_version.json @@ -1,5 +1,5 @@ { - "hostGAPluginVersion": "1.0.8.114", + "hostGAPluginVersion": "1.0.8.116", "vmSettingsSchemaVersion": "0.0", "activityId": "a33f6f53-43d6-4625-b322-1a39651a00c9", "correlationId": "9a47a2a2-e740-4bfc-b11b-4f2f7cfe7d2e", diff --git a/tests/data/hostgaplugin/vm_settings.json b/tests/data/hostgaplugin/vm_settings.json index 7b402720ab..4647cca284 100644 --- a/tests/data/hostgaplugin/vm_settings.json +++ b/tests/data/hostgaplugin/vm_settings.json @@ -1,5 +1,5 @@ { - "hostGAPluginVersion": "1.0.8.115", + "hostGAPluginVersion": "1.0.8.123", "vmSettingsSchemaVersion": "0.0", "activityId": "a33f6f53-43d6-4625-b322-1a39651a00c9", "correlationId": "9a47a2a2-e740-4bfc-b11b-4f2f7cfe7d2e", diff --git a/tests/data/wire/ext_conf_mock_status_blob.xml b/tests/data/wire/ext_conf_mock_status_blob.xml deleted file mode 100644 index 924e8f1851..0000000000 --- a/tests/data/wire/ext_conf_mock_status_blob.xml +++ /dev/null @@ -1,26 +0,0 @@ - - - - Prod - - http://mock-goal-state/manifest_of_ga.xml - - - - Test - - http://mock-goal-state/manifest_of_ga.xml - - - - - - - - - - {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"4037FBF5F1F3014F99B5D6C7799E9B20E6871CB3","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} - - -https://mock-goal-state/StatusBlob - diff --git a/tests/ga/extension_emulator.py b/tests/ga/extension_emulator.py 
index e2df343e06..dafd365dff 100644 --- a/tests/ga/extension_emulator.py +++ b/tests/ga/extension_emulator.py @@ -28,6 +28,8 @@ from azurelinuxagent.ga.exthandlers import ExtHandlerInstance, ExtCommandEnvVariable from tests.tools import Mock, patch +from tests.protocol.mockwiredata import WireProtocolData +from tests.protocol.mocks import MockHttpResponse from tests.protocol.HttpRequestPredicates import HttpRequestPredicates @@ -111,9 +113,11 @@ def generate_put_handler(*emulators): def mock_put_handler(url, *args, **_): if HttpRequestPredicates.is_host_plugin_status_request(url): - return + status_blob = WireProtocolData.get_status_blob_from_hostgaplugin_put_status_request(args[0]) + else: + status_blob = args[0] - handler_statuses = json.loads(args[0]).get("aggregateStatus", {}).get("handlerAggregateStatus", []) + handler_statuses = json.loads(status_blob).get("aggregateStatus", {}).get("handlerAggregateStatus", []) for handler_status in handler_statuses: supplied_name = handler_status.get("handlerName", None) @@ -126,6 +130,7 @@ def mock_put_handler(url, *args, **_): except StopIteration: # Tests will want to know that the agent is running an extension they didn't specifically allocate. raise Exception("Extension running, but not present in emulators: {0}, {1}".format(supplied_name, supplied_version)) + return MockHttpResponse(status=200) return mock_put_handler diff --git a/tests/ga/test_report_status.py b/tests/ga/test_report_status.py new file mode 100644 index 0000000000..dd26845fe9 --- /dev/null +++ b/tests/ga/test_report_status.py @@ -0,0 +1,149 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the Apache License. 
+ +import contextlib +import json + +from mock import PropertyMock +from azurelinuxagent.ga.exthandlers import ExtHandlersHandler +from azurelinuxagent.ga.update import UpdateHandler, get_update_handler +from tests.protocol.mocks import mock_wire_protocol, MockHttpResponse +from tests.tools import AgentTestCase, patch, Mock, mock_sleep +from tests.protocol import mockwiredata +from tests.protocol.HttpRequestPredicates import HttpRequestPredicates + + +class ReportStatusTestCase(AgentTestCase): + """ + Tests for UpdateHandler._report_status() + """ + + @staticmethod + @contextlib.contextmanager + def _mock_update_handler(protocol, exthandlers_handler=None, iterations=1, on_new_iteration=lambda _: None): + """ + The run() method of the mock handler will execute its main loop for the given 'iterations', and will invoke 'on_new_iteration' when + starting each iteration, passing the iteration number as argument. + """ + iteration_count = [0] + + def is_running(*args): # mock for property UpdateHandler.is_running, which controls the main loop + if len(args) == 0: + # getter + iteration_count[0] += 1 + on_new_iteration(iteration_count[0]) + return iteration_count[0] <= iterations + else: + # setter + return None + + if exthandlers_handler is None: + exthandlers_handler = ExtHandlersHandler(protocol) + + with patch("azurelinuxagent.ga.exthandlers.get_exthandlers_handler", return_value=exthandlers_handler): + with patch("azurelinuxagent.common.conf.get_autoupdate_enabled", return_value=False): # skip agent update + with patch.object(UpdateHandler, "is_running", PropertyMock(side_effect=is_running)): + with patch('time.sleep', side_effect=lambda _: mock_sleep(0.001)): + with patch('sys.exit', side_effect=lambda _: 0): + update_handler = get_update_handler() + update_handler.protocol_util.get_protocol = Mock(return_value=protocol) + + yield update_handler + + def test_update_handler_should_report_status_when_fetch_goal_state_fails(self): + # The test executes the main loop of 
UpdateHandler.run() twice, failing requests for the goal state + # on the second iteration. We expect the 2 iterations to report status, despite the goal state failure. + fail_goal_state_request = [False] + + def http_get_handler(url, *_, **__): + if HttpRequestPredicates.is_goal_state_request(url) and fail_goal_state_request[0]: + return MockHttpResponse(status=410) + return None + + def on_new_iteration(iteration): + fail_goal_state_request[0] = iteration == 2 + + with mock_wire_protocol(mockwiredata.DATA_FILE, http_get_handler=http_get_handler) as protocol: + exthandlers_handler = ExtHandlersHandler(protocol) + with patch.object(exthandlers_handler, "run", wraps=exthandlers_handler.run) as exthandlers_handler_run: + with ReportStatusTestCase._mock_update_handler(protocol, exthandlers_handler=exthandlers_handler, iterations=2, on_new_iteration=on_new_iteration) as update_handler: + update_handler.run(debug=True) + + self.assertEqual(1, exthandlers_handler_run.call_count, "Extensions should have been executed only once.") + self.assertEqual(2, len(protocol.mock_wire_data.status_blobs), "Status should have been reported for the 2 iterations.") + + # + # Verify that we reported status for the extension in the test data + # + first_status = json.loads(protocol.mock_wire_data.status_blobs[0]) + + handler_aggregate_status = first_status.get('aggregateStatus', {}).get("handlerAggregateStatus") + self.assertIsNotNone(handler_aggregate_status, "Could not find the handlerAggregateStatus") + self.assertEqual(1, len(handler_aggregate_status), "Expected 1 extension status. 
Got: {0}".format(handler_aggregate_status)) + extension_status = handler_aggregate_status[0] + self.assertEqual("OSTCExtensions.ExampleHandlerLinux", extension_status["handlerName"], "The status does not correspond to the test data") + + # + # Verify that we reported the same status (minus timestamps) in the 2 iterations + # + second_status = json.loads(protocol.mock_wire_data.status_blobs[1]) + + def remove_timestamps(x): + if isinstance(x, list): + for v in x: + remove_timestamps(v) + elif isinstance(x, dict): + for k, v in x.items(): + if k == "timestampUTC": + x[k] = '' + else: + remove_timestamps(v) + + remove_timestamps(first_status) + remove_timestamps(second_status) + + self.assertEqual(first_status, second_status) + + def test_report_status_should_log_errors_only_once_per_goal_state(self): + with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: + with patch("azurelinuxagent.common.conf.get_autoupdate_enabled", return_value=False): # skip agent update + with patch("azurelinuxagent.ga.update.logger.warn") as logger_warn: + update_handler = get_update_handler() + exthandlers_handler = ExtHandlersHandler(protocol) + update_handler._report_status(exthandlers_handler) + self.assertEqual(0, logger_warn.call_count, "UpdateHandler._report_status() should not report WARNINGS when there are no errors") + + with patch("azurelinuxagent.ga.update.ExtensionsSummary.__init__", return_value=Exception("TEST EXCEPTION")): # simulate an error during _report_status() + update_handler._report_status(exthandlers_handler) + update_handler._report_status(exthandlers_handler) + update_handler._report_status(exthandlers_handler) + self.assertEqual(1, logger_warn.call_count, "UpdateHandler._report_status() should report only 1 WARNING when there are multiple errors within the same goal state") + + exthandlers_handler.protocol.mock_wire_data.set_incarnation(999) + update_handler._try_update_goal_state(exthandlers_handler.protocol) + 
update_handler._report_status(exthandlers_handler) + self.assertEqual(2, logger_warn.call_count, "UpdateHandler._report_status() should continue reporting errors after a new goal state") + + def test_update_handler_should_add_fast_track_to_supported_features_when_it_is_supported(self): + with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: + self._test_supported_features_includes_fast_track(protocol, True) + + def test_update_handler_should_not_add_fast_track_to_supported_features_when_it_is_not_supported(self): + def http_get_handler(url, *_, **__): + if HttpRequestPredicates.is_host_plugin_vm_settings_request(url): + return MockHttpResponse(status=404) + return None + + with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS, http_get_handler=http_get_handler) as protocol: + self._test_supported_features_includes_fast_track(protocol, False) + + def _test_supported_features_includes_fast_track(self, protocol, expected): + with patch("azurelinuxagent.common.conf.get_enable_fast_track", return_value=True): + with ReportStatusTestCase._mock_update_handler(protocol) as update_handler: + update_handler.run(debug=True) + + status = json.loads(protocol.mock_wire_data.status_blobs[0]) + supported_features = status['supportedFeatures'] + includes_fast_track = any(f['Key'] == 'FastTrack' for f in supported_features) + self.assertEqual(expected, includes_fast_track, "supportedFeatures should {0}include FastTrack. 
Got: {1}".format("" if expected else "not ", supported_features)) + diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index 3dc84e7756..396aaf82c1 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -2870,171 +2870,6 @@ def test_it_should_process_goal_state_only_on_new_goal_state(self): self.assertEqual(2, remote_access_handler.run.call_count, "remote_access_handler.run() should have been called on a new goal state") -class ReportStatusTestCase(AgentTestCase): - """ - Tests for UpdateHandler._report_status() - """ - - def setUp(self): - self.patches = [ - patch("time.sleep", side_effect=lambda _: mock_sleep(0.001)), - patch("sys.exit") - ] - - for p in self.patches: - p.start() - - return AgentTestCase.setUp(self) - - def tearDown(self): - - for p in self.patches: - p.stop() - - return AgentTestCase.tearDown(self) - - @staticmethod - @contextlib.contextmanager - def _mock_update_handler(iterations=1, **kwargs): - """ - Creates an UpdateHandler instance that will run n iterations. - - Can be supplied keyword args for: - * mock_wire_data_file: This arg is treated like mock_wire_protocol - would. Defaults to mockwiredata.DATA_FILE_STATUS_BLOB - * http__handler where action is get, put, or post: This arg - is treated like mock_wire_protocol would. - - Returned UpdateHandler instance has its protocol mocked via mock_wire_protocol. - """ - - # Build the side_effect list for the UpdateHandler.is_running PropertyMock. - # Return True for the first n iterations followed by a single False to stop - # and then another True because the current UpdateHandler implementation - # does a __set__ during shutdown. 
- is_running_return_values = [True] * iterations + [False, True] - is_running_patch = patch.object(UpdateHandler, "is_running", PropertyMock(side_effect=is_running_return_values)) - - mock_http_get = kwargs.get("http_get_handler") - mock_http_put = kwargs.get("http_put_handler") - mock_http_post = kwargs.get("http_post_handler") - - mock_wire_data_file = kwargs.get("mock_wire_data_file", mockwiredata.DATA_FILE_STATUS_BLOB) - - try: - with mock_wire_protocol(mock_wire_data_file, mock_http_get, mock_http_post, mock_http_put) as protocol: - update_handler = get_update_handler() - - update_handler.protocol_util.get_protocol = Mock(return_value=protocol) - - is_running_patch.start() - yield update_handler - finally: - is_running_patch.stop() - - @staticmethod - def _fail_goal_state_fetch(url, **_): - """ - For each goal state requested, returns values in order before failing with an - HttpError. Is useful for getting the agent into a specific state before causing - a failure. - - Relies on this function to have the property return_vals populated with a list - of values to be returned in order. Any `None` in the list will cause the mock wire - data to be queried and returned, and thus functions as a sort of default. - """ - if not HttpRequestPredicates.is_goal_state_request(url): - # url does not represent a request for a goal state; return None so - # that the mock_wire_protocol will return whatever data is in the mock - # wire data object (as per the mock_wire_protocol's docstring). - return None - try: - return ReportStatusTestCase._fail_goal_state_fetch.return_vals.pop() - except IndexError: - raise HttpError() - - @patch("azurelinuxagent.common.conf.get_enable_fast_track", return_value=True) - def test_update_handler_should_report_status_even_on_failed_goal_state_fetch(self, _): - - try: - # Returning None forces the mock wire data to return the contents in the static - # files, as documented in mock_wire_protocol's docstring. 
We return thrice: - # once for protocol initialization, once for HostGAPlugin initialization, - # and once for the initial call in run(). - # TODO: This test has too much knowledge of the protocol with the wireserver; rewrite it - # at the level of UpdateHanlder._process_goal_state, which is where the tested - # logic resides. - # - # TODO: For the same reason, the test below (commented out) needs to be rewritten - ReportStatusTestCase._fail_goal_state_fetch.return_vals = [None, None, None] - - with ReportStatusTestCase._mock_update_handler(http_get_handler=ReportStatusTestCase._fail_goal_state_fetch) as update_handler: - update_handler.run(debug=True) - - mock_protocol = update_handler.protocol_util.get_protocol() - self.assertEqual(mock_protocol.mock_wire_data.call_counts['/StatusBlob'], 1, - "Expected a single status blob to be uploaded") - finally: - # clean up the static variable - del ReportStatusTestCase._fail_goal_state_fetch.return_vals - - @skip_if_predicate_true(lambda: True, "See TODO comment in test_update_handler_should_report_status_even_on_failed_goal_state_fetch") - def test_update_handler_should_report_status_for_cached_goal_state_on_failed_fetch(self): - - try: - # Adds one return to the test above (test_upload_vm_status_even_on_failed_goal_state_fetch). - # The third (and last) return is to allow for the extensions to be processed once so that - # we will have extension status to test for. 
- ReportStatusTestCase._fail_goal_state_fetch.return_vals = [ None, None, None, None ] - - with ReportStatusTestCase._mock_update_handler(iterations=2, - http_get_handler=ReportStatusTestCase._fail_goal_state_fetch) as update_handler: - update_handler.run(debug=True) - - wire_data = update_handler.protocol_util.get_protocol().mock_wire_data - self.assertEqual(wire_data.call_counts['/StatusBlob'], 2, - "Expected two status blobs to be uploaded, one for each iteration of the run loop.") - - latest_status_blob_str = wire_data.status_blobs[-1] - latest_status_blob = json.loads(latest_status_blob_str) - - ext_handler_statuses = latest_status_blob.get('aggregateStatus', {}).get("handlerAggregateStatus") - self.assertEqual(1, len(ext_handler_statuses), "Expected status for a single extension") - - expectedHandlerInfo = { - "handlerName": "OSTCExtensions.ExampleHandlerLinux", - "handlerVersion": "1.0.0" - } - - for key, expected_val in expectedHandlerInfo.items(): - actual_val = ext_handler_statuses[0].get(key) - - msg = "Extension information '{0}' did not match the provided extension.".format(key) - self.assertEqual(actual_val, expected_val, msg) - - finally: - # clean up the static variable - del ReportStatusTestCase._fail_goal_state_fetch.return_vals - - def test_report_status_should_log_errors_only_once_per_goal_state(self): - update_handler = _create_update_handler() - with _mock_exthandlers_handler() as exthandlers_handler: - with patch("azurelinuxagent.ga.update.logger.warn") as logger_warn: - update_handler._report_status(exthandlers_handler) - self.assertEqual(0, logger_warn.call_count, "UpdateHandler._report_status() should not report WARNINGS when there are no errors") - - with patch("azurelinuxagent.ga.update.ExtensionsSummary.__init__", return_value=Exception("TEST EXCEPTION")): # simulate an error during _report_status() - update_handler._report_status(exthandlers_handler) - update_handler._report_status(exthandlers_handler) - 
update_handler._report_status(exthandlers_handler) - self.assertEqual(1, logger_warn.call_count, "UpdateHandler._report_status() should report only 1 WARNING when there are multiple errors within the same goal state") - - exthandlers_handler.protocol.mock_wire_data.set_incarnation(999) - update_handler._try_update_goal_state(exthandlers_handler.protocol) - update_handler._report_status(exthandlers_handler) - self.assertEqual(2, logger_warn.call_count, "UpdateHandler._report_status() should continue reporting errors after a new goal state") - - class GoalStateIntervalTestCase(AgentTestCase): def test_initial_goal_state_period_should_default_to_goal_state_period(self): configuration_provider = conf.ConfigurationProvider() diff --git a/tests/protocol/HttpRequestPredicates.py b/tests/protocol/HttpRequestPredicates.py index 669c2eef4b..39243d5431 100644 --- a/tests/protocol/HttpRequestPredicates.py +++ b/tests/protocol/HttpRequestPredicates.py @@ -44,6 +44,15 @@ def is_host_plugin_health_request(url): def is_host_plugin_extension_artifact_request(url): return url.lower() == 'http://{0}:{1}/extensionartifact'.format(restutil.KNOWN_WIRESERVER_IP, restutil.HOST_PLUGIN_PORT) + @staticmethod + def is_status_request(url): + return HttpRequestPredicates.is_storage_status_request(url) or HttpRequestPredicates.is_host_plugin_status_request(url) + + @staticmethod + def is_storage_status_request(url): + # e.g. 
'https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo' + return re.match(r'^https://.+/.*\.status\?[^/]+$', url, re.IGNORECASE) + @staticmethod def is_host_plugin_status_request(url): return url.lower() == 'http://{0}:{1}/status'.format(restutil.KNOWN_WIRESERVER_IP, restutil.HOST_PLUGIN_PORT) diff --git a/tests/protocol/mockwiredata.py b/tests/protocol/mockwiredata.py index 0267db2174..3c9cd6a0a6 100644 --- a/tests/protocol/mockwiredata.py +++ b/tests/protocol/mockwiredata.py @@ -14,7 +14,8 @@ # # Requires Python 2.6+ and Openssl 1.0+ # - +import base64 +import json import re from azurelinuxagent.common.utils.textutil import parse_doc, find, findall @@ -117,9 +118,6 @@ DATA_FILE_VM_SETTINGS["ext_conf"] = "hostgaplugin/ext_conf.xml" DATA_FILE_VM_SETTINGS["in_vm_artifacts_profile"] = "hostgaplugin/in_vm_artifacts_profile.json" -DATA_FILE_STATUS_BLOB = DATA_FILE.copy() -DATA_FILE_STATUS_BLOB["ext_conf"] = "wire/ext_conf_mock_status_blob.xml" - class WireProtocolData(object): def __init__(self, data_files=None): @@ -132,7 +130,6 @@ def __init__(self, data_files=None): "/health": 0, "/HealthService": 0, "/vmAgentLog": 0, - '/StatusBlob': 0, "goalstate": 0, "hostingenvuri": 0, "sharedconfiguri": 0, @@ -305,9 +302,10 @@ def mock_http_put(self, url, data, **_): if url.endswith('/vmAgentLog'): self.call_counts['/vmAgentLog'] += 1 - elif url.endswith('/StatusBlob'): - self.call_counts['/StatusBlob'] += 1 + elif HttpRequestPredicates.is_storage_status_request(url): self.status_blobs.append(data) + elif HttpRequestPredicates.is_host_plugin_status_request(url): + self.status_blobs.append(WireProtocolData.get_status_blob_from_hostgaplugin_put_status_request(content)) else: raise NotImplementedError(url) @@ -315,7 +313,7 @@ def mock_http_put(self, url, data, **_): return resp def mock_crypt_util(self, *args, **kw): - #Partially patch instance method of class 
CryptUtil + # Partially patch instance method of class CryptUtil cryptutil = CryptUtil(*args, **kw) cryptutil.gen_transport_cert = Mock(side_effect=self.mock_gen_trans_cert) return cryptutil @@ -327,6 +325,12 @@ def mock_gen_trans_cert(self, trans_prv_file, trans_cert_file): with open(trans_cert_file, 'w+') as cert_file: cert_file.write(self.trans_cert) + @staticmethod + def get_status_blob_from_hostgaplugin_put_status_request(data): + status_object = json.loads(data) + content = status_object["content"] + return base64.b64decode(content) + def get_no_of_plugins_in_extension_config(self): if self.ext_conf is None: return 0 diff --git a/tests/protocol/test_extensions_goal_state.py b/tests/protocol/test_extensions_goal_state.py deleted file mode 100644 index cc929938ff..0000000000 --- a/tests/protocol/test_extensions_goal_state.py +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the Apache License. -import copy -import re -import sys - -from azurelinuxagent.common.protocol.extensions_goal_state_factory import ExtensionsGoalStateFactory -from tests.protocol.mocks import mockwiredata, mock_wire_protocol -from tests.tools import AgentTestCase, load_data - -# Python < 3.7 can't copy regular expressions, this is the recommended patch -if sys.version_info[0] < 3 or sys.version_info[0] == 3 and sys.version_info[1] < 7: - copy._deepcopy_dispatch[type(re.compile(''))] = lambda r, _: r - - -class ExtensionsGoalStateTestCase(AgentTestCase): - def test_create_from_extensions_config_should_assume_block_when_blob_type_is_not_valid(self): - data_file = mockwiredata.DATA_FILE.copy() - data_file["ext_conf"] = "hostgaplugin/ext_conf-invalid_blob_type.xml" - with mock_wire_protocol(data_file) as protocol: - extensions_goal_state = ExtensionsGoalStateFactory.create_from_extensions_config(123, load_data("hostgaplugin/ext_conf-invalid_blob_type.xml"), protocol) - self.assertEqual("BlockBlob", 
extensions_goal_state.status_upload_blob_type, 'Expected BlockBob for an invalid statusBlobType') - - def test_create_from_vm_settings_should_assume_block_when_blob_type_is_not_valid(self): - extensions_goal_state = ExtensionsGoalStateFactory.create_from_vm_settings(1234567890, load_data("hostgaplugin/vm_settings-invalid_blob_type.json")) - self.assertEqual("BlockBlob", extensions_goal_state.status_upload_blob_type, 'Expected BlockBob for an invalid statusBlobType') - diff --git a/tests/protocol/test_extensions_goal_state_from_extensions_config.py b/tests/protocol/test_extensions_goal_state_from_extensions_config.py index 08a028e4e5..6e31634371 100644 --- a/tests/protocol/test_extensions_goal_state_from_extensions_config.py +++ b/tests/protocol/test_extensions_goal_state_from_extensions_config.py @@ -1,6 +1,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the Apache License. from azurelinuxagent.common.AgentGlobals import AgentGlobals +from azurelinuxagent.common.protocol.extensions_goal_state import GoalStateChannel from tests.protocol.mocks import mockwiredata, mock_wire_protocol from tests.tools import AgentTestCase @@ -28,7 +29,7 @@ def test_it_should_use_default_values_when_in_vm_metadata_is_invalid(self): self.assertEqual('1900-01-01T00:00:00.000000Z', extensions_goal_state.created_on_timestamp, "Incorrect GS Creation time") def test_it_should_parse_missing_status_upload_blob_as_none(self): - data_file = mockwiredata.DATA_FILE_VM_SETTINGS.copy() + data_file = mockwiredata.DATA_FILE.copy() data_file["ext_conf"] = "hostgaplugin/ext_conf-no_status_upload_blob.xml" with mock_wire_protocol(data_file) as protocol: extensions_goal_state = protocol.get_goal_state().extensions_goal_state @@ -36,6 +37,13 @@ def test_it_should_parse_missing_status_upload_blob_as_none(self): self.assertIsNone(extensions_goal_state.status_upload_blob, "Expected status upload blob to be None") self.assertEqual("BlockBlob", 
extensions_goal_state.status_upload_blob_type, "Expected status upload blob to be Block") + def test_it_should_default_to_block_blob_when_the_status_blob_type_is_not_valid(self): + data_file = mockwiredata.DATA_FILE.copy() + data_file["ext_conf"] = "hostgaplugin/ext_conf-invalid_blob_type.xml" + with mock_wire_protocol(data_file) as protocol: + extensions_goal_state = protocol.get_goal_state().extensions_goal_state + self.assertEqual("BlockBlob", extensions_goal_state.status_upload_blob_type, 'Expected BlockBlob for an invalid statusBlobType') + def test_it_should_parse_empty_depends_on_as_dependency_level_0(self): data_file = mockwiredata.DATA_FILE_VM_SETTINGS.copy() data_file["vm_settings"] = "hostgaplugin/vm_settings-empty_depends_on.json" @@ -43,5 +51,10 @@ def test_it_should_parse_empty_depends_on_as_dependency_level_0(self): with mock_wire_protocol(data_file) as protocol: extensions = protocol.get_goal_state().extensions_goal_state.extensions - self.assertEqual(0, extensions[0].settings[0].dependencyLevel, "Incorrect dependencyLevel}") + self.assertEqual(0, extensions[0].settings[0].dependencyLevel, "Incorrect dependencyLevel") + + def test_its_source_channel_should_be_wire_server(self): + with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: + extensions_goal_state = protocol.get_goal_state().extensions_goal_state + self.assertEqual(GoalStateChannel.WireServer, extensions_goal_state.source_channel, "The source_channel is incorrect") diff --git a/tests/protocol/test_extensions_goal_state_from_vm_settings.py b/tests/protocol/test_extensions_goal_state_from_vm_settings.py index e34d4bcff5..af12154fcb 100644 --- a/tests/protocol/test_extensions_goal_state_from_vm_settings.py +++ b/tests/protocol/test_extensions_goal_state_from_vm_settings.py @@ -1,55 +1,54 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the Apache License. 
import json -import os.path -from azurelinuxagent.common.protocol.extensions_goal_state_factory import ExtensionsGoalStateFactory +from azurelinuxagent.common.protocol.extensions_goal_state import GoalStateChannel from azurelinuxagent.common.protocol.extensions_goal_state_from_vm_settings import _CaseFoldedDict -from azurelinuxagent.common.utils import fileutil from tests.protocol.mocks import mockwiredata, mock_wire_protocol -from tests.tools import AgentTestCase, data_dir, patch +from tests.tools import AgentTestCase, patch +@patch("azurelinuxagent.common.conf.get_enable_fast_track", return_value=True) class ExtensionsGoalStateFromVmSettingsTestCase(AgentTestCase): - def test_create_from_vm_settings_should_parse_vm_settings(self): - vm_settings_text = fileutil.read_file(os.path.join(data_dir, "hostgaplugin/vm_settings.json")) - vm_settings = ExtensionsGoalStateFactory.create_from_vm_settings("123", vm_settings_text) - - def assert_property(name, value): - self.assertEqual(value, getattr(vm_settings, name), '{0} was not parsed correctly'.format(name)) - - assert_property("activity_id", "a33f6f53-43d6-4625-b322-1a39651a00c9") - assert_property("correlation_id", "9a47a2a2-e740-4bfc-b11b-4f2f7cfe7d2e") - assert_property("created_on_timestamp", "2021-11-16T13:22:50.620522Z") - assert_property("status_upload_blob", "https://dcrcl3a0xs.blob.core.windows.net/$system/edp0plkw2b.86f4ae0a-61f8-48ae-9199-40f402d56864.status?sv=2018-03-28&sr=b&sk=system-1&sig=KNWgC2%3d&se=9999-01-01T00%3a00%3a00Z&sp=w") - assert_property("status_upload_blob_type", "BlockBlob") - assert_property("required_features", ["MultipleExtensionsPerHandler"]) - assert_property("on_hold", True) - - # - # for the rest of the attributes, we check only 1 item in each container (but check the length of the container) - # - - # agent manifests - self.assertEqual(2, len(vm_settings.agent_manifests), "Incorrect number of agent manifests. 
Got: {0}".format(vm_settings.agent_manifests)) - self.assertEqual("Prod", vm_settings.agent_manifests[0].family, "Incorrect agent family.") - self.assertEqual(2, len(vm_settings.agent_manifests[0].uris), "Incorrect number of uris. Got: {0}".format(vm_settings.agent_manifests[0].uris)) - self.assertEqual("https://zrdfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Prod_uscentraleuap_manifest.xml", vm_settings.agent_manifests[0].uris[0], "Incorrect number of uris.") - - # extensions - self.assertEqual(5, len(vm_settings.extensions), "Incorrect number of extensions. Got: {0}".format(vm_settings.extensions)) - self.assertEqual('Microsoft.Azure.Monitor.AzureMonitorLinuxAgent', vm_settings.extensions[0].name, "Incorrect extension name") - self.assertEqual(1, len(vm_settings.extensions[0].settings[0].publicSettings), "Incorrect number of public settings") - self.assertEqual(True, vm_settings.extensions[0].settings[0].publicSettings["GCS_AUTO_CONFIG"], "Incorrect public settings") - - # dependency level (single-config) - self.assertEqual(1, vm_settings.extensions[2].settings[0].dependencyLevel, "Incorrect dependency level (single-config)") - - # dependency level (multi-config) - self.assertEqual(1, vm_settings.extensions[3].settings[1].dependencyLevel, "Incorrect dependency level (multi-config)") - - @patch("azurelinuxagent.common.conf.get_enable_fast_track", return_value=True) - def test_extension_goal_state_should_parse_requested_version_properly(self, _): + def test_it_should_parse_vm_settings(self, _): + with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: + extensions_goal_state = protocol.get_goal_state().extensions_goal_state + + def assert_property(name, value): + self.assertEqual(value, getattr(extensions_goal_state, name), '{0} was not parsed correctly'.format(name)) + + assert_property("activity_id", "a33f6f53-43d6-4625-b322-1a39651a00c9") + assert_property("correlation_id", 
"9a47a2a2-e740-4bfc-b11b-4f2f7cfe7d2e") + assert_property("created_on_timestamp", "2021-11-16T13:22:50.620522Z") + assert_property("status_upload_blob", "https://dcrcl3a0xs.blob.core.windows.net/$system/edp0plkw2b.86f4ae0a-61f8-48ae-9199-40f402d56864.status?sv=2018-03-28&sr=b&sk=system-1&sig=KNWgC2%3d&se=9999-01-01T00%3a00%3a00Z&sp=w") + assert_property("status_upload_blob_type", "BlockBlob") + assert_property("required_features", ["MultipleExtensionsPerHandler"]) + assert_property("on_hold", True) + + # + # for the rest of the attributes, we check only 1 item in each container (but check the length of the container) + # + + # agent manifests + self.assertEqual(2, len(extensions_goal_state.agent_manifests), "Incorrect number of agent manifests. Got: {0}".format(extensions_goal_state.agent_manifests)) + self.assertEqual("Prod", extensions_goal_state.agent_manifests[0].family, "Incorrect agent family.") + self.assertEqual(2, len(extensions_goal_state.agent_manifests[0].uris), "Incorrect number of uris. Got: {0}".format(extensions_goal_state.agent_manifests[0].uris)) + expected = "https://zrdfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Prod_uscentraleuap_manifest.xml" + self.assertEqual(expected, extensions_goal_state.agent_manifests[0].uris[0], "Unexpected URI for the agent manifest.") + + # extensions + self.assertEqual(5, len(extensions_goal_state.extensions), "Incorrect number of extensions. 
Got: {0}".format(extensions_goal_state.extensions)) + self.assertEqual('Microsoft.Azure.Monitor.AzureMonitorLinuxAgent', extensions_goal_state.extensions[0].name, "Incorrect extension name") + self.assertEqual(1, len(extensions_goal_state.extensions[0].settings[0].publicSettings), "Incorrect number of public settings") + self.assertEqual(True, extensions_goal_state.extensions[0].settings[0].publicSettings["GCS_AUTO_CONFIG"], "Incorrect public settings") + + # dependency level (single-config) + self.assertEqual(1, extensions_goal_state.extensions[2].settings[0].dependencyLevel, "Incorrect dependency level (single-config)") + + # dependency level (multi-config) + self.assertEqual(1, extensions_goal_state.extensions[3].settings[1].dependencyLevel, "Incorrect dependency level (multi-config)") + + def test_it_should_parse_requested_version_properly(self, _): with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: manifests, _ = protocol.get_vmagent_manifests() for manifest in manifests: @@ -61,13 +60,29 @@ def test_extension_goal_state_should_parse_requested_version_properly(self, _): manifests, _ = protocol.get_vmagent_manifests() for manifest in manifests: self.assertEqual(manifest.requested_version_string, "9.9.9.9", "Version should be 9.9.9.9") - def test_create_from_vm_settings_should_parse_missing_status_upload_blob_as_none(self): - vm_settings_text = fileutil.read_file(os.path.join(data_dir, "hostgaplugin/vm_settings-no_status_upload_blob.json")) - vm_settings = ExtensionsGoalStateFactory.create_from_vm_settings("123", vm_settings_text) - self.assertIsNone(vm_settings.status_upload_blob, "Expected status upload blob to be None") - self.assertEqual("BlockBlob", vm_settings.status_upload_blob_type, "Expected status upload blob to be Block") + def test_it_should_parse_missing_status_upload_blob_as_none(self, _): + data_file = mockwiredata.DATA_FILE_VM_SETTINGS.copy() + data_file["vm_settings"] = "hostgaplugin/vm_settings-no_status_upload_blob.json" 
+ with mock_wire_protocol(data_file) as protocol: + extensions_goal_state = protocol.get_goal_state().extensions_goal_state + + self.assertIsNone(extensions_goal_state.status_upload_blob, "Expected status upload blob to be None") + self.assertEqual("BlockBlob", extensions_goal_state.status_upload_blob_type, "Expected status upload blob to be Block") + + def test_it_should_default_to_block_blob_when_the_status_blob_type_is_not_valid(self, _): + data_file = mockwiredata.DATA_FILE_VM_SETTINGS.copy() + data_file["vm_settings"] = "hostgaplugin/vm_settings-invalid_blob_type.json" + with mock_wire_protocol(data_file) as protocol: + extensions_goal_state = protocol.get_goal_state().extensions_goal_state + + self.assertEqual("BlockBlob", extensions_goal_state.status_upload_blob_type, 'Expected BlockBlob for an invalid statusBlobType') + + def test_its_source_channel_should_be_host_ga_plugin(self, _): + with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: + extensions_goal_state = protocol.get_goal_state().extensions_goal_state + self.assertEqual(GoalStateChannel.HostGAPlugin, extensions_goal_state.source_channel, "The source_channel is incorrect") class CaseFoldedDictionaryTestCase(AgentTestCase): def test_it_should_retrieve_items_ignoring_case(self): diff --git a/tests/protocol/test_goal_state.py b/tests/protocol/test_goal_state.py index 331ea0598f..4e8dd4f737 100644 --- a/tests/protocol/test_goal_state.py +++ b/tests/protocol/test_goal_state.py @@ -83,13 +83,15 @@ def test_fetch_full_goal_state_should_save_goal_state_to_history_directory(self, @patch("azurelinuxagent.common.conf.get_enable_fast_track", return_value=True) def test_it_should_save_vm_settings_on_parse_errors(self, _): - invalid_vm_settings_file = "hostgaplugin/vm_settings-parse_error.json" - data_file = mockwiredata.DATA_FILE_VM_SETTINGS.copy() - data_file["vm_settings"] = invalid_vm_settings_file - with mock_wire_protocol(data_file) as protocol: + with 
mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: + invalid_vm_settings_file = "hostgaplugin/vm_settings-parse_error.json" + data_file = mockwiredata.DATA_FILE_VM_SETTINGS.copy() + data_file["vm_settings"] = invalid_vm_settings_file + protocol.mock_wire_data = mockwiredata.WireProtocolData(data_file) protocol.mock_wire_data.set_etag(888) - GoalState(protocol.client) + with self.assertRaises(ProtocolError): + GoalState(protocol.client) matches = glob.glob(os.path.join(self.tmp_dir, ARCHIVE_DIRECTORY_NAME, "*_888")) self.assertTrue(len(matches) == 1, "Expected one history directory for etag 888. Got: {0}".format(matches)) From 899a38ee1859cb654332d883e37fa5ff496851f0 Mon Sep 17 00:00:00 2001 From: Kevin Clark Date: Tue, 22 Feb 2022 09:41:04 -0800 Subject: [PATCH 33/84] Stop UTs from attempting to call to IMDS (#2513) --- azurelinuxagent/ga/update.py | 4 +- .../ga/test_exthandlers_exthandlerinstance.py | 1 + tests/ga/test_update.py | 126 +++++++++--------- tests/protocol/mockwiredata.py | 6 + .../test_metadata_server_migration_util.py | 2 + tests/protocol/test_protocol_util.py | 2 + tests/tools.py | 16 +++ tests/utils/test_archive.py | 6 +- tests/utils/test_extension_process_util.py | 5 +- 9 files changed, 94 insertions(+), 74 deletions(-) diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py index 879f8bf91b..ab1950d934 100644 --- a/azurelinuxagent/ga/update.py +++ b/azurelinuxagent/ga/update.py @@ -1225,9 +1225,7 @@ def _send_heartbeat_telemetry(self, protocol): auto_update_enabled = 1 if conf.get_autoupdate_enabled() else 0 # Include VMSize in the heartbeat message because the kusto table does not have # a separate column for it (or architecture). - # Temporarily disable vmsize because it is breaking UTs. TODO: Re-enable when this is fixed. 
- # vmsize = self._get_vm_size(protocol) - vmsize = "unknown" + vmsize = self._get_vm_size(protocol) telemetry_msg = "{0};{1};{2};{3};{4};{5}".format(self._heartbeat_counter, self._heartbeat_id, dropped_packets, self._heartbeat_update_goal_state_error_count, diff --git a/tests/ga/test_exthandlers_exthandlerinstance.py b/tests/ga/test_exthandlers_exthandlerinstance.py index 60234b6d15..6295d68d27 100644 --- a/tests/ga/test_exthandlers_exthandlerinstance.py +++ b/tests/ga/test_exthandlers_exthandlerinstance.py @@ -29,6 +29,7 @@ def setUp(self): def tearDown(self): self.mock_get_base_dir.stop() + super(ExtHandlerInstanceTestCase, self).tearDown() def test_rm_ext_handler_dir_should_remove_the_extension_packages(self): os.mkdir(self.extension_directory) diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index 396aaf82c1..b9d9e29642 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -53,8 +53,8 @@ READONLY_FILE_GLOBS, ExtensionsSummary, AgentUpgradeType from tests.protocol.mocks import mock_wire_protocol, MockHttpResponse from tests.protocol.mockwiredata import DATA_FILE, DATA_FILE_MULTIPLE_EXT -from tests.tools import AgentTestCase, data_dir, DEFAULT, patch, load_bin_data, Mock, MagicMock, \ - clear_singleton_instances, mock_sleep, skip_if_predicate_true +from tests.tools import AgentTestCase, AgentTestCaseWithGetVmSizeMock, data_dir, DEFAULT, patch, load_bin_data, Mock, MagicMock, \ + clear_singleton_instances, mock_sleep from tests.protocol import mockwiredata from tests.protocol.HttpRequestPredicates import HttpRequestPredicates @@ -156,13 +156,13 @@ def check_running(*val, **__): type(update_handler).is_running = True -class UpdateTestCase(AgentTestCase): +class UpdateTestCase(AgentTestCaseWithGetVmSizeMock): _test_suite_tmp_dir = None _agent_zip_dir = None @classmethod def setUpClass(cls): - AgentTestCase.setUpClass() + super(UpdateTestCase, cls).setUpClass() # copy data_dir/ga/WALinuxAgent-0.0.0.0.zip to 
_test_suite_tmp_dir/waagent-zip/WALinuxAgent-.zip sample_agent_zip = "WALinuxAgent-0.0.0.0.zip" test_agent_zip = sample_agent_zip.replace("0.0.0.0", AGENT_VERSION) @@ -175,7 +175,7 @@ def setUpClass(cls): @classmethod def tearDownClass(cls): - AgentTestCase.tearDownClass() + super(UpdateTestCase, cls).tearDownClass() shutil.rmtree(UpdateTestCase._test_suite_tmp_dir) @staticmethod @@ -1555,62 +1555,6 @@ def test_update_happens_when_extensions_disabled(self, _): self.update_handler._download_agent_if_upgrade_available = Mock(return_value=True) self._test_run(invocations=0, calls=0, enable_updates=True, sleep_interval=(300,)) - @patch("azurelinuxagent.common.logger.info") - @patch("azurelinuxagent.ga.update.add_event") - def test_telemetry_heartbeat_creates_event(self, patch_add_event, patch_info, *_): - update_handler = get_update_handler() - mock_protocol = WireProtocol("foo.bar") - - update_handler.last_telemetry_heartbeat = datetime.utcnow() - timedelta(hours=1) - update_handler._send_heartbeat_telemetry(mock_protocol) - self.assertEqual(1, patch_add_event.call_count) - self.assertTrue(any(call_args[0] == "[HEARTBEAT] Agent {0} is running as the goal state agent {1}" - for call_args in patch_info.call_args), "The heartbeat was not written to the agent's log") - - - @skip_if_predicate_true(lambda: True, "Enable this test when VMSize bug hanging Uts is fixed.") - @patch("azurelinuxagent.ga.update.add_event") - @patch("azurelinuxagent.common.protocol.imds.ImdsClient") - def test_telemetry_heartbeat_retries_failed_vm_size_fetch(self, mock_imds_factory, patch_add_event, *_): - - def validate_single_heartbeat_event_matches_vm_size(vm_size): - heartbeat_event_kwargs = [ - kwargs for _, kwargs in patch_add_event.call_args_list - if kwargs.get('op', None) == WALAEventOperation.HeartBeat - ] - - self.assertEqual(1, len(heartbeat_event_kwargs), "Expected exactly one HeartBeat event, got {0}"\ - .format(heartbeat_event_kwargs)) - - telemetry_message = 
heartbeat_event_kwargs[0].get("message", "") - self.assertTrue(telemetry_message.endswith(vm_size), - "Expected HeartBeat message ('{0}') to end with the test vmSize value, {1}."\ - .format(telemetry_message, vm_size)) - - with mock_wire_protocol(mockwiredata.DATA_FILE) as mock_protocol: - update_handler = get_update_handler() - update_handler.protocol_util.get_protocol = Mock(return_value=mock_protocol) - - # Zero out the _vm_size parameter for test resiliency - update_handler._vm_size = None - - mock_imds_client = mock_imds_factory.return_value = Mock() - - # First force a vmSize retrieval failure - mock_imds_client.get_compute.side_effect = HttpError(msg="HTTP Test Failure") - update_handler._last_telemetry_heartbeat = datetime.utcnow() - timedelta(hours=1) - update_handler._send_heartbeat_telemetry(mock_protocol) - - validate_single_heartbeat_event_matches_vm_size("unknown") - patch_add_event.reset_mock() - - # Now provide a vmSize - mock_imds_client.get_compute = lambda: ComputeInfo(vmSize="TestVmSizeValue") - update_handler._last_telemetry_heartbeat = datetime.utcnow() - timedelta(hours=1) - update_handler._send_heartbeat_telemetry(mock_protocol) - - validate_single_heartbeat_event_matches_vm_size("TestVmSizeValue") - @staticmethod def _get_test_ext_handler_instance(protocol, name="OSTCExtensions.ExampleHandlerLinux", version="1.0.0"): eh = Extension(name=name) @@ -2461,9 +2405,9 @@ def test_it_should_not_downgrade_below_daemon_version(self): @patch('azurelinuxagent.ga.update.get_collect_logs_handler') @patch('azurelinuxagent.ga.update.get_monitor_handler') @patch('azurelinuxagent.ga.update.get_env_handler') -class MonitorThreadTest(AgentTestCase): +class MonitorThreadTest(AgentTestCaseWithGetVmSizeMock): def setUp(self): - AgentTestCase.setUp(self) + super(MonitorThreadTest, self).setUp() self.event_patch = patch('azurelinuxagent.common.event.add_event') currentThread().setName("ExtHandler") protocol = Mock() @@ -2869,6 +2813,62 @@ def 
test_it_should_process_goal_state_only_on_new_goal_state(self): self.assertEqual(3, exthandlers_handler.report_ext_handlers_status.call_count, "exthandlers_handler.report_ext_handlers_status() should have been called on a new goal state") self.assertEqual(2, remote_access_handler.run.call_count, "remote_access_handler.run() should have been called on a new goal state") +class HeartbeatTestCase(AgentTestCase): + + @patch("azurelinuxagent.common.logger.info") + @patch("azurelinuxagent.ga.update.add_event") + def test_telemetry_heartbeat_creates_event(self, patch_add_event, patch_info, *_): + + with mock_wire_protocol(mockwiredata.DATA_FILE) as mock_protocol: + update_handler = get_update_handler() + + update_handler.last_telemetry_heartbeat = datetime.utcnow() - timedelta(hours=1) + update_handler._send_heartbeat_telemetry(mock_protocol) + self.assertEqual(1, patch_add_event.call_count) + self.assertTrue(any(call_args[0] == "[HEARTBEAT] Agent {0} is running as the goal state agent {1}" + for call_args in patch_info.call_args), "The heartbeat was not written to the agent's log") + + @patch("azurelinuxagent.ga.update.add_event") + @patch("azurelinuxagent.common.protocol.imds.ImdsClient") + def test_telemetry_heartbeat_retries_failed_vm_size_fetch(self, mock_imds_factory, patch_add_event, *_): + + def validate_single_heartbeat_event_matches_vm_size(vm_size): + heartbeat_event_kwargs = [ + kwargs for _, kwargs in patch_add_event.call_args_list + if kwargs.get('op', None) == WALAEventOperation.HeartBeat + ] + + self.assertEqual(1, len(heartbeat_event_kwargs), "Expected exactly one HeartBeat event, got {0}"\ + .format(heartbeat_event_kwargs)) + + telemetry_message = heartbeat_event_kwargs[0].get("message", "") + self.assertTrue(telemetry_message.endswith(vm_size), + "Expected HeartBeat message ('{0}') to end with the test vmSize value, {1}."\ + .format(telemetry_message, vm_size)) + + with mock_wire_protocol(mockwiredata.DATA_FILE) as mock_protocol: + update_handler = 
get_update_handler() + update_handler.protocol_util.get_protocol = Mock(return_value=mock_protocol) + + # Zero out the _vm_size parameter for test resiliency + update_handler._vm_size = None + + mock_imds_client = mock_imds_factory.return_value = Mock() + + # First force a vmSize retrieval failure + mock_imds_client.get_compute.side_effect = HttpError(msg="HTTP Test Failure") + update_handler._last_telemetry_heartbeat = datetime.utcnow() - timedelta(hours=1) + update_handler._send_heartbeat_telemetry(mock_protocol) + + validate_single_heartbeat_event_matches_vm_size("unknown") + patch_add_event.reset_mock() + + # Now provide a vmSize + mock_imds_client.get_compute = lambda: ComputeInfo(vmSize="TestVmSizeValue") + update_handler._last_telemetry_heartbeat = datetime.utcnow() - timedelta(hours=1) + update_handler._send_heartbeat_telemetry(mock_protocol) + + validate_single_heartbeat_event_matches_vm_size("TestVmSizeValue") class GoalStateIntervalTestCase(AgentTestCase): def test_initial_goal_state_period_should_default_to_goal_state_period(self): diff --git a/tests/protocol/mockwiredata.py b/tests/protocol/mockwiredata.py index 3c9cd6a0a6..dfa51c7804 100644 --- a/tests/protocol/mockwiredata.py +++ b/tests/protocol/mockwiredata.py @@ -21,6 +21,7 @@ from azurelinuxagent.common.utils.textutil import parse_doc, find, findall from tests.protocol.HttpRequestPredicates import HttpRequestPredicates from tests.tools import load_bin_data, load_data, MagicMock, Mock +from azurelinuxagent.common.protocol.imds import IMDS_ENDPOINT from azurelinuxagent.common.exception import HttpError, ResourceGoneError from azurelinuxagent.common.future import httpclient from azurelinuxagent.common.utils.cryptutil import CryptUtil @@ -37,6 +38,7 @@ "trans_prv": "wire/trans_prv", "trans_cert": "wire/trans_cert", "test_ext": "ext/sample_ext-1.3.0.zip", + "imds_info": "imds/valid.json", "remote_access": None, "in_vm_artifacts_profile": None, "vm_settings": None, @@ -161,6 +163,7 @@ def 
__init__(self, data_files=None): self.in_vm_artifacts_profile = None self.vm_settings = None self.etag = None + self.imds_info = None self.reload() @@ -177,6 +180,7 @@ def reload(self): self.ga_manifest = load_data(self.data_files.get("ga_manifest")) self.trans_prv = load_data(self.data_files.get("trans_prv")) self.trans_cert = load_data(self.data_files.get("trans_cert")) + self.imds_info = json.loads(load_data(self.data_files.get("imds_info"))) self.ext = load_bin_data(self.data_files.get("test_ext")) vm_settings = self.data_files.get("vm_settings") @@ -236,6 +240,8 @@ def mock_http_get(self, url, *_, **kwargs): content = self.vm_settings response_headers = [('ETag', self.etag)] self.call_counts["vm_settings"] += 1 + elif '{0}/metadata/compute'.format(IMDS_ENDPOINT) in url: + content = json.dumps(self.imds_info.get("compute", "{}")) else: # A stale GoalState results in a 400 from the HostPlugin diff --git a/tests/protocol/test_metadata_server_migration_util.py b/tests/protocol/test_metadata_server_migration_util.py index 3f5c2cf9c9..5950b43f17 100644 --- a/tests/protocol/test_metadata_server_migration_util.py +++ b/tests/protocol/test_metadata_server_migration_util.py @@ -128,5 +128,7 @@ def tearDown(self): os.remove(path) # pylint: enable=redefined-builtin + super(TestMetadataServerMigrationUtil, self).tearDown() + if __name__ == '__main__': unittest.main() diff --git a/tests/protocol/test_protocol_util.py b/tests/protocol/test_protocol_util.py index 6af6dbf843..3529e95d42 100644 --- a/tests/protocol/test_protocol_util.py +++ b/tests/protocol/test_protocol_util.py @@ -62,6 +62,8 @@ def tearDown(self): if os.path.exists(endpoint_path): os.remove(endpoint_path) + super(TestProtocolUtil, self).tearDown() + def test_get_protocol_util_should_return_same_object_for_same_thread(self, _): protocol_util1 = get_protocol_util() protocol_util2 = get_protocol_util() diff --git a/tests/tools.py b/tests/tools.py index 5df9ae6d76..b22a856377 100644 --- a/tests/tools.py +++ 
b/tests/tools.py @@ -440,6 +440,22 @@ def create_script(script_file, contents): os.chmod(script_file, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR) +class AgentTestCaseWithGetVmSizeMock(AgentTestCase): + + def setUp(self): + + self._get_vm_size_patch = patch('azurelinuxagent.ga.update.UpdateHandler._get_vm_size', return_value="unknown") + self._get_vm_size_patch.start() + + super(AgentTestCaseWithGetVmSizeMock, self).setUp() + + def tearDown(self): + + if self._get_vm_size_patch: + self._get_vm_size_patch.stop() + + super(AgentTestCaseWithGetVmSizeMock, self).tearDown() + def load_data(name): """Load test data""" path = os.path.join(data_dir, name) diff --git a/tests/utils/test_archive.py b/tests/utils/test_archive.py index 9cf31b9ebc..f65b230109 100644 --- a/tests/utils/test_archive.py +++ b/tests/utils/test_archive.py @@ -1,7 +1,6 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the Apache License. import os -import shutil import tempfile import zipfile from datetime import datetime, timedelta @@ -23,14 +22,11 @@ class TestArchive(AgentTestCase): def setUp(self): + super(TestArchive, self).setUp() prefix = "{0}_".format(self.__class__.__name__) self.tmp_dir = tempfile.mkdtemp(prefix=prefix) - def tearDown(self): - if not debug and self.tmp_dir is not None: - shutil.rmtree(self.tmp_dir) - def _write_file(self, filename, contents=None): full_name = os.path.join(self.tmp_dir, filename) fileutil.mkdir(os.path.dirname(full_name)) diff --git a/tests/utils/test_extension_process_util.py b/tests/utils/test_extension_process_util.py index 11ad1fdce3..e950338fa6 100644 --- a/tests/utils/test_extension_process_util.py +++ b/tests/utils/test_extension_process_util.py @@ -15,7 +15,6 @@ # Requires Python 2.6+ and Openssl 1.0+ # import os -import shutil import subprocess import tempfile @@ -39,8 +38,8 @@ def setUp(self): def tearDown(self): self.stderr.close() self.stdout.close() - if self.tmp_dir is not None: - shutil.rmtree(self.tmp_dir) + + 
super(TestProcessUtils, self).tearDown() def test_wait_for_process_completion_or_timeout_should_terminate_cleanly(self): process = subprocess.Popen( From 3036ae5ed80f1039584d069c95f8191f7db5b66f Mon Sep 17 00:00:00 2001 From: Kevin Clark Date: Thu, 24 Feb 2022 09:08:46 -0800 Subject: [PATCH 34/84] Add mariner 1.0 to DCR v2 (#2517) --- dcr/azure-pipelines.yml | 6 ++++++ dcr/scenarios/agent-bvt/check_firewall.py | 4 ++-- dcr/scenarios/agent-bvt/test_agent_basics.py | 2 +- dcr/scripts/test-vm/harvest.sh | 3 +++ dcr/templates/deploy-linux-vm.json | 3 ++- 5 files changed, 14 insertions(+), 4 deletions(-) diff --git a/dcr/azure-pipelines.yml b/dcr/azure-pipelines.yml index cf67cca119..08a6455261 100644 --- a/dcr/azure-pipelines.yml +++ b/dcr/azure-pipelines.yml @@ -40,6 +40,12 @@ parameters: sku: "7-RAW" version: "latest" name: "rhel7Raw" +## + - publisher: "microsoftcblmariner" + offer: "cbl-mariner" + sku: "cbl-mariner-1" + version: "latest" + name: "mariner1" trigger: - develop diff --git a/dcr/scenarios/agent-bvt/check_firewall.py b/dcr/scenarios/agent-bvt/check_firewall.py index f797a731a4..df7261a7f6 100644 --- a/dcr/scenarios/agent-bvt/check_firewall.py +++ b/dcr/scenarios/agent-bvt/check_firewall.py @@ -24,8 +24,8 @@ def __is_firewall_enabled(): if update_match: return update_match.groups()[0].lower() == 'y' - # The firewall is enabled by default. - return True + # The firewall is disabled by default. + return False def run(*args): diff --git a/dcr/scenarios/agent-bvt/test_agent_basics.py b/dcr/scenarios/agent-bvt/test_agent_basics.py index 3b61a59648..b8c9483c89 100644 --- a/dcr/scenarios/agent-bvt/test_agent_basics.py +++ b/dcr/scenarios/agent-bvt/test_agent_basics.py @@ -48,7 +48,7 @@ def check_root_login(): print(root_passwd_line) root_passwd = root_passwd_line.split(":")[1] - if "!" 
in root_passwd or "*" in root_passwd: + if any(val in root_passwd for val in ("!", "*", "x")): return 'root login disabled' else: raise Exception('root login appears to be enabled: {0}'.format(root_passwd)) diff --git a/dcr/scripts/test-vm/harvest.sh b/dcr/scripts/test-vm/harvest.sh index 10fe54e481..d1715ee59d 100644 --- a/dcr/scripts/test-vm/harvest.sh +++ b/dcr/scripts/test-vm/harvest.sh @@ -11,6 +11,9 @@ set -euxo pipefail ssh -o "StrictHostKeyChecking no" "$1"@"$2" "sudo tar --exclude='journal/*' --exclude='omsbundle' --exclude='omsagent' --exclude='mdsd' --exclude='scx*' --exclude='*.so' --exclude='*__LinuxDiagnostic__*' --exclude='*.zip' --exclude='*.deb' --exclude='*.rpm' -czf logs-$2.tgz /var/log /var/lib/waagent/ /etc/waagent.conf" +# Some distros do not have "other" permissions (e.g., mariner1.0), so change the +# owning user so we can grab them below (during the scp command). +ssh -o "StrictHostKeyChecking no" "$1"@"$2" "sudo chown $1 logs-$2.tgz" # Create directory if doesn't exist mkdir -p "$3" diff --git a/dcr/templates/deploy-linux-vm.json b/dcr/templates/deploy-linux-vm.json index 2a977337a4..ecff1d62a0 100644 --- a/dcr/templates/deploy-linux-vm.json +++ b/dcr/templates/deploy-linux-vm.json @@ -234,7 +234,8 @@ "createOption": "FromImage", "managedDisk": { "storageAccountType": "[variables('osDiskType')]" - } + }, + "diskSizeGB": 32 }, "imageReference": { "publisher": "[parameters('imagePublisher')]", From 05c9e4349ef1714694302973c008ee6de6903173 Mon Sep 17 00:00:00 2001 From: Laveesh Rohra Date: Thu, 24 Feb 2022 10:02:46 -0800 Subject: [PATCH 35/84] Add good/bad events for only the extensions added by us (#2518) --- .../extensions/CustomScriptExtension.py | 14 ++++++---- .../extensions/VMAccessExtension.py | 13 +++++---- dcr/scenario_utils/models.py | 12 ++++++-- .../etp_helpers.py | 5 ++-- .../extension-telemetry-pipeline/run.py | 28 ++++++++++--------- 5 files changed, 43 insertions(+), 29 deletions(-) diff --git 
a/dcr/scenario_utils/extensions/CustomScriptExtension.py b/dcr/scenario_utils/extensions/CustomScriptExtension.py index 32b285a8ce..29df351134 100644 --- a/dcr/scenario_utils/extensions/CustomScriptExtension.py +++ b/dcr/scenario_utils/extensions/CustomScriptExtension.py @@ -5,13 +5,15 @@ class CustomScriptExtension(BaseExtensionTestClass): + META_DATA = ExtensionMetaData( + publisher='Microsoft.Azure.Extensions', + ext_type='CustomScript', + version="2.1" + ) + def __init__(self, extension_name: str): - extension_data = ExtensionMetaData( - publisher='Microsoft.Azure.Extensions', - ext_type='CustomScript', - version="2.1", - ext_name=extension_name - ) + extension_data = self.META_DATA + extension_data.name = extension_name super().__init__(extension_data) diff --git a/dcr/scenario_utils/extensions/VMAccessExtension.py b/dcr/scenario_utils/extensions/VMAccessExtension.py index 0cfa3691ad..c84ae12053 100644 --- a/dcr/scenario_utils/extensions/VMAccessExtension.py +++ b/dcr/scenario_utils/extensions/VMAccessExtension.py @@ -8,14 +8,15 @@ class VMAccessExtension(BaseExtensionTestClass): + META_DATA = ExtensionMetaData( + publisher='Microsoft.OSTCExtensions', + ext_type='VMAccessForLinux', + version="1.5" + ) def __init__(self, extension_name: str): - extension_data = ExtensionMetaData( - publisher='Microsoft.OSTCExtensions', - ext_type='VMAccessForLinux', - version="1.5", - ext_name=extension_name - ) + extension_data = self.META_DATA + extension_data.name = extension_name super().__init__(extension_data) self.public_key, self.private_key_file = generate_ssh_key_pair('dcr_py') self.user_name = f'dcr{random_alphanum(length=8)}' diff --git a/dcr/scenario_utils/models.py b/dcr/scenario_utils/models.py index d8825e4acf..806c830c12 100644 --- a/dcr/scenario_utils/models.py +++ b/dcr/scenario_utils/models.py @@ -11,7 +11,7 @@ class VMModelType(Enum): class ExtensionMetaData: - def __init__(self, publisher: str, ext_type: str, version: str, ext_name: str): + def 
__init__(self, publisher: str, ext_type: str, version: str, ext_name: str = ""): self.__publisher = publisher self.__ext_type = ext_type self.__version = version @@ -30,9 +30,17 @@ def version(self) -> str: return self.__version @property - def name(self) -> str: + def name(self): return self.__ext_name + @name.setter + def name(self, ext_name): + self.__ext_name = ext_name + + @property + def handler_name(self): + return f"{self.publisher}.{self.ext_type}" + class VMMetaData: diff --git a/dcr/scenarios/extension-telemetry-pipeline/etp_helpers.py b/dcr/scenarios/extension-telemetry-pipeline/etp_helpers.py index 1e5ad6e1dc..9fd06587ea 100644 --- a/dcr/scenarios/extension-telemetry-pipeline/etp_helpers.py +++ b/dcr/scenarios/extension-telemetry-pipeline/etp_helpers.py @@ -37,7 +37,7 @@ def wait_for_extension_events_dir_empty(timeout=timedelta(minutes=2)): raise AssertionError("Extension events dir not empty!") -def add_extension_events_and_get_count(bad_event_count=0, no_of_events_per_extension=50): +def add_extension_events_and_get_count(bad_event_count=0, no_of_events_per_extension=50, extension_names=None): print("Creating random extension events now. 
No of Good Events: {0}, No of Bad Events: {1}".format( no_of_events_per_extension - bad_event_count, bad_event_count)) @@ -128,7 +128,8 @@ def oversize_file_limit(make_bad_event): for ext_dir in os.listdir(ext_log_dir): events_dir = os.path.join(ext_log_dir, ext_dir, "events") - if not os.path.isdir(events_dir): + # If specific extensions are provided, only add the events for them + if not os.path.isdir(events_dir) or (extension_names is not None and ext_dir not in extension_names): continue new_opr_id = str(uuid.uuid4()) diff --git a/dcr/scenarios/extension-telemetry-pipeline/run.py b/dcr/scenarios/extension-telemetry-pipeline/run.py index 9ae7d63b46..3bff11e111 100644 --- a/dcr/scenarios/extension-telemetry-pipeline/run.py +++ b/dcr/scenarios/extension-telemetry-pipeline/run.py @@ -5,16 +5,19 @@ from dcr.scenario_utils.agent_log_parser import parse_agent_log_file from dcr.scenario_utils.check_waagent_log import is_data_in_waagent_log, check_waagent_log_for_errors +from dcr.scenario_utils.extensions.CustomScriptExtension import CustomScriptExtension +from dcr.scenario_utils.extensions.VMAccessExtension import VMAccessExtension from dcr.scenario_utils.test_orchestrator import TestFuncObj from dcr.scenario_utils.test_orchestrator import TestOrchestrator from etp_helpers import add_extension_events_and_get_count, wait_for_extension_events_dir_empty, \ get_collect_telemetry_thread_name -def add_good_extension_events_and_verify(): +def add_good_extension_events_and_verify(extension_names): max_events = random.randint(10, 50) print("Creating a total of {0} events".format(max_events)) - ext_event_count = add_extension_events_and_get_count(no_of_events_per_extension=max_events) + ext_event_count = add_extension_events_and_get_count(no_of_events_per_extension=max_events, + extension_names=extension_names) # Ensure that the event collector ran after adding the events wait_for_extension_events_dir_empty() @@ -36,11 +39,12 @@ def add_good_extension_events_and_verify(): 
is_data_in_waagent_log("Collected {0} events for extension: {1}".format(good_count, ext_name)) -def add_bad_events_and_verify_count(): +def add_bad_events_and_verify_count(extension_names): max_events = random.randint(15, 50) print("Creating a total of {0} events".format(max_events)) extension_event_count = add_extension_events_and_get_count(bad_event_count=random.randint(5, max_events - 5), - no_of_events_per_extension=max_events) + no_of_events_per_extension=max_events, + extension_names=extension_names) # Ensure that the event collector ran after adding the events wait_for_extension_events_dir_empty() @@ -62,13 +66,7 @@ def verify_etp_enabled(): event_dirs = glob.glob(os.path.join("/var/log/azure/", "*", "events")) assert event_dirs, "No extension event directories exist!" - verified = True - for event_dir in event_dirs: - exists = os.path.exists(event_dir) - print("Dir: {0} exists: {1}".format(event_dir, exists)) - verified = verified and exists - - if not verified: + if not all(os.path.exists(event_dir) for event_dir in event_dirs): raise AssertionError("Event directory not found for all extensions!") @@ -86,10 +84,14 @@ def check_agent_log(): if __name__ == '__main__': + + extensions_to_verify = [CustomScriptExtension.META_DATA.handler_name, VMAccessExtension.META_DATA.handler_name] tests = [ TestFuncObj("Verify ETP enabled", verify_etp_enabled, raise_on_error=True, retry=3), - TestFuncObj("Add Good extension events and verify", add_good_extension_events_and_verify), - TestFuncObj("Add Bad extension events and verify", add_bad_events_and_verify_count), + TestFuncObj("Add Good extension events and verify", + lambda: add_good_extension_events_and_verify(extensions_to_verify)), + TestFuncObj("Add Bad extension events and verify", + lambda: add_bad_events_and_verify_count(extensions_to_verify)), TestFuncObj("Verify all events processed", wait_for_extension_events_dir_empty), TestFuncObj("Check Agent log", check_agent_log), ] From 
dfdf7b49339c70c7a822c30b90cabc332195cc6b Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Thu, 24 Feb 2022 12:03:16 -0800 Subject: [PATCH 36/84] Remove dependency on default python on test extension (#2519) * Remove dependcy on default python on test extension * add newline Co-authored-by: narrieta --- tests/data/ext/sample_ext-1.3.0.zip | Bin 1770 -> 2079 bytes tests/data/ext/sample_ext-1.3.0/python.sh | 11 +++++++++++ tests/data/ext/sample_ext-1.3.0/sample.py | 2 +- 3 files changed, 12 insertions(+), 1 deletion(-) create mode 100755 tests/data/ext/sample_ext-1.3.0/python.sh diff --git a/tests/data/ext/sample_ext-1.3.0.zip b/tests/data/ext/sample_ext-1.3.0.zip index 2b7e69417013a7cf654e7a032f9593393a912d5b..090e316edf5fd0a836460e2ede633dc88627887e 100644 GIT binary patch delta 1700 zcmZXUc{J2(7{`AYVyrn68YEk~V~NO6Wb9_f*k#W?Xqb_$VPs?}ONc?1+;SW1#l3{C z8=?@}#*)cW$#m0#p4lErzj<~<69oY-&?g=M_NI(IBX`C^XJSVeehCV7YA zFp_(fr8Cp2d8*ObLREuS0NPt&4PL!W#dtlrrG6)3>$(nBDWzR2*|MNgpd_*1dm_o6 z(7s`&IIk7w8YZvJQZB6pr~Fw1Pa){N-H6k_l`XP3=L-lx} z9q9HHkAtIb5OWbjo#SGwAEZ(gvceWo)I#bE$at4@?L=mro`;u0&c~JKbBd9%xqB0x z%;f2|(`v3G;FUK(qw5o{L%^x-x8MPkfaxDO@K9x;7tU zyzG0XY1?sGcG3uHc>a+o-5}XhyiW@uvYm?TE^CU_7?4|QBsx+Z5vyEj^x#C?X9ufu z26Ue(i4(lPKhL4mfZiapv!f?SZ=YRzhZV%RNJ6_y&H76`-w?^gQ_{YQuvY(~BCPQA zEN-i%c`>+GDMQo7rfI5JTcCK{4g>AkmM7`3&?WUecd z;s)<|UzbuvlG_lzfvMnTZ2TX2&N;fJJqR-mA;%O!%rgL!Tcl!Uju=yThaPHSp6tKr zID57$%7zh4opxT@_A|VDRi zKyRow+?zTeDvGrZWgLK*-LdE^7Jov+;}~twTR~kT!_vKH0znLsQfawi*KmjEy-%5NoZ|`aj(~+j>Dj16nCiRPE_hzv~2vYchvm|S6iGMVA){X zt3^J1AZIxvplaB-EA6^_h=fY4n2MEEp=?84xwVi~Z@R2C>-}eVlA`GZM18qjVCLpa zqy<R`As+W4+0h>X{CY zX2XPwYhQ2;ubrY0ETSw2B)Zo{pAw z`AnEq9=BO`_l)lTmImWWSX=UMDuEFp)r@y95Z`=0-!68yuO?|67>xPeG&IV})K4YtbWRQt+k+J(K#*svTJ}Fv@(D z^)NGJL*kUt{6<`M&9;wQ7*W0a)rYsc#{Jkr1WKM1lrufqzP7#~v-R=IDj&MjbLyXG8(Pv&V%F1BF<)m8*HwY3cxF_>J>yy;Y(P%=%| zH1f%#i!FQN#3%Y2xR2THb(1InKuX@}i60&-d(KFH78~ delta 1429 zcmbO)@QSxSz?+#xgn@y9gCVoMKPWe^V50{M1A{Iv0|O_63`22ZZb43}UO{DO2qy#c 
zvx15Q5H79YW?%%WVqgFhz2V;ZkIe-3{SG%cbuj39;H2A1D>iF%$y8+oD7-jn$Py`a z`i|Ff*uBd|?(E~)*WI7he|ofQ@}nyEo2DDYFYQ~&oX&k~ON_>+ z*l+J+ewj1=^WAsyXj0&zc_9moIPH@d?X4zoo#KfLJ@$$1TK^o!qU{PlF4#u?J9%?n zn158i=!YAo)&ky6l58G=jG}WQQ~$sZ%B`jO0l3&+hQ=b7TB+`N^saj4Vrpis}Pp9%=OL|ND9O`)_|g zfBxwG>CYUs{h5YgM#(a*O9j0Gx;A;at(MC*>TK-DkK^U?@JXv=>%oh_vZ31f4_B~qU8GE$C;Lx%&RG9T9UAxGmiXRdlAfhdx3ODjQZmDznRk0_Uoac3t&BT$Lu1LVzQRe<0zL>m zS`qwv_4=y0wNt-t%xv2>*->;utlO5pl<;~H1ye_}AL{GbjxP_L_4G&L&$UTkGIOir z9a95Wp8U^jqtzyr_+#fg_Fem)iygYz$1eK*?xMFJyVq`?{O!UVhmU8%1Qqx2Pdl7m zYd(p8|IO)*hmI%xT6SLHc7y-6!j_E9TP8g%6TZ2nQ1$<|xJ7SS)*lRgzQ!p;aKq~Q z`HQZG99&rMAp5ww7GlBj~h=b9&vL56A2xzE56h*PwPef1&g}F14>L-`}i=iipdvFq9CfGMl&k zwvOxZFPwj$uvok_zkT*o!}X<7E()>R?yg(rs!_xV*i;zWfB*tcwu<3>Vw}z zJ{b-Z2ff(*NhJvrjf`2qVwsu%x6VRi<)UU5cf2+%Z;J157mOM@LbS&?0e=^4vp iTXtzpkOah<7tFw76GQ}fv$BEQ!wiJqfb>095Dx&&pm}cq diff --git a/tests/data/ext/sample_ext-1.3.0/python.sh b/tests/data/ext/sample_ext-1.3.0/python.sh new file mode 100755 index 0000000000..dfa6afcd60 --- /dev/null +++ b/tests/data/ext/sample_ext-1.3.0/python.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +# +# Executes its arguments using the 'python' command, if it can be found, else using 'python3'. 
+# +python=$(command -v python 2> /dev/null) + +if [ -z "$PYTHON" ]; then + python=$(command -v python3) +fi + +${python} "$@" diff --git a/tests/data/ext/sample_ext-1.3.0/sample.py b/tests/data/ext/sample_ext-1.3.0/sample.py index 47f86af8ce..a877a5fe9b 100755 --- a/tests/data/ext/sample_ext-1.3.0/sample.py +++ b/tests/data/ext/sample_ext-1.3.0/sample.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!./python.sh import json import os import re From bb9a337b15e6209048ea1bd2beea71fb1332e660 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Tue, 1 Mar 2022 10:57:39 -0800 Subject: [PATCH 37/84] keep network service active (#2512) --- azurelinuxagent/common/persist_firewall_rules.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/azurelinuxagent/common/persist_firewall_rules.py b/azurelinuxagent/common/persist_firewall_rules.py index 55b19c41ef..74b878ce57 100644 --- a/azurelinuxagent/common/persist_firewall_rules.py +++ b/azurelinuxagent/common/persist_firewall_rules.py @@ -43,7 +43,7 @@ class PersistFirewallRulesHandler(object): [Service] Type=oneshot ExecStart={py_path} {binary_path} -RemainAfterExit=false +RemainAfterExit=yes [Install] WantedBy=network.target @@ -69,7 +69,7 @@ class PersistFirewallRulesHandler(object): # The current version of the unit file; Update it whenever the unit file is modified to ensure Agent can dynamically # modify the unit file on VM too - _UNIT_VERSION = "1.2" + _UNIT_VERSION = "1.3" @staticmethod def get_service_file_path(): From 6445aaa8ff087b70a8fd69beab64e58e948b55d2 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Tue, 1 Mar 2022 11:04:50 -0800 Subject: [PATCH 38/84] Reorganize the history directory (#2520) * Reorganize the history directory * pylint * python2 * write agent status directly to history * update message Co-authored-by: narrieta --- .../extensions_goal_state_from_vm_settings.py | 6 +- azurelinuxagent/common/protocol/goal_state.py | 75 
++++++++-------- azurelinuxagent/common/utils/archive.py | 21 +---- azurelinuxagent/common/utils/timeutil.py | 10 +++ azurelinuxagent/ga/exthandlers.py | 52 ++++------- azurelinuxagent/ga/update.py | 38 ++++---- tests/ga/test_extension.py | 7 +- tests/ga/test_report_status.py | 3 +- tests/ga/test_update.py | 18 ++++ tests/protocol/test_goal_state.py | 87 +++++++++++++------ 10 files changed, 173 insertions(+), 144 deletions(-) create mode 100644 azurelinuxagent/common/utils/timeutil.py diff --git a/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py b/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py index c107aa14fc..7d2657e5da 100644 --- a/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py +++ b/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py @@ -126,7 +126,11 @@ def extensions(self): return self._extensions def get_redacted_text(self): - return re.sub(r'("protectedSettings"\s*:\s*)"[^"]+"', r'\1"*** REDACTED ***"', self._text) + return ExtensionsGoalStateFromVmSettings.redact(self._text) + + @staticmethod + def redact(text): + return re.sub(r'("protectedSettings"\s*:\s*)"[^"]+"', r'\1"*** REDACTED ***"', text) def _parse_vm_settings(self, json_text): vm_settings = _CaseFoldedDict.from_dict(json.loads(json_text)) diff --git a/azurelinuxagent/common/protocol/goal_state.py b/azurelinuxagent/common/protocol/goal_state.py index ee8b15fcd2..d4429085d7 100644 --- a/azurelinuxagent/common/protocol/goal_state.py +++ b/azurelinuxagent/common/protocol/goal_state.py @@ -15,7 +15,6 @@ # limitations under the License. 
# # Requires Python 2.6+ and Openssl 1.0+ -import datetime import os import re import time @@ -27,12 +26,14 @@ from azurelinuxagent.common.exception import ProtocolError, ResourceGoneError, VmSettingsError from azurelinuxagent.common.future import ustr from azurelinuxagent.common.protocol.extensions_goal_state_factory import ExtensionsGoalStateFactory +from azurelinuxagent.common.protocol.extensions_goal_state_from_vm_settings import ExtensionsGoalStateFromVmSettings from azurelinuxagent.common.protocol.hostplugin import VmSettingsNotSupported from azurelinuxagent.common.protocol.restapi import Cert, CertList, RemoteAccessUser, RemoteAccessUsersList from azurelinuxagent.common.utils import fileutil from azurelinuxagent.common.utils.archive import GoalStateHistory from azurelinuxagent.common.utils.cryptutil import CryptUtil from azurelinuxagent.common.utils.textutil import parse_doc, findall, find, findtext, getattrib +from azurelinuxagent.common.utils.timeutil import create_timestamp GOAL_STATE_URI = "http://{0}/machine/?comp=goalstate" CERTS_FILE_NAME = "Certificates.xml" @@ -60,37 +61,29 @@ def __init__(self, wire_client): self._wire_client = wire_client # These "basic" properties come from the initial request to WireServer's goalstate API - self._timestamp = None self._incarnation = None self._role_instance_id = None self._role_config_name = None self._container_id = None - xml_text, xml_doc = GoalState._fetch_goal_state(self._wire_client) - - self._initialize_basic_properties(xml_doc) - - # The goal state for extensions can come from vmSettings when using FastTrack or from extensionsConfig otherwise, self._fetch_extended_goal_state - # populates the '_extensions_goal_state' property. 
+ # These "extended" properties come from additional HTTP requests to the URIs included in the basic goal state, or to the HostGAPlugin self._extensions_goal_state = None - vm_settings = self._fetch_vm_settings() - - # These "extended" properties come from additional HTTP requests to the URIs included in the basic goal state self._hosting_env = None self._shared_conf = None self._certs = None self._remote_access = None - self._fetch_extended_goal_state(xml_text, xml_doc, vm_settings) + timestamp = create_timestamp() + xml_text, xml_doc, incarnation = GoalState._fetch_goal_state(self._wire_client) + self._history = GoalStateHistory(timestamp, incarnation) + + self._initialize_basic_properties(xml_doc) + self._fetch_extended_goal_state(xml_text, xml_doc) except Exception as exception: # We don't log the error here since fetching the goal state is done every few seconds raise ProtocolError(msg="Error fetching goal state", inner=exception) - @property - def timestamp(self): - return self._timestamp - @property def incarnation(self): return self._incarnation @@ -139,26 +132,28 @@ def update(self, force_update=False): """ Updates the current GoalState instance fetching values from the WireServer/HostGAPlugin as needed """ - xml_text, xml_doc = GoalState._fetch_goal_state(self._wire_client) - - vm_settings = self._fetch_vm_settings(force_update=force_update) + timestamp = create_timestamp() + xml_text, xml_doc, incarnation = GoalState._fetch_goal_state(self._wire_client) - if force_update or self._incarnation != findtext(xml_doc, "Incarnation"): - # update the extended goal state, using vm_settings for the extensions (unless they are None, then use extensionsConfig) + if force_update or self._incarnation != incarnation: + # If we are fetching a new goal state + self._history = GoalStateHistory(timestamp, incarnation) self._initialize_basic_properties(xml_doc) - self._fetch_extended_goal_state(xml_text, xml_doc, vm_settings) + self._fetch_extended_goal_state(xml_text, 
xml_doc, force_vm_settings_update=force_update) else: - # else just ensure the extensions are using the latest vm_settings - if vm_settings is not None: + # else ensure the extensions are using the latest vm_settings + timestamp = create_timestamp() + vm_settings, vm_settings_updated = self._fetch_vm_settings(force_update=force_update) + if vm_settings_updated: + self._history = GoalStateHistory(timestamp, vm_settings.etag) self._extensions_goal_state = vm_settings + self._history.save_vm_settings(vm_settings.get_redacted_text()) def save_to_history(self, data, file_name): self._history.save(data, file_name) def _initialize_basic_properties(self, xml_doc): - self._timestamp = datetime.datetime.utcnow().isoformat() self._incarnation = findtext(xml_doc, "Incarnation") - self._history = GoalStateHistory(self._timestamp, self._incarnation) # history for the WireServer goal state; vmSettings are separate role_instance = find(xml_doc, "RoleInstance") self._role_instance_id = findtext(role_instance, "InstanceId") role_config = find(role_instance, "Configuration") @@ -175,16 +170,17 @@ def _fetch_goal_state(wire_client): # In some environments a few goal state requests return a missing RoleInstance; these retries are used to work around that issue # TODO: Consider retrying on 410 (ResourceGone) as well + incarnation = "unknown" for _ in range(0, _GET_GOAL_STATE_MAX_ATTEMPTS): xml_text = wire_client.fetch_config(uri, wire_client.get_header()) xml_doc = parse_doc(xml_text) + incarnation = findtext(xml_doc, "Incarnation") role_instance = find(xml_doc, "RoleInstance") if role_instance: break time.sleep(0.5) else: - incarnation = findtext(xml_doc, "Incarnation") raise ProtocolError("Fetched goal state without a RoleInstance [incarnation {inc}]".format(inc=incarnation)) # Telemetry and the HostGAPlugin depend on the container id/role config; keep them up-to-date each time we fetch the goal state @@ -198,7 +194,7 @@ def _fetch_goal_state(wire_client): 
wire_client.update_host_plugin(container_id, role_config_name) - return xml_text, xml_doc + return xml_text, xml_doc, incarnation def _fetch_vm_settings(self, force_update=False): """ @@ -207,30 +203,23 @@ def _fetch_vm_settings(self, force_update=False): vm_settings, vm_settings_updated = (None, False) if conf.get_enable_fast_track(): - def save_to_history(etag, text): - # The vmSettings are updated independently of the WireServer goal state and they are saved to a separate directory - history = GoalStateHistory(datetime.datetime.utcnow().isoformat(), etag) - history.save_vm_settings(text) - try: vm_settings, vm_settings_updated = self._wire_client.get_host_plugin().fetch_vm_settings(force_update=force_update) except VmSettingsNotSupported: pass except VmSettingsError as exception: - save_to_history(exception.etag, exception.vm_settings_text) + # ensure we save the vmSettings if there were parsing errors + self._history.save_vm_settings(ExtensionsGoalStateFromVmSettings.redact(exception.vm_settings_text)) raise except ResourceGoneError: # retry after refreshing the HostGAPlugin GoalState.update_host_plugin_headers(self._wire_client) vm_settings, vm_settings_updated = self._wire_client.get_host_plugin().fetch_vm_settings(force_update=force_update) - if vm_settings_updated: - save_to_history(vm_settings.etag, vm_settings.get_redacted_text()) - - return vm_settings + return vm_settings, vm_settings_updated - def _fetch_extended_goal_state(self, xml_text, xml_doc, vm_settings): + def _fetch_extended_goal_state(self, xml_text, xml_doc, force_vm_settings_update=False): """ Issues HTTP requests (WireServer) for each of the URIs in the goal state (ExtensionsConfig, Certificate, Remote Access users, etc) and populates the corresponding properties. 
If the given 'vm_settings' are not None they are used for the extensions goal state, @@ -241,8 +230,8 @@ def _fetch_extended_goal_state(self, xml_text, xml_doc, vm_settings): self._history.save_goal_state(xml_text) - # TODO: at this point we always fetch the extensionsConfig, even if it is not needed, and save it for debugging purposes. Once - # FastTrack is stable this code can be updated to fetch it only when actually needed. + # Always fetch the ExtensionsConfig, even if it is not needed, and save it for debugging purposes. Once FastTrack is stable this code could be updated to + # fetch it only when actually needed. extensions_config_uri = findtext(xml_doc, "ExtensionsConfig") if extensions_config_uri is None: @@ -252,8 +241,12 @@ def _fetch_extended_goal_state(self, xml_text, xml_doc, vm_settings): extensions_config = ExtensionsGoalStateFactory.create_from_extensions_config(self._incarnation, xml_text, self._wire_client) self._history.save_extensions_config(extensions_config.get_redacted_text()) + vm_settings, vm_settings_updated = self._fetch_vm_settings(force_update=force_vm_settings_update) + if vm_settings is not None: self._extensions_goal_state = vm_settings + if vm_settings_updated: + self._history.save_vm_settings(vm_settings.get_redacted_text()) else: self._extensions_goal_state = extensions_config diff --git a/azurelinuxagent/common/utils/archive.py b/azurelinuxagent/common/utils/archive.py index b40da7964d..807b5dca73 100644 --- a/azurelinuxagent/common/utils/archive.py +++ b/azurelinuxagent/common/utils/archive.py @@ -38,7 +38,7 @@ ARCHIVE_DIRECTORY_NAME = 'history' -_MAX_ARCHIVED_STATES = 100 +_MAX_ARCHIVED_STATES = 50 _CACHE_PATTERNS = [ re.compile(r"^VmSettings.\d+\.json$"), @@ -208,25 +208,15 @@ def __init__(self, timestamp, tag=None): self._root = os.path.join(conf.get_lib_dir(), ARCHIVE_DIRECTORY_NAME, "{0}_{1}".format(timestamp, tag) if tag is not None else timestamp) def save(self, data, file_name): - def write_to_file(d, f): - with open(f, 
"w") as h: - h.write(d) - - self._save(write_to_file, data, file_name) - - def _save_file(self, source_file, target_name): - self._save(shutil.move, source_file, target_name) - - def _save(self, function, source, target_name): try: if not os.path.exists(self._root): fileutil.mkdir(self._root, mode=0o700) - target = os.path.join(self._root, target_name) - function(source, target) + with open(os.path.join(self._root, file_name), "w") as handle: + handle.write(data) except Exception as e: if not self._errors: # report only 1 error per directory self._errors = True - logger.warn("Failed to save goal state file {0}: {1} [no additional errors saving the goal state will be reported]".format(target_name, e)) + logger.warn("Failed to save {0} to the goal state history: {1} [no additional errors saving the goal state will be reported]".format(file_name, e)) def save_goal_state(self, text): self.save(text, _GOAL_STATE_FILE_NAME) @@ -245,6 +235,3 @@ def save_hosting_env(self, text): def save_shared_conf(self, text): self.save(text, _SHARED_CONF_FILE_NAME) - - def save_status_file(self, status_file): - self._save_file(status_file, AGENT_STATUS_FILE) diff --git a/azurelinuxagent/common/utils/timeutil.py b/azurelinuxagent/common/utils/timeutil.py new file mode 100644 index 0000000000..baf311ca7a --- /dev/null +++ b/azurelinuxagent/common/utils/timeutil.py @@ -0,0 +1,10 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the Apache License. 
+import datetime + + +def create_timestamp(): + """ + Returns a string with current UTC time in iso format + """ + return datetime.datetime.utcnow().isoformat() diff --git a/azurelinuxagent/ga/exthandlers.py b/azurelinuxagent/ga/exthandlers.py index f848d2d329..209f2c57ed 100644 --- a/azurelinuxagent/ga/exthandlers.py +++ b/azurelinuxagent/ga/exthandlers.py @@ -50,7 +50,7 @@ from azurelinuxagent.common.protocol.restapi import ExtensionStatus, ExtensionSubStatus, Extension, ExtHandlerStatus, \ VMStatus, GoalStateAggregateStatus, ExtensionState, ExtensionRequestedState, ExtensionSettings from azurelinuxagent.common.utils import textutil -from azurelinuxagent.common.utils.archive import ARCHIVE_DIRECTORY_NAME, AGENT_STATUS_FILE, GoalStateHistory +from azurelinuxagent.common.utils.archive import ARCHIVE_DIRECTORY_NAME from azurelinuxagent.common.utils.flexible_version import FlexibleVersion from azurelinuxagent.common.version import AGENT_NAME, CURRENT_VERSION, \ PY_VERSION_MAJOR, PY_VERSION_MICRO, PY_VERSION_MINOR @@ -945,8 +945,6 @@ def report_ext_handlers_status(self, incarnation_changed=False, vm_agent_update_ self.report_status_error_state.reset() - self.write_ext_handlers_status_to_info_file(vm_status, incarnation_changed) - return vm_status except Exception as error: @@ -959,52 +957,32 @@ def report_ext_handlers_status(self, incarnation_changed=False, vm_agent_update_ message=msg) return None - def write_ext_handlers_status_to_info_file(self, vm_status, incarnation_changed): - status_file = os.path.join(conf.get_lib_dir(), AGENT_STATUS_FILE) - - if os.path.exists(status_file) and incarnation_changed: - # On new goal state, move the last status report for the previous goal state to the history folder - last_modified = os.path.getmtime(status_file) - timestamp = datetime.datetime.utcfromtimestamp(last_modified).isoformat() - GoalStateHistory(timestamp, "status").save_status_file(status_file) - - # Now create/overwrite the status file; this file is kept for 
debugging purposes only + def get_ext_handlers_status_debug_info(self, vm_status): status_blob_text = self.protocol.get_status_blob_data() if status_blob_text is None: status_blob_text = "" - debug_info = ExtHandlersHandler._get_status_debug_info(vm_status) - - status_file_text = \ -'''{{ - "__comment__": "The __status__ property is the actual status reported to CRP", - "__status__": {0}, - "__debug__": {1} -}} -'''.format(status_blob_text, debug_info) - - fileutil.write_file(status_file, status_file_text) - - @staticmethod - def _get_status_debug_info(vm_status): support_multi_config = dict() + vm_status_data = get_properties(vm_status) + vm_handler_statuses = vm_status_data.get('vmAgent', dict()).get('extensionHandlers') + for handler_status in vm_handler_statuses: + if handler_status.get('name') is not None: + support_multi_config[handler_status.get('name')] = handler_status.get('supports_multi_config') - if vm_status is not None: - vm_status_data = get_properties(vm_status) - vm_handler_statuses = vm_status_data.get('vmAgent', dict()).get('extensionHandlers') - for handler_status in vm_handler_statuses: - if handler_status.get('name') is not None: - support_multi_config[handler_status.get('name')] = handler_status.get('supports_multi_config') - - debug_info = { + debug_text = json.dumps({ "agentName": AGENT_NAME, "daemonVersion": str(version.get_daemon_version()), "pythonVersion": "Python: {0}.{1}.{2}".format(PY_VERSION_MAJOR, PY_VERSION_MINOR, PY_VERSION_MICRO), "extensionSupportedFeatures": [name for name, _ in get_agent_supported_features_list_for_extensions().items()], "supportsMultiConfig": support_multi_config - } + }) - return json.dumps(debug_info) + return '''{{ + "__comment__": "The __status__ property is the actual status reported to CRP", + "__status__": {0}, + "__debug__": {1} +}} +'''.format(status_blob_text, debug_text) def report_ext_handler_status(self, vm_status, ext_handler, incarnation_changed): ext_handler_i = 
ExtHandlerInstance(ext_handler, self.protocol) diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py index ab1950d934..0e87586565 100644 --- a/azurelinuxagent/ga/update.py +++ b/azurelinuxagent/ga/update.py @@ -51,7 +51,7 @@ VERSION_0 from azurelinuxagent.common.protocol.util import get_protocol_util from azurelinuxagent.common.utils import shellutil -from azurelinuxagent.common.utils.archive import StateArchiver +from azurelinuxagent.common.utils.archive import StateArchiver, AGENT_STATUS_FILE from azurelinuxagent.common.utils.flexible_version import FlexibleVersion from azurelinuxagent.common.utils.networkutil import AddFirewallRules from azurelinuxagent.common.utils.shellutil import CommandError @@ -170,7 +170,7 @@ def __init__(self): # these members are used to avoid reporting errors too frequently self._heartbeat_update_goal_state_error_count = 0 self._last_try_update_goal_state_failed = False - self._report_status_last_failed_incarnation = -1 + self._report_status_last_failed_goal_state = None # incarnation of the last goal state that has been fully processed # (None if no goal state has been processed) @@ -588,18 +588,16 @@ def _process_goal_state(self, exthandlers_handler, remote_access_handler): self._extensions_summary = ExtensionsSummary() exthandlers_handler.run() - # report status always, even if the goal state did not change + # always report status, even if the goal state did not change # do it before processing the remote access, since that operation can take a long time self._report_status(exthandlers_handler) if self._processing_new_incarnation(): remote_access_handler.run() + # lastly, cleanup the goal state history (but do it only on new goal states - no need to do it on every iteration) if self._processing_new_extensions_goal_state(): - try: - UpdateHandler._cleanup_goal_state_history() - except Exception as exception: - logger.warn("Error cleaning up the goal state history: {0}", ustr(exception)) + 
UpdateHandler._cleanup_goal_state_history() finally: if self._goal_state is not None: @@ -608,9 +606,12 @@ def _process_goal_state(self, exthandlers_handler, remote_access_handler): @staticmethod def _cleanup_goal_state_history(): - archiver = StateArchiver(conf.get_lib_dir()) - archiver.purge() - archiver.archive() + try: + archiver = StateArchiver(conf.get_lib_dir()) + archiver.purge() + archiver.archive() + except Exception as exception: + logger.warn("Error cleaning up the goal state history: {0}", ustr(exception)) @staticmethod def _cleanup_legacy_goal_state_history(): @@ -666,13 +667,19 @@ def _report_status(self, exthandlers_handler): supports_fast_track = False else: supports_fast_track = self._goal_state.extensions_goal_state.source_channel == GoalStateChannel.HostGAPlugin + vm_status = exthandlers_handler.report_ext_handlers_status( incarnation_changed=self._processing_new_extensions_goal_state(), vm_agent_update_status=vm_agent_update_status, vm_agent_supports_fast_track=supports_fast_track) - if vm_status is None: - return + if vm_status is not None: + self._report_extensions_summary(vm_status) + if self._goal_state is not None: + agent_status = exthandlers_handler.get_ext_handlers_status_debug_info(vm_status) + self._goal_state.save_to_history(agent_status, AGENT_STATUS_FILE) + + def _report_extensions_summary(self, vm_status): try: extensions_summary = ExtensionsSummary(vm_status) if self._extensions_summary != extensions_summary: @@ -687,10 +694,9 @@ def _report_status(self, exthandlers_handler): if self._is_initial_goal_state: self._on_initial_goal_state_completed(self._extensions_summary) except Exception as error: - # report errors only once per incarnation - goal_state = exthandlers_handler.protocol.get_goal_state() - if self._report_status_last_failed_incarnation != goal_state.incarnation: - self._report_status_last_failed_incarnation = goal_state.incarnation + # report errors only once per goal state + if 
self._report_status_last_failed_goal_state != self._goal_state.extensions_goal_state.id: + self._report_status_last_failed_goal_state = self._goal_state.extensions_goal_state.id msg = u"Error logging the goal state summary: {0}".format(textutil.format_exception(error)) logger.warn(msg) add_event(op=WALAEventOperation.GoalState, is_success=False, message=msg) diff --git a/tests/ga/test_extension.py b/tests/ga/test_extension.py index 9ad8297b06..5967031293 100644 --- a/tests/ga/test_extension.py +++ b/tests/ga/test_extension.py @@ -33,7 +33,6 @@ from azurelinuxagent.common.datacontract import get_properties from azurelinuxagent.common.event import WALAEventOperation from azurelinuxagent.common.utils import fileutil -from azurelinuxagent.common.utils.archive import AGENT_STATUS_FILE from azurelinuxagent.common.utils.fileutil import read_file from azurelinuxagent.common.utils.flexible_version import FlexibleVersion from azurelinuxagent.common.version import PY_VERSION_MAJOR, PY_VERSION_MINOR, PY_VERSION_MICRO, AGENT_NAME, \ @@ -3425,10 +3424,8 @@ def mock_http_put(url, *args, **_): exthandlers_handler.run() - exthandlers_handler.report_ext_handlers_status() - - status_path = os.path.join(conf.get_lib_dir(), AGENT_STATUS_FILE.format(1)) - actual_status_json = json.loads(fileutil.read_file(status_path)) + vm_status = exthandlers_handler.report_ext_handlers_status() + actual_status_json = json.loads(exthandlers_handler.get_ext_handlers_status_debug_info(vm_status)) # Don't compare the guestOSInfo status_property = actual_status_json.get("__status__") diff --git a/tests/ga/test_report_status.py b/tests/ga/test_report_status.py index dd26845fe9..df1776d31b 100644 --- a/tests/ga/test_report_status.py +++ b/tests/ga/test_report_status.py @@ -109,11 +109,12 @@ def test_report_status_should_log_errors_only_once_per_goal_state(self): with patch("azurelinuxagent.common.conf.get_autoupdate_enabled", return_value=False): # skip agent update with 
patch("azurelinuxagent.ga.update.logger.warn") as logger_warn: update_handler = get_update_handler() + update_handler._goal_state = protocol.get_goal_state() # these tests skip the initialization of the goal state. so do that here exthandlers_handler = ExtHandlersHandler(protocol) update_handler._report_status(exthandlers_handler) self.assertEqual(0, logger_warn.call_count, "UpdateHandler._report_status() should not report WARNINGS when there are no errors") - with patch("azurelinuxagent.ga.update.ExtensionsSummary.__init__", return_value=Exception("TEST EXCEPTION")): # simulate an error during _report_status() + with patch("azurelinuxagent.ga.update.ExtensionsSummary.__init__", side_effect=Exception("TEST EXCEPTION")): # simulate an error during _report_status() update_handler._report_status(exthandlers_handler) update_handler._report_status(exthandlers_handler) update_handler._report_status(exthandlers_handler) diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index b9d9e29642..6f587d3c53 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -41,6 +41,7 @@ from azurelinuxagent.common.protocol.util import ProtocolUtil from azurelinuxagent.common.protocol.wire import WireProtocol from azurelinuxagent.common.utils import fileutil, restutil, textutil +from azurelinuxagent.common.utils.archive import ARCHIVE_DIRECTORY_NAME, AGENT_STATUS_FILE from azurelinuxagent.common.utils.flexible_version import FlexibleVersion from azurelinuxagent.common.utils.networkutil import FirewallCmdDirectCommands, AddFirewallRules from azurelinuxagent.common.version import AGENT_PKG_GLOB, AGENT_DIR_GLOB, AGENT_NAME, AGENT_DIR_PATTERN, \ @@ -2780,6 +2781,7 @@ def create_vm_status(extension_status): exthandlers_handler.report_ext_handlers_status = Mock(return_value=create_vm_status(ExtensionStatusValue.success)) else: exthandlers_handler.report_ext_handlers_status = Mock(side_effect=[create_vm_status(s) for s in extension_statuses]) + 
exthandlers_handler.get_ext_handlers_status_debug_info = Mock(return_value='') yield exthandlers_handler @@ -2813,6 +2815,22 @@ def test_it_should_process_goal_state_only_on_new_goal_state(self): self.assertEqual(3, exthandlers_handler.report_ext_handlers_status.call_count, "exthandlers_handler.report_ext_handlers_status() should have been called on a new goal state") self.assertEqual(2, remote_access_handler.run.call_count, "remote_access_handler.run() should have been called on a new goal state") + def test_it_should_write_the_agent_status_to_the_history_folder(self): + with _mock_exthandlers_handler() as exthandlers_handler: + update_handler = _create_update_handler() + remote_access_handler = Mock() + remote_access_handler.run = Mock() + + update_handler._process_goal_state(exthandlers_handler, remote_access_handler) + + incarnation = exthandlers_handler.protocol.get_goal_state().incarnation + matches = glob.glob(os.path.join(conf.get_lib_dir(), ARCHIVE_DIRECTORY_NAME, "*_{0}".format(incarnation))) + self.assertTrue(len(matches) == 1, "Could not find the history directory for the goal state. 
Got: {0}".format(matches)) + + status_file = os.path.join(matches[0], AGENT_STATUS_FILE) + self.assertTrue(os.path.exists(status_file), "Could not find {0}".format(status_file)) + + class HeartbeatTestCase(AgentTestCase): @patch("azurelinuxagent.common.logger.info") diff --git a/tests/protocol/test_goal_state.py b/tests/protocol/test_goal_state.py index 4e8dd4f737..dfa2ccb4f1 100644 --- a/tests/protocol/test_goal_state.py +++ b/tests/protocol/test_goal_state.py @@ -5,12 +5,14 @@ import os import re +from azurelinuxagent.common.future import httpclient from azurelinuxagent.common.protocol.goal_state import GoalState, _GET_GOAL_STATE_MAX_ATTEMPTS from azurelinuxagent.common.exception import ProtocolError from azurelinuxagent.common.utils import fileutil from azurelinuxagent.common.utils.archive import ARCHIVE_DIRECTORY_NAME -from tests.protocol.mocks import mock_wire_protocol +from tests.protocol.mocks import mock_wire_protocol, MockHttpResponse from tests.protocol import mockwiredata +from tests.protocol.HttpRequestPredicates import HttpRequestPredicates from tests.tools import AgentTestCase, patch, load_data @@ -27,34 +29,64 @@ def test_fetch_goal_state_should_raise_on_incomplete_goal_state(self): self.assertEqual(_GET_GOAL_STATE_MAX_ATTEMPTS, mock_sleep.call_count, "Unexpected number of retries") @patch("azurelinuxagent.common.conf.get_enable_fast_track", return_value=True) - def test_fetch_full_goal_state_should_save_goal_state_to_history_directory(self, _): + def test_instantiating_goal_state_should_save_the_goal_state_to_the_history_directory(self, _): with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: - # use a new goal state with a specific test incarnation and etag protocol.mock_wire_data.set_incarnation(999) - protocol.mock_wire_data.set_etag(888) - goal_state = GoalState(protocol.client) - matches = glob.glob(os.path.join(self.tmp_dir, ARCHIVE_DIRECTORY_NAME, "*_999")) - self.assertTrue(len(matches) == 1, "Expected one history 
directory for incarnation 999. Got: {0}".format(matches)) + _ = GoalState(protocol.client) - history_directory = matches[0] - extensions_config_file = os.path.join(history_directory, "ExtensionsConfig.xml") - expected_files = [ - os.path.join(history_directory, "GoalState.xml"), - os.path.join(history_directory, "SharedConfig.xml"), - os.path.join(history_directory, "HostingEnvironmentConfig.xml"), - extensions_config_file, - ] + self._assert_directory_contents( + self._find_history_subdirectory("999"), + ["GoalState.xml", "ExtensionsConfig.xml", "VmSettings.json", "SharedConfig.xml", "HostingEnvironmentConfig.xml"]) - matches = glob.glob(os.path.join(self.tmp_dir, ARCHIVE_DIRECTORY_NAME, "*_888")) - self.assertTrue(len(matches) == 1, "Expected one history directory for etag 888. Got: {0}".format(matches)) + def _find_history_subdirectory(self, tag): + matches = glob.glob(os.path.join(self.tmp_dir, ARCHIVE_DIRECTORY_NAME, "*_{0}".format(tag))) + self.assertTrue(len(matches) == 1, "Expected one history directory for tag {0}. 
Got: {1}".format(tag, matches)) + return matches[0] - history_directory = matches[0] - vm_settings_file = os.path.join(history_directory, "VmSettings.json") - expected_files.append(vm_settings_file) + def _assert_directory_contents(self, directory, expected_files): + actual_files = os.listdir(directory) + + expected_files.sort() + actual_files.sort() + + self.assertEqual(expected_files, actual_files, "The expected files were not saved to {0}".format(directory)) - for f in expected_files: - self.assertTrue(os.path.exists(f), "{0} was not saved".format(f)) + @patch("azurelinuxagent.common.conf.get_enable_fast_track", return_value=True) + def test_update_should_create_new_history_subdirectories(self, _): + with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: + protocol.mock_wire_data.set_incarnation(123) + protocol.mock_wire_data.set_etag(654) + + goal_state = GoalState(protocol.client) + self._assert_directory_contents( + self._find_history_subdirectory("123"), + ["GoalState.xml", "ExtensionsConfig.xml", "VmSettings.json", "SharedConfig.xml", "HostingEnvironmentConfig.xml"]) + + def http_get_handler(url, *_, **__): + if HttpRequestPredicates.is_host_plugin_vm_settings_request(url): + return MockHttpResponse(status=httpclient.NOT_MODIFIED) + return None + + protocol.mock_wire_data.set_incarnation(234) + protocol.set_http_handlers(http_get_handler=http_get_handler) + goal_state.update() + self._assert_directory_contents( + self._find_history_subdirectory("234"), + ["GoalState.xml", "ExtensionsConfig.xml", "SharedConfig.xml", "HostingEnvironmentConfig.xml"]) + + protocol.mock_wire_data.set_etag(987) + protocol.set_http_handlers(http_get_handler=None) + goal_state.update() + self._assert_directory_contents( + self._find_history_subdirectory("987"), ["VmSettings.json"]) + + @patch("azurelinuxagent.common.conf.get_enable_fast_track", return_value=True) + def test_it_should_redact_the_protected_settings_when_saving_to_the_history_directory(self, _): + 
with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: + protocol.mock_wire_data.set_incarnation(888) + + goal_state = GoalState(protocol.client) extensions_goal_state = goal_state.extensions_goal_state protected_settings = [] @@ -65,6 +97,9 @@ def test_fetch_full_goal_state_should_save_goal_state_to_history_directory(self, if len(protected_settings) == 0: raise Exception("The test goal state does not include any protected settings") + history_directory = self._find_history_subdirectory("888") + extensions_config_file = os.path.join(history_directory, "ExtensionsConfig.xml") + vm_settings_file = os.path.join(history_directory, "VmSettings.json") for file_name in extensions_config_file, vm_settings_file: with open(file_name, "r") as stream: file_contents = stream.read() @@ -88,13 +123,13 @@ def test_it_should_save_vm_settings_on_parse_errors(self, _): data_file = mockwiredata.DATA_FILE_VM_SETTINGS.copy() data_file["vm_settings"] = invalid_vm_settings_file protocol.mock_wire_data = mockwiredata.WireProtocolData(data_file) - protocol.mock_wire_data.set_etag(888) + protocol.mock_wire_data.set_incarnation(888) - with self.assertRaises(ProtocolError): - GoalState(protocol.client) + with self.assertRaises(ProtocolError): # the parsing error will cause an exception + _ = GoalState(protocol.client) matches = glob.glob(os.path.join(self.tmp_dir, ARCHIVE_DIRECTORY_NAME, "*_888")) - self.assertTrue(len(matches) == 1, "Expected one history directory for etag 888. Got: {0}".format(matches)) + self.assertTrue(len(matches) == 1, "Expected one history directory for incarnation 888. 
Got: {0}".format(matches)) history_directory = matches[0] vm_settings_file = os.path.join(history_directory, "VmSettings.json") From 415882d767326bc2c65e364f6137e0d7dd2fe4ed Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Wed, 2 Mar 2022 12:06:46 -0800 Subject: [PATCH 39/84] Improve logging of goal state processing (#2523) * Reorganize the history directory * pylint * python2 * write agent status directly to history * Improve logging for goal state processing * fix incarnation * fix message * Update test * fix messages * add return value Co-authored-by: narrieta --- .../common/protocol/extensions_goal_state.py | 24 ++++++- .../protocol/extensions_goal_state_factory.py | 4 +- ...sions_goal_state_from_extensions_config.py | 8 ++- .../extensions_goal_state_from_vm_settings.py | 23 +++++-- azurelinuxagent/common/protocol/goal_state.py | 3 + azurelinuxagent/common/protocol/hostplugin.py | 3 +- azurelinuxagent/ga/exthandlers.py | 65 ++++++++----------- azurelinuxagent/ga/update.py | 2 +- dcr/scenario_utils/agent_log_parser.py | 11 +--- ...sions_goal_state_from_extensions_config.py | 2 +- ..._extensions_goal_state_from_vm_settings.py | 2 +- 11 files changed, 84 insertions(+), 63 deletions(-) diff --git a/azurelinuxagent/common/protocol/extensions_goal_state.py b/azurelinuxagent/common/protocol/extensions_goal_state.py index 1a628ab078..fc423c885a 100644 --- a/azurelinuxagent/common/protocol/extensions_goal_state.py +++ b/azurelinuxagent/common/protocol/extensions_goal_state.py @@ -28,6 +28,12 @@ class GoalStateChannel(object): Empty = "Empty" +class GoalStateSource(object): + Fabric = "Fabric" + FastTrack = "FastTrack" + Empty = "Empty" + + class ExtensionsGoalState(object): """ ExtensionsGoalState represents the extensions information in the goal state; that information can originate from @@ -57,7 +63,17 @@ def created_on_timestamp(self): raise NotImplementedError() @property - def source_channel(self): + def channel(self): + """ + Whether the goal state was 
retrieved from the WireServer or the HostGAPlugin + """ + raise NotImplementedError() + + @property + def source(self): + """ + Whether the goal state originated from Fabric or Fast Track + """ raise NotImplementedError() @property @@ -147,9 +163,13 @@ def created_on_timestamp(self): return datetime.datetime.min @property - def source_channel(self): + def channel(self): return GoalStateChannel.Empty + @property + def source(self): + return GoalStateSource.Empty + @property def status_upload_blob(self): return None diff --git a/azurelinuxagent/common/protocol/extensions_goal_state_factory.py b/azurelinuxagent/common/protocol/extensions_goal_state_factory.py index f3c8dcffe1..552bc1075c 100644 --- a/azurelinuxagent/common/protocol/extensions_goal_state_factory.py +++ b/azurelinuxagent/common/protocol/extensions_goal_state_factory.py @@ -31,6 +31,6 @@ def create_from_extensions_config(incarnation, xml_text, wire_client): return ExtensionsGoalStateFromExtensionsConfig(incarnation, xml_text, wire_client) @staticmethod - def create_from_vm_settings(etag, json_text): - return ExtensionsGoalStateFromVmSettings(etag, json_text) + def create_from_vm_settings(etag, json_text, correlation_id): + return ExtensionsGoalStateFromVmSettings(etag, json_text, correlation_id) diff --git a/azurelinuxagent/common/protocol/extensions_goal_state_from_extensions_config.py b/azurelinuxagent/common/protocol/extensions_goal_state_from_extensions_config.py index 2f3eaa1b31..9aafeb2d9b 100644 --- a/azurelinuxagent/common/protocol/extensions_goal_state_from_extensions_config.py +++ b/azurelinuxagent/common/protocol/extensions_goal_state_from_extensions_config.py @@ -23,7 +23,7 @@ from azurelinuxagent.common.event import add_event, WALAEventOperation from azurelinuxagent.common.exception import ExtensionsConfigError from azurelinuxagent.common.future import ustr -from azurelinuxagent.common.protocol.extensions_goal_state import ExtensionsGoalState, GoalStateChannel +from 
azurelinuxagent.common.protocol.extensions_goal_state import ExtensionsGoalState, GoalStateChannel, GoalStateSource from azurelinuxagent.common.protocol.restapi import ExtensionSettings, Extension, VMAgentManifest, ExtensionState, InVMGoalStateMetaData from azurelinuxagent.common.utils.textutil import parse_doc, parse_json, findall, find, findtext, getattrib, gettext, format_exception, \ is_str_none_or_whitespace, is_str_empty @@ -149,9 +149,13 @@ def created_on_timestamp(self): return self._created_on_timestamp @property - def source_channel(self): + def channel(self): return GoalStateChannel.WireServer + @property + def source(self): + return GoalStateSource.Fabric + @property def status_upload_blob(self): return self._status_upload_blob diff --git a/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py b/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py index 7d2657e5da..31a06bf2a0 100644 --- a/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py +++ b/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py @@ -32,10 +32,11 @@ class ExtensionsGoalStateFromVmSettings(ExtensionsGoalState): _MINIMUM_TIMESTAMP = datetime.datetime(1900, 1, 1, 0, 0) # min value accepted by datetime.strftime() - def __init__(self, etag, json_text): + def __init__(self, etag, json_text, correlation_id): super(ExtensionsGoalStateFromVmSettings, self).__init__() self._id = "etag_{0}".format(etag) self._etag = etag + self._fetch_correlation_id = correlation_id self._text = json_text self._host_ga_plugin_version = FlexibleVersion('0.0.0.0') self._schema_version = FlexibleVersion('0.0.0.0') @@ -74,28 +75,38 @@ def schema_version(self): @property def activity_id(self): + """ + The CRP activity id + """ return self._activity_id @property def correlation_id(self): + """ + The correlation id for the CRP operation + """ return self._correlation_id + @property + def fetch_correlation_id(self): + """ + The 
correlation id for the fetch operation (i.e. the call to the HostGAPlugin vmSettings API) + """ + return self._fetch_correlation_id + @property def created_on_timestamp(self): """ - Timestamp assigned by the CRP (time at which the Fast Track goal state was created) + Timestamp assigned by the CRP (time at which the goal state was created) """ return self._created_on_timestamp @property - def source_channel(self): + def channel(self): return GoalStateChannel.HostGAPlugin @property def source(self): - """ - Whether the goal state originated from Fabric or Fast Track - """ return self._source @property diff --git a/azurelinuxagent/common/protocol/goal_state.py b/azurelinuxagent/common/protocol/goal_state.py index d4429085d7..995f58a84a 100644 --- a/azurelinuxagent/common/protocol/goal_state.py +++ b/azurelinuxagent/common/protocol/goal_state.py @@ -145,6 +145,7 @@ def update(self, force_update=False): timestamp = create_timestamp() vm_settings, vm_settings_updated = self._fetch_vm_settings(force_update=force_update) if vm_settings_updated: + logger.info("Fetched new vmSettings [correlation ID: {0} eTag: {1} source: {2}]", vm_settings.fetch_correlation_id, vm_settings.etag, vm_settings.source) self._history = GoalStateHistory(timestamp, vm_settings.etag) self._extensions_goal_state = vm_settings self._history.save_vm_settings(vm_settings.get_redacted_text()) @@ -244,6 +245,8 @@ def _fetch_extended_goal_state(self, xml_text, xml_doc, force_vm_settings_update vm_settings, vm_settings_updated = self._fetch_vm_settings(force_update=force_vm_settings_update) if vm_settings is not None: + new = " new " if vm_settings_updated else " " + logger.info("Fetched{0}vmSettings [correlation ID: {1} eTag: {2} source: {3}]", new, vm_settings.fetch_correlation_id, vm_settings.etag, vm_settings.source) self._extensions_goal_state = vm_settings if vm_settings_updated: self._history.save_vm_settings(vm_settings.get_redacted_text()) diff --git a/azurelinuxagent/common/protocol/hostplugin.py 
b/azurelinuxagent/common/protocol/hostplugin.py index 375969f0fb..3004fc6256 100644 --- a/azurelinuxagent/common/protocol/hostplugin.py +++ b/azurelinuxagent/common/protocol/hostplugin.py @@ -471,7 +471,7 @@ def format_message(msg): response_content = ustr(response.read(), encoding='utf-8') - vm_settings = ExtensionsGoalStateFactory.create_from_vm_settings(response_etag, response_content) + vm_settings = ExtensionsGoalStateFactory.create_from_vm_settings(response_etag, response_content, correlation_id) # log the HostGAPlugin version if vm_settings.host_ga_plugin_version != self._host_plugin_version: @@ -485,7 +485,6 @@ def format_message(msg): if vm_settings.host_ga_plugin_version < FlexibleVersion("1.0.8.117"): raise_not_supported(reset_state=True) - logger.info("Fetched new vmSettings [correlation ID: {0} New eTag: {1}]", correlation_id, vm_settings.etag) self._host_plugin_supports_vm_settings = True self._cached_vm_settings = vm_settings return vm_settings, True diff --git a/azurelinuxagent/ga/exthandlers.py b/azurelinuxagent/ga/exthandlers.py index 209f2c57ed..be21a8c3c4 100644 --- a/azurelinuxagent/ga/exthandlers.py +++ b/azurelinuxagent/ga/exthandlers.py @@ -281,7 +281,6 @@ def __init__(self, protocol): # The GoalState Aggregate status needs to report the last status of the GoalState. Since we only process # extensions on incarnation change, we need to maintain its state. # Setting the status to None here. This would be overridden as soon as the first GoalState is processed - # (once self._extension_processing_allowed() is True). 
self.__gs_aggregate_status = None self.report_status_error_state = ErrorState() @@ -294,53 +293,43 @@ def __last_gs_unsupported(self): self.__gs_aggregate_status.code == GoalStateAggregateStatusCodes.GoalStateUnsupportedRequiredFeatures def run(self): - etag, activity_id, correlation_id, gs_creation_time = None, None, None, None - try: - extensions_goal_state = self.protocol.get_goal_state().extensions_goal_state + gs = self.protocol.get_goal_state() + egs = gs.extensions_goal_state - # self.ext_handlers and etag need to be initialized first, since status reporting depends on them; also + # self.ext_handlers needs to be initialized before returning, since status reporting depends on it; also # we make a deep copy of the extensions, since changes are made to self.ext_handlers while processing the extensions - self.ext_handlers = copy.deepcopy(extensions_goal_state.extensions) - etag = self.protocol.client.get_goal_state().incarnation + self.ext_handlers = copy.deepcopy(egs.extensions) if not self._extension_processing_allowed(): return - gs_creation_time = extensions_goal_state.created_on_timestamp - activity_id = extensions_goal_state.activity_id - correlation_id = extensions_goal_state.correlation_id + utc_start = datetime.datetime.utcnow() + error = None + message = "ProcessExtensionsGoalState started [{0} channel: {1} source: {2} activity: {3} correlation {4} created: {5}]".format( + egs.id, egs.channel, egs.source, egs.activity_id, egs.correlation_id, egs.created_on_timestamp) + logger.info(message) + add_event(op=WALAEventOperation.ExtensionProcessing, message=message) + + try: + self.__process_and_handle_extensions(gs.incarnation) # TODO: review the use of incarnation + self._cleanup_outdated_handlers() + except Exception as e: + error = u"Error processing extensions:{0}".format(textutil.format_exception(e)) + finally: + duration = elapsed_milliseconds(utc_start) + if error is None: + message = 'ProcessExtensionsGoalState completed [{0} {1} 
ms]'.format(egs.id, duration) + logger.info(message) + else: + message = 'ProcessExtensionsGoalState failed [{0} {1} ms]\n{2}'.format(egs.id, duration, error) + logger.error(message) + add_event(op=WALAEventOperation.ExtensionProcessing, is_success=(error is None), message=message, log_event=False, duration=duration) + except Exception as error: msg = u"ProcessExtensionsInGoalState - Exception processing extension handlers:{0}".format(textutil.format_exception(error)) - logger.warn(msg) + logger.error(msg) add_event(op=WALAEventOperation.ExtensionProcessing, is_success=False, message=msg, log_event=False) - return - - def goal_state_debug_info(duration=None): - if duration is None: - return "[Incarnation: {0}; Activity Id: {1}; Correlation Id: {2}; GS Creation Time: {3}]".format(etag, activity_id, correlation_id, gs_creation_time) - else: - return "[Incarnation: {0}; {1} ms; Activity Id: {2}; Correlation Id: {3}; GS Creation Time: {4}]".format(etag, duration, activity_id, correlation_id, gs_creation_time) - - utc_start = datetime.datetime.utcnow() - error = None - message = "ProcessExtensionsInGoalState started {0}".format(goal_state_debug_info()) - logger.info(message) - add_event(op=WALAEventOperation.ExtensionProcessing, message=message) - try: - self.__process_and_handle_extensions(etag) - self._cleanup_outdated_handlers() - except Exception as e: - error = u"ProcessExtensionsInGoalState - Exception processing extension handlers:{0}".format(textutil.format_exception(e)) - finally: - duration = elapsed_milliseconds(utc_start) - if error is None: - message = 'ProcessExtensionsInGoalState completed {0}'.format(goal_state_debug_info(duration=duration)) - logger.info(message) - else: - message = 'ProcessExtensionsInGoalState failed {0}\nError:{1}'.format(goal_state_debug_info(duration=duration), error) - logger.warn(message) - add_event(op=WALAEventOperation.ExtensionProcessing, is_success=(error is None), message=message, log_event=False, duration=duration) def 
__get_unsupported_features(self): required_features = self.protocol.get_goal_state().extensions_goal_state.required_features diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py index 0e87586565..ace17d92bf 100644 --- a/azurelinuxagent/ga/update.py +++ b/azurelinuxagent/ga/update.py @@ -666,7 +666,7 @@ def _report_status(self, exthandlers_handler): if self._goal_state is None: supports_fast_track = False else: - supports_fast_track = self._goal_state.extensions_goal_state.source_channel == GoalStateChannel.HostGAPlugin + supports_fast_track = self._goal_state.extensions_goal_state.channel == GoalStateChannel.HostGAPlugin vm_status = exthandlers_handler.report_ext_handlers_status( incarnation_changed=self._processing_new_extensions_goal_state(), diff --git a/dcr/scenario_utils/agent_log_parser.py b/dcr/scenario_utils/agent_log_parser.py index b316b0cdb0..09b9b723de 100644 --- a/dcr/scenario_utils/agent_log_parser.py +++ b/dcr/scenario_utils/agent_log_parser.py @@ -6,14 +6,9 @@ AGENT_LOG_FILE = '/var/log/waagent.log' -# Examples: -# ProcessGoalState completed [Incarnation: 12; 23 ms] -# ProcessGoalState completed [Incarnation: 12; 23 ms; Activity Id: 555e551c-600e-4fb4-90ba-8ab8ec28eccc] -# ProcessGoalState completed [Incarnation: 12; 23 ms; Correlation Id: 555e551c-600e-4fb4-90ba-8ab8ec28eccc] -# ProcessGoalState completed [Incarnation: 12; 23 ms; GS Creation Time: 2020-11-09T17:48:50.000000Z] -GOAL_STATE_COMPLETED = r"ProcessExtensionsInGoalState completed\s\[Incarnation:\s(?P<incarnation>\d+);\s(?P<duration>\d+)\sms" \ - r"(;\sActivity Id:\s(?P<activity_id>\S+))?(;\sCorrelation Id:\s(?P<correlation_id>\S+))?" \ - r"(;\sGS Creation Time:\s(?P<gs_creation_time>\S+))?\]" +# Example: +# ProcessExtensionsGoalState completed [etag_2824367392948713696 4073 ms] +GOAL_STATE_COMPLETED = r"ProcessExtensionsGoalState completed\s\[(?P<id>[a-z_\d]+)\s(?P<duration>\d+)\sms\]" # The format of the log has changed over time and the current log may include records from different sources. 
Most records are single-line, but some of them # can span across multiple lines. We will assume records always start with a line similar to the examples below; any other lines will be assumed to be part diff --git a/tests/protocol/test_extensions_goal_state_from_extensions_config.py b/tests/protocol/test_extensions_goal_state_from_extensions_config.py index 6e31634371..346a3644c0 100644 --- a/tests/protocol/test_extensions_goal_state_from_extensions_config.py +++ b/tests/protocol/test_extensions_goal_state_from_extensions_config.py @@ -57,4 +57,4 @@ def test_its_source_channel_should_be_wire_server(self): with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: extensions_goal_state = protocol.get_goal_state().extensions_goal_state - self.assertEqual(GoalStateChannel.WireServer, extensions_goal_state.source_channel, "The source_channel is incorrect") + self.assertEqual(GoalStateChannel.WireServer, extensions_goal_state.channel, "The channel is incorrect") diff --git a/tests/protocol/test_extensions_goal_state_from_vm_settings.py b/tests/protocol/test_extensions_goal_state_from_vm_settings.py index af12154fcb..3428269ca6 100644 --- a/tests/protocol/test_extensions_goal_state_from_vm_settings.py +++ b/tests/protocol/test_extensions_goal_state_from_vm_settings.py @@ -82,7 +82,7 @@ def test_its_source_channel_should_be_host_ga_plugin(self, _): with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: extensions_goal_state = protocol.get_goal_state().extensions_goal_state - self.assertEqual(GoalStateChannel.HostGAPlugin, extensions_goal_state.source_channel, "The source_channel is incorrect") + self.assertEqual(GoalStateChannel.HostGAPlugin, extensions_goal_state.channel, "The channel is incorrect") class CaseFoldedDictionaryTestCase(AgentTestCase): def test_it_should_retrieve_items_ignoring_case(self): From 96a639952d93b412e187d81454abee8cb058e717 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Thu, 3 Mar 2022 14:39:13 -0800 Subject: [PATCH 
40/84] Use SVD sequence number instead of incarnation when reporting status (#2524) * Reorganize the history directory * pylint * python2 * write agent status directly to history * Improve logging for goal state processing * fix incarnation * fix message * Update test * Use SVD sequence number instead of incarnation when reporting status * Update messages * typo * fix variable names Co-authored-by: narrieta --- .../common/protocol/extensions_goal_state.py | 21 ++++++- .../protocol/extensions_goal_state_factory.py | 4 +- ...sions_goal_state_from_extensions_config.py | 4 ++ .../extensions_goal_state_from_vm_settings.py | 7 +++ azurelinuxagent/common/protocol/goal_state.py | 2 +- azurelinuxagent/ga/exthandlers.py | 58 +++++++++---------- azurelinuxagent/ga/update.py | 34 +++++------ ...tings-difference_in_required_features.json | 1 + .../vm_settings-empty_depends_on.json | 1 + .../vm_settings-invalid_blob_type.json | 1 + .../vm_settings-no_status_upload_blob.json | 1 + .../hostgaplugin/vm_settings-out-of-sync.json | 1 + .../hostgaplugin/vm_settings-parse_error.json | 1 + .../vm_settings-requested_version.json | 1 + .../vm_settings-unsupported_version.json | 1 + tests/data/hostgaplugin/vm_settings.json | 1 + tests/ga/test_multi_config_extension.py | 2 +- tests/ga/test_update.py | 6 +- 18 files changed, 91 insertions(+), 56 deletions(-) diff --git a/azurelinuxagent/common/protocol/extensions_goal_state.py b/azurelinuxagent/common/protocol/extensions_goal_state.py index fc423c885a..1b3f4d2617 100644 --- a/azurelinuxagent/common/protocol/extensions_goal_state.py +++ b/azurelinuxagent/common/protocol/extensions_goal_state.py @@ -45,11 +45,15 @@ class ExtensionsGoalState(object): @property def id(self): """ - Returns the incarnation number if the ExtensionsGoalState was created from ExtensionsConfig, or the etag if it + Returns a string that includes the incarnation number if the ExtensionsGoalState was created from ExtensionsConfig, or the etag if it was created from 
vmSettings. """ raise NotImplementedError() + @property + def svd_sequence_number(self): + raise NotImplementedError() + @property def activity_id(self): raise NotImplementedError() @@ -146,9 +150,22 @@ def _string_to_id(id_string): class EmptyExtensionsGoalState(ExtensionsGoalState): + def __init__(self, incarnation): + super(EmptyExtensionsGoalState, self).__init__() + self._id = "incarnation_{0}".format(incarnation) + self._incarnation = incarnation + @property def id(self): - return self._string_to_id(None) + return self._id + + @property + def incarnation(self): + return self._incarnation + + @property + def svd_sequence_number(self): + return self._incarnation @property def activity_id(self): diff --git a/azurelinuxagent/common/protocol/extensions_goal_state_factory.py b/azurelinuxagent/common/protocol/extensions_goal_state_factory.py index 552bc1075c..2c5242aa5e 100644 --- a/azurelinuxagent/common/protocol/extensions_goal_state_factory.py +++ b/azurelinuxagent/common/protocol/extensions_goal_state_factory.py @@ -23,8 +23,8 @@ class ExtensionsGoalStateFactory(object): @staticmethod - def create_empty(): - return EmptyExtensionsGoalState() + def create_empty(incarnation): + return EmptyExtensionsGoalState(incarnation) @staticmethod def create_from_extensions_config(incarnation, xml_text, wire_client): diff --git a/azurelinuxagent/common/protocol/extensions_goal_state_from_extensions_config.py b/azurelinuxagent/common/protocol/extensions_goal_state_from_extensions_config.py index 9aafeb2d9b..98ab84cc56 100644 --- a/azurelinuxagent/common/protocol/extensions_goal_state_from_extensions_config.py +++ b/azurelinuxagent/common/protocol/extensions_goal_state_from_extensions_config.py @@ -136,6 +136,10 @@ def id(self): def incarnation(self): return self._incarnation + @property + def svd_sequence_number(self): + return self._incarnation + @property def activity_id(self): return self._activity_id diff --git 
a/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py b/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py index 31a06bf2a0..d1933dd1f0 100644 --- a/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py +++ b/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py @@ -36,6 +36,7 @@ def __init__(self, etag, json_text, correlation_id): super(ExtensionsGoalStateFromVmSettings, self).__init__() self._id = "etag_{0}".format(etag) self._etag = etag + self._svd_sequence_number = 0 self._fetch_correlation_id = correlation_id self._text = json_text self._host_ga_plugin_version = FlexibleVersion('0.0.0.0') @@ -65,6 +66,10 @@ def id(self): def etag(self): return self._etag + @property + def svd_sequence_number(self): + return self._svd_sequence_number + @property def host_ga_plugin_version(self): return self._host_ga_plugin_version @@ -158,6 +163,7 @@ def _parse_simple_attributes(self, vm_settings): # "vmSettingsSchemaVersion": "0.0", # "activityId": "a33f6f53-43d6-4625-b322-1a39651a00c9", # "correlationId": "9a47a2a2-e740-4bfc-b11b-4f2f7cfe7d2e", + # "inSvdSeqNo": 1, # "extensionsLastModifiedTickCount": 637726657706205217, # "extensionGoalStatesSource": "Fabric", # ... 
@@ -170,6 +176,7 @@ def _parse_simple_attributes(self, vm_settings): self._activity_id = self._string_to_id(vm_settings.get("activityId")) self._correlation_id = self._string_to_id(vm_settings.get("correlationId")) + self._svd_sequence_number = self._string_to_id(vm_settings.get("inSvdSeqNo")) self._created_on_timestamp = self._ticks_to_utc_timestamp(vm_settings.get("extensionsLastModifiedTickCount")) schema_version = vm_settings.get("vmSettingsSchemaVersion") diff --git a/azurelinuxagent/common/protocol/goal_state.py b/azurelinuxagent/common/protocol/goal_state.py index 995f58a84a..7abcd162f8 100644 --- a/azurelinuxagent/common/protocol/goal_state.py +++ b/azurelinuxagent/common/protocol/goal_state.py @@ -236,7 +236,7 @@ def _fetch_extended_goal_state(self, xml_text, xml_doc, force_vm_settings_update extensions_config_uri = findtext(xml_doc, "ExtensionsConfig") if extensions_config_uri is None: - extensions_config = ExtensionsGoalStateFactory.create_empty() + extensions_config = ExtensionsGoalStateFactory.create_empty(self._incarnation) else: xml_text = self._wire_client.fetch_config(extensions_config_uri, self._wire_client.get_header()) extensions_config = ExtensionsGoalStateFactory.create_from_extensions_config(self._incarnation, xml_text, self._wire_client) diff --git a/azurelinuxagent/ga/exthandlers.py b/azurelinuxagent/ga/exthandlers.py index be21a8c3c4..635352863d 100644 --- a/azurelinuxagent/ga/exthandlers.py +++ b/azurelinuxagent/ga/exthandlers.py @@ -279,7 +279,7 @@ def __init__(self, protocol): self.protocol = protocol self.ext_handlers = None # The GoalState Aggregate status needs to report the last status of the GoalState. Since we only process - # extensions on incarnation change, we need to maintain its state. + # extensions on goal state change, we need to maintain its state. # Setting the status to None here. 
This would be overridden as soon as the first GoalState is processed self.__gs_aggregate_status = None @@ -312,7 +312,7 @@ def run(self): add_event(op=WALAEventOperation.ExtensionProcessing, message=message) try: - self.__process_and_handle_extensions(gs.incarnation) # TODO: review the use of incarnation + self.__process_and_handle_extensions(egs.svd_sequence_number, egs.id) self._cleanup_outdated_handlers() except Exception as e: error = u"Error processing extensions:{0}".format(textutil.format_exception(e)) @@ -336,15 +336,14 @@ def __get_unsupported_features(self): supported_features = get_agent_supported_features_list_for_crp() return [feature for feature in required_features if feature not in supported_features] - def __process_and_handle_extensions(self, etag): + def __process_and_handle_extensions(self, svd_sequence_number, goal_state_id): try: # Verify we satisfy all required features, if any. If not, report failure here itself, no need to process anything further. unsupported_features = self.__get_unsupported_features() if any(unsupported_features): - msg = "Failing GS incarnation: {0} as Unsupported features found: {1}".format(etag, ', '.join( - unsupported_features)) + msg = "Failing GS {0} as Unsupported features found: {1}".format(goal_state_id, ', '.join(unsupported_features)) logger.warn(msg) - self.__gs_aggregate_status = GoalStateAggregateStatus(status=GoalStateStatus.Failed, seq_no=etag, + self.__gs_aggregate_status = GoalStateAggregateStatus(status=GoalStateStatus.Failed, seq_no=svd_sequence_number, code=GoalStateAggregateStatusCodes.GoalStateUnsupportedRequiredFeatures, message=msg) add_event(op=WALAEventOperation.GoalStateUnsupportedFeatures, @@ -352,13 +351,13 @@ def __process_and_handle_extensions(self, etag): message=msg, log_event=False) else: - self.handle_ext_handlers(etag) - self.__gs_aggregate_status = GoalStateAggregateStatus(status=GoalStateStatus.Success, seq_no=etag, + self.handle_ext_handlers(goal_state_id) + 
self.__gs_aggregate_status = GoalStateAggregateStatus(status=GoalStateStatus.Success, seq_no=svd_sequence_number, code=GoalStateAggregateStatusCodes.Success, message="GoalState executed successfully") except Exception as error: msg = "Unexpected error when processing goal state:{0}".format(textutil.format_exception(error)) - self.__gs_aggregate_status = GoalStateAggregateStatus(status=GoalStateStatus.Failed, seq_no=etag, + self.__gs_aggregate_status = GoalStateAggregateStatus(status=GoalStateStatus.Failed, seq_no=svd_sequence_number, code=GoalStateAggregateStatusCodes.GoalStateUnknownFailure, message=msg) logger.warn(msg) @@ -468,7 +467,7 @@ def __get_sorted_extensions_for_processing(self): return all_extensions - def handle_ext_handlers(self, etag=None): + def handle_ext_handlers(self, goal_state_id): if not self.ext_handlers: logger.info("No extension handlers found, not processing anything.") return @@ -506,7 +505,7 @@ def handle_ext_handlers(self, etag=None): continue # Process extensions and get if it was successfully executed or not - extension_success = self.handle_ext_handler(handler_i, extension, etag) + extension_success = self.handle_ext_handler(handler_i, extension, goal_state_id) dep_level = self.__get_dependency_level((extension, ext_handler)) if 0 <= dep_level < max_dep_level: @@ -573,12 +572,12 @@ def wait_for_handler_completion(handler_i, wait_until, extension=None): msg = "Dependent Extension {0} did not succeed. 
Status was {1}".format(extension_name, status) raise Exception(msg) - def handle_ext_handler(self, ext_handler_i, extension, etag): + def handle_ext_handler(self, ext_handler_i, extension, goal_state_id): """ Execute the requested command for the handler and return if success :param ext_handler_i: The ExtHandlerInstance object to execute the command on :param extension: The extension settings on which to run the command on - :param etag: Current incarnation of the GoalState + :param goal_state_id: ID of the current GoalState :return: True if the operation was successful, False if not """ @@ -602,7 +601,7 @@ def handle_ext_handler(self, ext_handler_i, extension, etag): raise ExtensionError(msg=err_msg) # Handle everything on an extension level rather than Handler level - ext_handler_i.logger.info("Target handler state: {0} [incarnation {1}]", handler_state, etag) + ext_handler_i.logger.info("Target handler state: {0} [{1}]", handler_state, goal_state_id) if handler_state == ExtensionRequestedState.Enabled: self.handle_enable(ext_handler_i, extension) elif handler_state == ExtensionRequestedState.Disabled: @@ -834,7 +833,7 @@ def handle_uninstall(self, ext_handler_i, extension): ext_handler_i.remove_ext_handler() - def __get_handlers_on_file_system(self, incarnation_changed): + def __get_handlers_on_file_system(self, goal_state_changed): handlers_to_report = [] # Ignoring the `history` and `events` directories as they're not handlers and are agent-generated for item, path in list_agent_lib_directory(skip_agent_package=True, @@ -872,13 +871,14 @@ def __get_handlers_on_file_system(self, incarnation_changed): handlers_to_report.append(ext_handler) except Exception as error: - # Log error once per incarnation - if incarnation_changed: + # Log error once per goal state + if goal_state_changed: logger.warn("Can't fetch ExtHandler from path: {0}; Error: {1}".format(path, ustr(error))) return handlers_to_report - def report_ext_handlers_status(self, 
incarnation_changed=False, vm_agent_update_status=None, vm_agent_supports_fast_track=False): + def report_ext_handlers_status(self, goal_state_changed=False, vm_agent_update_status=None, + vm_agent_supports_fast_track=False): """ Go through handler_state dir, collect and report status. Returns the status it reported, or None if an error occurred. @@ -892,7 +892,7 @@ def report_ext_handlers_status(self, incarnation_changed=False, vm_agent_update_ # In case of Unsupported error, report the status of the handlers in the VM if self.__last_gs_unsupported(): - handlers_to_report = self.__get_handlers_on_file_system(incarnation_changed) + handlers_to_report = self.__get_handlers_on_file_system(goal_state_changed) # If GoalState supported, report the status of extension handlers that were requested by the GoalState elif not self.__last_gs_unsupported() and self.ext_handlers is not None: @@ -900,7 +900,7 @@ def report_ext_handlers_status(self, incarnation_changed=False, vm_agent_update_ for ext_handler in handlers_to_report: try: - self.report_ext_handler_status(vm_status, ext_handler, incarnation_changed) + self.report_ext_handler_status(vm_status, ext_handler, goal_state_changed) except ExtensionError as error: add_event(op=WALAEventOperation.ExtensionProcessing, is_success=False, message=ustr(error)) @@ -973,7 +973,7 @@ def get_ext_handlers_status_debug_info(self, vm_status): }} '''.format(status_blob_text, debug_text) - def report_ext_handler_status(self, vm_status, ext_handler, incarnation_changed): + def report_ext_handler_status(self, vm_status, ext_handler, goal_state_changed): ext_handler_i = ExtHandlerInstance(ext_handler, self.protocol) handler_status = ext_handler_i.get_handler_status() @@ -984,7 +984,7 @@ def report_ext_handler_status(self, vm_status, ext_handler, incarnation_changed) # multi-config. If state is != Uninstall, report error if ext_handler.state != ExtensionRequestedState.Uninstall: msg = "No handler status found for {0}. 
Not reporting anything for it.".format(ext_handler.name) - ext_handler_i.report_error_on_incarnation_change(incarnation_changed, log_msg=msg, event_msg=msg) + ext_handler_i.report_error_on_incarnation_change(goal_state_changed, log_msg=msg, event_msg=msg) return handler_state = ext_handler_i.get_handler_state() @@ -1005,7 +1005,7 @@ def report_ext_handler_status(self, vm_status, ext_handler, incarnation_changed) except ExtensionError as e: ext_handler_i.set_handler_status(message=ustr(e), code=e.code) - ext_handler_statuses = ext_handler_i.get_extension_handler_statuses(handler_status, incarnation_changed) + ext_handler_statuses = ext_handler_i.get_extension_handler_statuses(handler_status, goal_state_changed) # If not any extension status reported, report the Handler status if not any(ext_handler_statuses): @@ -1757,18 +1757,18 @@ def is_ext_handling_complete(self, ext): # Extension completed, return its status return True, status - def report_error_on_incarnation_change(self, incarnation_changed, log_msg, event_msg, extension=None, + def report_error_on_incarnation_change(self, goal_state_changed, log_msg, event_msg, extension=None, op=WALAEventOperation.ReportStatus): - # Since this code is called on a loop, logging as a warning only on incarnation change, else logging it + # Since this code is called on a loop, logging as a warning only on goal state change, else logging it # as verbose - if incarnation_changed: + if goal_state_changed: logger.warn(log_msg) add_event(name=self.get_extension_full_name(extension), version=self.ext_handler.version, op=op, message=event_msg, is_success=False, log_event=False) else: logger.verbose(log_msg) - def get_extension_handler_statuses(self, handler_status, incarnation_changed): + def get_extension_handler_statuses(self, handler_status, goal_state_changed): """ Get the list of ExtHandlerStatus objects corresponding to each extension in the Handler. 
Each object might have its own status for the Extension status but the Handler status would be the same for each extension in a Handle @@ -1791,7 +1791,7 @@ def get_extension_handler_statuses(self, handler_status, incarnation_changed): except Exception as error: msg = "Something went wrong when trying to get a copy of the Handler status for {0}".format( self.get_extension_full_name()) - self.report_error_on_incarnation_change(incarnation_changed, event_msg=msg, + self.report_error_on_incarnation_change(goal_state_changed, event_msg=msg, log_msg="{0}.\nStack Trace: {1}".format( msg, textutil.format_exception(error))) # Since this is a Handler level error and we need to do it per extension, breaking here and logging @@ -1809,7 +1809,7 @@ def get_extension_handler_statuses(self, handler_status, incarnation_changed): msg = "Unknown error when trying to fetch status from extension {0}".format( self.get_extension_full_name(ext)) - self.report_error_on_incarnation_change(incarnation_changed, event_msg=msg, + self.report_error_on_incarnation_change(goal_state_changed, event_msg=msg, log_msg="{0}.\nStack Trace: {1}".format( msg, textutil.format_exception(error)), extension=ext) diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py index ace17d92bf..fdf93e4c1e 100644 --- a/azurelinuxagent/ga/update.py +++ b/azurelinuxagent/ga/update.py @@ -620,7 +620,7 @@ def _cleanup_legacy_goal_state_history(): except Exception as exception: logger.warn("Error removing legacy history files: {0}", ustr(exception)) - def __get_vmagent_update_status(self, protocol, incarnation_changed): + def __get_vmagent_update_status(self, protocol, goal_state_changed): """ This function gets the VMAgent update status as per the last GoalState. 
Returns: None if the last GS does not ask for requested version else VMAgentUpdateStatus @@ -632,7 +632,7 @@ def __get_vmagent_update_status(self, protocol, incarnation_changed): try: requested_version, manifest = self.__get_requested_version_and_manifest_from_last_gs(protocol) - if manifest is None and incarnation_changed: + if manifest is None and goal_state_changed: logger.info("Unable to report update status as no matching manifest found for family: {0}".format( conf.get_autoupdate_gafamily())) return None @@ -647,8 +647,8 @@ def __get_vmagent_update_status(self, protocol, incarnation_changed): update_status = VMAgentUpdateStatus(expected_version=manifest.requested_version_string, status=status, code=code) except Exception as error: - if incarnation_changed: - err_msg = "[This error will only be logged once per incarnation] " \ + if goal_state_changed: + err_msg = "[This error will only be logged once per goal state] " \ "Ran into error when trying to fetch updateStatus for the agent, skipping reporting update satus. Error: {0}".format( textutil.format_exception(error)) logger.warn(err_msg) @@ -660,7 +660,6 @@ def _report_status(self, exthandlers_handler): vm_agent_update_status = self.__get_vmagent_update_status(exthandlers_handler.protocol, self._processing_new_extensions_goal_state()) # report_ext_handlers_status does its own error handling and returns None if an error occurred # - # TODO: Review the use of incarnation when reporting status... what should be the behavior for Fast Track goal states (i.e. no incarnation)? # TODO: How to handle the case when the HostGAPlugin goes from supporting vmSettings to not supporting it? 
# if self._goal_state is None: @@ -669,9 +668,8 @@ def _report_status(self, exthandlers_handler): supports_fast_track = self._goal_state.extensions_goal_state.channel == GoalStateChannel.HostGAPlugin vm_status = exthandlers_handler.report_ext_handlers_status( - incarnation_changed=self._processing_new_extensions_goal_state(), - vm_agent_update_status=vm_agent_update_status, - vm_agent_supports_fast_track=supports_fast_track) + goal_state_changed=self._processing_new_extensions_goal_state(), + vm_agent_update_status=vm_agent_update_status, vm_agent_supports_fast_track=supports_fast_track) if vm_status is not None: self._report_extensions_summary(vm_status) @@ -1064,13 +1062,13 @@ def report_error(msg_, version_=CURRENT_VERSION, op=WALAEventOperation.Download) def can_proceed_with_requested_version(): if not gs_updated: - # If incarnation didn't change, don't process anything. + # If the goal state didn't change, don't process anything. return False # With the new model, we will get a new GS when CRP wants us to auto-update using required version. 
- # If there's no new incarnation, don't proceed with anything - msg_ = "Found requested version in manifest: {0} for incarnation: {1}".format( - requested_version, incarnation) + # If there's no new goal state, don't proceed with anything + msg_ = "Found requested version in manifest: {0} for goal state {1}".format( + requested_version, goal_state_id) logger.info(msg_) add_event(AGENT_NAME, op=WALAEventOperation.AgentUpgrade, is_success=True, message=msg_, log_event=False) @@ -1098,16 +1096,16 @@ def agent_upgrade_time_elapsed(now_): daemon_version = self.__get_daemon_version_for_update() try: # Fetch the agent manifests from the latest Goal State - incarnation = self._goal_state.incarnation + goal_state_id = self._goal_state.extensions_goal_state.id gs_updated = self._processing_new_extensions_goal_state() requested_version, manifest = self.__get_requested_version_and_manifest_from_last_gs(protocol) if manifest is None: logger.verbose( - u"No manifest links found for agent family: {0} for incarnation: {1}, skipping update check".format( - family, incarnation)) + u"No manifest links found for agent family: {0} for goal state {1}, skipping update check".format( + family, goal_state_id)) return False except Exception as err: - # If there's some issues in fetching the agent manifests, report it only on incarnation change + # If there's some issues in fetching the agent manifests, report it only on goal state change msg = u"Exception retrieving agent manifests: {0}".format(textutil.format_exception(err)) if gs_updated: report_error(msg) @@ -1157,8 +1155,8 @@ def agent_upgrade_time_elapsed(now_): packages_to_download = [pkg] break else: - msg = "No matching package found in the agent manifest for requested version: {0} in incarnation: {1}, skipping agent update".format( - requested_version, incarnation) + msg = "No matching package found in the agent manifest for requested version: {0} in goal state {1}, skipping agent update".format( + requested_version, 
goal_state_id) report_error(msg, version_=requested_version) return False diff --git a/tests/data/hostgaplugin/vm_settings-difference_in_required_features.json b/tests/data/hostgaplugin/vm_settings-difference_in_required_features.json index 3113c49231..8cb9d3b152 100644 --- a/tests/data/hostgaplugin/vm_settings-difference_in_required_features.json +++ b/tests/data/hostgaplugin/vm_settings-difference_in_required_features.json @@ -3,6 +3,7 @@ "vmSettingsSchemaVersion": "0.0", "activityId": "a33f6f53-43d6-4625-b322-1a39651a00c9", "correlationId": "9a47a2a2-e740-4bfc-b11b-4f2f7cfe7d2e", + "inSvdSeqNo": 1, "extensionsLastModifiedTickCount": 637726657706205217, "extensionGoalStatesSource": "Fabric", "onHold": true, diff --git a/tests/data/hostgaplugin/vm_settings-empty_depends_on.json b/tests/data/hostgaplugin/vm_settings-empty_depends_on.json index 8442cd399f..0b7b19a95a 100644 --- a/tests/data/hostgaplugin/vm_settings-empty_depends_on.json +++ b/tests/data/hostgaplugin/vm_settings-empty_depends_on.json @@ -3,6 +3,7 @@ "vmSettingsSchemaVersion": "0.0", "activityId": "2e7f8b5d-f637-4721-b757-cb190d49b4e9", "correlationId": "1bef4c48-044e-4225-8f42-1d1eac1eb158", + "inSvdSeqNo": 1, "extensionsLastModifiedTickCount": 637693267431616449, "extensionGoalStatesSource": "Fabric", "StatusUploadBlob": { diff --git a/tests/data/hostgaplugin/vm_settings-invalid_blob_type.json b/tests/data/hostgaplugin/vm_settings-invalid_blob_type.json index e792f423d1..d30e5082e6 100644 --- a/tests/data/hostgaplugin/vm_settings-invalid_blob_type.json +++ b/tests/data/hostgaplugin/vm_settings-invalid_blob_type.json @@ -3,6 +3,7 @@ "vmSettingsSchemaVersion": "0.0", "activityId": "2e7f8b5d-f637-4721-b757-cb190d49b4e9", "correlationId": "1bef4c48-044e-4225-8f42-1d1eac1eb158", + "inSvdSeqNo": 1, "extensionsLastModifiedTickCount": 637693267431616449, "extensionGoalStatesSource": "Fabric", "StatusUploadBlob": { diff --git a/tests/data/hostgaplugin/vm_settings-no_status_upload_blob.json 
b/tests/data/hostgaplugin/vm_settings-no_status_upload_blob.json index 364d6ced3b..d5c17eeb8b 100644 --- a/tests/data/hostgaplugin/vm_settings-no_status_upload_blob.json +++ b/tests/data/hostgaplugin/vm_settings-no_status_upload_blob.json @@ -3,6 +3,7 @@ "vmSettingsSchemaVersion": "0.0", "activityId": "a33f6f53-43d6-4625-b322-1a39651a00c9", "correlationId": "9a47a2a2-e740-4bfc-b11b-4f2f7cfe7d2e", + "inSvdSeqNo": 1, "extensionsLastModifiedTickCount": 637726657706205217, "extensionGoalStatesSource": "Fabric", "onHold": true, diff --git a/tests/data/hostgaplugin/vm_settings-out-of-sync.json b/tests/data/hostgaplugin/vm_settings-out-of-sync.json index ef26d75962..fa3cc0f67b 100644 --- a/tests/data/hostgaplugin/vm_settings-out-of-sync.json +++ b/tests/data/hostgaplugin/vm_settings-out-of-sync.json @@ -3,6 +3,7 @@ "vmSettingsSchemaVersion": "0.0", "activityId": "AAAAAAA-BBBB-CCCC-DDDD-EEEEEEEEEEEE", "correlationId": "EEEEEEEE-DDDD-CCCC-BBBB-AAAAAAAAAAAA", + "inSvdSeqNo": 1, "extensionsLastModifiedTickCount": 637726657000000000, "extensionGoalStatesSource": "Fabric", "onHold": true, diff --git a/tests/data/hostgaplugin/vm_settings-parse_error.json b/tests/data/hostgaplugin/vm_settings-parse_error.json index 1712bbda10..ffd47857fb 100644 --- a/tests/data/hostgaplugin/vm_settings-parse_error.json +++ b/tests/data/hostgaplugin/vm_settings-parse_error.json @@ -3,6 +3,7 @@ "vmSettingsSchemaVersion": THIS_IS_A_SYNTAX_ERROR, "activityId": "a33f6f53-43d6-4625-b322-1a39651a00c9", "correlationId": "9a47a2a2-e740-4bfc-b11b-4f2f7cfe7d2e", + "inSvdSeqNo": 1, "extensionsLastModifiedTickCount": 637726657706205217, "extensionGoalStatesSource": "Fabric", "onHold": true, diff --git a/tests/data/hostgaplugin/vm_settings-requested_version.json b/tests/data/hostgaplugin/vm_settings-requested_version.json index a033776384..096c3bee28 100644 --- a/tests/data/hostgaplugin/vm_settings-requested_version.json +++ b/tests/data/hostgaplugin/vm_settings-requested_version.json @@ -3,6 +3,7 @@ 
"vmSettingsSchemaVersion": "0.0", "activityId": "a33f6f53-43d6-4625-b322-1a39651a00c9", "correlationId": "9a47a2a2-e740-4bfc-b11b-4f2f7cfe7d2e", + "inSvdSeqNo": 1, "extensionsLastModifiedTickCount": 637726657706205217, "extensionGoalStatesSource": "Fabric", "onHold": true, diff --git a/tests/data/hostgaplugin/vm_settings-unsupported_version.json b/tests/data/hostgaplugin/vm_settings-unsupported_version.json index a32d5389de..d9fe9f1dd5 100644 --- a/tests/data/hostgaplugin/vm_settings-unsupported_version.json +++ b/tests/data/hostgaplugin/vm_settings-unsupported_version.json @@ -3,6 +3,7 @@ "vmSettingsSchemaVersion": "0.0", "activityId": "a33f6f53-43d6-4625-b322-1a39651a00c9", "correlationId": "9a47a2a2-e740-4bfc-b11b-4f2f7cfe7d2e", + "inSvdSeqNo": 1, "extensionsLastModifiedTickCount": 637726657706205217, "extensionGoalStatesSource": "Fabric", "onHold": true, diff --git a/tests/data/hostgaplugin/vm_settings.json b/tests/data/hostgaplugin/vm_settings.json index 4647cca284..fbae08bbab 100644 --- a/tests/data/hostgaplugin/vm_settings.json +++ b/tests/data/hostgaplugin/vm_settings.json @@ -3,6 +3,7 @@ "vmSettingsSchemaVersion": "0.0", "activityId": "a33f6f53-43d6-4625-b322-1a39651a00c9", "correlationId": "9a47a2a2-e740-4bfc-b11b-4f2f7cfe7d2e", + "inSvdSeqNo": 1, "extensionsLastModifiedTickCount": 637726657706205217, "extensionGoalStatesSource": "Fabric", "onHold": true, diff --git a/tests/ga/test_multi_config_extension.py b/tests/ga/test_multi_config_extension.py index 643fb718f0..c252dc0b02 100644 --- a/tests/ga/test_multi_config_extension.py +++ b/tests/ga/test_multi_config_extension.py @@ -965,7 +965,7 @@ def test_it_should_report_status_correctly_for_unsupported_goal_state(self): GoalStateAggregateStatusCodes.GoalStateUnsupportedRequiredFeatures, "Incorrect code") self.assertEqual(gs_aggregate_status['inSvdSeqNo'], '2', "Incorrect incarnation reported") self.assertEqual(gs_aggregate_status['formattedMessage']['message'], - 'Failing GS incarnation: 2 as Unsupported 
features found: TestRequiredFeature1, TestRequiredFeature2, TestRequiredFeature3', + 'Failing GS incarnation_2 as Unsupported features found: TestRequiredFeature1, TestRequiredFeature2, TestRequiredFeature3', "Incorrect error message reported") def test_it_should_fail_handler_if_handler_does_not_support_mc(self): diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index 6f587d3c53..22594056ee 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -2189,12 +2189,12 @@ def test_it_should_not_update_if_requested_version_not_found_in_manifest(self): kwarg['op'] in (WALAEventOperation.AgentUpgrade, WALAEventOperation.Download)] # This will throw if corresponding message not found so not asserting on that requested_version_found = next(kwarg for kwarg in agent_msgs if - "Found requested version in manifest: 5.2.1.0 for incarnation: 1" in kwarg['message']) + "Found requested version in manifest: 5.2.1.0 for goal state incarnation_1" in kwarg['message']) self.assertTrue(requested_version_found['is_success'], "The requested version found op should be reported as a success") skipping_update = next(kwarg for kwarg in agent_msgs if - "No matching package found in the agent manifest for requested version: 5.2.1.0 in incarnation: 1, skipping agent update" in kwarg['message']) + "No matching package found in the agent manifest for requested version: 5.2.1.0 in goal state incarnation_1, skipping agent update" in kwarg['message']) self.assertEqual(skipping_update['version'], FlexibleVersion("5.2.1.0"), "The not found message should be reported from requested agent version") self.assertFalse(skipping_update['is_success'], "The not found op should be reported as a failure") @@ -2389,7 +2389,7 @@ def test_it_should_not_downgrade_below_daemon_version(self): kwarg['op'] == WALAEventOperation.AgentUpgrade] # This will throw if corresponding message not found so not asserting on that requested_version_found = next(kwarg for kwarg in upgrade_msgs if - "Found 
requested version in manifest: 1.0.0.0 for incarnation: 2" in kwarg[ + "Found requested version in manifest: 1.0.0.0 for goal state incarnation_2" in kwarg[ 'message']) self.assertTrue(requested_version_found['is_success'], "The requested version found op should be reported as a success") From c42343c1e1a6d1e829ea18c3c82a2dac6b60ff20 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Thu, 3 Mar 2022 15:27:14 -0800 Subject: [PATCH 41/84] Improve logic of unexpected processes check started by agent (#2522) * Improve logic of unexpected processes check started by agent * addressed comments * fix UT * added comment * address envron search --- azurelinuxagent/common/cgroupconfigurator.py | 21 +++++++++++++++++++- azurelinuxagent/common/utils/shellutil.py | 15 +++++++++++++- tests/ga/test_multi_config_extension.py | 5 ++++- tests/utils/test_shell_util.py | 14 +++++++++++++ 4 files changed, 52 insertions(+), 3 deletions(-) diff --git a/azurelinuxagent/common/cgroupconfigurator.py b/azurelinuxagent/common/cgroupconfigurator.py index 92abe442ce..e0f408ab55 100644 --- a/azurelinuxagent/common/cgroupconfigurator.py +++ b/azurelinuxagent/common/cgroupconfigurator.py @@ -605,7 +605,8 @@ def _check_processes_in_agent_cgroup(self): current = process while current != 0 and current not in agent_commands: current = self._get_parent(current) - if current == 0: + # Process started by agent will have a marker and check if that marker found in process environment. 
+ if current == 0 and not self.__is_process_descendant_of_the_agent(process): unexpected.append(self.__format_process(process)) if len(unexpected) >= 5: # collect just a small sample break @@ -640,6 +641,24 @@ def __format_process(pid): pass return "[PID: {0}] UNKNOWN".format(pid) + @staticmethod + def __is_process_descendant_of_the_agent(pid): + """ + Returns True if the process is descendant of the agent by looking at the env flag(AZURE_GUEST_AGENT_PARENT_PROCESS_NAME) + that we set when the process starts otherwise False. + """ + try: + env = '/proc/{0}/environ'.format(pid) + if os.path.exists(env): + with open(env, "r") as env_file: + environ = env_file.read() + if environ and environ[-1] == '\x00': + environ = environ[:-1] + return "{0}={1}".format(shellutil.PARENT_PROCESS_NAME, shellutil.AZURE_GUEST_AGENT) in environ + except Exception: + pass + return False + @staticmethod def _check_agent_throttled_time(cgroup_metrics): for metric in cgroup_metrics: diff --git a/azurelinuxagent/common/utils/shellutil.py b/azurelinuxagent/common/utils/shellutil.py index 5ac1141231..50fd4592f1 100644 --- a/azurelinuxagent/common/utils/shellutil.py +++ b/azurelinuxagent/common/utils/shellutil.py @@ -16,7 +16,7 @@ # # Requires Python 2.6+ and Openssl 1.0+ # - +import os import subprocess import tempfile import threading @@ -345,10 +345,23 @@ def quote(word_list): # _running_commands = [] _running_commands_lock = threading.RLock() +PARENT_PROCESS_NAME = "AZURE_GUEST_AGENT_PARENT_PROCESS_NAME" +AZURE_GUEST_AGENT = "AZURE_GUEST_AGENT" def _popen(*args, **kwargs): with _running_commands_lock: + # Add the environment variables + env = {} + if 'env' in kwargs: + env.update(kwargs['env']) + else: + env.update(os.environ) + + # Set the marker before process start + env[PARENT_PROCESS_NAME] = AZURE_GUEST_AGENT + kwargs['env'] = env + process = subprocess.Popen(*args, **kwargs) _running_commands.append(process.pid) return process diff --git a/tests/ga/test_multi_config_extension.py 
b/tests/ga/test_multi_config_extension.py index c252dc0b02..e424afd4bf 100644 --- a/tests/ga/test_multi_config_extension.py +++ b/tests/ga/test_multi_config_extension.py @@ -738,7 +738,10 @@ def __assert_env_variables(handler_name, handler_version="1.0.0", seq_no="1", ex self.assertFalse(any(env_var in commands['data'] for env_var in not_expected), "Unwanted env variable found") def mock_popen(cmd, *_, **kwargs): - if 'env' in kwargs: + # This cgroupsapi Popen mocking all other popen calls which breaking the extension emulator logic. + # The emulator should be used only on extension commands and not on other commands even env flag set. + # So, added ExtensionVersion check to avoid using extension emulator on non extension operations. + if 'env' in kwargs and ExtCommandEnvVariable.ExtensionVersion in kwargs['env']: handler_name, __, command = extract_extension_info_from_command(cmd) name = handler_name if ExtCommandEnvVariable.ExtensionName in kwargs['env']: diff --git a/tests/utils/test_shell_util.py b/tests/utils/test_shell_util.py index 64ac8b6b52..83082bf7e7 100644 --- a/tests/utils/test_shell_util.py +++ b/tests/utils/test_shell_util.py @@ -17,6 +17,7 @@ # import os import signal +import subprocess import tempfile import threading import unittest @@ -156,6 +157,19 @@ def test_run_command_should_execute_the_command(self): ret = shellutil.run_command(command) self.assertEqual(ret, "A TEST STRING") + def test_run_command_should_use_popen_arg_list(self): + with patch("azurelinuxagent.common.utils.shellutil.subprocess.Popen", wraps=subprocess.Popen) as popen_patch: + command = ["echo", "-n", "A TEST STRING"] + ret = shellutil.run_command(command) + + self.assertEqual(ret, "A TEST STRING") + self.assertEqual(popen_patch.call_count, 1) + + args, kwargs = popen_patch.call_args + self.assertTrue(any(arg for arg in args[0] if "A TEST STRING" in arg), "command not being used") + self.assertEqual(kwargs['env'].get(shellutil.PARENT_PROCESS_NAME), 
shellutil.AZURE_GUEST_AGENT, + "Env flag not being used") + def test_run_pipe_should_execute_a_pipe_with_two_commands(self): # Output the same string 3 times and then remove duplicates test_string = "A TEST STRING\n" From 9a4dd3c635444efc314d1d18b9bb586699cd86cb Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Mon, 7 Mar 2022 13:44:27 -0800 Subject: [PATCH 42/84] Rename fetch_correlation_id to hostga_plugin_correlation_id (#2526) * Rename fetch_correlation_id to hostga_plugin_correlation_id * Rename variable Co-authored-by: narrieta --- .../protocol/extensions_goal_state_from_vm_settings.py | 8 ++++---- azurelinuxagent/common/protocol/goal_state.py | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py b/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py index d1933dd1f0..f0997e8194 100644 --- a/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py +++ b/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py @@ -37,7 +37,7 @@ def __init__(self, etag, json_text, correlation_id): self._id = "etag_{0}".format(etag) self._etag = etag self._svd_sequence_number = 0 - self._fetch_correlation_id = correlation_id + self._hostga_plugin_correlation_id = correlation_id self._text = json_text self._host_ga_plugin_version = FlexibleVersion('0.0.0.0') self._schema_version = FlexibleVersion('0.0.0.0') @@ -93,11 +93,11 @@ def correlation_id(self): return self._correlation_id @property - def fetch_correlation_id(self): + def hostga_plugin_correlation_id(self): """ - The correlation id for the fetch operation (i.e. 
the call to the HostGAPlugin vmSettings API) + The correlation id for the call to the HostGAPlugin vmSettings API """ - return self._fetch_correlation_id + return self._hostga_plugin_correlation_id @property def created_on_timestamp(self): diff --git a/azurelinuxagent/common/protocol/goal_state.py b/azurelinuxagent/common/protocol/goal_state.py index 7abcd162f8..a2ce59c380 100644 --- a/azurelinuxagent/common/protocol/goal_state.py +++ b/azurelinuxagent/common/protocol/goal_state.py @@ -145,7 +145,7 @@ def update(self, force_update=False): timestamp = create_timestamp() vm_settings, vm_settings_updated = self._fetch_vm_settings(force_update=force_update) if vm_settings_updated: - logger.info("Fetched new vmSettings [correlation ID: {0} eTag: {1} source: {2}]", vm_settings.fetch_correlation_id, vm_settings.etag, vm_settings.source) + logger.info("Fetched new vmSettings [HostGAPlugin correlation ID: {0} eTag: {1} source: {2}]", vm_settings.hostga_plugin_correlation_id, vm_settings.etag, vm_settings.source) self._history = GoalStateHistory(timestamp, vm_settings.etag) self._extensions_goal_state = vm_settings self._history.save_vm_settings(vm_settings.get_redacted_text()) @@ -246,7 +246,7 @@ def _fetch_extended_goal_state(self, xml_text, xml_doc, force_vm_settings_update if vm_settings is not None: new = " new " if vm_settings_updated else " " - logger.info("Fetched{0}vmSettings [correlation ID: {1} eTag: {2} source: {3}]", new, vm_settings.fetch_correlation_id, vm_settings.etag, vm_settings.source) + logger.info("Fetched{0}vmSettings [HostGAPlugin correlation ID: {1} eTag: {2} source: {3}]", new, vm_settings.hostga_plugin_correlation_id, vm_settings.etag, vm_settings.source) self._extensions_goal_state = vm_settings if vm_settings_updated: self._history.save_vm_settings(vm_settings.get_redacted_text()) From 71b756fb32a28c4da6495af381f69244efaa08b5 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Tue, 8 Mar 2022 11:46:41 -0800 Subject: [PATCH 43/84] Enable Fast 
Track by default (#2525) Co-authored-by: narrieta --- azurelinuxagent/common/conf.py | 4 ++-- tests/test_agent.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/azurelinuxagent/common/conf.py b/azurelinuxagent/common/conf.py index af23a04c45..3b4ea6840d 100644 --- a/azurelinuxagent/common/conf.py +++ b/azurelinuxagent/common/conf.py @@ -136,7 +136,7 @@ def load_conf_from_file(conf_file_path, conf=__conf__): "Debug.CgroupLogMetrics": False, "Debug.CgroupDisableOnProcessCheckFailure": True, "Debug.CgroupDisableOnQuotaCheckFailure": True, - "Debug.EnableFastTrack": False, + "Debug.EnableFastTrack": True, "Debug.EnableGAVersioning": False } @@ -567,7 +567,7 @@ def get_enable_fast_track(conf=__conf__): NOTE: This option is experimental and may be removed in later versions of the Agent. """ - return conf.get_switch("Debug.EnableFastTrack", False) + return conf.get_switch("Debug.EnableFastTrack", True) def get_etp_collection_period(conf=__conf__): diff --git a/tests/test_agent.py b/tests/test_agent.py index f5a3b90bb0..52ef9f6714 100644 --- a/tests/test_agent.py +++ b/tests/test_agent.py @@ -38,7 +38,7 @@ Debug.CgroupLogMetrics = False Debug.CgroupMonitorExpiryTime = 2022-03-31 Debug.CgroupMonitorExtensionName = Microsoft.Azure.Monitor.AzureMonitorLinuxAgent -Debug.EnableFastTrack = False +Debug.EnableFastTrack = True Debug.EnableGAVersioning = False Debug.EtpCollectionPeriod = 300 Debug.FirewallRulesLogPeriod = 86400 From 2027103ca6e894528d3a11bd7f7991dadbe5233a Mon Sep 17 00:00:00 2001 From: Kevin Clark Date: Wed, 9 Mar 2022 15:05:19 -0800 Subject: [PATCH 44/84] Add support for Mariner 2.0 in DCRv2 (#2528) --- dcr/azure-pipelines.yml | 6 ++++++ dcr/scenario_utils/check_waagent_log.py | 9 +++++++++ 2 files changed, 15 insertions(+) diff --git a/dcr/azure-pipelines.yml b/dcr/azure-pipelines.yml index 08a6455261..3fcffc6aef 100644 --- a/dcr/azure-pipelines.yml +++ b/dcr/azure-pipelines.yml @@ -46,6 +46,12 @@ parameters: sku: "cbl-mariner-1" version: 
"latest" name: "mariner1" +## + - publisher: "microsoftcblmariner" + offer: "cbl-mariner" + sku: "cbl-mariner-2" + version: "latest" + name: "mariner2" trigger: - develop diff --git a/dcr/scenario_utils/check_waagent_log.py b/dcr/scenario_utils/check_waagent_log.py index be539ae092..aab59d0282 100644 --- a/dcr/scenario_utils/check_waagent_log.py +++ b/dcr/scenario_utils/check_waagent_log.py @@ -96,6 +96,15 @@ def check_waagent_log_for_errors(waagent_log=AGENT_LOG_FILE, ignore=None): 'message': r"The agent's process is not within a memory cgroup", 'if': lambda log_line: re.match(r"((centos7\.8)|(centos7\.9)|(redhat7\.8)|(redhat8\.2))\D*", distro, flags=re.IGNORECASE) + }, + # 2022-03-09T20:04:33.745721Z ERROR ExtHandler ExtHandler Event: name=Microsoft.Azure.Monitor.AzureMonitorLinuxAgent, op=Install, message=[ExtensionOperationError] \ + # Non-zero exit code: 51, /var/lib/waagent/Microsoft.Azure.Monitor.AzureMonitorLinuxAgent-1.15.3/./shim.sh -install + # + # This is a known issue where AMA does not support Mariner 2.0. Please remove when support is + # added in the next AMA release (1.16.x). 
+ { + 'message': r"Event: name=Microsoft.Azure.Monitor.AzureMonitorLinuxAgent, op=Install, message=\[ExtensionOperationError\] Non-zero exit code: 51", + 'if': lambda log_line: "Mariner2.0" in distro and log_line.level == "ERROR" and log_line.who == "ExtHandler" } ] From 3eda39b7b49da01ac992dc7861cc00f23257355b Mon Sep 17 00:00:00 2001 From: Kevin Clark Date: Wed, 9 Mar 2022 15:18:37 -0800 Subject: [PATCH 45/84] Remove suppression from warning: "need a bytes-like object, NoneType found" (#2529) --- dcr/scenario_utils/check_waagent_log.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/dcr/scenario_utils/check_waagent_log.py b/dcr/scenario_utils/check_waagent_log.py index aab59d0282..cb7a661527 100644 --- a/dcr/scenario_utils/check_waagent_log.py +++ b/dcr/scenario_utils/check_waagent_log.py @@ -22,12 +22,6 @@ def check_waagent_log_for_errors(waagent_log=AGENT_LOG_FILE, ignore=None): # * 'if' receives as parameter an AgentLogRecord # ignore_list = [ - # This is a known issue (https://github.com/Azure/WALinuxAgent/pull/2016) - # Please remove this message from ignored once this task is completed - # - https://msazure.visualstudio.com/One/_workitems/edit/8733946 - { - 'message': r"need a bytes-like object, NoneType found" - }, # This warning is expected on CentOS/RedHat 7.8 and Redhat 7.6 { 'message': r"Move rules file 70-persistent-net.rules to /var/lib/waagent/70-persistent-net.rules", From be720fe7db916aecff7ea8409ec76157605ee844 Mon Sep 17 00:00:00 2001 From: Kevin Clark Date: Tue, 15 Mar 2022 10:49:45 -0700 Subject: [PATCH 46/84] Add keep_alive property to collect_logs (#2533) (#2535) (cherry picked from commit 47770b71b4965b1b302f02df91350839b7db24f9) --- azurelinuxagent/common/interfaces.py | 9 +++++++++ azurelinuxagent/ga/collect_logs.py | 3 +++ azurelinuxagent/ga/update.py | 2 +- 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/azurelinuxagent/common/interfaces.py b/azurelinuxagent/common/interfaces.py index d6b0753bb9..41b16e6681 100644 
--- a/azurelinuxagent/common/interfaces.py +++ b/azurelinuxagent/common/interfaces.py @@ -30,6 +30,15 @@ def get_thread_name(): def run(self): raise NotImplementedError("run() not implemented") + def keep_alive(self): + """ + Returns true if the thread handler should be restarted when the thread dies + and false when it should remain dead. + + Defaults to True and can be overridden by sub-classes. + """ + return True + def is_alive(self): raise NotImplementedError("is_alive() not implemented") diff --git a/azurelinuxagent/ga/collect_logs.py b/azurelinuxagent/ga/collect_logs.py index b26e260ca3..dc62fccf21 100644 --- a/azurelinuxagent/ga/collect_logs.py +++ b/azurelinuxagent/ga/collect_logs.py @@ -107,6 +107,9 @@ def __init__(self): def run(self): self.start() + def keep_alive(self): + return self.should_run + def is_alive(self): return self.event_thread.is_alive() diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py index fdf93e4c1e..3c3e405339 100644 --- a/azurelinuxagent/ga/update.py +++ b/azurelinuxagent/ga/update.py @@ -450,7 +450,7 @@ def _check_daemon_running(self, debug): def _check_threads_running(self, all_thread_handlers): # Check that all the threads are still running for thread_handler in all_thread_handlers: - if not thread_handler.is_alive(): + if thread_handler.keep_alive() and not thread_handler.is_alive(): logger.warn("{0} thread died, restarting".format(thread_handler.get_thread_name())) thread_handler.start() From 5cf4bb339bd472e87cde960b07fab3097a7e3142 Mon Sep 17 00:00:00 2001 From: Laveesh Rohra Date: Tue, 15 Mar 2022 18:16:35 +0000 Subject: [PATCH 47/84] Send telemetry for environment variables used per extension call (#2536) --- azurelinuxagent/ga/exthandlers.py | 8 +++++--- tests/ga/test_extension.py | 16 ++++++++-------- tests/ga/test_multi_config_extension.py | 11 ++++++++--- 3 files changed, 21 insertions(+), 14 deletions(-) diff --git a/azurelinuxagent/ga/exthandlers.py b/azurelinuxagent/ga/exthandlers.py index 
635352863d..cc9d0afc32 100644 --- a/azurelinuxagent/ga/exthandlers.py +++ b/azurelinuxagent/ga/exthandlers.py @@ -1902,12 +1902,15 @@ def launch_command(self, cmd, cmd_name=None, timeout=300, extension_error_code=E if supported_features: env[ExtCommandEnvVariable.ExtensionSupportedFeatures] = json.dumps(supported_features) + ext_name = self.get_extension_full_name(extension) try: # Some extensions erroneously begin cmd with a slash; don't interpret those # as root-relative. (Issue #1170) command_full_path = os.path.join(base_dir, cmd.lstrip(os.path.sep)) - self.logger.info("Executing command: {0} with environment variables: {1}".format(command_full_path, - json.dumps(env))) + log_msg = "Executing command: {0} with environment variables: {1}".format(command_full_path, + json.dumps(env)) + self.logger.info(log_msg) + self.report_event(name=ext_name, message=log_msg, log_event=False) # Add the os environment variables before executing command env.update(os.environ) @@ -1928,7 +1931,6 @@ def launch_command(self, cmd, cmd_name=None, timeout=300, extension_error_code=E code=extension_error_code) duration = elapsed_milliseconds(begin_utc) - ext_name = self.get_extension_full_name(extension) log_msg = "Command: {0}\n{1}".format(cmd, "\n".join( [line for line in process_output.split('\n') if line != ""])) self.logger.info(log_msg) diff --git a/tests/ga/test_extension.py b/tests/ga/test_extension.py index 5967031293..7ec539541b 100644 --- a/tests/ga/test_extension.py +++ b/tests/ga/test_extension.py @@ -2336,8 +2336,8 @@ def test_ext_sequence_no_should_be_set_from_within_extension(self, *args): exthandlers_handler.report_ext_handlers_status() for _, kwargs in mock_report_event.call_args_list: - # The output is of the format - 'testfile.sh\n[stdout]ConfigSequenceNumber=N\n[stderr]' - if test_file_name not in kwargs['message']: + # The output is of the format - 'Command: testfile.sh -{Operation} \n[stdout]ConfigSequenceNumber=N\n[stderr]' + if ("Command: " + test_file_name) 
not in kwargs['message']: continue self.assertIn("{0}={1}".format(ExtCommandEnvVariable.ExtensionSeqNumber, expected_seq_no), kwargs['message']) @@ -2405,13 +2405,13 @@ def test_correct_exit_code_should_be_set_on_uninstall_cmd_failure(self, *args): with patch.object(ExtHandlerInstance, 'report_event') as mock_report_event: exthandlers_handler.run() exthandlers_handler.report_ext_handlers_status() - exthandlers_handler.report_ext_handlers_status() - _, disable_kwargs = mock_report_event.call_args_list[1] # pylint: disable=unused-variable - _, update_kwargs = mock_report_event.call_args_list[2] - _, uninstall_kwargs = mock_report_event.call_args_list[3] # pylint: disable=unused-variable - _, install_kwargs = mock_report_event.call_args_list[4] - _, enable_kwargs = mock_report_event.call_args_list[5] + update_kwargs = next(kwargs for _, kwargs in mock_report_event.call_args_list if + "Command: testfile.sh -update" in kwargs['message']) + install_kwargs = next(kwargs for _, kwargs in mock_report_event.call_args_list if + "Command: testfile.sh -install" in kwargs['message']) + enable_kwargs = next(kwargs for _, kwargs in mock_report_event.call_args_list if + "Command: testfile.sh -enable" in kwargs['message']) self.assertIn("%s=%s" % (ExtCommandEnvVariable.DisableReturnCode, exit_code), update_kwargs['message']) self.assertIn("%s=%s" % (ExtCommandEnvVariable.UninstallReturnCode, exit_code), install_kwargs['message']) diff --git a/tests/ga/test_multi_config_extension.py b/tests/ga/test_multi_config_extension.py index e424afd4bf..a9a07bd67e 100644 --- a/tests/ga/test_multi_config_extension.py +++ b/tests/ga/test_multi_config_extension.py @@ -1,6 +1,7 @@ import contextlib import json import os.path +import re import subprocess import uuid @@ -887,15 +888,19 @@ def test_it_should_ignore_disable_errors_for_multi_config_extensions(self): (sc_ext, ExtensionCommandNames.ENABLE) ) + reported_events = [kwargs for _, kwargs in patch_report_event.call_args_list if + 
re.search("Executing command: (.+) with environment variables: ", + kwargs['message']) is None] + self.assertTrue(all( - fail_code in kwargs['message'] for args, kwargs in patch_report_event.call_args_list if + fail_code in kwargs['message'] for kwargs in reported_events if kwargs['name'] == first_ext.name), "Error not reported") self.assertTrue(all( - fail_code in kwargs['message'] for args, kwargs in patch_report_event.call_args_list if + fail_code in kwargs['message'] for kwargs in reported_events if kwargs['name'] == second_ext.name), "Error not reported") # Make sure fail code is not reported for any other extension self.assertFalse(all( - fail_code in kwargs['message'] for args, kwargs in patch_report_event.call_args_list if + fail_code in kwargs['message'] for kwargs in reported_events if kwargs['name'] == third_ext.name), "Error not reported") def test_it_should_report_transitioning_if_status_file_not_found(self): From 5a57d928f2b16bbe6ef0adb5ab4a8e7708650ac7 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Wed, 16 Mar 2022 10:29:12 -0700 Subject: [PATCH 48/84] Make extension manifests optional (#2537) Co-authored-by: narrieta --- .../extensions_goal_state_from_vm_settings.py | 4 +- .../vm_settings-no_extension_manifests.json | 78 +++++++++++++++++++ ..._extensions_goal_state_from_vm_settings.py | 17 ++++ 3 files changed, 98 insertions(+), 1 deletion(-) create mode 100644 tests/data/hostgaplugin/vm_settings-no_extension_manifests.json diff --git a/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py b/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py index f0997e8194..241f789990 100644 --- a/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py +++ b/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py @@ -360,7 +360,9 @@ def _parse_extensions(self, vm_settings): is_multi_config = extension_gs.get('isMultiConfig') if is_multi_config is not None: 
extension.supports_multi_config = is_multi_config - extension.manifest_uris.append(extension_gs['location']) + location = extension_gs.get('location') + if location is not None: + extension.manifest_uris.append(location) fail_over_location = extension_gs.get('failoverLocation') if fail_over_location is not None: extension.manifest_uris.append(fail_over_location) diff --git a/tests/data/hostgaplugin/vm_settings-no_extension_manifests.json b/tests/data/hostgaplugin/vm_settings-no_extension_manifests.json new file mode 100644 index 0000000000..b5653ef76d --- /dev/null +++ b/tests/data/hostgaplugin/vm_settings-no_extension_manifests.json @@ -0,0 +1,78 @@ +{ + "hostGAPluginVersion": "1.0.8.123", + "vmSettingsSchemaVersion": "0.0", + "activityId": "89d50bf1-fa55-4257-8af3-3db0c9f81ab4", + "correlationId": "c143f8f0-a66b-4881-8c06-1efd278b0b02", + "inSvdSeqNo": 978, + "extensionsLastModifiedTickCount": 637829610574739741, + "extensionGoalStatesSource": "Fabric", + "statusUploadBlob": { + "statusBlobType": "PageBlob", + "value": "https://md-ssd-xpdjf15s.blob.core.windows.net/$system/u-sqlwatcher.f338f67e.status?sv=2018-03-28&sr=b&sk=system-1&sig=88Y3NM%2b1aU%3d&se=9999-01-01T00%3a00%3a00Z&sp=rw" + }, + "inVMMetadata": { + "subscriptionId": "8d3c2715-f063-40b8-9402-49784992ae8d", + "resourceGroupName": "SYSTEMCENTERCURRENTBRANCH", + "vmName": "ubuntu-sqlwatcher", + "location": "centralus", + "vmId": "f338f67e-5d06-4f13-892a-ff1b047ba5bf", + "vmSize": "Standard_D2s_v3", + "osType": "Linux", + "vmImage": { + "publisher": "Canonical", + "offer": "UbuntuServer", + "sku": "18.04-LTS", + "version": "18.04.202005220" + } + }, + "gaFamilies": [ + { + "name": "Prod", + "uris": [ + "https://zrdfepirv2dz5prdstr07a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Prod_uscentral_manifest.xml", + "https://rdfepirv2dm1prdstr09.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Prod_uscentral_manifest.xml", + 
"https://zrdfepirv2dm5prdstr06a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Prod_uscentral_manifest.xml" + ] + } + ], + "extensionGoalStates": [ + { + "name": "Microsoft.Azure.Monitor.WorkloadInsights.Test.Workload.LinuxConfigAgent", + "version": "3.0", + "state": "uninstall", + "autoUpgrade": true, + "runAsStartupTask": false, + "isJson": true, + "useExactVersion": true, + "isMultiConfig": false + }, + { + "name": "Microsoft.Azure.Monitor.WorkloadInsights.Test.Workload.LinuxInstallerAgent", + "version": "11.0", + "state": "uninstall", + "autoUpgrade": true, + "runAsStartupTask": false, + "isJson": true, + "useExactVersion": true, + "isMultiConfig": false + }, + { + "name": "Microsoft.Azure.Monitor.Workloads.Workload.WLILinuxExtension", + "version": "0.2.127", + "location": "https://umsakzkwhng2ft0jjptl.blob.core.windows.net/deeb2df6-c025-e6fb-b015-449ed6a676bc/deeb2df6-c025-e6fb-b015-449ed6a676bc_manifest.xml", + "failoverLocation": "https://umsafmqfbv4hgrd1hqff.blob.core.windows.net/deeb2df6-c025-e6fb-b015-449ed6a676bc/deeb2df6-c025-e6fb-b015-449ed6a676bc_manifest.xml", + "state": "enabled", + "autoUpgrade": true, + "runAsStartupTask": false, + "isJson": true, + "useExactVersion": true, + "settingsSeqNo": 7, + "isMultiConfig": false, + "settings": [ + { + "publicSettings": "{\"workloadConfig\": null}" + } + ] + } + ] +} \ No newline at end of file diff --git a/tests/protocol/test_extensions_goal_state_from_vm_settings.py b/tests/protocol/test_extensions_goal_state_from_vm_settings.py index 3428269ca6..400b885d5b 100644 --- a/tests/protocol/test_extensions_goal_state_from_vm_settings.py +++ b/tests/protocol/test_extensions_goal_state_from_vm_settings.py @@ -70,6 +70,22 @@ def test_it_should_parse_missing_status_upload_blob_as_none(self, _): self.assertIsNone(extensions_goal_state.status_upload_blob, "Expected status upload blob to be None") self.assertEqual("BlockBlob", extensions_goal_state.status_upload_blob_type, "Expected 
status upload blob to be Block") + def test_it_should_parse_missing_extension_manifests_as_empty(self, _): + data_file = mockwiredata.DATA_FILE_VM_SETTINGS.copy() + data_file["vm_settings"] = "hostgaplugin/vm_settings-no_extension_manifests.json" + with mock_wire_protocol(data_file) as protocol: + extensions_goal_state = protocol.get_goal_state().extensions_goal_state + + self.assertEqual(3, len(extensions_goal_state.extensions), "Incorrect number of extensions. Got: {0}".format(extensions_goal_state.extensions)) + self.assertEqual([], extensions_goal_state.extensions[0].manifest_uris, "Expected an empty list of manifests for {0}".format(extensions_goal_state.extensions[0])) + self.assertEqual([], extensions_goal_state.extensions[1].manifest_uris, "Expected an empty list of manifests for {0}".format(extensions_goal_state.extensions[1])) + self.assertEqual( + [ + "https://umsakzkwhng2ft0jjptl.blob.core.windows.net/deeb2df6-c025-e6fb-b015-449ed6a676bc/deeb2df6-c025-e6fb-b015-449ed6a676bc_manifest.xml", + "https://umsafmqfbv4hgrd1hqff.blob.core.windows.net/deeb2df6-c025-e6fb-b015-449ed6a676bc/deeb2df6-c025-e6fb-b015-449ed6a676bc_manifest.xml", + ], + extensions_goal_state.extensions[2].manifest_uris, "Incorrect list of manifests for {0}".format(extensions_goal_state.extensions[2])) + def test_it_should_default_to_block_blob_when_the_status_blob_type_is_not_valid(self, _): data_file = mockwiredata.DATA_FILE_VM_SETTINGS.copy() data_file["vm_settings"] = "hostgaplugin/vm_settings-invalid_blob_type.json" @@ -84,6 +100,7 @@ def test_its_source_channel_should_be_host_ga_plugin(self, _): self.assertEqual(GoalStateChannel.HostGAPlugin, extensions_goal_state.channel, "The channel is incorrect") + class CaseFoldedDictionaryTestCase(AgentTestCase): def test_it_should_retrieve_items_ignoring_case(self): dictionary = json.loads('''{ From eb0c05655e981fbc3fc5b547b67191ef0bd91c88 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: 
Thu, 17 Mar 2022 10:04:57 -0700 Subject: [PATCH 49/84] enable cgroups for redhat and centos 7.4+ and update ThrottledTimeThreshod as debug flag (#2531) * enable cgroups for redhat and centos 7.4+ * addressed comment --- azurelinuxagent/common/cgroupapi.py | 2 +- azurelinuxagent/common/cgroupconfigurator.py | 3 +-- azurelinuxagent/common/conf.py | 13 ++++++++++++- tests/common/test_cgroupapi.py | 9 ++++++--- tests/common/test_cgroupconfigurator.py | 5 ++--- tests/test_agent.py | 1 + 6 files changed, 23 insertions(+), 10 deletions(-) diff --git a/azurelinuxagent/common/cgroupapi.py b/azurelinuxagent/common/cgroupapi.py index 6225a052d6..f69b1591c0 100644 --- a/azurelinuxagent/common/cgroupapi.py +++ b/azurelinuxagent/common/cgroupapi.py @@ -61,7 +61,7 @@ def cgroups_supported(): return False return ((distro_name.lower() == 'ubuntu' and distro_version.major >= 16) or (distro_name.lower() in ("centos", "redhat") and - ((distro_version.major == 7 and distro_version.minor >= 8) or distro_version.major >= 8))) + ((distro_version.major == 7 and distro_version.minor >= 4) or distro_version.major >= 8))) @staticmethod def track_cgroups(extension_cgroups): diff --git a/azurelinuxagent/common/cgroupconfigurator.py b/azurelinuxagent/common/cgroupconfigurator.py index e0f408ab55..f37f08d938 100644 --- a/azurelinuxagent/common/cgroupconfigurator.py +++ b/azurelinuxagent/common/cgroupconfigurator.py @@ -94,7 +94,6 @@ [Service] CPUQuota={0} """ -_AGENT_THROTTLED_TIME_THRESHOLD = 120 # 2 minutes class DisableCgroups(object): @@ -663,7 +662,7 @@ def __is_process_descendant_of_the_agent(pid): def _check_agent_throttled_time(cgroup_metrics): for metric in cgroup_metrics: if metric.instance == AGENT_NAME_TELEMETRY and metric.counter == MetricsCounter.THROTTLED_TIME: - if metric.value > _AGENT_THROTTLED_TIME_THRESHOLD: + if metric.value > conf.get_agent_cpu_throttled_time_threshold(): raise CGroupsException("The agent has been throttled for {0} seconds".format(metric.value)) 
@staticmethod diff --git a/azurelinuxagent/common/conf.py b/azurelinuxagent/common/conf.py index 3b4ea6840d..bd101f617f 100644 --- a/azurelinuxagent/common/conf.py +++ b/azurelinuxagent/common/conf.py @@ -185,6 +185,7 @@ def load_conf_from_file(conf_file_path, conf=__conf__): # "Debug.CgroupCheckPeriod": 300, "Debug.AgentCpuQuota": 75, + "Debug.AgentCpuThrottledTimeThreshold": 120, "Debug.EtpCollectionPeriod": 300, "Debug.AutoUpdateHotfixFrequency": 14400, "Debug.AutoUpdateNormalFrequency": 86400, @@ -544,7 +545,17 @@ def get_agent_cpu_quota(conf=__conf__): """ return conf.get_int("Debug.AgentCpuQuota", 75) -def get_cgroup_monitor_expiry_time (conf=__conf__): + +def get_agent_cpu_throttled_time_threshold(conf=__conf__): + """ + Throttled time threshold for agent cpu in seconds. + + NOTE: This option is experimental and may be removed in later versions of the Agent. + """ + return conf.get_int("Debug.AgentCpuThrottledTimeThreshold", 120) + + +def get_cgroup_monitor_expiry_time(conf=__conf__): """ cgroups monitoring for pilot extensions disabled after expiry time diff --git a/tests/common/test_cgroupapi.py b/tests/common/test_cgroupapi.py index 9f7e420037..14ed0129df 100644 --- a/tests/common/test_cgroupapi.py +++ b/tests/common/test_cgroupapi.py @@ -48,7 +48,7 @@ def tearDown(self): class CGroupsApiTestCase(_MockedFileSystemTestCase): - def test_cgroups_should_be_supported_only_on_ubuntu_16_and_later(self): + def test_cgroups_should_be_supported_only_on_ubuntu16_centos7dot4_redhat7dot4_and_later_versions(self): test_cases = [ (['ubuntu', '16.04', 'xenial'], True), (['ubuntu', '16.10', 'yakkety'], True), @@ -62,8 +62,11 @@ def test_cgroups_should_be_supported_only_on_ubuntu_16_and_later(self): (['centos', '8.1', 'Source'], True), (['redhat', '8.2', 'Maipo'], True), (['redhat', '8.2.2111', 'Core'], True), - (['centos', '7.5', 'Source'], False), - (['redhat', '7.5', 'Maipo'], False), + (['centos', '7.4', 'Source'], True), + (['redhat', '7.4', 'Maipo'], True), + 
(['centos', '7.5', 'Source'], True), + (['centos', '7.3', 'Maipo'], False), + (['redhat', '7.2', 'Maipo'], False), (['bigip', '15.0.1', 'Final'], False), (['gaia', '273.562', 'R80.30'], False), (['debian', '9.1', ''], False), diff --git a/tests/common/test_cgroupconfigurator.py b/tests/common/test_cgroupconfigurator.py index 448c4b2c79..fcae952bd2 100644 --- a/tests/common/test_cgroupconfigurator.py +++ b/tests/common/test_cgroupconfigurator.py @@ -30,8 +30,7 @@ from azurelinuxagent.common import conf from azurelinuxagent.common.cgroup import AGENT_NAME_TELEMETRY, MetricsCounter, MetricValue, MetricsCategory, CpuCgroup -from azurelinuxagent.common.cgroupconfigurator import CGroupConfigurator, _AGENT_THROTTLED_TIME_THRESHOLD, \ - DisableCgroups +from azurelinuxagent.common.cgroupconfigurator import CGroupConfigurator, DisableCgroups from azurelinuxagent.common.cgroupstelemetry import CGroupsTelemetry from azurelinuxagent.common.event import WALAEventOperation from azurelinuxagent.common.exception import CGroupsException, ExtensionError, ExtensionErrorCodes @@ -837,7 +836,7 @@ def get_completed_process(): thread.join(timeout=5) def test_check_agent_throttled_time_should_raise_a_cgroups_exception_when_the_threshold_is_exceeded(self): - metrics = [MetricValue(MetricsCategory.CPU_CATEGORY, MetricsCounter.THROTTLED_TIME, AGENT_NAME_TELEMETRY, _AGENT_THROTTLED_TIME_THRESHOLD + 1)] + metrics = [MetricValue(MetricsCategory.CPU_CATEGORY, MetricsCounter.THROTTLED_TIME, AGENT_NAME_TELEMETRY, conf.get_agent_cpu_throttled_time_threshold() + 1)] with self.assertRaises(CGroupsException) as context_manager: CGroupConfigurator._Impl._check_agent_throttled_time(metrics) diff --git a/tests/test_agent.py b/tests/test_agent.py index 52ef9f6714..ceaa858273 100644 --- a/tests/test_agent.py +++ b/tests/test_agent.py @@ -30,6 +30,7 @@ Autoupdate.Frequency = 3600 DVD.MountPoint = /mnt/cdrom/secure Debug.AgentCpuQuota = 75 +Debug.AgentCpuThrottledTimeThreshold = 120 
Debug.AutoUpdateHotfixFrequency = 14400 Debug.AutoUpdateNormalFrequency = 86400 Debug.CgroupCheckPeriod = 300 From f05ec2e137af160e03df7709daef7e31887c8db4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eug=C3=A9ne=20Roux?= Date: Fri, 18 Mar 2022 04:00:53 +0200 Subject: [PATCH 50/84] Update setup.py to cater for all RHEL 8 versions (#2532) - Testing for version.startswith("8.2") breaks on RHEL versions later than 8.2 - RHEL 8.0, not 8.2, changed the default Python version to 3.x Co-authored-by: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 4c38585d87..56f7f54f11 100755 --- a/setup.py +++ b/setup.py @@ -97,7 +97,7 @@ def get_data_files(name, version, fullname): # pylint: disable=R0912 agent_bin_path = osutil.get_agent_bin_path() if name in ('redhat', 'centos', 'almalinux', 'cloudlinux', 'rocky'): - if version.startswith("8.2"): + if version.startswith("8"): # redhat8+ default to py3 set_bin_files(data_files, dest=agent_bin_path, src=["bin/py3/waagent", "bin/waagent2.0"]) @@ -106,7 +106,7 @@ def get_data_files(name, version, fullname): # pylint: disable=R0912 set_conf_files(data_files) set_logrotate_files(data_files) set_udev_files(data_files) - if version.startswith("8.2"): + if version.startswith("8"): # redhat 8+ uses systemd and python3 set_systemd_files(data_files, dest=systemd_dir_path, src=["init/redhat/waagent.service", From ae89acd11eee067ef4b85387fe7b0922fb6ee3e2 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Fri, 18 Mar 2022 14:28:15 -0700 Subject: [PATCH 51/84] don't report CPU metrics if it's negative value (#2538) * don't report CPU negative value * addressed comment --- azurelinuxagent/common/cgroup.py | 17 +++++++++---- tests/common/test_cgroupstelemetry.py | 35 +++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 5 deletions(-) diff --git 
a/azurelinuxagent/common/cgroup.py b/azurelinuxagent/common/cgroup.py index 280ca4be5d..900f4f5b46 100644 --- a/azurelinuxagent/common/cgroup.py +++ b/azurelinuxagent/common/cgroup.py @@ -120,7 +120,9 @@ def is_active(self): def get_tracked_metrics(self, **_): """ - Retrieves the current value of the metrics tracked for this cgroup and returns them as an array + Retrieves the current value of the metrics tracked for this cgroup and returns them as an array. + + Note: Agent won't track the metrics if the current cpu ticks less than previous value and returns empty array. """ raise NotImplementedError() @@ -241,11 +243,16 @@ def get_throttled_time(self): return float(self._current_throttled_time - self._previous_throttled_time) / 1E9 def get_tracked_metrics(self, **kwargs): - tracked = [ - MetricValue(MetricsCategory.CPU_CATEGORY, MetricsCounter.PROCESSOR_PERCENT_TIME, self.name, self.get_cpu_usage()), - ] + tracked = [] + cpu_usage = self.get_cpu_usage() + if cpu_usage >= float(0): + tracked.append(MetricValue(MetricsCategory.CPU_CATEGORY, MetricsCounter.PROCESSOR_PERCENT_TIME, self.name, cpu_usage)) + if 'track_throttled_time' in kwargs and kwargs['track_throttled_time']: - tracked.append(MetricValue(MetricsCategory.CPU_CATEGORY, MetricsCounter.THROTTLED_TIME, self.name, self.get_throttled_time())) + throttled_time = self.get_throttled_time() + if cpu_usage >= float(0) and throttled_time >= float(0): + tracked.append(MetricValue(MetricsCategory.CPU_CATEGORY, MetricsCounter.THROTTLED_TIME, self.name, throttled_time)) + return tracked diff --git a/tests/common/test_cgroupstelemetry.py b/tests/common/test_cgroupstelemetry.py index c5b5bc243c..9d96a1d825 100644 --- a/tests/common/test_cgroupstelemetry.py +++ b/tests/common/test_cgroupstelemetry.py @@ -374,3 +374,38 @@ def test_extension_telemetry_not_sent_for_empty_perf_metrics(self, *args): # py metrics = CGroupsTelemetry.poll_all_tracked() self.assertEqual(0, len(metrics)) + 
@patch("azurelinuxagent.common.cgroup.CpuCgroup.get_cpu_usage") + @patch("azurelinuxagent.common.cgroup.CpuCgroup.get_throttled_time") + @patch("azurelinuxagent.common.cgroup.CGroup.is_active") + def test_cgroup_telemetry_should_not_report_cpu_negative_value(self, patch_is_active, path_get_throttled_time, patch_get_cpu_usage): + + num_polls = 5 + num_extensions = 1 + + # only verifying calculations and not validity of the values. + cpu_percent_values = [random.randint(0, 100) for _ in range(num_polls-1)] + cpu_percent_values.append(-1) + cpu_throttled_values = [random.randint(0, 60 * 60) for _ in range(num_polls)] + + dummy_cpu_cgroup = CpuCgroup("dummy_extension_name", "dummy_cpu_path") + CGroupsTelemetry.track_cgroup(dummy_cpu_cgroup) + self.assertEqual(1, len(CGroupsTelemetry._tracked)) + + for i in range(num_polls): + patch_is_active.return_value = True + patch_get_cpu_usage.return_value = cpu_percent_values[i] + path_get_throttled_time.return_value = cpu_throttled_values[i] + + CGroupsTelemetry._track_throttled_time = True + metrics = CGroupsTelemetry.poll_all_tracked() + + # 1 CPU metric + 1 CPU throttled + # ignore CPU metrics from telemetry if cpu cgroup reports negative value + if i < num_polls-1: + self.assertEqual(len(metrics), 2 * num_extensions) + else: + self.assertEqual(len(metrics), 0) + + for metric in metrics: + self.assertGreaterEqual(metric.value, 0, "telemetry should not report negative value") + From b3664e830e06d1714ef62b2e589558295848dfb4 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Thu, 24 Mar 2022 15:27:07 -0700 Subject: [PATCH 52/84] suppress legacy cgruop warning in older deamons (#2539) --- dcr/scenario_utils/check_waagent_log.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/dcr/scenario_utils/check_waagent_log.py b/dcr/scenario_utils/check_waagent_log.py index cb7a661527..e570fed1be 100644 --- a/dcr/scenario_utils/check_waagent_log.py +++ 
b/dcr/scenario_utils/check_waagent_log.py @@ -99,6 +99,12 @@ def check_waagent_log_for_errors(waagent_log=AGENT_LOG_FILE, ignore=None): { 'message': r"Event: name=Microsoft.Azure.Monitor.AzureMonitorLinuxAgent, op=Install, message=\[ExtensionOperationError\] Non-zero exit code: 51", 'if': lambda log_line: "Mariner2.0" in distro and log_line.level == "ERROR" and log_line.who == "ExtHandler" + }, + # 2022-03-18T00:13:37.063540Z INFO ExtHandler ExtHandler [CGW] The daemon's PID was added to a legacy cgroup; will not monitor resource usage. + # + # Agent disables cgroups in older versions of the daemon (2.2.31-2.2.40).This is known issue and ignoring. + { + 'message': r"The daemon's PID was added to a legacy cgroup; will not monitor resource usage" } ] From 75fcab83d0cba822a89b530834e532e6d0d39791 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Wed, 30 Mar 2022 12:07:37 -0700 Subject: [PATCH 53/84] Update check_waagent_log with changes from DCR (#2540) --- dcr/scenario_utils/agent_log_parser.py | 10 ++++- dcr/scenario_utils/check_waagent_log.py | 60 ++++++++++++++++++++++--- 2 files changed, 64 insertions(+), 6 deletions(-) diff --git a/dcr/scenario_utils/agent_log_parser.py b/dcr/scenario_utils/agent_log_parser.py index 09b9b723de..5c67c3a806 100644 --- a/dcr/scenario_utils/agent_log_parser.py +++ b/dcr/scenario_utils/agent_log_parser.py @@ -49,7 +49,15 @@ def get_timestamp(self): @property def is_error(self): - return self.level in ('ERROR', 'WARNING') or any(err in self.text for err in self.__ERROR_TAGS) + is_error = self.level in ('ERROR', 'WARNING') or any(err in self.text for err in self.__ERROR_TAGS) + # + # Don't report errors in the telemetry data. Sample log line: + # + # 2022-03-27T06:40:46.011455Z VERBOSE SendTelemetryHandler ExtHandler HTTP connection [POST] [/machine?comp=telemetrydata] [\n\n ResourceNotAvailable\n The resource requested is no longer available. Please refresh your cache.\n
\n
' + # + # ResourceGone can happen if we are fetching one of the URIs in the goal state and a new goal state arrives + { + 'message': r"(?s)(An error occurred while retrieving the goal state|Fetching the goal state failed|Error fetching goal state).*(\[ResourceGoneError\]|\[410: Gone\]|Resource is gone)", + 'if': lambda log_line: log_line.level == "WARNING" + }, + # + # 2022-03-08T03:03:23.036161Z WARNING ExtHandler ExtHandler Fetch failed from [http://168.63.129.16:32526/extensionArtifact]: [HTTP Failed] [400: Bad Request] b'' + # 2022-03-08T03:03:23.042008Z WARNING ExtHandler ExtHandler Fetch failed: [ProtocolError] Fetch failed from [http://168.63.129.16:32526/extensionArtifact]: [HTTP Failed] [400: Bad Request] b'' + # + # Warning downloading extension manifest. If the issue persists, this would cause errors elsewhere so safe to ignore + { + 'message': r"\[http://168.63.129.16:32526/extensionArtifact\]: \[HTTP Failed\] \[400: Bad Request\]", + 'if': lambda log_line: log_line.level == "WARNING" + }, + # + # 2022-03-08T03:03:23.036161Z WARNING ExtHandler ExtHandler Fetch failed from [http://168.63.129.16:32526/extensionArtifact]: [HTTP Failed] [400: Bad Request] b'' + # 2022-03-08T03:03:23.042008Z WARNING ExtHandler ExtHandler Fetch failed: [ProtocolError] Fetch failed from [http://168.63.129.16:32526/extensionArtifact]: [HTTP Failed] [400: Bad Request] b'' + # + # Warning downloading extension manifest. 
If the issue persists, this would cause errors elsewhere so safe to ignore + { + 'message': r"\[http://168.63.129.16:32526/extensionArtifact\]: \[HTTP Failed\] \[400: Bad Request\]", + 'if': lambda log_line: log_line.level == "WARNING" + }, + # + # 2022-03-29T05:52:10.089958Z WARNING ExtHandler ExtHandler An error occurred while retrieving the goal state: [ProtocolError] GET vmSettings [correlation ID: da106cf5-83a0-44ec-9484-d0e9223847ab eTag: 9856274988128027586]: Timeout + # + # Ignore warnings about timeouts in vmSettings; if the condition persists, an error will occur elsewhere. + # + { + 'message': r"GET vmSettings \[[^]]+\]: Timeout", + 'if': lambda log_line: log_line.level == "WARNING" + }, # 2022-03-09T20:04:33.745721Z ERROR ExtHandler ExtHandler Event: name=Microsoft.Azure.Monitor.AzureMonitorLinuxAgent, op=Install, message=[ExtensionOperationError] \ # Non-zero exit code: 51, /var/lib/waagent/Microsoft.Azure.Monitor.AzureMonitorLinuxAgent-1.15.3/./shim.sh -install # From f91bf787344dc06e653d4861c03741dbf968b7ee Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Tue, 5 Apr 2022 06:41:16 -0700 Subject: [PATCH 54/84] Fast Track: Ignore Fabric goal states from HostGAPlugin; process most recent goal state from WireServer/HostGAPlugin (#2541) * fix certificate * . * Fix unit tests * save parsing errors * . 
* Add unit tests * Output message only once per goal state * Remove placeholder * pylint warnings and py2.6 compat * enable fast track * pylint * ut fixes * update history regex Co-authored-by: narrieta --- azurelinuxagent/common/exception.py | 10 - .../common/protocol/extensions_goal_state.py | 11 ++ .../extensions_goal_state_from_vm_settings.py | 14 +- azurelinuxagent/common/protocol/goal_state.py | 181 ++++++++++-------- azurelinuxagent/common/protocol/hostplugin.py | 25 ++- azurelinuxagent/common/utils/archive.py | 23 ++- azurelinuxagent/common/utils/timeutil.py | 26 ++- azurelinuxagent/ga/update.py | 18 +- ...tings-difference_in_required_features.json | 2 +- .../vm_settings-empty_depends_on.json | 2 +- .../vm_settings-invalid_blob_type.json | 2 +- .../vm_settings-no_extension_manifests.json | 2 +- .../vm_settings-no_status_upload_blob.json | 4 +- .../hostgaplugin/vm_settings-out-of-sync.json | 2 +- .../hostgaplugin/vm_settings-parse_error.json | 2 +- .../vm_settings-requested_version.json | 4 +- .../vm_settings-unsupported_version.json | 2 +- tests/data/hostgaplugin/vm_settings.json | 4 +- tests/data/wire/ext_conf-no_gs_metadata.xml | 27 +++ tests/data/wire/ext_conf.xml | 4 +- .../wire/ext_conf_additional_locations.xml | 5 +- tests/data/wire/ext_conf_aks_extension.xml | 1 + tests/data/wire/ext_conf_autoupgrade.xml | 4 +- .../ext_conf_autoupgrade_internalversion.xml | 4 +- ..._conf_dependencies_with_empty_settings.xml | 1 + .../wire/ext_conf_in_vm_artifacts_profile.xml | 1 + ...ext_conf_in_vm_empty_artifacts_profile.xml | 1 + tests/data/wire/ext_conf_internalversion.xml | 4 +- .../ext_conf_invalid_and_valid_handlers.xml | 1 + tests/data/wire/ext_conf_missing_family.xml | 1 + .../ext_conf_missing_requested_version.xml | 1 + .../wire/ext_conf_multiple_extensions.xml | 1 + .../ext_conf_no_extensions-block_blob.xml | 1 + .../ext_conf_no_extensions-no_status_blob.xml | 1 + .../wire/ext_conf_no_extensions-page_blob.xml | 1 + tests/data/wire/ext_conf_no_public.xml | 1 
+ tests/data/wire/ext_conf_no_settings.xml | 1 + .../data/wire/ext_conf_requested_version.xml | 1 + .../data/wire/ext_conf_required_features.xml | 1 + tests/data/wire/ext_conf_sequencing.xml | 4 +- .../wire/ext_conf_settings_case_mismatch.xml | 1 + tests/data/wire/ext_conf_upgradeguid.xml | 4 +- ...multiple_depends_on_for_single_handler.xml | 1 + ..._multiple_runtime_settings_same_plugin.xml | 1 + ...onf_multiple_settings_for_same_handler.xml | 1 + ..._conf_plugin_settings_version_mismatch.xml | 4 +- ..._and_multi_config_settings_same_plugin.xml | 1 + tests/ga/test_extension.py | 2 +- tests/ga/test_report_status.py | 15 +- tests/protocol/mockwiredata.py | 62 ++++-- ...sions_goal_state_from_extensions_config.py | 4 +- ..._extensions_goal_state_from_vm_settings.py | 17 +- tests/protocol/test_goal_state.py | 164 ++++++++++++++-- tests/protocol/test_hostplugin.py | 15 +- tests/protocol/test_wire.py | 6 +- tests/utils/test_archive.py | 11 +- 56 files changed, 485 insertions(+), 225 deletions(-) create mode 100644 tests/data/wire/ext_conf-no_gs_metadata.xml diff --git a/azurelinuxagent/common/exception.py b/azurelinuxagent/common/exception.py index 2c2fe19816..9b16c42678 100644 --- a/azurelinuxagent/common/exception.py +++ b/azurelinuxagent/common/exception.py @@ -121,16 +121,6 @@ class ExtensionsConfigError(ExtensionsGoalStateError): """ -class VmSettingsError(ExtensionsGoalStateError): - """ - Error raised when the VmSettings are malformed - """ - def __init__(self, message, etag, vm_settings_text, inner=None): - super(VmSettingsError, self).__init__(message, inner) - self.etag = etag - self.vm_settings_text = vm_settings_text - - class MultiConfigExtensionEnableError(ExtensionError): """ Error raised when enable for a Multi-Config extension is failing. 
diff --git a/azurelinuxagent/common/protocol/extensions_goal_state.py b/azurelinuxagent/common/protocol/extensions_goal_state.py index 1b3f4d2617..454a13806a 100644 --- a/azurelinuxagent/common/protocol/extensions_goal_state.py +++ b/azurelinuxagent/common/protocol/extensions_goal_state.py @@ -19,6 +19,7 @@ import azurelinuxagent.common.logger as logger from azurelinuxagent.common.AgentGlobals import AgentGlobals +from azurelinuxagent.common.exception import AgentError from azurelinuxagent.common.utils import textutil @@ -34,6 +35,16 @@ class GoalStateSource(object): Empty = "Empty" +class VmSettingsParseError(AgentError): + """ + Error raised when the VmSettings are malformed + """ + def __init__(self, message, etag, vm_settings_text, inner=None): + super(VmSettingsParseError, self).__init__(message, inner) + self.etag = etag + self.vm_settings_text = vm_settings_text + + class ExtensionsGoalState(object): """ ExtensionsGoalState represents the extensions information in the goal state; that information can originate from diff --git a/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py b/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py index 241f789990..ce99a26079 100644 --- a/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py +++ b/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py @@ -21,10 +21,9 @@ import sys from azurelinuxagent.common.AgentGlobals import AgentGlobals -from azurelinuxagent.common.exception import VmSettingsError from azurelinuxagent.common.future import ustr import azurelinuxagent.common.logger as logger -from azurelinuxagent.common.protocol.extensions_goal_state import ExtensionsGoalState, GoalStateChannel +from azurelinuxagent.common.protocol.extensions_goal_state import ExtensionsGoalState, GoalStateChannel, VmSettingsParseError from azurelinuxagent.common.protocol.restapi import VMAgentManifest, Extension, ExtensionRequestedState, ExtensionSettings 
from azurelinuxagent.common.utils.flexible_version import FlexibleVersion @@ -56,7 +55,8 @@ def __init__(self, etag, json_text, correlation_id): self._parse_vm_settings(json_text) self._do_common_validations() except Exception as e: - raise VmSettingsError("Error parsing vmSettings [HGAP: {0}]: {1}".format(self._host_ga_plugin_version, ustr(e)), etag, self.get_redacted_text()) + message = "Error parsing vmSettings [HGAP: {0} Etag:{1}]: {2}".format(self._host_ga_plugin_version, etag, ustr(e)) + raise VmSettingsParseError(message, etag, self.get_redacted_text()) @property def id(self): @@ -142,11 +142,7 @@ def extensions(self): return self._extensions def get_redacted_text(self): - return ExtensionsGoalStateFromVmSettings.redact(self._text) - - @staticmethod - def redact(text): - return re.sub(r'("protectedSettings"\s*:\s*)"[^"]+"', r'\1"*** REDACTED ***"', text) + return re.sub(r'("protectedSettings"\s*:\s*)"[^"]+"', r'\1"*** REDACTED ***"', self._text) def _parse_vm_settings(self, json_text): vm_settings = _CaseFoldedDict.from_dict(json.loads(json_text)) @@ -165,7 +161,7 @@ def _parse_simple_attributes(self, vm_settings): # "correlationId": "9a47a2a2-e740-4bfc-b11b-4f2f7cfe7d2e", # "inSvdSeqNo": 1, # "extensionsLastModifiedTickCount": 637726657706205217, - # "extensionGoalStatesSource": "Fabric", + # "extensionGoalStatesSource": "FastTrack", # ... 
# } diff --git a/azurelinuxagent/common/protocol/goal_state.py b/azurelinuxagent/common/protocol/goal_state.py index a2ce59c380..7c4c7c8c52 100644 --- a/azurelinuxagent/common/protocol/goal_state.py +++ b/azurelinuxagent/common/protocol/goal_state.py @@ -23,17 +23,17 @@ import azurelinuxagent.common.logger as logger from azurelinuxagent.common.AgentGlobals import AgentGlobals from azurelinuxagent.common.datacontract import set_properties -from azurelinuxagent.common.exception import ProtocolError, ResourceGoneError, VmSettingsError +from azurelinuxagent.common.exception import ProtocolError, ResourceGoneError from azurelinuxagent.common.future import ustr from azurelinuxagent.common.protocol.extensions_goal_state_factory import ExtensionsGoalStateFactory -from azurelinuxagent.common.protocol.extensions_goal_state_from_vm_settings import ExtensionsGoalStateFromVmSettings +from azurelinuxagent.common.protocol.extensions_goal_state import VmSettingsParseError, GoalStateSource from azurelinuxagent.common.protocol.hostplugin import VmSettingsNotSupported from azurelinuxagent.common.protocol.restapi import Cert, CertList, RemoteAccessUser, RemoteAccessUsersList -from azurelinuxagent.common.utils import fileutil +from azurelinuxagent.common.utils import fileutil, timeutil from azurelinuxagent.common.utils.archive import GoalStateHistory from azurelinuxagent.common.utils.cryptutil import CryptUtil from azurelinuxagent.common.utils.textutil import parse_doc, findall, find, findtext, getattrib -from azurelinuxagent.common.utils.timeutil import create_timestamp + GOAL_STATE_URI = "http://{0}/machine/?comp=goalstate" CERTS_FILE_NAME = "Certificates.xml" @@ -59,26 +59,20 @@ def __init__(self, wire_client): """ try: self._wire_client = wire_client + self._history = None + self._extensions_goal_state = None # populated from vmSettings or extensionsConfig - # These "basic" properties come from the initial request to WireServer's goalstate API + # These properties hold the goal 
state from the WireServer and are initialized by self._fetch_full_wire_server_goal_state() self._incarnation = None self._role_instance_id = None self._role_config_name = None self._container_id = None - - # These "extended" properties come from additional HTTP requests to the URIs included in the basic goal state, or to the HostGAPlugin - self._extensions_goal_state = None self._hosting_env = None self._shared_conf = None self._certs = None self._remote_access = None - timestamp = create_timestamp() - xml_text, xml_doc, incarnation = GoalState._fetch_goal_state(self._wire_client) - self._history = GoalStateHistory(timestamp, incarnation) - - self._initialize_basic_properties(xml_doc) - self._fetch_extended_goal_state(xml_text, xml_doc) + self.update() except Exception as exception: # We don't log the error here since fetching the goal state is done every few seconds @@ -128,40 +122,64 @@ def update_host_plugin_headers(wire_client): # Fetching the goal state updates the HostGAPlugin so simply trigger the request GoalState._fetch_goal_state(wire_client) - def update(self, force_update=False): + def update(self): """ Updates the current GoalState instance fetching values from the WireServer/HostGAPlugin as needed """ - timestamp = create_timestamp() - xml_text, xml_doc, incarnation = GoalState._fetch_goal_state(self._wire_client) - - if force_update or self._incarnation != incarnation: - # If we are fetching a new goal state - self._history = GoalStateHistory(timestamp, incarnation) - self._initialize_basic_properties(xml_doc) - self._fetch_extended_goal_state(xml_text, xml_doc, force_vm_settings_update=force_update) - else: - # else ensure the extensions are using the latest vm_settings - timestamp = create_timestamp() - vm_settings, vm_settings_updated = self._fetch_vm_settings(force_update=force_update) + # + # Fetch the goal state from both the HGAP and the WireServer + # + timestamp = timeutil.create_timestamp() + + incarnation, xml_text, xml_doc = 
GoalState._fetch_goal_state(self._wire_client) + goal_state_updated = incarnation != self._incarnation + if goal_state_updated: + logger.info('Fetched new goal state from the WireServer [incarnation {0}]', incarnation) + + vm_settings, vm_settings_updated = GoalState._fetch_vm_settings(self._wire_client) + if vm_settings_updated: + logger.info("Fetched new vmSettings [HostGAPlugin correlation ID: {0} eTag: {1} source: {2}]", vm_settings.hostga_plugin_correlation_id, vm_settings.etag, vm_settings.source) + # Ignore the vmSettings if their source is Fabric (processing a Fabric goal state may require the tenant certificate and the vmSettings don't include it.) + if vm_settings is not None and vm_settings.source == GoalStateSource.Fabric: if vm_settings_updated: - logger.info("Fetched new vmSettings [HostGAPlugin correlation ID: {0} eTag: {1} source: {2}]", vm_settings.hostga_plugin_correlation_id, vm_settings.etag, vm_settings.source) - self._history = GoalStateHistory(timestamp, vm_settings.etag) - self._extensions_goal_state = vm_settings - self._history.save_vm_settings(vm_settings.get_redacted_text()) + logger.info("The vmSettings originated via Fabric; will ignore them.") + vm_settings, vm_settings_updated = None, False + + # If neither goal state has changed we are done with the update + if not goal_state_updated and not vm_settings_updated: + return + + # Start a new history subdirectory and capture the updated goal state + tag = "{0}".format(incarnation) if vm_settings is None else "{0}-{1}".format(incarnation, vm_settings.etag) + self._history = GoalStateHistory(timestamp, tag) + if goal_state_updated: + self._history.save_goal_state(xml_text) + if vm_settings_updated: + self._history.save_vm_settings(vm_settings.get_redacted_text()) + + # + # Continue fetching the rest of the goal state + # + extensions_config = None + if goal_state_updated: + extensions_config = self._fetch_full_wire_server_goal_state(incarnation, xml_doc) + + # + # Lastly, decide whether 
to use the vmSettings or extensionsConfig for the extensions goal state + # + if goal_state_updated and vm_settings_updated: + most_recent = vm_settings if vm_settings.created_on_timestamp > extensions_config.created_on_timestamp else extensions_config + elif goal_state_updated: + most_recent = extensions_config + else: # vm_settings_updated + most_recent = vm_settings + + if self._extensions_goal_state is None or most_recent.created_on_timestamp > self._extensions_goal_state.created_on_timestamp: + self._extensions_goal_state = most_recent def save_to_history(self, data, file_name): self._history.save(data, file_name) - def _initialize_basic_properties(self, xml_doc): - self._incarnation = findtext(xml_doc, "Incarnation") - role_instance = find(xml_doc, "RoleInstance") - self._role_instance_id = findtext(role_instance, "InstanceId") - role_config = find(role_instance, "Configuration") - self._role_config_name = findtext(role_config, "ConfigName") - container = find(xml_doc, "Container") - self._container_id = findtext(container, "ContainerId") - @staticmethod def _fetch_goal_state(wire_client): """ @@ -195,87 +213,94 @@ def _fetch_goal_state(wire_client): wire_client.update_host_plugin(container_id, role_config_name) - return xml_text, xml_doc, incarnation + return incarnation, xml_text, xml_doc - def _fetch_vm_settings(self, force_update=False): + @staticmethod + def _fetch_vm_settings(wire_client): """ - Issues an HTTP request (HostGAPlugin) for the vm settings and returns the response as an ExtensionsGoalStateFromVmSettings. + Issues an HTTP request (HostGAPlugin) for the vm settings and returns the response as an ExtensionsGoalState. 
""" vm_settings, vm_settings_updated = (None, False) if conf.get_enable_fast_track(): try: - vm_settings, vm_settings_updated = self._wire_client.get_host_plugin().fetch_vm_settings(force_update=force_update) + try: + vm_settings, vm_settings_updated = wire_client.get_host_plugin().fetch_vm_settings() + except ResourceGoneError: + # retry after refreshing the HostGAPlugin + GoalState.update_host_plugin_headers(wire_client) + vm_settings, vm_settings_updated = wire_client.get_host_plugin().fetch_vm_settings() except VmSettingsNotSupported: pass - except VmSettingsError as exception: - # ensure we save the vmSettings if there were parsing errors - self._history.save_vm_settings(ExtensionsGoalStateFromVmSettings.redact(exception.vm_settings_text)) + except VmSettingsParseError as exception: + # ensure we save the vmSettings if there were parsing errors, but save them only once per ETag + if not GoalStateHistory.tag_exists(exception.etag): + GoalStateHistory(timeutil.create_timestamp(), exception.etag).save_vm_settings(exception.vm_settings_text) raise - except ResourceGoneError: - # retry after refreshing the HostGAPlugin - GoalState.update_host_plugin_headers(self._wire_client) - vm_settings, vm_settings_updated = self._wire_client.get_host_plugin().fetch_vm_settings(force_update=force_update) return vm_settings, vm_settings_updated - def _fetch_extended_goal_state(self, xml_text, xml_doc, force_vm_settings_update=False): + def _fetch_full_wire_server_goal_state(self, incarnation, xml_doc): """ - Issues HTTP requests (WireServer) for each of the URIs in the goal state (ExtensionsConfig, Certificate, Remote Access users, etc) - and populates the corresponding properties. If the given 'vm_settings' are not None they are used for the extensions goal state, - otherwise extensionsConfig is used instead. 
+ Issues HTTP requests (to the WireServer) for each of the URIs in the goal state (ExtensionsConfig, Certificate, Remote Access users, etc) + and populates the corresponding properties. + + Returns the value of ExtensionsConfig. """ try: - logger.info('Fetching goal state [incarnation {0}]', self._incarnation) + logger.info('Fetching full goal state from the WireServer') - self._history.save_goal_state(xml_text) + role_instance = find(xml_doc, "RoleInstance") + role_instance_id = findtext(role_instance, "InstanceId") + role_config = find(role_instance, "Configuration") + role_config_name = findtext(role_config, "ConfigName") + container = find(xml_doc, "Container") + container_id = findtext(container, "ContainerId") - # Always fetch the ExtensionsConfig, even if it is not needed, and save it for debugging purposes. Once FastTrack is stable this code could be updated to - # fetch it only when actually needed. extensions_config_uri = findtext(xml_doc, "ExtensionsConfig") - if extensions_config_uri is None: - extensions_config = ExtensionsGoalStateFactory.create_empty(self._incarnation) + extensions_config = ExtensionsGoalStateFactory.create_empty(incarnation) else: xml_text = self._wire_client.fetch_config(extensions_config_uri, self._wire_client.get_header()) - extensions_config = ExtensionsGoalStateFactory.create_from_extensions_config(self._incarnation, xml_text, self._wire_client) + extensions_config = ExtensionsGoalStateFactory.create_from_extensions_config(incarnation, xml_text, self._wire_client) self._history.save_extensions_config(extensions_config.get_redacted_text()) - vm_settings, vm_settings_updated = self._fetch_vm_settings(force_update=force_vm_settings_update) - - if vm_settings is not None: - new = " new " if vm_settings_updated else " " - logger.info("Fetched{0}vmSettings [HostGAPlugin correlation ID: {1} eTag: {2} source: {3}]", new, vm_settings.hostga_plugin_correlation_id, vm_settings.etag, vm_settings.source) - self._extensions_goal_state = 
vm_settings - if vm_settings_updated: - self._history.save_vm_settings(vm_settings.get_redacted_text()) - else: - self._extensions_goal_state = extensions_config - hosting_env_uri = findtext(xml_doc, "HostingEnvironmentConfig") xml_text = self._wire_client.fetch_config(hosting_env_uri, self._wire_client.get_header()) - self._hosting_env = HostingEnv(xml_text) + hosting_env = HostingEnv(xml_text) self._history.save_hosting_env(xml_text) shared_conf_uri = findtext(xml_doc, "SharedConfig") xml_text = self._wire_client.fetch_config(shared_conf_uri, self._wire_client.get_header()) - self._shared_conf = SharedConfig(xml_text) + shared_conf = SharedConfig(xml_text) self._history.save_shared_conf(xml_text) + certs = None certs_uri = findtext(xml_doc, "Certificates") if certs_uri is not None: # Note that we do not save the certificates to the goal state history xml_text = self._wire_client.fetch_config(certs_uri, self._wire_client.get_header_for_cert()) - self._certs = Certificates(xml_text) + certs = Certificates(xml_text) - container = find(xml_doc, "Container") + remote_access = None remote_access_uri = findtext(container, "RemoteAccessInfo") if remote_access_uri is not None: xml_text = self._wire_client.fetch_config(remote_access_uri, self._wire_client.get_header_for_cert()) - self._remote_access = RemoteAccess(xml_text) + remote_access = RemoteAccess(xml_text) self._history.save_remote_access(xml_text) + self._incarnation = incarnation + self._role_instance_id = role_instance_id + self._role_config_name = role_config_name + self._container_id = container_id + self._hosting_env = hosting_env + self._shared_conf = shared_conf + self._certs = certs + self._remote_access = remote_access + + return extensions_config + except Exception as exception: logger.warn("Fetching the goal state failed: {0}", ustr(exception)) raise ProtocolError(msg="Error fetching goal state", inner=exception) diff --git a/azurelinuxagent/common/protocol/hostplugin.py 
b/azurelinuxagent/common/protocol/hostplugin.py index 3004fc6256..2e895ff669 100644 --- a/azurelinuxagent/common/protocol/hostplugin.py +++ b/azurelinuxagent/common/protocol/hostplugin.py @@ -25,10 +25,11 @@ from azurelinuxagent.common import logger from azurelinuxagent.common.errorstate import ErrorState, ERROR_STATE_HOST_PLUGIN_FAILURE from azurelinuxagent.common.event import WALAEventOperation, add_event -from azurelinuxagent.common.exception import HttpError, ProtocolError, ResourceGoneError, VmSettingsError +from azurelinuxagent.common.exception import HttpError, ProtocolError, ResourceGoneError from azurelinuxagent.common.utils.flexible_version import FlexibleVersion from azurelinuxagent.common.future import ustr, httpclient from azurelinuxagent.common.protocol.healthservice import HealthService +from azurelinuxagent.common.protocol.extensions_goal_state import VmSettingsParseError from azurelinuxagent.common.protocol.extensions_goal_state_factory import ExtensionsGoalStateFactory from azurelinuxagent.common.utils import restutil from azurelinuxagent.common.utils import textutil @@ -86,7 +87,7 @@ def __init__(self, endpoint): self.fetch_last_timestamp = None self.status_last_timestamp = None self._host_plugin_version = FlexibleVersion("0.0.0.0") # Version 0 means "unknown" - self._host_plugin_supports_vm_settings = False + self._host_plugin_supports_vm_settings = None # Tri-state variable: None == Not Initialized, True == Supports, False == Does Not Support self._host_plugin_supports_vm_settings_next_check = datetime.datetime.now() self._vm_settings_error_reporter = _VmSettingsErrorReporter() self._cached_vm_settings = None # Cached value of the most recent ExtensionsGoalStateFromVmSettings @@ -96,6 +97,15 @@ def _extract_deployment_id(role_config_name): # Role config name consists of: .(...) 
return role_config_name.split(".")[0] if role_config_name is not None else None + def check_vm_settings_support(self): + """ + Returns True if the HostGAPlugin supports the vmSettings API. + """ + # _host_plugin_supports_vm_settings is set by fetch_vm_settings() + if self._host_plugin_supports_vm_settings is None: + _, _ = self.fetch_vm_settings() + return self._host_plugin_supports_vm_settings + def update_container_id(self, new_container_id): self.container_id = new_container_id @@ -395,7 +405,7 @@ def _base64_encode(self, data): return s.decode('utf-8') return s - def fetch_vm_settings(self, force_update): + def fetch_vm_settings(self): """ Queries the vmSettings from the HostGAPlugin and returns an (ExtensionsGoalStateFromVmSettings, bool) tuple with the vmSettings and a boolean indicating if they are an updated (True) or a cached value (False). @@ -420,10 +430,11 @@ def format_message(msg): try: # Raise if VmSettings are not supported but check for periodically since the HostGAPlugin could have been updated since the last check - if not self._host_plugin_supports_vm_settings and self._host_plugin_supports_vm_settings_next_check > datetime.datetime.now(): + # Note that self._host_plugin_supports_vm_settings can be None, so we need to compare against False + if self._host_plugin_supports_vm_settings == False and self._host_plugin_supports_vm_settings_next_check > datetime.datetime.now(): raise_not_supported() - etag = None if force_update or self._cached_vm_settings is None else self._cached_vm_settings.etag + etag = None if self._cached_vm_settings is None else self._cached_vm_settings.etag correlation_id = str(uuid.uuid4()) self._vm_settings_error_reporter.report_request() @@ -491,8 +502,8 @@ def format_message(msg): except (ProtocolError, ResourceGoneError, VmSettingsNotSupported): raise - except VmSettingsError as vmSettingsError: - message = format_message(ustr(vmSettingsError)) + except VmSettingsParseError as exception: + message = 
format_message(ustr(exception)) self._vm_settings_error_reporter.report_error(message) raise except Exception as exception: diff --git a/azurelinuxagent/common/utils/archive.py b/azurelinuxagent/common/utils/archive.py index 807b5dca73..ed8122e970 100644 --- a/azurelinuxagent/common/utils/archive.py +++ b/azurelinuxagent/common/utils/archive.py @@ -1,6 +1,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the Apache License. import errno +import glob import os import re import shutil @@ -52,18 +53,19 @@ # # Legacy names +# 2018-04-06T08:21:37.142697 +# 2018-04-06T08:21:37.142697.zip # 2018-04-06T08:21:37.142697_incarnation_N # 2018-04-06T08:21:37.142697_incarnation_N.zip # # Current names # -# 2018-04-06T08:21:37.142697 -# 2018-04-06T08:21:37.142697.zip -# 2018-04-06T08:21:37.142697_N -# 2018-04-06T08:21:37.142697_N.zip +# 2018-04-06T08:21:37.142697_N-M +# 2018-04-06T08:21:37.142697_N-M.zip # -_ARCHIVE_PATTERNS_DIRECTORY = re.compile(r"^\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d+((_incarnation)?_(\d+|status))?$") -_ARCHIVE_PATTERNS_ZIP = re.compile(r"^\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d+((_incarnation)?_(\d+|status))?\.zip$") +_ARCHIVE_BASE_PATTERN = r"\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d+((_incarnation)?_(\d+|status)(-\d+)?)?" 
+_ARCHIVE_PATTERNS_DIRECTORY = re.compile(r'^{0}$'.format(_ARCHIVE_BASE_PATTERN)) +_ARCHIVE_PATTERNS_ZIP = re.compile(r'^{0}\.zip$'.format(_ARCHIVE_BASE_PATTERN)) _GOAL_STATE_FILE_NAME = "GoalState.xml" _VM_SETTINGS_FILE_NAME = "VmSettings.json" @@ -203,10 +205,17 @@ def _get_archive_states(self): class GoalStateHistory(object): - def __init__(self, timestamp, tag=None): + def __init__(self, timestamp, tag): self._errors = False self._root = os.path.join(conf.get_lib_dir(), ARCHIVE_DIRECTORY_NAME, "{0}_{1}".format(timestamp, tag) if tag is not None else timestamp) + @staticmethod + def tag_exists(tag): + """ + Returns True when an item with the given 'tag' already exists in the history directory + """ + return len(glob.glob(os.path.join(conf.get_lib_dir(), ARCHIVE_DIRECTORY_NAME, "*_{0}".format(tag)))) > 0 + def save(self, data, file_name): try: if not os.path.exists(self._root): diff --git a/azurelinuxagent/common/utils/timeutil.py b/azurelinuxagent/common/utils/timeutil.py index baf311ca7a..c4dd755a0c 100644 --- a/azurelinuxagent/common/utils/timeutil.py +++ b/azurelinuxagent/common/utils/timeutil.py @@ -3,8 +3,28 @@ import datetime -def create_timestamp(): +def create_timestamp(dt=None): """ - Returns a string with current UTC time in iso format + Returns a string with the given datetime iso format. If no datetime is given as parameter, it + uses datetime.utcnow(). """ - return datetime.datetime.utcnow().isoformat() + if dt is None: + dt = datetime.datetime.utcnow() + return dt.isoformat() + + +def datetime_to_ticks(dt): + """ + Converts 'dt', a datetime, to the number of ticks (1 tick == 1/10000000 sec) since datetime.min (0001-01-01 00:00:00). + + Note that the resolution of a datetime goes only to microseconds. + """ + return int(10 ** 7 * total_seconds(dt - datetime.datetime.min)) + + +def total_seconds(dt): + """ + Compute the total_seconds for timedelta 'td'. Used instead timedelta.total_seconds() because 2.6 does not implement total_seconds. 
+ """ + return ((24.0 * 60 * 60 * dt.days + dt.seconds) * 10 ** 6 + dt.microseconds) / 10 ** 6 + diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py index 3c3e405339..582716acb2 100644 --- a/azurelinuxagent/ga/update.py +++ b/azurelinuxagent/ga/update.py @@ -45,7 +45,6 @@ from azurelinuxagent.common.future import ustr from azurelinuxagent.common.osutil import get_osutil, systemd from azurelinuxagent.common.persist_firewall_rules import PersistFirewallRulesHandler -from azurelinuxagent.common.protocol.extensions_goal_state import GoalStateChannel from azurelinuxagent.common.protocol.hostplugin import HostPluginProtocol from azurelinuxagent.common.protocol.restapi import VMAgentUpdateStatus, VMAgentUpdateStatuses, ExtHandlerPackageList, \ VERSION_0 @@ -178,9 +177,10 @@ def __init__(self): # ID of the last extensions goal state that has been fully processed (incarnation for WireServer goal states or etag for HostGAPlugin goal states) # (None if no extensions goal state has been processed) self._last_extensions_gs_id = None - # Goal state that is currently been processed - # (None if no goal state is being processed) + # Goal state that is currently been processed (None if no goal state is being processed) self._goal_state = None + # Whether the agent supports FastTrack (it does, as long as the HostGAPlugin supports the vmSettings API) + self._supports_fast_track = False self._extensions_summary = ExtensionsSummary() @@ -469,6 +469,8 @@ def _try_update_goal_state(self, protocol): add_event(AGENT_NAME, op=WALAEventOperation.FetchGoalState, version=CURRENT_VERSION, is_success=True, message=message, log_event=False) logger.info(message) + self._supports_fast_track = conf.get_enable_fast_track() and protocol.client.get_host_plugin().check_vm_settings_support() + except Exception as e: if not self._last_try_update_goal_state_failed: self._last_try_update_goal_state_failed = True @@ -659,17 +661,9 @@ def __get_vmagent_update_status(self, protocol, 
goal_state_changed): def _report_status(self, exthandlers_handler): vm_agent_update_status = self.__get_vmagent_update_status(exthandlers_handler.protocol, self._processing_new_extensions_goal_state()) # report_ext_handlers_status does its own error handling and returns None if an error occurred - # - # TODO: How to handle the case when the HostGAPlugin goes from supporting vmSettings to not supporting it? - # - if self._goal_state is None: - supports_fast_track = False - else: - supports_fast_track = self._goal_state.extensions_goal_state.channel == GoalStateChannel.HostGAPlugin - vm_status = exthandlers_handler.report_ext_handlers_status( goal_state_changed=self._processing_new_extensions_goal_state(), - vm_agent_update_status=vm_agent_update_status, vm_agent_supports_fast_track=supports_fast_track) + vm_agent_update_status=vm_agent_update_status, vm_agent_supports_fast_track=self._supports_fast_track) if vm_status is not None: self._report_extensions_summary(vm_status) diff --git a/tests/data/hostgaplugin/vm_settings-difference_in_required_features.json b/tests/data/hostgaplugin/vm_settings-difference_in_required_features.json index 8cb9d3b152..9cfb42752c 100644 --- a/tests/data/hostgaplugin/vm_settings-difference_in_required_features.json +++ b/tests/data/hostgaplugin/vm_settings-difference_in_required_features.json @@ -5,7 +5,7 @@ "correlationId": "9a47a2a2-e740-4bfc-b11b-4f2f7cfe7d2e", "inSvdSeqNo": 1, "extensionsLastModifiedTickCount": 637726657706205217, - "extensionGoalStatesSource": "Fabric", + "extensionGoalStatesSource": "FastTrack", "onHold": true, "statusUploadBlob": { "statusBlobType": "BlockBlob", diff --git a/tests/data/hostgaplugin/vm_settings-empty_depends_on.json b/tests/data/hostgaplugin/vm_settings-empty_depends_on.json index 0b7b19a95a..6fa93452cf 100644 --- a/tests/data/hostgaplugin/vm_settings-empty_depends_on.json +++ b/tests/data/hostgaplugin/vm_settings-empty_depends_on.json @@ -5,7 +5,7 @@ "correlationId": 
"1bef4c48-044e-4225-8f42-1d1eac1eb158", "inSvdSeqNo": 1, "extensionsLastModifiedTickCount": 637693267431616449, - "extensionGoalStatesSource": "Fabric", + "extensionGoalStatesSource": "FastTrack", "StatusUploadBlob": { "statusBlobType": "BlockBlob", "value": "https://dcrcqabsr1.blob.core.windows.net/$system/edpxmal5j1.058b176d-445b-4e75-bd97-4911511b7d96.status?sv=2018-03-28&sr=b&sk=system-1&sig=U4KaLxlyYfgQ%2fie8RCwgMBSXa3E4vlW0ozPYOEHikoc%3d&se=9999-01-01T00%3a00%3a00Z&sp=w" diff --git a/tests/data/hostgaplugin/vm_settings-invalid_blob_type.json b/tests/data/hostgaplugin/vm_settings-invalid_blob_type.json index d30e5082e6..62314a403d 100644 --- a/tests/data/hostgaplugin/vm_settings-invalid_blob_type.json +++ b/tests/data/hostgaplugin/vm_settings-invalid_blob_type.json @@ -5,7 +5,7 @@ "correlationId": "1bef4c48-044e-4225-8f42-1d1eac1eb158", "inSvdSeqNo": 1, "extensionsLastModifiedTickCount": 637693267431616449, - "extensionGoalStatesSource": "Fabric", + "extensionGoalStatesSource": "FastTrack", "StatusUploadBlob": { "statusBlobType": "INVALID_BLOB_TYPE", "value": "https://dcrcqabsr1.blob.core.windows.net/$system/edpxmal5j1.058b176d-445b-4e75-bd97-4911511b7d96.status?sv=2018-03-28&sr=b&sk=system-1&sig=U4KaLxlyYfgQ%2fie8RCwgMBSXa3E4vlW0ozPYOEHikoc%3d&se=9999-01-01T00%3a00%3a00Z&sp=w" diff --git a/tests/data/hostgaplugin/vm_settings-no_extension_manifests.json b/tests/data/hostgaplugin/vm_settings-no_extension_manifests.json index b5653ef76d..7deff8d5eb 100644 --- a/tests/data/hostgaplugin/vm_settings-no_extension_manifests.json +++ b/tests/data/hostgaplugin/vm_settings-no_extension_manifests.json @@ -5,7 +5,7 @@ "correlationId": "c143f8f0-a66b-4881-8c06-1efd278b0b02", "inSvdSeqNo": 978, "extensionsLastModifiedTickCount": 637829610574739741, - "extensionGoalStatesSource": "Fabric", + "extensionGoalStatesSource": "FastTrack", "statusUploadBlob": { "statusBlobType": "PageBlob", "value": 
"https://md-ssd-xpdjf15s.blob.core.windows.net/$system/u-sqlwatcher.f338f67e.status?sv=2018-03-28&sr=b&sk=system-1&sig=88Y3NM%2b1aU%3d&se=9999-01-01T00%3a00%3a00Z&sp=rw" diff --git a/tests/data/hostgaplugin/vm_settings-no_status_upload_blob.json b/tests/data/hostgaplugin/vm_settings-no_status_upload_blob.json index d5c17eeb8b..27ebebcefe 100644 --- a/tests/data/hostgaplugin/vm_settings-no_status_upload_blob.json +++ b/tests/data/hostgaplugin/vm_settings-no_status_upload_blob.json @@ -4,8 +4,8 @@ "activityId": "a33f6f53-43d6-4625-b322-1a39651a00c9", "correlationId": "9a47a2a2-e740-4bfc-b11b-4f2f7cfe7d2e", "inSvdSeqNo": 1, - "extensionsLastModifiedTickCount": 637726657706205217, - "extensionGoalStatesSource": "Fabric", + "extensionsLastModifiedTickCount": 637726657706209999, + "extensionGoalStatesSource": "FastTrack", "onHold": true, "inVMMetadata": { "subscriptionId": "8e037ad4-618f-4466-8bc8-5099d41ac15b", diff --git a/tests/data/hostgaplugin/vm_settings-out-of-sync.json b/tests/data/hostgaplugin/vm_settings-out-of-sync.json index fa3cc0f67b..737350d698 100644 --- a/tests/data/hostgaplugin/vm_settings-out-of-sync.json +++ b/tests/data/hostgaplugin/vm_settings-out-of-sync.json @@ -5,7 +5,7 @@ "correlationId": "EEEEEEEE-DDDD-CCCC-BBBB-AAAAAAAAAAAA", "inSvdSeqNo": 1, "extensionsLastModifiedTickCount": 637726657000000000, - "extensionGoalStatesSource": "Fabric", + "extensionGoalStatesSource": "FastTrack", "onHold": true, "statusUploadBlob": { "statusBlobType": "BlockBlob", diff --git a/tests/data/hostgaplugin/vm_settings-parse_error.json b/tests/data/hostgaplugin/vm_settings-parse_error.json index ffd47857fb..e817a1e888 100644 --- a/tests/data/hostgaplugin/vm_settings-parse_error.json +++ b/tests/data/hostgaplugin/vm_settings-parse_error.json @@ -5,7 +5,7 @@ "correlationId": "9a47a2a2-e740-4bfc-b11b-4f2f7cfe7d2e", "inSvdSeqNo": 1, "extensionsLastModifiedTickCount": 637726657706205217, - "extensionGoalStatesSource": "Fabric", + "extensionGoalStatesSource": "FastTrack", 
"onHold": true, "statusUploadBlob": { "statusBlobType": "BlockBlob", diff --git a/tests/data/hostgaplugin/vm_settings-requested_version.json b/tests/data/hostgaplugin/vm_settings-requested_version.json index 096c3bee28..1b5023b117 100644 --- a/tests/data/hostgaplugin/vm_settings-requested_version.json +++ b/tests/data/hostgaplugin/vm_settings-requested_version.json @@ -4,8 +4,8 @@ "activityId": "a33f6f53-43d6-4625-b322-1a39651a00c9", "correlationId": "9a47a2a2-e740-4bfc-b11b-4f2f7cfe7d2e", "inSvdSeqNo": 1, - "extensionsLastModifiedTickCount": 637726657706205217, - "extensionGoalStatesSource": "Fabric", + "extensionsLastModifiedTickCount": 637726699999999999, + "extensionGoalStatesSource": "FastTrack", "onHold": true, "statusUploadBlob": { "statusBlobType": "BlockBlob", diff --git a/tests/data/hostgaplugin/vm_settings-unsupported_version.json b/tests/data/hostgaplugin/vm_settings-unsupported_version.json index d9fe9f1dd5..3ae0147b25 100644 --- a/tests/data/hostgaplugin/vm_settings-unsupported_version.json +++ b/tests/data/hostgaplugin/vm_settings-unsupported_version.json @@ -5,7 +5,7 @@ "correlationId": "9a47a2a2-e740-4bfc-b11b-4f2f7cfe7d2e", "inSvdSeqNo": 1, "extensionsLastModifiedTickCount": 637726657706205217, - "extensionGoalStatesSource": "Fabric", + "extensionGoalStatesSource": "FastTrack", "onHold": true, "statusUploadBlob": { "statusBlobType": "BlockBlob", diff --git a/tests/data/hostgaplugin/vm_settings.json b/tests/data/hostgaplugin/vm_settings.json index fbae08bbab..b67ee0a23b 100644 --- a/tests/data/hostgaplugin/vm_settings.json +++ b/tests/data/hostgaplugin/vm_settings.json @@ -4,8 +4,8 @@ "activityId": "a33f6f53-43d6-4625-b322-1a39651a00c9", "correlationId": "9a47a2a2-e740-4bfc-b11b-4f2f7cfe7d2e", "inSvdSeqNo": 1, - "extensionsLastModifiedTickCount": 637726657706205217, - "extensionGoalStatesSource": "Fabric", + "extensionsLastModifiedTickCount": 637726657706205299, + "extensionGoalStatesSource": "FastTrack", "onHold": true, "statusUploadBlob": { 
"statusBlobType": "BlockBlob", diff --git a/tests/data/wire/ext_conf-no_gs_metadata.xml b/tests/data/wire/ext_conf-no_gs_metadata.xml new file mode 100644 index 0000000000..a97384ef14 --- /dev/null +++ b/tests/data/wire/ext_conf-no_gs_metadata.xml @@ -0,0 +1,27 @@ + + + + Prod + + http://mock-goal-state/manifest_of_ga.xml + + + + Test + + http://mock-goal-state/manifest_of_ga.xml + + + + + + + + + + {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"4037FBF5F1F3014F99B5D6C7799E9B20E6871CB3","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} + + +https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo + + diff --git a/tests/data/wire/ext_conf.xml b/tests/data/wire/ext_conf.xml index 8f205fb89a..8706278fa3 100644 --- a/tests/data/wire/ext_conf.xml +++ b/tests/data/wire/ext_conf.xml @@ -22,5 +22,7 @@ {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"4037FBF5F1F3014F99B5D6C7799E9B20E6871CB3","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} -https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo +https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo + + diff --git a/tests/data/wire/ext_conf_additional_locations.xml b/tests/data/wire/ext_conf_additional_locations.xml index 4e1f8b4979..9ba6381915 100644 --- a/tests/data/wire/ext_conf_additional_locations.xml +++ b/tests/data/wire/ext_conf_additional_locations.xml @@ -27,5 +27,8 @@ {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"4037FBF5F1F3014F99B5D6C7799E9B20E6871CB3","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} 
-https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo +https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo + + + diff --git a/tests/data/wire/ext_conf_aks_extension.xml b/tests/data/wire/ext_conf_aks_extension.xml index 5901c0e442..1b79a899d2 100644 --- a/tests/data/wire/ext_conf_aks_extension.xml +++ b/tests/data/wire/ext_conf_aks_extension.xml @@ -65,5 +65,6 @@ https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo + diff --git a/tests/data/wire/ext_conf_autoupgrade.xml b/tests/data/wire/ext_conf_autoupgrade.xml index 313a1af39e..29ea034ef8 100644 --- a/tests/data/wire/ext_conf_autoupgrade.xml +++ b/tests/data/wire/ext_conf_autoupgrade.xml @@ -24,5 +24,7 @@ {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"4037FBF5F1F3014F99B5D6C7799E9B20E6871CB3","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} -https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo +https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo + + diff --git a/tests/data/wire/ext_conf_autoupgrade_internalversion.xml b/tests/data/wire/ext_conf_autoupgrade_internalversion.xml index 97f840c142..f62563b658 100644 --- a/tests/data/wire/ext_conf_autoupgrade_internalversion.xml +++ b/tests/data/wire/ext_conf_autoupgrade_internalversion.xml @@ -24,5 +24,7 @@ 
{"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"4037FBF5F1F3014F99B5D6C7799E9B20E6871CB3","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} -https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo +https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo + + diff --git a/tests/data/wire/ext_conf_dependencies_with_empty_settings.xml b/tests/data/wire/ext_conf_dependencies_with_empty_settings.xml index 402de6438d..fb9a50ccdc 100644 --- a/tests/data/wire/ext_conf_dependencies_with_empty_settings.xml +++ b/tests/data/wire/ext_conf_dependencies_with_empty_settings.xml @@ -29,4 +29,5 @@ https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo + diff --git a/tests/data/wire/ext_conf_in_vm_artifacts_profile.xml b/tests/data/wire/ext_conf_in_vm_artifacts_profile.xml index 4a8701ff11..fea39e7164 100644 --- a/tests/data/wire/ext_conf_in_vm_artifacts_profile.xml +++ b/tests/data/wire/ext_conf_in_vm_artifacts_profile.xml @@ -25,4 +25,5 @@ https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo https://mock-goal-state/test.blob.core.windows.net/$system/test-cs12.test-cs12.test-cs12.vmSettings?sv=2016-05-31&sr=b&sk=system-1&sig=saskey;se=9999-01-01T00%3a00%3a00Z&sp=r + diff --git a/tests/data/wire/ext_conf_in_vm_empty_artifacts_profile.xml b/tests/data/wire/ext_conf_in_vm_empty_artifacts_profile.xml index 345ff07503..fc741fa4a3 100644 --- a/tests/data/wire/ext_conf_in_vm_empty_artifacts_profile.xml +++ b/tests/data/wire/ext_conf_in_vm_empty_artifacts_profile.xml @@ -25,4 +25,5 @@ 
https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo + diff --git a/tests/data/wire/ext_conf_internalversion.xml b/tests/data/wire/ext_conf_internalversion.xml index 97f840c142..f62563b658 100644 --- a/tests/data/wire/ext_conf_internalversion.xml +++ b/tests/data/wire/ext_conf_internalversion.xml @@ -24,5 +24,7 @@ {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"4037FBF5F1F3014F99B5D6C7799E9B20E6871CB3","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} -https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo +https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo + + diff --git a/tests/data/wire/ext_conf_invalid_and_valid_handlers.xml b/tests/data/wire/ext_conf_invalid_and_valid_handlers.xml index 6bf5c4a9e7..cd175e746e 100644 --- a/tests/data/wire/ext_conf_invalid_and_valid_handlers.xml +++ b/tests/data/wire/ext_conf_invalid_and_valid_handlers.xml @@ -30,5 +30,6 @@ https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo + diff --git a/tests/data/wire/ext_conf_missing_family.xml b/tests/data/wire/ext_conf_missing_family.xml index 8f00bc9c19..058c40a881 100644 --- a/tests/data/wire/ext_conf_missing_family.xml +++ b/tests/data/wire/ext_conf_missing_family.xml @@ -30,4 +30,5 @@ eastus https://walaautoasmeastus.blob.core.windows.net/vhds/walaautos73small.walaautos73small.walaautos73small.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=u%2BCA2Cxb7ticiEBRIW8HWgNW7gl2NPuOGQl0u95ApQE%3D + diff --git a/tests/data/wire/ext_conf_missing_requested_version.xml 
b/tests/data/wire/ext_conf_missing_requested_version.xml index e68fcaf995..3f2b59df25 100644 --- a/tests/data/wire/ext_conf_missing_requested_version.xml +++ b/tests/data/wire/ext_conf_missing_requested_version.xml @@ -34,5 +34,6 @@ https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo + diff --git a/tests/data/wire/ext_conf_multiple_extensions.xml b/tests/data/wire/ext_conf_multiple_extensions.xml index 8b987285b9..5cd65c63e0 100644 --- a/tests/data/wire/ext_conf_multiple_extensions.xml +++ b/tests/data/wire/ext_conf_multiple_extensions.xml @@ -52,5 +52,6 @@ https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo + diff --git a/tests/data/wire/ext_conf_no_extensions-block_blob.xml b/tests/data/wire/ext_conf_no_extensions-block_blob.xml index 3395b17a95..ae5ca585a1 100644 --- a/tests/data/wire/ext_conf_no_extensions-block_blob.xml +++ b/tests/data/wire/ext_conf_no_extensions-block_blob.xml @@ -8,5 +8,6 @@ http://foo + diff --git a/tests/data/wire/ext_conf_no_extensions-no_status_blob.xml b/tests/data/wire/ext_conf_no_extensions-no_status_blob.xml index 6632f352c2..fed7eb9e2c 100644 --- a/tests/data/wire/ext_conf_no_extensions-no_status_blob.xml +++ b/tests/data/wire/ext_conf_no_extensions-no_status_blob.xml @@ -7,5 +7,6 @@ + diff --git a/tests/data/wire/ext_conf_no_extensions-page_blob.xml b/tests/data/wire/ext_conf_no_extensions-page_blob.xml index 57724789cd..89e76621aa 100644 --- a/tests/data/wire/ext_conf_no_extensions-page_blob.xml +++ b/tests/data/wire/ext_conf_no_extensions-page_blob.xml @@ -20,5 +20,6 @@ http://sas_url + diff --git a/tests/data/wire/ext_conf_no_public.xml b/tests/data/wire/ext_conf_no_public.xml index 23ca595e0d..95619ae3b5 100644 --- a/tests/data/wire/ext_conf_no_public.xml +++ 
b/tests/data/wire/ext_conf_no_public.xml @@ -42,5 +42,6 @@ {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"4037FBF5F1F3014F99B5D6C7799E9B20E6871CB3","protectedSettings":"MIICWgYJK"}}]} + https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo diff --git a/tests/data/wire/ext_conf_no_settings.xml b/tests/data/wire/ext_conf_no_settings.xml index ed07fde5c4..21b4c21e15 100644 --- a/tests/data/wire/ext_conf_no_settings.xml +++ b/tests/data/wire/ext_conf_no_settings.xml @@ -37,5 +37,6 @@ + https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo diff --git a/tests/data/wire/ext_conf_requested_version.xml b/tests/data/wire/ext_conf_requested_version.xml index df99dcc2e3..4d25cd522f 100644 --- a/tests/data/wire/ext_conf_requested_version.xml +++ b/tests/data/wire/ext_conf_requested_version.xml @@ -24,5 +24,6 @@ {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"4037FBF5F1F3014F99B5D6C7799E9B20E6871CB3","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} + https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo diff --git a/tests/data/wire/ext_conf_required_features.xml b/tests/data/wire/ext_conf_required_features.xml index 0386511d58..69ed73a4a2 100644 --- a/tests/data/wire/ext_conf_required_features.xml +++ b/tests/data/wire/ext_conf_required_features.xml @@ -36,5 +36,6 @@ https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo + diff --git a/tests/data/wire/ext_conf_sequencing.xml b/tests/data/wire/ext_conf_sequencing.xml index 01c59e9b88..6fa8451bfd 
100644 --- a/tests/data/wire/ext_conf_sequencing.xml +++ b/tests/data/wire/ext_conf_sequencing.xml @@ -31,4 +31,6 @@ {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"4037FBF5F1F3014F99B5D6C7799E9B20E6871CB3","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} -https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo +https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo + + diff --git a/tests/data/wire/ext_conf_settings_case_mismatch.xml b/tests/data/wire/ext_conf_settings_case_mismatch.xml index cac8a2ab2d..86ed75779d 100644 --- a/tests/data/wire/ext_conf_settings_case_mismatch.xml +++ b/tests/data/wire/ext_conf_settings_case_mismatch.xml @@ -52,5 +52,6 @@ https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo + diff --git a/tests/data/wire/ext_conf_upgradeguid.xml b/tests/data/wire/ext_conf_upgradeguid.xml index 53994a2f42..6c11d6e577 100644 --- a/tests/data/wire/ext_conf_upgradeguid.xml +++ b/tests/data/wire/ext_conf_upgradeguid.xml @@ -22,5 +22,7 @@ {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"4037FBF5F1F3014F99B5D6C7799E9B20E6871CB3","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} -https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo +https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo + + diff --git a/tests/data/wire/invalid_config/ext_conf_multiple_depends_on_for_single_handler.xml 
b/tests/data/wire/invalid_config/ext_conf_multiple_depends_on_for_single_handler.xml index 1ae7c51ea7..88a2a05b76 100644 --- a/tests/data/wire/invalid_config/ext_conf_multiple_depends_on_for_single_handler.xml +++ b/tests/data/wire/invalid_config/ext_conf_multiple_depends_on_for_single_handler.xml @@ -41,4 +41,5 @@ https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo + diff --git a/tests/data/wire/invalid_config/ext_conf_multiple_runtime_settings_same_plugin.xml b/tests/data/wire/invalid_config/ext_conf_multiple_runtime_settings_same_plugin.xml index f68af98dbf..8ce90371f6 100644 --- a/tests/data/wire/invalid_config/ext_conf_multiple_runtime_settings_same_plugin.xml +++ b/tests/data/wire/invalid_config/ext_conf_multiple_runtime_settings_same_plugin.xml @@ -26,5 +26,6 @@ https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo + diff --git a/tests/data/wire/invalid_config/ext_conf_multiple_settings_for_same_handler.xml b/tests/data/wire/invalid_config/ext_conf_multiple_settings_for_same_handler.xml index 59144fdd65..81ef176df0 100644 --- a/tests/data/wire/invalid_config/ext_conf_multiple_settings_for_same_handler.xml +++ b/tests/data/wire/invalid_config/ext_conf_multiple_settings_for_same_handler.xml @@ -28,5 +28,6 @@ https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo + diff --git a/tests/data/wire/invalid_config/ext_conf_plugin_settings_version_mismatch.xml b/tests/data/wire/invalid_config/ext_conf_plugin_settings_version_mismatch.xml index 5b1110042f..cba2fe5c3b 100644 --- a/tests/data/wire/invalid_config/ext_conf_plugin_settings_version_mismatch.xml +++ 
b/tests/data/wire/invalid_config/ext_conf_plugin_settings_version_mismatch.xml @@ -25,5 +25,7 @@ {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"4037FBF5F1F3014F99B5D6C7799E9B20E6871CB3","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} -https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo +https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo + + diff --git a/tests/data/wire/invalid_config/ext_conf_single_and_multi_config_settings_same_plugin.xml b/tests/data/wire/invalid_config/ext_conf_single_and_multi_config_settings_same_plugin.xml index 7b0349e79d..4362c37d24 100644 --- a/tests/data/wire/invalid_config/ext_conf_single_and_multi_config_settings_same_plugin.xml +++ b/tests/data/wire/invalid_config/ext_conf_single_and_multi_config_settings_same_plugin.xml @@ -26,5 +26,6 @@ https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo + diff --git a/tests/ga/test_extension.py b/tests/ga/test_extension.py index 7ec539541b..d487fdc28d 100644 --- a/tests/ga/test_extension.py +++ b/tests/ga/test_extension.py @@ -3533,7 +3533,7 @@ def http_get_handler(url, *_, **kwargs): exthandlers_handler.report_ext_handlers_status() self._assert_handler_status(protocol.report_vm_status, "Ready", 1, "1.0.0") - self.assertEqual("1", protocol.report_vm_status.call_args[0][0].vmAgent.vm_artifacts_aggregate_status.goal_state_aggregate_status.in_svd_seq_no, "Incarnation mismatch") + self.assertEqual("1", protocol.report_vm_status.call_args[0][0].vmAgent.vm_artifacts_aggregate_status.goal_state_aggregate_status.in_svd_seq_no, "SVD sequence number mismatch") if __name__ == '__main__': diff --git 
a/tests/ga/test_report_status.py b/tests/ga/test_report_status.py index df1776d31b..acd3f6653c 100644 --- a/tests/ga/test_report_status.py +++ b/tests/ga/test_report_status.py @@ -139,12 +139,11 @@ def http_get_handler(url, *_, **__): self._test_supported_features_includes_fast_track(protocol, False) def _test_supported_features_includes_fast_track(self, protocol, expected): - with patch("azurelinuxagent.common.conf.get_enable_fast_track", return_value=True): - with ReportStatusTestCase._mock_update_handler(protocol) as update_handler: - update_handler.run(debug=True) - - status = json.loads(protocol.mock_wire_data.status_blobs[0]) - supported_features = status['supportedFeatures'] - includes_fast_track = any(f['Key'] == 'FastTrack' for f in supported_features) - self.assertEqual(expected, includes_fast_track, "supportedFeatures should {0}include FastTrack. Got: {1}".format("" if expected else "not ", supported_features)) + with ReportStatusTestCase._mock_update_handler(protocol) as update_handler: + update_handler.run(debug=True) + + status = json.loads(protocol.mock_wire_data.status_blobs[0]) + supported_features = status['supportedFeatures'] + includes_fast_track = any(f['Key'] == 'FastTrack' for f in supported_features) + self.assertEqual(expected, includes_fast_track, "supportedFeatures should {0}include FastTrack. 
Got: {1}".format("" if expected else "not ", supported_features)) diff --git a/tests/protocol/mockwiredata.py b/tests/protocol/mockwiredata.py index dfa51c7804..218bd29377 100644 --- a/tests/protocol/mockwiredata.py +++ b/tests/protocol/mockwiredata.py @@ -15,9 +15,11 @@ # Requires Python 2.6+ and Openssl 1.0+ # import base64 +import datetime import json import re +from azurelinuxagent.common.utils import timeutil from azurelinuxagent.common.utils.textutil import parse_doc, find, findall from tests.protocol.HttpRequestPredicates import HttpRequestPredicates from tests.tools import load_bin_data, load_data, MagicMock, Mock @@ -116,7 +118,7 @@ DATA_FILE_VM_SETTINGS = DATA_FILE.copy() DATA_FILE_VM_SETTINGS["vm_settings"] = "hostgaplugin/vm_settings.json" -DATA_FILE_VM_SETTINGS["ETag"] ="1" +DATA_FILE_VM_SETTINGS["ETag"] = "1" DATA_FILE_VM_SETTINGS["ext_conf"] = "hostgaplugin/ext_conf.xml" DATA_FILE_VM_SETTINGS["in_vm_artifacts_profile"] = "hostgaplugin/in_vm_artifacts_profile.json" @@ -365,29 +367,51 @@ def get_no_of_extensions_in_config(self): # @staticmethod def replace_xml_element_value(xml_document, element_name, element_value): - new_xml_document = re.sub(r'(?<=<{0}>).+(?=)'.format(element_name), element_value, xml_document) - if new_xml_document == xml_document: - raise Exception("Could not match element '{0}'", element_name) # pylint: disable=raising-format-tuple - return new_xml_document + element_regex = r'(?<=<{0}>).+(?=)'.format(element_name) + if not re.search(element_regex, xml_document): + raise Exception("Can't find XML element '{0}' in {1}".format(element_name, xml_document)) + return re.sub(element_regex, element_value, xml_document) @staticmethod def replace_xml_attribute_value(xml_document, element_name, attribute_name, attribute_value): - new_xml_document = re.sub(r'(?<=<{0} )(.*{1}=")[^"]+(?="[^>]*>)'.format(element_name, attribute_name), r'\g<1>{0}'.format(attribute_value), xml_document) - if new_xml_document == xml_document: - raise 
Exception("Could not match attribute '{0}' of element '{1}'".format(attribute_name, element_name)) - return new_xml_document - - def set_etag(self, etag): - ''' - Sets the ETag for the mock response - ''' + attribute_regex = r'(?<=<{0} )(.*{1}=")[^"]+(?="[^>]*>)'.format(element_name, attribute_name) + if not re.search(attribute_regex, xml_document): + raise Exception("Can't find attribute {0} in XML element '{1}'. Document: {2}".format(attribute_name, element_name, xml_document)) + return re.sub(attribute_regex, r'\g<1>{0}'.format(attribute_value), xml_document) + + def set_etag(self, etag, timestamp=None): + """ + Sets the ETag for the mock response. + This function is used to mock a new goal state, and it also updates the timestamp (extensionsLastModifiedTickCount) in vmSettings. + """ + if timestamp is None: + timestamp = datetime.datetime.utcnow() self.etag = etag - - def set_incarnation(self, incarnation): - ''' - Sets the incarnation in the goal state, but not on its subcomponents (e.g. hosting env, shared config) - ''' + try: + vm_settings = json.loads(self.vm_settings) + vm_settings["extensionsLastModifiedTickCount"] = timeutil.datetime_to_ticks(timestamp) + self.vm_settings = json.dumps(vm_settings) + except ValueError: # some test data include syntax errors; ignore those + pass + + def set_vm_settings_source(self, source): + """ + Sets the "extensionGoalStatesSource" for the mock vm_settings data + """ + vm_settings = json.loads(self.vm_settings) + vm_settings["extensionGoalStatesSource"] = source + self.vm_settings = json.dumps(vm_settings) + + def set_incarnation(self, incarnation, timestamp=None): + """ + Sets the incarnation in the goal state, but not on its subcomponents (e.g. hosting env, shared config). + This function is used to mock a new goal state, and it also updates the timestamp (createdOnTicks) in ExtensionsConfig. 
+ """ self.goal_state = WireProtocolData.replace_xml_element_value(self.goal_state, "Incarnation", str(incarnation)) + if self.ext_conf is not None: + if timestamp is None: + timestamp = datetime.datetime.utcnow() + self.ext_conf = WireProtocolData.replace_xml_attribute_value(self.ext_conf, "InVMGoalStateMetaData", "createdOnTicks", timeutil.datetime_to_ticks(timestamp)) def set_container_id(self, container_id): self.goal_state = WireProtocolData.replace_xml_element_value(self.goal_state, "ContainerId", container_id) diff --git a/tests/protocol/test_extensions_goal_state_from_extensions_config.py b/tests/protocol/test_extensions_goal_state_from_extensions_config.py index 346a3644c0..5af0aa288f 100644 --- a/tests/protocol/test_extensions_goal_state_from_extensions_config.py +++ b/tests/protocol/test_extensions_goal_state_from_extensions_config.py @@ -15,7 +15,9 @@ def test_it_should_parse_in_vm_metadata(self): self.assertEqual('2020-11-09T17:48:50.412125Z', extensions_goal_state.created_on_timestamp, "Incorrect GS Creation time") def test_it_should_use_default_values_when_in_vm_metadata_is_missing(self): - with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: + data_file = mockwiredata.DATA_FILE.copy() + data_file["ext_conf"] = "wire/ext_conf-no_gs_metadata.xml" + with mock_wire_protocol(data_file) as protocol: extensions_goal_state = protocol.get_goal_state().extensions_goal_state self.assertEqual(AgentGlobals.GUID_ZERO, extensions_goal_state.activity_id, "Incorrect activity Id") self.assertEqual(AgentGlobals.GUID_ZERO, extensions_goal_state.correlation_id, "Incorrect correlation Id") diff --git a/tests/protocol/test_extensions_goal_state_from_vm_settings.py b/tests/protocol/test_extensions_goal_state_from_vm_settings.py index 400b885d5b..9bcba5ece4 100644 --- a/tests/protocol/test_extensions_goal_state_from_vm_settings.py +++ b/tests/protocol/test_extensions_goal_state_from_vm_settings.py @@ -5,12 +5,11 @@ from 
azurelinuxagent.common.protocol.extensions_goal_state import GoalStateChannel from azurelinuxagent.common.protocol.extensions_goal_state_from_vm_settings import _CaseFoldedDict from tests.protocol.mocks import mockwiredata, mock_wire_protocol -from tests.tools import AgentTestCase, patch +from tests.tools import AgentTestCase -@patch("azurelinuxagent.common.conf.get_enable_fast_track", return_value=True) class ExtensionsGoalStateFromVmSettingsTestCase(AgentTestCase): - def test_it_should_parse_vm_settings(self, _): + def test_it_should_parse_vm_settings(self): with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: extensions_goal_state = protocol.get_goal_state().extensions_goal_state @@ -19,7 +18,7 @@ def assert_property(name, value): assert_property("activity_id", "a33f6f53-43d6-4625-b322-1a39651a00c9") assert_property("correlation_id", "9a47a2a2-e740-4bfc-b11b-4f2f7cfe7d2e") - assert_property("created_on_timestamp", "2021-11-16T13:22:50.620522Z") + assert_property("created_on_timestamp", "2021-11-16T13:22:50.620529Z") assert_property("status_upload_blob", "https://dcrcl3a0xs.blob.core.windows.net/$system/edp0plkw2b.86f4ae0a-61f8-48ae-9199-40f402d56864.status?sv=2018-03-28&sr=b&sk=system-1&sig=KNWgC2%3d&se=9999-01-01T00%3a00%3a00Z&sp=w") assert_property("status_upload_blob_type", "BlockBlob") assert_property("required_features", ["MultipleExtensionsPerHandler"]) @@ -48,7 +47,7 @@ def assert_property(name, value): # dependency level (multi-config) self.assertEqual(1, extensions_goal_state.extensions[3].settings[1].dependencyLevel, "Incorrect dependency level (multi-config)") - def test_it_should_parse_requested_version_properly(self, _): + def test_it_should_parse_requested_version_properly(self): with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: manifests, _ = protocol.get_vmagent_manifests() for manifest in manifests: @@ -61,7 +60,7 @@ def test_it_should_parse_requested_version_properly(self, _): for manifest in 
manifests: self.assertEqual(manifest.requested_version_string, "9.9.9.9", "Version should be 9.9.9.9") - def test_it_should_parse_missing_status_upload_blob_as_none(self, _): + def test_it_should_parse_missing_status_upload_blob_as_none(self): data_file = mockwiredata.DATA_FILE_VM_SETTINGS.copy() data_file["vm_settings"] = "hostgaplugin/vm_settings-no_status_upload_blob.json" with mock_wire_protocol(data_file) as protocol: @@ -70,7 +69,7 @@ def test_it_should_parse_missing_status_upload_blob_as_none(self, _): self.assertIsNone(extensions_goal_state.status_upload_blob, "Expected status upload blob to be None") self.assertEqual("BlockBlob", extensions_goal_state.status_upload_blob_type, "Expected status upload blob to be Block") - def test_it_should_parse_missing_extension_manifests_as_empty(self, _): + def test_it_should_parse_missing_extension_manifests_as_empty(self): data_file = mockwiredata.DATA_FILE_VM_SETTINGS.copy() data_file["vm_settings"] = "hostgaplugin/vm_settings-no_extension_manifests.json" with mock_wire_protocol(data_file) as protocol: @@ -86,7 +85,7 @@ def test_it_should_parse_missing_extension_manifests_as_empty(self, _): ], extensions_goal_state.extensions[2].manifest_uris, "Incorrect list of manifests for {0}".format(extensions_goal_state.extensions[2])) - def test_it_should_default_to_block_blob_when_the_status_blob_type_is_not_valid(self, _): + def test_it_should_default_to_block_blob_when_the_status_blob_type_is_not_valid(self): data_file = mockwiredata.DATA_FILE_VM_SETTINGS.copy() data_file["vm_settings"] = "hostgaplugin/vm_settings-invalid_blob_type.json" with mock_wire_protocol(data_file) as protocol: @@ -94,7 +93,7 @@ def test_it_should_default_to_block_blob_when_the_status_blob_type_is_not_valid( self.assertEqual("BlockBlob", extensions_goal_state.status_upload_blob_type, 'Expected BlockBlob for an invalid statusBlobType') - def test_its_source_channel_should_be_host_ga_plugin(self, _): + def 
test_its_source_channel_should_be_host_ga_plugin(self): with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: extensions_goal_state = protocol.get_goal_state().extensions_goal_state diff --git a/tests/protocol/test_goal_state.py b/tests/protocol/test_goal_state.py index dfa2ccb4f1..ce1c29640a 100644 --- a/tests/protocol/test_goal_state.py +++ b/tests/protocol/test_goal_state.py @@ -1,11 +1,15 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the Apache License. +import contextlib +import datetime import glob import os import re +import time from azurelinuxagent.common.future import httpclient +from azurelinuxagent.common.protocol.extensions_goal_state import GoalStateSource, GoalStateChannel from azurelinuxagent.common.protocol.goal_state import GoalState, _GET_GOAL_STATE_MAX_ATTEMPTS from azurelinuxagent.common.exception import ProtocolError from azurelinuxagent.common.utils import fileutil @@ -28,15 +32,15 @@ def test_fetch_goal_state_should_raise_on_incomplete_goal_state(self): GoalState(protocol.client) self.assertEqual(_GET_GOAL_STATE_MAX_ATTEMPTS, mock_sleep.call_count, "Unexpected number of retries") - @patch("azurelinuxagent.common.conf.get_enable_fast_track", return_value=True) - def test_instantiating_goal_state_should_save_the_goal_state_to_the_history_directory(self, _): + def test_instantiating_goal_state_should_save_the_goal_state_to_the_history_directory(self): with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: protocol.mock_wire_data.set_incarnation(999) + protocol.mock_wire_data.set_etag(888) _ = GoalState(protocol.client) self._assert_directory_contents( - self._find_history_subdirectory("999"), + self._find_history_subdirectory("999-888"), ["GoalState.xml", "ExtensionsConfig.xml", "VmSettings.json", "SharedConfig.xml", "HostingEnvironmentConfig.xml"]) def _find_history_subdirectory(self, tag): @@ -52,15 +56,14 @@ def _assert_directory_contents(self, directory, expected_files): 
self.assertEqual(expected_files, actual_files, "The expected files were not saved to {0}".format(directory)) - @patch("azurelinuxagent.common.conf.get_enable_fast_track", return_value=True) - def test_update_should_create_new_history_subdirectories(self, _): + def test_update_should_create_new_history_subdirectories(self): with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: protocol.mock_wire_data.set_incarnation(123) protocol.mock_wire_data.set_etag(654) goal_state = GoalState(protocol.client) self._assert_directory_contents( - self._find_history_subdirectory("123"), + self._find_history_subdirectory("123-654"), ["GoalState.xml", "ExtensionsConfig.xml", "VmSettings.json", "SharedConfig.xml", "HostingEnvironmentConfig.xml"]) def http_get_handler(url, *_, **__): @@ -72,17 +75,16 @@ def http_get_handler(url, *_, **__): protocol.set_http_handlers(http_get_handler=http_get_handler) goal_state.update() self._assert_directory_contents( - self._find_history_subdirectory("234"), + self._find_history_subdirectory("234-654"), ["GoalState.xml", "ExtensionsConfig.xml", "SharedConfig.xml", "HostingEnvironmentConfig.xml"]) protocol.mock_wire_data.set_etag(987) protocol.set_http_handlers(http_get_handler=None) goal_state.update() self._assert_directory_contents( - self._find_history_subdirectory("987"), ["VmSettings.json"]) + self._find_history_subdirectory("234-987"), ["VmSettings.json"]) - @patch("azurelinuxagent.common.conf.get_enable_fast_track", return_value=True) - def test_it_should_redact_the_protected_settings_when_saving_to_the_history_directory(self, _): + def test_it_should_redact_the_protected_settings_when_saving_to_the_history_directory(self): with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: protocol.mock_wire_data.set_incarnation(888) @@ -97,7 +99,7 @@ def test_it_should_redact_the_protected_settings_when_saving_to_the_history_dire if len(protected_settings) == 0: raise Exception("The test goal state does not include 
any protected settings") - history_directory = self._find_history_subdirectory("888") + history_directory = self._find_history_subdirectory("888-1") extensions_config_file = os.path.join(history_directory, "ExtensionsConfig.xml") vm_settings_file = os.path.join(history_directory, "VmSettings.json") for file_name in extensions_config_file, vm_settings_file: @@ -116,26 +118,152 @@ def test_it_should_redact_the_protected_settings_when_saving_to_the_history_dire len(protected_settings), "Could not find the expected number of redacted settings in {0}.\nExpected {1}.\n{2}".format(file_name, len(protected_settings), file_contents)) - @patch("azurelinuxagent.common.conf.get_enable_fast_track", return_value=True) - def test_it_should_save_vm_settings_on_parse_errors(self, _): + def test_it_should_save_vm_settings_on_parse_errors(self): with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: invalid_vm_settings_file = "hostgaplugin/vm_settings-parse_error.json" data_file = mockwiredata.DATA_FILE_VM_SETTINGS.copy() data_file["vm_settings"] = invalid_vm_settings_file protocol.mock_wire_data = mockwiredata.WireProtocolData(data_file) - protocol.mock_wire_data.set_incarnation(888) + protocol.mock_wire_data.set_etag(888) with self.assertRaises(ProtocolError): # the parsing error will cause an exception _ = GoalState(protocol.client) - matches = glob.glob(os.path.join(self.tmp_dir, ARCHIVE_DIRECTORY_NAME, "*_888")) - self.assertTrue(len(matches) == 1, "Expected one history directory for incarnation 888. 
Got: {0}".format(matches)) + # Do an extra call to update the goal state; this should save the vmsettings to the history directory + # only once (self._find_history_subdirectory asserts 1 single match) + time.sleep(0.1) # add a short delay to ensure that a new timestamp would be saved in the history folder + with self.assertRaises(ProtocolError): + _ = GoalState(protocol.client) + + history_directory = self._find_history_subdirectory("888") - history_directory = matches[0] vm_settings_file = os.path.join(history_directory, "VmSettings.json") self.assertTrue(os.path.exists(vm_settings_file), "{0} was not saved".format(vm_settings_file)) expected = load_data(invalid_vm_settings_file) actual = fileutil.read_file(vm_settings_file) - self.assertEqual(expected, actual, "The vmSettings were not saved correctly") \ No newline at end of file + self.assertEqual(expected, actual, "The vmSettings were not saved correctly") + + @staticmethod + @contextlib.contextmanager + def _create_protocol_ws_and_hgap_in_sync(): + """ + Creates a mock protocol in which the HostGAPlugin and the WireServer are in sync, both of them returning + the same Fabric goal state. + """ + data_file = mockwiredata.DATA_FILE_VM_SETTINGS.copy() + + with mock_wire_protocol(data_file) as protocol: + timestamp = datetime.datetime.utcnow() + incarnation = '111' + etag = '111111' + protocol.mock_wire_data.set_incarnation(incarnation, timestamp=timestamp) + protocol.mock_wire_data.set_etag(etag, timestamp=timestamp) + protocol.mock_wire_data.set_vm_settings_source(GoalStateSource.Fabric) + + # Do a few checks on the mock data to ensure we catch changes in internal implementations + # that may invalidate this setup. + vm_settings, _ = protocol.client.get_host_plugin().fetch_vm_settings() + if vm_settings.etag != etag: + raise Exception("The HostGAPlugin is no in sync. Expected ETag {0}. 
Got {1}".format(etag, vm_settings.etag)) + if vm_settings.source != GoalStateSource.Fabric: + raise Exception("The HostGAPlugin should be returning a Fabric goal state. Got {0}".format(vm_settings.source)) + + goal_state = GoalState(protocol.client) + if goal_state.incarnation != incarnation: + raise Exception("The WireServer is no in sync. Expected incarnation {0}. Got {1}".format(incarnation, goal_state.incarnation)) + + if goal_state.extensions_goal_state.correlation_id != vm_settings.correlation_id: + raise Exception( + "The correlation ID in the WireServer and HostGAPlugin are not in sync. WS: {0} HGAP: {1}".format( + goal_state.extensions_goal_state.correlation_id, vm_settings.correlation_id)) + + yield protocol + + def _assert_goal_state(self, goal_state, goal_state_id, channel=None, source=None): + self.assertIn(goal_state_id, goal_state.extensions_goal_state.id, "Incorrect Goal State ID") + if channel is not None: + self.assertEqual(channel, goal_state.extensions_goal_state.channel, "Incorrect Goal State channel") + if source is not None: + self.assertEqual(source, goal_state.extensions_goal_state.source, "Incorrect Goal State source") + + + def test_it_should_ignore_fabric_goal_states_from_the_host_ga_plugin(self): + with GoalStateTestCase._create_protocol_ws_and_hgap_in_sync() as protocol: + # + # Verify __init__() + # + expected_incarnation = '111' # test setup initializes to this value + timestamp = datetime.datetime.utcnow() + datetime.timedelta(seconds=15) + protocol.mock_wire_data.set_etag('22222', timestamp) + + goal_state = GoalState(protocol.client) + + self._assert_goal_state(goal_state, expected_incarnation, channel=GoalStateChannel.WireServer) + + # + # Verify update() + # + timestamp += datetime.timedelta(seconds=15) + protocol.mock_wire_data.set_etag('333333', timestamp) + + goal_state.update() + + self._assert_goal_state(goal_state, expected_incarnation, channel=GoalStateChannel.WireServer) + + def 
test_it_should_use_fast_track_goal_states_from_the_host_ga_plugin(self): + with GoalStateTestCase._create_protocol_ws_and_hgap_in_sync() as protocol: + protocol.mock_wire_data.set_vm_settings_source(GoalStateSource.FastTrack) + + # + # Verify __init__() + # + expected_etag = '22222' + timestamp = datetime.datetime.utcnow() + datetime.timedelta(seconds=15) + protocol.mock_wire_data.set_etag(expected_etag, timestamp) + + goal_state = GoalState(protocol.client) + + self._assert_goal_state(goal_state, expected_etag, channel=GoalStateChannel.HostGAPlugin) + + # + # Verify update() + # + expected_etag = '333333' + timestamp += datetime.timedelta(seconds=15) + protocol.mock_wire_data.set_etag(expected_etag, timestamp) + + goal_state.update() + + self._assert_goal_state(goal_state, expected_etag, channel=GoalStateChannel.HostGAPlugin) + + def test_it_should_use_the_most_recent_goal_state(self): + with GoalStateTestCase._create_protocol_ws_and_hgap_in_sync() as protocol: + goal_state = GoalState(protocol.client) + + # The most recent goal state is FastTrack + timestamp = datetime.datetime.utcnow() + datetime.timedelta(seconds=15) + protocol.mock_wire_data.set_vm_settings_source(GoalStateSource.FastTrack) + protocol.mock_wire_data.set_etag('222222', timestamp) + + goal_state.update() + + self._assert_goal_state(goal_state, '222222', channel=GoalStateChannel.HostGAPlugin, source=GoalStateSource.FastTrack) + + # The most recent goal state is Fabric + timestamp += datetime.timedelta(seconds=15) + protocol.mock_wire_data.set_incarnation('222', timestamp) + + goal_state.update() + + self._assert_goal_state(goal_state, '222', channel=GoalStateChannel.WireServer, source=GoalStateSource.Fabric) + + # The most recent goal state is Fabric, but it is coming from the HostGAPlugin (should be ignored) + timestamp += datetime.timedelta(seconds=15) + protocol.mock_wire_data.set_vm_settings_source(GoalStateSource.Fabric) + protocol.mock_wire_data.set_etag('333333', timestamp) + + 
goal_state.update() + + self._assert_goal_state(goal_state, '222', channel=GoalStateChannel.WireServer, source=GoalStateSource.Fabric) diff --git a/tests/protocol/test_hostplugin.py b/tests/protocol/test_hostplugin.py index c18627a0a5..f3b5a5889e 100644 --- a/tests/protocol/test_hostplugin.py +++ b/tests/protocol/test_hostplugin.py @@ -858,17 +858,16 @@ def http_get_handler(url, *_, **__): with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: protocol.set_http_handlers(http_get_handler=http_get_handler) with self.assertRaisesRegexCM(ProtocolError, r'GET vmSettings \[correlation ID: .* eTag: .*\]: \[HTTP Failed\] \[500: None].*TEST ERROR.*'): - protocol.client.get_host_plugin().fetch_vm_settings(False) + protocol.client.get_host_plugin().fetch_vm_settings() @staticmethod def _fetch_vm_settings_ignoring_errors(protocol): try: - protocol.client.get_host_plugin().fetch_vm_settings(False) + protocol.client.get_host_plugin().fetch_vm_settings() except (ProtocolError, VmSettingsNotSupported): pass - @patch("azurelinuxagent.common.conf.get_enable_fast_track", return_value=True) - def test_it_should_keep_track_of_errors_in_vm_settings_requests(self, _): + def test_it_should_keep_track_of_errors_in_vm_settings_requests(self): mock_response = None def http_get_handler(url, *_, **__): @@ -916,8 +915,7 @@ def http_get_handler(url, *_, **__): self.assertEqual(expected, summary, "The count of errors is incorrect") - @patch("azurelinuxagent.common.conf.get_enable_fast_track", return_value=True) - def test_it_should_limit_the_number_of_errors_it_reports(self, _): + def test_it_should_limit_the_number_of_errors_it_reports(self): with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: def http_get_handler(url, *_, **__): if self.is_host_plugin_vm_settings_request(url): @@ -933,7 +931,7 @@ def get_log_messages(): def fetch_vm_settings(): try: - host_plugin.fetch_vm_settings(True) + host_plugin.fetch_vm_settings() except ProtocolError: pass # All 
calls produce an error; ignore it @@ -962,8 +960,7 @@ def fetch_vm_settings(): log_messages = get_log_messages() self.assertEqual(1, len(log_messages), "Expected additional errors to be reported to the local log in the next period (got: {0})".format(telemetry_messages)) - @patch("azurelinuxagent.common.conf.get_enable_fast_track", return_value=True) - def test_it_should_stop_issuing_vm_settings_requests_when_api_is_not_supported(self, _): + def test_it_should_stop_issuing_vm_settings_requests_when_api_is_not_supported(self): with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: def http_get_handler(url, *_, **__): if self.is_host_plugin_vm_settings_request(url): diff --git a/tests/protocol/test_wire.py b/tests/protocol/test_wire.py index b182ca356e..0b619c66d3 100644 --- a/tests/protocol/test_wire.py +++ b/tests/protocol/test_wire.py @@ -1097,8 +1097,7 @@ def test_forced_update_should_update_the_goal_state_and_the_host_plugin_when_the self.assertEqual(protocol.client.get_host_plugin().container_id, new_container_id) self.assertEqual(protocol.client.get_host_plugin().role_config_name, new_role_config_name) - @patch("azurelinuxagent.common.conf.get_enable_fast_track", return_value=True) - def test_it_should_retry_get_vm_settings_on_resource_gone_error(self, _): + def test_it_should_retry_get_vm_settings_on_resource_gone_error(self): # Requests to the hostgaplugin incude the Container ID and the RoleConfigName as headers; when the hostgaplugin returns GONE (HTTP status 410) the agent # needs to get a new goal state and retry the request with updated values for the Container ID and RoleConfigName headers. 
with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: @@ -1126,8 +1125,7 @@ def http_get_vm_settings(_method, _host, _relative_url, **kwargs): self.assertEqual("GET_VM_SETTINGS_TEST_CONTAINER_ID", request_headers[1][hostplugin._HEADER_CONTAINER_ID], "The retry request did not include the expected header for the ContainerId") self.assertEqual("GET_VM_SETTINGS_TEST_ROLE_CONFIG_NAME", request_headers[1][hostplugin._HEADER_HOST_CONFIG_NAME], "The retry request did not include the expected header for the RoleConfigName") - @patch("azurelinuxagent.common.conf.get_enable_fast_track", return_value=True) - def test_it_should_use_vm_settings_by_default(self, _): + def test_it_should_use_vm_settings_by_default(self): with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: extensions_goal_state = protocol.get_goal_state().extensions_goal_state self.assertTrue( diff --git a/tests/utils/test_archive.py b/tests/utils/test_archive.py index f65b230109..466d674c79 100644 --- a/tests/utils/test_archive.py +++ b/tests/utils/test_archive.py @@ -6,7 +6,7 @@ from datetime import datetime, timedelta import azurelinuxagent.common.logger as logger -from azurelinuxagent.common.utils import fileutil +from azurelinuxagent.common.utils import fileutil, timeutil from azurelinuxagent.common.utils.archive import StateArchiver, _MAX_ARCHIVED_STATES from tests.tools import AgentTestCase, patch @@ -198,20 +198,13 @@ def assert_is_iso8601(timestamp_str): except: raise AssertionError("the value '{0}' is not an ISO8601 formatted timestamp".format(timestamp_str)) - @staticmethod - def _total_seconds(delta): - """ - Compute the total_seconds for a timedelta because 2.6 does not have total_seconds. 
- """ - return (0.0 + delta.microseconds + (delta.seconds + delta.days * 24 * 60 * 60) * 10 ** 6) / 10 ** 6 - def assert_datetime_close_to(self, time1, time2, within): if time1 <= time2: diff = time2 - time1 else: diff = time1 - time2 - secs = self._total_seconds(within - diff) + secs = timeutil.total_seconds(within - diff) if secs < 0: self.fail("the timestamps are outside of the tolerance of by {0} seconds".format(secs)) From a5ae1afef8bdcf80de418b03bc53391550392879 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Wed, 6 Apr 2022 14:55:46 -0700 Subject: [PATCH 55/84] Skip outdated Fabric goal states when HGAP does not support vmSettings (#2542) * Skip outdated Fabric goal states when HGAP does not support vmSettings * add comment Co-authored-by: narrieta --- .../common/protocol/extensions_goal_state.py | 15 ++ ...sions_goal_state_from_extensions_config.py | 1 + azurelinuxagent/common/protocol/goal_state.py | 29 ++- azurelinuxagent/common/protocol/hostplugin.py | 176 ++++++++++++------ azurelinuxagent/ga/update.py | 5 +- tests/ga/test_update.py | 40 ++-- tests/protocol/test_goal_state.py | 97 +++++++++- tests/protocol/test_hostplugin.py | 81 +++++--- tests/protocol/test_wire.py | 64 ------- 9 files changed, 333 insertions(+), 175 deletions(-) diff --git a/azurelinuxagent/common/protocol/extensions_goal_state.py b/azurelinuxagent/common/protocol/extensions_goal_state.py index 454a13806a..74c761caee 100644 --- a/azurelinuxagent/common/protocol/extensions_goal_state.py +++ b/azurelinuxagent/common/protocol/extensions_goal_state.py @@ -53,6 +53,9 @@ class ExtensionsGoalState(object): NOTE: This is an abstract class. The corresponding concrete classes can be instantiated using the ExtensionsGoalStateFactory. 
""" + def __init__(self): + self._is_outdated = False + @property def id(self): """ @@ -61,6 +64,18 @@ def id(self): """ raise NotImplementedError() + @property + def is_outdated(self): + """ + A goal state can be outdated if, for example, the VM Agent is using Fast Track and support for it stops (e.g. the VM is migrated + to a node with an older version of the HostGAPlugin) and now the Agent is fetching goal states via the WireServer. + """ + return self._is_outdated + + @is_outdated.setter + def is_outdated(self, value): + self._is_outdated = value + @property def svd_sequence_number(self): raise NotImplementedError() diff --git a/azurelinuxagent/common/protocol/extensions_goal_state_from_extensions_config.py b/azurelinuxagent/common/protocol/extensions_goal_state_from_extensions_config.py index 98ab84cc56..c7e01dd207 100644 --- a/azurelinuxagent/common/protocol/extensions_goal_state_from_extensions_config.py +++ b/azurelinuxagent/common/protocol/extensions_goal_state_from_extensions_config.py @@ -33,6 +33,7 @@ class ExtensionsGoalStateFromExtensionsConfig(ExtensionsGoalState): def __init__(self, incarnation, xml_text, wire_client): super(ExtensionsGoalStateFromExtensionsConfig, self).__init__() self._id = "incarnation_{0}".format(incarnation) + self._is_outdated = False self._incarnation = incarnation self._text = xml_text self._status_upload_blob = None diff --git a/azurelinuxagent/common/protocol/goal_state.py b/azurelinuxagent/common/protocol/goal_state.py index 7c4c7c8c52..6d362438d5 100644 --- a/azurelinuxagent/common/protocol/goal_state.py +++ b/azurelinuxagent/common/protocol/goal_state.py @@ -23,11 +23,12 @@ import azurelinuxagent.common.logger as logger from azurelinuxagent.common.AgentGlobals import AgentGlobals from azurelinuxagent.common.datacontract import set_properties +from azurelinuxagent.common.event import add_event, WALAEventOperation from azurelinuxagent.common.exception import ProtocolError, ResourceGoneError from 
azurelinuxagent.common.future import ustr from azurelinuxagent.common.protocol.extensions_goal_state_factory import ExtensionsGoalStateFactory from azurelinuxagent.common.protocol.extensions_goal_state import VmSettingsParseError, GoalStateSource -from azurelinuxagent.common.protocol.hostplugin import VmSettingsNotSupported +from azurelinuxagent.common.protocol.hostplugin import VmSettingsNotSupported, VmSettingsSupportStopped from azurelinuxagent.common.protocol.restapi import Cert, CertList, RemoteAccessUser, RemoteAccessUsersList from azurelinuxagent.common.utils import fileutil, timeutil from azurelinuxagent.common.utils.archive import GoalStateHistory @@ -134,9 +135,15 @@ def update(self): incarnation, xml_text, xml_doc = GoalState._fetch_goal_state(self._wire_client) goal_state_updated = incarnation != self._incarnation if goal_state_updated: - logger.info('Fetched new goal state from the WireServer [incarnation {0}]', incarnation) + logger.info('Fetched a new incarnation for the WireServer goal state [incarnation {0}]', incarnation) + + vm_settings, vm_settings_updated = None, False + try: + vm_settings, vm_settings_updated = GoalState._fetch_vm_settings(self._wire_client) + except VmSettingsSupportStopped as exception: # If the HGAP stopped supporting vmSettings, we need to use the goal state from the WireServer + self._restore_wire_server_goal_state(incarnation, xml_text, xml_doc, exception) + return - vm_settings, vm_settings_updated = GoalState._fetch_vm_settings(self._wire_client) if vm_settings_updated: logger.info("Fetched new vmSettings [HostGAPlugin correlation ID: {0} eTag: {1} source: {2}]", vm_settings.hostga_plugin_correlation_id, vm_settings.etag, vm_settings.source) # Ignore the vmSettings if their source is Fabric (processing a Fabric goal state may require the tenant certificate and the vmSettings don't include it.) 
@@ -177,6 +184,18 @@ def update(self): if self._extensions_goal_state is None or most_recent.created_on_timestamp > self._extensions_goal_state.created_on_timestamp: self._extensions_goal_state = most_recent + def _restore_wire_server_goal_state(self, incarnation, xml_text, xml_doc, vm_settings_support_stopped_error): + logger.info('The HGAP stopped supporting vmSettings; will fetched the goal state from the WireServer.') + self._history = GoalStateHistory(timeutil.create_timestamp(), incarnation) + self._history.save_goal_state(xml_text) + self._extensions_goal_state = self._fetch_full_wire_server_goal_state(incarnation, xml_doc) + if self._extensions_goal_state.created_on_timestamp < vm_settings_support_stopped_error.timestamp: + self._extensions_goal_state.is_outdated = True + msg = "Fetched a Fabric goal state older than the most recent FastTrack goal state; will skip it. (Fabric: {0} FastTrack: {1})".format( + self._extensions_goal_state.created_on_timestamp, vm_settings_support_stopped_error.timestamp) + logger.info(msg) + add_event(op=WALAEventOperation.VmSettings, message=msg, is_success=True) + def save_to_history(self, data, file_name): self._history.save(data, file_name) @@ -231,6 +250,8 @@ def _fetch_vm_settings(wire_client): GoalState.update_host_plugin_headers(wire_client) vm_settings, vm_settings_updated = wire_client.get_host_plugin().fetch_vm_settings() + except VmSettingsSupportStopped: + raise except VmSettingsNotSupported: pass except VmSettingsParseError as exception: @@ -249,7 +270,7 @@ def _fetch_full_wire_server_goal_state(self, incarnation, xml_doc): Returns the value of ExtensionsConfig. 
""" try: - logger.info('Fetching full goal state from the WireServer') + logger.info('Fetching full goal state from the WireServer [incarnation {0}]', incarnation) role_instance = find(xml_doc, "RoleInstance") role_instance_id = findtext(role_instance, "InstanceId") diff --git a/azurelinuxagent/common/protocol/hostplugin.py b/azurelinuxagent/common/protocol/hostplugin.py index 2e895ff669..477ccf9bd2 100644 --- a/azurelinuxagent/common/protocol/hostplugin.py +++ b/azurelinuxagent/common/protocol/hostplugin.py @@ -16,23 +16,22 @@ # # Requires Python 2.6+ and Openssl 1.0+ # - import base64 import datetime import json +import os.path import uuid -from azurelinuxagent.common import logger +from azurelinuxagent.common import logger, conf from azurelinuxagent.common.errorstate import ErrorState, ERROR_STATE_HOST_PLUGIN_FAILURE from azurelinuxagent.common.event import WALAEventOperation, add_event from azurelinuxagent.common.exception import HttpError, ProtocolError, ResourceGoneError from azurelinuxagent.common.utils.flexible_version import FlexibleVersion from azurelinuxagent.common.future import ustr, httpclient from azurelinuxagent.common.protocol.healthservice import HealthService -from azurelinuxagent.common.protocol.extensions_goal_state import VmSettingsParseError +from azurelinuxagent.common.protocol.extensions_goal_state import VmSettingsParseError, GoalStateSource from azurelinuxagent.common.protocol.extensions_goal_state_factory import ExtensionsGoalStateFactory -from azurelinuxagent.common.utils import restutil -from azurelinuxagent.common.utils import textutil +from azurelinuxagent.common.utils import restutil, textutil, timeutil from azurelinuxagent.common.utils.textutil import remove_bom from azurelinuxagent.common.version import AGENT_NAME, AGENT_VERSION, PY_VERSION_MAJOR @@ -86,11 +85,21 @@ def __init__(self, endpoint): self.status_error_state = ErrorState(min_timedelta=ERROR_STATE_HOST_PLUGIN_FAILURE) self.fetch_last_timestamp = None 
self.status_last_timestamp = None - self._host_plugin_version = FlexibleVersion("0.0.0.0") # Version 0 means "unknown" - self._host_plugin_supports_vm_settings = None # Tri-state variable: None == Not Initialized, True == Supports, False == Does Not Support - self._host_plugin_supports_vm_settings_next_check = datetime.datetime.now() + self._version = FlexibleVersion("0.0.0.0") # Version 0 means "unknown" + self._supports_vm_settings = None # Tri-state variable: None == Not Initialized, True == Supports, False == Does Not Support + self._supports_vm_settings_next_check = datetime.datetime.now() self._vm_settings_error_reporter = _VmSettingsErrorReporter() - self._cached_vm_settings = None # Cached value of the most recent ExtensionsGoalStateFromVmSettings + self._cached_vm_settings = None # Cached value of the most recent vmSettings + + # restore the state of Fast Track + if not os.path.exists(self._get_fast_track_state_file()): + self._supports_vm_settings = False + self._supports_vm_settings_next_check = datetime.datetime.now() + self._fast_track_timestamp = None + else: + self._supports_vm_settings = True + self._supports_vm_settings_next_check = datetime.datetime.now() + self._fast_track_timestamp = HostPluginProtocol._get_fast_track_timestamp() @staticmethod def _extract_deployment_id(role_config_name): @@ -102,9 +111,9 @@ def check_vm_settings_support(self): Returns True if the HostGAPlugin supports the vmSettings API. 
""" # _host_plugin_supports_vm_settings is set by fetch_vm_settings() - if self._host_plugin_supports_vm_settings is None: + if self._supports_vm_settings is None: _, _ = self.fetch_vm_settings() - return self._host_plugin_supports_vm_settings + return self._supports_vm_settings def update_container_id(self, new_container_id): self.container_id = new_container_id @@ -405,34 +414,75 @@ def _base64_encode(self, data): return s.decode('utf-8') return s + @staticmethod + def _get_fast_track_state_file(): + # This file keeps the timestamp of the most recent goal state if it was retrieved via Fast Track + return os.path.join(conf.get_lib_dir(), "fast_track.json") + + @staticmethod + def _save_fast_track_state(timestamp): + try: + with open(HostPluginProtocol._get_fast_track_state_file(), "w") as file_: + json.dump({"timestamp": timestamp}, file_) + except Exception as e: + logger.warn("Error updating the Fast Track state ({0}): {1}", HostPluginProtocol._get_fast_track_state_file(), ustr(e)) + + @staticmethod + def clear_fast_track_state(): + try: + if os.path.exists(HostPluginProtocol._get_fast_track_state_file()): + os.remove(HostPluginProtocol._get_fast_track_state_file()) + except Exception as e: + logger.warn("Error clearing the current state for Fast Track ({0}): {1}", HostPluginProtocol._get_fast_track_state_file(), + ustr(e)) + + @staticmethod + def _get_fast_track_timestamp(): + try: + with open(HostPluginProtocol._get_fast_track_state_file(), "r") as file_: + return json.load(file_)["timestamp"] + except Exception as e: + logger.warn("Can't retrieve the timestamp for the most recent Fast Track goal state ({0}), will assume the current time. 
Error: {1}", + HostPluginProtocol._get_fast_track_state_file(), ustr(e)) + return timeutil.create_timestamp(datetime.datetime.utcnow()) + def fetch_vm_settings(self): """ - Queries the vmSettings from the HostGAPlugin and returns an (ExtensionsGoalStateFromVmSettings, bool) tuple with the vmSettings and + Queries the vmSettings from the HostGAPlugin and returns an (ExtensionsGoalState, bool) tuple with the vmSettings and a boolean indicating if they are an updated (True) or a cached value (False). Raises * VmSettingsNotSupported if the HostGAPlugin does not support the vmSettings API - * VmSettingsError if the HostGAPlugin returned invalid vmSettings (e.g. syntax error) + * VmSettingsSupportStopped if the HostGAPlugin stopped supporting the vmSettings API + * VmSettingsParseError if the HostGAPlugin returned invalid vmSettings (e.g. syntax error) * ResourceGoneError if the container ID and roleconfig name need to be refreshed * ProtocolError if the request fails for any other reason (e.g. not supported, time out, server error) """ - def raise_not_supported(reset_state=False): - if reset_state: - self._host_plugin_supports_vm_settings = False - self._host_plugin_supports_vm_settings_next_check = datetime.datetime.now() + datetime.timedelta(hours=6) # check again in 6 hours - # "Not supported" is not considered an error, so don't use self._vm_settings_error_reporter to report it - logger.info("vmSettings is not supported") - add_event(op=WALAEventOperation.HostPlugin, message="vmSettings is not supported", is_success=True) - raise VmSettingsNotSupported() + def raise_not_supported(): + try: + if self._supports_vm_settings: + # The most recent goal state was delivered using FastTrack, and suddenly the HostGAPlugin does not support the vmSettings API anymore. + # This can happen if, for example, the VM is migrated across host nodes that are running different versions of the HostGAPlugin. + logger.warn("The HostGAPlugin stopped supporting the vmSettings API. 
If there is a pending FastTrack goal state, it will not be executed.") + add_event(op=WALAEventOperation.VmSettings, message="[VmSettingsSupportStopped] HostGAPlugin: {0}".format(self._version), is_success=False, log_event=False) + raise VmSettingsSupportStopped(self._fast_track_timestamp) + else: + logger.info("HostGAPlugin {0} does not support the vmSettings API. Will not use FastTrack.", self._version) + add_event(op=WALAEventOperation.VmSettings, message="[VmSettingsNotSupported] HostGAPlugin: {0}".format(self._version), is_success=True) + raise VmSettingsNotSupported() + finally: + self._supports_vm_settings = False + self._supports_vm_settings_next_check = datetime.datetime.now() + datetime.timedelta(hours=6) # check again in 6 hours def format_message(msg): return "GET vmSettings [correlation ID: {0} eTag: {1}]: {2}".format(correlation_id, etag, msg) try: - # Raise if VmSettings are not supported but check for periodically since the HostGAPlugin could have been updated since the last check + # Raise if VmSettings are not supported, but check again periodically since the HostGAPlugin could have been updated since the last check # Note that self._host_plugin_supports_vm_settings can be None, so we need to compare against False - if self._host_plugin_supports_vm_settings == False and self._host_plugin_supports_vm_settings_next_check > datetime.datetime.now(): - raise_not_supported() + if self._supports_vm_settings == False and self._supports_vm_settings_next_check > datetime.datetime.now(): + # Raise VmSettingsNotSupported directly instead of using raise_not_supported() to avoid resetting the timestamp for the next check + raise VmSettingsNotSupported() etag = None if self._cached_vm_settings is None else self._cached_vm_settings.etag correlation_id = str(uuid.uuid4()) @@ -448,7 +498,7 @@ def format_message(msg): raise ResourceGoneError() if response.status == httpclient.NOT_FOUND: # the HostGAPlugin does not support FastTrack - 
raise_not_supported(reset_state=True) + raise_not_supported() if response.status == httpclient.NOT_MODIFIED: # The goal state hasn't changed, return the current instance return self._cached_vm_settings, False @@ -467,7 +517,7 @@ def format_message(msg): elif 500 <= response.status <= 599: self._vm_settings_error_reporter.report_error(error_description, _VmSettingsError.ServerError) else: - self._vm_settings_error_reporter.report_error(error_description) + self._vm_settings_error_reporter.report_error(error_description, _VmSettingsError.HttpError) raise ProtocolError(error_description) @@ -477,7 +527,6 @@ def format_message(msg): break else: # since the vmSettings were updated, the response must include an etag message = format_message("The vmSettings response does not include an Etag header") - self._vm_settings_error_reporter.report_error(message) raise ProtocolError(message) response_content = ustr(response.read(), encoding='utf-8') @@ -485,8 +534,8 @@ def format_message(msg): vm_settings = ExtensionsGoalStateFactory.create_from_vm_settings(response_etag, response_content, correlation_id) # log the HostGAPlugin version - if vm_settings.host_ga_plugin_version != self._host_plugin_version: - self._host_plugin_version = vm_settings.host_ga_plugin_version + if vm_settings.host_ga_plugin_version != self._version: + self._version = vm_settings.host_ga_plugin_version message = "HostGAPlugin version: {0}".format(vm_settings.host_ga_plugin_version) logger.info(message) add_event(op=WALAEventOperation.HostPlugin, message=message, is_success=True) @@ -494,17 +543,20 @@ def format_message(msg): # Don't support HostGAPlugin versions older than 123 # TODO: update the minimum version to 1.0.8.123 before release if vm_settings.host_ga_plugin_version < FlexibleVersion("1.0.8.117"): - raise_not_supported(reset_state=True) + raise_not_supported() - self._host_plugin_supports_vm_settings = True + self._supports_vm_settings = True self._cached_vm_settings = vm_settings + + if 
vm_settings.source == GoalStateSource.FastTrack: + self._fast_track_timestamp = vm_settings.created_on_timestamp + self._save_fast_track_state(vm_settings.created_on_timestamp) + else: + self.clear_fast_track_state() + return vm_settings, True - except (ProtocolError, ResourceGoneError, VmSettingsNotSupported): - raise - except VmSettingsParseError as exception: - message = format_message(ustr(exception)) - self._vm_settings_error_reporter.report_error(message) + except (ProtocolError, ResourceGoneError, VmSettingsNotSupported, VmSettingsParseError): raise except Exception as exception: if isinstance(exception, IOError) and "timed out" in ustr(exception): @@ -519,19 +571,31 @@ def format_message(msg): class VmSettingsNotSupported(TypeError): - pass + """ + Indicates that the HostGAPlugin does not support the vmSettings API + """ + + +class VmSettingsSupportStopped(VmSettingsNotSupported): + """ + Indicates that the HostGAPlugin supported the vmSettings API in previous calls, but now it does not support it for current call. + This can happen, for example, if the VM is migrated across nodes with different HostGAPlugin versions. 
+ """ + def __init__(self, timestamp): + super(VmSettingsSupportStopped, self).__init__() + self.timestamp = timestamp class _VmSettingsError(object): - ServerError = 'ServerError' - ClientError = 'ClientError' - Timeout = 'Timeout' - RequestFailed = 'RequestFailed' + ClientError = 'ClientError' + HttpError = 'HttpError' + RequestFailed = 'RequestFailed' + ServerError = 'ServerError' + Timeout = 'Timeout' class _VmSettingsErrorReporter(object): - _MaxLogErrors = 1 # Max number of errors by period reported to the local log - _MaxTelemetryErrors = 3 # Max number of errors by period reported to telemetry + _MaxErrors = 3 # Max number of errors reported to telemetry (by period) _Period = datetime.timedelta(hours=1) # How often to report the summary def __init__(self): @@ -540,32 +604,32 @@ def __init__(self): def _reset(self): self._request_count = 0 # Total number of vmSettings HTTP requests self._error_count = 0 # Total number of errors issuing vmSettings requests (includes all kinds of errors) - self._server_error_count = 0 # Count of server side errors (HTTP status in the 500s) self._client_error_count = 0 # Count of client side errors (HTTP status in the 400s) - self._timeout_count = 0 # Count of timeouts on vmSettings requests + self._http_error_count = 0 # Count of HTTP errors other than 400s and 500s self._request_failure_count = 0 # Total count of requests that could not be issued (does not include timeouts or requests that were actually issued and failed, for example, with 500 or 400 statuses) + self._server_error_count = 0 # Count of server side errors (HTTP status in the 500s) + self._timeout_count = 0 # Count of timeouts on vmSettings requests self._next_period = datetime.datetime.now() + _VmSettingsErrorReporter._Period def report_request(self): self._request_count += 1 - def report_error(self, error, category=None): + def report_error(self, error, category): self._error_count += 1 - if self._error_count <= _VmSettingsErrorReporter._MaxLogErrors: - 
logger.info("[VmSettings] [Informational only, the Agent will continue normal operation] {0}", error) - - if self._error_count <= _VmSettingsErrorReporter._MaxTelemetryErrors: - add_event(op=WALAEventOperation.VmSettings, message=error, is_success=False, log_event=False) + if self._error_count <= _VmSettingsErrorReporter._MaxErrors: + add_event(op=WALAEventOperation.VmSettings, message="[{0}] {1}".format(category, error), is_success=False, log_event=False) - if category == _VmSettingsError.ServerError: - self._server_error_count += 1 - elif category == _VmSettingsError.ClientError: + if category == _VmSettingsError.ClientError: self._client_error_count += 1 - elif category == _VmSettingsError.Timeout: - self._timeout_count += 1 + elif category == _VmSettingsError.HttpError: + self._http_error_count += 1 elif category == _VmSettingsError.RequestFailed: self._request_failure_count += 1 + elif category == _VmSettingsError.ServerError: + self._server_error_count += 1 + elif category == _VmSettingsError.Timeout: + self._timeout_count += 1 def report_summary(self): if datetime.datetime.now() >= self._next_period: diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py index 582716acb2..7e8f08b613 100644 --- a/azurelinuxagent/ga/update.py +++ b/azurelinuxagent/ga/update.py @@ -567,7 +567,8 @@ def _processing_new_extensions_goal_state(self): """ True if we are currently processing a new extensions goal state """ - return self._goal_state is not None and self._goal_state.extensions_goal_state.id != self._last_extensions_gs_id + egs = self._goal_state.extensions_goal_state + return self._goal_state is not None and egs.id != self._last_extensions_gs_id and not egs.is_outdated def _process_goal_state(self, exthandlers_handler, remote_access_handler): try: @@ -670,6 +671,8 @@ def _report_status(self, exthandlers_handler): if self._goal_state is not None: agent_status = exthandlers_handler.get_ext_handlers_status_debug_info(vm_status) 
self._goal_state.save_to_history(agent_status, AGENT_STATUS_FILE) + if self._goal_state.extensions_goal_state.is_outdated: + exthandlers_handler.protocol.client.get_host_plugin().clear_fast_track_state() def _report_extensions_summary(self, vm_status): try: diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index 22594056ee..d9e1fe04d8 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -1356,26 +1356,27 @@ def iterator(*_, **__): with patch('azurelinuxagent.ga.update.get_collect_telemetry_events_handler') as mock_event_collector: with patch('azurelinuxagent.ga.update.initialize_event_logger_vminfo_common_parameters'): with patch('azurelinuxagent.ga.update.is_log_collection_allowed', return_value=True): - with patch('time.sleep') as sleep_mock: - with patch('sys.exit') as mock_exit: - if isinstance(os.getppid, MagicMock): - self.update_handler.run() - else: - with patch('os.getppid', return_value=42): + with patch.object(self.update_handler, "_processing_new_extensions_goal_state", return_value=True): + with patch('time.sleep') as sleep_mock: + with patch('sys.exit') as mock_exit: + if isinstance(os.getppid, MagicMock): self.update_handler.run() - - self.assertEqual(1, mock_handler.call_count) - self.assertEqual(calls, len([c for c in [call[0] for call in mock_handler.return_value.method_calls] if c == 'run'])) - self.assertEqual(1, mock_ra_handler.call_count) - self.assertEqual(calls, len(mock_ra_handler.return_value.method_calls)) - if calls > 0: - self.assertEqual(sleep_interval, sleep_mock.call_args[0]) - self.assertEqual(1, mock_monitor.call_count) - self.assertEqual(1, mock_env.call_count) - self.assertEqual(1, mock_collect_logs.call_count) - self.assertEqual(1, mock_telemetry_send_events.call_count) - self.assertEqual(1, mock_event_collector.call_count) - self.assertEqual(1, mock_exit.call_count) + else: + with patch('os.getppid', return_value=42): + self.update_handler.run() + + self.assertEqual(1, mock_handler.call_count) + 
self.assertEqual(calls, len([c for c in [call[0] for call in mock_handler.return_value.method_calls] if c == 'run'])) + self.assertEqual(1, mock_ra_handler.call_count) + self.assertEqual(calls, len(mock_ra_handler.return_value.method_calls)) + if calls > 0: + self.assertEqual(sleep_interval, sleep_mock.call_args[0]) + self.assertEqual(1, mock_monitor.call_count) + self.assertEqual(1, mock_env.call_count) + self.assertEqual(1, mock_collect_logs.call_count) + self.assertEqual(1, mock_telemetry_send_events.call_count) + self.assertEqual(1, mock_event_collector.call_count) + self.assertEqual(1, mock_exit.call_count) def test_run(self): self._test_run() @@ -1463,6 +1464,7 @@ def _test_upgrade_available( self.update_handler.protocol_util = protocol self.update_handler._goal_state = protocol.get_goal_state() + self.update_handler._goal_state.extensions_goal_state.is_outdated = False conf.get_autoupdate_gafamily = Mock(return_value=protocol.family) return self.update_handler._download_agent_if_upgrade_available(protocol, base_version=base_version) diff --git a/tests/protocol/test_goal_state.py b/tests/protocol/test_goal_state.py index ce1c29640a..65d050d862 100644 --- a/tests/protocol/test_goal_state.py +++ b/tests/protocol/test_goal_state.py @@ -10,6 +10,9 @@ from azurelinuxagent.common.future import httpclient from azurelinuxagent.common.protocol.extensions_goal_state import GoalStateSource, GoalStateChannel +from azurelinuxagent.common.protocol.extensions_goal_state_from_extensions_config import ExtensionsGoalStateFromExtensionsConfig +from azurelinuxagent.common.protocol.extensions_goal_state_from_vm_settings import ExtensionsGoalStateFromVmSettings +from azurelinuxagent.common.protocol import hostplugin from azurelinuxagent.common.protocol.goal_state import GoalState, _GET_GOAL_STATE_MAX_ATTEMPTS from azurelinuxagent.common.exception import ProtocolError from azurelinuxagent.common.utils import fileutil @@ -20,7 +23,68 @@ from tests.tools import AgentTestCase, patch, 
load_data -class GoalStateTestCase(AgentTestCase): +class GoalStateTestCase(AgentTestCase, HttpRequestPredicates): + def test_it_should_use_vm_settings_by_default(self): + with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: + extensions_goal_state = GoalState(protocol.client).extensions_goal_state + self.assertTrue( + isinstance(extensions_goal_state, ExtensionsGoalStateFromVmSettings), + 'The extensions goal state should have been created from the vmSettings (got: {0})'.format(type(extensions_goal_state))) + + def _assert_is_extensions_goal_state_from_extensions_config(self, extensions_goal_state): + self.assertTrue( + isinstance(extensions_goal_state, ExtensionsGoalStateFromExtensionsConfig), + 'The extensions goal state should have been created from the extensionsConfig (got: {0})'.format(type(extensions_goal_state))) + + def test_it_should_use_extensions_config_when_fast_track_is_disabled(self): + with patch("azurelinuxagent.common.conf.get_enable_fast_track", return_value=False): + with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: + self._assert_is_extensions_goal_state_from_extensions_config(GoalState(protocol.client).extensions_goal_state) + + def test_it_should_use_extensions_config_when_fast_track_is_not_supported(self): + def http_get_handler(url, *_, **__): + if self.is_host_plugin_vm_settings_request(url): + return MockHttpResponse(httpclient.NOT_FOUND) + return None + + with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS, http_get_handler=http_get_handler) as protocol: + self._assert_is_extensions_goal_state_from_extensions_config(GoalState(protocol.client).extensions_goal_state) + + def test_it_should_use_extensions_config_when_the_host_ga_plugin_version_is_not_supported(self): + data_file = mockwiredata.DATA_FILE_VM_SETTINGS.copy() + data_file["vm_settings"] = "hostgaplugin/vm_settings-unsupported_version.json" + + with mock_wire_protocol(data_file) as protocol: + 
self._assert_is_extensions_goal_state_from_extensions_config(GoalState(protocol.client).extensions_goal_state) + + def test_it_should_retry_get_vm_settings_on_resource_gone_error(self): + # Requests to the hostgaplugin incude the Container ID and the RoleConfigName as headers; when the hostgaplugin returns GONE (HTTP status 410) the agent + # needs to get a new goal state and retry the request with updated values for the Container ID and RoleConfigName headers. + with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: + # Do not mock the vmSettings request at the level of azurelinuxagent.common.utils.restutil.http_request. The GONE status is handled + # in the internal _http_request, which we mock below. + protocol.do_not_mock = lambda method, url: method == "GET" and self.is_host_plugin_vm_settings_request(url) + + request_headers = [] # we expect a retry with new headers and use this array to persist the headers of each request + + def http_get_vm_settings(_method, _host, _relative_url, **kwargs): + request_headers.append(kwargs["headers"]) + if len(request_headers) == 1: + # Fail the first request with status GONE and update the mock data to return the new Container ID and RoleConfigName that should be + # used in the headers of the retry request. + protocol.mock_wire_data.set_container_id("GET_VM_SETTINGS_TEST_CONTAINER_ID") + protocol.mock_wire_data.set_role_config_name("GET_VM_SETTINGS_TEST_ROLE_CONFIG_NAME") + return MockHttpResponse(status=httpclient.GONE) + # For this test we are interested only on the retry logic, so the second request (the retry) is not important; we use NOT_MODIFIED (304) for simplicity. 
+ return MockHttpResponse(status=httpclient.NOT_MODIFIED) + + with patch("azurelinuxagent.common.utils.restutil._http_request", side_effect=http_get_vm_settings): + protocol.client.update_goal_state() + + self.assertEqual(2, len(request_headers), "We expected 2 requests for vmSettings: the original request and the retry request") + self.assertEqual("GET_VM_SETTINGS_TEST_CONTAINER_ID", request_headers[1][hostplugin._HEADER_CONTAINER_ID], "The retry request did not include the expected header for the ContainerId") + self.assertEqual("GET_VM_SETTINGS_TEST_ROLE_CONFIG_NAME", request_headers[1][hostplugin._HEADER_HOST_CONFIG_NAME], "The retry request did not include the expected header for the RoleConfigName") + def test_fetch_goal_state_should_raise_on_incomplete_goal_state(self): with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: protocol.mock_wire_data.data_files = mockwiredata.DATA_FILE_NOOP_GS @@ -166,13 +230,13 @@ def _create_protocol_ws_and_hgap_in_sync(): # that may invalidate this setup. vm_settings, _ = protocol.client.get_host_plugin().fetch_vm_settings() if vm_settings.etag != etag: - raise Exception("The HostGAPlugin is no in sync. Expected ETag {0}. Got {1}".format(etag, vm_settings.etag)) + raise Exception("The HostGAPlugin is not in sync. Expected ETag {0}. Got {1}".format(etag, vm_settings.etag)) if vm_settings.source != GoalStateSource.Fabric: raise Exception("The HostGAPlugin should be returning a Fabric goal state. Got {0}".format(vm_settings.source)) goal_state = GoalState(protocol.client) if goal_state.incarnation != incarnation: - raise Exception("The WireServer is no in sync. Expected incarnation {0}. Got {1}".format(incarnation, goal_state.incarnation)) + raise Exception("The WireServer is not in sync. Expected incarnation {0}. 
Got {1}".format(incarnation, goal_state.incarnation)) if goal_state.extensions_goal_state.correlation_id != vm_settings.correlation_id: raise Exception( @@ -267,3 +331,30 @@ def test_it_should_use_the_most_recent_goal_state(self): goal_state.update() self._assert_goal_state(goal_state, '222', channel=GoalStateChannel.WireServer, source=GoalStateSource.Fabric) + + def test_it_should_mark_outdated_goal_states(self): + with GoalStateTestCase._create_protocol_ws_and_hgap_in_sync() as protocol: + goal_state = GoalState(protocol.client) + initial_incarnation = goal_state.incarnation + initial_timestamp = goal_state.extensions_goal_state.created_on_timestamp + + # Make the most recent goal state FastTrack + timestamp = datetime.datetime.utcnow() + datetime.timedelta(seconds=15) + protocol.mock_wire_data.set_vm_settings_source(GoalStateSource.FastTrack) + protocol.mock_wire_data.set_etag('444444', timestamp) + + goal_state.update() + + # Update the goal state after the HGAP plugin stops supporting vmSettings + def http_get_handler(url, *_, **__): + if self.is_host_plugin_vm_settings_request(url): + return MockHttpResponse(httpclient.NOT_FOUND) + return None + + protocol.set_http_handlers(http_get_handler=http_get_handler) + + goal_state.update() + + self._assert_goal_state(goal_state, initial_incarnation, channel=GoalStateChannel.WireServer, source=GoalStateSource.Fabric) + self.assertEqual(initial_timestamp, goal_state.extensions_goal_state.created_on_timestamp, "The timestamp of the updated goal state is incorrect") + self.assertTrue(goal_state.extensions_goal_state.is_outdated, "The updated goal state should be marked as outdated") diff --git a/tests/protocol/test_hostplugin.py b/tests/protocol/test_hostplugin.py index f3b5a5889e..16bb7ef0b6 100644 --- a/tests/protocol/test_hostplugin.py +++ b/tests/protocol/test_hostplugin.py @@ -19,17 +19,20 @@ import contextlib import datetime import json +import os.path import sys import unittest import 
azurelinuxagent.common.protocol.hostplugin as hostplugin import azurelinuxagent.common.protocol.restapi as restapi import azurelinuxagent.common.protocol.wire as wire +from azurelinuxagent.common import conf from azurelinuxagent.common.errorstate import ErrorState from azurelinuxagent.common.exception import HttpError, ResourceGoneError, ProtocolError from azurelinuxagent.common.future import ustr, httpclient from azurelinuxagent.common.osutil.default import UUID_PATTERN -from azurelinuxagent.common.protocol.hostplugin import API_VERSION, _VmSettingsErrorReporter, VmSettingsNotSupported +from azurelinuxagent.common.protocol.hostplugin import API_VERSION, _VmSettingsErrorReporter, VmSettingsNotSupported, VmSettingsSupportStopped +from azurelinuxagent.common.protocol.extensions_goal_state import GoalStateSource from azurelinuxagent.common.protocol.goal_state import GoalState from azurelinuxagent.common.utils import restutil from azurelinuxagent.common.version import AGENT_VERSION, AGENT_NAME @@ -926,40 +929,23 @@ def http_get_handler(url, *_, **__): def get_telemetry_messages(): return [kwargs["message"] for _, kwargs in add_event.call_args_list if kwargs["op"] == "VmSettings"] - def get_log_messages(): - return [arg[0][0] for arg in logger_info.call_args_list if "[VmSettings]" in arg[0][0]] - - def fetch_vm_settings(): - try: - host_plugin.fetch_vm_settings() - except ProtocolError: - pass # All calls produce an error; ignore it - with patch("azurelinuxagent.common.protocol.hostplugin.add_event") as add_event: - with patch('azurelinuxagent.common.logger.info') as logger_info: - host_plugin = protocol.client.get_host_plugin() - for _ in range(_VmSettingsErrorReporter._MaxTelemetryErrors + 3): - fetch_vm_settings() + for _ in range(_VmSettingsErrorReporter._MaxErrors + 3): + self._fetch_vm_settings_ignoring_errors(protocol) - telemetry_messages = get_telemetry_messages() - self.assertEqual(_VmSettingsErrorReporter._MaxTelemetryErrors, len(telemetry_messages), "The 
number of errors reported to telemetry is not the max allowed (got: {0})".format(telemetry_messages)) - - log_messages = get_log_messages() - self.assertEqual(_VmSettingsErrorReporter._MaxLogErrors, len(log_messages), "The number of errors reported to the local log is not the max allowed (got: {0})".format(telemetry_messages)) + telemetry_messages = get_telemetry_messages() + self.assertEqual(_VmSettingsErrorReporter._MaxErrors, len(telemetry_messages), "The number of errors reported to telemetry is not the max allowed (got: {0})".format(telemetry_messages)) # Reset the error reporter and verify that additional errors are reported - host_plugin._vm_settings_error_reporter._next_period = datetime.datetime.now() - fetch_vm_settings() # this triggers the reset + protocol.client.get_host_plugin()._vm_settings_error_reporter._next_period = datetime.datetime.now() + self._fetch_vm_settings_ignoring_errors(protocol) # this triggers the reset with patch("azurelinuxagent.common.protocol.hostplugin.add_event") as add_event: - fetch_vm_settings() + self._fetch_vm_settings_ignoring_errors(protocol) telemetry_messages = get_telemetry_messages() self.assertEqual(1, len(telemetry_messages), "Expected additional errors to be reported to telemetry in the next period (got: {0})".format(telemetry_messages)) - log_messages = get_log_messages() - self.assertEqual(1, len(log_messages), "Expected additional errors to be reported to the local log in the next period (got: {0})".format(telemetry_messages)) - def test_it_should_stop_issuing_vm_settings_requests_when_api_is_not_supported(self): with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: def http_get_handler(url, *_, **__): @@ -974,15 +960,54 @@ def get_vm_settings_call_count(): self._fetch_vm_settings_ignoring_errors(protocol) self.assertEqual(1, get_vm_settings_call_count(), "There should have been an initial call to vmSettings.") - protocol.client.update_goal_state() - protocol.client.update_goal_state() + 
self._fetch_vm_settings_ignoring_errors(protocol) + self._fetch_vm_settings_ignoring_errors(protocol) self.assertEqual(1, get_vm_settings_call_count(), "Additional calls to update_goal_state should not have produced extra calls to vmSettings.") # reset the vmSettings check period; this should restart the calls to the API - protocol.client._host_plugin._host_plugin_supports_vm_settings_next_check = datetime.datetime.now() + protocol.client._host_plugin._supports_vm_settings_next_check = datetime.datetime.now() protocol.client.update_goal_state() self.assertEqual(2, get_vm_settings_call_count(), "A second call to vmSettings was expecting after the check period has elapsed.") + def test_it_should_raise_when_the_vm_settings_api_stops_being_supported(self): + def http_get_handler(url, *_, **__): + if self.is_host_plugin_vm_settings_request(url): + return MockHttpResponse(httpclient.NOT_FOUND) # HostGAPlugin returns 404 if the API is not supported + return None + + with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: + host_ga_plugin = protocol.client.get_host_plugin() + + # Do an initial call to ensure the API is supported + vm_settings, _ = host_ga_plugin.fetch_vm_settings() + + # Now return NOT_FOUND to indicate the API is not supported + protocol.set_http_handlers(http_get_handler=http_get_handler) + + with self.assertRaises(VmSettingsSupportStopped) as cm: + host_ga_plugin.fetch_vm_settings() + + self.assertEqual(vm_settings.created_on_timestamp, cm.exception.timestamp) + + def test_it_should_save_the_timestamp_of_the_most_recent_fast_track_goal_state(self): + with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: + host_ga_plugin = protocol.client.get_host_plugin() + + vm_settings, _ = host_ga_plugin.fetch_vm_settings() + + state_file = os.path.join(conf.get_lib_dir(), "fast_track.json") + self.assertTrue(os.path.exists(state_file), "The timestamp was not saved (can't find {0})".format(state_file)) + + with open(state_file, 
"r") as state_file_: + state = json.load(state_file_) + self.assertEqual(vm_settings.created_on_timestamp, state["timestamp"], "{0} does not contain the expected timestamp".format(state_file)) + + # A fabric goal state should remove the state file + protocol.mock_wire_data.set_vm_settings_source(GoalStateSource.Fabric) + + _ = host_ga_plugin.fetch_vm_settings() + + self.assertFalse(os.path.exists(state_file), "{0} was not removed by a Fabric goal state".format(state_file)) class MockResponse: def __init__(self, body, status_code, reason=''): diff --git a/tests/protocol/test_wire.py b/tests/protocol/test_wire.py index 0b619c66d3..0cc8a01e9a 100644 --- a/tests/protocol/test_wire.py +++ b/tests/protocol/test_wire.py @@ -26,13 +26,10 @@ from azurelinuxagent.common.agent_supported_feature import SupportedFeatureNames, get_supported_feature_by_name, \ get_agent_supported_features_list_for_crp -from azurelinuxagent.common.future import httpclient from azurelinuxagent.common.event import WALAEventOperation from azurelinuxagent.common.exception import ResourceGoneError, ProtocolError, \ ExtensionDownloadError, HttpError -from azurelinuxagent.common.protocol import hostplugin from azurelinuxagent.common.protocol.extensions_goal_state_from_extensions_config import ExtensionsGoalStateFromExtensionsConfig -from azurelinuxagent.common.protocol.extensions_goal_state_from_vm_settings import ExtensionsGoalStateFromVmSettings from azurelinuxagent.common.protocol.hostplugin import HostPluginProtocol from azurelinuxagent.common.protocol.wire import WireProtocol, WireClient, \ StatusBlob, VMStatus @@ -1097,67 +1094,6 @@ def test_forced_update_should_update_the_goal_state_and_the_host_plugin_when_the self.assertEqual(protocol.client.get_host_plugin().container_id, new_container_id) self.assertEqual(protocol.client.get_host_plugin().role_config_name, new_role_config_name) - def test_it_should_retry_get_vm_settings_on_resource_gone_error(self): - # Requests to the hostgaplugin incude the 
Container ID and the RoleConfigName as headers; when the hostgaplugin returns GONE (HTTP status 410) the agent - # needs to get a new goal state and retry the request with updated values for the Container ID and RoleConfigName headers. - with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: - # Do not mock the vmSettings request at the level of azurelinuxagent.common.utils.restutil.http_request. The GONE status is handled - # in the internal _http_request, which we mock below. - protocol.do_not_mock = lambda method, url: method == "GET" and self.is_host_plugin_vm_settings_request(url) - - request_headers = [] # we expect a retry with new headers and use this array to persist the headers of each request - - def http_get_vm_settings(_method, _host, _relative_url, **kwargs): - request_headers.append(kwargs["headers"]) - if len(request_headers) == 1: - # Fail the first request with status GONE and update the mock data to return the new Container ID and RoleConfigName that should be - # used in the headers of the retry request. - protocol.mock_wire_data.set_container_id("GET_VM_SETTINGS_TEST_CONTAINER_ID") - protocol.mock_wire_data.set_role_config_name("GET_VM_SETTINGS_TEST_ROLE_CONFIG_NAME") - return MockHttpResponse(status=httpclient.GONE) - # For this test we are interested only on the retry logic, so the second request (the retry) is not important; we use NOT_MODIFIED (304) for simplicity. 
- return MockHttpResponse(status=httpclient.NOT_MODIFIED) - - with patch("azurelinuxagent.common.utils.restutil._http_request", side_effect=http_get_vm_settings): - protocol.client.update_goal_state() - - self.assertEqual(2, len(request_headers), "We expected 2 requests for vmSettings: the original request and the retry request") - self.assertEqual("GET_VM_SETTINGS_TEST_CONTAINER_ID", request_headers[1][hostplugin._HEADER_CONTAINER_ID], "The retry request did not include the expected header for the ContainerId") - self.assertEqual("GET_VM_SETTINGS_TEST_ROLE_CONFIG_NAME", request_headers[1][hostplugin._HEADER_HOST_CONFIG_NAME], "The retry request did not include the expected header for the RoleConfigName") - - def test_it_should_use_vm_settings_by_default(self): - with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: - extensions_goal_state = protocol.get_goal_state().extensions_goal_state - self.assertTrue( - isinstance(extensions_goal_state, ExtensionsGoalStateFromVmSettings), - 'The extensions goal state should have been created from the vmSettings (got: {0})'.format(type(extensions_goal_state))) - - def _assert_is_extensions_goal_state_from_extensions_config(self, extensions_goal_state): - self.assertTrue( - isinstance(extensions_goal_state, ExtensionsGoalStateFromExtensionsConfig), - 'The extensions goal state should have been created from the extensionsConfig (got: {0})'.format(type(extensions_goal_state))) - - def test_it_should_use_extensions_config_when_fast_track_is_disabled(self): - with patch("azurelinuxagent.common.conf.get_enable_fast_track", return_value=False): - with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: - self._assert_is_extensions_goal_state_from_extensions_config(protocol.get_goal_state().extensions_goal_state) - - def test_it_should_use_extensions_config_when_fast_track_is_not_supported(self): - def http_get_handler(url, *_, **__): - if self.is_host_plugin_vm_settings_request(url): - return 
MockHttpResponse(httpclient.NOT_FOUND) - return None - - with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS, http_get_handler=http_get_handler) as protocol: - self._assert_is_extensions_goal_state_from_extensions_config(protocol.get_goal_state().extensions_goal_state) - - def test_it_should_use_extensions_config_when_the_host_ga_plugin_version_is_not_supported(self): - data_file = mockwiredata.DATA_FILE_VM_SETTINGS.copy() - data_file["vm_settings"] = "hostgaplugin/vm_settings-unsupported_version.json" - - with mock_wire_protocol(data_file) as protocol: - self._assert_is_extensions_goal_state_from_extensions_config(protocol.get_goal_state().extensions_goal_state) - class UpdateHostPluginFromGoalStateTestCase(AgentTestCase): """ From e09d4497aa27fe94bb0d4ec3a8ab76079eed3ab6 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Fri, 8 Apr 2022 13:01:30 -0700 Subject: [PATCH 56/84] Detect outdated Fabric goal states when FastTrack is disabled (#2544) Co-authored-by: narrieta --- azurelinuxagent/common/protocol/goal_state.py | 2 +- azurelinuxagent/common/protocol/hostplugin.py | 13 +++- azurelinuxagent/ga/update.py | 39 ++++++++--- tests/ga/mocks.py | 68 +++++++++++++++++++ tests/ga/test_report_status.py | 43 ++---------- tests/ga/test_update.py | 55 ++++++++++++++- 6 files changed, 169 insertions(+), 51 deletions(-) create mode 100644 tests/ga/mocks.py diff --git a/azurelinuxagent/common/protocol/goal_state.py b/azurelinuxagent/common/protocol/goal_state.py index 6d362438d5..ae01e4d22c 100644 --- a/azurelinuxagent/common/protocol/goal_state.py +++ b/azurelinuxagent/common/protocol/goal_state.py @@ -191,7 +191,7 @@ def _restore_wire_server_goal_state(self, incarnation, xml_text, xml_doc, vm_set self._extensions_goal_state = self._fetch_full_wire_server_goal_state(incarnation, xml_doc) if self._extensions_goal_state.created_on_timestamp < vm_settings_support_stopped_error.timestamp: self._extensions_goal_state.is_outdated = True - msg = "Fetched a Fabric goal 
state older than the most recent FastTrack goal state; will skip it. (Fabric: {0} FastTrack: {1})".format( + msg = "Fetched a Fabric goal state older than the most recent FastTrack goal state; will skip it.\nFabric: {0}\nFastTrack: {1}".format( self._extensions_goal_state.created_on_timestamp, vm_settings_support_stopped_error.timestamp) logger.info(msg) add_event(op=WALAEventOperation.VmSettings, message=msg, is_success=True) diff --git a/azurelinuxagent/common/protocol/hostplugin.py b/azurelinuxagent/common/protocol/hostplugin.py index 477ccf9bd2..cf254be304 100644 --- a/azurelinuxagent/common/protocol/hostplugin.py +++ b/azurelinuxagent/common/protocol/hostplugin.py @@ -99,7 +99,7 @@ def __init__(self, endpoint): else: self._supports_vm_settings = True self._supports_vm_settings_next_check = datetime.datetime.now() - self._fast_track_timestamp = HostPluginProtocol._get_fast_track_timestamp() + self._fast_track_timestamp = HostPluginProtocol.get_fast_track_timestamp() @staticmethod def _extract_deployment_id(role_config_name): @@ -437,13 +437,20 @@ def clear_fast_track_state(): ustr(e)) @staticmethod - def _get_fast_track_timestamp(): + def get_fast_track_timestamp(): + """ + Returns the timestamp of the most recent FastTrack goal state retrieved by fetch_vm_settings(), or None if the most recent + goal state was Fabric or fetch_vm_settings() has not been invoked. + """ + if not os.path.exists(HostPluginProtocol._get_fast_track_state_file()): + return None + try: with open(HostPluginProtocol._get_fast_track_state_file(), "r") as file_: return json.load(file_)["timestamp"] except Exception as e: logger.warn("Can't retrieve the timestamp for the most recent Fast Track goal state ({0}), will assume the current time. 
Error: {1}", - HostPluginProtocol._get_fast_track_state_file(), ustr(e)) + HostPluginProtocol._get_fast_track_state_file(), ustr(e)) return timeutil.create_timestamp(datetime.datetime.utcnow()) def fetch_vm_settings(self): diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py index 7e8f08b613..2118ca6831 100644 --- a/azurelinuxagent/ga/update.py +++ b/azurelinuxagent/ga/update.py @@ -45,7 +45,7 @@ from azurelinuxagent.common.future import ustr from azurelinuxagent.common.osutil import get_osutil, systemd from azurelinuxagent.common.persist_firewall_rules import PersistFirewallRulesHandler -from azurelinuxagent.common.protocol.hostplugin import HostPluginProtocol +from azurelinuxagent.common.protocol.hostplugin import HostPluginProtocol, VmSettingsNotSupported from azurelinuxagent.common.protocol.restapi import VMAgentUpdateStatus, VMAgentUpdateStatuses, ExtHandlerPackageList, \ VERSION_0 from azurelinuxagent.common.protocol.util import get_protocol_util @@ -332,10 +332,7 @@ def run(self, debug=False): # protocol = self.protocol_util.get_protocol() - while not self._try_update_goal_state(protocol): - # Don't proceed with processing anything until we're able to fetch the first goal state. - # self._try_update_goal_state() has its own logging and error handling so not adding anything here. - time.sleep(conf.get_goal_state_period()) + self._initialize_goal_state(protocol) # Initialize the common parameters for telemetry events initialize_event_logger_vminfo_common_parameters(protocol) @@ -392,8 +389,7 @@ def run(self, debug=False): all_thread_handlers.append(get_collect_logs_handler()) # Launch all monitoring threads - for thread_handler in all_thread_handlers: - thread_handler.run() + self._start_threads(all_thread_handlers) logger.info("Goal State Period: {0} sec. 
This indicates how often the agent checks for new goal states and reports status.", self._goal_state_period) @@ -423,6 +419,26 @@ def run(self, debug=False): self._shutdown() sys.exit(0) + def _initialize_goal_state(self, protocol): + # + # Block until we can fetch the first goal state (self._try_update_goal_state() does its own logging and error handling). + # + while not self._try_update_goal_state(protocol): + time.sleep(conf.get_goal_state_period()) + + # + # If FastTrack is disabled we need to check if the current goal state (which will be retrieved using the WireServer and + # hence will be a Fabric goal state) is outdated. + # + if not conf.get_enable_fast_track(): + last_fast_track_timestamp = HostPluginProtocol.get_fast_track_timestamp() + if last_fast_track_timestamp is not None: + egs = protocol.client.get_goal_state().extensions_goal_state + if egs.created_on_timestamp < last_fast_track_timestamp: + egs.is_outdated = True + logger.info("The current Fabric goal state is older than the most recent FastTrack goal state; will skip it.\nFabric: {0}\nFastTrack: {1}", + egs.created_on_timestamp, last_fast_track_timestamp) + def _get_vm_size(self, protocol): """ Including VMSize is meant to capture the architecture of the VM (i.e. 
arm64 VMs will @@ -447,6 +463,10 @@ def _check_daemon_running(self, debug): if not debug and self._is_orphaned: raise ExitException("Agent {0} is an orphan -- exiting".format(CURRENT_AGENT)) + def _start_threads(self, all_thread_handlers): + for thread_handler in all_thread_handlers: + thread_handler.run() + def _check_threads_running(self, all_thread_handlers): # Check that all the threads are still running for thread_handler in all_thread_handlers: @@ -469,7 +489,10 @@ def _try_update_goal_state(self, protocol): add_event(AGENT_NAME, op=WALAEventOperation.FetchGoalState, version=CURRENT_VERSION, is_success=True, message=message, log_event=False) logger.info(message) - self._supports_fast_track = conf.get_enable_fast_track() and protocol.client.get_host_plugin().check_vm_settings_support() + try: + self._supports_fast_track = conf.get_enable_fast_track() and protocol.client.get_host_plugin().check_vm_settings_support() + except VmSettingsNotSupported: + self._supports_fast_track = False except Exception as e: if not self._last_try_update_goal_state_failed: diff --git a/tests/ga/mocks.py b/tests/ga/mocks.py new file mode 100644 index 0000000000..2563d402fe --- /dev/null +++ b/tests/ga/mocks.py @@ -0,0 +1,68 @@ +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# Requires Python 2.6+ and Openssl 1.0+ +# + +import contextlib + +from mock import PropertyMock +from azurelinuxagent.ga.exthandlers import ExtHandlersHandler +from azurelinuxagent.ga.remoteaccess import RemoteAccessHandler +from azurelinuxagent.ga.update import UpdateHandler, get_update_handler +from tests.tools import patch, Mock, mock_sleep + + +@contextlib.contextmanager +def mock_update_handler(protocol, iterations=1, on_new_iteration=lambda _: None, exthandlers_handler=None, remote_access_handler=None, enable_agent_updates=False): + """ + Creates a mock UpdateHandler that executes its main loop for the given 'iterations'. + If 'on_new_iteration' is given, it is invoked at the beginning of each iteration passing the iteration number as argument. + Network requests (e.g. requests for the goal state) are done using the given 'protocol'. + The mock UpdateHandler uses mock no-op ExtHandlersHandler and RemoteAccessHandler, unless they are given by 'exthandlers_handler' and 'remote_access_handler'. + Agent updates are disabled, unless specified otherwise with 'enable_agent_updates'. + Background threads (monitor, env, telemetry, etc) are not started. 
+ """ + iteration_count = [0] + + def is_running(*args): # mock for property UpdateHandler.is_running, which controls the main loop + if len(args) == 0: + # getter + iteration_count[0] += 1 + on_new_iteration(iteration_count[0]) + return iteration_count[0] <= iterations + else: + # setter + return None + + if exthandlers_handler is None: + exthandlers_handler = ExtHandlersHandler(protocol) + + if remote_access_handler is None: + remote_access_handler = RemoteAccessHandler(protocol) + + with patch("azurelinuxagent.ga.exthandlers.get_exthandlers_handler", return_value=exthandlers_handler): + with patch("azurelinuxagent.ga.remoteaccess.get_remote_access_handler", return_value=remote_access_handler): + with patch("azurelinuxagent.common.conf.get_autoupdate_enabled", return_value=enable_agent_updates): + with patch.object(UpdateHandler, "is_running", PropertyMock(side_effect=is_running)): + with patch.object(UpdateHandler, "_check_daemon_running"): + with patch.object(UpdateHandler, "_start_threads"): + with patch.object(UpdateHandler, "_check_threads_running"): + with patch('time.sleep', side_effect=lambda _: mock_sleep(0.001)): + with patch('sys.exit', side_effect=lambda _: 0): + update_handler = get_update_handler() + update_handler.protocol_util.get_protocol = Mock(return_value=protocol) + + yield update_handler + diff --git a/tests/ga/test_report_status.py b/tests/ga/test_report_status.py index acd3f6653c..e4e1a96421 100644 --- a/tests/ga/test_report_status.py +++ b/tests/ga/test_report_status.py @@ -1,14 +1,13 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the Apache License. 
-import contextlib import json -from mock import PropertyMock from azurelinuxagent.ga.exthandlers import ExtHandlersHandler -from azurelinuxagent.ga.update import UpdateHandler, get_update_handler +from azurelinuxagent.ga.update import get_update_handler +from tests.ga.mocks import mock_update_handler from tests.protocol.mocks import mock_wire_protocol, MockHttpResponse -from tests.tools import AgentTestCase, patch, Mock, mock_sleep +from tests.tools import AgentTestCase, patch from tests.protocol import mockwiredata from tests.protocol.HttpRequestPredicates import HttpRequestPredicates @@ -18,38 +17,6 @@ class ReportStatusTestCase(AgentTestCase): Tests for UpdateHandler._report_status() """ - @staticmethod - @contextlib.contextmanager - def _mock_update_handler(protocol, exthandlers_handler=None, iterations=1, on_new_iteration=lambda _: None): - """ - The run() method of the mock handler will execute its main loop for the given 'iterations', and will invoke 'on_new_iteration' when - starting each iteration, passing the iteration number as argument. 
- """ - iteration_count = [0] - - def is_running(*args): # mock for property UpdateHandler.is_running, which controls the main loop - if len(args) == 0: - # getter - iteration_count[0] += 1 - on_new_iteration(iteration_count[0]) - return iteration_count[0] <= iterations - else: - # setter - return None - - if exthandlers_handler is None: - exthandlers_handler = ExtHandlersHandler(protocol) - - with patch("azurelinuxagent.ga.exthandlers.get_exthandlers_handler", return_value=exthandlers_handler): - with patch("azurelinuxagent.common.conf.get_autoupdate_enabled", return_value=False): # skip agent update - with patch.object(UpdateHandler, "is_running", PropertyMock(side_effect=is_running)): - with patch('time.sleep', side_effect=lambda _: mock_sleep(0.001)): - with patch('sys.exit', side_effect=lambda _: 0): - update_handler = get_update_handler() - update_handler.protocol_util.get_protocol = Mock(return_value=protocol) - - yield update_handler - def test_update_handler_should_report_status_when_fetch_goal_state_fails(self): # The test executes the main loop of UpdateHandler.run() twice, failing requests for the goal state # on the second iteration. We expect the 2 iterations to report status, despite the goal state failure. 
@@ -66,7 +33,7 @@ def on_new_iteration(iteration): with mock_wire_protocol(mockwiredata.DATA_FILE, http_get_handler=http_get_handler) as protocol: exthandlers_handler = ExtHandlersHandler(protocol) with patch.object(exthandlers_handler, "run", wraps=exthandlers_handler.run) as exthandlers_handler_run: - with ReportStatusTestCase._mock_update_handler(protocol, exthandlers_handler=exthandlers_handler, iterations=2, on_new_iteration=on_new_iteration) as update_handler: + with mock_update_handler(protocol, iterations=2, on_new_iteration=on_new_iteration, exthandlers_handler=exthandlers_handler) as update_handler: update_handler.run(debug=True) self.assertEqual(1, exthandlers_handler_run.call_count, "Extensions should have been executed only once.") @@ -139,7 +106,7 @@ def http_get_handler(url, *_, **__): self._test_supported_features_includes_fast_track(protocol, False) def _test_supported_features_includes_fast_track(self, protocol, expected): - with ReportStatusTestCase._mock_update_handler(protocol) as update_handler: + with mock_update_handler(protocol) as update_handler: update_handler.run(debug=True) status = json.loads(protocol.mock_wire_data.status_blobs[0]) diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index d9e1fe04d8..8a648a8c41 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -31,7 +31,7 @@ from azurelinuxagent.common import conf from azurelinuxagent.common.event import EVENTS_DIRECTORY, WALAEventOperation from azurelinuxagent.common.exception import ProtocolError, UpdateError, ResourceGoneError, HttpError -from azurelinuxagent.common.future import ustr +from azurelinuxagent.common.future import ustr, httpclient from azurelinuxagent.common.persist_firewall_rules import PersistFirewallRulesHandler from azurelinuxagent.common.protocol.hostplugin import URI_FORMAT_GET_API_VERSIONS, HOST_PLUGIN_PORT, \ URI_FORMAT_GET_EXTENSION_ARTIFACT, HostPluginProtocol @@ -52,6 +52,7 @@ get_update_handler, ORPHAN_POLL_INTERVAL, 
AGENT_PARTITION_FILE, AGENT_ERROR_FILE, ORPHAN_WAIT_INTERVAL, \ CHILD_LAUNCH_RESTART_MAX, CHILD_HEALTH_INTERVAL, GOAL_STATE_PERIOD_EXTENSIONS_DISABLED, UpdateHandler, \ READONLY_FILE_GLOBS, ExtensionsSummary, AgentUpgradeType +from tests.ga.mocks import mock_update_handler from tests.protocol.mocks import mock_wire_protocol, MockHttpResponse from tests.protocol.mockwiredata import DATA_FILE, DATA_FILE_MULTIPLE_EXT from tests.tools import AgentTestCase, AgentTestCaseWithGetVmSizeMock, data_dir, DEFAULT, patch, load_bin_data, Mock, MagicMock, \ @@ -2832,6 +2833,57 @@ def test_it_should_write_the_agent_status_to_the_history_folder(self): status_file = os.path.join(matches[0], AGENT_STATUS_FILE) self.assertTrue(os.path.exists(status_file), "Could not find {0}".format(status_file)) + @staticmethod + def _prepare_fast_track_goal_state(): + """ + Creates a set of mock wire data where the most recent goal state is a FastTrack goal state; also + invokes HostPluginProtocol.fetch_vm_settings() to save the Fast Track status to disk + """ + # Do a query for the vmSettings; this would retrieve a FastTrack goal state and keep track of its timestamp + mock_wire_data_file = mockwiredata.DATA_FILE_VM_SETTINGS.copy() + with mock_wire_protocol(mock_wire_data_file) as protocol: + protocol.mock_wire_data.set_etag("0123456789") + _ = protocol.client.get_host_plugin().fetch_vm_settings() + return mock_wire_data_file + + def test_it_should_mark_outdated_goal_states_on_service_restart_when_fast_track_is_disabled(self): + data_file = self._prepare_fast_track_goal_state() + + with patch("azurelinuxagent.common.conf.get_enable_fast_track", return_value=False): + with mock_wire_protocol(data_file) as protocol: + with mock_update_handler(protocol) as update_handler: + update_handler.run() + + self.assertTrue(protocol.client.get_goal_state().extensions_goal_state.is_outdated) + + @staticmethod + def _http_get_vm_settings_handler_not_found(url, *_, **__): + if 
HttpRequestPredicates.is_host_plugin_vm_settings_request(url): + return MockHttpResponse(httpclient.NOT_FOUND) # HostGAPlugin returns 404 if the API is not supported + return None + + def test_it_should_mark_outdated_goal_states_on_service_restart_when_host_ga_plugin_stops_supporting_vm_settings(self): + data_file = self._prepare_fast_track_goal_state() + + with mock_wire_protocol(data_file, http_get_handler=self._http_get_vm_settings_handler_not_found) as protocol: + with mock_update_handler(protocol) as update_handler: + update_handler.run() + + self.assertTrue(protocol.client.get_goal_state().extensions_goal_state.is_outdated) + + def test_it_should_clear_the_timestamp_for_the_most_recent_fast_track_goal_state(self): + data_file = self._prepare_fast_track_goal_state() + + if HostPluginProtocol.get_fast_track_timestamp() is None: + raise Exception("The test setup did not save the Fast Track state") + + with patch("azurelinuxagent.common.conf.get_enable_fast_track", return_value=False): + with mock_wire_protocol(data_file) as protocol: + with mock_update_handler(protocol) as update_handler: + update_handler.run() + + self.assertIsNone(HostPluginProtocol.get_fast_track_timestamp(), "The Fast Track state was not cleared") + class HeartbeatTestCase(AgentTestCase): @@ -2890,6 +2942,7 @@ def validate_single_heartbeat_event_matches_vm_size(vm_size): validate_single_heartbeat_event_matches_vm_size("TestVmSizeValue") + class GoalStateIntervalTestCase(AgentTestCase): def test_initial_goal_state_period_should_default_to_goal_state_period(self): configuration_provider = conf.ConfigurationProvider() From a5bb8695f84ac7e5aa86bd971f0d97b7472a889c Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Tue, 12 Apr 2022 11:14:52 -0700 Subject: [PATCH 57/84] Set agent version to 2.8.0.0 (#2545) Co-authored-by: narrieta --- azurelinuxagent/common/version.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/azurelinuxagent/common/version.py 
b/azurelinuxagent/common/version.py index ff9c903b93..85a0d5624c 100644 --- a/azurelinuxagent/common/version.py +++ b/azurelinuxagent/common/version.py @@ -207,9 +207,9 @@ def has_logrotate(): # IMPORTANT: Please be sure that the version is always 9.9.9.9 on the develop branch. Automation requires this, otherwise # DCR may test the wrong agent version. # -# When doing a release, be sure to use the actual agent version. Current agent version: 2.4.0.0 +# When doing a release, be sure to use the actual agent version. # -AGENT_VERSION = '9.9.9.9' +AGENT_VERSION = '2.8.0.0' AGENT_LONG_VERSION = "{0}-{1}".format(AGENT_NAME, AGENT_VERSION) AGENT_DESCRIPTION = """ The Azure Linux Agent supports the provisioning and running of Linux From 50eaac1de1d707e4e0fcd74616cfef80ae54d463 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Wed, 13 Apr 2022 13:08:59 -0700 Subject: [PATCH 58/84] Update HGAP minimum required version for FastTrack (#2549) Co-authored-by: narrieta --- azurelinuxagent/common/protocol/hostplugin.py | 5 ++--- .../vm_settings-difference_in_required_features.json | 2 +- tests/data/hostgaplugin/vm_settings-empty_depends_on.json | 2 +- tests/data/hostgaplugin/vm_settings-invalid_blob_type.json | 2 +- .../hostgaplugin/vm_settings-no_extension_manifests.json | 2 +- .../data/hostgaplugin/vm_settings-no_status_upload_blob.json | 2 +- tests/data/hostgaplugin/vm_settings-out-of-sync.json | 2 +- tests/data/hostgaplugin/vm_settings-parse_error.json | 2 +- tests/data/hostgaplugin/vm_settings-requested_version.json | 2 +- tests/data/hostgaplugin/vm_settings.json | 2 +- 10 files changed, 11 insertions(+), 12 deletions(-) diff --git a/azurelinuxagent/common/protocol/hostplugin.py b/azurelinuxagent/common/protocol/hostplugin.py index cf254be304..81b9062566 100644 --- a/azurelinuxagent/common/protocol/hostplugin.py +++ b/azurelinuxagent/common/protocol/hostplugin.py @@ -547,9 +547,8 @@ def format_message(msg): logger.info(message) add_event(op=WALAEventOperation.HostPlugin, 
message=message, is_success=True) - # Don't support HostGAPlugin versions older than 123 - # TODO: update the minimum version to 1.0.8.123 before release - if vm_settings.host_ga_plugin_version < FlexibleVersion("1.0.8.117"): + # Don't support HostGAPlugin versions older than 124 + if vm_settings.host_ga_plugin_version < FlexibleVersion("1.0.8.124"): raise_not_supported() self._supports_vm_settings = True diff --git a/tests/data/hostgaplugin/vm_settings-difference_in_required_features.json b/tests/data/hostgaplugin/vm_settings-difference_in_required_features.json index 9cfb42752c..9fd2e4f722 100644 --- a/tests/data/hostgaplugin/vm_settings-difference_in_required_features.json +++ b/tests/data/hostgaplugin/vm_settings-difference_in_required_features.json @@ -1,5 +1,5 @@ { - "hostGAPluginVersion": "1.0.8.123", + "hostGAPluginVersion": "1.0.8.124", "vmSettingsSchemaVersion": "0.0", "activityId": "a33f6f53-43d6-4625-b322-1a39651a00c9", "correlationId": "9a47a2a2-e740-4bfc-b11b-4f2f7cfe7d2e", diff --git a/tests/data/hostgaplugin/vm_settings-empty_depends_on.json b/tests/data/hostgaplugin/vm_settings-empty_depends_on.json index 6fa93452cf..94d9f0eb1f 100644 --- a/tests/data/hostgaplugin/vm_settings-empty_depends_on.json +++ b/tests/data/hostgaplugin/vm_settings-empty_depends_on.json @@ -1,5 +1,5 @@ { - "hostGAPluginVersion": "1.0.8.123", + "hostGAPluginVersion": "1.0.8.124", "vmSettingsSchemaVersion": "0.0", "activityId": "2e7f8b5d-f637-4721-b757-cb190d49b4e9", "correlationId": "1bef4c48-044e-4225-8f42-1d1eac1eb158", diff --git a/tests/data/hostgaplugin/vm_settings-invalid_blob_type.json b/tests/data/hostgaplugin/vm_settings-invalid_blob_type.json index 62314a403d..e7945845ac 100644 --- a/tests/data/hostgaplugin/vm_settings-invalid_blob_type.json +++ b/tests/data/hostgaplugin/vm_settings-invalid_blob_type.json @@ -1,5 +1,5 @@ { - "hostGAPluginVersion": "1.0.8.123", + "hostGAPluginVersion": "1.0.8.124", "vmSettingsSchemaVersion": "0.0", "activityId": 
"2e7f8b5d-f637-4721-b757-cb190d49b4e9", "correlationId": "1bef4c48-044e-4225-8f42-1d1eac1eb158", diff --git a/tests/data/hostgaplugin/vm_settings-no_extension_manifests.json b/tests/data/hostgaplugin/vm_settings-no_extension_manifests.json index 7deff8d5eb..b084900b68 100644 --- a/tests/data/hostgaplugin/vm_settings-no_extension_manifests.json +++ b/tests/data/hostgaplugin/vm_settings-no_extension_manifests.json @@ -1,5 +1,5 @@ { - "hostGAPluginVersion": "1.0.8.123", + "hostGAPluginVersion": "1.0.8.124", "vmSettingsSchemaVersion": "0.0", "activityId": "89d50bf1-fa55-4257-8af3-3db0c9f81ab4", "correlationId": "c143f8f0-a66b-4881-8c06-1efd278b0b02", diff --git a/tests/data/hostgaplugin/vm_settings-no_status_upload_blob.json b/tests/data/hostgaplugin/vm_settings-no_status_upload_blob.json index 27ebebcefe..2f70b55762 100644 --- a/tests/data/hostgaplugin/vm_settings-no_status_upload_blob.json +++ b/tests/data/hostgaplugin/vm_settings-no_status_upload_blob.json @@ -1,5 +1,5 @@ { - "hostGAPluginVersion": "1.0.8.123", + "hostGAPluginVersion": "1.0.8.124", "vmSettingsSchemaVersion": "0.0", "activityId": "a33f6f53-43d6-4625-b322-1a39651a00c9", "correlationId": "9a47a2a2-e740-4bfc-b11b-4f2f7cfe7d2e", diff --git a/tests/data/hostgaplugin/vm_settings-out-of-sync.json b/tests/data/hostgaplugin/vm_settings-out-of-sync.json index 737350d698..0aae82031c 100644 --- a/tests/data/hostgaplugin/vm_settings-out-of-sync.json +++ b/tests/data/hostgaplugin/vm_settings-out-of-sync.json @@ -1,5 +1,5 @@ { - "hostGAPluginVersion": "1.0.8.123", + "hostGAPluginVersion": "1.0.8.124", "vmSettingsSchemaVersion": "0.0", "activityId": "AAAAAAA-BBBB-CCCC-DDDD-EEEEEEEEEEEE", "correlationId": "EEEEEEEE-DDDD-CCCC-BBBB-AAAAAAAAAAAA", diff --git a/tests/data/hostgaplugin/vm_settings-parse_error.json b/tests/data/hostgaplugin/vm_settings-parse_error.json index e817a1e888..bae5de4cb8 100644 --- a/tests/data/hostgaplugin/vm_settings-parse_error.json +++ b/tests/data/hostgaplugin/vm_settings-parse_error.json @@ 
-1,5 +1,5 @@ { - "hostGAPluginVersion": "1.0.8.123", + "hostGAPluginVersion": "1.0.8.124", "vmSettingsSchemaVersion": THIS_IS_A_SYNTAX_ERROR, "activityId": "a33f6f53-43d6-4625-b322-1a39651a00c9", "correlationId": "9a47a2a2-e740-4bfc-b11b-4f2f7cfe7d2e", diff --git a/tests/data/hostgaplugin/vm_settings-requested_version.json b/tests/data/hostgaplugin/vm_settings-requested_version.json index 1b5023b117..49e6a27780 100644 --- a/tests/data/hostgaplugin/vm_settings-requested_version.json +++ b/tests/data/hostgaplugin/vm_settings-requested_version.json @@ -1,5 +1,5 @@ { - "hostGAPluginVersion": "1.0.8.123", + "hostGAPluginVersion": "1.0.8.124", "vmSettingsSchemaVersion": "0.0", "activityId": "a33f6f53-43d6-4625-b322-1a39651a00c9", "correlationId": "9a47a2a2-e740-4bfc-b11b-4f2f7cfe7d2e", diff --git a/tests/data/hostgaplugin/vm_settings.json b/tests/data/hostgaplugin/vm_settings.json index b67ee0a23b..96836e7663 100644 --- a/tests/data/hostgaplugin/vm_settings.json +++ b/tests/data/hostgaplugin/vm_settings.json @@ -1,5 +1,5 @@ { - "hostGAPluginVersion": "1.0.8.123", + "hostGAPluginVersion": "1.0.8.124", "vmSettingsSchemaVersion": "0.0", "activityId": "a33f6f53-43d6-4625-b322-1a39651a00c9", "correlationId": "9a47a2a2-e740-4bfc-b11b-4f2f7cfe7d2e", From 9e0bb7b5f2177eb2ae2a5bd8736f2c6f4489ff50 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Wed, 13 Apr 2022 13:18:44 -0700 Subject: [PATCH 59/84] Update agent version to 2.8.0.1 (#2550) Co-authored-by: narrieta --- azurelinuxagent/common/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azurelinuxagent/common/version.py b/azurelinuxagent/common/version.py index 85a0d5624c..1c3b0639a3 100644 --- a/azurelinuxagent/common/version.py +++ b/azurelinuxagent/common/version.py @@ -209,7 +209,7 @@ def has_logrotate(): # # When doing a release, be sure to use the actual agent version. 
# -AGENT_VERSION = '2.8.0.0' +AGENT_VERSION = '2.8.0.1' AGENT_LONG_VERSION = "{0}-{1}".format(AGENT_NAME, AGENT_VERSION) AGENT_DESCRIPTION = """ The Azure Linux Agent supports the provisioning and running of Linux From fa8e3708328de50c899e68b6cfc519c1bda40361 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Tue, 19 Apr 2022 16:51:49 -0700 Subject: [PATCH 60/84] Improvements in waagent.log (#2558) * Improvements in waagent.log * fix function call * update comment * typo Co-authored-by: narrieta --- .../extensions_goal_state_from_vm_settings.py | 2 +- azurelinuxagent/common/protocol/goal_state.py | 29 +++++--- azurelinuxagent/common/utils/archive.py | 9 ++- azurelinuxagent/daemon/main.py | 2 +- azurelinuxagent/ga/exthandlers.py | 3 +- azurelinuxagent/ga/update.py | 36 ++++++---- .../ext_conf-requested_version.xml | 4 +- tests/data/hostgaplugin/ext_conf.xml | 4 +- ...tings-difference_in_required_features.json | 4 +- .../vm_settings-missing_cert.json | 68 +++++++++++++++++++ .../hostgaplugin/vm_settings-out-of-sync.json | 2 +- .../vm_settings-requested_version.json | 4 +- tests/data/hostgaplugin/vm_settings.json | 6 +- tests/protocol/test_goal_state.py | 22 +++++- tests/utils/test_archive.py | 3 +- 15 files changed, 154 insertions(+), 44 deletions(-) create mode 100644 tests/data/hostgaplugin/vm_settings-missing_cert.json diff --git a/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py b/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py index ce99a26079..38cca48f1b 100644 --- a/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py +++ b/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py @@ -289,7 +289,7 @@ def _parse_extensions(self, vm_settings): # "settingsSeqNo": 0, # "settings": [ # { - # "protectedSettingsCertThumbprint": "4C4F304667711036E64AF4894B76EB208A863BD4", + # "protectedSettingsCertThumbprint": "4037FBF5F1F3014F99B5D6C7799E9B20E6871CB3", # "protectedSettings": 
"MIIBsAYJKoZIhvcNAQcDoIIBoTCCAZ0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/IsZAEZFidXaW5kb3dzIEF6dXJlIENSUCBDZXJ0aWZpY2F0ZSBHZW5lcmF0b3ICEFpB/HKM/7evRk+DBz754wUwDQYJKoZIhvcNAQEBBQAEggEADPJwniDeIUXzxNrZCloitFdscQ59Bz1dj9DLBREAiM8jmxM0LLicTJDUv272Qm/4ZQgdqpFYBFjGab/9MX+Ih2x47FkVY1woBkckMaC/QOFv84gbboeQCmJYZC/rZJdh8rCMS+CEPq3uH1PVrvtSdZ9uxnaJ+E4exTPPviIiLIPtqWafNlzdbBt8HZjYaVw+SSe+CGzD2pAQeNttq3Rt/6NjCzrjG8ufKwvRoqnrInMs4x6nnN5/xvobKIBSv4/726usfk8Ug+9Q6Benvfpmre2+1M5PnGTfq78cO3o6mI3cPoBUjp5M0iJjAMGeMt81tyHkimZrEZm6pLa4NQMOEjArBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECC5nVaiJaWt+gAhgeYvxUOYHXw==", # "publicSettings": "{\"GCS_AUTO_CONFIG\":true}" # } diff --git a/azurelinuxagent/common/protocol/goal_state.py b/azurelinuxagent/common/protocol/goal_state.py index ae01e4d22c..6ec0a5ab64 100644 --- a/azurelinuxagent/common/protocol/goal_state.py +++ b/azurelinuxagent/common/protocol/goal_state.py @@ -18,6 +18,7 @@ import os import re import time +import json import azurelinuxagent.common.conf as conf import azurelinuxagent.common.logger as logger @@ -145,6 +146,7 @@ def update(self): return if vm_settings_updated: + logger.info('') logger.info("Fetched new vmSettings [HostGAPlugin correlation ID: {0} eTag: {1} source: {2}]", vm_settings.hostga_plugin_correlation_id, vm_settings.etag, vm_settings.source) # Ignore the vmSettings if their source is Fabric (processing a Fabric goal state may require the tenant certificate and the vmSettings don't include it.) 
if vm_settings is not None and vm_settings.source == GoalStateSource.Fabric: @@ -184,6 +186,17 @@ def update(self): if self._extensions_goal_state is None or most_recent.created_on_timestamp > self._extensions_goal_state.created_on_timestamp: self._extensions_goal_state = most_recent + # For Fast Track goal states, verify that the required certificates are in the goal state + if self.extensions_goal_state.source == GoalStateSource.FastTrack: + for extension in self.extensions_goal_state.extensions: + for settings in extension.settings: + if settings.protectedSettings is None: + continue + certificates = self.certs.summary + if not any(settings.certificateThumbprint == c['thumbprint'] for c in certificates): + message = "Certificate {0} needed by {1} is missing from the goal state".format(settings.certificateThumbprint, extension.name) + add_event(op=WALAEventOperation.VmSettings, message=message, is_success=False) + def _restore_wire_server_goal_state(self, incarnation, xml_text, xml_doc, vm_settings_support_stopped_error): logger.info('The HGAP stopped supporting vmSettings; will fetched the goal state from the WireServer.') self._history = GoalStateHistory(timeutil.create_timestamp(), incarnation) @@ -270,6 +283,7 @@ def _fetch_full_wire_server_goal_state(self, incarnation, xml_doc): Returns the value of ExtensionsConfig. 
""" try: + logger.info('') logger.info('Fetching full goal state from the WireServer [incarnation {0}]', incarnation) role_instance = find(xml_doc, "RoleInstance") @@ -300,9 +314,10 @@ def _fetch_full_wire_server_goal_state(self, incarnation, xml_doc): certs = None certs_uri = findtext(xml_doc, "Certificates") if certs_uri is not None: - # Note that we do not save the certificates to the goal state history xml_text = self._wire_client.fetch_config(certs_uri, self._wire_client.get_header_for_cert()) certs = Certificates(xml_text) + # Save the certificate summary, which includes only the thumbprint but not the certificate itself, to the goal state history + self._history.save_certificates(json.dumps(certs.summary)) remote_access = None remote_access_uri = findtext(container, "RemoteAccessInfo") @@ -349,6 +364,7 @@ def __init__(self, xml_text): class Certificates(object): def __init__(self, xml_text): self.cert_list = CertList() + self.summary = [] # debugging info # Save the certificates local_file = os.path.join(conf.get_lib_dir(), CERTS_FILE_NAME) @@ -428,18 +444,15 @@ def __init__(self, xml_text): tmp_file = prvs[pubkey] prv = "{0}.prv".format(thumbprint) os.rename(tmp_file, os.path.join(conf.get_lib_dir(), prv)) - logger.info("Found private key matching thumbprint {0}".format(thumbprint)) else: # Since private key has *no* matching certificate, # it will not be named correctly logger.warn("Found NO matching cert/thumbprint for private key!") - # Log if any certificates were found without matching private keys - # This can happen (rarely), and is useful to know for debugging - for pubkey in thumbprints: - if not pubkey in prvs: - msg = "Certificate with thumbprint {0} has no matching private key." 
- logger.info(msg.format(thumbprints[pubkey])) + for pubkey, thumbprint in thumbprints.items(): + has_private_key = pubkey in prvs + logger.info("Downloaded certificate with thumbprint {0} (has private key: {1})".format(thumbprint, has_private_key)) + self.summary.append({"thumbprint": thumbprint, "hasPrivateKey": has_private_key}) for v1_cert in v1_cert_list: cert = Cert() diff --git a/azurelinuxagent/common/utils/archive.py b/azurelinuxagent/common/utils/archive.py index ed8122e970..880a23a119 100644 --- a/azurelinuxagent/common/utils/archive.py +++ b/azurelinuxagent/common/utils/archive.py @@ -42,13 +42,14 @@ _MAX_ARCHIVED_STATES = 50 _CACHE_PATTERNS = [ - re.compile(r"^VmSettings.\d+\.json$"), + re.compile(r"^VmSettings\.\d+\.json$"), re.compile(r"^(.*)\.(\d+)\.(agentsManifest)$", re.IGNORECASE), re.compile(r"^(.*)\.(\d+)\.(manifest\.xml)$", re.IGNORECASE), re.compile(r"^(.*)\.(\d+)\.(xml)$", re.IGNORECASE), re.compile(r"^SharedConfig\.xml$", re.IGNORECASE), re.compile(r"^HostingEnvironmentConfig\.xml$", re.IGNORECASE), - re.compile(r"^RemoteAccess\.xml$", re.IGNORECASE) + re.compile(r"^RemoteAccess\.xml$", re.IGNORECASE), + re.compile(r"^waagent_status\.\d+\.json$"), ] # @@ -69,6 +70,7 @@ _GOAL_STATE_FILE_NAME = "GoalState.xml" _VM_SETTINGS_FILE_NAME = "VmSettings.json" +_CERTIFICATES_FILE_NAME = "Certificates.json" _HOSTING_ENV_FILE_NAME = "HostingEnvironmentConfig.xml" _SHARED_CONF_FILE_NAME = "SharedConfig.xml" _REMOTE_ACCESS_FILE_NAME = "RemoteAccess.xml" @@ -239,6 +241,9 @@ def save_vm_settings(self, text): def save_remote_access(self, text): self.save(text, _REMOTE_ACCESS_FILE_NAME) + def save_certificates(self, text): + self.save(text, _CERTIFICATES_FILE_NAME) + def save_hosting_env(self, text): self.save(text, _HOSTING_ENV_FILE_NAME) diff --git a/azurelinuxagent/daemon/main.py b/azurelinuxagent/daemon/main.py index 91685bc641..c608768a67 100644 --- a/azurelinuxagent/daemon/main.py +++ b/azurelinuxagent/daemon/main.py @@ -64,7 +64,7 @@ def run(self, 
child_args=None): # # Be aware that telemetry events emitted before that will not include the Container ID. # - logger.info("{0} Version:{1}", AGENT_LONG_NAME, AGENT_VERSION) + logger.info("{0} Version: {1}", AGENT_LONG_NAME, AGENT_VERSION) logger.info("OS: {0} {1}", DISTRO_NAME, DISTRO_VERSION) logger.info("Python: {0}.{1}.{2}", PY_VERSION_MAJOR, PY_VERSION_MINOR, PY_VERSION_MICRO) diff --git a/azurelinuxagent/ga/exthandlers.py b/azurelinuxagent/ga/exthandlers.py index cc9d0afc32..3e8dbc23dd 100644 --- a/azurelinuxagent/ga/exthandlers.py +++ b/azurelinuxagent/ga/exthandlers.py @@ -308,6 +308,7 @@ def run(self): error = None message = "ProcessExtensionsGoalState started [{0} channel: {1} source: {2} activity: {3} correlation {4} created: {5}]".format( egs.id, egs.channel, egs.source, egs.activity_id, egs.correlation_id, egs.created_on_timestamp) + logger.info('') logger.info(message) add_event(op=WALAEventOperation.ExtensionProcessing, message=message) @@ -319,7 +320,7 @@ def run(self): finally: duration = elapsed_milliseconds(utc_start) if error is None: - message = 'ProcessExtensionsGoalState completed [{0} {1} ms]'.format(egs.id, duration) + message = 'ProcessExtensionsGoalState completed [{0} {1} ms]\n'.format(egs.id, duration) logger.info(message) else: message = 'ProcessExtensionsGoalState failed [{0} {1} ms]\n{2}'.format(egs.id, duration, error) diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py index 2118ca6831..9932674eac 100644 --- a/azurelinuxagent/ga/update.py +++ b/azurelinuxagent/ga/update.py @@ -54,7 +54,7 @@ from azurelinuxagent.common.utils.flexible_version import FlexibleVersion from azurelinuxagent.common.utils.networkutil import AddFirewallRules from azurelinuxagent.common.utils.shellutil import CommandError -from azurelinuxagent.common.version import AGENT_NAME, AGENT_DIR_PATTERN, CURRENT_AGENT, \ +from azurelinuxagent.common.version import AGENT_LONG_NAME, AGENT_NAME, AGENT_DIR_PATTERN, CURRENT_AGENT, \ CURRENT_VERSION, 
DISTRO_NAME, DISTRO_VERSION, get_lis_version, \ has_logrotate, PY_VERSION_MAJOR, PY_VERSION_MINOR, PY_VERSION_MICRO, get_daemon_version from azurelinuxagent.ga.collect_logs import get_collect_logs_handler, is_log_collection_allowed @@ -324,20 +324,11 @@ def run(self, debug=False): """ try: + logger.info("{0} Version: {1}", AGENT_LONG_NAME, CURRENT_AGENT) + logger.info("OS: {0} {1}", DISTRO_NAME, DISTRO_VERSION) + logger.info("Python: {0}.{1}.{2}", PY_VERSION_MAJOR, PY_VERSION_MINOR, PY_VERSION_MICRO) logger.info(u"Agent {0} is running as the goal state agent", CURRENT_AGENT) - # - # Initialize the goal state; some components depend on information provided by the goal state and this - # call ensures the required info is initialized (e.g. telemetry depends on the container ID.) - # - protocol = self.protocol_util.get_protocol() - - self._initialize_goal_state(protocol) - - # Initialize the common parameters for telemetry events - initialize_event_logger_vminfo_common_parameters(protocol) - - # Log OS-specific info. os_info_msg = u"Distro: {dist_name}-{dist_ver}; "\ u"OSUtil: {util_name}; AgentService: {service_name}; "\ u"Python: {py_major}.{py_minor}.{py_micro}; "\ @@ -351,8 +342,20 @@ def run(self, debug=False): py_micro=PY_VERSION_MICRO, systemd=systemd.is_systemd(), lis_ver=get_lis_version(), has_logrotate=has_logrotate() ) - logger.info(os_info_msg) + + # + # Initialize the goal state; some components depend on information provided by the goal state and this + # call ensures the required info is initialized (e.g. telemetry depends on the container ID.) + # + protocol = self.protocol_util.get_protocol() + + self._initialize_goal_state(protocol) + + # Initialize the common parameters for telemetry events + initialize_event_logger_vminfo_common_parameters(protocol) + + # Send telemetry for the OS-specific info. 
add_event(AGENT_NAME, op=WALAEventOperation.OSInfo, message=os_info_msg) # @@ -734,7 +737,7 @@ def forward_signal(self, signum, frame): return logger.info( - u"Agent {0} forwarding signal {1} to {2}", + u"Agent {0} forwarding signal {1} to {2}\n", CURRENT_AGENT, signum, self.child_agent.name if self.child_agent is not None else CURRENT_AGENT) @@ -823,6 +826,9 @@ def log_if_op_disabled(name, value): if conf.get_autoupdate_enabled(): log_if_int_changed_from_default("Autoupdate.Frequency", conf.get_autoupdate_frequency()) + if conf.get_enable_fast_track(): + log_if_op_disabled("Debug.EnableFastTrack", conf.get_enable_fast_track()) + if conf.get_lib_dir() != "/var/lib/waagent": log_event("lib dir is in an unexpected location: {0}".format(conf.get_lib_dir())) diff --git a/tests/data/hostgaplugin/ext_conf-requested_version.xml b/tests/data/hostgaplugin/ext_conf-requested_version.xml index c3bd928236..bbb8a20feb 100644 --- a/tests/data/hostgaplugin/ext_conf-requested_version.xml +++ b/tests/data/hostgaplugin/ext_conf-requested_version.xml @@ -60,7 +60,7 @@ "runtimeSettings": [ { "handlerSettings": { - "protectedSettingsCertThumbprint": "4C4F304667711036E64AF4894B76EB208A863BD4", + "protectedSettingsCertThumbprint": "4037FBF5F1F3014F99B5D6C7799E9B20E6871CB3", "protectedSettings": "MIIBsAYJKoZIhvcNAQcDoIIBoTCCAZ0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/IsZAEZFidXaW5kb3dzIEF6dXJlIENSUCBDZXJ0aWZpY2F0ZSBHZW5lcmF0b3ICEFpB/HKM/7evRk+DBz754wUwDQYJKoZIhvcNAQEBBQAEggEADPJwniDeIUXzxNrZCloitFdscQ59Bz1dj9DLBREAiM8jmxM0LLicTJDUv272Qm/4ZQgdqpFYBFjGab/9MX+Ih2x47FkVY1woBkckMaC/QOFv84gbboeQCmJYZC/rZJdh8rCMS+CEPq3uH1PVrvtSdZ9uxnaJ+E4exTPPviIiLIPtqWafNlzdbBt8HZjYaVw+SSe+CGzD2pAQeNttq3Rt/6NjCzrjG8ufKwvRoqnrInMs4x6nnN5/xvobKIBSv4/726usfk8Ug+9Q6Benvfpmre2+1M5PnGTfq78cO3o6mI3cPoBUjp5M0iJjAMGeMt81tyHkimZrEZm6pLa4NQMOEjArBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECC5nVaiJaWt+gAhgeYvxUOYHXw==", "publicSettings": {"GCS_AUTO_CONFIG":true} } @@ -73,7 +73,7 @@ "runtimeSettings": [ { "handlerSettings": { - 
"protectedSettingsCertThumbprint": "4C4F304667711036E64AF4894B76EB208A863BD4", + "protectedSettingsCertThumbprint": "4037FBF5F1F3014F99B5D6C7799E9B20E6871CB3", "protectedSettings": "MIIBsAYJKoZIhvcNAQcDoIIBoTCCAZ0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/IsZAEZFidXaW5kb3dzIEF6dXJlIENSUCBDZXJ0aWZpY2F0ZSBHZW5lcmF0b3ICEFpB/HKM/7evRk+DBz754wUwDQYJKoZIhvcNAQEBBQAEggEADPJwniDeIUXzxNrZCloitFdscQ59Bz1dj9DLBREAiM8jmxM0LLicTJDUv272Qm/4ZQgdqpFYBFjGab/9MX+Ih2x47FkVY1woBkckMaC/QOFv84gbboeQCmJYZC/rZJdh8rCMS+CEPq3uH1PVrvtSdZ9uxnaJ+E4exTPPviIiLIPtqWafNlzdbBt8HZjYaVw+SSe+CGzD2pAQeNttq3Rt/6NjCzrjG8ufKwvRoqnrInMs4x6nnN5/xvobKIBSv4/726usfk8Ug+9Q6Benvfpmre2+1M5PnGTfq78cO3o6mI3cPoBUjp5M0iJjAMGeMt81tyHkimZrEZm6pLa4NQMOEjArBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECC5nVaiJaWt+gAhgeYvxUOYHXw==", "publicSettings": {"enableGenevaUpload":true} } diff --git a/tests/data/hostgaplugin/ext_conf.xml b/tests/data/hostgaplugin/ext_conf.xml index eac5d63647..ebd90aa0b2 100644 --- a/tests/data/hostgaplugin/ext_conf.xml +++ b/tests/data/hostgaplugin/ext_conf.xml @@ -58,7 +58,7 @@ "runtimeSettings": [ { "handlerSettings": { - "protectedSettingsCertThumbprint": "4C4F304667711036E64AF4894B76EB208A863BD4", + "protectedSettingsCertThumbprint": "4037FBF5F1F3014F99B5D6C7799E9B20E6871CB3", "protectedSettings": "MIIBsAYJKoZIhvcNAQcDoIIBoTCCAZ0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/Microsoft.Azure.Monitor.AzureMonitorLinuxAgent==", "publicSettings": {"GCS_AUTO_CONFIG":true} } @@ -71,7 +71,7 @@ "runtimeSettings": [ { "handlerSettings": { - "protectedSettingsCertThumbprint": "4C4F304667711036E64AF4894B76EB208A863BD4", + "protectedSettingsCertThumbprint": "4037FBF5F1F3014F99B5D6C7799E9B20E6871CB3", "protectedSettings": "MIIBsAYJKoZIhvcNAQcDoIIBoTCCAZ0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/Microsoft.Azure.Security.Monitoring.AzureSecurityLinuxAgent==", "publicSettings": {"enableGenevaUpload":true} } diff --git a/tests/data/hostgaplugin/vm_settings-difference_in_required_features.json 
b/tests/data/hostgaplugin/vm_settings-difference_in_required_features.json index 9fd2e4f722..5601268706 100644 --- a/tests/data/hostgaplugin/vm_settings-difference_in_required_features.json +++ b/tests/data/hostgaplugin/vm_settings-difference_in_required_features.json @@ -56,7 +56,7 @@ "settingsSeqNo": 0, "settings": [ { - "protectedSettingsCertThumbprint": "4C4F304667711036E64AF4894B76EB208A863BD4", + "protectedSettingsCertThumbprint": "4037FBF5F1F3014F99B5D6C7799E9B20E6871CB3", "protectedSettings": "MIIBsAYJKoZIhvcNAQcDoIIBoTCCAZ0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/IsZAEZFidXaW5kb3dzIEF6dXJlIENSUCBDZXJ0aWZpY2F0ZSBHZW5lcmF0b3ICEFpB/HKM/7evRk+DBz754wUwDQYJKoZIhvcNAQEBBQAEggEADPJwniDeIUXzxNrZCloitFdscQ59Bz1dj9DLBREAiM8jmxM0LLicTJDUv272Qm/4ZQgdqpFYBFjGab/9MX+Ih2x47FkVY1woBkckMaC/QOFv84gbboeQCmJYZC/rZJdh8rCMS+CEPq3uH1PVrvtSdZ9uxnaJ+E4exTPPviIiLIPtqWafNlzdbBt8HZjYaVw+SSe+CGzD2pAQeNttq3Rt/6NjCzrjG8ufKwvRoqnrInMs4x6nnN5/xvobKIBSv4/726usfk8Ug+9Q6Benvfpmre2+1M5PnGTfq78cO3o6mI3cPoBUjp5M0iJjAMGeMt81tyHkimZrEZm6pLa4NQMOEjArBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECC5nVaiJaWt+gAhgeYvxUOYHXw==", "publicSettings": "{\"GCS_AUTO_CONFIG\":true}" } @@ -76,7 +76,7 @@ "settingsSeqNo": 0, "settings": [ { - "protectedSettingsCertThumbprint": "4C4F304667711036E64AF4894B76EB208A863BD4", + "protectedSettingsCertThumbprint": "4037FBF5F1F3014F99B5D6C7799E9B20E6871CB3", "protectedSettings": "MIIBsAYJKoZIhvcNAQcDoIIBoTCCAZ0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/IsZAEZFidXaW5kb3dzIEF6dXJlIENSUCBDZXJ0aWZpY2F0ZSBHZW5lcmF0b3ICEFpB/HKM/7evRk+DBz754wUwDQYJKoZIhvcNAQEBBQAEggEADPJwniDeIUXzxNrZCloitFdscQ59Bz1dj9DLBREAiM8jmxM0LLicTJDUv272Qm/4ZQgdqpFYBFjGab/9MX+Ih2x47FkVY1woBkckMaC/QOFv84gbboeQCmJYZC/rZJdh8rCMS+CEPq3uH1PVrvtSdZ9uxnaJ+E4exTPPviIiLIPtqWafNlzdbBt8HZjYaVw+SSe+CGzD2pAQeNttq3Rt/6NjCzrjG8ufKwvRoqnrInMs4x6nnN5/xvobKIBSv4/726usfk8Ug+9Q6Benvfpmre2+1M5PnGTfq78cO3o6mI3cPoBUjp5M0iJjAMGeMt81tyHkimZrEZm6pLa4NQMOEjArBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECC5nVaiJaWt+gAhgeYvxUOYHXw==", "publicSettings": 
"{\"enableGenevaUpload\":true}" } diff --git a/tests/data/hostgaplugin/vm_settings-missing_cert.json b/tests/data/hostgaplugin/vm_settings-missing_cert.json new file mode 100644 index 0000000000..a7192e942d --- /dev/null +++ b/tests/data/hostgaplugin/vm_settings-missing_cert.json @@ -0,0 +1,68 @@ +{ + "hostGAPluginVersion": "1.0.8.124", + "vmSettingsSchemaVersion": "0.0", + "activityId": "a33f6f53-43d6-4625-b322-1a39651a00c9", + "correlationId": "9a47a2a2-e740-4bfc-b11b-4f2f7cfe7d2e", + "inSvdSeqNo": 1, + "extensionsLastModifiedTickCount": 637726657706205299, + "extensionGoalStatesSource": "FastTrack", + "onHold": true, + "statusUploadBlob": { + "statusBlobType": "BlockBlob", + "value": "https://dcrcl3a0xs.blob.core.windows.net/$system/edp0plkw2b.86f4ae0a-61f8-48ae-9199-40f402d56864.status?sv=2018-03-28&sr=b&sk=system-1&sig=KNWgC2%3d&se=9999-01-01T00%3a00%3a00Z&sp=w" + }, + "inVMMetadata": { + "subscriptionId": "8e037ad4-618f-4466-8bc8-5099d41ac15b", + "resourceGroupName": "rg-dc-86fjzhp", + "vmName": "edp0plkw2b", + "location": "CentralUSEUAP", + "vmId": "86f4ae0a-61f8-48ae-9199-40f402d56864", + "vmSize": "Standard_B2s", + "osType": "Linux" + }, + "requiredFeatures": [ + { + "name": "MultipleExtensionsPerHandler" + } + ], + "gaFamilies": [ + { + "name": "Prod", + "uris": [ + "https://zrdfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Prod_uscentraleuap_manifest.xml", + "https://ardfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Prod_uscentraleuap_manifest.xml" + ] + }, + { + "name": "Test", + "uris": [ + "https://zrdfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_uscentraleuap_manifest.xml", + "https://ardfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_uscentraleuap_manifest.xml" + ] + } + ], + "extensionGoalStates": [ + { + "name": 
"Microsoft.OSTCExtensions.VMAccessForLinux", + "version": "1.5.11", + "location": "https://umsasc25p0kjg0c1dg4b.blob.core.windows.net/2bbece4f-0283-d415-b034-cc0adc6997a1/2bbece4f-0283-d415-b034-cc0adc6997a1_manifest.xml", + "failoverlocation": "https://umsamfwlmfshvxx2lsjm.blob.core.windows.net/2bbece4f-0283-d415-b034-cc0adc6997a1/2bbece4f-0283-d415-b034-cc0adc6997a1_manifest.xml", + "additionalLocations": [ + "https://umsah3cwjlctnmhsvzqv.blob.core.windows.net/2bbece4f-0283-d415-b034-cc0adc6997a1/2bbece4f-0283-d415-b034-cc0adc6997a1_manifest.xml" + ], + "state": "enabled", + "autoUpgrade": false, + "runAsStartupTask": false, + "isJson": true, + "useExactVersion": true, + "settingsSeqNo": 0, + "isMultiConfig": false, + "settings": [ + { + "protectedSettingsCertThumbprint": "59A10F50FFE2A0408D3F03FE336C8FD5716CF25C", + "protectedSettings": "MIIBsAYJKoZIhvcNAQcDoIIBoTCCAZ0CAQAxggFpddesZQewdDBgegkxNzA1BgoJkgergres/Microsoft.OSTCExtensions.VMAccessForLinux==" + } + ] + } + ] +} diff --git a/tests/data/hostgaplugin/vm_settings-out-of-sync.json b/tests/data/hostgaplugin/vm_settings-out-of-sync.json index 0aae82031c..1f369ae5bc 100644 --- a/tests/data/hostgaplugin/vm_settings-out-of-sync.json +++ b/tests/data/hostgaplugin/vm_settings-out-of-sync.json @@ -56,7 +56,7 @@ "settingsSeqNo": 0, "settings": [ { - "protectedSettingsCertThumbprint": "4C4F304667711036E64AF4894B76EB208A863BD4", + "protectedSettingsCertThumbprint": "4037FBF5F1F3014F99B5D6C7799E9B20E6871CB3", "protectedSettings": 
"MIIBsAYJKoZIhvcNAQcDoIIBoTCCAZ0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/IsZAEZFidXaW5kb3dzIEF6dXJlIENSUCBDZXJ0aWZpY2F0ZSBHZW5lcmF0b3ICEFpB/HKM/7evRk+DBz754wUwDQYJKoZIhvcNAQEBBQAEggEADPJwniDeIUXzxNrZCloitFdscQ59Bz1dj9DLBREAiM8jmxM0LLicTJDUv272Qm/4ZQgdqpFYBFjGab/9MX+Ih2x47FkVY1woBkckMaC/QOFv84gbboeQCmJYZC/rZJdh8rCMS+CEPq3uH1PVrvtSdZ9uxnaJ+E4exTPPviIiLIPtqWafNlzdbBt8HZjYaVw+SSe+CGzD2pAQeNttq3Rt/6NjCzrjG8ufKwvRoqnrInMs4x6nnN5/xvobKIBSv4/726usfk8Ug+9Q6Benvfpmre2+1M5PnGTfq78cO3o6mI3cPoBUjp5M0iJjAMGeMt81tyHkimZrEZm6pLa4NQMOEjArBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECC5nVaiJaWt+gAhgeYvxUOYHXw==", "publicSettings": "{\"GCS_AUTO_CONFIG\":true}" } diff --git a/tests/data/hostgaplugin/vm_settings-requested_version.json b/tests/data/hostgaplugin/vm_settings-requested_version.json index 49e6a27780..98959dd4ec 100644 --- a/tests/data/hostgaplugin/vm_settings-requested_version.json +++ b/tests/data/hostgaplugin/vm_settings-requested_version.json @@ -56,7 +56,7 @@ "settingsSeqNo": 0, "settings": [ { - "protectedSettingsCertThumbprint": "4C4F304667711036E64AF4894B76EB208A863BD4", + "protectedSettingsCertThumbprint": "4037FBF5F1F3014F99B5D6C7799E9B20E6871CB3", "protectedSettings": "MIIBsAYJKoZIhvcNAQcDoIIBoTCCAZ0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/IsZAEZFidXaW5kb3dzIEF6dXJlIENSUCBDZXJ0aWZpY2F0ZSBHZW5lcmF0b3ICEFpB/HKM/7evRk+DBz754wUwDQYJKoZIhvcNAQEBBQAEggEADPJwniDeIUXzxNrZCloitFdscQ59Bz1dj9DLBREAiM8jmxM0LLicTJDUv272Qm/4ZQgdqpFYBFjGab/9MX+Ih2x47FkVY1woBkckMaC/QOFv84gbboeQCmJYZC/rZJdh8rCMS+CEPq3uH1PVrvtSdZ9uxnaJ+E4exTPPviIiLIPtqWafNlzdbBt8HZjYaVw+SSe+CGzD2pAQeNttq3Rt/6NjCzrjG8ufKwvRoqnrInMs4x6nnN5/xvobKIBSv4/726usfk8Ug+9Q6Benvfpmre2+1M5PnGTfq78cO3o6mI3cPoBUjp5M0iJjAMGeMt81tyHkimZrEZm6pLa4NQMOEjArBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECC5nVaiJaWt+gAhgeYvxUOYHXw==", "publicSettings": "{\"GCS_AUTO_CONFIG\":true}" } @@ -74,7 +74,7 @@ "settingsSeqNo": 0, "settings": [ { - "protectedSettingsCertThumbprint": "4C4F304667711036E64AF4894B76EB208A863BD4", + "protectedSettingsCertThumbprint": 
"4037FBF5F1F3014F99B5D6C7799E9B20E6871CB3", "protectedSettings": "MIIBsAYJKoZIhvcNAQcDoIIBoTCCAZ0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/IsZAEZFidXaW5kb3dzIEF6dXJlIENSUCBDZXJ0aWZpY2F0ZSBHZW5lcmF0b3ICEFpB/HKM/7evRk+DBz754wUwDQYJKoZIhvcNAQEBBQAEggEADPJwniDeIUXzxNrZCloitFdscQ59Bz1dj9DLBREAiM8jmxM0LLicTJDUv272Qm/4ZQgdqpFYBFjGab/9MX+Ih2x47FkVY1woBkckMaC/QOFv84gbboeQCmJYZC/rZJdh8rCMS+CEPq3uH1PVrvtSdZ9uxnaJ+E4exTPPviIiLIPtqWafNlzdbBt8HZjYaVw+SSe+CGzD2pAQeNttq3Rt/6NjCzrjG8ufKwvRoqnrInMs4x6nnN5/xvobKIBSv4/726usfk8Ug+9Q6Benvfpmre2+1M5PnGTfq78cO3o6mI3cPoBUjp5M0iJjAMGeMt81tyHkimZrEZm6pLa4NQMOEjArBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECC5nVaiJaWt+gAhgeYvxUOYHXw==", "publicSettings": "{\"enableGenevaUpload\":true}" } diff --git a/tests/data/hostgaplugin/vm_settings.json b/tests/data/hostgaplugin/vm_settings.json index 96836e7663..a4ef0f785f 100644 --- a/tests/data/hostgaplugin/vm_settings.json +++ b/tests/data/hostgaplugin/vm_settings.json @@ -56,7 +56,7 @@ "settingsSeqNo": 0, "settings": [ { - "protectedSettingsCertThumbprint": "4C4F304667711036E64AF4894B76EB208A863BD4", + "protectedSettingsCertThumbprint": "4037FBF5F1F3014F99B5D6C7799E9B20E6871CB3", "protectedSettings": "MIIBsAYJKoZIhvcNAQcDoIIBoTCCAZ0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/Microsoft.Azure.Monitor.AzureMonitorLinuxAgent==", "publicSettings": "{\"GCS_AUTO_CONFIG\":true}" } @@ -76,7 +76,7 @@ "settingsSeqNo": 0, "settings": [ { - "protectedSettingsCertThumbprint": "4C4F304667711036E64AF4894B76EB208A863BD4", + "protectedSettingsCertThumbprint": "4037FBF5F1F3014F99B5D6C7799E9B20E6871CB3", "protectedSettings": "MIIBsAYJKoZIhvcNAQcDoIIBoTCCAZ0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/Microsoft.Azure.Security.Monitoring.AzureSecurityLinuxAgent==", "publicSettings": "{\"enableGenevaUpload\":true}" } @@ -192,7 +192,7 @@ "isMultiConfig": false, "settings": [ { - "protectedSettingsCertThumbprint": "59A10F50FFE2A0408D3F03FE336C8FD5716CF25C", + "protectedSettingsCertThumbprint": "4037FBF5F1F3014F99B5D6C7799E9B20E6871CB3", 
"protectedSettings": "MIIBsAYJKoZIhvcNAQcDoIIBoTCCAZ0CAQAxggFpddesZQewdDBgegkxNzA1BgoJkgergres/Microsoft.OSTCExtensions.VMAccessForLinux==" } ] diff --git a/tests/protocol/test_goal_state.py b/tests/protocol/test_goal_state.py index 65d050d862..8cfa4842a8 100644 --- a/tests/protocol/test_goal_state.py +++ b/tests/protocol/test_goal_state.py @@ -8,6 +8,7 @@ import re import time +from azurelinuxagent.common.event import WALAEventOperation from azurelinuxagent.common.future import httpclient from azurelinuxagent.common.protocol.extensions_goal_state import GoalStateSource, GoalStateChannel from azurelinuxagent.common.protocol.extensions_goal_state_from_extensions_config import ExtensionsGoalStateFromExtensionsConfig @@ -105,7 +106,7 @@ def test_instantiating_goal_state_should_save_the_goal_state_to_the_history_dire self._assert_directory_contents( self._find_history_subdirectory("999-888"), - ["GoalState.xml", "ExtensionsConfig.xml", "VmSettings.json", "SharedConfig.xml", "HostingEnvironmentConfig.xml"]) + ["GoalState.xml", "ExtensionsConfig.xml", "VmSettings.json", "Certificates.json", "SharedConfig.xml", "HostingEnvironmentConfig.xml"]) def _find_history_subdirectory(self, tag): matches = glob.glob(os.path.join(self.tmp_dir, ARCHIVE_DIRECTORY_NAME, "*_{0}".format(tag))) @@ -128,7 +129,7 @@ def test_update_should_create_new_history_subdirectories(self): goal_state = GoalState(protocol.client) self._assert_directory_contents( self._find_history_subdirectory("123-654"), - ["GoalState.xml", "ExtensionsConfig.xml", "VmSettings.json", "SharedConfig.xml", "HostingEnvironmentConfig.xml"]) + ["GoalState.xml", "ExtensionsConfig.xml", "VmSettings.json", "Certificates.json", "SharedConfig.xml", "HostingEnvironmentConfig.xml"]) def http_get_handler(url, *_, **__): if HttpRequestPredicates.is_host_plugin_vm_settings_request(url): @@ -140,7 +141,7 @@ def http_get_handler(url, *_, **__): goal_state.update() self._assert_directory_contents( 
self._find_history_subdirectory("234-654"), - ["GoalState.xml", "ExtensionsConfig.xml", "SharedConfig.xml", "HostingEnvironmentConfig.xml"]) + ["GoalState.xml", "ExtensionsConfig.xml", "Certificates.json", "SharedConfig.xml", "HostingEnvironmentConfig.xml"]) protocol.mock_wire_data.set_etag(987) protocol.set_http_handlers(http_get_handler=None) @@ -358,3 +359,18 @@ def http_get_handler(url, *_, **__): self._assert_goal_state(goal_state, initial_incarnation, channel=GoalStateChannel.WireServer, source=GoalStateSource.Fabric) self.assertEqual(initial_timestamp, goal_state.extensions_goal_state.created_on_timestamp, "The timestamp of the updated goal state is incorrect") self.assertTrue(goal_state.extensions_goal_state.is_outdated, "The updated goal state should be marked as outdated") + + def test_it_should_report_missing_certificates(self): + data_file = mockwiredata.DATA_FILE_VM_SETTINGS.copy() + data_file["vm_settings"] = "hostgaplugin/vm_settings-missing_cert.json" + + with mock_wire_protocol(data_file) as protocol: + with patch("azurelinuxagent.common.protocol.goal_state.add_event") as add_event: + _ = GoalState(protocol.client) + + expected_message = "Certificate 59A10F50FFE2A0408D3F03FE336C8FD5716CF25C needed by Microsoft.OSTCExtensions.VMAccessForLinux is missing from the goal state" + events = [kwargs for _, kwargs in add_event.call_args_list if kwargs['op'] == WALAEventOperation.VmSettings and kwargs['message'] == expected_message] + + self.assertTrue( + len(events) == 1, + "Missing certificate 59A10F50FFE2A0408D3F03FE336C8FD5716CF25C was note reported. 
Telemetry: {0}".format([kwargs['message'] for _, kwargs in add_event.call_args_list])) diff --git a/tests/utils/test_archive.py b/tests/utils/test_archive.py index 466d674c79..61214558c5 100644 --- a/tests/utils/test_archive.py +++ b/tests/utils/test_archive.py @@ -135,7 +135,8 @@ def test_purge_legacy_goal_state_history(self): 'Microsoft.Azure.Extensions.CustomScript.1.xml', 'SharedConfig.xml', 'HostingEnvironmentConfig.xml', - 'RemoteAccess.xml' + 'RemoteAccess.xml', + 'waagent_status.1.json' ] legacy_files = [os.path.join(self.tmp_dir, f) for f in legacy_files] for f in legacy_files: From 93a2564fd6509a93ddab1417507a61f40ba56424 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Wed, 20 Apr 2022 15:43:45 -0700 Subject: [PATCH 61/84] Change format of history items (#2560) * Change format of history directory * Update message; fix typo * py2 compat * py2 compat Co-authored-by: narrieta --- azurelinuxagent/common/protocol/goal_state.py | 9 +++++---- azurelinuxagent/common/utils/archive.py | 19 +++++++++++-------- azurelinuxagent/common/utils/timeutil.py | 11 ++++++++++- azurelinuxagent/ga/update.py | 5 ++--- tests/protocol/test_goal_state.py | 2 +- tests/utils/test_archive.py | 10 ++++++++-- 6 files changed, 37 insertions(+), 19 deletions(-) diff --git a/azurelinuxagent/common/protocol/goal_state.py b/azurelinuxagent/common/protocol/goal_state.py index 6ec0a5ab64..da0de9a032 100644 --- a/azurelinuxagent/common/protocol/goal_state.py +++ b/azurelinuxagent/common/protocol/goal_state.py @@ -15,6 +15,7 @@ # limitations under the License. 
# # Requires Python 2.6+ and Openssl 1.0+ +import datetime import os import re import time @@ -31,7 +32,7 @@ from azurelinuxagent.common.protocol.extensions_goal_state import VmSettingsParseError, GoalStateSource from azurelinuxagent.common.protocol.hostplugin import VmSettingsNotSupported, VmSettingsSupportStopped from azurelinuxagent.common.protocol.restapi import Cert, CertList, RemoteAccessUser, RemoteAccessUsersList -from azurelinuxagent.common.utils import fileutil, timeutil +from azurelinuxagent.common.utils import fileutil from azurelinuxagent.common.utils.archive import GoalStateHistory from azurelinuxagent.common.utils.cryptutil import CryptUtil from azurelinuxagent.common.utils.textutil import parse_doc, findall, find, findtext, getattrib @@ -131,7 +132,7 @@ def update(self): # # Fetch the goal state from both the HGAP and the WireServer # - timestamp = timeutil.create_timestamp() + timestamp = datetime.datetime.utcnow() incarnation, xml_text, xml_doc = GoalState._fetch_goal_state(self._wire_client) goal_state_updated = incarnation != self._incarnation @@ -199,7 +200,7 @@ def update(self): def _restore_wire_server_goal_state(self, incarnation, xml_text, xml_doc, vm_settings_support_stopped_error): logger.info('The HGAP stopped supporting vmSettings; will fetched the goal state from the WireServer.') - self._history = GoalStateHistory(timeutil.create_timestamp(), incarnation) + self._history = GoalStateHistory(datetime.datetime.utcnow(), incarnation) self._history.save_goal_state(xml_text) self._extensions_goal_state = self._fetch_full_wire_server_goal_state(incarnation, xml_doc) if self._extensions_goal_state.created_on_timestamp < vm_settings_support_stopped_error.timestamp: @@ -270,7 +271,7 @@ def _fetch_vm_settings(wire_client): except VmSettingsParseError as exception: # ensure we save the vmSettings if there were parsing errors, but save them only once per ETag if not GoalStateHistory.tag_exists(exception.etag): - 
GoalStateHistory(timeutil.create_timestamp(), exception.etag).save_vm_settings(exception.vm_settings_text) + GoalStateHistory(datetime.datetime.utcnow(), exception.etag).save_vm_settings(exception.vm_settings_text) raise return vm_settings, vm_settings_updated diff --git a/azurelinuxagent/common/utils/archive.py b/azurelinuxagent/common/utils/archive.py index 880a23a119..6123fdb0d2 100644 --- a/azurelinuxagent/common/utils/archive.py +++ b/azurelinuxagent/common/utils/archive.py @@ -9,7 +9,7 @@ import azurelinuxagent.common.logger as logger import azurelinuxagent.common.conf as conf -from azurelinuxagent.common.utils import fileutil +from azurelinuxagent.common.utils import fileutil, timeutil # pylint: disable=W0105 @@ -58,13 +58,15 @@ # 2018-04-06T08:21:37.142697.zip # 2018-04-06T08:21:37.142697_incarnation_N # 2018-04-06T08:21:37.142697_incarnation_N.zip +# 2018-04-06T08:21:37.142697_N-M +# 2018-04-06T08:21:37.142697_N-M.zip # # Current names # -# 2018-04-06T08:21:37.142697_N-M -# 2018-04-06T08:21:37.142697_N-M.zip +# 2018-04-06T08-21-37__N-M +# 2018-04-06T08-21-37__N-M.zip # -_ARCHIVE_BASE_PATTERN = r"\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d+((_incarnation)?_(\d+|status)(-\d+)?)?" +_ARCHIVE_BASE_PATTERN = r"\d{4}\-\d{2}\-\d{2}T\d{2}[:-]\d{2}[:-]\d{2}(\.\d+)?((_incarnation)?_+(\d+|status)(-\d+)?)?" _ARCHIVE_PATTERNS_DIRECTORY = re.compile(r'^{0}$'.format(_ARCHIVE_BASE_PATTERN)) _ARCHIVE_PATTERNS_ZIP = re.compile(r'^{0}\.zip$'.format(_ARCHIVE_BASE_PATTERN)) @@ -163,7 +165,6 @@ def purge(self): newest ones. Also, clean up any legacy history files. 
""" states = self._get_archive_states() - states.sort(reverse=True) for state in states[_MAX_ARCHIVED_STATES:]: state.delete() @@ -184,7 +185,6 @@ def purge_legacy_goal_state_history(): def archive(self): states = self._get_archive_states() - states.sort(reverse=True) if len(states) > 0: # Skip the most recent goal state, since it may still be in use @@ -203,13 +203,16 @@ def _get_archive_states(self): if match is not None: states.append(StateZip(full_path, match.group(0))) + states.sort(key=lambda state: os.path.getctime(state._path), reverse=True) + return states class GoalStateHistory(object): - def __init__(self, timestamp, tag): + def __init__(self, time, tag): self._errors = False - self._root = os.path.join(conf.get_lib_dir(), ARCHIVE_DIRECTORY_NAME, "{0}_{1}".format(timestamp, tag) if tag is not None else timestamp) + timestamp = timeutil.create_history_timestamp(time) + self._root = os.path.join(conf.get_lib_dir(), ARCHIVE_DIRECTORY_NAME, "{0}__{1}".format(timestamp, tag) if tag is not None else timestamp) @staticmethod def tag_exists(tag): diff --git a/azurelinuxagent/common/utils/timeutil.py b/azurelinuxagent/common/utils/timeutil.py index c4dd755a0c..c8fa37647c 100644 --- a/azurelinuxagent/common/utils/timeutil.py +++ b/azurelinuxagent/common/utils/timeutil.py @@ -5,7 +5,7 @@ def create_timestamp(dt=None): """ - Returns a string with the given datetime iso format. If no datetime is given as parameter, it + Returns a string with the given datetime in iso format. If no datetime is given as parameter, it uses datetime.utcnow(). 
""" if dt is None: @@ -13,6 +13,15 @@ def create_timestamp(dt=None): return dt.isoformat() +def create_history_timestamp(dt=None): + """ + Returns a string with the given datetime formatted as a timestamp for the agent's history folder + """ + if dt is None: + dt = datetime.datetime.utcnow() + return dt.strftime('%Y-%m-%dT%H-%M-%S') + + def datetime_to_ticks(dt): """ Converts 'dt', a datetime, to the number of ticks (1 tick == 1/10000000 sec) since datetime.min (0001-01-01 00:00:00). diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py index 9932674eac..bf0b539938 100644 --- a/azurelinuxagent/ga/update.py +++ b/azurelinuxagent/ga/update.py @@ -54,7 +54,7 @@ from azurelinuxagent.common.utils.flexible_version import FlexibleVersion from azurelinuxagent.common.utils.networkutil import AddFirewallRules from azurelinuxagent.common.utils.shellutil import CommandError -from azurelinuxagent.common.version import AGENT_LONG_NAME, AGENT_NAME, AGENT_DIR_PATTERN, CURRENT_AGENT, \ +from azurelinuxagent.common.version import AGENT_LONG_NAME, AGENT_NAME, AGENT_DIR_PATTERN, CURRENT_AGENT, AGENT_VERSION, \ CURRENT_VERSION, DISTRO_NAME, DISTRO_VERSION, get_lis_version, \ has_logrotate, PY_VERSION_MAJOR, PY_VERSION_MINOR, PY_VERSION_MICRO, get_daemon_version from azurelinuxagent.ga.collect_logs import get_collect_logs_handler, is_log_collection_allowed @@ -324,10 +324,9 @@ def run(self, debug=False): """ try: - logger.info("{0} Version: {1}", AGENT_LONG_NAME, CURRENT_AGENT) + logger.info("{0} (Goal State Agent version {1})", AGENT_LONG_NAME, AGENT_VERSION) logger.info("OS: {0} {1}", DISTRO_NAME, DISTRO_VERSION) logger.info("Python: {0}.{1}.{2}", PY_VERSION_MAJOR, PY_VERSION_MINOR, PY_VERSION_MICRO) - logger.info(u"Agent {0} is running as the goal state agent", CURRENT_AGENT) os_info_msg = u"Distro: {dist_name}-{dist_ver}; "\ u"OSUtil: {util_name}; AgentService: {service_name}; "\ diff --git a/tests/protocol/test_goal_state.py b/tests/protocol/test_goal_state.py 
index 8cfa4842a8..c54a65f9f7 100644 --- a/tests/protocol/test_goal_state.py +++ b/tests/protocol/test_goal_state.py @@ -373,4 +373,4 @@ def test_it_should_report_missing_certificates(self): self.assertTrue( len(events) == 1, - "Missing certificate 59A10F50FFE2A0408D3F03FE336C8FD5716CF25C was note reported. Telemetry: {0}".format([kwargs['message'] for _, kwargs in add_event.call_args_list])) + "Missing certificate 59A10F50FFE2A0408D3F03FE336C8FD5716CF25C was not reported. Telemetry: {0}".format([kwargs['message'] for _, kwargs in add_event.call_args_list])) diff --git a/tests/utils/test_archive.py b/tests/utils/test_archive.py index 61214558c5..0c649c9e24 100644 --- a/tests/utils/test_archive.py +++ b/tests/utils/test_archive.py @@ -67,7 +67,10 @@ def test_archive_should_zip_all_but_the_latest_goal_state_in_the_history_folder( test_directories.append(directory) test_subject = StateArchiver(self.tmp_dir) - test_subject.archive() + # NOTE: StateArchiver sorts the state directories by creation time, but the test files are created too fast and the + # time resolution is too coarse, so instead we mock getctime to simply return the path of the file + with patch("azurelinuxagent.common.utils.archive.os.path.getctime", side_effect=lambda path: path): + test_subject.archive() for directory in test_directories[0:2]: zip_file = directory + ".zip" @@ -110,7 +113,10 @@ def test_archive02(self): self.assertEqual(total, len(os.listdir(self.history_dir))) test_subject = StateArchiver(self.tmp_dir) - test_subject.purge() + # NOTE: StateArchiver sorts the state directories by creation time, but the test files are created too fast and the + # time resolution is too coarse, so instead we mock getctime to simply return the path of the file + with patch("azurelinuxagent.common.utils.archive.os.path.getctime", side_effect=lambda path: path): + test_subject.purge() archived_entries = os.listdir(self.history_dir) self.assertEqual(_MAX_ARCHIVED_STATES, len(archived_entries)) From 
a3d761f6c8187ab6f2f2deda839eb31419043ab3 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Wed, 20 Apr 2022 16:31:13 -0700 Subject: [PATCH 62/84] Update agent version to 2.8.0.2 (#2561) Co-authored-by: narrieta --- azurelinuxagent/common/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azurelinuxagent/common/version.py b/azurelinuxagent/common/version.py index 1c3b0639a3..b6fb5f2281 100644 --- a/azurelinuxagent/common/version.py +++ b/azurelinuxagent/common/version.py @@ -209,7 +209,7 @@ def has_logrotate(): # # When doing a release, be sure to use the actual agent version. # -AGENT_VERSION = '2.8.0.1' +AGENT_VERSION = '2.8.0.2' AGENT_LONG_VERSION = "{0}-{1}".format(AGENT_NAME, AGENT_VERSION) AGENT_DESCRIPTION = """ The Azure Linux Agent supports the provisioning and running of Linux From cd03ff2c22c572fcafb74d645ed5a437a2c3bf5d Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Mon, 25 Apr 2022 13:00:51 -0700 Subject: [PATCH 63/84] Refresh goal state when certificates are missing (#2562) * Refresh goal state when certificates are missing * Improve error reporting * Fix assert message Co-authored-by: narrieta --- azurelinuxagent/common/logger.py | 4 + azurelinuxagent/common/protocol/goal_state.py | 100 ++++++++++++------ azurelinuxagent/common/protocol/hostplugin.py | 4 +- azurelinuxagent/common/protocol/wire.py | 12 +-- azurelinuxagent/ga/update.py | 52 +++++---- tests/data/wire/certs-2.xml | 85 +++++++++++++++ tests/data/wire/goal_state.xml | 12 +-- tests/data/wire/goal_state_no_ext.xml | 10 +- tests/data/wire/goal_state_remote_access.xml | 13 +-- tests/ga/test_update.py | 48 ++++----- tests/protocol/HttpRequestPredicates.py | 16 +++ tests/protocol/mockwiredata.py | 28 ++--- tests/protocol/test_goal_state.py | 56 ++++++++-- tests/protocol/test_hostplugin.py | 12 +-- tests/protocol/test_wire.py | 2 +- 15 files changed, 323 insertions(+), 131 deletions(-) create mode 100644 tests/data/wire/certs-2.xml diff --git 
a/azurelinuxagent/common/logger.py b/azurelinuxagent/common/logger.py index 07e3f23936..3d0dc617d3 100644 --- a/azurelinuxagent/common/logger.py +++ b/azurelinuxagent/common/logger.py @@ -45,6 +45,7 @@ def __init__(self, logger=None, prefix=None): self.logger = self if logger is None else logger self.periodic_messages = {} self.prefix = prefix + self.silent = False def reset_periodic(self): self.logger.periodic_messages = {} @@ -124,6 +125,9 @@ def write_log(log_appender): # pylint: disable=W0612 finally: log_appender.appender_lock = False + if self.silent: + return + # if msg_format is not unicode convert it to unicode if type(msg_format) is not ustr: msg_format = ustr(msg_format, errors="backslashreplace") diff --git a/azurelinuxagent/common/protocol/goal_state.py b/azurelinuxagent/common/protocol/goal_state.py index da0de9a032..edfd9d14f7 100644 --- a/azurelinuxagent/common/protocol/goal_state.py +++ b/azurelinuxagent/common/protocol/goal_state.py @@ -48,8 +48,16 @@ _GET_GOAL_STATE_MAX_ATTEMPTS = 6 +class GoalStateInconsistentError(ProtocolError): + """ + Indicates an inconsistency in the goal state (e.g. missing tenant certificate) + """ + def __init__(self, msg, inner=None): + super(GoalStateInconsistentError, self).__init__(msg, inner) + + class GoalState(object): - def __init__(self, wire_client): + def __init__(self, wire_client, silent=False): """ Fetches the goal state using the given wire client. 
@@ -64,6 +72,8 @@ def __init__(self, wire_client): self._wire_client = wire_client self._history = None self._extensions_goal_state = None # populated from vmSettings or extensionsConfig + self.logger = logger.Logger(logger.DEFAULT_LOGGER) + self.logger.silent = silent # These properties hold the goal state from the WireServer and are initialized by self._fetch_full_wire_server_goal_state() self._incarnation = None @@ -75,8 +85,10 @@ def __init__(self, wire_client): self._certs = None self._remote_access = None - self.update() + self.update(silent=silent) + except ProtocolError: + raise except Exception as exception: # We don't log the error here since fetching the goal state is done every few seconds raise ProtocolError(msg="Error fetching goal state", inner=exception) @@ -125,34 +137,47 @@ def update_host_plugin_headers(wire_client): # Fetching the goal state updates the HostGAPlugin so simply trigger the request GoalState._fetch_goal_state(wire_client) - def update(self): + def update(self, silent=False): """ Updates the current GoalState instance fetching values from the WireServer/HostGAPlugin as needed """ + self.logger.silent = silent + + try: + self._update(force_update=False) + except GoalStateInconsistentError as e: + self.logger.warn("Detected an inconsistency in the goal state: {0}", ustr(e)) + self._update(force_update=True) + self.logger.info("The goal state is consistent") + + def _update(self, force_update): # # Fetch the goal state from both the HGAP and the WireServer # timestamp = datetime.datetime.utcnow() + if force_update: + self.logger.info("Refreshing goal state and vmSettings") + incarnation, xml_text, xml_doc = GoalState._fetch_goal_state(self._wire_client) - goal_state_updated = incarnation != self._incarnation + goal_state_updated = force_update or incarnation != self._incarnation if goal_state_updated: - logger.info('Fetched a new incarnation for the WireServer goal state [incarnation {0}]', incarnation) + self.logger.info('Fetched a 
new incarnation for the WireServer goal state [incarnation {0}]', incarnation) vm_settings, vm_settings_updated = None, False try: - vm_settings, vm_settings_updated = GoalState._fetch_vm_settings(self._wire_client) + vm_settings, vm_settings_updated = GoalState._fetch_vm_settings(self._wire_client, force_update=force_update) except VmSettingsSupportStopped as exception: # If the HGAP stopped supporting vmSettings, we need to use the goal state from the WireServer self._restore_wire_server_goal_state(incarnation, xml_text, xml_doc, exception) return if vm_settings_updated: - logger.info('') - logger.info("Fetched new vmSettings [HostGAPlugin correlation ID: {0} eTag: {1} source: {2}]", vm_settings.hostga_plugin_correlation_id, vm_settings.etag, vm_settings.source) + self.logger.info('') + self.logger.info("Fetched new vmSettings [HostGAPlugin correlation ID: {0} eTag: {1} source: {2}]", vm_settings.hostga_plugin_correlation_id, vm_settings.etag, vm_settings.source) # Ignore the vmSettings if their source is Fabric (processing a Fabric goal state may require the tenant certificate and the vmSettings don't include it.) if vm_settings is not None and vm_settings.source == GoalStateSource.Fabric: if vm_settings_updated: - logger.info("The vmSettings originated via Fabric; will ignore them.") + self.logger.info("The vmSettings originated via Fabric; will ignore them.") vm_settings, vm_settings_updated = None, False # If neither goal state has changed we are done with the update @@ -187,19 +212,30 @@ def update(self): if self._extensions_goal_state is None or most_recent.created_on_timestamp > self._extensions_goal_state.created_on_timestamp: self._extensions_goal_state = most_recent - # For Fast Track goal states, verify that the required certificates are in the goal state + # + # For Fast Track goal states, verify that the required certificates are in the goal state. + # + # Some scenarios can produce inconsistent goal states. 
For example, during hibernation/resume, the Fabric goal state changes (the + # tenant certificate is re-generated when the VM is restarted) *without* the incarnation necessarily changing (e.g. if the incarnation + # is 1 before the hibernation; on resume the incarnation is set to 1 even though the goal state has a new certificate). If a Fast + # Track goal state comes after that, the extensions will need the new certificate. The Agent needs to refresh the goal state in that + # case, to ensure it fetches the new certificate. + # if self.extensions_goal_state.source == GoalStateSource.FastTrack: - for extension in self.extensions_goal_state.extensions: - for settings in extension.settings: - if settings.protectedSettings is None: - continue - certificates = self.certs.summary - if not any(settings.certificateThumbprint == c['thumbprint'] for c in certificates): - message = "Certificate {0} needed by {1} is missing from the goal state".format(settings.certificateThumbprint, extension.name) - add_event(op=WALAEventOperation.VmSettings, message=message, is_success=False) + self._check_certificates() + + def _check_certificates(self): + for extension in self.extensions_goal_state.extensions: + for settings in extension.settings: + if settings.protectedSettings is None: + continue + certificates = self.certs.summary + if not any(settings.certificateThumbprint == c['thumbprint'] for c in certificates): + message = "Certificate {0} needed by {1} is missing from the goal state".format(settings.certificateThumbprint, extension.name) + raise GoalStateInconsistentError(message) def _restore_wire_server_goal_state(self, incarnation, xml_text, xml_doc, vm_settings_support_stopped_error): - logger.info('The HGAP stopped supporting vmSettings; will fetched the goal state from the WireServer.') + self.logger.info('The HGAP stopped supporting vmSettings; will fetched the goal state from the WireServer.') self._history = GoalStateHistory(datetime.datetime.utcnow(), incarnation) 
self._history.save_goal_state(xml_text) self._extensions_goal_state = self._fetch_full_wire_server_goal_state(incarnation, xml_doc) @@ -207,7 +243,7 @@ def _restore_wire_server_goal_state(self, incarnation, xml_text, xml_doc, vm_set self._extensions_goal_state.is_outdated = True msg = "Fetched a Fabric goal state older than the most recent FastTrack goal state; will skip it.\nFabric: {0}\nFastTrack: {1}".format( self._extensions_goal_state.created_on_timestamp, vm_settings_support_stopped_error.timestamp) - logger.info(msg) + self.logger.info(msg) add_event(op=WALAEventOperation.VmSettings, message=msg, is_success=True) def save_to_history(self, data, file_name): @@ -249,7 +285,7 @@ def _fetch_goal_state(wire_client): return incarnation, xml_text, xml_doc @staticmethod - def _fetch_vm_settings(wire_client): + def _fetch_vm_settings(wire_client, force_update=False): """ Issues an HTTP request (HostGAPlugin) for the vm settings and returns the response as an ExtensionsGoalState. """ @@ -258,11 +294,11 @@ def _fetch_vm_settings(wire_client): if conf.get_enable_fast_track(): try: try: - vm_settings, vm_settings_updated = wire_client.get_host_plugin().fetch_vm_settings() + vm_settings, vm_settings_updated = wire_client.get_host_plugin().fetch_vm_settings(force_update=force_update) except ResourceGoneError: # retry after refreshing the HostGAPlugin GoalState.update_host_plugin_headers(wire_client) - vm_settings, vm_settings_updated = wire_client.get_host_plugin().fetch_vm_settings() + vm_settings, vm_settings_updated = wire_client.get_host_plugin().fetch_vm_settings(force_update=force_update) except VmSettingsSupportStopped: raise @@ -284,8 +320,8 @@ def _fetch_full_wire_server_goal_state(self, incarnation, xml_doc): Returns the value of ExtensionsConfig. 
""" try: - logger.info('') - logger.info('Fetching full goal state from the WireServer [incarnation {0}]', incarnation) + self.logger.info('') + self.logger.info('Fetching full goal state from the WireServer [incarnation {0}]', incarnation) role_instance = find(xml_doc, "RoleInstance") role_instance_id = findtext(role_instance, "InstanceId") @@ -317,7 +353,11 @@ def _fetch_full_wire_server_goal_state(self, incarnation, xml_doc): if certs_uri is not None: xml_text = self._wire_client.fetch_config(certs_uri, self._wire_client.get_header_for_cert()) certs = Certificates(xml_text) - # Save the certificate summary, which includes only the thumbprint but not the certificate itself, to the goal state history + # Log and save the certificates summary (i.e. the thumbprint but not the certificate itself) to the goal state history + for c in certs.summary: + logger.info("Downloaded certificate {0}".format(c)) + if len(certs.warnings) > 0: + logger.warn(certs.warnings) self._history.save_certificates(json.dumps(certs.summary)) remote_access = None @@ -339,10 +379,10 @@ def _fetch_full_wire_server_goal_state(self, incarnation, xml_doc): return extensions_config except Exception as exception: - logger.warn("Fetching the goal state failed: {0}", ustr(exception)) + self.logger.warn("Fetching the goal state failed: {0}", ustr(exception)) raise ProtocolError(msg="Error fetching goal state", inner=exception) finally: - logger.info('Fetch goal state completed') + self.logger.info('Fetch goal state completed') class HostingEnv(object): @@ -366,6 +406,7 @@ class Certificates(object): def __init__(self, xml_text): self.cert_list = CertList() self.summary = [] # debugging info + self.warnings = [] # Save the certificates local_file = os.path.join(conf.get_lib_dir(), CERTS_FILE_NAME) @@ -448,11 +489,10 @@ def __init__(self, xml_text): else: # Since private key has *no* matching certificate, # it will not be named correctly - logger.warn("Found NO matching cert/thumbprint for private key!") 
+ self.warnings.append("Found NO matching cert/thumbprint for private key!") for pubkey, thumbprint in thumbprints.items(): has_private_key = pubkey in prvs - logger.info("Downloaded certificate with thumbprint {0} (has private key: {1})".format(thumbprint, has_private_key)) self.summary.append({"thumbprint": thumbprint, "hasPrivateKey": has_private_key}) for v1_cert in v1_cert_list: diff --git a/azurelinuxagent/common/protocol/hostplugin.py b/azurelinuxagent/common/protocol/hostplugin.py index 81b9062566..5f795dca3e 100644 --- a/azurelinuxagent/common/protocol/hostplugin.py +++ b/azurelinuxagent/common/protocol/hostplugin.py @@ -453,7 +453,7 @@ def get_fast_track_timestamp(): HostPluginProtocol._get_fast_track_state_file(), ustr(e)) return timeutil.create_timestamp(datetime.datetime.utcnow()) - def fetch_vm_settings(self): + def fetch_vm_settings(self, force_update=False): """ Queries the vmSettings from the HostGAPlugin and returns an (ExtensionsGoalState, bool) tuple with the vmSettings and a boolean indicating if they are an updated (True) or a cached value (False). 
@@ -491,7 +491,7 @@ def format_message(msg): # Raise VmSettingsNotSupported directly instead of using raise_not_supported() to avoid resetting the timestamp for the next check raise VmSettingsNotSupported() - etag = None if self._cached_vm_settings is None else self._cached_vm_settings.etag + etag = None if force_update or self._cached_vm_settings is None else self._cached_vm_settings.etag correlation_id = str(uuid.uuid4()) self._vm_settings_error_reporter.report_request() diff --git a/azurelinuxagent/common/protocol/wire.py b/azurelinuxagent/common/protocol/wire.py index 40e58cc0f5..7923bea75d 100644 --- a/azurelinuxagent/common/protocol/wire.py +++ b/azurelinuxagent/common/protocol/wire.py @@ -83,8 +83,8 @@ def detect(self): logger.info('Initializing goal state during protocol detection') self.client.update_goal_state(force_update=True) - def update_goal_state(self): - self.client.update_goal_state() + def update_goal_state(self, silent=False): + self.client.update_goal_state(silent=silent) def update_host_plugin_from_goal_state(self): self.client.update_host_plugin_from_goal_state() @@ -759,18 +759,18 @@ def update_host_plugin(self, container_id, role_config_name): self._host_plugin.update_container_id(container_id) self._host_plugin.update_role_config_name(role_config_name) - def update_goal_state(self, force_update=False): + def update_goal_state(self, force_update=False, silent=False): """ Updates the goal state if the incarnation or etag changed or if 'force_update' is True """ try: if force_update: - logger.info("Forcing an update of the goal state..") + logger.info("Forcing an update of the goal state.") if self._goal_state is None or force_update: - self._goal_state = GoalState(self) + self._goal_state = GoalState(self, silent=silent) else: - self._goal_state.update() + self._goal_state.update(silent=silent) except ProtocolError: raise diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py index bf0b539938..cbf0ac38b2 100644 --- 
a/azurelinuxagent/ga/update.py +++ b/azurelinuxagent/ga/update.py @@ -168,7 +168,8 @@ def __init__(self): # these members are used to avoid reporting errors too frequently self._heartbeat_update_goal_state_error_count = 0 - self._last_try_update_goal_state_failed = False + self._update_goal_state_error_count = 0 + self._update_goal_state_last_error_report = datetime.min self._report_status_last_failed_goal_state = None # incarnation of the last goal state that has been fully processed @@ -481,13 +482,16 @@ def _try_update_goal_state(self, protocol): Attempts to update the goal state and returns True on success or False on failure, sending telemetry events about the failures. """ try: - protocol.update_goal_state() + max_errors_to_log = 3 + + protocol.update_goal_state(silent=self._update_goal_state_error_count >= max_errors_to_log) self._goal_state = protocol.get_goal_state() - if self._last_try_update_goal_state_failed: - self._last_try_update_goal_state_failed = False - message = u"Retrieving the goal state recovered from previous errors" + if self._update_goal_state_error_count > 0: + self._update_goal_state_error_count = 0 + message = u"Fetching the goal state recovered from previous errors. 
Fetched {0} (certificates: {1})".format( + self._goal_state.extensions_goal_state.id, self._goal_state.certs.summary) add_event(AGENT_NAME, op=WALAEventOperation.FetchGoalState, version=CURRENT_VERSION, is_success=True, message=message, log_event=False) logger.info(message) @@ -497,15 +501,21 @@ def _try_update_goal_state(self, protocol): self._supports_fast_track = False except Exception as e: - if not self._last_try_update_goal_state_failed: - self._last_try_update_goal_state_failed = True - message = u"An error occurred while retrieving the goal state: {0}".format(textutil.format_exception(e)) - logger.warn(message) - add_event(AGENT_NAME, op=WALAEventOperation.FetchGoalState, version=CURRENT_VERSION, is_success=False, message=message, log_event=False) - message = u"Attempts to retrieve the goal state are failing: {0}".format(ustr(e)) - logger.periodic_warn(logger.EVERY_SIX_HOURS, "[PERIODIC] {0}".format(message)) + self._update_goal_state_error_count += 1 self._heartbeat_update_goal_state_error_count += 1 + if self._update_goal_state_error_count <= max_errors_to_log: + message = u"Error fetching the goal state: {0}".format(textutil.format_exception(e)) + logger.error(message) + add_event(op=WALAEventOperation.FetchGoalState, is_success=False, message=message, log_event=False) + self._update_goal_state_last_error_report = datetime.now() + else: + if self._update_goal_state_last_error_report + timedelta(hours=6) > datetime.now(): + self._update_goal_state_last_error_report = datetime.now() + message = u"Fetching the goal state is still failing: {0}".format(textutil.format_exception(e)) + logger.error(message) + add_event(op=WALAEventOperation.FetchGoalState, is_success=False, message=message, log_event=False) return False + return True def __update_guest_agent(self, protocol): @@ -559,8 +569,8 @@ def handle_updates_for_requested_version(): raise AgentUpgradeExitException( "Exiting current process to {0} to the request Agent version {1}".format(prefix, 
requested_version)) - # Ignore new agents if updating is disabled - if not conf.get_autoupdate_enabled(): + # Skip the update if there is no goal state yet or auto-update is disabled + if self._goal_state is None or not conf.get_autoupdate_enabled(): return False if self._download_agent_if_upgrade_available(protocol): @@ -600,11 +610,14 @@ def _process_goal_state(self, exthandlers_handler, remote_access_handler): protocol = exthandlers_handler.protocol # update self._goal_state - self._try_update_goal_state(protocol) - - # Update the Guest Agent if a new version is available - if self._goal_state is not None: + if not self._try_update_goal_state(protocol): + # agent updates and status reporting should be done even when the goal state is not updated self.__update_guest_agent(protocol) + self._report_status(exthandlers_handler) + return + + # check for agent updates + self.__update_guest_agent(protocol) if self._processing_new_extensions_goal_state(): if not self._extensions_summary.converged: @@ -616,8 +629,7 @@ def _process_goal_state(self, exthandlers_handler, remote_access_handler): self._extensions_summary = ExtensionsSummary() exthandlers_handler.run() - # always report status, even if the goal state did not change - # do it before processing the remote access, since that operation can take a long time + # report status before processing the remote access, since that operation can take a long time self._report_status(exthandlers_handler) if self._processing_new_incarnation(): diff --git a/tests/data/wire/certs-2.xml b/tests/data/wire/certs-2.xml new file mode 100644 index 0000000000..66a231ee87 --- /dev/null +++ b/tests/data/wire/certs-2.xml @@ -0,0 +1,85 @@ + + + 2012-11-30 + 5 + Pkcs7BlobWithPfxContents + MIIOgwYJKoZIhvcNAQcDoIIOdDCCDnACAQIxggEwMIIBLAIBAoAUiF8ZYMs9mMa8 +QOEMxDaIhGza+0IwDQYJKoZIhvcNAQEBBQAEggEAQW7GyeRVEhHSU1/dzV0IndH0 +rDQk+27MvlsWTcpNcgGFtfRYxu5bzmp0+DoimX3pRBlSFOpMJ34jpg4xs78EsSWH 
+FRhCf3EGuEUBHo6yR8FhXDTuS7kZ0UmquiCI2/r8j8gbaGBNeP8IRizcAYrPMA5S +E8l1uCrw7DHuLscbVni/7UglGaTfFS3BqS5jYbiRt2Qh3p+JPUfm51IG3WCIw/WS +2QHebmHxvMFmAp8AiBWSQJizQBEJ1lIfhhBMN4A7NadMWAe6T2DRclvdrQhJX32k +amOiogbW4HJsL6Hphn7Frrw3CENOdWMAvgQBvZ3EjAXgsJuhBA1VIrwofzlDljCC +DTUGCSqGSIb3DQEHATAUBggqhkiG9w0DBwQIxcvw9qx4y0qAgg0QrINXpC23BWT2 +Fb9N8YS3Be9eO3fF8KNdM6qGf0kKR16l/PWyP2L+pZxCcCPk83d070qPdnJK9qpJ +6S1hI80Y0oQnY9VBFrdfkc8fGZHXqm5jNS9G32v/AxYpJJC/qrAQnWuOdLtOZaGL +94GEh3XRagvz1wifv8SRI8B1MzxrpCimeMxHkL3zvJFg9FjLGdrak868feqhr6Nb +pqH9zL7bMq8YP788qTRELUnL72aDzGAM7HEj7V4yu2uD3i3Ryz3bqWaj9IF38Sa0 +6rACBkiNfZBPgExoMUm2GNVyx8hTis2XKRgz4NLh29bBkKrArK9sYDncE9ocwrrX +AQ99yn03Xv6TH8bRp0cSj4jzBXc5RFsUQG/LxzJVMjvnkDbwNE41DtFiYz5QVcv1 +cMpTH16YfzSL34a479eNq/4+JAs/zcb2wjBskJipMUU4hNx5fhthvfKwDOQbLTqN +HcP23iPQIhjdUXf6gpu5RGu4JZ0dAMHMHFKvNL6TNejwx/H6KAPp6rCRsYi6QhAb +42SXdZmhAyQsFpGD9U5ieJApqeCHfj9Xhld61GqLJA9+WLVhDPADjqHoAVvrOkKH +OtPegId/lWnCB7p551klAjiEA2/DKxFBIAEhqZpiLl+juZfMXovkdmGxMP4gvNNF +gbS2k5A0IJ8q51gZcH1F56smdAmi5kvhPnFdy/9gqeI/F11F1SkbPVLImP0mmrFi +zQD5JGfEu1psUYvhpOdaYDkmAK5qU5xHSljqZFz5hXNt4ebvSlurHAhunJb2ln3g +AJUHwtZnVBrtYMB0w6fdwYqMxXi4vLeqUiHtIQtbOq32zlSryNPQqG9H0iP9l/G1 +t7oUfr9woI/B0kduaY9jd5Qtkqs1DoyfNMSaPNohUK/CWOTD51qOadzSvK0hJ+At +033PFfv9ilaX6GmzHdEVEanrn9a+BoBCnGnuysHk/8gdswj9OzeCemyIFJD7iObN +rNex3SCf3ucnAejJOA0awaLx88O1XTteUjcFn26EUji6DRK+8JJiN2lXSyQokNeY +ox6Z4hFQDmw/Q0k/iJqe9/Dq4zA0l3Krkpra0DZoWh5kzYUA0g5+Yg6GmRNRa8YG +tuuD6qK1SBEzmCYff6ivjgsXV5+vFBSjEpx2dPEaKdYxtHMOjkttuTi1mr+19dVf +hSltbzfISbV9HafX76dhwZJ0QwsUx+aOW6OrnK8zoQc5AFOXpe9BrrOuEX01qrM0 +KX5tS8Zx5HqDLievjir194oi3r+nAiG14kYlGmOTHshu7keGCgJmzJ0iVG/i+TnV +ZSLyd8OqV1F6MET1ijgR3OPL3kt81Zy9lATWk/DgKbGBkkKAnXO2HUw9U34JFyEy +vEc81qeHci8sT5QKSFHiP3r8EcK8rT5k9CHpnbFmg7VWSMVD0/wRB/C4BiIw357a +xyJ/q1NNvOZVAyYzIzf9TjwREtyeHEo5kS6hyWSn7fbFf3sNGO2I30veWOvE6kFA +HMtF3NplOrTYcM7fAK5zJCBK20oU645TxI8GsICMog7IFidFMdRn4MaXpwAjEZO4 +44m2M+4XyeRCAZhp1Fu4mDiHGqgd44mKtwvLACVF4ygWZnACDpI17X88wMnwL4uU 
+vgehLZdAE89gvukSCsET1inVBnn/hVenCRbbZ++IGv2XoYvRfeezfOoNUcJXyawQ +JFqN0CRB5pliuCesTO2urn4HSwGGoeBd507pGWZmOAjbNjGswlJJXF0NFnNW/zWw +UFYy+BI9axuhWTSnCXbNbngdNQKHznKe1Lwit6AI3U9jS33pM3W+pwUAQegVdtpG +XT01YgiMCBX+b8B/xcWTww0JbeUwKXudzKsPhQmaA0lubAo04JACMfON8jSZCeRV +TyIzgacxGU6YbEKH4PhYTGl9srcWIT9iGSYD53V7Kyvjumd0Y3Qc3JLnuWZT6Oe3 +uJ4xz9jJtoaTDvPJQNK3igscjZnWZSP8XMJo1/f7vbvD57pPt1Hqdirp1EBQNshk +iX9CUh4fuGFFeHf6MtGxPofbXmvA2GYcFsOez4/2eOTEmo6H3P4Hrya97XHS0dmD +zFSAjzAlacTrn1uuxtxFTikdOwvdmQJJEfyYWCB1lqWOZi97+7nzqyXMLvMgmwug +ZF/xHFMhFTR8Wn7puuwf36JpPQiM4oQ/Lp66zkS4UlKrVsmSXIXudLMg8SQ5WqK8 +DjevEZwsHHaMtfDsnCAhAdRc2jCpyHKKnmhCDdkcdJJEymWKILUJI5PJ3XtiMHnR +Sa35OOICS0lTq4VwhUdkGwGjRoY1GsriPHd6LOt1aom14yJros1h7ta604hSCn4k +zj9p7wY9gfgkXWXNfmarrZ9NNwlHxzgSva+jbJcLmE4GMX5OFHHGlRj/9S1xC2Wf +MY9orzlooGM74NtmRi4qNkFj3dQCde8XRR4wh2IvPUCsr4j+XaoCoc3R5Rn/yNJK +zIkccJ2K14u9X/A0BLXHn5Gnd0tBYcVOqP6dQlW9UWdJC/Xooh7+CVU5cZIxuF/s +Vvg+Xwiv3XqekJRu3cMllJDp5rwe5EWZSmnoAiGKjouKAIszlevaRiD/wT6Zra3c +Wn/1U/sGop6zRscHR7pgI99NSogzpVGThUs+ez7otDBIdDbLpMjktahgWoi1Vqhc +fNZXjA6ob4zTWY/16Ys0YWxHO+MtyWTMP1dnsqePDfYXGUHe8yGxylbcjfrsVYta +4H6eYR86eU3eXB+MpS/iA4jBq4QYWR9QUkd6FDfmRGgWlMXhisPv6Pfnj384NzEV +Emeg7tW8wzWR64EON9iGeGYYa2BBl2FVaayMEoUhthhFcDM1r3/Mox5xF0qnlys4 +goWkMzqbzA2t97bC0KDGzkcHT4wMeiJBLDZ7S2J2nDAEhcTLY0P2zvOB4879pEWx +Bd15AyG1DvNssA5ooaDzKi/Li6NgDuMJ8W7+tmsBwDvwuf2N3koqBeXfKhR4rTqu +Wg1k9fX3+8DzDf0EjtDZJdfWZAynONi1PhZGbNbaMKsQ+6TflkCACInRdOADR5GM +rL7JtrgF1a9n0HD9vk2WGZqKI71tfS8zODkOZDD8aAusD2DOSmVZl48HX/t4i4Wc +3dgi/gkCMrfK3wOujb8tL4zjnlVkM7kzKk0MgHuA1w81zFjeMFvigHes4IWhQVcz +ek3l4bGifI2kzU7bGIi5e/019ppJzGsVcrOE/3z4GS0DJVk6fy7MEMIFx0LhJPlL +T+9HMH85sSYb97PTiMWpfBvNw3FSC7QQT9FC3L8d/XtMY3NvZoc7Fz7cSGaj7NXG +1OgVnAzMunPa3QaduoxMF9346s+4a+FrpRxL/3bb4skojjmmLqP4dsbD1uz0fP9y +xSifnTnrtjumYWMVi+pEb5kR0sTHl0XS7qKRi3SEfv28uh72KdvcufonIA5rnEb5 ++yqAZiqW2OxVsRoVLVODPswP4VIDiun2kCnfkQygPzxlZUeDZur0mmZ3vwC81C1Q +dZcjlukZcqUaxybUloUilqfNeby+2Uig0krLh2+AM4EqR63LeZ/tk+zCitHeRBW0 
+wl3Bd7ShBFg6kN5tCJlHf/G6suIJVr+A9BXfwekO9+//CutKakCwmJTUiNWbQbtN +q3aNCnomyD3WjvUbitVO0CWYjZrmMLIsPtzyLQydpT7tjXpHgvwm5GYWdUGnNs4y +NbA262sUl7Ku/GDw1CnFYXbxl+qxbucLtCdSIFR2xUq3rEO1MXlD/txdTxn6ANax +hi9oBg8tHzuGYJFiCDCvbVVTHgWUSnm/EqfclpJzGmxt8g7vbaohW7NMmMQrLBFP +G6qBypgvotx1iJWaHVLNNiXvyqQwTtelNPAUweRoNawBp/5KTwwy/tHeF0gsVQ7y +mFX4umub9YT34Lpe7qUPKNxXzFcUgAf1SA6vyZ20UI7p42S2OT2PrahJ+uO6LQVD ++REhtN0oyS3G6HzAmKkBgw7LcV3XmAr39iSR7mdmoHSJuI9bjveAPhniK+N6uuln +xf17Qnw5NWfr9MXcLli7zqwMglU/1bNirkwVqf/ogi/zQ3JYCo6tFGf/rnGQAORJ +hvOq2SEYXnizPPIH7VrpE16+jUXwgpiQ8TDyeLPmpZVuhXTXiCaJO5lIwmLQqkmg +JqNiT9V44sksNFTGNKgZo5O9rEqfqX4dLjfv6pGJL+MFXD9if4f1JQiXJfhcRcDh +Ff9B6HukgbJ1H96eLUUNj8sL1+WPOqawkS4wg7tVaERE8CW7mqk15dCysn9shSut +I+7JU7+dZsxpj0ownrxuPAFuT8ZlcBPrFzPUwTlW1G0CbuEco8ijfy5IfbyGCn5s +K/0bOfAuNVGoOpLZ1dMki2bGdBwQOQlkLKhAxYcCVQ0/urr1Ab+VXU9kBsIU8ssN +GogKngYpuUV0PHmpzmobielOHLjNqA2v9vQSV3Ed48wRy5OCwLX1+vYmYlggMDGt +wfl+7QbXYf+k5WnELf3IqYvh8ZWexa0= + + \ No newline at end of file diff --git a/tests/data/wire/goal_state.xml b/tests/data/wire/goal_state.xml index 579b5e87ad..0ccff211c9 100644 --- a/tests/data/wire/goal_state.xml +++ b/tests/data/wire/goal_state.xml @@ -15,12 +15,12 @@ b61f93d0-e1ed-40b2-b067-22c243233448.MachineRole_IN_0 Started - http://168.63.129.16:80/hostingenvuri/ - http://168.63.129.16:80/sharedconfiguri/ - http://168.63.129.16:80/certificatesuri/ - http://168.63.129.16:80/extensionsconfiguri/ - http://168.63.129.16:80/fullconfiguri/ - b61f93d0-e1ed-40b2-b067-22c243233448.1.b61f93d0-e1ed-40b2-b067-22c243233448.2.MachineRole_IN_0.xml + http://168.63.129.16:80/machine/865a6683-91d8-450f-99ae/bc8b9d47%2Db5ed%2D4704%2D85d9%2Dfd74cc967ec2.%5Fcanary?comp=config&type=hostingEnvironmentConfig&incarnation=1 + http://168.63.129.16:80/machine/865a6683-91d8-450f-99ae/bc8b9d47%2Db5ed%2D4704%2D85d9%2Dfd74cc967ec2.%5Fcanary?comp=config&type=sharedConfig&incarnation=1 + 
http://168.63.129.16:80/machine/865a6683-91d8-450f-99ae/bc8b9d47%2Db5ed%2D4704%2D85d9%2Dfd74cc967ec2.%5Fcanary?comp=config&type=extensionsConfig&incarnation=1 + http://168.63.129.16:80/machine/865a6683-91d8-450f-99ae/bc8b9d47%2Db5ed%2D4704%2D85d9%2Dfd74cc967ec2.%5Fcanary?comp=config&type=fullConfig&incarnation=1 + http://168.63.129.16:80/machine/865a6683-91d8-450f-99ae/bc8b9d47%2Db5ed%2D4704%2D85d9%2Dfd74cc967ec2.%5Fcanary?comp=certificates&incarnation=1 + bc8b9d47-b5ed-4704-85d9-fd74cc967ec2.5.bc8b9d47-b5ed-4704-85d9-fd74cc967ec2.5._canary.1.xml diff --git a/tests/data/wire/goal_state_no_ext.xml b/tests/data/wire/goal_state_no_ext.xml index ef7e3989e6..e9048daf6e 100644 --- a/tests/data/wire/goal_state_no_ext.xml +++ b/tests/data/wire/goal_state_no_ext.xml @@ -15,11 +15,11 @@ b61f93d0-e1ed-40b2-b067-22c243233448.MachineRole_IN_0 Started - http://168.63.129.16:80/hostingenvuri/ - http://168.63.129.16:80/sharedconfiguri/ - http://168.63.129.16:80/certificatesuri/ - http://168.63.129.16:80/fullconfiguri/ - b61f93d0-e1ed-40b2-b067-22c243233448.1.b61f93d0-e1ed-40b2-b067-22c243233448.2.MachineRole_IN_0.xml + http://168.63.129.16:80/machine/865a6683-91d8-450f-99ae/bc8b9d47%2Db5ed%2D4704%2D85d9%2Dfd74cc967ec2.%5Fcanary?comp=config&type=hostingEnvironmentConfig&incarnation=1 + http://168.63.129.16:80/machine/865a6683-91d8-450f-99ae/bc8b9d47%2Db5ed%2D4704%2D85d9%2Dfd74cc967ec2.%5Fcanary?comp=config&type=sharedConfig&incarnation=1 + http://168.63.129.16:80/machine/865a6683-91d8-450f-99ae/bc8b9d47%2Db5ed%2D4704%2D85d9%2Dfd74cc967ec2.%5Fcanary?comp=config&type=fullConfig&incarnation=1 + http://168.63.129.16:80/machine/865a6683-91d8-450f-99ae/bc8b9d47%2Db5ed%2D4704%2D85d9%2Dfd74cc967ec2.%5Fcanary?comp=certificates&incarnation=1 + bc8b9d47-b5ed-4704-85d9-fd74cc967ec2.5.bc8b9d47-b5ed-4704-85d9-fd74cc967ec2.5._canary.1.xml diff --git a/tests/data/wire/goal_state_remote_access.xml b/tests/data/wire/goal_state_remote_access.xml index c2840645fd..279006f219 100644 --- 
a/tests/data/wire/goal_state_remote_access.xml +++ b/tests/data/wire/goal_state_remote_access.xml @@ -17,12 +17,13 @@ b61f93d0-e1ed-40b2-b067-22c243233448.MachineRole_IN_0 Started - http://168.63.129.16:80/hostingenvuri/ - http://168.63.129.16:80/sharedconfiguri/ - http://168.63.129.16:80/certificatesuri/ - http://168.63.129.16:80/extensionsconfiguri/ - http://168.63.129.16:80/fullconfiguri/ - b61f93d0-e1ed-40b2-b067-22c243233448.1.b61f93d0-e1ed-40b2-b067-22c243233448.2.MachineRole_IN_0.xml + b61f93d0-e1ed-40b2-b067-22c243233448.1.b61f93d0-e1ed-40b2-b067-22c243233448.2.MachineRole_IN_0.xml + http://168.63.129.16:80/machine/865a6683-91d8-450f-99ae/bc8b9d47%2Db5ed%2D4704%2D85d9%2Dfd74cc967ec2.%5Fcanary?comp=config&type=hostingEnvironmentConfig&incarnation=1 + http://168.63.129.16:80/machine/865a6683-91d8-450f-99ae/bc8b9d47%2Db5ed%2D4704%2D85d9%2Dfd74cc967ec2.%5Fcanary?comp=config&type=sharedConfig&incarnation=1 + http://168.63.129.16:80/machine/865a6683-91d8-450f-99ae/bc8b9d47%2Db5ed%2D4704%2D85d9%2Dfd74cc967ec2.%5Fcanary?comp=config&type=extensionsConfig&incarnation=1 + http://168.63.129.16:80/machine/865a6683-91d8-450f-99ae/bc8b9d47%2Db5ed%2D4704%2D85d9%2Dfd74cc967ec2.%5Fcanary?comp=config&type=fullConfig&incarnation=1 + http://168.63.129.16:80/machine/865a6683-91d8-450f-99ae/bc8b9d47%2Db5ed%2D4704%2D85d9%2Dfd74cc967ec2.%5Fcanary?comp=certificates&incarnation=1 + bc8b9d47-b5ed-4704-85d9-fd74cc967ec2.5.bc8b9d47-b5ed-4704-85d9-fd74cc967ec2.5._canary.1.xml diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index 8a648a8c41..91545db2e0 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -1898,7 +1898,7 @@ def update_goal_state_and_run_handler(): def test_it_should_wait_to_fetch_first_goal_state(self): with _get_update_handler() as (update_handler, protocol): - with patch("azurelinuxagent.common.logger.warn") as patch_warn: + with patch("azurelinuxagent.common.logger.error") as patch_error: with patch("azurelinuxagent.common.logger.info") 
as patch_info: # Fail GS fetching for the 1st 5 times the agent asks for it update_handler._fail_gs_count = 5 @@ -1914,13 +1914,13 @@ def get_handler(url, **kwargs): self.assertTrue(update_handler.exit_mock.called, "The process should have exited") exit_args, _ = update_handler.exit_mock.call_args - self.assertEqual(exit_args[0], 0, "Exit code should be 0; List of all warnings logged by the agent: {0}".format( - patch_warn.call_args_list)) - warn_msgs = [args[0] for (args, _) in patch_warn.call_args_list if - "An error occurred while retrieving the goal state" in args[0]] - self.assertTrue(len(warn_msgs) > 0, "Error should've been reported when failed to retrieve GS") + self.assertEqual(exit_args[0], 0, "Exit code should be 0; List of all errors logged by the agent: {0}".format( + patch_error.call_args_list)) + error_msgs = [args[0] for (args, _) in patch_error.call_args_list if + "Error fetching the goal state" in args[0]] + self.assertTrue(len(error_msgs) > 0, "Error should've been reported when failed to retrieve GS") info_msgs = [args[0] for (args, _) in patch_info.call_args_list if - "Retrieving the goal state recovered from previous errors" in args[0]] + "Fetching the goal state recovered from previous errors." 
in args[0]] self.assertTrue(len(info_msgs) > 0, "Agent should've logged a message when recovered from GS errors") def test_it_should_reset_legacy_blacklisted_agents_on_process_start(self): @@ -2684,9 +2684,9 @@ def create_log_and_telemetry_mocks(): calls_to_strings = lambda calls: (str(c) for c in calls) filter_calls = lambda calls, regex=None: (c for c in calls_to_strings(calls) if regex is None or re.match(regex, c)) logger_calls = lambda regex=None: [m for m in filter_calls(logger.method_calls, regex)] # pylint: disable=used-before-assignment,unnecessary-comprehension - warnings = lambda: logger_calls(r'call.warn\(.*An error occurred while retrieving the goal state.*') - periodic_warnings = lambda: logger_calls(r'call.periodic_warn\(.*Attempts to retrieve the goal state are failing.*') - success_messages = lambda: logger_calls(r'call.info\(.*Retrieving the goal state recovered from previous errors.*') + errors = lambda: logger_calls(r'call.error\(.*Error fetching the goal state.*') + periodic_errors = lambda: logger_calls(r'call.error\(.*Fetching the goal state is still failing*') + success_messages = lambda: logger_calls(r'call.info\(.*Fetching the goal state recovered from previous errors.*') telemetry_calls = lambda regex=None: [m for m in filter_calls(add_event.mock_calls, regex)] # pylint: disable=used-before-assignment,unnecessary-comprehension goal_state_events = lambda: telemetry_calls(r".*op='FetchGoalState'.*") @@ -2711,10 +2711,8 @@ def create_log_and_telemetry_mocks(): with create_log_and_telemetry_mocks() as (logger, add_event): update_handler._try_update_goal_state(protocol) - w = warnings() - pw = periodic_warnings() - self.assertEqual(1, len(w), "A failure should have produced a warning: [{0}]".format(w)) - self.assertEqual(1, len(pw), "A failure should have produced a periodic warning: [{0}]".format(pw)) + e = errors() + self.assertEqual(1, len(e), "A failure should have produced an error: [{0}]".format(e)) gs = goal_state_events() 
self.assertTrue(len(gs) == 1 and 'is_success=False' in gs[0], "A failure should produce a telemetry event (success=false): [{0}]".format(gs)) @@ -2723,17 +2721,17 @@ def create_log_and_telemetry_mocks(): # ... and errors continue happening... # with create_log_and_telemetry_mocks() as (logger, add_event): - update_handler._try_update_goal_state(protocol) - update_handler._try_update_goal_state(protocol) - update_handler._try_update_goal_state(protocol) + for _ in range(5): + update_handler._update_goal_state_last_error_report = datetime.now() + timedelta(days=1) + update_handler._try_update_goal_state(protocol) - w = warnings() - pw = periodic_warnings() - self.assertTrue(len(w) == 0, "Subsequent failures should not produce warnings: [{0}]".format(w)) - self.assertEqual(len(pw), 3, "Subsequent failures should produce periodic warnings: [{0}]".format(pw)) + e = errors() + pe = periodic_errors() + self.assertEqual(2, len(e), "Two additional errors should have been reported: [{0}]".format(e)) + self.assertEqual(len(pe), 3, "Subsequent failures should produce periodic errors: [{0}]".format(pe)) tc = telemetry_calls() - self.assertTrue(len(tc) == 0, "Subsequent failures should not produce any telemetry events: [{0}]".format(tc)) + self.assertTrue(len(tc) == 5, "The failures should have produced telemetry events. Got: [{0}]".format(tc)) # # ... 
until we finally succeed @@ -2743,10 +2741,10 @@ def create_log_and_telemetry_mocks(): update_handler._try_update_goal_state(protocol) s = success_messages() - w = warnings() - pw = periodic_warnings() + e = errors() + pe = periodic_errors() self.assertEqual(len(s), 1, "Recovering after failures should have produced an info message: [{0}]".format(s)) - self.assertTrue(len(w) == 0 and len(pw) == 0, "Recovering after failures should have not produced any warnings: [{0}] [{1}]".format(w, pw)) + self.assertTrue(len(e) == 0 and len(pe) == 0, "Recovering after failures should have not produced any errors: [{0}] [{1}]".format(e, pe)) gs = goal_state_events() self.assertTrue(len(gs) == 1 and 'is_success=True' in gs[0], "Recovering after failures should produce a telemetry event (success=true): [{0}]".format(gs)) diff --git a/tests/protocol/HttpRequestPredicates.py b/tests/protocol/HttpRequestPredicates.py index 39243d5431..db3ab8b2a6 100644 --- a/tests/protocol/HttpRequestPredicates.py +++ b/tests/protocol/HttpRequestPredicates.py @@ -11,6 +11,22 @@ class HttpRequestPredicates(object): def is_goal_state_request(url): return url.lower() == 'http://{0}/machine/?comp=goalstate'.format(restutil.KNOWN_WIRESERVER_IP) + @staticmethod + def is_certificates_request(url): + return re.match(r'http://{0}(:80)?/machine/.*?comp=certificates'.format(restutil.KNOWN_WIRESERVER_IP), url, re.IGNORECASE) + + @staticmethod + def is_extensions_config_request(url): + return re.match(r'http://{0}(:80)?/machine/.*?comp=config&type=extensionsConfig'.format(restutil.KNOWN_WIRESERVER_IP), url, re.IGNORECASE) + + @staticmethod + def is_hosting_environment_config_request(url): + return re.match(r'http://{0}(:80)?/machine/.*?comp=config&type=hostingEnvironmentConfig'.format(restutil.KNOWN_WIRESERVER_IP), url, re.IGNORECASE) + + @staticmethod + def is_shared_config_request(url): + return re.match(r'http://{0}(:80)?/machine/.*?comp=config&type=sharedConfig'.format(restutil.KNOWN_WIRESERVER_IP), url, 
re.IGNORECASE) + @staticmethod def is_telemetry_request(url): return url.lower() == 'http://{0}/machine?comp=telemetrydata'.format(restutil.KNOWN_WIRESERVER_IP) diff --git a/tests/protocol/mockwiredata.py b/tests/protocol/mockwiredata.py index 218bd29377..7ec311af46 100644 --- a/tests/protocol/mockwiredata.py +++ b/tests/protocol/mockwiredata.py @@ -135,10 +135,10 @@ def __init__(self, data_files=None): "/HealthService": 0, "/vmAgentLog": 0, "goalstate": 0, - "hostingenvuri": 0, - "sharedconfiguri": 0, - "certificatesuri": 0, - "extensionsconfiguri": 0, + "hostingEnvironmentConfig": 0, + "sharedConfig": 0, + "certificates": 0, + "extensionsConfig": 0, "remoteaccessinfouri": 0, "extensionArtifact": 0, "agentArtifact": 0, @@ -198,6 +198,10 @@ def reload(self): if in_vm_artifacts_profile_file is not None: self.in_vm_artifacts_profile = load_data(in_vm_artifacts_profile_file) + def reset_call_counts(self): + for counter in self.call_counts: + self.call_counts[counter] = 0 + def mock_http_get(self, url, *_, **kwargs): content = '' response_headers = [] @@ -217,18 +221,18 @@ def mock_http_get(self, url, *_, **kwargs): elif "goalstate" in url: content = self.goal_state self.call_counts["goalstate"] += 1 - elif "hostingenvuri" in url: + elif HttpRequestPredicates.is_hosting_environment_config_request(url): content = self.hosting_env - self.call_counts["hostingenvuri"] += 1 - elif "sharedconfiguri" in url: + self.call_counts["hostingEnvironmentConfig"] += 1 + elif HttpRequestPredicates.is_shared_config_request(url): content = self.shared_config - self.call_counts["sharedconfiguri"] += 1 - elif "certificatesuri" in url: + self.call_counts["sharedConfig"] += 1 + elif HttpRequestPredicates.is_certificates_request(url): content = self.certs - self.call_counts["certificatesuri"] += 1 - elif "extensionsconfiguri" in url: + self.call_counts["certificates"] += 1 + elif HttpRequestPredicates.is_extensions_config_request(url): content = self.ext_conf - 
self.call_counts["extensionsconfiguri"] += 1 + self.call_counts["extensionsConfig"] += 1 elif "remoteaccessinfouri" in url: content = self.remote_access self.call_counts["remoteaccessinfouri"] += 1 diff --git a/tests/protocol/test_goal_state.py b/tests/protocol/test_goal_state.py index c54a65f9f7..c774171595 100644 --- a/tests/protocol/test_goal_state.py +++ b/tests/protocol/test_goal_state.py @@ -8,13 +8,12 @@ import re import time -from azurelinuxagent.common.event import WALAEventOperation from azurelinuxagent.common.future import httpclient from azurelinuxagent.common.protocol.extensions_goal_state import GoalStateSource, GoalStateChannel from azurelinuxagent.common.protocol.extensions_goal_state_from_extensions_config import ExtensionsGoalStateFromExtensionsConfig from azurelinuxagent.common.protocol.extensions_goal_state_from_vm_settings import ExtensionsGoalStateFromVmSettings from azurelinuxagent.common.protocol import hostplugin -from azurelinuxagent.common.protocol.goal_state import GoalState, _GET_GOAL_STATE_MAX_ATTEMPTS +from azurelinuxagent.common.protocol.goal_state import GoalState, GoalStateInconsistentError, _GET_GOAL_STATE_MAX_ATTEMPTS from azurelinuxagent.common.exception import ProtocolError from azurelinuxagent.common.utils import fileutil from azurelinuxagent.common.utils.archive import ARCHIVE_DIRECTORY_NAME @@ -360,17 +359,54 @@ def http_get_handler(url, *_, **__): self.assertEqual(initial_timestamp, goal_state.extensions_goal_state.created_on_timestamp, "The timestamp of the updated goal state is incorrect") self.assertTrue(goal_state.extensions_goal_state.is_outdated, "The updated goal state should be marked as outdated") - def test_it_should_report_missing_certificates(self): + def test_it_should_raise_when_the_tenant_certificate_is_missing(self): data_file = mockwiredata.DATA_FILE_VM_SETTINGS.copy() - data_file["vm_settings"] = "hostgaplugin/vm_settings-missing_cert.json" with mock_wire_protocol(data_file) as protocol: - with 
patch("azurelinuxagent.common.protocol.goal_state.add_event") as add_event: + data_file["vm_settings"] = "hostgaplugin/vm_settings-missing_cert.json" + protocol.mock_wire_data.reload() + + with self.assertRaises(GoalStateInconsistentError) as context: _ = GoalState(protocol.client) - expected_message = "Certificate 59A10F50FFE2A0408D3F03FE336C8FD5716CF25C needed by Microsoft.OSTCExtensions.VMAccessForLinux is missing from the goal state" - events = [kwargs for _, kwargs in add_event.call_args_list if kwargs['op'] == WALAEventOperation.VmSettings and kwargs['message'] == expected_message] + expected_message = "Certificate 59A10F50FFE2A0408D3F03FE336C8FD5716CF25C needed by Microsoft.OSTCExtensions.VMAccessForLinux is missing from the goal state" + self.assertIn(expected_message, str(context.exception)) + + def test_it_should_refresh_the_goal_state_when_it_is_inconsistent(self): + # + # Some scenarios can produce inconsistent goal states. For example, during hibernation/resume, the Fabric goal state changes (the + # tenant certificate is re-generated when the VM is restarted) *without* the incarnation changing. If a Fast Track goal state + # comes after that, the extensions will need the new certificate. This test simulates that scenario by mocking the certificates + # request and returning first a set of certificates (certs-2.xml) that do not match those needed by the extensions, and then a + # set (certs.xml) that does match. The test then ensures that the goal state was refreshed and the correct certificates were + # fetched. 
+ # + data_files = [ + "wire/certs-2.xml", + "wire/certs.xml" + ] + + def http_get_handler(url, *_, **__): + if HttpRequestPredicates.is_certificates_request(url): + http_get_handler.certificate_requests += 1 + if http_get_handler.certificate_requests < len(data_files): + data = load_data(data_files[http_get_handler.certificate_requests - 1]) + return MockHttpResponse(status=200, body=data.encode('utf-8')) + return None + http_get_handler.certificate_requests = 0 + + with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: + protocol.set_http_handlers(http_get_handler=http_get_handler) + protocol.mock_wire_data.reset_call_counts() + + goal_state = GoalState(protocol.client) + + self.assertEqual(2, protocol.mock_wire_data.call_counts['goalstate'], "There should have been exactly 2 requests for the goal state (original + refresh)") + self.assertEqual(2, http_get_handler.certificate_requests, "There should have been exactly 2 requests for the goal state certificates (original + refresh)") + + thumbprints = [c.thumbprint for c in goal_state.certs.cert_list.certificates] - self.assertTrue( - len(events) == 1, - "Missing certificate 59A10F50FFE2A0408D3F03FE336C8FD5716CF25C was not reported. 
Telemetry: {0}".format([kwargs['message'] for _, kwargs in add_event.call_args_list])) + for extension in goal_state.extensions_goal_state.extensions: + for settings in extension.settings: + if settings.protectedSettings is not None: + self.assertIn(settings.certificateThumbprint, thumbprints, "Certificate is missing from the goal state.") diff --git a/tests/protocol/test_hostplugin.py b/tests/protocol/test_hostplugin.py index 16bb7ef0b6..9f96f7d554 100644 --- a/tests/protocol/test_hostplugin.py +++ b/tests/protocol/test_hostplugin.py @@ -257,9 +257,8 @@ def test_default_channel(self, patch_put, patch_upload, _): # assert host plugin route is called self.assertEqual(1, patch_put.call_count, "Host plugin was not used") - # assert update goal state is only called once, non-forced + # assert update goal state is only called once self.assertEqual(1, wire_protocol.client.update_goal_state.call_count, "Unexpected call count") - self.assertEqual(0, len(wire_protocol.client.update_goal_state.call_args[1]), "Unexpected parameters") # ensure the correct url is used self.assertEqual(sas_url, patch_put.call_args[0][0]) @@ -291,9 +290,8 @@ def test_fallback_channel_503(self, patch_put, patch_upload, _): # assert host plugin route is called self.assertEqual(1, patch_put.call_count, "Host plugin was not used") - # assert update goal state is only called once, non-forced + # assert update goal state is only called once self.assertEqual(1, wire_protocol.client.update_goal_state.call_count, "Update goal state unexpected call count") - self.assertEqual(0, len(wire_protocol.client.update_goal_state.call_args[1]), "Update goal state unexpected call count") # ensure the correct url is used self.assertEqual(sas_url, patch_put.call_args[0][0]) @@ -326,9 +324,8 @@ def test_fallback_channel_410(self, patch_refresh_host_plugin, patch_put, patch_ # assert host plugin route is called self.assertEqual(1, patch_put.call_count, "Host plugin was not used") - # assert update goal state is called 
with no arguments (forced=False), then update_host_plugin_from_goal_state is called + # assert update goal state is called, then update_host_plugin_from_goal_state is called self.assertEqual(1, wire_protocol.client.update_goal_state.call_count, "Update goal state unexpected call count") - self.assertEqual(0, len(wire_protocol.client.update_goal_state.call_args[1]), "Update goal state unexpected argument count") self.assertEqual(1, patch_refresh_host_plugin.call_count, "Refresh host plugin unexpected call count") # ensure the correct url is used @@ -361,9 +358,8 @@ def test_fallback_channel_failure(self, patch_put, patch_upload, _): # assert host plugin route is called self.assertEqual(1, patch_put.call_count, "Host plugin was not used") - # assert update goal state is called twice, forced=True on the second + # assert update goal state is called twice self.assertEqual(1, wire_protocol.client.update_goal_state.call_count, "Update goal state unexpected call count") - self.assertEqual(0, len(wire_protocol.client.update_goal_state.call_args[1]), "Update goal state unexpected call count") # ensure the correct url is used self.assertEqual(sas_url, patch_put.call_args[0][0]) diff --git a/tests/protocol/test_wire.py b/tests/protocol/test_wire.py index 0cc8a01e9a..c564af7217 100644 --- a/tests/protocol/test_wire.py +++ b/tests/protocol/test_wire.py @@ -160,7 +160,7 @@ def test_getters_with_stale_goal_state(self, patch_report, *args): # -- Tracking calls to retrieve GoalState is problematic since it is # fetched often; however, the dependent documents, such as the # HostingEnvironmentConfig, will be retrieved the expected number - self.assertEqual(1, test_data.call_counts["hostingenvuri"]) + self.assertEqual(1, test_data.call_counts["hostingEnvironmentConfig"]) self.assertEqual(1, patch_report.call_count) def test_call_storage_kwargs(self, *args): # pylint: disable=unused-argument From a4836c54da9a69ff5c03c7b1224d88de8cf4ffe3 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta 
Date: Mon, 25 Apr 2022 15:50:55 -0700 Subject: [PATCH 64/84] Update agent version to 2.8.0.3 (#2563) Co-authored-by: narrieta --- azurelinuxagent/common/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azurelinuxagent/common/version.py b/azurelinuxagent/common/version.py index b6fb5f2281..3e465b3cfd 100644 --- a/azurelinuxagent/common/version.py +++ b/azurelinuxagent/common/version.py @@ -209,7 +209,7 @@ def has_logrotate(): # # When doing a release, be sure to use the actual agent version. # -AGENT_VERSION = '2.8.0.2' +AGENT_VERSION = '2.8.0.3' AGENT_LONG_VERSION = "{0}-{1}".format(AGENT_NAME, AGENT_VERSION) AGENT_DESCRIPTION = """ The Azure Linux Agent supports the provisioning and running of Linux From 4bc921590409e610d225387afb13f9b7c821777a Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Wed, 27 Apr 2022 15:22:34 -0700 Subject: [PATCH 65/84] Do not mark goal state as processed when goal state fails to update (#2569) Co-authored-by: narrieta --- azurelinuxagent/common/protocol/goal_state.py | 4 ++-- azurelinuxagent/ga/update.py | 20 +++++++++---------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/azurelinuxagent/common/protocol/goal_state.py b/azurelinuxagent/common/protocol/goal_state.py index edfd9d14f7..3301d783da 100644 --- a/azurelinuxagent/common/protocol/goal_state.py +++ b/azurelinuxagent/common/protocol/goal_state.py @@ -209,7 +209,7 @@ def _update(self, force_update): else: # vm_settings_updated most_recent = vm_settings - if self._extensions_goal_state is None or most_recent.created_on_timestamp > self._extensions_goal_state.created_on_timestamp: + if self._extensions_goal_state is None or most_recent.created_on_timestamp >= self._extensions_goal_state.created_on_timestamp: self._extensions_goal_state = most_recent # @@ -221,7 +221,7 @@ def _update(self, force_update): # Track goal state comes after that, the extensions will need the new certificate. 
The Agent needs to refresh the goal state in that # case, to ensure it fetches the new certificate. # - if self.extensions_goal_state.source == GoalStateSource.FastTrack: + if self._extensions_goal_state.source == GoalStateSource.FastTrack: self._check_certificates() def _check_certificates(self): diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py index cbf0ac38b2..f393aef8ad 100644 --- a/azurelinuxagent/ga/update.py +++ b/azurelinuxagent/ga/update.py @@ -606,19 +606,19 @@ def _processing_new_extensions_goal_state(self): return self._goal_state is not None and egs.id != self._last_extensions_gs_id and not egs.is_outdated def _process_goal_state(self, exthandlers_handler, remote_access_handler): - try: - protocol = exthandlers_handler.protocol - - # update self._goal_state - if not self._try_update_goal_state(protocol): - # agent updates and status reporting should be done even when the goal state is not updated - self.__update_guest_agent(protocol) - self._report_status(exthandlers_handler) - return + protocol = exthandlers_handler.protocol - # check for agent updates + # update self._goal_state + if not self._try_update_goal_state(protocol): + # agent updates and status reporting should be done even when the goal state is not updated self.__update_guest_agent(protocol) + self._report_status(exthandlers_handler) + return + + # check for agent updates + self.__update_guest_agent(protocol) + try: if self._processing_new_extensions_goal_state(): if not self._extensions_summary.converged: message = "A new goal state was received, but not all the extensions in the previous goal state have completed: {0}".format(self._extensions_summary) From 8d7237f1e5b5cd2a7f3066797dad995114f818de Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Wed, 27 Apr 2022 15:27:54 -0700 Subject: [PATCH 66/84] Update agent version to 2.8.0.4 (#2570) Co-authored-by: narrieta --- azurelinuxagent/common/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/azurelinuxagent/common/version.py b/azurelinuxagent/common/version.py index 3e465b3cfd..99eb6473f9 100644 --- a/azurelinuxagent/common/version.py +++ b/azurelinuxagent/common/version.py @@ -209,7 +209,7 @@ def has_logrotate(): # # When doing a release, be sure to use the actual agent version. # -AGENT_VERSION = '2.8.0.3' +AGENT_VERSION = '2.8.0.4' AGENT_LONG_VERSION = "{0}-{1}".format(AGENT_NAME, AGENT_VERSION) AGENT_DESCRIPTION = """ The Azure Linux Agent supports the provisioning and running of Linux From 8e9c1b5b514ffccb10f664f9f1b7c9edee1ddced Mon Sep 17 00:00:00 2001 From: Kevin Clark Date: Fri, 29 Apr 2022 14:03:49 -0700 Subject: [PATCH 67/84] Bug fix for fetching a goal state with empty certificates property (#2575) --- azurelinuxagent/common/protocol/goal_state.py | 9 +++- tests/data/wire/goal_state_no_certs.xml | 27 +++++++++++ tests/ga/test_update.py | 47 +++++++++++++++++++ 3 files changed, 81 insertions(+), 2 deletions(-) create mode 100644 tests/data/wire/goal_state_no_certs.xml diff --git a/azurelinuxagent/common/protocol/goal_state.py b/azurelinuxagent/common/protocol/goal_state.py index 3301d783da..8b508f61ad 100644 --- a/azurelinuxagent/common/protocol/goal_state.py +++ b/azurelinuxagent/common/protocol/goal_state.py @@ -82,7 +82,7 @@ def __init__(self, wire_client, silent=False): self._container_id = None self._hosting_env = None self._shared_conf = None - self._certs = None + self._certs = EmptyCertificates() self._remote_access = None self.update(silent=silent) @@ -348,7 +348,7 @@ def _fetch_full_wire_server_goal_state(self, incarnation, xml_doc): shared_conf = SharedConfig(xml_text) self._history.save_shared_conf(xml_text) - certs = None + certs = EmptyCertificates() certs_uri = findtext(xml_doc, "Certificates") if certs_uri is not None: xml_text = self._wire_client.fetch_config(certs_uri, self._wire_client.get_header_for_cert()) @@ -506,6 +506,11 @@ def _write_to_tmp_file(index, suffix, buf): fileutil.write_file(file_name, 
"".join(buf)) return file_name +class EmptyCertificates: + def __init__(self): + self.cert_list = CertList() + self.summary = [] # debugging info + self.warnings = [] class RemoteAccess(object): """ diff --git a/tests/data/wire/goal_state_no_certs.xml b/tests/data/wire/goal_state_no_certs.xml new file mode 100644 index 0000000000..1ab7fa2172 --- /dev/null +++ b/tests/data/wire/goal_state_no_certs.xml @@ -0,0 +1,27 @@ + + + 2010-12-15 + 1 + + Started + + 16001 + + + + c6d5526c-5ac2-4200-b6e2-56f2b70c5ab2 + + + b61f93d0-e1ed-40b2-b067-22c243233448.MachineRole_IN_0 + Started + + http://168.63.129.16:80/machine/865a6683-91d8-450f-99ae/bc8b9d47%2Db5ed%2D4704%2D85d9%2Dfd74cc967ec2.%5Fcanary?comp=config&type=hostingEnvironmentConfig&incarnation=1 + http://168.63.129.16:80/machine/865a6683-91d8-450f-99ae/bc8b9d47%2Db5ed%2D4704%2D85d9%2Dfd74cc967ec2.%5Fcanary?comp=config&type=sharedConfig&incarnation=1 + http://168.63.129.16:80/machine/865a6683-91d8-450f-99ae/bc8b9d47%2Db5ed%2D4704%2D85d9%2Dfd74cc967ec2.%5Fcanary?comp=config&type=extensionsConfig&incarnation=1 + http://168.63.129.16:80/machine/865a6683-91d8-450f-99ae/bc8b9d47%2Db5ed%2D4704%2D85d9%2Dfd74cc967ec2.%5Fcanary?comp=config&type=fullConfig&incarnation=1 + bc8b9d47-b5ed-4704-85d9-fd74cc967ec2.5.bc8b9d47-b5ed-4704-85d9-fd74cc967ec2.5._canary.1.xml + + + + + diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index 91545db2e0..4ac1ba6451 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -1564,6 +1564,53 @@ def _get_test_ext_handler_instance(protocol, name="OSTCExtensions.ExampleHandler eh = Extension(name=name) eh.version = version return ExtHandlerInstance(eh, protocol) + + def test_update_handler_recovers_from_error_with_no_certs(self): + data = DATA_FILE.copy() + data['goal_state'] = 'wire/goal_state_no_certs.xml' + + def fail_gs_fetch(url, *_, **__): + if HttpRequestPredicates.is_goal_state_request(url): + return MockHttpResponse(status=500) + return None + + with 
mock_wire_protocol(data) as protocol: + + def fail_fetch_on_second_iter(iteration): + if iteration == 2: + protocol.set_http_handlers(http_get_handler=fail_gs_fetch) + if iteration > 2: # Zero out the fail handler for subsequent iterations. + protocol.set_http_handlers(http_get_handler=None) + + with mock_update_handler(protocol, 3, on_new_iteration=fail_fetch_on_second_iter) as update_handler: + with patch("azurelinuxagent.ga.update.logger.error") as patched_error: + with patch("azurelinuxagent.ga.update.logger.info") as patched_info: + def match_unexpected_errors(): + unexpected_msg_fragment = "Error fetching the goal state:" + + matching_errors = [] + for (args, _) in filter(lambda a: len(a) > 0, patched_error.call_args_list): + if unexpected_msg_fragment in args[0]: + matching_errors.append(args[0]) + + if len(matching_errors) > 1: + self.fail("Guest Agent did not recover, with new error(s): {}"\ + .format(matching_errors[1:])) + + def match_expected_info(): + expected_msg_fragment = "Fetching the goal state recovered from previous errors" + + for (call_args, _) in filter(lambda a: len(a) > 0, patched_info.call_args_list): + if expected_msg_fragment in call_args[0]: + break + else: + self.fail("Expected the guest agent to recover with '{}', but it didn't"\ + .format(expected_msg_fragment)) + + update_handler.run(debug=True) + match_unexpected_errors() # Match on errors first, they can provide more info. + match_expected_info() + def test_it_should_recreate_handler_env_on_service_startup(self): iterations = 5 From dee5fef9b98bf3e50b8fce213f67bd0d0b288c21 Mon Sep 17 00:00:00 2001 From: Kevin Clark Date: Fri, 29 Apr 2022 14:48:26 -0700 Subject: [PATCH 68/84] Move error counter reset down to end of block. 
(#2576) --- azurelinuxagent/ga/update.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py index f393aef8ad..469b7ece6a 100644 --- a/azurelinuxagent/ga/update.py +++ b/azurelinuxagent/ga/update.py @@ -489,11 +489,11 @@ def _try_update_goal_state(self, protocol): self._goal_state = protocol.get_goal_state() if self._update_goal_state_error_count > 0: - self._update_goal_state_error_count = 0 message = u"Fetching the goal state recovered from previous errors. Fetched {0} (certificates: {1})".format( self._goal_state.extensions_goal_state.id, self._goal_state.certs.summary) add_event(AGENT_NAME, op=WALAEventOperation.FetchGoalState, version=CURRENT_VERSION, is_success=True, message=message, log_event=False) logger.info(message) + self._update_goal_state_error_count = 0 try: self._supports_fast_track = conf.get_enable_fast_track() and protocol.client.get_host_plugin().check_vm_settings_support() From 35fed83afb5da1e93d0e207f4b54a863f188ec6c Mon Sep 17 00:00:00 2001 From: Kevin Clark Date: Fri, 29 Apr 2022 19:23:02 -0700 Subject: [PATCH 69/84] Bug Fix: Change fast track timestamp default from None to datetime.min (#2577) --- azurelinuxagent/common/protocol/hostplugin.py | 4 +- .../vm_settings-fabric-no_thumbprints.json | 192 ++++++++++++++++++ tests/ga/test_update.py | 55 ++++- 3 files changed, 245 insertions(+), 6 deletions(-) create mode 100644 tests/data/hostgaplugin/vm_settings-fabric-no_thumbprints.json diff --git a/azurelinuxagent/common/protocol/hostplugin.py b/azurelinuxagent/common/protocol/hostplugin.py index 5f795dca3e..f79076f8ef 100644 --- a/azurelinuxagent/common/protocol/hostplugin.py +++ b/azurelinuxagent/common/protocol/hostplugin.py @@ -95,7 +95,7 @@ def __init__(self, endpoint): if not os.path.exists(self._get_fast_track_state_file()): self._supports_vm_settings = False self._supports_vm_settings_next_check = datetime.datetime.now() - self._fast_track_timestamp = None + 
self._fast_track_timestamp = timeutil.create_timestamp(datetime.datetime.min) else: self._supports_vm_settings = True self._supports_vm_settings_next_check = datetime.datetime.now() @@ -443,7 +443,7 @@ def get_fast_track_timestamp(): goal state was Fabric or fetch_vm_settings() has not been invoked. """ if not os.path.exists(HostPluginProtocol._get_fast_track_state_file()): - return None + return timeutil.create_timestamp(datetime.datetime.min) try: with open(HostPluginProtocol._get_fast_track_state_file(), "r") as file_: diff --git a/tests/data/hostgaplugin/vm_settings-fabric-no_thumbprints.json b/tests/data/hostgaplugin/vm_settings-fabric-no_thumbprints.json new file mode 100644 index 0000000000..bbd9459336 --- /dev/null +++ b/tests/data/hostgaplugin/vm_settings-fabric-no_thumbprints.json @@ -0,0 +1,192 @@ +{ + "hostGAPluginVersion": "1.0.8.124", + "vmSettingsSchemaVersion": "0.0", + "activityId": "a33f6f53-43d6-4625-b322-1a39651a00c9", + "correlationId": "9a47a2a2-e740-4bfc-b11b-4f2f7cfe7d2e", + "inSvdSeqNo": 1, + "extensionsLastModifiedTickCount": 637726657706205299, + "extensionGoalStatesSource": "Fabric", + "onHold": true, + "statusUploadBlob": { + "statusBlobType": "BlockBlob", + "value": "https://dcrcl3a0xs.blob.core.windows.net/$system/edp0plkw2b.86f4ae0a-61f8-48ae-9199-40f402d56864.status?sv=2018-03-28&sr=b&sk=system-1&sig=KNWgC2%3d&se=9999-01-01T00%3a00%3a00Z&sp=w" + }, + "inVMMetadata": { + "subscriptionId": "8e037ad4-618f-4466-8bc8-5099d41ac15b", + "resourceGroupName": "rg-dc-86fjzhp", + "vmName": "edp0plkw2b", + "location": "CentralUSEUAP", + "vmId": "86f4ae0a-61f8-48ae-9199-40f402d56864", + "vmSize": "Standard_B2s", + "osType": "Linux" + }, + "requiredFeatures": [ + { + "name": "MultipleExtensionsPerHandler" + } + ], + "gaFamilies": [ + { + "name": "Prod", + "uris": [ + "https://zrdfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Prod_uscentraleuap_manifest.xml", + 
"https://ardfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Prod_uscentraleuap_manifest.xml" + ] + }, + { + "name": "Test", + "uris": [ + "https://zrdfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_uscentraleuap_manifest.xml", + "https://ardfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_uscentraleuap_manifest.xml" + ] + } + ], + "extensionGoalStates": [ + { + "name": "Microsoft.Azure.Monitor.AzureMonitorLinuxAgent", + "version": "1.9.1", + "location": "https://zrdfepirv2cbn04prdstr01a.blob.core.windows.net/a47f0806d764480a8d989d009c75007d/Microsoft.Azure.Monitor_AzureMonitorLinuxAgent_useast2euap_manifest.xml", + "failoverlocation": "https://zrdfepirv2cbn06prdstr01a.blob.core.windows.net/a47f0806d764480a8d989d009c75007d/Microsoft.Azure.Monitor_AzureMonitorLinuxAgent_useast2euap_manifest.xml", + "additionalLocations": ["https://zrdfepirv2cbn09pr02a.blob.core.windows.net/a47f0806d764480a8d989d009c75007d/Microsoft.Azure.Monitor_AzureMonitorLinuxAgent_useast2euap_manifest.xml"], + "state": "enabled", + "autoUpgrade": true, + "runAsStartupTask": false, + "isJson": true, + "useExactVersion": true, + "settingsSeqNo": 0, + "settings": [ + { + "publicSettings": "{\"GCS_AUTO_CONFIG\":true}" + } + ] + }, + { + "name": "Microsoft.Azure.Security.Monitoring.AzureSecurityLinuxAgent", + "version": "2.15.112", + "location": "https://zrdfepirv2cbn04prdstr01a.blob.core.windows.net/4ef06ad957494df49c807a5334f2b5d2/Microsoft.Azure.Security.Monitoring_AzureSecurityLinuxAgent_useast2euap_manifest.xml", + "failoverlocation": "https://zrdfepirv2cbz06prdstr01a.blob.core.windows.net/4ef06ad957494df49c807a5334f2b5d2/Microsoft.Azure.Security.Monitoring_AzureSecurityLinuxAgent_useast2euap_manifest.xml", + "additionalLocations": 
["https://zrdfepirv2cbn06prdstr01a.blob.core.windows.net/4ef06ad957494df49c807a5334f2b5d2/Microsoft.Azure.Security.Monitoring_AzureSecurityLinuxAgent_useast2euap_manifest.xml"], + "state": "enabled", + "autoUpgrade": true, + "runAsStartupTask": false, + "isJson": true, + "useExactVersion": true, + "settingsSeqNo": 0, + "settings": [ + { + "publicSettings": "{\"enableGenevaUpload\":true}" + } + ] + }, + { + "name": "Microsoft.Azure.Extensions.CustomScript", + "version": "2.1.6", + "location": "https://umsavwggj2v40kvqhc0w.blob.core.windows.net/5237dd14-0aad-f051-0fad-1e33e1b63091/5237dd14-0aad-f051-0fad-1e33e1b63091_manifest.xml", + "failoverlocation": "https://umsafwzhkbm1rfrhl0ws.blob.core.windows.net/5237dd14-0aad-f051-0fad-1e33e1b63091/5237dd14-0aad-f051-0fad-1e33e1b63091_manifest.xml", + "additionalLocations": [ + "https://umsanh4b5rfz0q0p4pwm.blob.core.windows.net/5237dd14-0aad-f051-0fad-1e33e1b63091/5237dd14-0aad-f051-0fad-1e33e1b63091_manifest.xml" + ], + "state": "enabled", + "autoUpgrade": true, + "runAsStartupTask": false, + "isJson": true, + "useExactVersion": true, + "settingsSeqNo": 0, + "isMultiConfig": false, + "settings": [ + { + "publicSettings": "{\"commandToExecute\":\"echo 'cee174d4-4daa-4b07-9958-53b9649445c2'\"}" + } + ], + "dependsOn": [ + { + "DependsOnExtension": [ + { + "handler": "Microsoft.Azure.Security.Monitoring.AzureSecurityLinuxAgent" + } + ], + "dependencyLevel": 1 + } + ] + }, + { + "name": "Microsoft.CPlat.Core.RunCommandHandlerLinux", + "version": "1.2.0", + "location": "https://umsavbvncrpzbnxmxzmr.blob.core.windows.net/f4086d41-69f9-3103-78e0-8a2c7e789d0f/f4086d41-69f9-3103-78e0-8a2c7e789d0f_manifest.xml", + "failoverlocation": "https://umsajbjtqrb3zqjvgb2z.blob.core.windows.net/f4086d41-69f9-3103-78e0-8a2c7e789d0f/f4086d41-69f9-3103-78e0-8a2c7e789d0f_manifest.xml", + "additionalLocations": [ + 
"https://umsawqtlsshtn5v2nfgh.blob.core.windows.net/f4086d41-69f9-3103-78e0-8a2c7e789d0f/f4086d41-69f9-3103-78e0-8a2c7e789d0f_manifest.xml" + ], + "state": "enabled", + "autoUpgrade": true, + "runAsStartupTask": false, + "isJson": true, + "useExactVersion": true, + "settingsSeqNo": 0, + "isMultiConfig": true, + "settings": [ + { + "publicSettings": "{\"source\":{\"script\":\"echo '4abb1e88-f349-41f8-8442-247d9fdfcac5'\"}}", + "seqNo": 0, + "extensionName": "MCExt1", + "extensionState": "enabled" + }, + { + "publicSettings": "{\"source\":{\"script\":\"echo 'e865c9bc-a7b3-42c6-9a79-cfa98a1ee8b3'\"}}", + "seqNo": 0, + "extensionName": "MCExt2", + "extensionState": "enabled" + }, + { + "publicSettings": "{\"source\":{\"script\":\"echo 'f923e416-0340-485c-9243-8b84fb9930c6'\"}}", + "seqNo": 0, + "extensionName": "MCExt3", + "extensionState": "enabled" + } + ], + "dependsOn": [ + { + "dependsOnExtension": [ + { + "extension": "...", + "handler": "..." + }, + { + "extension": "...", + "handler": "..." + } + ], + "dependencyLevel": 2, + "name": "MCExt1" + }, + { + "dependsOnExtension": [ + { + "extension": "...", + "handler": "..." 
+ } + ], + "dependencyLevel": 1, + "name": "MCExt2" + } + ] + }, + { + "name": "Microsoft.OSTCExtensions.VMAccessForLinux", + "version": "1.5.11", + "location": "https://umsasc25p0kjg0c1dg4b.blob.core.windows.net/2bbece4f-0283-d415-b034-cc0adc6997a1/2bbece4f-0283-d415-b034-cc0adc6997a1_manifest.xml", + "failoverlocation": "https://umsamfwlmfshvxx2lsjm.blob.core.windows.net/2bbece4f-0283-d415-b034-cc0adc6997a1/2bbece4f-0283-d415-b034-cc0adc6997a1_manifest.xml", + "additionalLocations": [ + "https://umsah3cwjlctnmhsvzqv.blob.core.windows.net/2bbece4f-0283-d415-b034-cc0adc6997a1/2bbece4f-0283-d415-b034-cc0adc6997a1_manifest.xml" + ], + "state": "enabled", + "autoUpgrade": false, + "runAsStartupTask": false, + "isJson": true, + "useExactVersion": true, + "settingsSeqNo": 0, + "isMultiConfig": false, + "settings": [ ] + } + ] +} diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index 4ac1ba6451..cd5595569f 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -40,7 +40,7 @@ VMAgentUpdateStatuses from azurelinuxagent.common.protocol.util import ProtocolUtil from azurelinuxagent.common.protocol.wire import WireProtocol -from azurelinuxagent.common.utils import fileutil, restutil, textutil +from azurelinuxagent.common.utils import fileutil, restutil, textutil, timeutil from azurelinuxagent.common.utils.archive import ARCHIVE_DIRECTORY_NAME, AGENT_STATUS_FILE from azurelinuxagent.common.utils.flexible_version import FlexibleVersion from azurelinuxagent.common.utils.networkutil import FirewallCmdDirectCommands, AddFirewallRules @@ -54,7 +54,7 @@ READONLY_FILE_GLOBS, ExtensionsSummary, AgentUpgradeType from tests.ga.mocks import mock_update_handler from tests.protocol.mocks import mock_wire_protocol, MockHttpResponse -from tests.protocol.mockwiredata import DATA_FILE, DATA_FILE_MULTIPLE_EXT +from tests.protocol.mockwiredata import DATA_FILE, DATA_FILE_MULTIPLE_EXT, DATA_FILE_VM_SETTINGS from tests.tools import AgentTestCase, 
AgentTestCaseWithGetVmSizeMock, data_dir, DEFAULT, patch, load_bin_data, Mock, MagicMock, \ clear_singleton_instances, mock_sleep from tests.protocol import mockwiredata @@ -2919,7 +2919,7 @@ def test_it_should_mark_outdated_goal_states_on_service_restart_when_host_ga_plu def test_it_should_clear_the_timestamp_for_the_most_recent_fast_track_goal_state(self): data_file = self._prepare_fast_track_goal_state() - if HostPluginProtocol.get_fast_track_timestamp() is None: + if HostPluginProtocol.get_fast_track_timestamp() == timeutil.create_timestamp(datetime.min): raise Exception("The test setup did not save the Fast Track state") with patch("azurelinuxagent.common.conf.get_enable_fast_track", return_value=False): @@ -2927,8 +2927,55 @@ def test_it_should_clear_the_timestamp_for_the_most_recent_fast_track_goal_state with mock_update_handler(protocol) as update_handler: update_handler.run() - self.assertIsNone(HostPluginProtocol.get_fast_track_timestamp(), "The Fast Track state was not cleared") + self.assertEqual(HostPluginProtocol.get_fast_track_timestamp(), timeutil.create_timestamp(datetime.min), + "The Fast Track state was not cleared") + + def test_it_should_default_fast_track_timestamp_to_datetime_min(self): + data = DATA_FILE_VM_SETTINGS.copy() + # TODO: Currently, there's a limitation in the mocks where bumping the incarnation but the goal + # state will cause the agent to error out while trying to write the certificates to disk. These + # files have no dependencies on certs, so using them does not present that issue. + # + # Note that the scenario this test is representing does not depend on certificates at all, and + # can be changed to use the default files when the above limitation is addressed. 
+ data["vm_settings"] = "hostgaplugin/vm_settings-fabric-no_thumbprints.json" + data['goal_state'] = 'wire/goal_state_no_certs.xml' + + def vm_settings_no_change(url, *_, **__): + if HttpRequestPredicates.is_host_plugin_vm_settings_request(url): + return MockHttpResponse(httpclient.NOT_MODIFIED) + return None + + def vm_settings_not_supported(url, *_, **__): + if HttpRequestPredicates.is_host_plugin_vm_settings_request(url): + return MockHttpResponse(404) + return None + + with mock_wire_protocol(data) as protocol: + + def mock_live_migration(iteration): + if iteration == 1: + protocol.mock_wire_data.set_incarnation(2) + protocol.set_http_handlers(http_get_handler=vm_settings_no_change) + elif iteration == 2: + protocol.mock_wire_data.set_incarnation(3) + protocol.set_http_handlers(http_get_handler=vm_settings_not_supported) + + with mock_update_handler(protocol, 3, on_new_iteration=mock_live_migration) as update_handler: + with patch("azurelinuxagent.ga.update.logger.error") as patched_error: + def check_for_errors(): + msg_fragment = "Error fetching the goal state:" + + for (args, _) in filter(lambda a: len(a) > 0, patched_error.call_args_list): + if msg_fragment in args[0]: + self.fail("Found error: {}".format(args[0])) + update_handler.run(debug=True) + check_for_errors() + + timestamp = protocol.client.get_host_plugin()._fast_track_timestamp + self.assertEqual(timestamp, timeutil.create_timestamp(datetime.min), + "Expected fast track time stamp to be set to {0}, got {1}".format(datetime.min, timestamp)) class HeartbeatTestCase(AgentTestCase): From 9895809483361a28a60322d8dc9be74771a9c28b Mon Sep 17 00:00:00 2001 From: Kevin Clark Date: Mon, 2 May 2022 13:37:19 -0700 Subject: [PATCH 70/84] Update agent version to 2.8.0.5. 
(#2580) --- azurelinuxagent/common/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azurelinuxagent/common/version.py b/azurelinuxagent/common/version.py index 99eb6473f9..398367e6c9 100644 --- a/azurelinuxagent/common/version.py +++ b/azurelinuxagent/common/version.py @@ -209,7 +209,7 @@ def has_logrotate(): # # When doing a release, be sure to use the actual agent version. # -AGENT_VERSION = '2.8.0.4' +AGENT_VERSION = '2.8.0.5' AGENT_LONG_VERSION = "{0}-{1}".format(AGENT_NAME, AGENT_VERSION) AGENT_DESCRIPTION = """ The Azure Linux Agent supports the provisioning and running of Linux From 7b92617d5fb155b4c56a03313b00a77382db4d3c Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Thu, 19 May 2022 16:44:19 -0700 Subject: [PATCH 71/84] Create placeholder GoalState.*.xml file (#2594) Co-authored-by: narrieta --- azurelinuxagent/common/utils/archive.py | 22 ++++++++++++++++++++++ tests/utils/test_archive.py | 8 ++++---- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/azurelinuxagent/common/utils/archive.py b/azurelinuxagent/common/utils/archive.py index 6123fdb0d2..0be1544c57 100644 --- a/azurelinuxagent/common/utils/archive.py +++ b/azurelinuxagent/common/utils/archive.py @@ -39,6 +39,10 @@ ARCHIVE_DIRECTORY_NAME = 'history' +# TODO: See comment in GoalStateHistory._save_placeholder and remove this code when no longer needed +_PLACEHOLDER_FILE_NAME = 'GoalState.1.xml' +# END TODO + _MAX_ARCHIVED_STATES = 50 _CACHE_PATTERNS = [ @@ -173,6 +177,10 @@ def purge(self): def purge_legacy_goal_state_history(): lib_dir = conf.get_lib_dir() for current_file in os.listdir(lib_dir): + # TODO: See comment in GoalStateHistory._save_placeholder and remove this code when no longer needed + if current_file == _PLACEHOLDER_FILE_NAME: + return + # END TODO full_path = os.path.join(lib_dir, current_file) for pattern in _CACHE_PATTERNS: match = pattern.match(current_file) @@ -232,8 +240,22 @@ def save(self, data, file_name): self._errors = 
True logger.warn("Failed to save {0} to the goal state history: {1} [no additional errors saving the goal state will be reported]".format(file_name, e)) + @staticmethod + def _save_placeholder(): + """ + Some internal components took a dependency in the legacy GoalState.*.xml file. We create it here while those components are updated to remove the dependency. + When removing this code, also remove the check in StateArchiver.purge_legacy_goal_state_history, and the definition of _PLACEHOLDER_FILE_NAME + """ + try: + placeholder = os.path.join(conf.get_lib_dir(), _PLACEHOLDER_FILE_NAME) + with open(placeholder, "w") as handle: + handle.write("empty placeholder file") + except Exception as e: + logger.warn("Failed to save placeholder file ({0}): {1}".format(_PLACEHOLDER_FILE_NAME, e)) + def save_goal_state(self, text): self.save(text, _GOAL_STATE_FILE_NAME) + self._save_placeholder() def save_extensions_config(self, text): self.save(text, _EXT_CONF_FILE_NAME) diff --git a/tests/utils/test_archive.py b/tests/utils/test_archive.py index 0c649c9e24..5eee67c7da 100644 --- a/tests/utils/test_archive.py +++ b/tests/utils/test_archive.py @@ -134,10 +134,10 @@ def test_archive02(self): def test_purge_legacy_goal_state_history(self): with patch("azurelinuxagent.common.conf.get_lib_dir", return_value=self.tmp_dir): legacy_files = [ - 'GoalState.1.xml', - 'VmSettings.1.json', - 'Prod.1.manifest.xml', - 'ExtensionsConfig.1.xml', + 'GoalState.2.xml', + 'VmSettings.2.json', + 'Prod.2.manifest.xml', + 'ExtensionsConfig.2.xml', 'Microsoft.Azure.Extensions.CustomScript.1.xml', 'SharedConfig.xml', 'HostingEnvironmentConfig.xml', From fcc81a1c3c2a7167201d7e27852c615d15168302 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Thu, 19 May 2022 17:01:50 -0700 Subject: [PATCH 72/84] Update version to 2.8.0.6 (#2595) Co-authored-by: narrieta --- azurelinuxagent/common/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azurelinuxagent/common/version.py 
b/azurelinuxagent/common/version.py index 398367e6c9..4ada1d1355 100644 --- a/azurelinuxagent/common/version.py +++ b/azurelinuxagent/common/version.py @@ -209,7 +209,7 @@ def has_logrotate(): # # When doing a release, be sure to use the actual agent version. # -AGENT_VERSION = '2.8.0.5' +AGENT_VERSION = '2.8.0.6' AGENT_LONG_VERSION = "{0}-{1}".format(AGENT_NAME, AGENT_VERSION) AGENT_DESCRIPTION = """ The Azure Linux Agent supports the provisioning and running of Linux From 7e39ec80f27f6fa04b3780b23b7e4a4332c2d0b0 Mon Sep 17 00:00:00 2001 From: narrieta Date: Fri, 17 Jun 2022 12:54:26 -0700 Subject: [PATCH 73/84] Parse missing agent manifests as empty --- .../protocol/extensions_goal_state_from_vm_settings.py | 4 +++- ...on_manifests.json => vm_settings-no_manifests.json} | 7 +------ .../test_extensions_goal_state_from_vm_settings.py | 10 +++++++++- 3 files changed, 13 insertions(+), 8 deletions(-) rename tests/data/hostgaplugin/{vm_settings-no_extension_manifests.json => vm_settings-no_manifests.json} (83%) diff --git a/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py b/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py index 38cca48f1b..10e036c9c9 100644 --- a/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py +++ b/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py @@ -266,7 +266,9 @@ def _parse_agent_manifests(self, vm_settings): for family in families: name = family["name"] version = family.get("version") - uris = family["uris"] + uris = family.get("uris") + if uris is None: + uris = [] manifest = VMAgentManifest(name, version) for u in uris: manifest.uris.append(u) diff --git a/tests/data/hostgaplugin/vm_settings-no_extension_manifests.json b/tests/data/hostgaplugin/vm_settings-no_manifests.json similarity index 83% rename from tests/data/hostgaplugin/vm_settings-no_extension_manifests.json rename to tests/data/hostgaplugin/vm_settings-no_manifests.json index 
b084900b68..7ec3a5c3d1 100644 --- a/tests/data/hostgaplugin/vm_settings-no_extension_manifests.json +++ b/tests/data/hostgaplugin/vm_settings-no_manifests.json @@ -27,12 +27,7 @@ }, "gaFamilies": [ { - "name": "Prod", - "uris": [ - "https://zrdfepirv2dz5prdstr07a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Prod_uscentral_manifest.xml", - "https://rdfepirv2dm1prdstr09.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Prod_uscentral_manifest.xml", - "https://zrdfepirv2dm5prdstr06a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Prod_uscentral_manifest.xml" - ] + "name": "Prod" } ], "extensionGoalStates": [ diff --git a/tests/protocol/test_extensions_goal_state_from_vm_settings.py b/tests/protocol/test_extensions_goal_state_from_vm_settings.py index 9bcba5ece4..8cdfa81bf9 100644 --- a/tests/protocol/test_extensions_goal_state_from_vm_settings.py +++ b/tests/protocol/test_extensions_goal_state_from_vm_settings.py @@ -69,9 +69,17 @@ def test_it_should_parse_missing_status_upload_blob_as_none(self): self.assertIsNone(extensions_goal_state.status_upload_blob, "Expected status upload blob to be None") self.assertEqual("BlockBlob", extensions_goal_state.status_upload_blob_type, "Expected status upload blob to be Block") + def test_it_should_parse_missing_agent_manifests_as_empty(self): + data_file = mockwiredata.DATA_FILE_VM_SETTINGS.copy() + data_file["vm_settings"] = "hostgaplugin/vm_settings-no_manifests.json" + with mock_wire_protocol(data_file) as protocol: + extensions_goal_state = protocol.get_goal_state().extensions_goal_state + self.assertEqual(1, len(extensions_goal_state.agent_manifests), "Expected exactly one agent manifest. 
Got: {0}".format(extensions_goal_state.agent_manifests)) + self.assertListEqual([], extensions_goal_state.agent_manifests[0].uris, "Expected an empty list of agent manifests") + def test_it_should_parse_missing_extension_manifests_as_empty(self): data_file = mockwiredata.DATA_FILE_VM_SETTINGS.copy() - data_file["vm_settings"] = "hostgaplugin/vm_settings-no_extension_manifests.json" + data_file["vm_settings"] = "hostgaplugin/vm_settings-no_manifests.json" with mock_wire_protocol(data_file) as protocol: extensions_goal_state = protocol.get_goal_state().extensions_goal_state From 40b3c50910683cda9287a4d3fe4777936a23d51b Mon Sep 17 00:00:00 2001 From: narrieta Date: Fri, 17 Jun 2022 13:18:11 -0700 Subject: [PATCH 74/84] Set agent version to 2.8.0.7 --- azurelinuxagent/common/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azurelinuxagent/common/version.py b/azurelinuxagent/common/version.py index 4ada1d1355..43769a02f9 100644 --- a/azurelinuxagent/common/version.py +++ b/azurelinuxagent/common/version.py @@ -209,7 +209,7 @@ def has_logrotate(): # # When doing a release, be sure to use the actual agent version. 
# -AGENT_VERSION = '2.8.0.6' +AGENT_VERSION = '2.8.0.7' AGENT_LONG_VERSION = "{0}-{1}".format(AGENT_NAME, AGENT_VERSION) AGENT_DESCRIPTION = """ The Azure Linux Agent supports the provisioning and running of Linux From dbc82d3a948ff423529e8ea75d5ba9465709d67c Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Tue, 5 Jul 2022 13:21:32 -0700 Subject: [PATCH 75/84] Retry HGAP's extensionsArtifact requests on BAD_REQUEST status (#2621) * Retry HGAP's extensionsArtifact requests on BAD_REQUEST status * python 2.6 compat Co-authored-by: narrieta --- ...ensions_goal_state_from_extensions_config.py | 3 ++- azurelinuxagent/common/protocol/wire.py | 17 ++++++++++------- azurelinuxagent/common/utils/restutil.py | 9 +++++++++ azurelinuxagent/ga/update.py | 6 +++--- 4 files changed, 24 insertions(+), 11 deletions(-) diff --git a/azurelinuxagent/common/protocol/extensions_goal_state_from_extensions_config.py b/azurelinuxagent/common/protocol/extensions_goal_state_from_extensions_config.py index c7e01dd207..8dce261ce6 100644 --- a/azurelinuxagent/common/protocol/extensions_goal_state_from_extensions_config.py +++ b/azurelinuxagent/common/protocol/extensions_goal_state_from_extensions_config.py @@ -25,6 +25,7 @@ from azurelinuxagent.common.future import ustr from azurelinuxagent.common.protocol.extensions_goal_state import ExtensionsGoalState, GoalStateChannel, GoalStateSource from azurelinuxagent.common.protocol.restapi import ExtensionSettings, Extension, VMAgentManifest, ExtensionState, InVMGoalStateMetaData +from azurelinuxagent.common.utils import restutil from azurelinuxagent.common.utils.textutil import parse_doc, parse_json, findall, find, findtext, getattrib, gettext, format_exception, \ is_str_none_or_whitespace, is_str_empty @@ -99,7 +100,7 @@ def fetch_direct(): def fetch_through_host(): host = wire_client.get_host_plugin() uri, headers = host.get_artifact_request(artifacts_profile_blob) - content, _ = wire_client.fetch(uri, headers, use_proxy=False) + content, _ = 
wire_client.fetch(uri, headers, use_proxy=False, retry_codes=restutil.HGAP_GET_EXTENSION_ARTIFACT_RETRY_CODES) return content logger.verbose("Retrieving the artifacts profile") diff --git a/azurelinuxagent/common/protocol/wire.py b/azurelinuxagent/common/protocol/wire.py index 7923bea75d..a57355e07a 100644 --- a/azurelinuxagent/common/protocol/wire.py +++ b/azurelinuxagent/common/protocol/wire.py @@ -130,7 +130,7 @@ def get_goal_state(self): def _download_ext_handler_pkg_through_host(self, uri, destination): host = self.client.get_host_plugin() uri, headers = host.get_artifact_request(uri, host.manifest_uri) - success = self.client.stream(uri, destination, headers=headers, use_proxy=False, max_retry=1) + success = self.client.stream(uri, destination, headers=headers, use_proxy=False, max_retry=1) # set max_retry to 1 because extension packages already have a retry loop (see ExtHandlerInstance.download()) return success def download_ext_handler_pkg(self, uri, destination, headers=None, use_proxy=True): # pylint: disable=W0613 @@ -626,7 +626,7 @@ def call_storage_service(http_req, *args, **kwargs): def fetch_manifest_through_host(self, uri): host = self.get_host_plugin() uri, headers = host.get_artifact_request(uri) - response, _ = self.fetch(uri, headers, use_proxy=False, max_retry=1) + response, _ = self.fetch(uri, headers, use_proxy=False, retry_codes=restutil.HGAP_GET_EXTENSION_ARTIFACT_RETRY_CODES) return response def fetch_manifest(self, version_uris, timeout_in_minutes=5, timeout_in_ms=0): @@ -649,9 +649,11 @@ def fetch_manifest(self, version_uris, timeout_in_minutes=5, timeout_in_ms=0): logger.verbose('The specified manifest URL is empty, ignored.') continue - direct_func = lambda: self.fetch(version_uri, max_retry=1)[0] # pylint: disable=W0640 + # Disable W0640: OK to use version_uri in a lambda within the loop's body + direct_func = lambda: self.fetch(version_uri)[0] # pylint: disable=W0640 # NOTE: the host_func may be called after refreshing the goal 
state, be careful about any goal state data # in the lambda. + # Disable W0640: OK to use version_uri in a lambda within the loop's body host_func = lambda: self.fetch_manifest_through_host(version_uri) # pylint: disable=W0640 try: @@ -690,7 +692,7 @@ def stream(self, uri, destination, headers=None, use_proxy=None, max_retry=None) return success - def fetch(self, uri, headers=None, use_proxy=None, decode=True, max_retry=None, ok_codes=None): + def fetch(self, uri, headers=None, use_proxy=None, decode=True, max_retry=None, retry_codes=None, ok_codes=None): """ max_retry indicates the maximum number of retries for the HTTP request; None indicates that the default value should be used @@ -699,14 +701,14 @@ def fetch(self, uri, headers=None, use_proxy=None, decode=True, max_retry=None, logger.verbose("Fetch [{0}] with headers [{1}]", uri, headers) content = None response_headers = None - response = self._fetch_response(uri, headers, use_proxy, max_retry=max_retry, ok_codes=ok_codes) + response = self._fetch_response(uri, headers, use_proxy, max_retry=max_retry, retry_codes=retry_codes, ok_codes=ok_codes) if response is not None and not restutil.request_failed(response, ok_codes=ok_codes): response_content = response.read() content = self.decode_config(response_content) if decode else response_content response_headers = response.getheaders() return content, response_headers - def _fetch_response(self, uri, headers=None, use_proxy=None, max_retry=None, ok_codes=None): + def _fetch_response(self, uri, headers=None, use_proxy=None, max_retry=None, retry_codes=None, ok_codes=None): """ max_retry indicates the maximum number of retries for the HTTP request; None indicates that the default value should be used """ @@ -717,7 +719,8 @@ def _fetch_response(self, uri, headers=None, use_proxy=None, max_retry=None, ok_ uri, headers=headers, use_proxy=use_proxy, - max_retry=max_retry) + max_retry=max_retry, + retry_codes=retry_codes) host_plugin = self.get_host_plugin() diff --git 
a/azurelinuxagent/common/utils/restutil.py b/azurelinuxagent/common/utils/restutil.py index 0c6d6d9ad0..8c2fc4e4e6 100644 --- a/azurelinuxagent/common/utils/restutil.py +++ b/azurelinuxagent/common/utils/restutil.py @@ -56,6 +56,15 @@ 429, # Request Rate Limit Exceeded ] +# +# Currently the HostGAPlugin has an issue its cache that may produce a BAD_REQUEST failure for valid URIs when using the extensionArtifact API. +# Add this status to the retryable codes, but use it only when requesting downloads via the HostGAPlugin. The retry logic in the download code +# would give enough time to the HGAP to refresh its cache. Once the fix to address that issue is deployed, consider removing the use of +# HGAP_GET_EXTENSION_ARTIFACT_RETRY_CODES. +# +HGAP_GET_EXTENSION_ARTIFACT_RETRY_CODES = RETRY_CODES[:] # make a copy of RETRY_CODES +HGAP_GET_EXTENSION_ARTIFACT_RETRY_CODES.append(httpclient.BAD_REQUEST) + RESOURCE_GONE_CODES = [ httpclient.GONE ] diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py index 469b7ece6a..583f389438 100644 --- a/azurelinuxagent/ga/update.py +++ b/azurelinuxagent/ga/update.py @@ -1614,7 +1614,7 @@ def _download(self): uri, headers = self.host.get_artifact_request(uri, self.host.manifest_uri) try: - if self._fetch(uri, headers=headers, use_proxy=False): + if self._fetch(uri, headers=headers, use_proxy=False, retry_codes=restutil.HGAP_GET_EXTENSION_ARTIFACT_RETRY_CODES): if not HostPluginProtocol.is_default_channel: logger.verbose("Setting host plugin as default channel") HostPluginProtocol.is_default_channel = True @@ -1641,12 +1641,12 @@ def _download(self): message=msg) raise UpdateError(msg) - def _fetch(self, uri, headers=None, use_proxy=True): + def _fetch(self, uri, headers=None, use_proxy=True, retry_codes=None): package = None try: is_healthy = True error_response = '' - resp = restutil.http_get(uri, use_proxy=use_proxy, headers=headers, max_retry=1) + resp = restutil.http_get(uri, use_proxy=use_proxy, headers=headers, 
max_retry=3, retry_codes=retry_codes) # Use only 3 retries, since there are usually 5 or 6 URIs and we try all of them if restutil.request_succeeded(resp): package = resp.read() fileutil.write_file(self.get_agent_pkg_path(), From 4a304a878fe1c618dad48d922ad12158bad6acee Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Wed, 6 Jul 2022 16:48:10 -0700 Subject: [PATCH 76/84] Set Agent version to 2.8.0.8 (#2627) Co-authored-by: narrieta --- azurelinuxagent/common/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azurelinuxagent/common/version.py b/azurelinuxagent/common/version.py index 43769a02f9..b65a1d79cf 100644 --- a/azurelinuxagent/common/version.py +++ b/azurelinuxagent/common/version.py @@ -209,7 +209,7 @@ def has_logrotate(): # # When doing a release, be sure to use the actual agent version. # -AGENT_VERSION = '2.8.0.7' +AGENT_VERSION = '2.8.0.8' AGENT_LONG_VERSION = "{0}-{1}".format(AGENT_NAME, AGENT_VERSION) AGENT_DESCRIPTION = """ The Azure Linux Agent supports the provisioning and running of Linux From 3866c691e9fb1b8dcaae3a559608e56cb07e6848 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Mon, 11 Jul 2022 11:38:26 -0700 Subject: [PATCH 77/84] fix network interface restart in RHEL9 (#2592) (#2629) (cherry picked from commit b8ca4323d91fd2fac54fbcb7aab0f6988b4cd0e5) Co-authored-by: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> --- azurelinuxagent/common/osutil/factory.py | 5 ++++- azurelinuxagent/common/osutil/redhat.py | 22 ++++++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/azurelinuxagent/common/osutil/factory.py b/azurelinuxagent/common/osutil/factory.py index 2ed4be78ba..61c2e6d20d 100644 --- a/azurelinuxagent/common/osutil/factory.py +++ b/azurelinuxagent/common/osutil/factory.py @@ -34,7 +34,7 @@ from .nsbsd import NSBSDOSUtil from .openbsd import OpenBSDOSUtil from .openwrt import OpenWRTOSUtil -from .redhat import RedhatOSUtil, Redhat6xOSUtil +from .redhat 
import RedhatOSUtil, Redhat6xOSUtil, RedhatOSModernUtil from .suse import SUSEOSUtil, SUSE11OSUtil from .photonos import PhotonOSUtil from .ubuntu import UbuntuOSUtil, Ubuntu12OSUtil, Ubuntu14OSUtil, \ @@ -107,6 +107,9 @@ def _get_osutil(distro_name, distro_code_name, distro_version, distro_full_name) if Version(distro_version) < Version("7"): return Redhat6xOSUtil() + if Version(distro_version) == Version("8.6") or Version(distro_version) > Version("9"): + return RedhatOSModernUtil() + return RedhatOSUtil() if distro_name == "euleros": diff --git a/azurelinuxagent/common/osutil/redhat.py b/azurelinuxagent/common/osutil/redhat.py index 9759d1136b..312dd16084 100644 --- a/azurelinuxagent/common/osutil/redhat.py +++ b/azurelinuxagent/common/osutil/redhat.py @@ -142,3 +142,25 @@ def get_dhcp_lease_endpoint(self): endpoint = self.get_endpoint_from_leases_path('/var/lib/NetworkManager/dhclient-*.lease') return endpoint + + +class RedhatOSModernUtil(RedhatOSUtil): + def __init__(self): # pylint: disable=W0235 + super(RedhatOSModernUtil, self).__init__() + + def restart_if(self, ifname, retries=3, wait=5): + """ + Restart an interface by bouncing the link. systemd-networkd observes + this event, and forces a renew of DHCP. 
+ """ + retry_limit = retries + 1 + for attempt in range(1, retry_limit): + return_code = shellutil.run("ip link set {0} down && ip link set {0} up".format(ifname)) + if return_code == 0: + return + logger.warn("failed to restart {0}: return code {1}".format(ifname, return_code)) + if attempt < retry_limit: + logger.info("retrying in {0} seconds".format(wait)) + time.sleep(wait) + else: + logger.warn("exceeded restart retries") From 3a9fc4524ef9343f058c33afdcf2f759607c2770 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Mon, 11 Jul 2022 12:58:37 -0700 Subject: [PATCH 78/84] fix if command in rhel v8.6+ (#2624) (#2630) (cherry picked from commit e54072819d985059caf40fc59816dbb81f80050f) Co-authored-by: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> --- azurelinuxagent/common/osutil/factory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azurelinuxagent/common/osutil/factory.py b/azurelinuxagent/common/osutil/factory.py index 61c2e6d20d..f14fdfbb56 100644 --- a/azurelinuxagent/common/osutil/factory.py +++ b/azurelinuxagent/common/osutil/factory.py @@ -107,7 +107,7 @@ def _get_osutil(distro_name, distro_code_name, distro_version, distro_full_name) if Version(distro_version) < Version("7"): return Redhat6xOSUtil() - if Version(distro_version) == Version("8.6") or Version(distro_version) > Version("9"): + if Version(distro_version) >= Version("8.6"): return RedhatOSModernUtil() return RedhatOSUtil() From 672dbf32f565a14632bcfd081c3c553c821fca77 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Mon, 11 Jul 2022 13:18:52 -0700 Subject: [PATCH 79/84] Set Agent version to 2.8.0.9 (#2631) Co-authored-by: narrieta --- azurelinuxagent/common/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azurelinuxagent/common/version.py b/azurelinuxagent/common/version.py index b65a1d79cf..75820d69be 100644 --- a/azurelinuxagent/common/version.py +++ b/azurelinuxagent/common/version.py @@ -209,7 +209,7 @@ def 
has_logrotate(): # # When doing a release, be sure to use the actual agent version. # -AGENT_VERSION = '2.8.0.8' +AGENT_VERSION = '2.8.0.9' AGENT_LONG_VERSION = "{0}-{1}".format(AGENT_NAME, AGENT_VERSION) AGENT_DESCRIPTION = """ The Azure Linux Agent supports the provisioning and running of Linux From ac56d0e1395db01e23c5e4a3aeefcc702752dd84 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Fri, 22 Jul 2022 16:19:14 -0700 Subject: [PATCH 80/84] Cleanup history directory when creating new subdirectories (#2633) * Cleanup history directory when creating new subdirectories * Review feedback Co-authored-by: narrieta --- azurelinuxagent/common/protocol/goal_state.py | 10 ++-- azurelinuxagent/common/protocol/wire.py | 10 ++-- azurelinuxagent/common/utils/archive.py | 51 ++++++++++++---- azurelinuxagent/ga/update.py | 7 +-- tests/utils/test_archive.py | 58 +++---------------- 5 files changed, 63 insertions(+), 73 deletions(-) diff --git a/azurelinuxagent/common/protocol/goal_state.py b/azurelinuxagent/common/protocol/goal_state.py index 8b508f61ad..4d354e5673 100644 --- a/azurelinuxagent/common/protocol/goal_state.py +++ b/azurelinuxagent/common/protocol/goal_state.py @@ -352,12 +352,12 @@ def _fetch_full_wire_server_goal_state(self, incarnation, xml_doc): certs_uri = findtext(xml_doc, "Certificates") if certs_uri is not None: xml_text = self._wire_client.fetch_config(certs_uri, self._wire_client.get_header_for_cert()) - certs = Certificates(xml_text) + certs = Certificates(xml_text, self.logger) # Log and save the certificates summary (i.e. 
the thumbprint but not the certificate itself) to the goal state history for c in certs.summary: - logger.info("Downloaded certificate {0}".format(c)) + self.logger.info("Downloaded certificate {0}".format(c)) if len(certs.warnings) > 0: - logger.warn(certs.warnings) + self.logger.warn(certs.warnings) self._history.save_certificates(json.dumps(certs.summary)) remote_access = None @@ -403,7 +403,7 @@ def __init__(self, xml_text): class Certificates(object): - def __init__(self, xml_text): + def __init__(self, xml_text, my_logger): self.cert_list = CertList() self.summary = [] # debugging info self.warnings = [] @@ -421,7 +421,7 @@ def __init__(self, xml_text): # if the certificates format is not Pkcs7BlobWithPfxContents do not parse it certificateFormat = findtext(xml_doc, "Format") if certificateFormat and certificateFormat != "Pkcs7BlobWithPfxContents": - logger.warn("The Format is not Pkcs7BlobWithPfxContents. Format is " + certificateFormat) + my_logger.warn("The Format is not Pkcs7BlobWithPfxContents. 
Format is " + certificateFormat) return cryptutil = CryptUtil(conf.get_openssl_cmd()) diff --git a/azurelinuxagent/common/protocol/wire.py b/azurelinuxagent/common/protocol/wire.py index a57355e07a..b8b05c98b8 100644 --- a/azurelinuxagent/common/protocol/wire.py +++ b/azurelinuxagent/common/protocol/wire.py @@ -767,7 +767,7 @@ def update_goal_state(self, force_update=False, silent=False): Updates the goal state if the incarnation or etag changed or if 'force_update' is True """ try: - if force_update: + if force_update and not silent: logger.info("Forcing an update of the goal state.") if self._goal_state is None or force_update: @@ -970,11 +970,13 @@ def upload_status_blob(self): if extensions_goal_state.status_upload_blob is None: # the status upload blob is in ExtensionsConfig so force a full goal state refresh - self.update_goal_state(force_update=True) + self.update_goal_state(force_update=True, silent=True) extensions_goal_state = self.get_goal_state().extensions_goal_state - if extensions_goal_state.status_upload_blob is None: - raise ProtocolNotFoundError("Status upload uri is missing") + if extensions_goal_state.status_upload_blob is None: + raise ProtocolNotFoundError("Status upload uri is missing") + + logger.info("Refreshed the goal state to get the status upload blob. New Goal State ID: {0}", extensions_goal_state.id) blob_type = extensions_goal_state.status_upload_blob_type diff --git a/azurelinuxagent/common/utils/archive.py b/azurelinuxagent/common/utils/archive.py index 0be1544c57..b624d1742c 100644 --- a/azurelinuxagent/common/utils/archive.py +++ b/azurelinuxagent/common/utils/archive.py @@ -162,17 +162,6 @@ def __init__(self, lib_dir): if exception.errno != errno.EEXIST: logger.warn("{0} : {1}", self._source, exception.strerror) - def purge(self): - """ - Delete "old" archive directories and .zip archives. Old - is defined as any directories or files older than the X - newest ones. Also, clean up any legacy history files. 
- """ - states = self._get_archive_states() - - for state in states[_MAX_ARCHIVED_STATES:]: - state.delete() - @staticmethod def purge_legacy_goal_state_history(): lib_dir = conf.get_lib_dir() @@ -222,6 +211,8 @@ def __init__(self, time, tag): timestamp = timeutil.create_history_timestamp(time) self._root = os.path.join(conf.get_lib_dir(), ARCHIVE_DIRECTORY_NAME, "{0}__{1}".format(timestamp, tag) if tag is not None else timestamp) + GoalStateHistory._purge() + @staticmethod def tag_exists(tag): """ @@ -240,6 +231,44 @@ def save(self, data, file_name): self._errors = True logger.warn("Failed to save {0} to the goal state history: {1} [no additional errors saving the goal state will be reported]".format(file_name, e)) + _purge_error_count = 0 + + @staticmethod + def _purge(): + """ + Delete "old" history directories and .zip archives. Old is defined as any directories or files older than the X newest ones. + """ + try: + history_root = os.path.join(conf.get_lib_dir(), ARCHIVE_DIRECTORY_NAME) + + if not os.path.exists(history_root): + return + + items = [] + for current_item in os.listdir(history_root): + full_path = os.path.join(history_root, current_item) + items.append(full_path) + items.sort(key=os.path.getctime, reverse=True) + + for current_item in items[_MAX_ARCHIVED_STATES:]: + if os.path.isfile(current_item): + os.remove(current_item) + else: + shutil.rmtree(current_item) + + if GoalStateHistory._purge_error_count > 0: + GoalStateHistory._purge_error_count = 0 + # Log a success message when we are recovering from errors. 
+ logger.info("Successfully cleaned up the goal state history directory") + + except Exception as e: + GoalStateHistory._purge_error_count += 1 + if GoalStateHistory._purge_error_count < 5: + logger.warn("Failed to clean up the goal state history directory: {0}".format(e)) + elif GoalStateHistory._purge_error_count == 5: + logger.warn("Failed to clean up the goal state history directory [will stop reporting these errors]: {0}".format(e)) + + @staticmethod def _save_placeholder(): """ diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py index 583f389438..66b0de5d40 100644 --- a/azurelinuxagent/ga/update.py +++ b/azurelinuxagent/ga/update.py @@ -635,9 +635,9 @@ def _process_goal_state(self, exthandlers_handler, remote_access_handler): if self._processing_new_incarnation(): remote_access_handler.run() - # lastly, cleanup the goal state history (but do it only on new goal states - no need to do it on every iteration) + # lastly, archive the goal state history (but do it only on new goal states - no need to do it on every iteration) if self._processing_new_extensions_goal_state(): - UpdateHandler._cleanup_goal_state_history() + UpdateHandler._archive_goal_state_history() finally: if self._goal_state is not None: @@ -645,10 +645,9 @@ def _process_goal_state(self, exthandlers_handler, remote_access_handler): self._last_extensions_gs_id = self._goal_state.extensions_goal_state.id @staticmethod - def _cleanup_goal_state_history(): + def _archive_goal_state_history(): try: archiver = StateArchiver(conf.get_lib_dir()) - archiver.purge() archiver.archive() except Exception as exception: logger.warn("Error cleaning up the goal state history: {0}", ustr(exception)) diff --git a/tests/utils/test_archive.py b/tests/utils/test_archive.py index 5eee67c7da..ce97d65fde 100644 --- a/tests/utils/test_archive.py +++ b/tests/utils/test_archive.py @@ -6,8 +6,9 @@ from datetime import datetime, timedelta import azurelinuxagent.common.logger as logger +from 
azurelinuxagent.common import conf from azurelinuxagent.common.utils import fileutil, timeutil -from azurelinuxagent.common.utils.archive import StateArchiver, _MAX_ARCHIVED_STATES +from azurelinuxagent.common.utils.archive import GoalStateHistory, StateArchiver, _MAX_ARCHIVED_STATES, ARCHIVE_DIRECTORY_NAME from tests.tools import AgentTestCase, patch debug = False @@ -28,7 +29,7 @@ def setUp(self): self.tmp_dir = tempfile.mkdtemp(prefix=prefix) def _write_file(self, filename, contents=None): - full_name = os.path.join(self.tmp_dir, filename) + full_name = os.path.join(conf.get_lib_dir(), filename) fileutil.mkdir(os.path.dirname(full_name)) with open(full_name, 'w') as file_handler: @@ -38,7 +39,7 @@ def _write_file(self, filename, contents=None): @property def history_dir(self): - return os.path.join(self.tmp_dir, 'history') + return os.path.join(conf.get_lib_dir(), ARCHIVE_DIRECTORY_NAME) @staticmethod def _parse_archive_name(name): @@ -66,7 +67,7 @@ def test_archive_should_zip_all_but_the_latest_goal_state_in_the_history_folder( self._write_file(os.path.join(directory, current_file)) test_directories.append(directory) - test_subject = StateArchiver(self.tmp_dir) + test_subject = StateArchiver(conf.get_lib_dir()) # NOTE: StateArchiver sorts the state directories by creation time, but the test files are created too fast and the # time resolution is too coarse, so instead we mock getctime to simply return the path of the file with patch("azurelinuxagent.common.utils.archive.os.path.getctime", side_effect=lambda path: path): @@ -83,9 +84,9 @@ def test_archive_should_zip_all_but_the_latest_goal_state_in_the_history_folder( self.assertTrue(os.path.exists(test_directories[2]), "{0}, the latest goal state, should not have being removed".format(test_directories[2])) - def test_archive02(self): + def test_goal_state_history_init_should_purge_old_items(self): """ - StateArchiver should purge the MAX_ARCHIVED_STATES oldest files + GoalStateHistory.__init__ should _purge the 
MAX_ARCHIVED_STATES oldest files or directories. The oldest timestamps are purged first. This test case creates a mixture of archive files and directories. @@ -112,11 +113,10 @@ def test_archive02(self): self.assertEqual(total, len(os.listdir(self.history_dir))) - test_subject = StateArchiver(self.tmp_dir) - # NOTE: StateArchiver sorts the state directories by creation time, but the test files are created too fast and the + # NOTE: The purge method sorts the items by creation time, but the test files are created too fast and the # time resolution is too coarse, so instead we mock getctime to simply return the path of the file with patch("azurelinuxagent.common.utils.archive.os.path.getctime", side_effect=lambda path: path): - test_subject.purge() + GoalStateHistory(datetime.utcnow(), 'test') archived_entries = os.listdir(self.history_dir) self.assertEqual(_MAX_ARCHIVED_STATES, len(archived_entries)) @@ -153,46 +153,6 @@ def test_purge_legacy_goal_state_history(self): for f in legacy_files: self.assertFalse(os.path.exists(f), "Legacy file {0} was not removed".format(f)) - def test_archive03(self): - """ - All archives should be purged, both with the legacy naming (with incarnation number) and with the new naming. 
- """ - start = datetime.now() - timestamp1 = start + timedelta(seconds=5) - timestamp2 = start + timedelta(seconds=10) - timestamp3 = start + timedelta(seconds=10) - - dir_old = timestamp1.isoformat() - dir_new = "{0}_incarnation_1".format(timestamp2.isoformat()) - - archive_old = "{0}.zip".format(timestamp1.isoformat()) - archive_new = "{0}_incarnation_1.zip".format(timestamp2.isoformat()) - - status = "{0}.zip".format(timestamp3.isoformat()) - - self._write_file(os.path.join("history", dir_old, "Prod.manifest.xml")) - self._write_file(os.path.join("history", dir_new, "Prod.manifest.xml")) - self._write_file(os.path.join("history", archive_old)) - self._write_file(os.path.join("history", archive_new)) - self._write_file(os.path.join("history", status)) - - self.assertEqual(5, len(os.listdir(self.history_dir)), "Not all entries were archived!") - - test_subject = StateArchiver(self.tmp_dir) - with patch("azurelinuxagent.common.utils.archive._MAX_ARCHIVED_STATES", 0): - test_subject.purge() - - archived_entries = os.listdir(self.history_dir) - self.assertEqual(0, len(archived_entries), "Not all entries were purged!") - - def test_archive04(self): - """ - The archive directory is created if it does not exist. - - This failure was caught when .purge() was called before .archive(). 
- """ - test_subject = StateArchiver(os.path.join(self.tmp_dir, 'does-not-exist')) - test_subject.purge() @staticmethod def parse_isoformat(timestamp_str): From 0312e9584341a7529a62de717b3503138fdd5f69 Mon Sep 17 00:00:00 2001 From: narrieta Date: Mon, 1 Aug 2022 15:45:11 -0700 Subject: [PATCH 81/84] Set agent version to 2.8.0.10 --- azurelinuxagent/common/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azurelinuxagent/common/version.py b/azurelinuxagent/common/version.py index 75820d69be..891e1c3959 100644 --- a/azurelinuxagent/common/version.py +++ b/azurelinuxagent/common/version.py @@ -209,7 +209,7 @@ def has_logrotate(): # # When doing a release, be sure to use the actual agent version. # -AGENT_VERSION = '2.8.0.9' +AGENT_VERSION = '2.8.0.10' AGENT_LONG_VERSION = "{0}-{1}".format(AGENT_NAME, AGENT_VERSION) AGENT_DESCRIPTION = """ The Azure Linux Agent supports the provisioning and running of Linux From 6f9c516ac9ee20413ab3c90acdf125acfd3e9976 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Mon, 15 Aug 2022 16:03:06 -0700 Subject: [PATCH 82/84] Save sharedconfig to disk (#2649) * Save sharedconfig to disk * Update tests * pylint warnings Co-authored-by: narrieta --- azurelinuxagent/common/protocol/goal_state.py | 12 +++++++++--- azurelinuxagent/common/utils/archive.py | 11 +++++++---- azurelinuxagent/ga/update.py | 3 +-- tests/protocol/test_goal_state.py | 11 ++++++++++- tests/utils/test_archive.py | 9 +++++++-- 5 files changed, 34 insertions(+), 12 deletions(-) diff --git a/azurelinuxagent/common/protocol/goal_state.py b/azurelinuxagent/common/protocol/goal_state.py index 4d354e5673..97ae270f87 100644 --- a/azurelinuxagent/common/protocol/goal_state.py +++ b/azurelinuxagent/common/protocol/goal_state.py @@ -33,7 +33,7 @@ from azurelinuxagent.common.protocol.hostplugin import VmSettingsNotSupported, VmSettingsSupportStopped from azurelinuxagent.common.protocol.restapi import Cert, CertList, RemoteAccessUser, 
RemoteAccessUsersList from azurelinuxagent.common.utils import fileutil -from azurelinuxagent.common.utils.archive import GoalStateHistory +from azurelinuxagent.common.utils.archive import GoalStateHistory, SHARED_CONF_FILE_NAME from azurelinuxagent.common.utils.cryptutil import CryptUtil from azurelinuxagent.common.utils.textutil import parse_doc, findall, find, findtext, getattrib @@ -345,8 +345,14 @@ def _fetch_full_wire_server_goal_state(self, incarnation, xml_doc): shared_conf_uri = findtext(xml_doc, "SharedConfig") xml_text = self._wire_client.fetch_config(shared_conf_uri, self._wire_client.get_header()) - shared_conf = SharedConfig(xml_text) + shared_config = SharedConfig(xml_text) self._history.save_shared_conf(xml_text) + # SharedConfig.xml is used by other components (Azsec and Singularity/HPC Infiniband), so save it to the agent's root directory as well + shared_config_file = os.path.join(conf.get_lib_dir(), SHARED_CONF_FILE_NAME) + try: + fileutil.write_file(shared_config_file, xml_text) + except Exception as e: + logger.warn("Failed to save {0}: {1}".format(shared_config, e)) certs = EmptyCertificates() certs_uri = findtext(xml_doc, "Certificates") @@ -372,7 +378,7 @@ def _fetch_full_wire_server_goal_state(self, incarnation, xml_doc): self._role_config_name = role_config_name self._container_id = container_id self._hosting_env = hosting_env - self._shared_conf = shared_conf + self._shared_conf = shared_config self._certs = certs self._remote_access = remote_access diff --git a/azurelinuxagent/common/utils/archive.py b/azurelinuxagent/common/utils/archive.py index b624d1742c..1f8fdd9311 100644 --- a/azurelinuxagent/common/utils/archive.py +++ b/azurelinuxagent/common/utils/archive.py @@ -46,11 +46,13 @@ _MAX_ARCHIVED_STATES = 50 _CACHE_PATTERNS = [ + # + # Note that SharedConfig.xml is not included here; this file is used by other components (Azsec and Singularity/HPC Infiniband) + # re.compile(r"^VmSettings\.\d+\.json$"), 
re.compile(r"^(.*)\.(\d+)\.(agentsManifest)$", re.IGNORECASE), re.compile(r"^(.*)\.(\d+)\.(manifest\.xml)$", re.IGNORECASE), re.compile(r"^(.*)\.(\d+)\.(xml)$", re.IGNORECASE), - re.compile(r"^SharedConfig\.xml$", re.IGNORECASE), re.compile(r"^HostingEnvironmentConfig\.xml$", re.IGNORECASE), re.compile(r"^RemoteAccess\.xml$", re.IGNORECASE), re.compile(r"^waagent_status\.\d+\.json$"), @@ -78,12 +80,12 @@ _VM_SETTINGS_FILE_NAME = "VmSettings.json" _CERTIFICATES_FILE_NAME = "Certificates.json" _HOSTING_ENV_FILE_NAME = "HostingEnvironmentConfig.xml" -_SHARED_CONF_FILE_NAME = "SharedConfig.xml" _REMOTE_ACCESS_FILE_NAME = "RemoteAccess.xml" _EXT_CONF_FILE_NAME = "ExtensionsConfig.xml" _MANIFEST_FILE_NAME = "{0}.manifest.xml" AGENT_STATUS_FILE = "waagent_status.json" +SHARED_CONF_FILE_NAME = "SharedConfig.xml" # TODO: use @total_ordering once RHEL/CentOS and SLES 11 are EOL. # @total_ordering first appeared in Python 2.7 and 3.2 @@ -166,9 +168,10 @@ def __init__(self, lib_dir): def purge_legacy_goal_state_history(): lib_dir = conf.get_lib_dir() for current_file in os.listdir(lib_dir): + # Don't remove the placeholder goal state file. 
# TODO: See comment in GoalStateHistory._save_placeholder and remove this code when no longer needed if current_file == _PLACEHOLDER_FILE_NAME: - return + continue # END TODO full_path = os.path.join(lib_dir, current_file) for pattern in _CACHE_PATTERNS: @@ -302,4 +305,4 @@ def save_hosting_env(self, text): self.save(text, _HOSTING_ENV_FILE_NAME) def save_shared_conf(self, text): - self.save(text, _SHARED_CONF_FILE_NAME) + self.save(text, SHARED_CONF_FILE_NAME) diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py index 66b0de5d40..d17fff6a46 100644 --- a/azurelinuxagent/ga/update.py +++ b/azurelinuxagent/ga/update.py @@ -378,6 +378,7 @@ def run(self, debug=False): self._ensure_firewall_rules_persisted(dst_ip=protocol.get_endpoint()) self._add_accept_tcp_firewall_rule_if_not_enabled(dst_ip=protocol.get_endpoint()) self._reset_legacy_blacklisted_agents() + self._cleanup_legacy_goal_state_history() # Get all thread handlers telemetry_handler = get_send_telemetry_events_handler(self.protocol_util) @@ -396,8 +397,6 @@ def run(self, debug=False): logger.info("Goal State Period: {0} sec. 
This indicates how often the agent checks for new goal states and reports status.", self._goal_state_period) - self._cleanup_legacy_goal_state_history() - while self.is_running: self._check_daemon_running(debug) self._check_threads_running(all_thread_handlers) diff --git a/tests/protocol/test_goal_state.py b/tests/protocol/test_goal_state.py index c774171595..87a1db50e1 100644 --- a/tests/protocol/test_goal_state.py +++ b/tests/protocol/test_goal_state.py @@ -8,6 +8,7 @@ import re import time +from azurelinuxagent.common import conf from azurelinuxagent.common.future import httpclient from azurelinuxagent.common.protocol.extensions_goal_state import GoalStateSource, GoalStateChannel from azurelinuxagent.common.protocol.extensions_goal_state_from_extensions_config import ExtensionsGoalStateFromExtensionsConfig @@ -96,7 +97,15 @@ def test_fetch_goal_state_should_raise_on_incomplete_goal_state(self): GoalState(protocol.client) self.assertEqual(_GET_GOAL_STATE_MAX_ATTEMPTS, mock_sleep.call_count, "Unexpected number of retries") - def test_instantiating_goal_state_should_save_the_goal_state_to_the_history_directory(self): + def test_fetching_the_goal_state_should_save_the_shared_config(self): + # SharedConfig.xml is used by other components (Azsec and Singularity/HPC Infiniband); verify that we do not delete it + with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: + _ = GoalState(protocol.client) + + shared_config = os.path.join(conf.get_lib_dir(), 'SharedConfig.xml') + self.assertTrue(os.path.exists(shared_config), "{0} should have been created".format(shared_config)) + + def test_fetching_the_goal_state_should_save_the_goal_state_to_the_history_directory(self): with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: protocol.mock_wire_data.set_incarnation(999) protocol.mock_wire_data.set_etag(888) diff --git a/tests/utils/test_archive.py b/tests/utils/test_archive.py index ce97d65fde..54766862f8 100644 --- 
a/tests/utils/test_archive.py +++ b/tests/utils/test_archive.py @@ -133,27 +133,32 @@ def test_goal_state_history_init_should_purge_old_items(self): def test_purge_legacy_goal_state_history(self): with patch("azurelinuxagent.common.conf.get_lib_dir", return_value=self.tmp_dir): + # SharedConfig.xml is used by other components (Azsec and Singularity/HPC Infiniband); verify that we do not delete it + shared_config = os.path.join(self.tmp_dir, 'SharedConfig.xml') + legacy_files = [ 'GoalState.2.xml', 'VmSettings.2.json', 'Prod.2.manifest.xml', 'ExtensionsConfig.2.xml', 'Microsoft.Azure.Extensions.CustomScript.1.xml', - 'SharedConfig.xml', 'HostingEnvironmentConfig.xml', 'RemoteAccess.xml', 'waagent_status.1.json' ] legacy_files = [os.path.join(self.tmp_dir, f) for f in legacy_files] + + self._write_file(shared_config) for f in legacy_files: self._write_file(f) StateArchiver.purge_legacy_goal_state_history() + self.assertTrue(os.path.exists(shared_config), "{0} should not have been removed".format(shared_config)) + for f in legacy_files: self.assertFalse(os.path.exists(f), "Legacy file {0} was not removed".format(f)) - @staticmethod def parse_isoformat(timestamp_str): return datetime.strptime(timestamp_str, '%Y-%m-%dT%H:%M:%S.%f') From 1b08441e3d4432bbc4641cec0d31b65e810334ff Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Mon, 15 Aug 2022 16:15:37 -0700 Subject: [PATCH 83/84] Set Agent version to 2.8.0.11 (#2650) Co-authored-by: narrieta --- azurelinuxagent/common/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azurelinuxagent/common/version.py b/azurelinuxagent/common/version.py index 891e1c3959..099ebc5424 100644 --- a/azurelinuxagent/common/version.py +++ b/azurelinuxagent/common/version.py @@ -209,7 +209,7 @@ def has_logrotate(): # # When doing a release, be sure to use the actual agent version. 
# -AGENT_VERSION = '2.8.0.10' +AGENT_VERSION = '2.8.0.11' AGENT_LONG_VERSION = "{0}-{1}".format(AGENT_NAME, AGENT_VERSION) AGENT_DESCRIPTION = """ The Azure Linux Agent supports the provisioning and running of Linux From 3928dbd1796d9d9a898f6516b43c35aa0d080dd0 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Tue, 20 Sep 2022 11:46:51 -0700 Subject: [PATCH 84/84] test fixes --- azurelinuxagent/common/conf.py | 4 -- tests/ga/test_update.py | 49 ------------------- ..._extensions_goal_state_from_vm_settings.py | 8 --- 3 files changed, 61 deletions(-) diff --git a/azurelinuxagent/common/conf.py b/azurelinuxagent/common/conf.py index 8664d77b8e..bd101f617f 100644 --- a/azurelinuxagent/common/conf.py +++ b/azurelinuxagent/common/conf.py @@ -559,10 +559,6 @@ def get_cgroup_monitor_expiry_time(conf=__conf__): """ cgroups monitoring for pilot extensions disabled after expiry time - NOTE: This option is experimental and may be removed in later versions of the Agent. - """ - cgroups monitoring for pilot extensions disabled after expiry time - NOTE: This option is experimental and may be removed in later versions of the Agent. 
""" return conf.get("Debug.CgroupMonitorExpiryTime", "2022-03-31") diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index 7489ed9ad6..cd5595569f 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -1322,7 +1322,6 @@ def test_run_latest_creates_only_one_signal_handler(self, mock_signal): self._test_run_latest() self.assertEqual(0, mock_signal.call_count) - # @skip_if_predicate_true(lambda: True, "This test has a dependency on the agent version being 9.9.* and breaks when updating the agent version during release") def test_get_latest_agent_should_return_latest_agent_even_on_bad_error_json(self): dst_ver = self.prepare_agents() # Add a malformed error.json file in all existing agents @@ -1709,7 +1708,6 @@ def test_it_should_set_dns_tcp_iptable_if_drop_available_accept_unavailable(self with patch('azurelinuxagent.common.conf.enable_firewall', return_value=True): with patch.object(osutil, '_enable_firewall', True): # drop rule is present -# <<<<<<< HEAD mock_iptables.set_command( AddFirewallRules.get_wire_non_root_drop_rule(AddFirewallRules.CHECK_COMMAND, mock_iptables.destination, @@ -1743,21 +1741,6 @@ def test_it_should_set_dns_tcp_iptable_if_drop_available_accept_unavailable(self self.assertEqual(len(filtered_mock_iptable_calls), 3, "Incorrect number of calls to iptables: [{0}]".format( mock_iptables.command_calls)) -# ======= -# mock_iptables.set_command(osutil.get_firewall_drop_command(mock_iptables.wait, AddFirewallRules.CHECK_COMMAND, mock_iptables.destination), exit_code=0) -# # non root tcp iptable rule is absent -# mock_iptables.set_command(osutil.get_accept_tcp_rule(mock_iptables.wait, AddFirewallRules.CHECK_COMMAND, mock_iptables.destination), exit_code=1) -# update_handler.run(debug=True) -# -# drop_check_command = TestOSUtil._command_to_string(osutil.get_firewall_drop_command(mock_iptables.wait, AddFirewallRules.CHECK_COMMAND, mock_iptables.destination)) -# accept_tcp_check_rule = 
TestOSUtil._command_to_string(osutil.get_accept_tcp_rule(mock_iptables.wait, AddFirewallRules.CHECK_COMMAND, mock_iptables.destination)) -# accept_tcp_insert_rule = TestOSUtil._command_to_string(osutil.get_accept_tcp_rule(mock_iptables.wait, AddFirewallRules.INSERT_COMMAND, mock_iptables.destination)) -# -# # Filtering the mock iptable command calls with only the once related to this test. -# filtered_mock_iptable_calls = [cmd for cmd in mock_iptables.command_calls if cmd in [drop_check_command, accept_tcp_check_rule, accept_tcp_insert_rule]] -# -# self.assertEqual(len(filtered_mock_iptable_calls), 3, "Incorrect number of calls to iptables: [{0}]".format(mock_iptables.command_calls)) -# >>>>>>> master self.assertEqual(filtered_mock_iptable_calls[0], drop_check_command, "The first command should check the drop rule") self.assertEqual(filtered_mock_iptable_calls[1], accept_tcp_check_rule, @@ -1772,7 +1755,6 @@ def test_it_should_not_set_dns_tcp_iptable_if_drop_unavailable(self): with patch('azurelinuxagent.common.conf.enable_firewall', return_value=True): with patch.object(osutil, '_enable_firewall', True): # drop rule is not available -# <<<<<<< HEAD mock_iptables.set_command( AddFirewallRules.get_wire_non_root_drop_rule(AddFirewallRules.CHECK_COMMAND, mock_iptables.destination, @@ -1801,20 +1783,6 @@ def test_it_should_not_set_dns_tcp_iptable_if_drop_unavailable(self): self.assertEqual(len(filtered_mock_iptable_calls), 1, "Incorrect number of calls to iptables: [{0}]".format( mock_iptables.command_calls)) -# ======= -# mock_iptables.set_command(osutil.get_firewall_drop_command(mock_iptables.wait, AddFirewallRules.CHECK_COMMAND, mock_iptables.destination), exit_code=1) -# -# update_handler.run(debug=True) -# -# drop_check_command = TestOSUtil._command_to_string(osutil.get_firewall_drop_command(mock_iptables.wait, AddFirewallRules.CHECK_COMMAND, mock_iptables.destination)) -# accept_tcp_check_rule = 
TestOSUtil._command_to_string(osutil.get_accept_tcp_rule(mock_iptables.wait, AddFirewallRules.CHECK_COMMAND, mock_iptables.destination)) -# accept_tcp_insert_rule = TestOSUtil._command_to_string(osutil.get_accept_tcp_rule(mock_iptables.wait, AddFirewallRules.INSERT_COMMAND, mock_iptables.destination)) -# -# # Filtering the mock iptable command calls with only the once related to this test. -# filtered_mock_iptable_calls = [cmd for cmd in mock_iptables.command_calls if cmd in [drop_check_command, accept_tcp_check_rule, accept_tcp_insert_rule]] -# -# self.assertEqual(len(filtered_mock_iptable_calls), 1, "Incorrect number of calls to iptables: [{0}]".format(mock_iptables.command_calls)) -# >>>>>>> master self.assertEqual(filtered_mock_iptable_calls[0], drop_check_command, "The first command should check the drop rule") @@ -1825,7 +1793,6 @@ def test_it_should_not_set_dns_tcp_iptable_if_drop_and_accept_available(self): with patch('azurelinuxagent.common.conf.enable_firewall', return_value=True): with patch.object(osutil, '_enable_firewall', True): # drop rule is available -# <<<<<<< HEAD mock_iptables.set_command( AddFirewallRules.get_wire_non_root_drop_rule(AddFirewallRules.CHECK_COMMAND, mock_iptables.destination, @@ -1859,22 +1826,6 @@ def test_it_should_not_set_dns_tcp_iptable_if_drop_and_accept_available(self): self.assertEqual(len(filtered_mock_iptable_calls), 2, "Incorrect number of calls to iptables: [{0}]".format( mock_iptables.command_calls)) -# ======= -# mock_iptables.set_command(osutil.get_firewall_drop_command(mock_iptables.wait, AddFirewallRules.CHECK_COMMAND, mock_iptables.destination), exit_code=0) -# # non root tcp iptable rule is available -# mock_iptables.set_command(osutil.get_accept_tcp_rule(mock_iptables.wait, AddFirewallRules.CHECK_COMMAND, mock_iptables.destination), exit_code=0) -# -# update_handler.run(debug=True) -# -# drop_check_command = TestOSUtil._command_to_string(osutil.get_firewall_drop_command(mock_iptables.wait, 
AddFirewallRules.CHECK_COMMAND, mock_iptables.destination)) -# accept_tcp_check_rule = TestOSUtil._command_to_string(osutil.get_accept_tcp_rule(mock_iptables.wait, AddFirewallRules.CHECK_COMMAND, mock_iptables.destination)) -# accept_tcp_insert_rule = TestOSUtil._command_to_string(osutil.get_accept_tcp_rule(mock_iptables.wait, AddFirewallRules.INSERT_COMMAND, mock_iptables.destination)) -# -# # Filtering the mock iptable command calls with only the once related to this test. -# filtered_mock_iptable_calls = [cmd for cmd in mock_iptables.command_calls if cmd in [drop_check_command, accept_tcp_check_rule, accept_tcp_insert_rule]] -# -# self.assertEqual(len(filtered_mock_iptable_calls), 2, "Incorrect number of calls to iptables: [{0}]".format(mock_iptables.command_calls)) -# >>>>>>> master self.assertEqual(filtered_mock_iptable_calls[0], drop_check_command, "The first command should check the drop rule") self.assertEqual(filtered_mock_iptable_calls[1], accept_tcp_check_rule, diff --git a/tests/protocol/test_extensions_goal_state_from_vm_settings.py b/tests/protocol/test_extensions_goal_state_from_vm_settings.py index 8a7d4c89a5..8cdfa81bf9 100644 --- a/tests/protocol/test_extensions_goal_state_from_vm_settings.py +++ b/tests/protocol/test_extensions_goal_state_from_vm_settings.py @@ -108,14 +108,6 @@ def test_its_source_channel_should_be_host_ga_plugin(self): self.assertEqual(GoalStateChannel.HostGAPlugin, extensions_goal_state.channel, "The channel is incorrect") - def test_create_from_vm_settings_should_parse_missing_status_upload_blob_as_none(self): - vm_settings_text = fileutil.read_file(os.path.join(data_dir, "hostgaplugin/vm_settings-no_status_upload_blob.json")) - vm_settings = ExtensionsGoalStateFactory.create_from_vm_settings("123", vm_settings_text) - - self.assertIsNone(vm_settings.status_upload_blob, "Expected status upload blob to be None") - self.assertEqual("BlockBlob", vm_settings.status_upload_blob_type, "Expected status upload blob to be Block") - - 
class CaseFoldedDictionaryTestCase(AgentTestCase): def test_it_should_retrieve_items_ignoring_case(self): dictionary = json.loads('''{