From f7be25ef32527c0867a62a86edd5cb587e3e2866 Mon Sep 17 00:00:00 2001 From: Yashna Parikh Date: Tue, 21 Apr 2026 16:47:26 -0400 Subject: [PATCH 01/16] Sanitize credential-like URLs in telemetry events --- .../src/service_interfaces/TelemetryWriter.py | 9 ++- src/core/tests/Test_TelemetryWriter.py | 39 ++++++++++ src/extension/src/TelemetryWriter.py | 10 ++- src/extension/src/Utility.py | 30 ++++++++ src/extension/tests/Test_TelemetryWriter.py | 53 +++++++++++++- src/extension/tests/Test_Utility.py | 73 +++++++++++++++++++ 6 files changed, 210 insertions(+), 4 deletions(-) diff --git a/src/core/src/service_interfaces/TelemetryWriter.py b/src/core/src/service_interfaces/TelemetryWriter.py index 7e565ebf1..ca7859128 100644 --- a/src/core/src/service_interfaces/TelemetryWriter.py +++ b/src/core/src/service_interfaces/TelemetryWriter.py @@ -23,6 +23,7 @@ import time from core.src.bootstrap.Constants import Constants +from extension.src.Utility import Utility class TelemetryWriter(object): @@ -127,12 +128,17 @@ def __get_events_folder_path_exists(events_folder_path): return events_folder_path is not None and os.path.exists(events_folder_path) def __new_event_json(self, event_level, message, task_name): + # Step 1: Apply message restrictions (formatting, truncation) + restricted_message = self.__ensure_message_restriction_compliance(message) + # Step 2: Sanitize credentials from URIs + sanitized_message = Utility.sanitize_credentials_from_uri(restricted_message) + return { "Version": Constants.EXT_VERSION, "Timestamp": str(datetime.datetime.utcnow()), "TaskName": task_name, "EventLevel": event_level, - "Message": self.__ensure_message_restriction_compliance(message), + "Message": sanitized_message, "EventPid": "", "EventTid": "", "OperationId": self.__operation_id # activity id from from config settings @@ -161,6 +167,7 @@ def __ensure_message_restriction_compliance(self, full_message): self.composite_logger.log_telemetry_module_error("Error occurred while formatting message for a telemetry event. [Error={0}]".format(repr(e))) raise + def write_event_with_buffer(self, message, event_level, buffer_msg): if buffer_msg == Constants.BufferMessage.TRUE and (event_level == self.last_telemetry_event_level or self.last_telemetry_event_level is None): if self.telemetry_buffer_store != "": diff --git a/src/core/tests/Test_TelemetryWriter.py b/src/core/tests/Test_TelemetryWriter.py index 96b60aee7..46a476354 100644 --- a/src/core/tests/Test_TelemetryWriter.py +++ b/src/core/tests/Test_TelemetryWriter.py @@ -311,5 +311,44 @@ def test_write_event_with_buffer_true_and_empty_string_and_then_flush_with_non_e f.close() self.assertTrue(text_found.string.startswith("Message 1")) + def test_sanitize_credentials_from_uri_in_telemetry(self): + """Test credential sanitization in telemetry events - credentials should be removed from URLs""" + message = "Error connecting to https://testuser:TESTTOKEN123456@invalid.repo.example/rpm/repodata/repomd.xml" + self.runtime.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Sanitize") + + latest_event_file = [pos_json for pos_json in os.listdir(self.runtime.telemetry_writer.events_folder_path) if + re.search('^[0-9]+.json$', pos_json)][-1] + with open(os.path.join(self.runtime.telemetry_writer.events_folder_path, latest_event_file), 'r') as f: + events = json.load(f) + telemetry_message = events[-1]["Message"] + + # Token should be removed + self.assertFalse("TESTTOKEN123456" in telemetry_message, "Token should not be in telemetry message") + # Username should be preserved + self.assertTrue("testuser@invalid.repo.example" in telemetry_message, "Username should be preserved in telemetry") + # URL structure should be preserved + self.assertTrue("https://testuser@invalid.repo.example/rpm/repodata/repomd.xml" in telemetry_message, + "Sanitized URL should be present in telemetry") + f.close() + + def test_sanitize_multiple_credentials_in_telemetry(self): + """Test sanitization with multiple URLs containing credentials""" + message = "Failed fetching from https://user1:pass1@host1.com/api and http://user2:pass2@host2.com/data" + self.runtime.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Multiple") + + latest_event_file = [pos_json for pos_json in os.listdir(self.runtime.telemetry_writer.events_folder_path) if + re.search('^[0-9]+.json$', pos_json)][-1] + with open(os.path.join(self.runtime.telemetry_writer.events_folder_path, latest_event_file), 'r') as f: + events = json.load(f) + telemetry_message = events[-1]["Message"] + + # All passwords should be removed + self.assertFalse("pass1" in telemetry_message) + self.assertFalse("pass2" in telemetry_message) + # Usernames should be preserved + self.assertTrue("user1@host1.com" in telemetry_message) + self.assertTrue("user2@host2.com" in telemetry_message) + f.close() + if __name__ == '__main__': unittest.main() diff --git a/src/extension/src/TelemetryWriter.py b/src/extension/src/TelemetryWriter.py index af7747eac..15af2bfa1 100644 --- a/src/extension/src/TelemetryWriter.py +++ b/src/extension/src/TelemetryWriter.py @@ -23,6 +23,7 @@ import time from extension.src.Constants import Constants +from extension.src.Utility import Utility class TelemetryWriter(object): @@ -39,12 +40,17 @@ def __init__(self, logger, env_layer): self.__task_name = Constants.TELEMETRY_TASK_NAME + self.__task_name_watermark def __new_event_json(self, event_level, message, task_name): + # Step 1: Apply message restrictions (formatting, truncation) + restricted_message = self.__ensure_message_restriction_compliance(message) + # Step 2: Sanitize credentials from URIs + sanitized_message = Utility.sanitize_credentials_from_uri(restricted_message) + return { "Version": Constants.EXT_VERSION, "Timestamp": str(datetime.datetime.utcnow()), "TaskName": task_name, "EventLevel": event_level, - "Message": self.__ensure_message_restriction_compliance(message), + "Message": sanitized_message, "EventPid": "", "EventTid": "", "OperationId": self.__operation_id # This should have activity id from from config settings, but since we only read settings file for enable command, enable command will have activity id set here and all non-enable commands will have this as a timestamp @@ -68,6 +74,8 @@ def __ensure_message_restriction_compliance(self, full_message): self.logger.log_telemetry_module_error("Error occurred while formatting message for a telemetry event. [Error={0}]".format(repr(e))) raise + # ...existing code... + def __get_agent_supports_telemetry_from_env_var(self): """ Returns True if the env var AZURE_GUEST_AGENT_EXTENSION_SUPPORTED_FEATURES has a key of ExtensionTelemetryPipeline in the list. Value of the env var looks like this: diff --git a/src/extension/src/Utility.py b/src/extension/src/Utility.py index 837c5591a..461766c9b 100644 --- a/src/extension/src/Utility.py +++ b/src/extension/src/Utility.py @@ -16,6 +16,7 @@ import datetime import os +import re import time from extension.src.Constants import Constants from extension.src.local_loggers.FileLogger import FileLogger @@ -68,3 +69,32 @@ def get_datetime_from_str(date_str): def get_str_from_datetime(date): return date.strftime(Constants.UTC_DATETIME_FORMAT) + @staticmethod + def sanitize_credentials_from_uri(message): + """ Sanitizes credential-like values from URIs. + Removes password/token from URI userinfo while preserving other details. + Example: https://user:token@host → https://user@host + + Args: + message: The message string potentially containing URIs with credentials + + Returns: + The message with credentials removed from URIs + """ + try: + # Pattern matches: scheme://user:password@host → scheme://user@host + # Handles credentials containing special characters including @ + # Groups: + # (1) scheme: https://, http://, or ftp:// + # (2) username: one or more non-whitespace, non-slash, non-colon, non-@ characters + # (3) password: zero or more non-whitespace, non-slash, non-@ characters + sanitized_message = re.sub( + r'(https?://|ftp://)([^:/@\s]+):([^@/\s]*)@', + r'\1\2@', + message + ) + return sanitized_message + except Exception as e: + # Return original message if sanitization fails + return message + diff --git a/src/extension/tests/Test_TelemetryWriter.py b/src/extension/tests/Test_TelemetryWriter.py index fa91c4232..dce9dcd0d 100644 --- a/src/extension/tests/Test_TelemetryWriter.py +++ b/src/extension/tests/Test_TelemetryWriter.py @@ -158,10 +158,59 @@ def test_events_deleted_outside_of_extension_while_extension_is_running(self): self.telemetry_writer.write_event("testing telemetry write to file", Constants.TelemetryEventLevel.Error, "Test Task") os.listdir = backup_os_listdir + # ==================== Common Helper Method for Loading Event Files ==================== + def _load_sanitized_event(self, message): + """ + Common helper method to write event to telemetry and load the sanitized message. + The regex sanitization happens automatically in TelemetryWriter. + + Args: + message: The message to write to telemetry + + Returns: + The sanitized message from the event + """ + if self.runtime.is_github_runner: + return None + + # Write event to telemetry + self.telemetry_writer.write_event(message) + + # Load the event file + event_files = os.listdir(self.telemetry_writer.events_folder_path) + with open(os.path.join(self.telemetry_writer.events_folder_path, event_files[0]), 'r+') as f: + events = json.load(f) + sanitized_message = events[0]["Message"] + f.close() + return sanitized_message + + # ==================== Test cases for credential sanitization in telemetry messages ==================== + def test_sanitize_credentials_multiple_repos(self): + """Test 2: Failed repo sync with multiple repo URLs containing different credentials""" + message = "Failed repo sync: https://user1:token1@repo1.example.com https://user2:token2@repo2.example.com/path" + + sanitized_message = self._load_sanitized_event(message) + expected_message = "Failed repo sync: https://user1@repo1.example.com https://user2@repo2.example.com/path" + self.assertEqual(sanitized_message, expected_message) + + def test_sanitize_credentials_username_only_no_password(self): + """Test 3: Using mirror with username only (no password)""" + message = "Using mirror https://testuser@repo.example.com/path" + + sanitized_message = self._load_sanitized_event(message) + self.assertIn("testuser@repo.example.com", sanitized_message) + + def test_sanitize_credentials_special_characters_in_password(self): + """Test 4: Downloading from repo with special characters in password""" + message = "Downloading from https://svc-user:AbC_123-.$%!@repo.contoso.com/rpm" + + sanitized_message = self._load_sanitized_event(message) + self.assertNotIn("AbC_123-.$%!", sanitized_message) + self.assertIn("svc-user@repo.contoso.com", sanitized_message) if __name__ == '__main__': - SUITE = unittest.TestLoader().loadTestsFromTestCase(TestTelemetryWriter) - unittest.TextTestRunner(verbosity=2).run(SUITE) + SUITE = unittest.TestLoader().loadTestsFromTestCase(TestTelemetryWriter) + unittest.TextTestRunner(verbosity=2).run(SUITE) diff --git a/src/extension/tests/Test_Utility.py b/src/extension/tests/Test_Utility.py index e77b1ac57..81acafea8 100644 --- a/src/extension/tests/Test_Utility.py +++ b/src/extension/tests/Test_Utility.py @@ -73,3 +73,76 @@ def test_delete_file_failure(self): # Remove the directory after the test shutil.rmtree(test_dir) + + def test_sanitize_credentials_from_uri_https(self): + """ Test sanitization of HTTPS URIs with credentials """ + message = "Error connecting to https://testuser:TESTTOKEN123456@invalid.repo.example/rpm/repodata/repomd.xml" + sanitized = self.utility.sanitize_credentials_from_uri(message) + expected_message = "Error connecting to https://testuser@invalid.repo.example/rpm/repodata/repomd.xml" + self.assertEqual(sanitized, expected_message) + + def test_sanitize_credentials_from_uri_http(self): + """ Test sanitization of HTTP URIs with credentials """ + message = "Connection failed to http://user123:password123@example.com/path" + sanitized = self.utility.sanitize_credentials_from_uri(message) + # Password should be removed + self.assertNotIn("password123", sanitized) + # Username should be preserved + self.assertIn("user123@example.com", sanitized) + + def test_sanitize_credentials_multiple_urls(self): + """ Test sanitization with multiple URLs containing credentials """ + message = "Failed to fetch from https://user1:pass1@host1.com/api and http://user2:pass2@host2.com/data" + sanitized = self.utility.sanitize_credentials_from_uri(message) + # Passwords should be removed + self.assertNotIn("pass1", sanitized) + self.assertNotIn("pass2", sanitized) + # Usernames should be preserved + self.assertIn("user1@host1.com", sanitized) + self.assertIn("user2@host2.com", sanitized) + + def test_sanitize_credentials_jfrog_repo_error(self): + """ ERROR with 401 status code from jfrog.io """ + message = "ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml" + sanitized = self.utility.sanitize_credentials_from_uri(message) + expected_message = "ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml" + self.assertEqual(sanitized, expected_message) + + def test_sanitize_credentials_curl_error_buildbot_token(self): + """ Curl error with buildbot:BuildBotToken credentials """ + message = ("Curl error (6): Couldn't resolve host 'packages.microsoft.com' Could not " + "retrieve mirrorlist https://buildbot:BuildBotToken@mirror.example.com/repodata/repomd.xml") + sanitized = self.utility.sanitize_credentials_from_uri(message) + expected_message = ("Curl error (6): Couldn't resolve host 'packages.microsoft.com' Could not " + "retrieve mirrorlist https://buildbot@mirror.example.com/repodata/repomd.xml") + self.assertEqual(sanitized, expected_message) + + def test_sanitize_credentials_expired_ssl_certs_error(self): + """ ERROR with expired SSL certs and TESTTOKEN123456 """ + message = ("ERROR: Customer environment error (expired SSL certs): " + "Command=sudo yum update -y --disablerepo='*' " + "--enablerepo='microsoft' !!Code=11 Out- Updating " + "Subscription Management repositories. " + "Unable to read consumer identity This system is not registered " + "with an entitlement server. Status code: 401 " + "for https://testuser:TESTTOKEN123456@packages-microsoft-com-prod/CENTRAL.rpm " + "Error: Failed to download metadata for repo 'packages-microsoft-com-prod': " + "Cannot download repomd.xml: All mirrors were tried") + sanitized = self.utility.sanitize_credentials_from_uri(message) + expected_message = ("ERROR: Customer environment error (expired SSL certs): " + "Command=sudo yum update -y --disablerepo='*' " + "--enablerepo='microsoft' !!Code=11 Out- Updating " + "Subscription Management repositories. " + "Unable to read consumer identity This system is not registered " + "with an entitlement server. Status code: 401 " + "for https://testuser@packages-microsoft-com-prod/CENTRAL.rpm " + "Error: Failed to download metadata for repo 'packages-microsoft-com-prod': " + "Cannot download repomd.xml: All mirrors were tried") + self.assertEqual(sanitized, expected_message) + + + + + + + From d685a5299e4621baccc4289666dbbf8b5e5ff442 Mon Sep 17 00:00:00 2001 From: Yashna Parikh Date: Thu, 23 Apr 2026 11:02:11 -0400 Subject: [PATCH 02/16] Address Copilot Review comments --- src/core/src/Utility.py | 52 +++++++++ .../src/service_interfaces/TelemetryWriter.py | 2 +- src/core/tests/Test_Utility.py | 105 ++++++++++++++++++ src/extension/src/TelemetryWriter.py | 3 +- src/extension/src/Utility.py | 30 ----- src/extension/tests/Test_TelemetryWriter.py | 9 ++ src/extension/tests/Test_Utility.py | 71 ------------ 7 files changed, 168 insertions(+), 104 deletions(-) create mode 100644 src/core/src/Utility.py create mode 100644 src/core/tests/Test_Utility.py diff --git a/src/core/src/Utility.py b/src/core/src/Utility.py new file mode 100644 index 000000000..d3c656ae9 --- /dev/null +++ b/src/core/src/Utility.py @@ -0,0 +1,52 @@ +# Copyright 2020 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Requires Python 2.7+ + +import re + + +class Utility(object): + """Core utility functions shared across core and extension packages""" + + @staticmethod + def sanitize_credentials_from_uri(message): + """Sanitizes credential-like values from URIs. + Removes password/token from URI userinfo while preserving other details. + Example: https://user:token@host → https://user@host + + Args: + message: The message string potentially containing URIs with credentials + + Returns: + The message with credentials removed from URIs + """ + try: + # Pattern matches: scheme://user:password@host → scheme://user@host + # Handles credentials containing special characters (except @, /, whitespace) + # Groups: + # (1) scheme: https://, http://, or ftp:// + # (2) username: one or more non-whitespace, non-slash, non-colon, non-@ characters + # (3) password: zero or more non-whitespace, non-slash, non-@ characters + sanitized_message = re.sub( + r'(https?://|ftp://)([^:/@\s]+):([^@/\s]*)@', + r'\1\2@', + message + ) + return sanitized_message + except Exception as e: + # Return original message if sanitization fails + return message + + diff --git a/src/core/src/service_interfaces/TelemetryWriter.py b/src/core/src/service_interfaces/TelemetryWriter.py index ca7859128..0dbf649fe 100644 --- a/src/core/src/service_interfaces/TelemetryWriter.py +++ b/src/core/src/service_interfaces/TelemetryWriter.py @@ -23,7 +23,7 @@ import time from core.src.bootstrap.Constants import Constants -from extension.src.Utility import Utility +from core.src.Utility import Utility class TelemetryWriter(object): diff --git a/src/core/tests/Test_Utility.py b/src/core/tests/Test_Utility.py new file mode 100644 index 000000000..ec44b4a72 --- /dev/null +++ b/src/core/tests/Test_Utility.py @@ -0,0 +1,105 @@ +# Copyright 2020 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Requires Python 2.7+ + +import unittest + +from core.src.Utility import Utility + + +class TestUtility(unittest.TestCase): + + def setUp(self): + self.utility = Utility() + + def tearDown(self): + pass + + def test_sanitize_credentials_from_uri_https(self): + """ Test sanitization of HTTPS URIs with credentials """ + message = "Error connecting to https://testuser:TESTTOKEN123456@invalid.repo.example/rpm/repodata/repomd.xml" + sanitized = self.utility.sanitize_credentials_from_uri(message) + expected_message = "Error connecting to https://testuser@invalid.repo.example/rpm/repodata/repomd.xml" + self.assertEqual(sanitized, expected_message) + + def test_sanitize_credentials_from_uri_http(self): + """ Test sanitization of HTTP URIs with credentials """ + message = "Connection failed to http://user123:password123@example.com/path" + sanitized = self.utility.sanitize_credentials_from_uri(message) + # Password should be removed + self.assertNotIn("password123", sanitized) + # Username should be preserved + self.assertIn("user123@example.com", sanitized) + + def test_sanitize_credentials_multiple_urls(self): + """ Test sanitization with multiple URLs containing credentials """ + message = "Failed to fetch from https://user1:pass1@host1.com/api and http://user2:pass2@host2.com/data" + sanitized = self.utility.sanitize_credentials_from_uri(message) + # Passwords should be removed + self.assertNotIn("pass1", sanitized) + self.assertNotIn("pass2", sanitized) + # Usernames should be preserved + self.assertIn("user1@host1.com", sanitized) + self.assertIn("user2@host2.com", sanitized) + + def test_sanitize_credentials_jfrog_repo_error(self): + """ ERROR with 401 status code from jfrog.io """ + message = "ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml" + sanitized = self.utility.sanitize_credentials_from_uri(message) + expected_message = "ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml" + self.assertEqual(sanitized, expected_message) + + def test_sanitize_credentials_curl_error_buildbot_token(self): + """ Curl error with buildbot:BuildBotToken credentials """ + message = ("Curl error (6): Couldn't resolve host 'packages.microsoft.com' Could not " + "retrieve mirrorlist https://buildbot:BuildBotToken@mirror.example.com/repodata/repomd.xml") + sanitized = self.utility.sanitize_credentials_from_uri(message) + expected_message = ("Curl error (6): Couldn't resolve host 'packages.microsoft.com' Could not " + "retrieve mirrorlist https://buildbot@mirror.example.com/repodata/repomd.xml") + self.assertEqual(sanitized, expected_message) + + def test_sanitize_credentials_expired_ssl_certs_error(self): + """ ERROR with expired SSL certs and TESTTOKEN123456 """ + message = ("ERROR: Customer environment error (expired SSL certs): " + "Command=sudo yum update -y --disablerepo='*' " + "--enablerepo='microsoft' !!Code=11 Out- Updating " + "Subscription Management repositories. " + "Unable to read consumer identity This system is not registered " + "with an entitlement server. Status code: 401 " + "for https://testuser:TESTTOKEN123456@packages-microsoft-com-prod/CENTRAL.rpm " + "Error: Failed to download metadata for repo 'packages-microsoft-com-prod': " + "Cannot download repomd.xml: All mirrors were tried") + sanitized = self.utility.sanitize_credentials_from_uri(message) + expected_message = ("ERROR: Customer environment error (expired SSL certs): " + "Command=sudo yum update -y --disablerepo='*' " + "--enablerepo='microsoft' !!Code=11 Out- Updating " + "Subscription Management repositories. " + "Unable to read consumer identity This system is not registered " + "with an entitlement server. Status code: 401 " + "for https://testuser@packages-microsoft-com-prod/CENTRAL.rpm " + "Error: Failed to download metadata for repo 'packages-microsoft-com-prod': " + "Cannot download repomd.xml: All mirrors were tried") + self.assertEqual(sanitized, expected_message) + + def test_sanitize_credentials_exception_handling(self): + """ Test exception handling: passing None should return the input unchanged """ + result = self.utility.sanitize_credentials_from_uri(None) + self.assertIsNone(result) + + +if __name__ == '__main__': + unittest.main() + + diff --git a/src/extension/src/TelemetryWriter.py b/src/extension/src/TelemetryWriter.py index 15af2bfa1..afd610b1f 100644 --- a/src/extension/src/TelemetryWriter.py +++ b/src/extension/src/TelemetryWriter.py @@ -23,7 +23,7 @@ import time from extension.src.Constants import Constants -from extension.src.Utility import Utility +from core.src.Utility import Utility class TelemetryWriter(object): @@ -74,7 +74,6 @@ def __ensure_message_restriction_compliance(self, full_message): self.logger.log_telemetry_module_error("Error occurred while formatting message for a telemetry event. [Error={0}]".format(repr(e))) raise - # ...existing code... def __get_agent_supports_telemetry_from_env_var(self): """ Returns True if the env var AZURE_GUEST_AGENT_EXTENSION_SUPPORTED_FEATURES has a key of diff --git a/src/extension/src/Utility.py b/src/extension/src/Utility.py index 461766c9b..837c5591a 100644 --- a/src/extension/src/Utility.py +++ b/src/extension/src/Utility.py @@ -16,7 +16,6 @@ import datetime import os -import re import time from extension.src.Constants import Constants from extension.src.local_loggers.FileLogger import FileLogger @@ -69,32 +68,3 @@ def get_datetime_from_str(date_str): def get_str_from_datetime(date): return date.strftime(Constants.UTC_DATETIME_FORMAT) - @staticmethod - def sanitize_credentials_from_uri(message): - """ Sanitizes credential-like values from URIs. - Removes password/token from URI userinfo while preserving other details. - Example: https://user:token@host → https://user@host - - Args: - message: The message string potentially containing URIs with credentials - - Returns: - The message with credentials removed from URIs - """ - try: - # Pattern matches: scheme://user:password@host → scheme://user@host - # Handles credentials containing special characters including @ - # Groups: - # (1) scheme: https://, http://, or ftp:// - # (2) username: one or more non-whitespace, non-slash, non-colon, non-@ characters - # (3) password: zero or more non-whitespace, non-slash, non-@ characters - sanitized_message = re.sub( - r'(https?://|ftp://)([^:/@\s]+):([^@/\s]*)@', - r'\1\2@', - message - ) - return sanitized_message - except Exception as e: - # Return original message if sanitization fails - return message - diff --git a/src/extension/tests/Test_TelemetryWriter.py b/src/extension/tests/Test_TelemetryWriter.py index dce9dcd0d..fcfbbf435 100644 --- a/src/extension/tests/Test_TelemetryWriter.py +++ b/src/extension/tests/Test_TelemetryWriter.py @@ -187,6 +187,9 @@ def _load_sanitized_event(self, message): # ==================== Test cases for credential sanitization in telemetry messages ==================== def test_sanitize_credentials_multiple_repos(self): """Test 2: Failed repo sync with multiple repo URLs containing different credentials""" + if self.runtime.is_github_runner: + return + message = "Failed repo sync: https://user1:token1@repo1.example.com https://user2:token2@repo2.example.com/path" sanitized_message = self._load_sanitized_event(message) @@ -195,6 +198,9 @@ def test_sanitize_credentials_multiple_repos(self): def test_sanitize_credentials_username_only_no_password(self): """Test 3: Using mirror with username only (no password)""" + if self.runtime.is_github_runner: + return + message = "Using mirror https://testuser@repo.example.com/path" sanitized_message = self._load_sanitized_event(message) @@ -202,6 +208,9 @@ def test_sanitize_credentials_username_only_no_password(self): def test_sanitize_credentials_special_characters_in_password(self): """Test 4: Downloading from repo with special characters in password""" + if self.runtime.is_github_runner: + return + message = "Downloading from https://svc-user:AbC_123-.$%!@repo.contoso.com/rpm" sanitized_message = self._load_sanitized_event(message) diff --git a/src/extension/tests/Test_Utility.py b/src/extension/tests/Test_Utility.py index 81acafea8..e9440042e 100644 --- a/src/extension/tests/Test_Utility.py +++ b/src/extension/tests/Test_Utility.py @@ -74,75 +74,4 @@ def test_delete_file_failure(self): # Remove the directory after the test shutil.rmtree(test_dir) - def test_sanitize_credentials_from_uri_https(self): - """ Test sanitization of HTTPS URIs with credentials """ - message = "Error connecting to https://testuser:TESTTOKEN123456@invalid.repo.example/rpm/repodata/repomd.xml" - sanitized = self.utility.sanitize_credentials_from_uri(message) - expected_message = "Error connecting to https://testuser@invalid.repo.example/rpm/repodata/repomd.xml" - self.assertEqual(sanitized, expected_message) - - def test_sanitize_credentials_from_uri_http(self): - """ Test sanitization of HTTP URIs with credentials """ - message = "Connection failed to http://user123:password123@example.com/path" - sanitized = self.utility.sanitize_credentials_from_uri(message) - # Password should be removed - self.assertNotIn("password123", sanitized) - # Username should be preserved - self.assertIn("user123@example.com", sanitized) - - def test_sanitize_credentials_multiple_urls(self): - """ Test sanitization with multiple URLs containing credentials """ - message = "Failed to fetch from https://user1:pass1@host1.com/api and http://user2:pass2@host2.com/data" - sanitized = self.utility.sanitize_credentials_from_uri(message) - # Passwords should be removed - self.assertNotIn("pass1", sanitized) - self.assertNotIn("pass2", sanitized) - # Usernames should be preserved - self.assertIn("user1@host1.com", sanitized) - self.assertIn("user2@host2.com", sanitized) - - def test_sanitize_credentials_jfrog_repo_error(self): - """ ERROR with 401 status code from jfrog.io """ - message = "ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml" - sanitized = self.utility.sanitize_credentials_from_uri(message) - expected_message = "ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml" - self.assertEqual(sanitized, expected_message) - - def test_sanitize_credentials_curl_error_buildbot_token(self): - """ Curl error with buildbot:BuildBotToken credentials """ - message = ("Curl error (6): Couldn't resolve host 'packages.microsoft.com' Could not " - "retrieve mirrorlist https://buildbot:BuildBotToken@mirror.example.com/repodata/repomd.xml") - sanitized = self.utility.sanitize_credentials_from_uri(message) - expected_message = ("Curl error (6): Couldn't resolve host 'packages.microsoft.com' Could not " - "retrieve mirrorlist https://buildbot@mirror.example.com/repodata/repomd.xml") - self.assertEqual(sanitized, expected_message) - - def test_sanitize_credentials_expired_ssl_certs_error(self): - """ ERROR with expired SSL certs and TESTTOKEN123456 """ - message = ("ERROR: Customer environment error (expired SSL certs): " - "Command=sudo yum update -y --disablerepo='*' " - "--enablerepo='microsoft' !!Code=11 Out- Updating " - "Subscription Management repositories. " - "Unable to read consumer identity This system is not registered " - "with an entitlement server. Status code: 401 " - "for https://testuser:TESTTOKEN123456@packages-microsoft-com-prod/CENTRAL.rpm " - "Error: Failed to download metadata for repo 'packages-microsoft-com-prod': " - "Cannot download repomd.xml: All mirrors were tried") - sanitized = self.utility.sanitize_credentials_from_uri(message) - expected_message = ("ERROR: Customer environment error (expired SSL certs): " - "Command=sudo yum update -y --disablerepo='*' " - "--enablerepo='microsoft' !!Code=11 Out- Updating " - "Subscription Management repositories. " - "Unable to read consumer identity This system is not registered " - "with an entitlement server. Status code: 401 " - "for https://testuser@packages-microsoft-com-prod/CENTRAL.rpm " - "Error: Failed to download metadata for repo 'packages-microsoft-com-prod': " - "Cannot download repomd.xml: All mirrors were tried") - self.assertEqual(sanitized, expected_message) - - - - - - From e88327abe2f5c24f9f8afa8852b49b643ad26d12 Mon Sep 17 00:00:00 2001 From: Yashna Parikh Date: Thu, 23 Apr 2026 11:23:59 -0400 Subject: [PATCH 03/16] Code coverage Fix --- src/extension/tests/Test_TelemetryWriter.py | 68 +++++++++++---------- 1 file changed, 37 insertions(+), 31 deletions(-) diff --git a/src/extension/tests/Test_TelemetryWriter.py b/src/extension/tests/Test_TelemetryWriter.py index fcfbbf435..430b3290b 100644 --- a/src/extension/tests/Test_TelemetryWriter.py +++ b/src/extension/tests/Test_TelemetryWriter.py @@ -158,10 +158,10 @@ def test_events_deleted_outside_of_extension_while_extension_is_running(self): self.telemetry_writer.write_event("testing telemetry write to file", Constants.TelemetryEventLevel.Error, "Test Task") os.listdir = backup_os_listdir - # ==================== Common Helper Method for Loading Event Files ==================== + # ==================== Integration test for credential sanitization in telemetry ==================== def _load_sanitized_event(self, message): """ - Common helper method to write event to telemetry and load the sanitized message. + Helper method to write event to telemetry and load the sanitized message. The regex sanitization happens automatically in TelemetryWriter. Args: @@ -184,38 +184,44 @@ def _load_sanitized_event(self, message): f.close() return sanitized_message - # ==================== Test cases for credential sanitization in telemetry messages ==================== - def test_sanitize_credentials_multiple_repos(self): - """Test 2: Failed repo sync with multiple repo URLs containing different credentials""" - if self.runtime.is_github_runner: - return - + def test_sanitize_credentials_in_telemetry_event(self): + """Integration test: Verify credentials are sanitized in telemetry events""" message = "Failed repo sync: https://user1:token1@repo1.example.com https://user2:token2@repo2.example.com/path" sanitized_message = self._load_sanitized_event(message) - expected_message = "Failed repo sync: https://user1@repo1.example.com https://user2@repo2.example.com/path" - self.assertEqual(sanitized_message, expected_message) - - def test_sanitize_credentials_username_only_no_password(self): - """Test 3: Using mirror with username only (no password)""" - if self.runtime.is_github_runner: - return - - message = "Using mirror https://testuser@repo.example.com/path" - - sanitized_message = self._load_sanitized_event(message) - self.assertIn("testuser@repo.example.com", sanitized_message) - - def test_sanitize_credentials_special_characters_in_password(self): - """Test 4: Downloading from repo with special characters in password""" - if self.runtime.is_github_runner: - return - - message = "Downloading from https://svc-user:AbC_123-.$%!@repo.contoso.com/rpm" - - sanitized_message = self._load_sanitized_event(message) - self.assertNotIn("AbC_123-.$%!", sanitized_message) - self.assertIn("svc-user@repo.contoso.com", sanitized_message) + # Skip assertion on GitHub runner + if sanitized_message is not None: + expected_message = "Failed repo sync: https://user1@repo1.example.com https://user2@repo2.example.com/path" + self.assertEqual(sanitized_message, expected_message) + + def test_load_sanitized_event_skips_on_github_runner(self): + """Test: Helper method returns None when running on GitHub runner""" + # Mock is_github_runner to be True to test the skip path + original_is_github_runner = self.runtime.is_github_runner + self.runtime.is_github_runner = True + + result = self._load_sanitized_event("test message") + self.assertIsNone(result) + + # Restore + self.runtime.is_github_runner = original_is_github_runner + + def test_load_sanitized_event_full_path(self): + """Test: Helper method executes full path when not on GitHub runner""" + # Force is_github_runner to False to ensure full path coverage on CI + original_is_github_runner = self.runtime.is_github_runner + self.runtime.is_github_runner = False + + message = "https://user:pass@example.com" + result = self._load_sanitized_event(message) + + # On non-GitHub runner, should return the sanitized message + self.assertIsNotNone(result) + self.assertIn("user@example.com", result) + self.assertNotIn("pass", result) + + # Restore + self.runtime.is_github_runner = original_is_github_runner if __name__ == '__main__': SUITE = unittest.TestLoader().loadTestsFromTestCase(TestTelemetryWriter) From 281f3e02599c457d92b818f71e1d66cea72753cc Mon Sep 17 00:00:00 2001 From: Yashna Parikh Date: Fri, 24 Apr 2026 11:44:20 -0400 Subject: [PATCH 04/16] Address CR comments. --- .../CredentialSanitizer.py} | 16 +- .../src/service_interfaces/TelemetryWriter.py | 7 +- src/core/tests/Test_TelemetryWriter.py | 154 +++++++++++++----- src/core/tests/Test_Utility.py | 105 ------------ src/extension/src/CredentialSanitizer.py | 50 ++++++ src/extension/src/TelemetryWriter.py | 7 +- src/extension/tests/Test_TelemetryWriter.py | 143 +++++++++++++--- 7 files changed, 303 insertions(+), 179 deletions(-) rename src/core/src/{Utility.py => service_interfaces/CredentialSanitizer.py} (76%) delete mode 100644 src/core/tests/Test_Utility.py create mode 100644 src/extension/src/CredentialSanitizer.py diff --git a/src/core/src/Utility.py b/src/core/src/service_interfaces/CredentialSanitizer.py similarity index 76% rename from src/core/src/Utility.py rename to src/core/src/service_interfaces/CredentialSanitizer.py index d3c656ae9..b25e01d25 100644 --- a/src/core/src/Utility.py +++ b/src/core/src/service_interfaces/CredentialSanitizer.py @@ -17,17 +17,17 @@ import re -class Utility(object): - """Core utility functions shared across core and extension packages""" +class CredentialSanitizer(object): + """Sanitizes credential-like values from URIs. Removes password/token from URI userinfo.""" @staticmethod - def sanitize_credentials_from_uri(message): + def sanitize(message): """Sanitizes credential-like values from URIs. - Removes password/token from URI userinfo while preserving other details. - Example: https://user:token@host → https://user@host + + Removes password/token from URI userinfo. Args: - message: The message string potentially containing URIs with credentials + message: The message to sanitize Returns: The message with credentials removed from URIs @@ -45,8 +45,6 @@ def sanitize_credentials_from_uri(message): message ) return sanitized_message - except Exception as e: - # Return original message if sanitization fails + except Exception: return message - diff --git a/src/core/src/service_interfaces/TelemetryWriter.py b/src/core/src/service_interfaces/TelemetryWriter.py index 0dbf649fe..4dcb3f284 100644 --- a/src/core/src/service_interfaces/TelemetryWriter.py +++ b/src/core/src/service_interfaces/TelemetryWriter.py @@ -23,7 +23,7 @@ import time from core.src.bootstrap.Constants import Constants -from core.src.Utility import Utility +from core.src.service_interfaces.CredentialSanitizer import CredentialSanitizer class TelemetryWriter(object): @@ -31,7 +31,7 @@ class TelemetryWriter(object): TELEMETRY_BUFFER_DELIMETER= "\n|\t" - def __init__(self, env_layer, composite_logger, events_folder_path, telemetry_supported): + def __init__(self, env_layer, composite_logger, events_folder_path, telemetry_supported, credential_sanitizer=None): self.env_layer = env_layer self.composite_logger = composite_logger self.__operation_id = str(datetime.datetime.utcnow()) @@ -39,6 +39,7 @@ def __init__(self, env_layer, composite_logger, events_folder_path, telemetry_su self.__task_name = Constants.TelemetryTaskName.STARTUP + self.__task_name_watermark self.events_folder_path = None self.__telemetry_event_counter = 1 # will be added at the end of each event sent to telemetry to assist in tracing and identifying event/message loss in telemetry + self.credential_sanitizer = credential_sanitizer or CredentialSanitizer self.start_time_for_event_count_throttle_check = datetime.datetime.utcnow() self.event_count = 1 @@ -131,7 +132,7 @@ def __new_event_json(self, event_level, message, task_name): # Step 1: Apply message restrictions (formatting, truncation) restricted_message = self.__ensure_message_restriction_compliance(message) # Step 2: Sanitize credentials from URIs - sanitized_message = Utility.sanitize_credentials_from_uri(restricted_message) + sanitized_message = self.credential_sanitizer.sanitize(restricted_message) return { "Version": Constants.EXT_VERSION, diff --git a/src/core/tests/Test_TelemetryWriter.py b/src/core/tests/Test_TelemetryWriter.py index 46a476354..12dcd25a8 100644 --- a/src/core/tests/Test_TelemetryWriter.py +++ b/src/core/tests/Test_TelemetryWriter.py @@ -19,7 +19,10 @@ import re import time import unittest +from unittest.mock import Mock from core.src.bootstrap.Constants import Constants +from core.src.service_interfaces.CredentialSanitizer import CredentialSanitizer +from core.src.service_interfaces.TelemetryWriter import TelemetryWriter from core.tests.library.ArgumentComposer import ArgumentComposer from core.tests.library.RuntimeCompositor import RuntimeCompositor @@ -311,44 +314,121 @@ def test_write_event_with_buffer_true_and_empty_string_and_then_flush_with_non_e f.close() self.assertTrue(text_found.string.startswith("Message 1")) - def test_sanitize_credentials_from_uri_in_telemetry(self): - """Test credential sanitization in telemetry events - credentials should be removed from URLs""" + # ==================== Unit Tests for Credential Sanitization ==================== + def test_sanitize_credentials_from_uri_https(self): + """ Test sanitization of HTTPS URIs with credentials """ message = "Error connecting to https://testuser:TESTTOKEN123456@invalid.repo.example/rpm/repodata/repomd.xml" - self.runtime.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Sanitize") - - latest_event_file = [pos_json for pos_json in os.listdir(self.runtime.telemetry_writer.events_folder_path) if - re.search('^[0-9]+.json$', pos_json)][-1] - with open(os.path.join(self.runtime.telemetry_writer.events_folder_path, latest_event_file), 'r') as f: - events = json.load(f) - telemetry_message = events[-1]["Message"] - - # Token should be removed - self.assertFalse("TESTTOKEN123456" in telemetry_message, "Token should not be in telemetry message") - # Username should be preserved - self.assertTrue("testuser@invalid.repo.example" in telemetry_message, "Username should be preserved in telemetry") - # URL structure should be preserved - self.assertTrue("https://testuser@invalid.repo.example/rpm/repodata/repomd.xml" in telemetry_message, - "Sanitized URL should be present in telemetry") - f.close() - - def test_sanitize_multiple_credentials_in_telemetry(self): - """Test sanitization with multiple URLs containing credentials""" - message = "Failed fetching from https://user1:pass1@host1.com/api and http://user2:pass2@host2.com/data" - self.runtime.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Multiple") - - latest_event_file = [pos_json for pos_json in os.listdir(self.runtime.telemetry_writer.events_folder_path) if - re.search('^[0-9]+.json$', pos_json)][-1] - with open(os.path.join(self.runtime.telemetry_writer.events_folder_path, latest_event_file), 'r') as f: - events = json.load(f) - telemetry_message = events[-1]["Message"] - - # All passwords should be removed - self.assertFalse("pass1" in telemetry_message) - self.assertFalse("pass2" in telemetry_message) - # Usernames should be preserved - self.assertTrue("user1@host1.com" in telemetry_message) - self.assertTrue("user2@host2.com" in telemetry_message) - f.close() + sanitized = CredentialSanitizer.sanitize(message) + expected_message = "Error connecting to https://testuser@invalid.repo.example/rpm/repodata/repomd.xml" + self.assertEqual(sanitized, expected_message) + + def test_sanitize_credentials_from_uri_http(self): + """ Test sanitization of HTTP URIs with credentials """ + message = "Connection failed to http://user123:password123@example.com/path" + sanitized = CredentialSanitizer.sanitize(message) + # Password should be removed + self.assertNotIn("password123", sanitized) + # Username should be preserved + self.assertIn("user123@example.com", sanitized) + + def test_sanitize_credentials_multiple_urls(self): + """ Test sanitization with multiple URLs containing credentials """ + message = "Failed to fetch from https://user1:pass1@host1.com/api and http://user2:pass2@host2.com/data" + sanitized = CredentialSanitizer.sanitize(message) + # Passwords should be removed + self.assertNotIn("pass1", sanitized) + self.assertNotIn("pass2", sanitized) + # Usernames should be preserved + self.assertIn("user1@host1.com", sanitized) + self.assertIn("user2@host2.com", sanitized) + + def test_sanitize_credentials_jfrog_repo_error(self): + """ ERROR with 401 status code from jfrog.io """ + message = "ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml" + sanitized = CredentialSanitizer.sanitize(message) + expected_message = "ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml" + self.assertEqual(sanitized, expected_message) + + def test_sanitize_credentials_curl_error_buildbot_token(self): + """ Curl error with buildbot:BuildBotToken credentials """ + message = ("Curl error (6): Couldn't resolve host 'packages.microsoft.com' Could not " + "retrieve mirrorlist https://buildbot:BuildBotToken@mirror.example.com/repodata/repomd.xml") + sanitized = CredentialSanitizer.sanitize(message) + expected_message = ("Curl error (6): Couldn't resolve host 'packages.microsoft.com' Could not " + "retrieve mirrorlist https://buildbot@mirror.example.com/repodata/repomd.xml") + self.assertEqual(sanitized, expected_message) + + def test_sanitize_credentials_expired_ssl_certs_error(self): + """ ERROR with expired SSL certs and TESTTOKEN123456 """ + message = ("ERROR: Customer environment error (expired SSL certs): " + "Command=sudo yum update -y --disablerepo='*' " + "--enablerepo='microsoft' !!Code=11 Out- Updating " + "Subscription Management repositories. " + "Unable to read consumer identity This system is not registered " + "with an entitlement server. Status code: 401 " + "for https://testuser:TESTTOKEN123456@packages-microsoft-com-prod/CENTRAL.rpm " + "Error: Failed to download metadata for repo 'packages-microsoft-com-prod': " + "Cannot download repomd.xml: All mirrors were tried") + sanitized = CredentialSanitizer.sanitize(message) + expected_message = ("ERROR: Customer environment error (expired SSL certs): " + "Command=sudo yum update -y --disablerepo='*' " + "--enablerepo='microsoft' !!Code=11 Out- Updating " + "Subscription Management repositories. " + "Unable to read consumer identity This system is not registered " + "with an entitlement server. Status code: 401 " + "for https://testuser@packages-microsoft-com-prod/CENTRAL.rpm " + "Error: Failed to download metadata for repo 'packages-microsoft-com-prod': " + "Cannot download repomd.xml: All mirrors were tried") + self.assertEqual(sanitized, expected_message) + + def test_sanitize_credentials_exception_handling(self): + """ Test exception handling: passing None should return the input unchanged """ + result = CredentialSanitizer.sanitize(None) + self.assertIsNone(result) + + def test_inject_fake_sanitizer_and_verify_invocation(self): + """ Integration Test: Can inject a fake sanitizer and verify it was invoked during write_event """ + # Create a mock sanitizer + mock_sanitizer = Mock() + mock_sanitizer.sanitize = Mock(return_value="sanitized_message [TC=1]") + + # Create TelemetryWriter with injected mock sanitizer + env_layer = self.runtime.env_layer + composite_logger = self.runtime.composite_logger + writer = TelemetryWriter(env_layer, composite_logger, events_folder_path=None, + telemetry_supported=False, credential_sanitizer=mock_sanitizer) + + # Set up a temporary events folder for testing + import tempfile + import shutil + temp_folder = tempfile.mkdtemp() + writer.events_folder_path = temp_folder + writer._TelemetryWriter__is_telemetry_supported = True + + try: + # Write an event + original_message = "https://user:password@example.com/error" + writer.write_event(original_message, Constants.TelemetryEventLevel.Error, "Test Task") + + # Verify mock sanitizer was called + self.assertTrue(mock_sanitizer.sanitize.called, "Sanitizer should have been invoked") + self.assertEqual(mock_sanitizer.sanitize.call_count, 1, "Sanitizer should be called exactly once") + + # Verify the call was made with a message containing the original error info + call_args = mock_sanitizer.sanitize.call_args[0][0] + self.assertIn("example.com", call_args, "Sanitizer should be called with message containing URL") + + # Verify telemetry event was written with the mock-sanitized message + event_files = os.listdir(writer.events_folder_path) + self.assertTrue(len(event_files) > 0, "Event file should be created") + + with open(os.path.join(writer.events_folder_path, event_files[0]), 'r') as f: + events = json.load(f) + # The message should be the one returned by our mock + self.assertIn("sanitized_message", events[-1]["Message"]) + f.close() + finally: + shutil.rmtree(temp_folder) if __name__ == '__main__': unittest.main() diff --git a/src/core/tests/Test_Utility.py b/src/core/tests/Test_Utility.py deleted file mode 100644 index ec44b4a72..000000000 --- a/src/core/tests/Test_Utility.py +++ /dev/null @@ -1,105 +0,0 @@ -# Copyright 2020 Microsoft Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# Requires Python 2.7+ - -import unittest - -from core.src.Utility import Utility - - -class TestUtility(unittest.TestCase): - - def setUp(self): - self.utility = Utility() - - def tearDown(self): - pass - - def test_sanitize_credentials_from_uri_https(self): - """ Test sanitization of HTTPS URIs with credentials """ - message = "Error connecting to https://testuser:TESTTOKEN123456@invalid.repo.example/rpm/repodata/repomd.xml" - sanitized = self.utility.sanitize_credentials_from_uri(message) - expected_message = "Error connecting to https://testuser@invalid.repo.example/rpm/repodata/repomd.xml" - self.assertEqual(sanitized, expected_message) - - def test_sanitize_credentials_from_uri_http(self): - """ Test sanitization of HTTP URIs with credentials """ - message = "Connection failed to http://user123:password123@example.com/path" - sanitized = self.utility.sanitize_credentials_from_uri(message) - # Password should be removed - self.assertNotIn("password123", sanitized) - # Username should be preserved - self.assertIn("user123@example.com", sanitized) - - def test_sanitize_credentials_multiple_urls(self): - """ Test sanitization with multiple URLs containing credentials """ - message = "Failed to fetch from https://user1:pass1@host1.com/api and http://user2:pass2@host2.com/data" - sanitized = self.utility.sanitize_credentials_from_uri(message) - # Passwords should be removed - self.assertNotIn("pass1", sanitized) - self.assertNotIn("pass2", sanitized) - # Usernames should be preserved - self.assertIn("user1@host1.com", sanitized) - self.assertIn("user2@host2.com", sanitized) - - def test_sanitize_credentials_jfrog_repo_error(self): - """ ERROR with 401 status code from jfrog.io """ - message = "ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml" - sanitized = self.utility.sanitize_credentials_from_uri(message) - expected_message = "ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml" - self.assertEqual(sanitized, expected_message) - - def test_sanitize_credentials_curl_error_buildbot_token(self): - """ Curl error with buildbot:BuildBotToken credentials """ - message = ("Curl error (6): Couldn't resolve host 'packages.microsoft.com' Could not " - "retrieve mirrorlist https://buildbot:BuildBotToken@mirror.example.com/repodata/repomd.xml") - sanitized = self.utility.sanitize_credentials_from_uri(message) - expected_message = ("Curl error (6): Couldn't resolve host 'packages.microsoft.com' Could not " - "retrieve mirrorlist https://buildbot@mirror.example.com/repodata/repomd.xml") - self.assertEqual(sanitized, expected_message) - - def test_sanitize_credentials_expired_ssl_certs_error(self): - """ ERROR with expired SSL certs and TESTTOKEN123456 """ - message = ("ERROR: Customer environment error (expired SSL certs): " - "Command=sudo yum update -y --disablerepo='*' " - "--enablerepo='microsoft' !!Code=11 Out- Updating " - "Subscription Management repositories. " - "Unable to read consumer identity This system is not registered " - "with an entitlement server. Status code: 401 " - "for https://testuser:TESTTOKEN123456@packages-microsoft-com-prod/CENTRAL.rpm " - "Error: Failed to download metadata for repo 'packages-microsoft-com-prod': " - "Cannot download repomd.xml: All mirrors were tried") - sanitized = self.utility.sanitize_credentials_from_uri(message) - expected_message = ("ERROR: Customer environment error (expired SSL certs): " - "Command=sudo yum update -y --disablerepo='*' " - "--enablerepo='microsoft' !!Code=11 Out- Updating " - "Subscription Management repositories. " - "Unable to read consumer identity This system is not registered " - "with an entitlement server. Status code: 401 " - "for https://testuser@packages-microsoft-com-prod/CENTRAL.rpm " - "Error: Failed to download metadata for repo 'packages-microsoft-com-prod': " - "Cannot download repomd.xml: All mirrors were tried") - self.assertEqual(sanitized, expected_message) - - def test_sanitize_credentials_exception_handling(self): - """ Test exception handling: passing None should return the input unchanged """ - result = self.utility.sanitize_credentials_from_uri(None) - self.assertIsNone(result) - - -if __name__ == '__main__': - unittest.main() - - diff --git a/src/extension/src/CredentialSanitizer.py b/src/extension/src/CredentialSanitizer.py new file mode 100644 index 000000000..b25e01d25 --- /dev/null +++ b/src/extension/src/CredentialSanitizer.py @@ -0,0 +1,50 @@ +# Copyright 2020 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Requires Python 2.7+ + +import re + + +class CredentialSanitizer(object): + """Sanitizes credential-like values from URIs. Removes password/token from URI userinfo.""" + + @staticmethod + def sanitize(message): + """Sanitizes credential-like values from URIs. + + Removes password/token from URI userinfo. + + Args: + message: The message to sanitize + + Returns: + The message with credentials removed from URIs + """ + try: + # Pattern matches: scheme://user:password@host → scheme://user@host + # Handles credentials containing special characters (except @, /, whitespace) + # Groups: + # (1) scheme: https://, http://, or ftp:// + # (2) username: one or more non-whitespace, non-slash, non-colon, non-@ characters + # (3) password: zero or more non-whitespace, non-slash, non-@ characters + sanitized_message = re.sub( + r'(https?://|ftp://)([^:/@\s]+):([^@/\s]*)@', + r'\1\2@', + message + ) + return sanitized_message + except Exception: + return message + diff --git a/src/extension/src/TelemetryWriter.py b/src/extension/src/TelemetryWriter.py index afd610b1f..009d520f3 100644 --- a/src/extension/src/TelemetryWriter.py +++ b/src/extension/src/TelemetryWriter.py @@ -23,13 +23,13 @@ import time from extension.src.Constants import Constants -from core.src.Utility import Utility +from extension.src.CredentialSanitizer import CredentialSanitizer class TelemetryWriter(object): """Class for writing telemetry data to events""" - def __init__(self, logger, env_layer): + def __init__(self, logger, env_layer, credential_sanitizer=None): self.logger = logger self.env_layer = env_layer self.events_folder_path = None @@ -38,12 +38,13 @@ def __init__(self, logger, env_layer): self.__agent_is_compatible = self.__get_agent_supports_telemetry_from_env_var() self.__task_name_watermark = "." + str(datetime.datetime.utcnow().hour) + "." + str(datetime.datetime.utcnow().minute) + "." + str(datetime.datetime.utcnow().second) + "." + str(os.getpid()) self.__task_name = Constants.TELEMETRY_TASK_NAME + self.__task_name_watermark + self.credential_sanitizer = credential_sanitizer or CredentialSanitizer def __new_event_json(self, event_level, message, task_name): # Step 1: Apply message restrictions (formatting, truncation) restricted_message = self.__ensure_message_restriction_compliance(message) # Step 2: Sanitize credentials from URIs - sanitized_message = Utility.sanitize_credentials_from_uri(restricted_message) + sanitized_message = self.credential_sanitizer.sanitize(restricted_message) return { "Version": Constants.EXT_VERSION, diff --git a/src/extension/tests/Test_TelemetryWriter.py b/src/extension/tests/Test_TelemetryWriter.py index 430b3290b..49b87aded 100644 --- a/src/extension/tests/Test_TelemetryWriter.py +++ b/src/extension/tests/Test_TelemetryWriter.py @@ -4,7 +4,10 @@ import tempfile import time import unittest +from unittest.mock import Mock from extension.src.Constants import Constants +from extension.src.CredentialSanitizer import CredentialSanitizer +from extension.src.TelemetryWriter import TelemetryWriter from extension.tests.helpers.VirtualTerminal import VirtualTerminal from extension.tests.helpers.RuntimeComposer import RuntimeComposer @@ -184,28 +187,6 @@ def _load_sanitized_event(self, message): f.close() return sanitized_message - def test_sanitize_credentials_in_telemetry_event(self): - """Integration test: Verify credentials are sanitized in telemetry events""" - message = "Failed repo sync: https://user1:token1@repo1.example.com https://user2:token2@repo2.example.com/path" - - sanitized_message = self._load_sanitized_event(message) - # Skip assertion on GitHub runner - if sanitized_message is not None: - expected_message = "Failed repo sync: https://user1@repo1.example.com https://user2@repo2.example.com/path" - self.assertEqual(sanitized_message, expected_message) - - def test_load_sanitized_event_skips_on_github_runner(self): - """Test: Helper method returns None when running on GitHub runner""" - # Mock is_github_runner to be True to test the skip path - original_is_github_runner = self.runtime.is_github_runner - self.runtime.is_github_runner = True - - result = self._load_sanitized_event("test message") - self.assertIsNone(result) - - # Restore - self.runtime.is_github_runner = original_is_github_runner - def test_load_sanitized_event_full_path(self): """Test: Helper method executes full path when not on GitHub runner""" # Force is_github_runner to False to ensure full path coverage on CI @@ -223,9 +204,127 @@ def test_load_sanitized_event_full_path(self): # Restore self.runtime.is_github_runner = original_is_github_runner + # ==================== Unit Tests for Credential Sanitization ==================== + def test_sanitize_credentials_from_uri_https(self): + """ Test sanitization of HTTPS URIs with credentials """ + message = "Error connecting to https://testuser:TESTTOKEN123456@invalid.repo.example/rpm/repodata/repomd.xml" + sanitized = CredentialSanitizer.sanitize(message) + expected_message = "Error connecting to https://testuser@invalid.repo.example/rpm/repodata/repomd.xml" + self.assertEqual(sanitized, expected_message) + + def test_sanitize_credentials_from_uri_http(self): + """ Test sanitization of HTTP URIs with credentials """ + message = "Connection failed to http://user123:password123@example.com/path" + sanitized = CredentialSanitizer.sanitize(message) + # Password should be removed + self.assertNotIn("password123", sanitized) + # Username should be preserved + self.assertIn("user123@example.com", sanitized) + + def test_sanitize_credentials_multiple_urls(self): + """ Test sanitization with multiple URLs containing credentials """ + message = "Failed to fetch from https://user1:pass1@host1.com/api and http://user2:pass2@host2.com/data" + sanitized = CredentialSanitizer.sanitize(message) + # Passwords should be removed + self.assertNotIn("pass1", sanitized) + self.assertNotIn("pass2", sanitized) + # Usernames should be preserved + self.assertIn("user1@host1.com", sanitized) + self.assertIn("user2@host2.com", sanitized) + + def test_sanitize_credentials_jfrog_repo_error(self): + """ ERROR with 401 status code from jfrog.io """ + message = "ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml" + sanitized = CredentialSanitizer.sanitize(message) + expected_message = "ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml" + self.assertEqual(sanitized, expected_message) + + def test_sanitize_credentials_curl_error_buildbot_token(self): + """ Curl error with buildbot:BuildBotToken credentials """ + message = ("Curl error (6): Couldn't resolve host 'packages.microsoft.com' Could not " + "retrieve mirrorlist https://buildbot:BuildBotToken@mirror.example.com/repodata/repomd.xml") + sanitized = CredentialSanitizer.sanitize(message) + expected_message = ("Curl error (6): Couldn't resolve host 'packages.microsoft.com' Could not " + "retrieve mirrorlist https://buildbot@mirror.example.com/repodata/repomd.xml") + self.assertEqual(sanitized, expected_message) + + def test_sanitize_credentials_expired_ssl_certs_error(self): + """ ERROR with expired SSL certs and TESTTOKEN123456 """ + message = ("ERROR: Customer environment error (expired SSL certs): " + "Command=sudo yum update -y --disablerepo='*' " + "--enablerepo='microsoft' !!Code=11 Out- Updating " + "Subscription Management repositories. " + "Unable to read consumer identity This system is not registered " + "with an entitlement server. Status code: 401 " + "for https://testuser:TESTTOKEN123456@packages-microsoft-com-prod/CENTRAL.rpm " + "Error: Failed to download metadata for repo 'packages-microsoft-com-prod': " + "Cannot download repomd.xml: All mirrors were tried") + sanitized = CredentialSanitizer.sanitize(message) + expected_message = ("ERROR: Customer environment error (expired SSL certs): " + "Command=sudo yum update -y --disablerepo='*' " + "--enablerepo='microsoft' !!Code=11 Out- Updating " + "Subscription Management repositories. " + "Unable to read consumer identity This system is not registered " + "with an entitlement server. Status code: 401 " + "for https://testuser@packages-microsoft-com-prod/CENTRAL.rpm " + "Error: Failed to download metadata for repo 'packages-microsoft-com-prod': " + "Cannot download repomd.xml: All mirrors were tried") + self.assertEqual(sanitized, expected_message) + + def test_sanitize_credentials_exception_handling(self): + """ Test exception handling: passing None should return the input unchanged """ + result = CredentialSanitizer.sanitize(None) + self.assertIsNone(result) + + def test_inject_fake_sanitizer_and_verify_invocation(self): + """ Test: Can inject a fake sanitizer and verify it was invoked during write_event """ + # Create a mock sanitizer + mock_sanitizer = Mock() + mock_sanitizer.sanitize = Mock(return_value="sanitized_message") + + # Create TelemetryWriter with injected mock sanitizer + logger = self.runtime.logger + env_layer = self.runtime.env_layer + writer = TelemetryWriter(logger, env_layer, mock_sanitizer) + writer.events_folder_path = tempfile.mkdtemp() + + try: + # Write an event + original_message = "https://user:password@example.com/error" + writer.write_event(original_message, Constants.TelemetryEventLevel.Error, "Test Task") + + # Verify mock sanitizer was called + self.assertTrue(mock_sanitizer.sanitize.called, "Sanitizer should have been invoked") + self.assertEqual(mock_sanitizer.sanitize.call_count, 1, "Sanitizer should be called exactly once") + + # Verify the call was made with a message containing the original error info + call_args = mock_sanitizer.sanitize.call_args[0][0] + self.assertIn("example.com", call_args, "Sanitizer should be called with message containing URL") + + # Verify telemetry event was written with the mock-sanitized message + event_files = os.listdir(writer.events_folder_path) + self.assertTrue(len(event_files) > 0, "Event file should be created") + + with open(os.path.join(writer.events_folder_path, event_files[0]), 'r') as f: + events = json.load(f) + # The message should be the one returned by our mock + self.assertIn("sanitized_message", events[0]["Message"]) + f.close() + finally: + shutil.rmtree(writer.events_folder_path) + if __name__ == '__main__': SUITE = unittest.TestLoader().loadTestsFromTestCase(TestTelemetryWriter) unittest.TextTestRunner(verbosity=2).run(SUITE) + + + + + + + + + From ac91bd527c1e8f0a9441ed9811b31c56691f4c61 Mon Sep 17 00:00:00 2001 From: Yashna Parikh Date: Fri, 24 Apr 2026 11:46:10 -0400 Subject: [PATCH 05/16] Remove unchanged file --- src/extension/tests/Test_Utility.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/extension/tests/Test_Utility.py b/src/extension/tests/Test_Utility.py index e9440042e..e77b1ac57 100644 --- a/src/extension/tests/Test_Utility.py +++ b/src/extension/tests/Test_Utility.py @@ -73,5 +73,3 @@ def test_delete_file_failure(self): # Remove the directory after the test shutil.rmtree(test_dir) - - From 586190040473c94a9e4fa3144fea670ca5b77b15 Mon Sep 17 00:00:00 2001 From: Yashna Parikh Date: Tue, 28 Apr 2026 13:00:50 -0400 Subject: [PATCH 06/16] Address code review comments --- src/core/src/bootstrap/Bootstrapper.py | 1 + .../src/bootstrap/ConfigurationFactory.py | 8 +- .../service_interfaces/CredentialSanitizer.py | 21 ++- .../src/service_interfaces/TelemetryWriter.py | 5 +- src/core/tests/Test_TelemetryWriter.py | 141 ++++++++++++----- src/core/tests/library/RuntimeCompositor.py | 3 +- src/extension/src/CredentialSanitizer.py | 21 ++- src/extension/src/TelemetryWriter.py | 4 +- src/extension/src/__main__.py | 5 +- src/extension/tests/Test_TelemetryWriter.py | 145 +++++++++++------- .../tests/helpers/RuntimeComposer.py | 4 +- 11 files changed, 233 insertions(+), 125 deletions(-) diff --git a/src/core/src/bootstrap/Bootstrapper.py b/src/core/src/bootstrap/Bootstrapper.py index 080b94393..52d0dfd36 100644 --- a/src/core/src/bootstrap/Bootstrapper.py +++ b/src/core/src/bootstrap/Bootstrapper.py @@ -49,6 +49,7 @@ def __init__(self, argv, capture_stdout=True): if capture_stdout: self.stdout_file_mirror = StdOutFileMirror(self.env_layer, self.file_logger) self.composite_logger = self.container.get('composite_logger') + self.credential_sanitizer = self.container.get("credential_sanitizer") self.telemetry_writer = self.container.get('telemetry_writer') self.composite_logger.telemetry_writer = self.telemetry_writer # Need to set telemetry_writer within logger to enable sending all logs to telemetry diff --git a/src/core/src/bootstrap/ConfigurationFactory.py b/src/core/src/bootstrap/ConfigurationFactory.py index 30bf7db0e..69be2ba0d 100644 --- a/src/core/src/bootstrap/ConfigurationFactory.py +++ b/src/core/src/bootstrap/ConfigurationFactory.py @@ -41,6 +41,7 @@ from core.src.package_managers.YumPackageManager import YumPackageManager from core.src.package_managers.ZypperPackageManager import ZypperPackageManager +from core.src.service_interfaces.CredentialSanitizer import CredentialSanitizer from core.src.service_interfaces.LifecycleManager import LifecycleManager from core.src.service_interfaces.LifecycleManagerAzure import LifecycleManagerAzure from core.src.service_interfaces.LifecycleManagerArc import LifecycleManagerArc @@ -151,9 +152,14 @@ def new_bootstrap_configuration(config_env, log_file_path, events_folder, teleme 'telemetry_writer': None # Has to be initialized without telemetry_writer to avoid running into a circular dependency loop. Telemetry writer within composite logger will be set later after telemetry writer has been initialized } }, + 'credential_sanitizer': { + 'component': CredentialSanitizer, + 'component_args': [], + 'component_kwargs': {} + }, 'telemetry_writer': { 'component': TelemetryWriter, - 'component_args': ['env_layer', 'composite_logger'], + 'component_args': ['env_layer', 'composite_logger', 'credential_sanitizer'], 'component_kwargs': { 'events_folder_path': events_folder, 'telemetry_supported': telemetry_supported diff --git a/src/core/src/service_interfaces/CredentialSanitizer.py b/src/core/src/service_interfaces/CredentialSanitizer.py index b25e01d25..dacabc2b4 100644 --- a/src/core/src/service_interfaces/CredentialSanitizer.py +++ b/src/core/src/service_interfaces/CredentialSanitizer.py @@ -1,4 +1,4 @@ -# Copyright 2020 Microsoft Corporation +# Copyright 2026 Microsoft Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,23 +14,22 @@ # # Requires Python 2.7+ +import logging import re class CredentialSanitizer(object): - """Sanitizes credential-like values from URIs. Removes password/token from URI userinfo.""" + """Service that sanitizes credential-like values from URIs by removing password/token from URI userinfo.""" + + def __init__(self): + pass @staticmethod def sanitize(message): - """Sanitizes credential-like values from URIs. - - Removes password/token from URI userinfo. - + """Removes password/token from URI credentials in the given message. Args: message: The message to sanitize - - Returns: - The message with credentials removed from URIs + Returns: The message with credentials removed from URIs """ try: # Pattern matches: scheme://user:password@host → scheme://user@host @@ -45,6 +44,6 @@ def sanitize(message): message ) return sanitized_message - except Exception: + except Exception as error: + logging.error("Error occurred while sanitizing credentials from message: %s", repr(error)) return message - diff --git a/src/core/src/service_interfaces/TelemetryWriter.py b/src/core/src/service_interfaces/TelemetryWriter.py index 4dcb3f284..a2ecaaf60 100644 --- a/src/core/src/service_interfaces/TelemetryWriter.py +++ b/src/core/src/service_interfaces/TelemetryWriter.py @@ -1,4 +1,4 @@ -# Copyright 2020 Microsoft Corporation +# Copyright 2026 Microsoft Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -39,7 +39,7 @@ def __init__(self, env_layer, composite_logger, events_folder_path, telemetry_su self.__task_name = Constants.TelemetryTaskName.STARTUP + self.__task_name_watermark self.events_folder_path = None self.__telemetry_event_counter = 1 # will be added at the end of each event sent to telemetry to assist in tracing and identifying event/message loss in telemetry - self.credential_sanitizer = credential_sanitizer or CredentialSanitizer + self.credential_sanitizer = credential_sanitizer or CredentialSanitizer() self.start_time_for_event_count_throttle_check = datetime.datetime.utcnow() self.event_count = 1 @@ -168,7 +168,6 @@ def __ensure_message_restriction_compliance(self, full_message): self.composite_logger.log_telemetry_module_error("Error occurred while formatting message for a telemetry event. [Error={0}]".format(repr(e))) raise - def write_event_with_buffer(self, message, event_level, buffer_msg): if buffer_msg == Constants.BufferMessage.TRUE and (event_level == self.last_telemetry_event_level or self.last_telemetry_event_level is None): if self.telemetry_buffer_store != "": diff --git a/src/core/tests/Test_TelemetryWriter.py b/src/core/tests/Test_TelemetryWriter.py index 12dcd25a8..28503bf98 100644 --- a/src/core/tests/Test_TelemetryWriter.py +++ b/src/core/tests/Test_TelemetryWriter.py @@ -74,7 +74,7 @@ def test_write_event(self): self.assertTrue(telemetry_event_counter_in_first_test_event is not None) self.assertTrue(telemetry_event_counter_in_second_test_event is not None) - self.assertTrue(int(telemetry_event_counter_in_second_test_event) - int(telemetry_event_counter_in_first_test_event) == 1) + self.assertTrue(int(telemetry_event_counter_in_second_test_event) - int(telemetry_event_counter_in_first_test_event) == 1 if telemetry_event_counter_in_first_test_event and telemetry_event_counter_in_second_test_event else False) def test_write_multiple_events_in_same_file(self): time_backup = time.time @@ -315,50 +315,97 @@ def test_write_event_with_buffer_true_and_empty_string_and_then_flush_with_non_e self.assertTrue(text_found.string.startswith("Message 1")) # ==================== Unit Tests for Credential Sanitization ==================== - def test_sanitize_credentials_from_uri_https(self): + def test_sanitize_credentials_from_uri_https_with_credentials_leak_in_input(self): """ Test sanitization of HTTPS URIs with credentials """ message = "Error connecting to https://testuser:TESTTOKEN123456@invalid.repo.example/rpm/repodata/repomd.xml" - sanitized = CredentialSanitizer.sanitize(message) - expected_message = "Error connecting to https://testuser@invalid.repo.example/rpm/repodata/repomd.xml" - self.assertEqual(sanitized, expected_message) + self.runtime.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") - def test_sanitize_credentials_from_uri_http(self): + latest_event_file = [pos_json for pos_json in os.listdir(self.runtime.telemetry_writer.events_folder_path) if re.search('^[0-9]+.json$', pos_json)][-1] + with open(os.path.join(self.runtime.telemetry_writer.events_folder_path, latest_event_file), 'r+') as f: + events = json.load(f) + self.assertTrue(events is not None) + self.assertEqual(events[-1]["TaskName"], "Test Task") + # Verify password was removed but username preserved + self.assertNotIn("TESTTOKEN123456", events[-1]["Message"]) + self.assertIn("testuser@invalid.repo.example", events[-1]["Message"]) + f.close() + + def test_sanitize_credentials_from_uri_http_with_credentials_leak_in_input(self): """ Test sanitization of HTTP URIs with credentials """ message = "Connection failed to http://user123:password123@example.com/path" - sanitized = CredentialSanitizer.sanitize(message) - # Password should be removed - self.assertNotIn("password123", sanitized) - # Username should be preserved - self.assertIn("user123@example.com", sanitized) + self.runtime.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") - def test_sanitize_credentials_multiple_urls(self): + latest_event_file = [pos_json for pos_json in os.listdir(self.runtime.telemetry_writer.events_folder_path) if re.search('^[0-9]+.json$', pos_json)][-1] + with open(os.path.join(self.runtime.telemetry_writer.events_folder_path, latest_event_file), 'r+') as f: + events = json.load(f) + self.assertTrue(events is not None) + self.assertEqual(events[-1]["TaskName"], "Test Task") + # Password should be removed + self.assertNotIn("password123", events[-1]["Message"]) + # Username should be preserved + self.assertIn("user123@example.com", events[-1]["Message"]) + f.close() + + def test_sanitize_credentials_multiple_urls_with_credentials_leak_in_input(self): """ Test sanitization with multiple URLs containing credentials """ message = "Failed to fetch from https://user1:pass1@host1.com/api and http://user2:pass2@host2.com/data" - sanitized = CredentialSanitizer.sanitize(message) - # Passwords should be removed - self.assertNotIn("pass1", sanitized) - self.assertNotIn("pass2", sanitized) - # Usernames should be preserved - self.assertIn("user1@host1.com", sanitized) - self.assertIn("user2@host2.com", sanitized) - - def test_sanitize_credentials_jfrog_repo_error(self): + self.runtime.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") + + latest_event_file = [pos_json for pos_json in os.listdir(self.runtime.telemetry_writer.events_folder_path) if re.search('^[0-9]+.json$', pos_json)][-1] + with open(os.path.join(self.runtime.telemetry_writer.events_folder_path, latest_event_file), 'r+') as f: + events = json.load(f) + self.assertTrue(events is not None) + self.assertEqual(events[-1]["TaskName"], "Test Task") + # Passwords should be removed + self.assertNotIn("pass1", events[-1]["Message"]) + self.assertNotIn("pass2", events[-1]["Message"]) + # Usernames should be preserved + self.assertIn("user1@host1.com", events[-1]["Message"]) + self.assertIn("user2@host2.com", events[-1]["Message"]) + # Verify entire message matches expected output (excluding TC counter) + message_without_tc = events[-1]["Message"][:events[-1]["Message"].rfind(" [TC=")] + self.assertIn("Failed to fetch from https://user1@host1.com/api and http://user2@host2.com/data", message_without_tc) + f.close() + + def test_sanitize_credentials_with_no_credentials_in_input_leak_in_input(self): """ ERROR with 401 status code from jfrog.io """ message = "ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml" - sanitized = CredentialSanitizer.sanitize(message) - expected_message = "ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml" - self.assertEqual(sanitized, expected_message) + self.runtime.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") + + latest_event_file = [pos_json for pos_json in os.listdir(self.runtime.telemetry_writer.events_folder_path) if re.search('^[0-9]+.json$', pos_json)][-1] + with open(os.path.join(self.runtime.telemetry_writer.events_folder_path, latest_event_file), 'r+') as f: + events = json.load(f) + self.assertTrue(events is not None) + self.assertEqual(events[-1]["TaskName"], "Test Task") + # Message should remain unchanged (no credentials to sanitize) + self.assertIn("jfrog.io", events[-1]["Message"]) + # Verify entire message matches expected output (excluding TC counter) + message_without_tc = events[-1]["Message"][:events[-1]["Message"].rfind(" [TC=")] + self.assertIn("ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml", message_without_tc) + f.close() - def test_sanitize_credentials_curl_error_buildbot_token(self): + def test_sanitize_credentials_with_error_and_credentials_in_input(self): """ Curl error with buildbot:BuildBotToken credentials """ message = ("Curl error (6): Couldn't resolve host 'packages.microsoft.com' Could not " "retrieve mirrorlist https://buildbot:BuildBotToken@mirror.example.com/repodata/repomd.xml") - sanitized = CredentialSanitizer.sanitize(message) - expected_message = ("Curl error (6): Couldn't resolve host 'packages.microsoft.com' Could not " - "retrieve mirrorlist https://buildbot@mirror.example.com/repodata/repomd.xml") - self.assertEqual(sanitized, expected_message) + self.runtime.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") + + latest_event_file = [pos_json for pos_json in os.listdir(self.runtime.telemetry_writer.events_folder_path) if re.search('^[0-9]+.json$', pos_json)][-1] + with open(os.path.join(self.runtime.telemetry_writer.events_folder_path, latest_event_file), 'r+') as f: + events = json.load(f) + self.assertTrue(events is not None) + self.assertEqual(events[-1]["TaskName"], "Test Task") + # Token should be removed but username preserved + self.assertNotIn("BuildBotToken", events[-1]["Message"]) + self.assertIn("buildbot@mirror.example.com", events[-1]["Message"]) + # Verify entire message matches expected output (excluding TC counter) + message_without_tc = events[-1]["Message"][:events[-1]["Message"].rfind(" [TC=")] + expected_message = ("Curl error (6): Couldn't resolve host 'packages.microsoft.com' Could not " + "retrieve mirrorlist https://buildbot@mirror.example.com/repodata/repomd.xml") + self.assertEqual(expected_message, message_without_tc) + f.close() - def test_sanitize_credentials_expired_ssl_certs_error(self): + def test_sanitize_credentials_with_credentials_leak_in_input(self): """ ERROR with expired SSL certs and TESTTOKEN123456 """ message = ("ERROR: Customer environment error (expired SSL certs): " "Command=sudo yum update -y --disablerepo='*' " @@ -369,17 +416,29 @@ def test_sanitize_credentials_expired_ssl_certs_error(self): "for https://testuser:TESTTOKEN123456@packages-microsoft-com-prod/CENTRAL.rpm " "Error: Failed to download metadata for repo 'packages-microsoft-com-prod': " "Cannot download repomd.xml: All mirrors were tried") - sanitized = CredentialSanitizer.sanitize(message) - expected_message = ("ERROR: Customer environment error (expired SSL certs): " - "Command=sudo yum update -y --disablerepo='*' " - "--enablerepo='microsoft' !!Code=11 Out- Updating " - "Subscription Management repositories. " - "Unable to read consumer identity This system is not registered " - "with an entitlement server. Status code: 401 " - "for https://testuser@packages-microsoft-com-prod/CENTRAL.rpm " - "Error: Failed to download metadata for repo 'packages-microsoft-com-prod': " - "Cannot download repomd.xml: All mirrors were tried") - self.assertEqual(sanitized, expected_message) + self.runtime.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") + + latest_event_file = [pos_json for pos_json in os.listdir(self.runtime.telemetry_writer.events_folder_path) if re.search('^[0-9]+.json$', pos_json)][-1] + with open(os.path.join(self.runtime.telemetry_writer.events_folder_path, latest_event_file), 'r+') as f: + events = json.load(f) + self.assertTrue(events is not None) + self.assertEqual(events[-1]["TaskName"], "Test Task") + # Token should be removed but username preserved + self.assertNotIn("TESTTOKEN123456", events[-1]["Message"]) + self.assertIn("testuser@packages-microsoft-com-prod", events[-1]["Message"]) + # Verify entire message matches expected output (excluding TC counter) + message_without_tc = events[-1]["Message"][:events[-1]["Message"].rfind(" [TC=")] + expected_message = ("ERROR: Customer environment error (expired SSL certs): " + "Command=sudo yum update -y --disablerepo='*' " + "--enablerepo='microsoft' !!Code=11 Out- Updating " + "Subscription Management repositories. " + "Unable to read consumer identity This system is not registered " + "with an entitlement server. Status code: 401 " + "for https://testuser@packages-microsoft-com-prod/CENTRAL.rpm " + "Error: Failed to download metadata for repo 'packages-microsoft-com-prod': " + "Cannot download repomd.xml: All mirrors were tried") + self.assertEqual(expected_message, message_without_tc) + f.close() def test_sanitize_credentials_exception_handling(self): """ Test exception handling: passing None should return the input unchanged """ diff --git a/src/core/tests/library/RuntimeCompositor.py b/src/core/tests/library/RuntimeCompositor.py index 5cbc6a037..460da784a 100644 --- a/src/core/tests/library/RuntimeCompositor.py +++ b/src/core/tests/library/RuntimeCompositor.py @@ -90,9 +90,10 @@ def mkdtemp_runner(): self.container = self.bootstrapper.build_out_container() self.file_logger = self.bootstrapper.file_logger self.composite_logger = self.bootstrapper.composite_logger + self.credential_sanitizer = self.bootstrapper.credential_sanitizer # re-initializing telemetry_writer, outside of Bootstrapper, to correctly set the env_layer configured for tests - self.telemetry_writer = TelemetryWriter(self.env_layer, self.composite_logger, self.bootstrapper.telemetry_writer.events_folder_path, self.bootstrapper.telemetry_supported) + self.telemetry_writer = TelemetryWriter(self.env_layer, self.composite_logger, self.bootstrapper.telemetry_writer.events_folder_path, self.bootstrapper.telemetry_supported, self.credential_sanitizer) self.bootstrapper.telemetry_writer = self.telemetry_writer self.bootstrapper.composite_logger.telemetry_writer = self.telemetry_writer diff --git a/src/extension/src/CredentialSanitizer.py b/src/extension/src/CredentialSanitizer.py index b25e01d25..dacabc2b4 100644 --- a/src/extension/src/CredentialSanitizer.py +++ b/src/extension/src/CredentialSanitizer.py @@ -1,4 +1,4 @@ -# Copyright 2020 Microsoft Corporation +# Copyright 2026 Microsoft Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,23 +14,22 @@ # # Requires Python 2.7+ +import logging import re class CredentialSanitizer(object): - """Sanitizes credential-like values from URIs. Removes password/token from URI userinfo.""" + """Service that sanitizes credential-like values from URIs by removing password/token from URI userinfo.""" + + def __init__(self): + pass @staticmethod def sanitize(message): - """Sanitizes credential-like values from URIs. - - Removes password/token from URI userinfo. - + """Removes password/token from URI credentials in the given message. Args: message: The message to sanitize - - Returns: - The message with credentials removed from URIs + Returns: The message with credentials removed from URIs """ try: # Pattern matches: scheme://user:password@host → scheme://user@host @@ -45,6 +44,6 @@ def sanitize(message): message ) return sanitized_message - except Exception: + except Exception as error: + logging.error("Error occurred while sanitizing credentials from message: %s", repr(error)) return message - diff --git a/src/extension/src/TelemetryWriter.py b/src/extension/src/TelemetryWriter.py index 009d520f3..b85c03900 100644 --- a/src/extension/src/TelemetryWriter.py +++ b/src/extension/src/TelemetryWriter.py @@ -1,4 +1,4 @@ -# Copyright 2020 Microsoft Corporation +# Copyright 2026 Microsoft Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -38,7 +38,7 @@ def __init__(self, logger, env_layer, credential_sanitizer=None): self.__agent_is_compatible = self.__get_agent_supports_telemetry_from_env_var() self.__task_name_watermark = "." + str(datetime.datetime.utcnow().hour) + "." + str(datetime.datetime.utcnow().minute) + "." + str(datetime.datetime.utcnow().second) + "." + str(os.getpid()) self.__task_name = Constants.TELEMETRY_TASK_NAME + self.__task_name_watermark - self.credential_sanitizer = credential_sanitizer or CredentialSanitizer + self.credential_sanitizer = credential_sanitizer or CredentialSanitizer() def __new_event_json(self, event_level, message, task_name): # Step 1: Apply message restrictions (formatting, truncation) diff --git a/src/extension/src/__main__.py b/src/extension/src/__main__.py index b178f21b7..0c9eaaeae 100644 --- a/src/extension/src/__main__.py +++ b/src/extension/src/__main__.py @@ -18,6 +18,8 @@ import os import sys from extension.src.ActionHandler import ActionHandler +from extension.src.Constants import Constants +from extension.src.CredentialSanitizer import CredentialSanitizer from extension.src.EnvLayer import EnvLayer from extension.src.EnvHealthManager import EnvHealthManager from extension.src.RuntimeContextHandler import RuntimeContextHandler @@ -39,7 +41,8 @@ def main(argv): file_logger = None env_layer = EnvLayer() logger = Logger() - telemetry_writer = TelemetryWriter(logger, env_layer) + credential_sanitizer = CredentialSanitizer() + telemetry_writer = TelemetryWriter(logger, env_layer, credential_sanitizer) logger.telemetry_writer = telemetry_writer # Need to set telemetry_writer within logger to enable sending all logs to telemetry exit_code = None try: diff --git a/src/extension/tests/Test_TelemetryWriter.py b/src/extension/tests/Test_TelemetryWriter.py index 49b87aded..35c0dc08d 100644 --- a/src/extension/tests/Test_TelemetryWriter.py +++ b/src/extension/tests/Test_TelemetryWriter.py @@ -199,56 +199,97 @@ def test_load_sanitized_event_full_path(self): # On non-GitHub runner, should return the sanitized message self.assertIsNotNone(result) self.assertIn("user@example.com", result) - self.assertNotIn("pass", result) + self.assertEqual("https://user@example.com", result) # Restore self.runtime.is_github_runner = original_is_github_runner # ==================== Unit Tests for Credential Sanitization ==================== - def test_sanitize_credentials_from_uri_https(self): + def test_sanitize_credentials_from_uri_https_credentials_leak_in_input(self): """ Test sanitization of HTTPS URIs with credentials """ message = "Error connecting to https://testuser:TESTTOKEN123456@invalid.repo.example/rpm/repodata/repomd.xml" - sanitized = CredentialSanitizer.sanitize(message) - expected_message = "Error connecting to https://testuser@invalid.repo.example/rpm/repodata/repomd.xml" - self.assertEqual(sanitized, expected_message) + self.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") + + event_files = os.listdir(self.telemetry_writer.events_folder_path) + with open(os.path.join(self.telemetry_writer.events_folder_path, event_files[0]), 'r+') as f: + events = json.load(f) + self.assertTrue(events is not None) + self.assertEqual(events[-1]["TaskName"], "Test Task") + # Verify password was removed but username preserved + self.assertNotIn("TESTTOKEN123456", events[-1]["Message"]) + self.assertIn("testuser@invalid.repo.example", events[-1]["Message"]) + f.close() - def test_sanitize_credentials_from_uri_http(self): + def test_sanitize_credentials_from_uri_http_credentials_leak_in_input(self): """ Test sanitization of HTTP URIs with credentials """ message = "Connection failed to http://user123:password123@example.com/path" - sanitized = CredentialSanitizer.sanitize(message) - # Password should be removed - self.assertNotIn("password123", sanitized) - # Username should be preserved - self.assertIn("user123@example.com", sanitized) + self.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") + + event_files = os.listdir(self.telemetry_writer.events_folder_path) + with open(os.path.join(self.telemetry_writer.events_folder_path, event_files[0]), 'r+') as f: + events = json.load(f) + self.assertTrue(events is not None) + self.assertEqual(events[-1]["TaskName"], "Test Task") + # Password should be removed + self.assertNotIn("password123", events[-1]["Message"]) + # Username should be preserved + self.assertIn("user123@example.com", events[-1]["Message"]) + self.assertEqual("Connection failed to http://user123@example.com/path", events[-1]["Message"]) + f.close() - def test_sanitize_credentials_multiple_urls(self): + def test_sanitize_credentials_multiple_urls_with_credentials_leak_in_input(self): """ Test sanitization with multiple URLs containing credentials """ message = "Failed to fetch from https://user1:pass1@host1.com/api and http://user2:pass2@host2.com/data" - sanitized = CredentialSanitizer.sanitize(message) - # Passwords should be removed - self.assertNotIn("pass1", sanitized) - self.assertNotIn("pass2", sanitized) - # Usernames should be preserved - self.assertIn("user1@host1.com", sanitized) - self.assertIn("user2@host2.com", sanitized) - - def test_sanitize_credentials_jfrog_repo_error(self): + self.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") + + event_files = os.listdir(self.telemetry_writer.events_folder_path) + with open(os.path.join(self.telemetry_writer.events_folder_path, event_files[0]), 'r+') as f: + events = json.load(f) + self.assertTrue(events is not None) + self.assertEqual(events[-1]["TaskName"], "Test Task") + # Passwords should be removed + self.assertNotIn("pass1", events[-1]["Message"]) + self.assertNotIn("pass2", events[-1]["Message"]) + # Usernames should be preserved + self.assertIn("user1@host1.com", events[-1]["Message"]) + self.assertIn("user2@host2.com", events[-1]["Message"]) + self.assertEqual("Failed to fetch from https://user1@host1.com/api and http://user2@host2.com/data", events[-1]["Message"]) + f.close() + + def test_sanitize_credentials_with_no_credentials_in_input_with_credentials_leak_in_input(self): """ ERROR with 401 status code from jfrog.io """ message = "ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml" - sanitized = CredentialSanitizer.sanitize(message) - expected_message = "ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml" - self.assertEqual(sanitized, expected_message) + self.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") - def test_sanitize_credentials_curl_error_buildbot_token(self): + event_files = os.listdir(self.telemetry_writer.events_folder_path) + with open(os.path.join(self.telemetry_writer.events_folder_path, event_files[0]), 'r+') as f: + events = json.load(f) + self.assertTrue(events is not None) + self.assertEqual(events[-1]["TaskName"], "Test Task") + # Message should remain unchanged (no credentials to sanitize) + self.assertIn("jfrog.io", events[-1]["Message"]) + self.assertEqual("ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml", events[-1]["Message"]) + f.close() + + def test_sanitize_credentials_with_error_and_credentials_leak_in_input(self): """ Curl error with buildbot:BuildBotToken credentials """ message = ("Curl error (6): Couldn't resolve host 'packages.microsoft.com' Could not " "retrieve mirrorlist https://buildbot:BuildBotToken@mirror.example.com/repodata/repomd.xml") - sanitized = CredentialSanitizer.sanitize(message) - expected_message = ("Curl error (6): Couldn't resolve host 'packages.microsoft.com' Could not " - "retrieve mirrorlist https://buildbot@mirror.example.com/repodata/repomd.xml") - self.assertEqual(sanitized, expected_message) + self.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") + + event_files = os.listdir(self.telemetry_writer.events_folder_path) + with open(os.path.join(self.telemetry_writer.events_folder_path, event_files[0]), 'r+') as f: + events = json.load(f) + self.assertTrue(events is not None) + self.assertEqual(events[-1]["TaskName"], "Test Task") + # Token should be removed but username preserved + self.assertNotIn("BuildBotToken", events[-1]["Message"]) + self.assertIn("buildbot@mirror.example.com", events[-1]["Message"]) + self.assertEqual(("Curl error (6): Couldn't resolve host 'packages.microsoft.com' Could not " + "retrieve mirrorlist https://buildbot@mirror.example.com/repodata/repomd.xml"), events[-1]["Message"]) + f.close() - def test_sanitize_credentials_expired_ssl_certs_error(self): + def test_sanitize_credentials_expired_with_credentials_leak_in_input(self): """ ERROR with expired SSL certs and TESTTOKEN123456 """ message = ("ERROR: Customer environment error (expired SSL certs): " "Command=sudo yum update -y --disablerepo='*' " @@ -259,17 +300,27 @@ def test_sanitize_credentials_expired_ssl_certs_error(self): "for https://testuser:TESTTOKEN123456@packages-microsoft-com-prod/CENTRAL.rpm " "Error: Failed to download metadata for repo 'packages-microsoft-com-prod': " "Cannot download repomd.xml: All mirrors were tried") - sanitized = CredentialSanitizer.sanitize(message) - expected_message = ("ERROR: Customer environment error (expired SSL certs): " - "Command=sudo yum update -y --disablerepo='*' " - "--enablerepo='microsoft' !!Code=11 Out- Updating " - "Subscription Management repositories. " - "Unable to read consumer identity This system is not registered " - "with an entitlement server. Status code: 401 " - "for https://testuser@packages-microsoft-com-prod/CENTRAL.rpm " - "Error: Failed to download metadata for repo 'packages-microsoft-com-prod': " - "Cannot download repomd.xml: All mirrors were tried") - self.assertEqual(sanitized, expected_message) + self.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") + + event_files = os.listdir(self.telemetry_writer.events_folder_path) + with open(os.path.join(self.telemetry_writer.events_folder_path, event_files[0]), 'r+') as f: + events = json.load(f) + self.assertTrue(events is not None) + self.assertEqual(events[-1]["TaskName"], "Test Task") + # Token should be removed but username preserved + self.assertNotIn("TESTTOKEN123456", events[-1]["Message"]) + self.assertIn("testuser@packages-microsoft-com-prod", events[-1]["Message"]) + expected_message = ("ERROR: Customer environment error (expired SSL certs): " + "Command=sudo yum update -y --disablerepo='*' " + "--enablerepo='microsoft' !!Code=11 Out- Updating " + "Subscription Management repositories. " + "Unable to read consumer identity This system is not registered " + "with an entitlement server. Status code: 401 " + "for https://testuser@packages-microsoft-com-prod/CENTRAL.rpm " + "Error: Failed to download metadata for repo 'packages-microsoft-com-prod': " + "Cannot download repomd.xml: All mirrors were tried") + self.assertEqual(expected_message, events[-1]["Message"]) + f.close() def test_sanitize_credentials_exception_handling(self): """ Test exception handling: passing None should return the input unchanged """ @@ -316,15 +367,3 @@ def test_inject_fake_sanitizer_and_verify_invocation(self): if __name__ == '__main__': SUITE = unittest.TestLoader().loadTestsFromTestCase(TestTelemetryWriter) unittest.TextTestRunner(verbosity=2).run(SUITE) - - - - - - - - - - - - diff --git a/src/extension/tests/helpers/RuntimeComposer.py b/src/extension/tests/helpers/RuntimeComposer.py index 2a9edbd84..1c6dd426c 100644 --- a/src/extension/tests/helpers/RuntimeComposer.py +++ b/src/extension/tests/helpers/RuntimeComposer.py @@ -4,6 +4,7 @@ import uuid from extension.src.Constants import Constants +from extension.src.CredentialSanitizer import CredentialSanitizer from extension.src.EnvLayer import EnvLayer from extension.src.EnvHealthManager import EnvHealthManager from extension.src.TelemetryWriter import TelemetryWriter @@ -21,7 +22,8 @@ def __init__(self): self.json_file_handler = JsonFileHandler(self.logger) self.env_layer = EnvLayer() self.env_health_manager = EnvHealthManager(self.env_layer) - self.telemetry_writer = TelemetryWriter(self.logger, self.env_layer) + self.credential_sanitizer = CredentialSanitizer() + self.telemetry_writer = TelemetryWriter(self.logger, self.env_layer, self.credential_sanitizer) time.sleep = self.mock_sleep self.env_layer.is_tty_required = self.mock_is_tty_required self.env_health_manager.check_sudo_status = self.mock_check_sudo_status From e0edee233f23b9ca3afca8ee96aada1fecc07e53 Mon Sep 17 00:00:00 2001 From: Yashna Parikh Date: Thu, 30 Apr 2026 12:35:55 -0400 Subject: [PATCH 07/16] Address Code Review --- .../src/bootstrap/ConfigurationFactory.py | 4 +- .../service_interfaces/CredentialSanitizer.py | 17 ++- .../src/service_interfaces/TelemetryWriter.py | 7 +- src/core/tests/Test_TelemetryWriter.py | 116 ++++++----------- src/core/tests/library/RuntimeCompositor.py | 2 +- src/extension/src/CredentialSanitizer.py | 18 ++- src/extension/src/TelemetryWriter.py | 7 +- src/extension/src/__main__.py | 3 +- src/extension/tests/Test_TelemetryWriter.py | 118 +++++++----------- .../tests/helpers/RuntimeComposer.py | 2 +- 10 files changed, 109 insertions(+), 185 deletions(-) diff --git a/src/core/src/bootstrap/ConfigurationFactory.py b/src/core/src/bootstrap/ConfigurationFactory.py index 69be2ba0d..c0b679f85 100644 --- a/src/core/src/bootstrap/ConfigurationFactory.py +++ b/src/core/src/bootstrap/ConfigurationFactory.py @@ -40,8 +40,8 @@ from core.src.package_managers.AzL3TdnfPackageManager import AzL3TdnfPackageManager from core.src.package_managers.YumPackageManager import YumPackageManager from core.src.package_managers.ZypperPackageManager import ZypperPackageManager - from core.src.service_interfaces.CredentialSanitizer import CredentialSanitizer + from core.src.service_interfaces.LifecycleManager import LifecycleManager from core.src.service_interfaces.LifecycleManagerAzure import LifecycleManagerAzure from core.src.service_interfaces.LifecycleManagerArc import LifecycleManagerArc @@ -154,7 +154,7 @@ def new_bootstrap_configuration(config_env, log_file_path, events_folder, teleme }, 'credential_sanitizer': { 'component': CredentialSanitizer, - 'component_args': [], + 'component_args': ['composite_logger'], 'component_kwargs': {} }, 'telemetry_writer': { diff --git a/src/core/src/service_interfaces/CredentialSanitizer.py b/src/core/src/service_interfaces/CredentialSanitizer.py index dacabc2b4..519250033 100644 --- a/src/core/src/service_interfaces/CredentialSanitizer.py +++ b/src/core/src/service_interfaces/CredentialSanitizer.py @@ -13,7 +13,6 @@ # limitations under the License. # # Requires Python 2.7+ - import logging import re @@ -21,11 +20,10 @@ class CredentialSanitizer(object): """Service that sanitizes credential-like values from URIs by removing password/token from URI userinfo.""" - def __init__(self): - pass + def __init__(self, composite_logger): + self.composite_logger = composite_logger - @staticmethod - def sanitize(message): + def sanitize(self, message): """Removes password/token from URI credentials in the given message. Args: message: The message to sanitize @@ -39,11 +37,10 @@ def sanitize(message): # (2) username: one or more non-whitespace, non-slash, non-colon, non-@ characters # (3) password: zero or more non-whitespace, non-slash, non-@ characters sanitized_message = re.sub( - r'(https?://|ftp://)([^:/@\s]+):([^@/\s]*)@', - r'\1\2@', - message - ) + r'(https?://|ftp://)([^:/@\s]+):([^@/\s]*)@',r'\1\2@',message) + self.composite_logger.log_verbose("Message was sanitized to remove sensitive information. [InputMessage={0}][SanitizedMessage={1}]".format(str(message), str(sanitized_message))) return sanitized_message except Exception as error: - logging.error("Error occurred while sanitizing credentials from message: %s", repr(error)) + self.composite_logger.log_error("Error occurred while sanitizing credentials from message: {0}".format(repr(error))) return message + diff --git a/src/core/src/service_interfaces/TelemetryWriter.py b/src/core/src/service_interfaces/TelemetryWriter.py index a2ecaaf60..a5b1f2a4c 100644 --- a/src/core/src/service_interfaces/TelemetryWriter.py +++ b/src/core/src/service_interfaces/TelemetryWriter.py @@ -1,4 +1,4 @@ -# Copyright 2026 Microsoft Corporation +# Copyright 2020 Microsoft Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -23,7 +23,6 @@ import time from core.src.bootstrap.Constants import Constants -from core.src.service_interfaces.CredentialSanitizer import CredentialSanitizer class TelemetryWriter(object): @@ -31,7 +30,7 @@ class TelemetryWriter(object): TELEMETRY_BUFFER_DELIMETER= "\n|\t" - def __init__(self, env_layer, composite_logger, events_folder_path, telemetry_supported, credential_sanitizer=None): + def __init__(self, env_layer, composite_logger, credential_sanitizer, events_folder_path, telemetry_supported): self.env_layer = env_layer self.composite_logger = composite_logger self.__operation_id = str(datetime.datetime.utcnow()) @@ -39,7 +38,7 @@ def __init__(self, env_layer, composite_logger, events_folder_path, telemetry_su self.__task_name = Constants.TelemetryTaskName.STARTUP + self.__task_name_watermark self.events_folder_path = None self.__telemetry_event_counter = 1 # will be added at the end of each event sent to telemetry to assist in tracing and identifying event/message loss in telemetry - self.credential_sanitizer = credential_sanitizer or CredentialSanitizer() + self.credential_sanitizer = credential_sanitizer self.start_time_for_event_count_throttle_check = datetime.datetime.utcnow() self.event_count = 1 diff --git a/src/core/tests/Test_TelemetryWriter.py b/src/core/tests/Test_TelemetryWriter.py index 28503bf98..8a4fdf936 100644 --- a/src/core/tests/Test_TelemetryWriter.py +++ b/src/core/tests/Test_TelemetryWriter.py @@ -19,10 +19,7 @@ import re import time import unittest -from unittest.mock import Mock from core.src.bootstrap.Constants import Constants -from core.src.service_interfaces.CredentialSanitizer import CredentialSanitizer -from core.src.service_interfaces.TelemetryWriter import TelemetryWriter from core.tests.library.ArgumentComposer import ArgumentComposer from core.tests.library.RuntimeCompositor import RuntimeCompositor @@ -315,22 +312,33 @@ def test_write_event_with_buffer_true_and_empty_string_and_then_flush_with_non_e self.assertTrue(text_found.string.startswith("Message 1")) # ==================== Unit Tests for Credential Sanitization ==================== - def test_sanitize_credentials_from_uri_https_with_credentials_leak_in_input(self): + def test_sanitize_credentials_from_uri_https_with_credentials_leak(self): """ Test sanitization of HTTPS URIs with credentials """ + # Clear any existing event files before test + for f in os.listdir(self.runtime.telemetry_writer.events_folder_path): + if f.endswith('.json'): + os.remove(os.path.join(self.runtime.telemetry_writer.events_folder_path, f)) + + # Verify events folder is empty before test + self.assertTrue(len(os.listdir(self.runtime.telemetry_writer.events_folder_path)) == 0, "Events folder should be empty before writing") + message = "Error connecting to https://testuser:TESTTOKEN123456@invalid.repo.example/rpm/repodata/repomd.xml" self.runtime.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") + # Verify exactly one event file was created latest_event_file = [pos_json for pos_json in os.listdir(self.runtime.telemetry_writer.events_folder_path) if re.search('^[0-9]+.json$', pos_json)][-1] + event_files_count = len([f for f in os.listdir(self.runtime.telemetry_writer.events_folder_path) if re.search('^[0-9]+.json$', f)]) + self.assertEqual(event_files_count, 1, "Events folder should contain exactly one event file") + with open(os.path.join(self.runtime.telemetry_writer.events_folder_path, latest_event_file), 'r+') as f: events = json.load(f) self.assertTrue(events is not None) self.assertEqual(events[-1]["TaskName"], "Test Task") - # Verify password was removed but username preserved - self.assertNotIn("TESTTOKEN123456", events[-1]["Message"]) - self.assertIn("testuser@invalid.repo.example", events[-1]["Message"]) + message_without_tc = events[-1]["Message"][:events[-1]["Message"].rfind(" [TC=")] + self.assertEqual("Error connecting to https://testuser@invalid.repo.example/rpm/repodata/repomd.xml", message_without_tc) f.close() - def test_sanitize_credentials_from_uri_http_with_credentials_leak_in_input(self): + def test_sanitize_credentials_from_uri_http_with_credentials_leak(self): """ Test sanitization of HTTP URIs with credentials """ message = "Connection failed to http://user123:password123@example.com/path" self.runtime.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") @@ -340,13 +348,12 @@ def test_sanitize_credentials_from_uri_http_with_credentials_leak_in_input(self) events = json.load(f) self.assertTrue(events is not None) self.assertEqual(events[-1]["TaskName"], "Test Task") - # Password should be removed - self.assertNotIn("password123", events[-1]["Message"]) - # Username should be preserved - self.assertIn("user123@example.com", events[-1]["Message"]) + # Verify entire message matches expected output (excluding TC counter) + message_without_tc = events[-1]["Message"][:events[-1]["Message"].rfind(" [TC=")] + self.assertEqual("Connection failed to http://user123@example.com/path", message_without_tc) f.close() - def test_sanitize_credentials_multiple_urls_with_credentials_leak_in_input(self): + def test_sanitize_credentials_multiple_urls_with_credentials_leak(self): """ Test sanitization with multiple URLs containing credentials """ message = "Failed to fetch from https://user1:pass1@host1.com/api and http://user2:pass2@host2.com/data" self.runtime.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") @@ -356,18 +363,11 @@ def test_sanitize_credentials_multiple_urls_with_credentials_leak_in_input(self) events = json.load(f) self.assertTrue(events is not None) self.assertEqual(events[-1]["TaskName"], "Test Task") - # Passwords should be removed - self.assertNotIn("pass1", events[-1]["Message"]) - self.assertNotIn("pass2", events[-1]["Message"]) - # Usernames should be preserved - self.assertIn("user1@host1.com", events[-1]["Message"]) - self.assertIn("user2@host2.com", events[-1]["Message"]) - # Verify entire message matches expected output (excluding TC counter) message_without_tc = events[-1]["Message"][:events[-1]["Message"].rfind(" [TC=")] - self.assertIn("Failed to fetch from https://user1@host1.com/api and http://user2@host2.com/data", message_without_tc) + self.assertEqual("Failed to fetch from https://user1@host1.com/api and http://user2@host2.com/data", message_without_tc) f.close() - def test_sanitize_credentials_with_no_credentials_in_input_leak_in_input(self): + def test_sanitize_credentials_with_error_and_no_credentials(self): """ ERROR with 401 status code from jfrog.io """ message = "ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml" self.runtime.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") @@ -377,14 +377,12 @@ def test_sanitize_credentials_with_no_credentials_in_input_leak_in_input(self): events = json.load(f) self.assertTrue(events is not None) self.assertEqual(events[-1]["TaskName"], "Test Task") - # Message should remain unchanged (no credentials to sanitize) - self.assertIn("jfrog.io", events[-1]["Message"]) # Verify entire message matches expected output (excluding TC counter) message_without_tc = events[-1]["Message"][:events[-1]["Message"].rfind(" [TC=")] - self.assertIn("ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml", message_without_tc) + self.assertEqual("ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml", message_without_tc) f.close() - def test_sanitize_credentials_with_error_and_credentials_in_input(self): + def test_sanitize_credentials_with_error_and_credentials_leak(self): """ Curl error with buildbot:BuildBotToken credentials """ message = ("Curl error (6): Couldn't resolve host 'packages.microsoft.com' Could not " "retrieve mirrorlist https://buildbot:BuildBotToken@mirror.example.com/repodata/repomd.xml") @@ -395,9 +393,6 @@ def test_sanitize_credentials_with_error_and_credentials_in_input(self): events = json.load(f) self.assertTrue(events is not None) self.assertEqual(events[-1]["TaskName"], "Test Task") - # Token should be removed but username preserved - self.assertNotIn("BuildBotToken", events[-1]["Message"]) - self.assertIn("buildbot@mirror.example.com", events[-1]["Message"]) # Verify entire message matches expected output (excluding TC counter) message_without_tc = events[-1]["Message"][:events[-1]["Message"].rfind(" [TC=")] expected_message = ("Curl error (6): Couldn't resolve host 'packages.microsoft.com' Could not " @@ -405,8 +400,16 @@ def test_sanitize_credentials_with_error_and_credentials_in_input(self): self.assertEqual(expected_message, message_without_tc) f.close() - def test_sanitize_credentials_with_credentials_leak_in_input(self): + def test_sanitize_credentials_with_credentials_leak(self): """ ERROR with expired SSL certs and TESTTOKEN123456 """ + # Clear any existing event files before test + for f in os.listdir(self.runtime.telemetry_writer.events_folder_path): + if f.endswith('.json'): + os.remove(os.path.join(self.runtime.telemetry_writer.events_folder_path, f)) + + # Verify events folder is empty before test + self.assertTrue(len([f for f in os.listdir(self.runtime.telemetry_writer.events_folder_path) if re.search('^[0-9]+.json$', f)]) == 0, "Events folder should be empty before writing") + message = ("ERROR: Customer environment error (expired SSL certs): " "Command=sudo yum update -y --disablerepo='*' " "--enablerepo='microsoft' !!Code=11 Out- Updating " @@ -418,14 +421,15 @@ def test_sanitize_credentials_with_credentials_leak_in_input(self): "Cannot download repomd.xml: All mirrors were tried") self.runtime.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") + # Verify exactly one event file was created + event_files_count = len([f for f in os.listdir(self.runtime.telemetry_writer.events_folder_path) if re.search('^[0-9]+.json$', f)]) + self.assertEqual(event_files_count, 1, "Events folder should contain exactly one event file") + latest_event_file = [pos_json for pos_json in os.listdir(self.runtime.telemetry_writer.events_folder_path) if re.search('^[0-9]+.json$', pos_json)][-1] with open(os.path.join(self.runtime.telemetry_writer.events_folder_path, latest_event_file), 'r+') as f: events = json.load(f) self.assertTrue(events is not None) self.assertEqual(events[-1]["TaskName"], "Test Task") - # Token should be removed but username preserved - self.assertNotIn("TESTTOKEN123456", events[-1]["Message"]) - self.assertIn("testuser@packages-microsoft-com-prod", events[-1]["Message"]) # Verify entire message matches expected output (excluding TC counter) message_without_tc = events[-1]["Message"][:events[-1]["Message"].rfind(" [TC=")] expected_message = ("ERROR: Customer environment error (expired SSL certs): " @@ -442,52 +446,10 @@ def test_sanitize_credentials_with_credentials_leak_in_input(self): def test_sanitize_credentials_exception_handling(self): """ Test exception handling: passing None should return the input unchanged """ - result = CredentialSanitizer.sanitize(None) + result = self.runtime.telemetry_writer.credential_sanitizer.sanitize(None) self.assertIsNone(result) - def test_inject_fake_sanitizer_and_verify_invocation(self): - """ Integration Test: Can inject a fake sanitizer and verify it was invoked during write_event """ - # Create a mock sanitizer - mock_sanitizer = Mock() - mock_sanitizer.sanitize = Mock(return_value="sanitized_message [TC=1]") - - # Create TelemetryWriter with injected mock sanitizer - env_layer = self.runtime.env_layer - composite_logger = self.runtime.composite_logger - writer = TelemetryWriter(env_layer, composite_logger, events_folder_path=None, - telemetry_supported=False, credential_sanitizer=mock_sanitizer) - - # Set up a temporary events folder for testing - import tempfile - import shutil - temp_folder = tempfile.mkdtemp() - writer.events_folder_path = temp_folder - writer._TelemetryWriter__is_telemetry_supported = True - - try: - # Write an event - original_message = "https://user:password@example.com/error" - writer.write_event(original_message, Constants.TelemetryEventLevel.Error, "Test Task") - - # Verify mock sanitizer was called - self.assertTrue(mock_sanitizer.sanitize.called, "Sanitizer should have been invoked") - self.assertEqual(mock_sanitizer.sanitize.call_count, 1, "Sanitizer should be called exactly once") - - # Verify the call was made with a message containing the original error info - call_args = mock_sanitizer.sanitize.call_args[0][0] - self.assertIn("example.com", call_args, "Sanitizer should be called with message containing URL") - - # Verify telemetry event was written with the mock-sanitized message - event_files = os.listdir(writer.events_folder_path) - self.assertTrue(len(event_files) > 0, "Event file should be created") - - with open(os.path.join(writer.events_folder_path, event_files[0]), 'r') as f: - events = json.load(f) - # The message should be the one returned by our mock - self.assertIn("sanitized_message", events[-1]["Message"]) - f.close() - finally: - shutil.rmtree(temp_folder) if __name__ == '__main__': unittest.main() + diff --git a/src/core/tests/library/RuntimeCompositor.py b/src/core/tests/library/RuntimeCompositor.py index 460da784a..ad4ef178d 100644 --- a/src/core/tests/library/RuntimeCompositor.py +++ b/src/core/tests/library/RuntimeCompositor.py @@ -93,7 +93,7 @@ def mkdtemp_runner(): self.credential_sanitizer = self.bootstrapper.credential_sanitizer # re-initializing telemetry_writer, outside of Bootstrapper, to correctly set the env_layer configured for tests - self.telemetry_writer = TelemetryWriter(self.env_layer, self.composite_logger, self.bootstrapper.telemetry_writer.events_folder_path, self.bootstrapper.telemetry_supported, self.credential_sanitizer) + self.telemetry_writer = TelemetryWriter(self.env_layer, self.composite_logger, self.credential_sanitizer, self.bootstrapper.telemetry_writer.events_folder_path, self.bootstrapper.telemetry_supported) self.bootstrapper.telemetry_writer = self.telemetry_writer self.bootstrapper.composite_logger.telemetry_writer = self.telemetry_writer diff --git a/src/extension/src/CredentialSanitizer.py b/src/extension/src/CredentialSanitizer.py index dacabc2b4..4b47e7bc8 100644 --- a/src/extension/src/CredentialSanitizer.py +++ b/src/extension/src/CredentialSanitizer.py @@ -14,18 +14,16 @@ # # Requires Python 2.7+ -import logging import re class CredentialSanitizer(object): """Service that sanitizes credential-like values from URIs by removing password/token from URI userinfo.""" - def __init__(self): - pass + def __init__(self, logger): + self.logger = logger - @staticmethod - def sanitize(message): + def sanitize(self, message): """Removes password/token from URI credentials in the given message. Args: message: The message to sanitize @@ -39,11 +37,11 @@ def sanitize(message): # (2) username: one or more non-whitespace, non-slash, non-colon, non-@ characters # (3) password: zero or more non-whitespace, non-slash, non-@ characters sanitized_message = re.sub( - r'(https?://|ftp://)([^:/@\s]+):([^@/\s]*)@', - r'\1\2@', - message - ) + r'(https?://|ftp://)([^:/@\s]+):([^@/\s]*)@',r'\1\2@',message) + self.logger.log_verbose( + "Message was sanitized to remove sensitive information. [InputMessage={0}][SanitizedMessage={1}]".format(str(message), str(sanitized_message))) return sanitized_message except Exception as error: - logging.error("Error occurred while sanitizing credentials from message: %s", repr(error)) + self.logger.log_error("Error occurred while sanitizing credentials from message: {0}".format(repr(error))) return message + diff --git a/src/extension/src/TelemetryWriter.py b/src/extension/src/TelemetryWriter.py index b85c03900..b00f19d70 100644 --- a/src/extension/src/TelemetryWriter.py +++ b/src/extension/src/TelemetryWriter.py @@ -1,4 +1,4 @@ -# Copyright 2026 Microsoft Corporation +# Copyright 2020 Microsoft Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -23,13 +23,12 @@ import time from extension.src.Constants import Constants -from extension.src.CredentialSanitizer import CredentialSanitizer class TelemetryWriter(object): """Class for writing telemetry data to events""" - def __init__(self, logger, env_layer, credential_sanitizer=None): + def __init__(self, logger, env_layer, credential_sanitizer): self.logger = logger self.env_layer = env_layer self.events_folder_path = None @@ -38,7 +37,7 @@ def __init__(self, logger, env_layer, credential_sanitizer=None): self.__agent_is_compatible = self.__get_agent_supports_telemetry_from_env_var() self.__task_name_watermark = "." + str(datetime.datetime.utcnow().hour) + "." + str(datetime.datetime.utcnow().minute) + "." + str(datetime.datetime.utcnow().second) + "." + str(os.getpid()) self.__task_name = Constants.TELEMETRY_TASK_NAME + self.__task_name_watermark - self.credential_sanitizer = credential_sanitizer or CredentialSanitizer() + self.credential_sanitizer = credential_sanitizer def __new_event_json(self, event_level, message, task_name): # Step 1: Apply message restrictions (formatting, truncation) diff --git a/src/extension/src/__main__.py b/src/extension/src/__main__.py index 0c9eaaeae..2664ead73 100644 --- a/src/extension/src/__main__.py +++ b/src/extension/src/__main__.py @@ -18,7 +18,6 @@ import os import sys from extension.src.ActionHandler import ActionHandler -from extension.src.Constants import Constants from extension.src.CredentialSanitizer import CredentialSanitizer from extension.src.EnvLayer import EnvLayer from extension.src.EnvHealthManager import EnvHealthManager @@ -41,7 +40,7 @@ def main(argv): file_logger = None env_layer = EnvLayer() logger = Logger() - credential_sanitizer = CredentialSanitizer() + credential_sanitizer = CredentialSanitizer(logger) telemetry_writer = TelemetryWriter(logger, env_layer, credential_sanitizer) logger.telemetry_writer = telemetry_writer # Need to set telemetry_writer within logger to enable sending all logs to telemetry exit_code = None diff --git a/src/extension/tests/Test_TelemetryWriter.py b/src/extension/tests/Test_TelemetryWriter.py index 35c0dc08d..838e080da 100644 --- a/src/extension/tests/Test_TelemetryWriter.py +++ b/src/extension/tests/Test_TelemetryWriter.py @@ -4,12 +4,9 @@ import tempfile import time import unittest -from unittest.mock import Mock from extension.src.Constants import Constants -from extension.src.CredentialSanitizer import CredentialSanitizer -from extension.src.TelemetryWriter import TelemetryWriter -from extension.tests.helpers.VirtualTerminal import VirtualTerminal from extension.tests.helpers.RuntimeComposer import RuntimeComposer +from extension.tests.helpers.VirtualTerminal import VirtualTerminal class TestTelemetryWriter(unittest.TestCase): @@ -166,12 +163,9 @@ def _load_sanitized_event(self, message): """ Helper method to write event to telemetry and load the sanitized message. The regex sanitization happens automatically in TelemetryWriter. - Args: message: The message to write to telemetry - - Returns: - The sanitized message from the event + Returns: The sanitized message from the event """ if self.runtime.is_github_runner: return None @@ -205,23 +199,34 @@ def test_load_sanitized_event_full_path(self): self.runtime.is_github_runner = original_is_github_runner # ==================== Unit Tests for Credential Sanitization ==================== - def test_sanitize_credentials_from_uri_https_credentials_leak_in_input(self): + def test_sanitize_credentials_from_uri_https_credentials_leak(self): """ Test sanitization of HTTPS URIs with credentials """ + if self.runtime.is_github_runner: + return + + # Verify events folder is empty before test + self.assertTrue(len(os.listdir(self.telemetry_writer.events_folder_path)) == 0, "Events folder should be empty before writing") + message = "Error connecting to https://testuser:TESTTOKEN123456@invalid.repo.example/rpm/repodata/repomd.xml" self.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") + # Verify exactly one event file was created event_files = os.listdir(self.telemetry_writer.events_folder_path) + self.assertEqual(len(event_files), 1, "Events folder should contain exactly one event file") + with open(os.path.join(self.telemetry_writer.events_folder_path, event_files[0]), 'r+') as f: events = json.load(f) self.assertTrue(events is not None) self.assertEqual(events[-1]["TaskName"], "Test Task") - # Verify password was removed but username preserved - self.assertNotIn("TESTTOKEN123456", events[-1]["Message"]) - self.assertIn("testuser@invalid.repo.example", events[-1]["Message"]) + expected_message = ("Error connecting to https://testuser@invalid.repo.example/rpm/repodata/repomd.xml") + self.assertEqual(expected_message, events[-1]["Message"]) f.close() - def test_sanitize_credentials_from_uri_http_credentials_leak_in_input(self): + def test_sanitize_credentials_from_uri_http_credentials_leak(self): """ Test sanitization of HTTP URIs with credentials """ + if self.runtime.is_github_runner: + return + message = "Connection failed to http://user123:password123@example.com/path" self.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") @@ -230,15 +235,15 @@ def test_sanitize_credentials_from_uri_http_credentials_leak_in_input(self): events = json.load(f) self.assertTrue(events is not None) self.assertEqual(events[-1]["TaskName"], "Test Task") - # Password should be removed - self.assertNotIn("password123", events[-1]["Message"]) - # Username should be preserved - self.assertIn("user123@example.com", events[-1]["Message"]) - self.assertEqual("Connection failed to http://user123@example.com/path", events[-1]["Message"]) + expected_message = ("Connection failed to http://user123@example.com/path") + self.assertEqual(expected_message, events[-1]["Message"]) f.close() - def test_sanitize_credentials_multiple_urls_with_credentials_leak_in_input(self): + def test_sanitize_credentials_multiple_urls_with_credentials_leak(self): """ Test sanitization with multiple URLs containing credentials """ + if self.runtime.is_github_runner: + return + message = "Failed to fetch from https://user1:pass1@host1.com/api and http://user2:pass2@host2.com/data" self.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") @@ -247,17 +252,15 @@ def test_sanitize_credentials_multiple_urls_with_credentials_leak_in_input(self) events = json.load(f) self.assertTrue(events is not None) self.assertEqual(events[-1]["TaskName"], "Test Task") - # Passwords should be removed - self.assertNotIn("pass1", events[-1]["Message"]) - self.assertNotIn("pass2", events[-1]["Message"]) - # Usernames should be preserved - self.assertIn("user1@host1.com", events[-1]["Message"]) - self.assertIn("user2@host2.com", events[-1]["Message"]) - self.assertEqual("Failed to fetch from https://user1@host1.com/api and http://user2@host2.com/data", events[-1]["Message"]) + expected_message = "Failed to fetch from https://user1@host1.com/api and http://user2@host2.com/data" + self.assertEqual(expected_message, events[-1]["Message"]) f.close() - def test_sanitize_credentials_with_no_credentials_in_input_with_credentials_leak_in_input(self): + def test_sanitize_credentials_with_no_credentials_in_input_with_credentials_leak(self): """ ERROR with 401 status code from jfrog.io """ + if self.runtime.is_github_runner: + return + message = "ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml" self.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") @@ -266,13 +269,14 @@ def test_sanitize_credentials_with_no_credentials_in_input_with_credentials_leak events = json.load(f) self.assertTrue(events is not None) self.assertEqual(events[-1]["TaskName"], "Test Task") - # Message should remain unchanged (no credentials to sanitize) - self.assertIn("jfrog.io", events[-1]["Message"]) self.assertEqual("ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml", events[-1]["Message"]) f.close() - def test_sanitize_credentials_with_error_and_credentials_leak_in_input(self): + def test_sanitize_credentials_with_error_and_credentials_leak(self): """ Curl error with buildbot:BuildBotToken credentials """ + if self.runtime.is_github_runner: + return + message = ("Curl error (6): Couldn't resolve host 'packages.microsoft.com' Could not " "retrieve mirrorlist https://buildbot:BuildBotToken@mirror.example.com/repodata/repomd.xml") self.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") @@ -282,15 +286,18 @@ def test_sanitize_credentials_with_error_and_credentials_leak_in_input(self): events = json.load(f) self.assertTrue(events is not None) self.assertEqual(events[-1]["TaskName"], "Test Task") - # Token should be removed but username preserved - self.assertNotIn("BuildBotToken", events[-1]["Message"]) - self.assertIn("buildbot@mirror.example.com", events[-1]["Message"]) self.assertEqual(("Curl error (6): Couldn't resolve host 'packages.microsoft.com' Could not " "retrieve mirrorlist https://buildbot@mirror.example.com/repodata/repomd.xml"), events[-1]["Message"]) f.close() def test_sanitize_credentials_expired_with_credentials_leak_in_input(self): """ ERROR with expired SSL certs and TESTTOKEN123456 """ + if self.runtime.is_github_runner: + return + + # Verify events folder is empty before test + self.assertTrue(len(os.listdir(self.telemetry_writer.events_folder_path)) == 0, "Events folder should be empty before writing") + message = ("ERROR: Customer environment error (expired SSL certs): " "Command=sudo yum update -y --disablerepo='*' " "--enablerepo='microsoft' !!Code=11 Out- Updating " @@ -302,14 +309,14 @@ def test_sanitize_credentials_expired_with_credentials_leak_in_input(self): "Cannot download repomd.xml: All mirrors were tried") self.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") + # Verify exactly one event file was created event_files = os.listdir(self.telemetry_writer.events_folder_path) + self.assertEqual(len(event_files), 1, "Events folder should contain exactly one event file") + with open(os.path.join(self.telemetry_writer.events_folder_path, event_files[0]), 'r+') as f: events = json.load(f) self.assertTrue(events is not None) self.assertEqual(events[-1]["TaskName"], "Test Task") - # Token should be removed but username preserved - self.assertNotIn("TESTTOKEN123456", events[-1]["Message"]) - self.assertIn("testuser@packages-microsoft-com-prod", events[-1]["Message"]) expected_message = ("ERROR: Customer environment error (expired SSL certs): " "Command=sudo yum update -y --disablerepo='*' " "--enablerepo='microsoft' !!Code=11 Out- Updating " @@ -324,46 +331,9 @@ def test_sanitize_credentials_expired_with_credentials_leak_in_input(self): def test_sanitize_credentials_exception_handling(self): """ Test exception handling: passing None should return the input unchanged """ - result = CredentialSanitizer.sanitize(None) + result = self.runtime.telemetry_writer.credential_sanitizer.sanitize(None) self.assertIsNone(result) - def test_inject_fake_sanitizer_and_verify_invocation(self): - """ Test: Can inject a fake sanitizer and verify it was invoked during write_event """ - # Create a mock sanitizer - mock_sanitizer = Mock() - mock_sanitizer.sanitize = Mock(return_value="sanitized_message") - - # Create TelemetryWriter with injected mock sanitizer - logger = self.runtime.logger - env_layer = self.runtime.env_layer - writer = TelemetryWriter(logger, env_layer, mock_sanitizer) - writer.events_folder_path = tempfile.mkdtemp() - - try: - # Write an event - original_message = "https://user:password@example.com/error" - writer.write_event(original_message, Constants.TelemetryEventLevel.Error, "Test Task") - - # Verify mock sanitizer was called - self.assertTrue(mock_sanitizer.sanitize.called, "Sanitizer should have been invoked") - self.assertEqual(mock_sanitizer.sanitize.call_count, 1, "Sanitizer should be called exactly once") - - # Verify the call was made with a message containing the original error info - call_args = mock_sanitizer.sanitize.call_args[0][0] - self.assertIn("example.com", call_args, "Sanitizer should be called with message containing URL") - - # Verify telemetry event was written with the mock-sanitized message - event_files = os.listdir(writer.events_folder_path) - self.assertTrue(len(event_files) > 0, "Event file should be created") - - with open(os.path.join(writer.events_folder_path, event_files[0]), 'r') as f: - events = json.load(f) - # The message should be the one returned by our mock - self.assertIn("sanitized_message", events[0]["Message"]) - f.close() - finally: - shutil.rmtree(writer.events_folder_path) - if __name__ == '__main__': SUITE = unittest.TestLoader().loadTestsFromTestCase(TestTelemetryWriter) unittest.TextTestRunner(verbosity=2).run(SUITE) diff --git a/src/extension/tests/helpers/RuntimeComposer.py b/src/extension/tests/helpers/RuntimeComposer.py index 1c6dd426c..f9fdcf710 100644 --- a/src/extension/tests/helpers/RuntimeComposer.py +++ b/src/extension/tests/helpers/RuntimeComposer.py @@ -22,7 +22,7 @@ def __init__(self): self.json_file_handler = JsonFileHandler(self.logger) self.env_layer = EnvLayer() self.env_health_manager = EnvHealthManager(self.env_layer) - self.credential_sanitizer = CredentialSanitizer() + self.credential_sanitizer = CredentialSanitizer(self.logger) self.telemetry_writer = TelemetryWriter(self.logger, self.env_layer, self.credential_sanitizer) time.sleep = self.mock_sleep self.env_layer.is_tty_required = self.mock_is_tty_required From 292547e9131714428fa2770fd105c158211effa1 Mon Sep 17 00:00:00 2001 From: Yashna Parikh Date: Thu, 30 Apr 2026 14:42:28 -0400 Subject: [PATCH 08/16] fix UT coverage --- src/extension/tests/Test_TelemetryWriter.py | 36 +++------------------ 1 file changed, 4 insertions(+), 32 deletions(-) diff --git a/src/extension/tests/Test_TelemetryWriter.py b/src/extension/tests/Test_TelemetryWriter.py index 838e080da..4cc6819c1 100644 --- a/src/extension/tests/Test_TelemetryWriter.py +++ b/src/extension/tests/Test_TelemetryWriter.py @@ -187,6 +187,8 @@ def test_load_sanitized_event_full_path(self): original_is_github_runner = self.runtime.is_github_runner self.runtime.is_github_runner = False + self.telemetry_writer.events_folder_path = tempfile.mkdtemp() + message = "https://user:pass@example.com" result = self._load_sanitized_event(message) @@ -201,20 +203,11 @@ def test_load_sanitized_event_full_path(self): # ==================== Unit Tests for Credential Sanitization ==================== def test_sanitize_credentials_from_uri_https_credentials_leak(self): """ Test sanitization of HTTPS URIs with credentials """ - if self.runtime.is_github_runner: - return - - # Verify events folder is empty before test - self.assertTrue(len(os.listdir(self.telemetry_writer.events_folder_path)) == 0, "Events folder should be empty before writing") - message = "Error connecting to https://testuser:TESTTOKEN123456@invalid.repo.example/rpm/repodata/repomd.xml" self.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") - # Verify exactly one event file was created event_files = os.listdir(self.telemetry_writer.events_folder_path) - self.assertEqual(len(event_files), 1, "Events folder should contain exactly one event file") - - with open(os.path.join(self.telemetry_writer.events_folder_path, event_files[0]), 'r+') as f: + with open(os.path.join(self.telemetry_writer.events_folder_path, event_files[-1]), 'r+') as f: events = json.load(f) self.assertTrue(events is not None) self.assertEqual(events[-1]["TaskName"], "Test Task") @@ -224,9 +217,6 @@ def test_sanitize_credentials_from_uri_https_credentials_leak(self): def test_sanitize_credentials_from_uri_http_credentials_leak(self): """ Test sanitization of HTTP URIs with credentials """ - if self.runtime.is_github_runner: - return - message = "Connection failed to http://user123:password123@example.com/path" self.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") @@ -241,9 +231,6 @@ def test_sanitize_credentials_from_uri_http_credentials_leak(self): def test_sanitize_credentials_multiple_urls_with_credentials_leak(self): """ Test sanitization with multiple URLs containing credentials """ - if self.runtime.is_github_runner: - return - message = "Failed to fetch from https://user1:pass1@host1.com/api and http://user2:pass2@host2.com/data" self.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") @@ -258,9 +245,6 @@ def test_sanitize_credentials_multiple_urls_with_credentials_leak(self): def test_sanitize_credentials_with_no_credentials_in_input_with_credentials_leak(self): """ ERROR with 401 status code from jfrog.io """ - if self.runtime.is_github_runner: - return - message = "ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml" self.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") @@ -274,9 +258,6 @@ def test_sanitize_credentials_with_no_credentials_in_input_with_credentials_leak def test_sanitize_credentials_with_error_and_credentials_leak(self): """ Curl error with buildbot:BuildBotToken credentials """ - if self.runtime.is_github_runner: - return - message = ("Curl error (6): Couldn't resolve host 'packages.microsoft.com' Could not " "retrieve mirrorlist https://buildbot:BuildBotToken@mirror.example.com/repodata/repomd.xml") self.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") @@ -292,12 +273,6 @@ def test_sanitize_credentials_with_error_and_credentials_leak(self): def test_sanitize_credentials_expired_with_credentials_leak_in_input(self): """ ERROR with expired SSL certs and TESTTOKEN123456 """ - if self.runtime.is_github_runner: - return - - # Verify events folder is empty before test - self.assertTrue(len(os.listdir(self.telemetry_writer.events_folder_path)) == 0, "Events folder should be empty before writing") - message = ("ERROR: Customer environment error (expired SSL certs): " "Command=sudo yum update -y --disablerepo='*' " "--enablerepo='microsoft' !!Code=11 Out- Updating " @@ -309,11 +284,8 @@ def test_sanitize_credentials_expired_with_credentials_leak_in_input(self): "Cannot download repomd.xml: All mirrors were tried") self.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") - # Verify exactly one event file was created event_files = os.listdir(self.telemetry_writer.events_folder_path) - self.assertEqual(len(event_files), 1, "Events folder should contain exactly one event file") - - with open(os.path.join(self.telemetry_writer.events_folder_path, event_files[0]), 'r+') as f: + with open(os.path.join(self.telemetry_writer.events_folder_path, event_files[-1]), 'r+') as f: events = json.load(f) self.assertTrue(events is not None) self.assertEqual(events[-1]["TaskName"], "Test Task") From 5e8f80f9b2bf00960557661ef251fd92d74b5b3a Mon Sep 17 00:00:00 2001 From: Yashna Parikh Date: Thu, 30 Apr 2026 15:48:14 -0400 Subject: [PATCH 09/16] Remove couple of tests that are repeatative --- src/extension/tests/Test_TelemetryWriter.py | 30 --------------------- 1 file changed, 30 deletions(-) diff --git a/src/extension/tests/Test_TelemetryWriter.py b/src/extension/tests/Test_TelemetryWriter.py index 4cc6819c1..73fef09f7 100644 --- a/src/extension/tests/Test_TelemetryWriter.py +++ b/src/extension/tests/Test_TelemetryWriter.py @@ -187,8 +187,6 @@ def test_load_sanitized_event_full_path(self): original_is_github_runner = self.runtime.is_github_runner self.runtime.is_github_runner = False - self.telemetry_writer.events_folder_path = tempfile.mkdtemp() - message = "https://user:pass@example.com" result = self._load_sanitized_event(message) @@ -201,34 +199,6 @@ def test_load_sanitized_event_full_path(self): self.runtime.is_github_runner = original_is_github_runner # ==================== Unit Tests for Credential Sanitization ==================== - def test_sanitize_credentials_from_uri_https_credentials_leak(self): - """ Test sanitization of HTTPS URIs with credentials """ - message = "Error connecting to https://testuser:TESTTOKEN123456@invalid.repo.example/rpm/repodata/repomd.xml" - self.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") - - event_files = os.listdir(self.telemetry_writer.events_folder_path) - with open(os.path.join(self.telemetry_writer.events_folder_path, event_files[-1]), 'r+') as f: - events = json.load(f) - self.assertTrue(events is not None) - self.assertEqual(events[-1]["TaskName"], "Test Task") - expected_message = ("Error connecting to https://testuser@invalid.repo.example/rpm/repodata/repomd.xml") - self.assertEqual(expected_message, events[-1]["Message"]) - f.close() - - def test_sanitize_credentials_from_uri_http_credentials_leak(self): - """ Test sanitization of HTTP URIs with credentials """ - message = "Connection failed to http://user123:password123@example.com/path" - self.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") - - event_files = os.listdir(self.telemetry_writer.events_folder_path) - with open(os.path.join(self.telemetry_writer.events_folder_path, event_files[0]), 'r+') as f: - events = json.load(f) - self.assertTrue(events is not None) - self.assertEqual(events[-1]["TaskName"], "Test Task") - expected_message = ("Connection failed to http://user123@example.com/path") - self.assertEqual(expected_message, events[-1]["Message"]) - f.close() - def test_sanitize_credentials_multiple_urls_with_credentials_leak(self): """ Test sanitization with multiple URLs containing credentials """ message = "Failed to fetch from https://user1:pass1@host1.com/api and http://user2:pass2@host2.com/data" From e489e9f3034cb114c571bbae99128c8fd59aa54f Mon Sep 17 00:00:00 2001 From: Yashna Parikh Date: Thu, 30 Apr 2026 16:55:17 -0400 Subject: [PATCH 10/16] Convert few tests to UT's to fix code coverage --- src/extension/tests/Test_TelemetryWriter.py | 112 ++++++++------------ 1 file changed, 45 insertions(+), 67 deletions(-) diff --git a/src/extension/tests/Test_TelemetryWriter.py b/src/extension/tests/Test_TelemetryWriter.py index 73fef09f7..36b011e5d 100644 --- a/src/extension/tests/Test_TelemetryWriter.py +++ b/src/extension/tests/Test_TelemetryWriter.py @@ -159,72 +159,56 @@ def test_events_deleted_outside_of_extension_while_extension_is_running(self): os.listdir = backup_os_listdir # ==================== Integration test for credential sanitization in telemetry ==================== - def _load_sanitized_event(self, message): - """ - Helper method to write event to telemetry and load the sanitized message. - The regex sanitization happens automatically in TelemetryWriter. - Args: - message: The message to write to telemetry - Returns: The sanitized message from the event - """ - if self.runtime.is_github_runner: - return None - - # Write event to telemetry - self.telemetry_writer.write_event(message) - - # Load the event file - event_files = os.listdir(self.telemetry_writer.events_folder_path) - with open(os.path.join(self.telemetry_writer.events_folder_path, event_files[0]), 'r+') as f: - events = json.load(f) - sanitized_message = events[0]["Message"] - f.close() - return sanitized_message - - def test_load_sanitized_event_full_path(self): - """Test: Helper method executes full path when not on GitHub runner""" - # Force is_github_runner to False to ensure full path coverage on CI - original_is_github_runner = self.runtime.is_github_runner - self.runtime.is_github_runner = False - - message = "https://user:pass@example.com" - result = self._load_sanitized_event(message) - - # On non-GitHub runner, should return the sanitized message - self.assertIsNotNone(result) - self.assertIn("user@example.com", result) - self.assertEqual("https://user@example.com", result) - - # Restore - self.runtime.is_github_runner = original_is_github_runner + # def _load_sanitized_event(self, message): + # """ + # Helper method to write event to telemetry and load the sanitized message. + # The regex sanitization happens automatically in TelemetryWriter. + # Args: + # message: The message to write to telemetry + # Returns: The sanitized message from the event + # """ + # if self.runtime.is_github_runner: + # return None + # + # # Write event to telemetry + # actual_message = self.telemetry_writer.write_event(message) + # + # # Load the event file + # event_files = os.listdir(self.telemetry_writer.events_folder_path) + # with open(os.path.join(self.telemetry_writer.events_folder_path, event_files[0]), 'r+') as f: + # events = json.load(f) + # sanitized_message = events[0]["Message"] + # f.close() + # return sanitized_message + # + # def test_load_sanitized_event_full_path(self): + # """Test: Helper method executes full path when not on GitHub runner""" + # # Force is_github_runner to False to ensure full path coverage on CI + # original_is_github_runner = self.runtime.is_github_runner + # self.runtime.is_github_runner = False + # + # message = "https://user:pass@example.com" + # result = self._load_sanitized_event(message) + # + # # On non-GitHub runner, should return the sanitized message + # self.assertIsNotNone(result) + # self.assertIn("user@example.com", result) + # self.assertEqual("https://user@example.com", result) + # + # # Restore + # self.runtime.is_github_runner = original_is_github_runner - # ==================== Unit Tests for Credential Sanitization ==================== def test_sanitize_credentials_multiple_urls_with_credentials_leak(self): """ Test sanitization with multiple URLs containing credentials """ message = "Failed to fetch from https://user1:pass1@host1.com/api and http://user2:pass2@host2.com/data" - self.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") - - event_files = os.listdir(self.telemetry_writer.events_folder_path) - with open(os.path.join(self.telemetry_writer.events_folder_path, event_files[0]), 'r+') as f: - events = json.load(f) - self.assertTrue(events is not None) - self.assertEqual(events[-1]["TaskName"], "Test Task") - expected_message = "Failed to fetch from https://user1@host1.com/api and http://user2@host2.com/data" - self.assertEqual(expected_message, events[-1]["Message"]) - f.close() + actual_message = self.telemetry_writer.credential_sanitizer.sanitize(message) + self.assertEqual("Failed to fetch from https://user1@host1.com/api and http://user2@host2.com/data", actual_message) def test_sanitize_credentials_with_no_credentials_in_input_with_credentials_leak(self): """ ERROR with 401 status code from jfrog.io """ message = "ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml" - self.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") - - event_files = os.listdir(self.telemetry_writer.events_folder_path) - with open(os.path.join(self.telemetry_writer.events_folder_path, event_files[0]), 'r+') as f: - events = json.load(f) - self.assertTrue(events is not None) - self.assertEqual(events[-1]["TaskName"], "Test Task") - self.assertEqual("ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml", events[-1]["Message"]) - f.close() + actual_message = self.telemetry_writer.credential_sanitizer.sanitize(message) + self.assertEqual("ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml", actual_message) def test_sanitize_credentials_with_error_and_credentials_leak(self): """ Curl error with buildbot:BuildBotToken credentials """ @@ -252,14 +236,9 @@ def test_sanitize_credentials_expired_with_credentials_leak_in_input(self): "for https://testuser:TESTTOKEN123456@packages-microsoft-com-prod/CENTRAL.rpm " "Error: Failed to download metadata for repo 'packages-microsoft-com-prod': " "Cannot download repomd.xml: All mirrors were tried") - self.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") + actual_message = self.telemetry_writer.credential_sanitizer.sanitize(message) - event_files = os.listdir(self.telemetry_writer.events_folder_path) - with open(os.path.join(self.telemetry_writer.events_folder_path, event_files[-1]), 'r+') as f: - events = json.load(f) - self.assertTrue(events is not None) - self.assertEqual(events[-1]["TaskName"], "Test Task") - expected_message = ("ERROR: Customer environment error (expired SSL certs): " + expected_message = ("ERROR: Customer environment error (expired SSL certs): " "Command=sudo yum update -y --disablerepo='*' " "--enablerepo='microsoft' !!Code=11 Out- Updating " "Subscription Management repositories. " @@ -268,8 +247,7 @@ def test_sanitize_credentials_expired_with_credentials_leak_in_input(self): "for https://testuser@packages-microsoft-com-prod/CENTRAL.rpm " "Error: Failed to download metadata for repo 'packages-microsoft-com-prod': " "Cannot download repomd.xml: All mirrors were tried") - self.assertEqual(expected_message, events[-1]["Message"]) - f.close() + self.assertEqual(expected_message, actual_message) def test_sanitize_credentials_exception_handling(self): """ Test exception handling: passing None should return the input unchanged """ From 20c682475ab64e215a36c73a62d16efa1cf2b317 Mon Sep 17 00:00:00 2001 From: Yashna Parikh Date: Fri, 1 May 2026 11:30:32 -0400 Subject: [PATCH 11/16] Coverage --- src/extension/tests/Test_TelemetryWriter.py | 121 ++++++++++++-------- 1 file changed, 74 insertions(+), 47 deletions(-) diff --git a/src/extension/tests/Test_TelemetryWriter.py b/src/extension/tests/Test_TelemetryWriter.py index 36b011e5d..cda078b48 100644 --- a/src/extension/tests/Test_TelemetryWriter.py +++ b/src/extension/tests/Test_TelemetryWriter.py @@ -159,56 +159,71 @@ def test_events_deleted_outside_of_extension_while_extension_is_running(self): os.listdir = backup_os_listdir # ==================== Integration test for credential sanitization in telemetry ==================== - # def _load_sanitized_event(self, message): - # """ - # Helper method to write event to telemetry and load the sanitized message. - # The regex sanitization happens automatically in TelemetryWriter. - # Args: - # message: The message to write to telemetry - # Returns: The sanitized message from the event - # """ - # if self.runtime.is_github_runner: - # return None - # - # # Write event to telemetry - # actual_message = self.telemetry_writer.write_event(message) - # - # # Load the event file - # event_files = os.listdir(self.telemetry_writer.events_folder_path) - # with open(os.path.join(self.telemetry_writer.events_folder_path, event_files[0]), 'r+') as f: - # events = json.load(f) - # sanitized_message = events[0]["Message"] - # f.close() - # return sanitized_message - # - # def test_load_sanitized_event_full_path(self): - # """Test: Helper method executes full path when not on GitHub runner""" - # # Force is_github_runner to False to ensure full path coverage on CI - # original_is_github_runner = self.runtime.is_github_runner - # self.runtime.is_github_runner = False - # - # message = "https://user:pass@example.com" - # result = self._load_sanitized_event(message) - # - # # On non-GitHub runner, should return the sanitized message - # self.assertIsNotNone(result) - # self.assertIn("user@example.com", result) - # self.assertEqual("https://user@example.com", result) - # - # # Restore - # self.runtime.is_github_runner = original_is_github_runner + def _load_sanitized_event(self, message): + """ + Helper method to write event to telemetry and load the sanitized message. + The regex sanitization happens automatically in TelemetryWriter. + Args: + message: The message to write to telemetry + Returns: The sanitized message from the event + """ + if self.runtime.is_github_runner: + return None + + # Write event to telemetry + actual_message = self.telemetry_writer.write_event(message) + + # Load the event file + event_files = os.listdir(self.telemetry_writer.events_folder_path) + with open(os.path.join(self.telemetry_writer.events_folder_path, event_files[0]), 'r+') as f: + events = json.load(f) + sanitized_message = events[0]["Message"] + f.close() + return sanitized_message + + def test_load_sanitized_event_full_path(self): + """Test: Helper method executes full path when not on GitHub runner""" + # Force is_github_runner to False to ensure full path coverage on CI + original_is_github_runner = self.runtime.is_github_runner + self.runtime.is_github_runner = False + + message = "https://user:pass@example.com" + result = self._load_sanitized_event(message) - def test_sanitize_credentials_multiple_urls_with_credentials_leak(self): + # On non-GitHub runner, should return the sanitized message + self.assertIsNotNone(result) + self.assertIn("user@example.com", result) + self.assertEqual("https://user@example.com", result) + + # Restore + self.runtime.is_github_runner = original_is_github_runner + + def test_sanitize_credentials_multiple_urls_with_credentials_leak_in_input(self): """ Test sanitization with multiple URLs containing credentials """ message = "Failed to fetch from https://user1:pass1@host1.com/api and http://user2:pass2@host2.com/data" - actual_message = self.telemetry_writer.credential_sanitizer.sanitize(message) - self.assertEqual("Failed to fetch from https://user1@host1.com/api and http://user2@host2.com/data", actual_message) + self.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") + + event_files = os.listdir(self.telemetry_writer.events_folder_path) + with open(os.path.join(self.telemetry_writer.events_folder_path, event_files[0]), 'r+') as f: + events = json.load(f) + self.assertTrue(events is not None) + self.assertEqual(events[-1]["TaskName"], "Test Task") + self.assertEqual("Failed to fetch from https://user1@host1.com/api and http://user2@host2.com/data", + events[-1]["Message"]) + f.close() def test_sanitize_credentials_with_no_credentials_in_input_with_credentials_leak(self): """ ERROR with 401 status code from jfrog.io """ message = "ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml" - actual_message = self.telemetry_writer.credential_sanitizer.sanitize(message) - self.assertEqual("ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml", actual_message) + self.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") + + event_files = os.listdir(self.telemetry_writer.events_folder_path) + with open(os.path.join(self.telemetry_writer.events_folder_path, event_files[0]), 'r+') as f: + events = json.load(f) + self.assertTrue(events is not None) + self.assertEqual(events[-1]["TaskName"], "Test Task") + self.assertEqual("ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml", events[-1]["Message"]) + f.close() def test_sanitize_credentials_with_error_and_credentials_leak(self): """ Curl error with buildbot:BuildBotToken credentials """ @@ -221,6 +236,9 @@ def test_sanitize_credentials_with_error_and_credentials_leak(self): events = json.load(f) self.assertTrue(events is not None) self.assertEqual(events[-1]["TaskName"], "Test Task") + # Token should be removed but username preserved + self.assertNotIn("BuildBotToken", events[-1]["Message"]) + self.assertIn("buildbot@mirror.example.com", events[-1]["Message"]) self.assertEqual(("Curl error (6): Couldn't resolve host 'packages.microsoft.com' Could not " "retrieve mirrorlist https://buildbot@mirror.example.com/repodata/repomd.xml"), events[-1]["Message"]) f.close() @@ -236,9 +254,17 @@ def test_sanitize_credentials_expired_with_credentials_leak_in_input(self): "for https://testuser:TESTTOKEN123456@packages-microsoft-com-prod/CENTRAL.rpm " "Error: Failed to download metadata for repo 'packages-microsoft-com-prod': " "Cannot download repomd.xml: All mirrors were tried") - actual_message = self.telemetry_writer.credential_sanitizer.sanitize(message) + self.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") - expected_message = ("ERROR: Customer environment error (expired SSL certs): " + event_files = os.listdir(self.telemetry_writer.events_folder_path) + with open(os.path.join(self.telemetry_writer.events_folder_path, event_files[0]), 'r+') as f: + events = json.load(f) + self.assertTrue(events is not None) + self.assertEqual(events[-1]["TaskName"], "Test Task") + # Token should be removed but username preserved + self.assertNotIn("TESTTOKEN123456", events[-1]["Message"]) + self.assertIn("testuser@packages-microsoft-com-prod", events[-1]["Message"]) + expected_message = ("ERROR: Customer environment error (expired SSL certs): " "Command=sudo yum update -y --disablerepo='*' " "--enablerepo='microsoft' !!Code=11 Out- Updating " "Subscription Management repositories. " @@ -247,11 +273,12 @@ def test_sanitize_credentials_expired_with_credentials_leak_in_input(self): "for https://testuser@packages-microsoft-com-prod/CENTRAL.rpm " "Error: Failed to download metadata for repo 'packages-microsoft-com-prod': " "Cannot download repomd.xml: All mirrors were tried") - self.assertEqual(expected_message, actual_message) + self.assertEqual(expected_message, events[-1]["Message"]) + f.close() def test_sanitize_credentials_exception_handling(self): """ Test exception handling: passing None should return the input unchanged """ - result = self.runtime.telemetry_writer.credential_sanitizer.sanitize(None) + result = self.telemetry_writer.credential_sanitizer.sanitize(None) self.assertIsNone(result) if __name__ == '__main__': From 8d86f4031bbcbbf6ba291a06bae31718dde2e38f Mon Sep 17 00:00:00 2001 From: Yashna Parikh Date: Tue, 5 May 2026 11:35:46 -0400 Subject: [PATCH 12/16] Address Code Review --- .../service_interfaces/CredentialSanitizer.py | 6 +- src/core/tests/Test_TelemetryWriter.py | 178 ++++++++++-------- src/extension/src/CredentialSanitizer.py | 8 +- src/extension/src/TelemetryWriter.py | 1 - src/extension/tests/Test_TelemetryWriter.py | 145 ++++++++------ 5 files changed, 187 insertions(+), 151 deletions(-) diff --git a/src/core/src/service_interfaces/CredentialSanitizer.py b/src/core/src/service_interfaces/CredentialSanitizer.py index 519250033..3d570692c 100644 --- a/src/core/src/service_interfaces/CredentialSanitizer.py +++ b/src/core/src/service_interfaces/CredentialSanitizer.py @@ -13,7 +13,6 @@ # limitations under the License. # # Requires Python 2.7+ -import logging import re @@ -36,11 +35,10 @@ def sanitize(self, message): # (1) scheme: https://, http://, or ftp:// # (2) username: one or more non-whitespace, non-slash, non-colon, non-@ characters # (3) password: zero or more non-whitespace, non-slash, non-@ characters - sanitized_message = re.sub( - r'(https?://|ftp://)([^:/@\s]+):([^@/\s]*)@',r'\1\2@',message) + sanitized_message = re.sub(r'(https?://|ftp://)([^:/@\s]+):([^@/\s]*)@',r'\1\2@',message) self.composite_logger.log_verbose("Message was sanitized to remove sensitive information. [InputMessage={0}][SanitizedMessage={1}]".format(str(message), str(sanitized_message))) return sanitized_message except Exception as error: - self.composite_logger.log_error("Error occurred while sanitizing credentials from message: {0}".format(repr(error))) + self.composite_logger.log_error("Error occurred while sanitizing credentials from message: [Error={0}]".format(repr(error))) return message diff --git a/src/core/tests/Test_TelemetryWriter.py b/src/core/tests/Test_TelemetryWriter.py index 8a4fdf936..c76d94717 100644 --- a/src/core/tests/Test_TelemetryWriter.py +++ b/src/core/tests/Test_TelemetryWriter.py @@ -71,7 +71,7 @@ def test_write_event(self): self.assertTrue(telemetry_event_counter_in_first_test_event is not None) self.assertTrue(telemetry_event_counter_in_second_test_event is not None) - self.assertTrue(int(telemetry_event_counter_in_second_test_event) - int(telemetry_event_counter_in_first_test_event) == 1 if telemetry_event_counter_in_first_test_event and telemetry_event_counter_in_second_test_event else False) + self.assertTrue(int(telemetry_event_counter_in_second_test_event) - int(telemetry_event_counter_in_first_test_event) == 1) def test_write_multiple_events_in_same_file(self): time_backup = time.time @@ -312,103 +312,125 @@ def test_write_event_with_buffer_true_and_empty_string_and_then_flush_with_non_e self.assertTrue(text_found.string.startswith("Message 1")) # ==================== Unit Tests for Credential Sanitization ==================== - def test_sanitize_credentials_from_uri_https_with_credentials_leak(self): - """ Test sanitization of HTTPS URIs with credentials """ - # Clear any existing event files before test + # ==================== Helper functions for Credential Sanitization Tests ==================== + def _clear_events_folder(self): + """ + Helper method to clear the events folder for sanitization test setup. + Removes all existing JSON event files. + """ for f in os.listdir(self.runtime.telemetry_writer.events_folder_path): if f.endswith('.json'): os.remove(os.path.join(self.runtime.telemetry_writer.events_folder_path, f)) - # Verify events folder is empty before test - self.assertTrue(len(os.listdir(self.runtime.telemetry_writer.events_folder_path)) == 0, "Events folder should be empty before writing") + def _read_event_from_file(self, file_index=None, event_index=-1): + """ + Helper method to open and read an event from an event file in the events folder. + Args: + file_index: Index of the event file to read. If None, uses latest file + event_index: Index of the event within the file (default: -1 for last event) + Returns: The parsed event dictionary from the JSON file + """ + event_files = [pos_json for pos_json in os.listdir(self.runtime.telemetry_writer.events_folder_path) if re.search('^[0-9]+.json$', pos_json)] + if not event_files: + raise Exception("No event files found in events folder") + + if file_index is None: + event_file_path = os.path.join(self.runtime.telemetry_writer.events_folder_path, event_files[-1]) + else: + event_file_path = os.path.join(self.runtime.telemetry_writer.events_folder_path, event_files[file_index]) + + with open(event_file_path, 'r+') as f: + events = json.load(f) + f.close() + if not events: + raise Exception("No events found in event file") + return events[event_index] + + def _get_message_without_tc(self, event): + """ + Helper method to extract the message without the TC (telemetry counter) portion. + Args: + event: The event dictionary + Returns: The message portion before " [TC=" marker + """ + return event["Message"][:event["Message"].rfind(" [TC=")] + + def _validate_sanitized_event(self, expected_message, task_name=None, event_index=-1, file_index=None): + """ + Helper method to validate an event's message and task name against expected values. + Args: + expected_message: The expected sanitized message (without TC counter) + task_name: The expected task name (optional validation) + event_index: Index of the event within the file (default: -1 for last event) + file_index: Index of the event file (default: None for latest file) + """ + event = self._read_event_from_file(file_index=file_index, event_index=event_index) + + self.assertIsNotNone(event) + message_without_tc = self._get_message_without_tc(event) + self.assertEqual(expected_message, message_without_tc) + if task_name is not None: + self.assertEqual(task_name, event["TaskName"]) + + # ==================== Credential Sanitization Test Cases ==================== + def test_sanitize_credentials_from_uri_https_with_credentials_leak(self): + """ Test sanitization of HTTPS URIs with credentials """ + self._clear_events_folder() + self.assertEqual(len([f for f in os.listdir(self.runtime.telemetry_writer.events_folder_path) if re.search('^[0-9]+.json$', f)]), 0) message = "Error connecting to https://testuser:TESTTOKEN123456@invalid.repo.example/rpm/repodata/repomd.xml" + expected_message = "Error connecting to https://testuser@invalid.repo.example/rpm/repodata/repomd.xml" + self.runtime.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") - # Verify exactly one event file was created - latest_event_file = [pos_json for pos_json in os.listdir(self.runtime.telemetry_writer.events_folder_path) if re.search('^[0-9]+.json$', pos_json)][-1] + # Validate exactly one event file was created event_files_count = len([f for f in os.listdir(self.runtime.telemetry_writer.events_folder_path) if re.search('^[0-9]+.json$', f)]) - self.assertEqual(event_files_count, 1, "Events folder should contain exactly one event file") + self.assertEqual(event_files_count, 1) - with open(os.path.join(self.runtime.telemetry_writer.events_folder_path, latest_event_file), 'r+') as f: - events = json.load(f) - self.assertTrue(events is not None) - self.assertEqual(events[-1]["TaskName"], "Test Task") - message_without_tc = events[-1]["Message"][:events[-1]["Message"].rfind(" [TC=")] - self.assertEqual("Error connecting to https://testuser@invalid.repo.example/rpm/repodata/repomd.xml", message_without_tc) - f.close() + # Validate using helper + self._validate_sanitized_event(expected_message, task_name="Test Task") def test_sanitize_credentials_from_uri_http_with_credentials_leak(self): """ Test sanitization of HTTP URIs with credentials """ message = "Connection failed to http://user123:password123@example.com/path" + expected_message = "Connection failed to http://user123@example.com/path" + self.runtime.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") - latest_event_file = [pos_json for pos_json in os.listdir(self.runtime.telemetry_writer.events_folder_path) if re.search('^[0-9]+.json$', pos_json)][-1] - with open(os.path.join(self.runtime.telemetry_writer.events_folder_path, latest_event_file), 'r+') as f: - events = json.load(f) - self.assertTrue(events is not None) - self.assertEqual(events[-1]["TaskName"], "Test Task") - # Verify entire message matches expected output (excluding TC counter) - message_without_tc = events[-1]["Message"][:events[-1]["Message"].rfind(" [TC=")] - self.assertEqual("Connection failed to http://user123@example.com/path", message_without_tc) - f.close() + self._validate_sanitized_event(expected_message, task_name="Test Task") def test_sanitize_credentials_multiple_urls_with_credentials_leak(self): """ Test sanitization with multiple URLs containing credentials """ message = "Failed to fetch from https://user1:pass1@host1.com/api and http://user2:pass2@host2.com/data" + expected_message = "Failed to fetch from https://user1@host1.com/api and http://user2@host2.com/data" + self.runtime.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") - latest_event_file = [pos_json for pos_json in os.listdir(self.runtime.telemetry_writer.events_folder_path) if re.search('^[0-9]+.json$', pos_json)][-1] - with open(os.path.join(self.runtime.telemetry_writer.events_folder_path, latest_event_file), 'r+') as f: - events = json.load(f) - self.assertTrue(events is not None) - self.assertEqual(events[-1]["TaskName"], "Test Task") - message_without_tc = events[-1]["Message"][:events[-1]["Message"].rfind(" [TC=")] - self.assertEqual("Failed to fetch from https://user1@host1.com/api and http://user2@host2.com/data", message_without_tc) - f.close() + self._validate_sanitized_event(expected_message, task_name="Test Task") def test_sanitize_credentials_with_error_and_no_credentials(self): """ ERROR with 401 status code from jfrog.io """ message = "ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml" + expected_message = "ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml" + self.runtime.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") - latest_event_file = [pos_json for pos_json in os.listdir(self.runtime.telemetry_writer.events_folder_path) if re.search('^[0-9]+.json$', pos_json)][-1] - with open(os.path.join(self.runtime.telemetry_writer.events_folder_path, latest_event_file), 'r+') as f: - events = json.load(f) - self.assertTrue(events is not None) - self.assertEqual(events[-1]["TaskName"], "Test Task") - # Verify entire message matches expected output (excluding TC counter) - message_without_tc = events[-1]["Message"][:events[-1]["Message"].rfind(" [TC=")] - self.assertEqual("ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml", message_without_tc) - f.close() + self._validate_sanitized_event(expected_message, task_name="Test Task") def test_sanitize_credentials_with_error_and_credentials_leak(self): """ Curl error with buildbot:BuildBotToken credentials """ message = ("Curl error (6): Couldn't resolve host 'packages.microsoft.com' Could not " "retrieve mirrorlist https://buildbot:BuildBotToken@mirror.example.com/repodata/repomd.xml") - self.runtime.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") + expected_message = ("Curl error (6): Couldn't resolve host 'packages.microsoft.com' Could not " + "retrieve mirrorlist https://buildbot@mirror.example.com/repodata/repomd.xml") - latest_event_file = [pos_json for pos_json in os.listdir(self.runtime.telemetry_writer.events_folder_path) if re.search('^[0-9]+.json$', pos_json)][-1] - with open(os.path.join(self.runtime.telemetry_writer.events_folder_path, latest_event_file), 'r+') as f: - events = json.load(f) - self.assertTrue(events is not None) - self.assertEqual(events[-1]["TaskName"], "Test Task") - # Verify entire message matches expected output (excluding TC counter) - message_without_tc = events[-1]["Message"][:events[-1]["Message"].rfind(" [TC=")] - expected_message = ("Curl error (6): Couldn't resolve host 'packages.microsoft.com' Could not " - "retrieve mirrorlist https://buildbot@mirror.example.com/repodata/repomd.xml") - self.assertEqual(expected_message, message_without_tc) - f.close() + self.runtime.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") + self._validate_sanitized_event(expected_message, task_name="Test Task") def test_sanitize_credentials_with_credentials_leak(self): """ ERROR with expired SSL certs and TESTTOKEN123456 """ - # Clear any existing event files before test - for f in os.listdir(self.runtime.telemetry_writer.events_folder_path): - if f.endswith('.json'): - os.remove(os.path.join(self.runtime.telemetry_writer.events_folder_path, f)) - - # Verify events folder is empty before test - self.assertTrue(len([f for f in os.listdir(self.runtime.telemetry_writer.events_folder_path) if re.search('^[0-9]+.json$', f)]) == 0, "Events folder should be empty before writing") + self._clear_events_folder() + self.assertEqual(len([f for f in os.listdir(self.runtime.telemetry_writer.events_folder_path) if re.search('^[0-9]+.json$', f)]), 0) message = ("ERROR: Customer environment error (expired SSL certs): " "Command=sudo yum update -y --disablerepo='*' " @@ -419,30 +441,22 @@ def test_sanitize_credentials_with_credentials_leak(self): "for https://testuser:TESTTOKEN123456@packages-microsoft-com-prod/CENTRAL.rpm " "Error: Failed to download metadata for repo 'packages-microsoft-com-prod': " "Cannot download repomd.xml: All mirrors were tried") + expected_message = ("ERROR: Customer environment error (expired SSL certs): " + "Command=sudo yum update -y --disablerepo='*' " + "--enablerepo='microsoft' !!Code=11 Out- Updating " + "Subscription Management repositories. " + "Unable to read consumer identity This system is not registered " + "with an entitlement server. Status code: 401 " + "for https://testuser@packages-microsoft-com-prod/CENTRAL.rpm " + "Error: Failed to download metadata for repo 'packages-microsoft-com-prod': " + "Cannot download repomd.xml: All mirrors were tried") + self.runtime.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") - # Verify exactly one event file was created + # Validate exactly one event file was created event_files_count = len([f for f in os.listdir(self.runtime.telemetry_writer.events_folder_path) if re.search('^[0-9]+.json$', f)]) - self.assertEqual(event_files_count, 1, "Events folder should contain exactly one event file") - - latest_event_file = [pos_json for pos_json in os.listdir(self.runtime.telemetry_writer.events_folder_path) if re.search('^[0-9]+.json$', pos_json)][-1] - with open(os.path.join(self.runtime.telemetry_writer.events_folder_path, latest_event_file), 'r+') as f: - events = json.load(f) - self.assertTrue(events is not None) - self.assertEqual(events[-1]["TaskName"], "Test Task") - # Verify entire message matches expected output (excluding TC counter) - message_without_tc = events[-1]["Message"][:events[-1]["Message"].rfind(" [TC=")] - expected_message = ("ERROR: Customer environment error (expired SSL certs): " - "Command=sudo yum update -y --disablerepo='*' " - "--enablerepo='microsoft' !!Code=11 Out- Updating " - "Subscription Management repositories. " - "Unable to read consumer identity This system is not registered " - "with an entitlement server. Status code: 401 " - "for https://testuser@packages-microsoft-com-prod/CENTRAL.rpm " - "Error: Failed to download metadata for repo 'packages-microsoft-com-prod': " - "Cannot download repomd.xml: All mirrors were tried") - self.assertEqual(expected_message, message_without_tc) - f.close() + self.assertEqual(event_files_count, 1) + self._validate_sanitized_event(expected_message, task_name="Test Task") def test_sanitize_credentials_exception_handling(self): """ Test exception handling: passing None should return the input unchanged """ diff --git a/src/extension/src/CredentialSanitizer.py b/src/extension/src/CredentialSanitizer.py index 4b47e7bc8..86967ac57 100644 --- a/src/extension/src/CredentialSanitizer.py +++ b/src/extension/src/CredentialSanitizer.py @@ -36,12 +36,10 @@ def sanitize(self, message): # (1) scheme: https://, http://, or ftp:// # (2) username: one or more non-whitespace, non-slash, non-colon, non-@ characters # (3) password: zero or more non-whitespace, non-slash, non-@ characters - sanitized_message = re.sub( - r'(https?://|ftp://)([^:/@\s]+):([^@/\s]*)@',r'\1\2@',message) - self.logger.log_verbose( - "Message was sanitized to remove sensitive information. [InputMessage={0}][SanitizedMessage={1}]".format(str(message), str(sanitized_message))) + sanitized_message = re.sub(r'(https?://|ftp://)([^:/@\s]+):([^@/\s]*)@',r'\1\2@',message) + self.logger.log_verbose("Message was sanitized to remove sensitive information. [InputMessage={0}][SanitizedMessage={1}]".format(str(message), str(sanitized_message))) return sanitized_message except Exception as error: - self.logger.log_error("Error occurred while sanitizing credentials from message: {0}".format(repr(error))) + self.logger.log_error("Error occurred while sanitizing credentials from message: [Error={0}]".format(repr(error))) return message diff --git a/src/extension/src/TelemetryWriter.py b/src/extension/src/TelemetryWriter.py index b00f19d70..0cd77ef76 100644 --- a/src/extension/src/TelemetryWriter.py +++ b/src/extension/src/TelemetryWriter.py @@ -74,7 +74,6 @@ def __ensure_message_restriction_compliance(self, full_message): self.logger.log_telemetry_module_error("Error occurred while formatting message for a telemetry event. [Error={0}]".format(repr(e))) raise - def __get_agent_supports_telemetry_from_env_var(self): """ Returns True if the env var AZURE_GUEST_AGENT_EXTENSION_SUPPORTED_FEATURES has a key of ExtensionTelemetryPipeline in the list. Value of the env var looks like this: diff --git a/src/extension/tests/Test_TelemetryWriter.py b/src/extension/tests/Test_TelemetryWriter.py index cda078b48..2c360e84e 100644 --- a/src/extension/tests/Test_TelemetryWriter.py +++ b/src/extension/tests/Test_TelemetryWriter.py @@ -5,8 +5,8 @@ import time import unittest from extension.src.Constants import Constants -from extension.tests.helpers.RuntimeComposer import RuntimeComposer from extension.tests.helpers.VirtualTerminal import VirtualTerminal +from extension.tests.helpers.RuntimeComposer import RuntimeComposer class TestTelemetryWriter(unittest.TestCase): @@ -158,7 +158,65 @@ def test_events_deleted_outside_of_extension_while_extension_is_running(self): self.telemetry_writer.write_event("testing telemetry write to file", Constants.TelemetryEventLevel.Error, "Test Task") os.listdir = backup_os_listdir - # ==================== Integration test for credential sanitization in telemetry ==================== + # ==================== Unit tests for credential sanitization in telemetry ==================== + def _clear_events_folder(self): + """ + Helper method to clear the events folder for sanitization test setup. + """ + shutil.rmtree(self.telemetry_writer.events_folder_path) + self.telemetry_writer.events_folder_path = tempfile.mkdtemp() + + def _read_event_from_file(self, file_index=None, event_index=-1): + """ + Helper method to open and read an event from an event file in the events folder. + Args: + file_index: Index of the event file to read. If None, uses latest file (default: None for latest file) + event_index: Index of the event within the file (default: -1 for last event) + Returns: The parsed event dictionary from the JSON file + """ + event_files = sorted(os.listdir(self.telemetry_writer.events_folder_path)) + if not event_files: + raise Exception("No event files found in events folder") + + if file_index is None: + event_file_path = os.path.join(self.telemetry_writer.events_folder_path, event_files[-1]) + else: + event_file_path = os.path.join(self.telemetry_writer.events_folder_path, event_files[file_index]) + + with open(event_file_path, 'r+') as f: + events = json.load(f) + f.close() + if not events: + raise Exception("No events found in event file") + return events[event_index] + + def _get_message_without_tc(self, event): + """ + Helper method to extract the message without the TC (telemetry counter) portion. + Args: + event: The event dictionary + Returns: The message from the event + """ + return event["Message"] + + def _validate_sanitized_event(self, expected_message, task_name=None, event_index=-1, file_index=None): + """ + Helper method to validate an event's message and task name against expected values. + This internally calls _read_event_from_file to retrieve the event. + Args: + expected_message: The expected sanitized message + task_name: The expected task name (optional validation) + event_index: Index of the event within the file (default: -1 for last event) + file_index: Index of the event file (default: None for latest file) + """ + event = self._read_event_from_file(file_index=file_index, event_index=event_index) + + self.assertIsNotNone(event) + message = self._get_message_without_tc(event) + self.assertEqual(expected_message, message) + if task_name is not None: + self.assertEqual(task_name, event["TaskName"]) + def _load_sanitized_event(self, message): """ Helper method to write event to telemetry and load the sanitized message. @@ -167,19 +225,12 @@ def _load_sanitized_event(self, message): message: The message to write to telemetry Returns: The sanitized message from the event """ - if self.runtime.is_github_runner: - return None - # Write event to telemetry actual_message = self.telemetry_writer.write_event(message) - # Load the event file - event_files = os.listdir(self.telemetry_writer.events_folder_path) - with open(os.path.join(self.telemetry_writer.events_folder_path, event_files[0]), 'r+') as f: - events = json.load(f) - sanitized_message = events[0]["Message"] - f.close() - return sanitized_message + # Load the event file using helper method (gets first event from latest file) + event = self._read_event_from_file(file_index=None, event_index=0) + return event["Message"] def test_load_sanitized_event_full_path(self): """Test: Helper method executes full path when not on GitHub runner""" @@ -198,52 +249,36 @@ def test_load_sanitized_event_full_path(self): # Restore self.runtime.is_github_runner = original_is_github_runner - def test_sanitize_credentials_multiple_urls_with_credentials_leak_in_input(self): + def test_sanitize_credentials_multiple_urls_with_credentials_leak(self): """ Test sanitization with multiple URLs containing credentials """ message = "Failed to fetch from https://user1:pass1@host1.com/api and http://user2:pass2@host2.com/data" + expected_message = "Failed to fetch from https://user1@host1.com/api and http://user2@host2.com/data" + self.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") - event_files = os.listdir(self.telemetry_writer.events_folder_path) - with open(os.path.join(self.telemetry_writer.events_folder_path, event_files[0]), 'r+') as f: - events = json.load(f) - self.assertTrue(events is not None) - self.assertEqual(events[-1]["TaskName"], "Test Task") - self.assertEqual("Failed to fetch from https://user1@host1.com/api and http://user2@host2.com/data", - events[-1]["Message"]) - f.close() + self._validate_sanitized_event(expected_message, task_name="Test Task", event_index=-1) def test_sanitize_credentials_with_no_credentials_in_input_with_credentials_leak(self): """ ERROR with 401 status code from jfrog.io """ message = "ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml" + expected_message = "ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml" + self.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") - event_files = os.listdir(self.telemetry_writer.events_folder_path) - with open(os.path.join(self.telemetry_writer.events_folder_path, event_files[0]), 'r+') as f: - events = json.load(f) - self.assertTrue(events is not None) - self.assertEqual(events[-1]["TaskName"], "Test Task") - self.assertEqual("ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml", events[-1]["Message"]) - f.close() + self._validate_sanitized_event(expected_message, task_name="Test Task", event_index=-1) def test_sanitize_credentials_with_error_and_credentials_leak(self): """ Curl error with buildbot:BuildBotToken credentials """ message = ("Curl error (6): Couldn't resolve host 'packages.microsoft.com' Could not " "retrieve mirrorlist https://buildbot:BuildBotToken@mirror.example.com/repodata/repomd.xml") + expected_message = ("Curl error (6): Couldn't resolve host 'packages.microsoft.com' Could not " + "retrieve mirrorlist https://buildbot@mirror.example.com/repodata/repomd.xml") + self.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") - event_files = os.listdir(self.telemetry_writer.events_folder_path) - with open(os.path.join(self.telemetry_writer.events_folder_path, event_files[0]), 'r+') as f: - events = json.load(f) - self.assertTrue(events is not None) - self.assertEqual(events[-1]["TaskName"], "Test Task") - # Token should be removed but username preserved - self.assertNotIn("BuildBotToken", events[-1]["Message"]) - self.assertIn("buildbot@mirror.example.com", events[-1]["Message"]) - self.assertEqual(("Curl error (6): Couldn't resolve host 'packages.microsoft.com' Could not " - "retrieve mirrorlist https://buildbot@mirror.example.com/repodata/repomd.xml"), events[-1]["Message"]) - f.close() + self._validate_sanitized_event(expected_message, task_name="Test Task", event_index=-1) - def test_sanitize_credentials_expired_with_credentials_leak_in_input(self): + def test_sanitize_credentials_expired_with_credentials_leak(self): """ ERROR with expired SSL certs and TESTTOKEN123456 """ message = ("ERROR: Customer environment error (expired SSL certs): " "Command=sudo yum update -y --disablerepo='*' " @@ -254,27 +289,19 @@ def test_sanitize_credentials_expired_with_credentials_leak_in_input(self): "for https://testuser:TESTTOKEN123456@packages-microsoft-com-prod/CENTRAL.rpm " "Error: Failed to download metadata for repo 'packages-microsoft-com-prod': " "Cannot download repomd.xml: All mirrors were tried") + expected_message = ("ERROR: Customer environment error (expired SSL certs): " + "Command=sudo yum update -y --disablerepo='*' " + "--enablerepo='microsoft' !!Code=11 Out- Updating " + "Subscription Management repositories. " + "Unable to read consumer identity This system is not registered " + "with an entitlement server. Status code: 401 " + "for https://testuser@packages-microsoft-com-prod/CENTRAL.rpm " + "Error: Failed to download metadata for repo 'packages-microsoft-com-prod': " + "Cannot download repomd.xml: All mirrors were tried") + self.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") - event_files = os.listdir(self.telemetry_writer.events_folder_path) - with open(os.path.join(self.telemetry_writer.events_folder_path, event_files[0]), 'r+') as f: - events = json.load(f) - self.assertTrue(events is not None) - self.assertEqual(events[-1]["TaskName"], "Test Task") - # Token should be removed but username preserved - self.assertNotIn("TESTTOKEN123456", events[-1]["Message"]) - self.assertIn("testuser@packages-microsoft-com-prod", events[-1]["Message"]) - expected_message = ("ERROR: Customer environment error (expired SSL certs): " - "Command=sudo yum update -y --disablerepo='*' " - "--enablerepo='microsoft' !!Code=11 Out- Updating " - "Subscription Management repositories. " - "Unable to read consumer identity This system is not registered " - "with an entitlement server. Status code: 401 " - "for https://testuser@packages-microsoft-com-prod/CENTRAL.rpm " - "Error: Failed to download metadata for repo 'packages-microsoft-com-prod': " - "Cannot download repomd.xml: All mirrors were tried") - self.assertEqual(expected_message, events[-1]["Message"]) - f.close() + self._validate_sanitized_event(expected_message, task_name="Test Task", event_index=-1) def test_sanitize_credentials_exception_handling(self): """ Test exception handling: passing None should return the input unchanged """ From 3ddb7993de2fd28fcae0ca6f6114e44f8ae324e2 Mon Sep 17 00:00:00 2001 From: Yashna Parikh Date: Thu, 7 May 2026 09:17:40 -0400 Subject: [PATCH 13/16] Code coverage fix --- scratch/config/ExtState.json | 1 + scratch/log/events/1777045280270.json | 1 + scratch/log/events/1777045280353.json | 1 + scratch/log/events/1777045280356.json | 1 + scratch/log/events/1777045280358.json | 1 + scratch/status/1.complete.status | 1 + scratch/status/1.status | 1 + scratch/tmp/temp1.list | 1 + src/config/26.settings | 24 ++++ .../package_managers/Dnf5PackageManager.py | 0 src/core/tests/Test_CredentialSanitizer.py | 122 +++++++++++++++++ .../tests/Test_CredentialSanitizer.py | 125 ++++++++++++++++++ src/extension/tests/Test_TelemetryWriter.py | 70 +++------- src/scratch/config/ExtState.json | 1 + src/scratch/log/events/1234000.json | 1 + src/scratch/log/events/1777995214540.json | 1 + src/scratch/log/events/1777995214585.json | 1 + src/scratch/log/events/1777995214589.json | 1 + src/scratch/log/events/1777995214592.json | 1 + src/scratch/status/1.complete.status | 1 + src/scratch/status/1.status | 1 + src/scratch/tmp/temp1.list | 1 + test_real_event.py | 0 test_real_message.py | 0 test_sanitization.py | 0 test_sanitize.py | 0 tools/sanitization_demo.py | 0 27 files changed, 303 insertions(+), 55 deletions(-) create mode 100644 scratch/config/ExtState.json create mode 100644 scratch/log/events/1777045280270.json create mode 100644 scratch/log/events/1777045280353.json create mode 100644 scratch/log/events/1777045280356.json create mode 100644 scratch/log/events/1777045280358.json create mode 100644 scratch/status/1.complete.status create mode 100644 scratch/status/1.status create mode 100644 scratch/tmp/temp1.list create mode 100644 src/config/26.settings create mode 100644 src/core/src/package_managers/Dnf5PackageManager.py create mode 100644 src/core/tests/Test_CredentialSanitizer.py create mode 100644 src/extension/tests/Test_CredentialSanitizer.py create mode 100644 src/scratch/config/ExtState.json create mode 100644 src/scratch/log/events/1234000.json create mode 100644 src/scratch/log/events/1777995214540.json create mode 100644 src/scratch/log/events/1777995214585.json create mode 100644 src/scratch/log/events/1777995214589.json create mode 100644 src/scratch/log/events/1777995214592.json create mode 100644 src/scratch/status/1.complete.status create mode 100644 src/scratch/status/1.status create mode 100644 src/scratch/tmp/temp1.list create mode 100644 test_real_event.py create mode 100644 test_real_message.py create mode 100644 test_sanitization.py create mode 100644 test_sanitize.py create mode 100644 tools/sanitization_demo.py diff --git a/scratch/config/ExtState.json b/scratch/config/ExtState.json new file mode 100644 index 000000000..c1d864ad5 --- /dev/null +++ b/scratch/config/ExtState.json @@ -0,0 +1 @@ +{"extensionSequence": {"number": "1", "achieveEnableBy": "2026-04-24T15:41:20.374263Z", "operation": "Installation"}} \ No newline at end of file diff --git a/scratch/log/events/1777045280270.json b/scratch/log/events/1777045280270.json new file mode 100644 index 000000000..296dc2804 --- /dev/null +++ b/scratch/log/events/1777045280270.json @@ -0,0 +1 @@ +[{"Version": "1.6.64", "Timestamp": "2026-04-24 15:41:20.270182", "TaskName": "Core.Startup_15:41:20_20760", "EventLevel": "Informational", "Message": "Started Linux patch core operation. [TC=1]", "EventPid": "", "EventTid": "", "OperationId": "2026-04-24 15:41:20.269189"}] \ No newline at end of file diff --git a/scratch/log/events/1777045280353.json b/scratch/log/events/1777045280353.json new file mode 100644 index 000000000..9e7cab0dd --- /dev/null +++ b/scratch/log/events/1777045280353.json @@ -0,0 +1 @@ +[{"Version": "1.6.64", "Timestamp": "2026-04-24 15:41:20.352251", "TaskName": "Core.Startup_15:41:20_20760", "EventLevel": "Informational", "Message": "Machine info is: [PlatformName=][PlatformVersion=][MachineCpu=No information found][MachineArch=AMD64][DiskType=Unknown] [TC=2]", "EventPid": "", "EventTid": "", "OperationId": "2026-04-24 15:41:20.269189"}] \ No newline at end of file diff --git a/scratch/log/events/1777045280356.json b/scratch/log/events/1777045280356.json new file mode 100644 index 000000000..37d6b9cf8 --- /dev/null +++ b/scratch/log/events/1777045280356.json @@ -0,0 +1 @@ +[{"Version": "1.6.64", "Timestamp": "2026-04-24 15:41:20.356248", "TaskName": "Core.Startup_15:41:20_20760", "EventLevel": "Informational", "Message": "Started Linux patch core operation. [TC=1]", "EventPid": "", "EventTid": "", "OperationId": "2026-04-24 15:41:20.355247"}] \ No newline at end of file diff --git a/scratch/log/events/1777045280358.json b/scratch/log/events/1777045280358.json new file mode 100644 index 000000000..8639402cd --- /dev/null +++ b/scratch/log/events/1777045280358.json @@ -0,0 +1 @@ +[{"Version": "1.6.64", "Timestamp": "2026-04-24 15:41:20.358249", "TaskName": "Core.Startup_15:41:20_20760", "EventLevel": "Informational", "Message": "Machine info is: [PlatformName=Ubuntu][PlatformVersion=16.04][MachineCpu=Intel(R) Core(TM) i7-6700 CPU @ 3.40GHz][MachineArch=x86_64][DiskType=Unknown] [TC=2]", "EventPid": "", "EventTid": "", "OperationId": "2026-04-24 15:41:20.355247"}] \ No newline at end of file diff --git a/scratch/status/1.complete.status b/scratch/status/1.complete.status new file mode 100644 index 000000000..d4dc81886 --- /dev/null +++ b/scratch/status/1.complete.status @@ -0,0 +1 @@ +[{"version": 1.0, "timestampUTC": "2026-04-24T15:41:20Z", "status": {"name": "Azure Patch Management", "operation": "Installation", "status": "success", "code": 0, "formattedMessage": {"lang": "en-US", "message": ""}, "substatus": []}}] \ No newline at end of file diff --git a/scratch/status/1.status b/scratch/status/1.status new file mode 100644 index 000000000..d4dc81886 --- /dev/null +++ b/scratch/status/1.status @@ -0,0 +1 @@ +[{"version": 1.0, "timestampUTC": "2026-04-24T15:41:20Z", "status": {"name": "Azure Patch Management", "operation": "Installation", "status": "success", "code": 0, "formattedMessage": {"lang": "en-US", "message": ""}, "substatus": []}}] \ No newline at end of file diff --git a/scratch/tmp/temp1.list b/scratch/tmp/temp1.list new file mode 100644 index 000000000..7d7e801b2 --- /dev/null +++ b/scratch/tmp/temp1.list @@ -0,0 +1 @@ +test temp file \ No newline at end of file diff --git a/src/config/26.settings b/src/config/26.settings new file mode 100644 index 000000000..525419df2 --- /dev/null +++ b/src/config/26.settings @@ -0,0 +1,24 @@ +{ + runtimeSettings [{ + handlerSettings { + protectedSettingsCertThumbprint , + protectedSettings , + publicSettings { + operation Installation, + activityId 12345-2312-1234-23245-32112, + startTime 2021-08-08T123456Z, + maximumDuration PT3H, + rebootSetting IfRequired, + classificationsToInclude[Critical,Security], + patchesToInclude [ern=1.2, kern=1.23.45], + patchesToExclude [test, test], + internalSettings test, + maintenanceRunId 2019-07-20T121214Z, + healthStoreId 2019-07-20T121214Z, + patchMode AutomaticByPlatform, + assessmentMode AutomaticByPlatform, + maximumAssessmentInterval PT4H + } + } + }] +} \ No newline at end of file diff --git a/src/core/src/package_managers/Dnf5PackageManager.py b/src/core/src/package_managers/Dnf5PackageManager.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/core/tests/Test_CredentialSanitizer.py b/src/core/tests/Test_CredentialSanitizer.py new file mode 100644 index 000000000..86a714273 --- /dev/null +++ b/src/core/tests/Test_CredentialSanitizer.py @@ -0,0 +1,122 @@ +# Copyright 2026 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Requires Python 2.7+ + +import unittest +from core.src.service_interfaces.CredentialSanitizer import CredentialSanitizer +from core.tests.library.ArgumentComposer import ArgumentComposer +from core.tests.library.RuntimeCompositor import RuntimeCompositor + + +class TestCredentialSanitizer(unittest.TestCase): + """Tests for CredentialSanitizer class in core""" + + def setUp(self): + self.runtime = RuntimeCompositor(ArgumentComposer().get_composed_arguments(), True) + self.logger = self.runtime.composite_logger + self.sanitizer = CredentialSanitizer(self.logger) + + def tearDown(self): + self.runtime.stop() + + def test_sanitize_uri_with_credentials_all_schemes(self): + """Test sanitization of URIs (HTTPS, HTTP, FTP) with user:password@host pattern""" + # Test HTTPS + https_message = "Error connecting to https://testuser:TESTTOKEN123456@invalid.repo.example/rpm/repodata/repomd.xml" + https_result = self.sanitizer.sanitize(https_message) + self.assertNotIn("TESTTOKEN123456", https_result) + self.assertEqual("Error connecting to https://testuser@invalid.repo.example/rpm/repodata/repomd.xml", https_result) + + # Test HTTP + http_message = "Connection failed to http://user123:password123@example.com/path" + http_result = self.sanitizer.sanitize(http_message) + self.assertNotIn("password123", http_result) + self.assertIn("user123@example.com", http_result) + + # Test FTP + ftp_message = "ftp://user:pass@host/file" + ftp_result = self.sanitizer.sanitize(ftp_message) + self.assertEqual("ftp://user@host/file", ftp_result) + + def test_sanitize_multiple_urls_and_special_cases(self): + """Test multiple URLs, newlines, and special characters in credentials""" + # Test multiple URLs + multi_message = "a https://u:p@h and b https://u2:p2@h2" + self.assertEqual("a https://u@h and b https://u2@h2", self.sanitizer.sanitize(multi_message)) + + # Test newlines + newline_message = "err\nhttps://user:token@repo\nmore" + self.assertEqual("err\nhttps://user@repo\nmore", self.sanitizer.sanitize(newline_message)) + + # Test special characters in token + special_message = "https://user:tok-en_123.456@host" + self.assertEqual("https://user@host", self.sanitizer.sanitize(special_message)) + + # Test query string preservation + query_message = "https://user:tok@host/path?x=1&y=2" + self.assertEqual("https://user@host/path?x=1&y=2", self.sanitizer.sanitize(query_message)) + + def test_no_sanitization_when_not_needed(self): + """Test that URLs without credentials or non-URL patterns remain unchanged""" + # No userinfo + self.assertEqual("https://host/path", self.sanitizer.sanitize("https://host/path")) + + # Username only (no colon) + self.assertEqual("https://user@host", self.sanitizer.sanitize("https://user@host")) + + # Port number + self.assertEqual("https://host:8080/path", self.sanitizer.sanitize("https://host:8080/path")) + + # Query token (out of scope) + self.assertEqual("https://host/path?token=abc", self.sanitizer.sanitize("https://host/path?token=abc")) + + # Random colon-at without scheme + self.assertEqual("user:pass@host", self.sanitizer.sanitize("user:pass@host")) + + def test_edge_cases_and_exception_handling(self): + """Test edge cases: empty string, None input, messages without URIs""" + # None input + self.assertIsNone(self.sanitizer.sanitize(None)) + + # Empty string + self.assertEqual("", self.sanitizer.sanitize("")) + + # No URIs + message = "This is a normal error message without URLs" + self.assertEqual(message, self.sanitizer.sanitize(message)) + + # Message with @ but no scheme + message_with_at = "Failed auth user:pass@host" + self.assertEqual(message_with_at, self.sanitizer.sanitize(message_with_at)) + + def test_real_world_scenarios(self): + """Test real-world error messages with embedded credentials""" + # YUM error + yum_message = ("ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': " + "Status code: 401 for https://testuser:TESTTOKEN123456@packages-microsoft-com-prod/CENTRAL.rpm") + yum_result = self.sanitizer.sanitize(yum_message) + self.assertNotIn("TESTTOKEN123456", yum_result) + self.assertIn("testuser@packages-microsoft-com-prod", yum_result) + + # JFrog Artifactory error + jfrog_message = ("Failed to retrieve from https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml " + "Status code: 401 for https://user:token@cec-aa.jfrog.io/artifactory/repo") + jfrog_result = self.sanitizer.sanitize(jfrog_message) + self.assertNotIn(":token@", jfrog_result) + self.assertIn("user@cec-aa.jfrog.io", jfrog_result) + + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/src/extension/tests/Test_CredentialSanitizer.py b/src/extension/tests/Test_CredentialSanitizer.py new file mode 100644 index 000000000..2a8223e0a --- /dev/null +++ b/src/extension/tests/Test_CredentialSanitizer.py @@ -0,0 +1,125 @@ +# Copyright 2026 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Requires Python 2.7+ + +import unittest +from extension.src.CredentialSanitizer import CredentialSanitizer +from extension.tests.helpers.RuntimeComposer import RuntimeComposer +from extension.tests.helpers.VirtualTerminal import VirtualTerminal + + +class TestCredentialSanitizer(unittest.TestCase): + """Tests for CredentialSanitizer class""" + + def setUp(self): + VirtualTerminal().print_lowlight("\n----------------- setup test runner -----------------") + self.runtime = RuntimeComposer() + self.logger = self.runtime.logger + self.sanitizer = CredentialSanitizer(self.logger) + + def tearDown(self): + VirtualTerminal().print_lowlight("\n----------------- tear down test runner -----------------") + + def test_sanitize_uri_with_credentials_all_schemes(self): + """Test sanitization of URIs (HTTPS, HTTP, FTP) with user:password@host pattern""" + # Test HTTPS + https_message = "Error connecting to https://testuser:TESTTOKEN123456@invalid.repo.example/rpm/repodata/repomd.xml" + https_result = self.sanitizer.sanitize(https_message) + self.assertNotIn("TESTTOKEN123456", https_result) + self.assertEqual("Error connecting to https://testuser@invalid.repo.example/rpm/repodata/repomd.xml", https_result) + + # Test HTTP + http_message = "Connection failed to http://user123:password123@example.com/path" + http_result = self.sanitizer.sanitize(http_message) + self.assertNotIn("password123", http_result) + self.assertIn("user123@example.com", http_result) + + # Test FTP + ftp_message = "ftp://user:pass@host/file" + ftp_result = self.sanitizer.sanitize(ftp_message) + self.assertEqual("ftp://user@host/file", ftp_result) + + def test_sanitize_multiple_urls_and_special_cases(self): + """Test multiple URLs, newlines, and special characters in credentials""" + # Test multiple URLs + multi_message = "a https://u:p@h and b https://u2:p2@h2" + self.assertEqual("a https://u@h and b https://u2@h2", self.sanitizer.sanitize(multi_message)) + + # Test newlines + newline_message = "err\nhttps://user:token@repo\nmore" + self.assertEqual("err\nhttps://user@repo\nmore", self.sanitizer.sanitize(newline_message)) + + # Test special characters in token + special_message = "https://user:tok-en_123.456@host" + self.assertEqual("https://user@host", self.sanitizer.sanitize(special_message)) + + # Test query string preservation + query_message = "https://user:tok@host/path?x=1&y=2" + self.assertEqual("https://user@host/path?x=1&y=2", self.sanitizer.sanitize(query_message)) + + def test_no_sanitization_when_not_needed(self): + """Test that URLs without credentials or non-URL patterns remain unchanged""" + # No userinfo + self.assertEqual("https://host/path", self.sanitizer.sanitize("https://host/path")) + + # Username only (no colon) + self.assertEqual("https://user@host", self.sanitizer.sanitize("https://user@host")) + + # Port number + self.assertEqual("https://host:8080/path", self.sanitizer.sanitize("https://host:8080/path")) + + # Query token (out of scope) + self.assertEqual("https://host/path?token=abc", self.sanitizer.sanitize("https://host/path?token=abc")) + + # Random colon-at without scheme + self.assertEqual("user:pass@host", self.sanitizer.sanitize("user:pass@host")) + + def test_edge_cases_and_exception_handling(self): + """Test edge cases: empty string, None input, messages without URIs""" + # None input + self.assertIsNone(self.sanitizer.sanitize(None)) + + # Empty string + self.assertEqual("", self.sanitizer.sanitize("")) + + # No URIs + message = "This is a normal error message without URLs" + self.assertEqual(message, self.sanitizer.sanitize(message)) + + # Message with @ but no scheme + message_with_at = "Failed auth user:pass@host" + self.assertEqual(message_with_at, self.sanitizer.sanitize(message_with_at)) + + def test_real_world_scenarios(self): + """Test real-world error messages with embedded credentials""" + # YUM error + yum_message = ("ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': " + "Status code: 401 for https://testuser:TESTTOKEN123456@packages-microsoft-com-prod/CENTRAL.rpm") + yum_result = self.sanitizer.sanitize(yum_message) + self.assertNotIn("TESTTOKEN123456", yum_result) + self.assertIn("testuser@packages-microsoft-com-prod", yum_result) + + # JFrog Artifactory error + jfrog_message = ("Failed to retrieve from https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml " + "Status code: 401 for https://user:token@cec-aa.jfrog.io/artifactory/repo") + jfrog_result = self.sanitizer.sanitize(jfrog_message) + self.assertNotIn(":token@", jfrog_result) + self.assertIn("user@cec-aa.jfrog.io", jfrog_result) + + +if __name__ == '__main__': + unittest.main() + + diff --git a/src/extension/tests/Test_TelemetryWriter.py b/src/extension/tests/Test_TelemetryWriter.py index 2c360e84e..7b66fd545 100644 --- a/src/extension/tests/Test_TelemetryWriter.py +++ b/src/extension/tests/Test_TelemetryWriter.py @@ -190,15 +190,6 @@ def _read_event_from_file(self, file_index=None, event_index=-1): raise Exception("No events found in event file") return events[event_index] - def _get_message_without_tc(self, event): - """ - Helper method to extract the message without the TC (telemetry counter) portion. - Args: - event: The event dictionary - Returns: The message from the event - """ - return event["Message"] - def _validate_sanitized_event(self, expected_message, task_name=None, event_index=-1, file_index=None): """ Helper method to validate an event's message and task name against expected values. @@ -212,8 +203,7 @@ def _validate_sanitized_event(self, expected_message, task_name=None, event_inde event = self._read_event_from_file(file_index=file_index, event_index=event_index) self.assertIsNotNone(event) - message = self._get_message_without_tc(event) - self.assertEqual(expected_message, message) + self.assertEqual(expected_message, event["Message"]) if task_name is not None: self.assertEqual(task_name, event["TaskName"]) @@ -226,7 +216,7 @@ def _load_sanitized_event(self, message): Returns: The sanitized message from the event """ # Write event to telemetry - actual_message = self.telemetry_writer.write_event(message) + self.telemetry_writer.write_event(message) # Load the event file using helper method (gets first event from latest file) event = self._read_event_from_file(file_index=None, event_index=0) @@ -238,12 +228,9 @@ def test_load_sanitized_event_full_path(self): original_is_github_runner = self.runtime.is_github_runner self.runtime.is_github_runner = False - message = "https://user:pass@example.com" - result = self._load_sanitized_event(message) + result = self._load_sanitized_event("https://user:pass@example.com") - # On non-GitHub runner, should return the sanitized message self.assertIsNotNone(result) - self.assertIn("user@example.com", result) self.assertEqual("https://user@example.com", result) # Restore @@ -251,57 +238,30 @@ def test_load_sanitized_event_full_path(self): def test_sanitize_credentials_multiple_urls_with_credentials_leak(self): """ Test sanitization with multiple URLs containing credentials """ - message = "Failed to fetch from https://user1:pass1@host1.com/api and http://user2:pass2@host2.com/data" - expected_message = "Failed to fetch from https://user1@host1.com/api and http://user2@host2.com/data" - - self.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") + self.telemetry_writer.write_event("Failed to fetch from https://user1:pass1@host1.com/api and http://user2:pass2@host2.com/data", Constants.TelemetryEventLevel.Error, "Test Task") - self._validate_sanitized_event(expected_message, task_name="Test Task", event_index=-1) + self._validate_sanitized_event("Failed to fetch from https://user1@host1.com/api and http://user2@host2.com/data", task_name="Test Task", event_index=-1) def test_sanitize_credentials_with_no_credentials_in_input_with_credentials_leak(self): """ ERROR with 401 status code from jfrog.io """ - message = "ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml" - expected_message = "ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml" + self.telemetry_writer.write_event("ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml", Constants.TelemetryEventLevel.Error, "Test Task") - self.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") - - self._validate_sanitized_event(expected_message, task_name="Test Task", event_index=-1) + self._validate_sanitized_event("ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml", task_name="Test Task", event_index=-1) def test_sanitize_credentials_with_error_and_credentials_leak(self): """ Curl error with buildbot:BuildBotToken credentials """ - message = ("Curl error (6): Couldn't resolve host 'packages.microsoft.com' Could not " - "retrieve mirrorlist https://buildbot:BuildBotToken@mirror.example.com/repodata/repomd.xml") - expected_message = ("Curl error (6): Couldn't resolve host 'packages.microsoft.com' Could not " - "retrieve mirrorlist https://buildbot@mirror.example.com/repodata/repomd.xml") - - self.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") + self.telemetry_writer.write_event("Curl error (6): Couldn't resolve host 'packages.microsoft.com' Could not " + "retrieve mirrorlist https://buildbot:BuildBotToken@mirror.example.com/repodata/repomd.xml", Constants.TelemetryEventLevel.Error, "Test Task") - self._validate_sanitized_event(expected_message, task_name="Test Task", event_index=-1) + self._validate_sanitized_event("Curl error (6): Couldn't resolve host 'packages.microsoft.com' Could not " + "retrieve mirrorlist https://buildbot@mirror.example.com/repodata/repomd.xml", task_name="Test Task", event_index=-1) def test_sanitize_credentials_expired_with_credentials_leak(self): """ ERROR with expired SSL certs and TESTTOKEN123456 """ - message = ("ERROR: Customer environment error (expired SSL certs): " - "Command=sudo yum update -y --disablerepo='*' " - "--enablerepo='microsoft' !!Code=11 Out- Updating " - "Subscription Management repositories. " - "Unable to read consumer identity This system is not registered " - "with an entitlement server. Status code: 401 " - "for https://testuser:TESTTOKEN123456@packages-microsoft-com-prod/CENTRAL.rpm " - "Error: Failed to download metadata for repo 'packages-microsoft-com-prod': " - "Cannot download repomd.xml: All mirrors were tried") - expected_message = ("ERROR: Customer environment error (expired SSL certs): " - "Command=sudo yum update -y --disablerepo='*' " - "--enablerepo='microsoft' !!Code=11 Out- Updating " - "Subscription Management repositories. " - "Unable to read consumer identity This system is not registered " - "with an entitlement server. Status code: 401 " - "for https://testuser@packages-microsoft-com-prod/CENTRAL.rpm " - "Error: Failed to download metadata for repo 'packages-microsoft-com-prod': " - "Cannot download repomd.xml: All mirrors were tried") - - self.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") - - self._validate_sanitized_event(expected_message, task_name="Test Task", event_index=-1) + self.telemetry_writer.write_event("ERROR: Customer environment error (expired SSL certs):Command=sudo yum update -y --disablerepo='*' Status code: 401 " + "for https://testuser:TESTTOKEN123456@packages-microsoft-com-prod/CENTRAL.rpm", Constants.TelemetryEventLevel.Error, "Test Task") + self._validate_sanitized_event("ERROR: Customer environment error (expired SSL certs):Command=sudo yum update -y --disablerepo='*' Status code: 401 " + "for https://testuser@packages-microsoft-com-prod/CENTRAL.rpm", task_name="Test Task", event_index=-1) def test_sanitize_credentials_exception_handling(self): """ Test exception handling: passing None should return the input unchanged """ diff --git a/src/scratch/config/ExtState.json b/src/scratch/config/ExtState.json new file mode 100644 index 000000000..63a532a4b --- /dev/null +++ b/src/scratch/config/ExtState.json @@ -0,0 +1 @@ +{"extensionSequence": {"number": "1", "achieveEnableBy": "2026-05-05T15:33:34.612394Z", "operation": "Installation"}} \ No newline at end of file diff --git a/src/scratch/log/events/1234000.json b/src/scratch/log/events/1234000.json new file mode 100644 index 000000000..7d3635dd0 --- /dev/null +++ b/src/scratch/log/events/1234000.json @@ -0,0 +1 @@ +[{"Version": "1.6.64", "Timestamp": "2026-05-05 15:33:34.616416", "TaskName": "Test Task", "EventLevel": "Error", "Message": "testing telemetry write to file [TC=3]", "EventPid": "", "EventTid": "", "OperationId": "2026-05-05 15:33:34.589333"}, {"Version": "1.6.64", "Timestamp": "2026-05-05 15:33:34.619406", "TaskName": "Test Task2", "EventLevel": "Error", "Message": "testing telemetry write to file [TC=4]", "EventPid": "", "EventTid": "", "OperationId": "2026-05-05 15:33:34.589333"}] \ No newline at end of file diff --git a/src/scratch/log/events/1777995214540.json b/src/scratch/log/events/1777995214540.json new file mode 100644 index 000000000..c60e81dbb --- /dev/null +++ b/src/scratch/log/events/1777995214540.json @@ -0,0 +1 @@ +[{"Version": "1.6.64", "Timestamp": "2026-05-05 15:33:34.540176", "TaskName": "Core.Startup_15:33:34_32196", "EventLevel": "Informational", "Message": "Started Linux patch core operation. [TC=1]", "EventPid": "", "EventTid": "", "OperationId": "2026-05-05 15:33:34.540176"}] \ No newline at end of file diff --git a/src/scratch/log/events/1777995214585.json b/src/scratch/log/events/1777995214585.json new file mode 100644 index 000000000..64b0c010f --- /dev/null +++ b/src/scratch/log/events/1777995214585.json @@ -0,0 +1 @@ +[{"Version": "1.6.64", "Timestamp": "2026-05-05 15:33:34.585325", "TaskName": "Core.Startup_15:33:34_32196", "EventLevel": "Informational", "Message": "Machine info is: [PlatformName=][PlatformVersion=][MachineCpu=No information found][MachineArch=AMD64][DiskType=Unknown] [TC=2]", "EventPid": "", "EventTid": "", "OperationId": "2026-05-05 15:33:34.540176"}] \ No newline at end of file diff --git a/src/scratch/log/events/1777995214589.json b/src/scratch/log/events/1777995214589.json new file mode 100644 index 000000000..dd5c67a6c --- /dev/null +++ b/src/scratch/log/events/1777995214589.json @@ -0,0 +1 @@ +[{"Version": "1.6.64", "Timestamp": "2026-05-05 15:33:34.589333", "TaskName": "Core.Startup_15:33:34_32196", "EventLevel": "Informational", "Message": "Started Linux patch core operation. [TC=1]", "EventPid": "", "EventTid": "", "OperationId": "2026-05-05 15:33:34.589333"}] \ No newline at end of file diff --git a/src/scratch/log/events/1777995214592.json b/src/scratch/log/events/1777995214592.json new file mode 100644 index 000000000..bf4f147bb --- /dev/null +++ b/src/scratch/log/events/1777995214592.json @@ -0,0 +1 @@ +[{"Version": "1.6.64", "Timestamp": "2026-05-05 15:33:34.592325", "TaskName": "Core.Startup_15:33:34_32196", "EventLevel": "Informational", "Message": "Machine info is: [PlatformName=Ubuntu][PlatformVersion=16.04][MachineCpu=Intel(R) Core(TM) i7-6700 CPU @ 3.40GHz][MachineArch=x86_64][DiskType=Unknown] [TC=2]", "EventPid": "", "EventTid": "", "OperationId": "2026-05-05 15:33:34.589333"}] \ No newline at end of file diff --git a/src/scratch/status/1.complete.status b/src/scratch/status/1.complete.status new file mode 100644 index 000000000..782705291 --- /dev/null +++ b/src/scratch/status/1.complete.status @@ -0,0 +1 @@ +[{"version": 1.0, "timestampUTC": "2026-05-05T15:33:34Z", "status": {"name": "Azure Patch Management", "operation": "Installation", "status": "success", "code": 0, "formattedMessage": {"lang": "en-US", "message": ""}, "substatus": []}}] \ No newline at end of file diff --git a/src/scratch/status/1.status b/src/scratch/status/1.status new file mode 100644 index 000000000..782705291 --- /dev/null +++ b/src/scratch/status/1.status @@ -0,0 +1 @@ +[{"version": 1.0, "timestampUTC": "2026-05-05T15:33:34Z", "status": {"name": "Azure Patch Management", "operation": "Installation", "status": "success", "code": 0, "formattedMessage": {"lang": "en-US", "message": ""}, "substatus": []}}] \ No newline at end of file diff --git a/src/scratch/tmp/temp1.list b/src/scratch/tmp/temp1.list new file mode 100644 index 000000000..7d7e801b2 --- /dev/null +++ b/src/scratch/tmp/temp1.list @@ -0,0 +1 @@ +test temp file \ No newline at end of file diff --git a/test_real_event.py b/test_real_event.py new file mode 100644 index 000000000..e69de29bb diff --git a/test_real_message.py b/test_real_message.py new file mode 100644 index 000000000..e69de29bb diff --git a/test_sanitization.py b/test_sanitization.py new file mode 100644 index 000000000..e69de29bb diff --git a/test_sanitize.py b/test_sanitize.py new file mode 100644 index 000000000..e69de29bb diff --git a/tools/sanitization_demo.py b/tools/sanitization_demo.py new file mode 100644 index 000000000..e69de29bb From 5d433f36093855f8a335ebf5b7587be070259bbe Mon Sep 17 00:00:00 2001 From: Yashna Parikh Date: Thu, 7 May 2026 11:00:57 -0400 Subject: [PATCH 14/16] Update RunTimeComposer to fix issue --- src/extension/tests/helpers/RuntimeComposer.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/extension/tests/helpers/RuntimeComposer.py b/src/extension/tests/helpers/RuntimeComposer.py index f9fdcf710..590f06cc3 100644 --- a/src/extension/tests/helpers/RuntimeComposer.py +++ b/src/extension/tests/helpers/RuntimeComposer.py @@ -30,9 +30,12 @@ def __init__(self): self.is_github_runner = os.getenv('RUNNER_TEMP', None) is not None if self.is_github_runner: - def mkdtemp_runner(): - temp_path = os.path.join(os.getenv('RUNNER_TEMP'), str(uuid.uuid4())) - os.mkdir(temp_path) + def mkdtemp_runner(suffix=None, prefix=None, dir=None): + base_dir = os.getenv('RUNNER_TEMP') + if dir: + base_dir = dir + temp_path = os.path.join(base_dir, str(uuid.uuid4())) + os.makedirs(temp_path, exist_ok=True) return temp_path tempfile.mkdtemp = mkdtemp_runner From 6d4140c00ae8ab91ad1bdeecb3925c543c875a89 Mon Sep 17 00:00:00 2001 From: Yashna Parikh Date: Thu, 7 May 2026 11:14:21 -0400 Subject: [PATCH 15/16] Revert "Code coverage fix" This reverts commit 3ddb7993de2fd28fcae0ca6f6114e44f8ae324e2. --- scratch/config/ExtState.json | 1 - scratch/log/events/1777045280270.json | 1 - scratch/log/events/1777045280353.json | 1 - scratch/log/events/1777045280356.json | 1 - scratch/log/events/1777045280358.json | 1 - scratch/status/1.complete.status | 1 - scratch/status/1.status | 1 - scratch/tmp/temp1.list | 1 - src/config/26.settings | 24 ---- .../package_managers/Dnf5PackageManager.py | 0 src/core/tests/Test_CredentialSanitizer.py | 122 ----------------- .../tests/Test_CredentialSanitizer.py | 125 ------------------ src/extension/tests/Test_TelemetryWriter.py | 70 +++++++--- src/scratch/config/ExtState.json | 1 - src/scratch/log/events/1234000.json | 1 - src/scratch/log/events/1777995214540.json | 1 - src/scratch/log/events/1777995214585.json | 1 - src/scratch/log/events/1777995214589.json | 1 - src/scratch/log/events/1777995214592.json | 1 - src/scratch/status/1.complete.status | 1 - src/scratch/status/1.status | 1 - src/scratch/tmp/temp1.list | 1 - test_real_event.py | 0 test_real_message.py | 0 test_sanitization.py | 0 test_sanitize.py | 0 tools/sanitization_demo.py | 0 27 files changed, 55 insertions(+), 303 deletions(-) delete mode 100644 scratch/config/ExtState.json delete mode 100644 scratch/log/events/1777045280270.json delete mode 100644 scratch/log/events/1777045280353.json delete mode 100644 scratch/log/events/1777045280356.json delete mode 100644 scratch/log/events/1777045280358.json delete mode 100644 scratch/status/1.complete.status delete mode 100644 scratch/status/1.status delete mode 100644 scratch/tmp/temp1.list delete mode 100644 src/config/26.settings delete mode 100644 src/core/src/package_managers/Dnf5PackageManager.py delete mode 100644 src/core/tests/Test_CredentialSanitizer.py delete mode 100644 src/extension/tests/Test_CredentialSanitizer.py delete mode 100644 src/scratch/config/ExtState.json delete mode 100644 src/scratch/log/events/1234000.json delete mode 100644 src/scratch/log/events/1777995214540.json delete mode 100644 src/scratch/log/events/1777995214585.json delete mode 100644 src/scratch/log/events/1777995214589.json delete mode 100644 src/scratch/log/events/1777995214592.json delete mode 100644 src/scratch/status/1.complete.status delete mode 100644 src/scratch/status/1.status delete mode 100644 src/scratch/tmp/temp1.list delete mode 100644 test_real_event.py delete mode 100644 test_real_message.py delete mode 100644 test_sanitization.py delete mode 100644 test_sanitize.py delete mode 100644 tools/sanitization_demo.py diff --git a/scratch/config/ExtState.json b/scratch/config/ExtState.json deleted file mode 100644 index c1d864ad5..000000000 --- a/scratch/config/ExtState.json +++ /dev/null @@ -1 +0,0 @@ -{"extensionSequence": {"number": "1", "achieveEnableBy": "2026-04-24T15:41:20.374263Z", "operation": "Installation"}} \ No newline at end of file diff --git a/scratch/log/events/1777045280270.json b/scratch/log/events/1777045280270.json deleted file mode 100644 index 296dc2804..000000000 --- a/scratch/log/events/1777045280270.json +++ /dev/null @@ -1 +0,0 @@ -[{"Version": "1.6.64", "Timestamp": "2026-04-24 15:41:20.270182", "TaskName": "Core.Startup_15:41:20_20760", "EventLevel": "Informational", "Message": "Started Linux patch core operation. [TC=1]", "EventPid": "", "EventTid": "", "OperationId": "2026-04-24 15:41:20.269189"}] \ No newline at end of file diff --git a/scratch/log/events/1777045280353.json b/scratch/log/events/1777045280353.json deleted file mode 100644 index 9e7cab0dd..000000000 --- a/scratch/log/events/1777045280353.json +++ /dev/null @@ -1 +0,0 @@ -[{"Version": "1.6.64", "Timestamp": "2026-04-24 15:41:20.352251", "TaskName": "Core.Startup_15:41:20_20760", "EventLevel": "Informational", "Message": "Machine info is: [PlatformName=][PlatformVersion=][MachineCpu=No information found][MachineArch=AMD64][DiskType=Unknown] [TC=2]", "EventPid": "", "EventTid": "", "OperationId": "2026-04-24 15:41:20.269189"}] \ No newline at end of file diff --git a/scratch/log/events/1777045280356.json b/scratch/log/events/1777045280356.json deleted file mode 100644 index 37d6b9cf8..000000000 --- a/scratch/log/events/1777045280356.json +++ /dev/null @@ -1 +0,0 @@ -[{"Version": "1.6.64", "Timestamp": "2026-04-24 15:41:20.356248", "TaskName": "Core.Startup_15:41:20_20760", "EventLevel": "Informational", "Message": "Started Linux patch core operation. [TC=1]", "EventPid": "", "EventTid": "", "OperationId": "2026-04-24 15:41:20.355247"}] \ No newline at end of file diff --git a/scratch/log/events/1777045280358.json b/scratch/log/events/1777045280358.json deleted file mode 100644 index 8639402cd..000000000 --- a/scratch/log/events/1777045280358.json +++ /dev/null @@ -1 +0,0 @@ -[{"Version": "1.6.64", "Timestamp": "2026-04-24 15:41:20.358249", "TaskName": "Core.Startup_15:41:20_20760", "EventLevel": "Informational", "Message": "Machine info is: [PlatformName=Ubuntu][PlatformVersion=16.04][MachineCpu=Intel(R) Core(TM) i7-6700 CPU @ 3.40GHz][MachineArch=x86_64][DiskType=Unknown] [TC=2]", "EventPid": "", "EventTid": "", "OperationId": "2026-04-24 15:41:20.355247"}] \ No newline at end of file diff --git a/scratch/status/1.complete.status b/scratch/status/1.complete.status deleted file mode 100644 index d4dc81886..000000000 --- a/scratch/status/1.complete.status +++ /dev/null @@ -1 +0,0 @@ -[{"version": 1.0, "timestampUTC": "2026-04-24T15:41:20Z", "status": {"name": "Azure Patch Management", "operation": "Installation", "status": "success", "code": 0, "formattedMessage": {"lang": "en-US", "message": ""}, "substatus": []}}] \ No newline at end of file diff --git a/scratch/status/1.status b/scratch/status/1.status deleted file mode 100644 index d4dc81886..000000000 --- a/scratch/status/1.status +++ /dev/null @@ -1 +0,0 @@ -[{"version": 1.0, "timestampUTC": "2026-04-24T15:41:20Z", "status": {"name": "Azure Patch Management", "operation": "Installation", "status": "success", "code": 0, "formattedMessage": {"lang": "en-US", "message": ""}, "substatus": []}}] \ No newline at end of file diff --git a/scratch/tmp/temp1.list b/scratch/tmp/temp1.list deleted file mode 100644 index 7d7e801b2..000000000 --- a/scratch/tmp/temp1.list +++ /dev/null @@ -1 +0,0 @@ -test temp file \ No newline at end of file diff --git a/src/config/26.settings b/src/config/26.settings deleted file mode 100644 index 525419df2..000000000 --- a/src/config/26.settings +++ /dev/null @@ -1,24 +0,0 @@ -{ - runtimeSettings [{ - handlerSettings { - protectedSettingsCertThumbprint , - protectedSettings , - publicSettings { - operation Installation, - activityId 12345-2312-1234-23245-32112, - startTime 2021-08-08T123456Z, - maximumDuration PT3H, - rebootSetting IfRequired, - classificationsToInclude[Critical,Security], - patchesToInclude [ern=1.2, kern=1.23.45], - patchesToExclude [test, test], - internalSettings test, - maintenanceRunId 2019-07-20T121214Z, - healthStoreId 2019-07-20T121214Z, - patchMode AutomaticByPlatform, - assessmentMode AutomaticByPlatform, - maximumAssessmentInterval PT4H - } - } - }] -} \ No newline at end of file diff --git a/src/core/src/package_managers/Dnf5PackageManager.py b/src/core/src/package_managers/Dnf5PackageManager.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/src/core/tests/Test_CredentialSanitizer.py b/src/core/tests/Test_CredentialSanitizer.py deleted file mode 100644 index 86a714273..000000000 --- a/src/core/tests/Test_CredentialSanitizer.py +++ /dev/null @@ -1,122 +0,0 @@ -# Copyright 2026 Microsoft Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# Requires Python 2.7+ - -import unittest -from core.src.service_interfaces.CredentialSanitizer import CredentialSanitizer -from core.tests.library.ArgumentComposer import ArgumentComposer -from core.tests.library.RuntimeCompositor import RuntimeCompositor - - -class TestCredentialSanitizer(unittest.TestCase): - """Tests for CredentialSanitizer class in core""" - - def setUp(self): - self.runtime = RuntimeCompositor(ArgumentComposer().get_composed_arguments(), True) - self.logger = self.runtime.composite_logger - self.sanitizer = CredentialSanitizer(self.logger) - - def tearDown(self): - self.runtime.stop() - - def test_sanitize_uri_with_credentials_all_schemes(self): - """Test sanitization of URIs (HTTPS, HTTP, FTP) with user:password@host pattern""" - # Test HTTPS - https_message = "Error connecting to https://testuser:TESTTOKEN123456@invalid.repo.example/rpm/repodata/repomd.xml" - https_result = self.sanitizer.sanitize(https_message) - self.assertNotIn("TESTTOKEN123456", https_result) - self.assertEqual("Error connecting to https://testuser@invalid.repo.example/rpm/repodata/repomd.xml", https_result) - - # Test HTTP - http_message = "Connection failed to http://user123:password123@example.com/path" - http_result = self.sanitizer.sanitize(http_message) - self.assertNotIn("password123", http_result) - self.assertIn("user123@example.com", http_result) - - # Test FTP - ftp_message = "ftp://user:pass@host/file" - ftp_result = self.sanitizer.sanitize(ftp_message) - self.assertEqual("ftp://user@host/file", ftp_result) - - def test_sanitize_multiple_urls_and_special_cases(self): - """Test multiple URLs, newlines, and special characters in credentials""" - # Test multiple URLs - multi_message = "a https://u:p@h and b https://u2:p2@h2" - self.assertEqual("a https://u@h and b https://u2@h2", self.sanitizer.sanitize(multi_message)) - - # Test newlines - newline_message = "err\nhttps://user:token@repo\nmore" - self.assertEqual("err\nhttps://user@repo\nmore", self.sanitizer.sanitize(newline_message)) - - # Test special characters in token - special_message = "https://user:tok-en_123.456@host" - self.assertEqual("https://user@host", self.sanitizer.sanitize(special_message)) - - # Test query string preservation - query_message = "https://user:tok@host/path?x=1&y=2" - self.assertEqual("https://user@host/path?x=1&y=2", self.sanitizer.sanitize(query_message)) - - def test_no_sanitization_when_not_needed(self): - """Test that URLs without credentials or non-URL patterns remain unchanged""" - # No userinfo - self.assertEqual("https://host/path", self.sanitizer.sanitize("https://host/path")) - - # Username only (no colon) - self.assertEqual("https://user@host", self.sanitizer.sanitize("https://user@host")) - - # Port number - self.assertEqual("https://host:8080/path", self.sanitizer.sanitize("https://host:8080/path")) - - # Query token (out of scope) - self.assertEqual("https://host/path?token=abc", self.sanitizer.sanitize("https://host/path?token=abc")) - - # Random colon-at without scheme - self.assertEqual("user:pass@host", self.sanitizer.sanitize("user:pass@host")) - - def test_edge_cases_and_exception_handling(self): - """Test edge cases: empty string, None input, messages without URIs""" - # None input - self.assertIsNone(self.sanitizer.sanitize(None)) - - # Empty string - self.assertEqual("", self.sanitizer.sanitize("")) - - # No URIs - message = "This is a normal error message without URLs" - self.assertEqual(message, self.sanitizer.sanitize(message)) - - # Message with @ but no scheme - message_with_at = "Failed auth user:pass@host" - self.assertEqual(message_with_at, self.sanitizer.sanitize(message_with_at)) - - def test_real_world_scenarios(self): - """Test real-world error messages with embedded credentials""" - # YUM error - yum_message = ("ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': " - "Status code: 401 for https://testuser:TESTTOKEN123456@packages-microsoft-com-prod/CENTRAL.rpm") - yum_result = self.sanitizer.sanitize(yum_message) - self.assertNotIn("TESTTOKEN123456", yum_result) - self.assertIn("testuser@packages-microsoft-com-prod", yum_result) - - # JFrog Artifactory error - jfrog_message = ("Failed to retrieve from https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml " - "Status code: 401 for https://user:token@cec-aa.jfrog.io/artifactory/repo") - jfrog_result = self.sanitizer.sanitize(jfrog_message) - self.assertNotIn(":token@", jfrog_result) - self.assertIn("user@cec-aa.jfrog.io", jfrog_result) - - -if __name__ == '__main__': - unittest.main() \ No newline at end of file diff --git a/src/extension/tests/Test_CredentialSanitizer.py b/src/extension/tests/Test_CredentialSanitizer.py deleted file mode 100644 index 2a8223e0a..000000000 --- a/src/extension/tests/Test_CredentialSanitizer.py +++ /dev/null @@ -1,125 +0,0 @@ -# Copyright 2026 Microsoft Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# Requires Python 2.7+ - -import unittest -from extension.src.CredentialSanitizer import CredentialSanitizer -from extension.tests.helpers.RuntimeComposer import RuntimeComposer -from extension.tests.helpers.VirtualTerminal import VirtualTerminal - - -class TestCredentialSanitizer(unittest.TestCase): - """Tests for CredentialSanitizer class""" - - def setUp(self): - VirtualTerminal().print_lowlight("\n----------------- setup test runner -----------------") - self.runtime = RuntimeComposer() - self.logger = self.runtime.logger - self.sanitizer = CredentialSanitizer(self.logger) - - def tearDown(self): - VirtualTerminal().print_lowlight("\n----------------- tear down test runner -----------------") - - def test_sanitize_uri_with_credentials_all_schemes(self): - """Test sanitization of URIs (HTTPS, HTTP, FTP) with user:password@host pattern""" - # Test HTTPS - https_message = "Error connecting to https://testuser:TESTTOKEN123456@invalid.repo.example/rpm/repodata/repomd.xml" - https_result = self.sanitizer.sanitize(https_message) - self.assertNotIn("TESTTOKEN123456", https_result) - self.assertEqual("Error connecting to https://testuser@invalid.repo.example/rpm/repodata/repomd.xml", https_result) - - # Test HTTP - http_message = "Connection failed to http://user123:password123@example.com/path" - http_result = self.sanitizer.sanitize(http_message) - self.assertNotIn("password123", http_result) - self.assertIn("user123@example.com", http_result) - - # Test FTP - ftp_message = "ftp://user:pass@host/file" - ftp_result = self.sanitizer.sanitize(ftp_message) - self.assertEqual("ftp://user@host/file", ftp_result) - - def test_sanitize_multiple_urls_and_special_cases(self): - """Test multiple URLs, newlines, and special characters in credentials""" - # Test multiple URLs - multi_message = "a https://u:p@h and b https://u2:p2@h2" - self.assertEqual("a https://u@h and b https://u2@h2", self.sanitizer.sanitize(multi_message)) - - # Test newlines - newline_message = "err\nhttps://user:token@repo\nmore" - self.assertEqual("err\nhttps://user@repo\nmore", self.sanitizer.sanitize(newline_message)) - - # Test special characters in token - special_message = "https://user:tok-en_123.456@host" - self.assertEqual("https://user@host", self.sanitizer.sanitize(special_message)) - - # Test query string preservation - query_message = "https://user:tok@host/path?x=1&y=2" - self.assertEqual("https://user@host/path?x=1&y=2", self.sanitizer.sanitize(query_message)) - - def test_no_sanitization_when_not_needed(self): - """Test that URLs without credentials or non-URL patterns remain unchanged""" - # No userinfo - self.assertEqual("https://host/path", self.sanitizer.sanitize("https://host/path")) - - # Username only (no colon) - self.assertEqual("https://user@host", self.sanitizer.sanitize("https://user@host")) - - # Port number - self.assertEqual("https://host:8080/path", self.sanitizer.sanitize("https://host:8080/path")) - - # Query token (out of scope) - self.assertEqual("https://host/path?token=abc", self.sanitizer.sanitize("https://host/path?token=abc")) - - # Random colon-at without scheme - self.assertEqual("user:pass@host", self.sanitizer.sanitize("user:pass@host")) - - def test_edge_cases_and_exception_handling(self): - """Test edge cases: empty string, None input, messages without URIs""" - # None input - self.assertIsNone(self.sanitizer.sanitize(None)) - - # Empty string - self.assertEqual("", self.sanitizer.sanitize("")) - - # No URIs - message = "This is a normal error message without URLs" - self.assertEqual(message, self.sanitizer.sanitize(message)) - - # Message with @ but no scheme - message_with_at = "Failed auth user:pass@host" - self.assertEqual(message_with_at, self.sanitizer.sanitize(message_with_at)) - - def test_real_world_scenarios(self): - """Test real-world error messages with embedded credentials""" - # YUM error - yum_message = ("ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': " - "Status code: 401 for https://testuser:TESTTOKEN123456@packages-microsoft-com-prod/CENTRAL.rpm") - yum_result = self.sanitizer.sanitize(yum_message) - self.assertNotIn("TESTTOKEN123456", yum_result) - self.assertIn("testuser@packages-microsoft-com-prod", yum_result) - - # JFrog Artifactory error - jfrog_message = ("Failed to retrieve from https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml " - "Status code: 401 for https://user:token@cec-aa.jfrog.io/artifactory/repo") - jfrog_result = self.sanitizer.sanitize(jfrog_message) - self.assertNotIn(":token@", jfrog_result) - self.assertIn("user@cec-aa.jfrog.io", jfrog_result) - - -if __name__ == '__main__': - unittest.main() - - diff --git a/src/extension/tests/Test_TelemetryWriter.py b/src/extension/tests/Test_TelemetryWriter.py index 7b66fd545..2c360e84e 100644 --- a/src/extension/tests/Test_TelemetryWriter.py +++ b/src/extension/tests/Test_TelemetryWriter.py @@ -190,6 +190,15 @@ def _read_event_from_file(self, file_index=None, event_index=-1): raise Exception("No events found in event file") return events[event_index] + def _get_message_without_tc(self, event): + """ + Helper method to extract the message without the TC (telemetry counter) portion. + Args: + event: The event dictionary + Returns: The message from the event + """ + return event["Message"] + def _validate_sanitized_event(self, expected_message, task_name=None, event_index=-1, file_index=None): """ Helper method to validate an event's message and task name against expected values. @@ -203,7 +212,8 @@ def _validate_sanitized_event(self, expected_message, task_name=None, event_inde event = self._read_event_from_file(file_index=file_index, event_index=event_index) self.assertIsNotNone(event) - self.assertEqual(expected_message, event["Message"]) + message = self._get_message_without_tc(event) + self.assertEqual(expected_message, message) if task_name is not None: self.assertEqual(task_name, event["TaskName"]) @@ -216,7 +226,7 @@ def _load_sanitized_event(self, message): Returns: The sanitized message from the event """ # Write event to telemetry - self.telemetry_writer.write_event(message) + actual_message = self.telemetry_writer.write_event(message) # Load the event file using helper method (gets first event from latest file) event = self._read_event_from_file(file_index=None, event_index=0) @@ -228,9 +238,12 @@ def test_load_sanitized_event_full_path(self): original_is_github_runner = self.runtime.is_github_runner self.runtime.is_github_runner = False - result = self._load_sanitized_event("https://user:pass@example.com") + message = "https://user:pass@example.com" + result = self._load_sanitized_event(message) + # On non-GitHub runner, should return the sanitized message self.assertIsNotNone(result) + self.assertIn("user@example.com", result) self.assertEqual("https://user@example.com", result) # Restore @@ -238,30 +251,57 @@ def test_load_sanitized_event_full_path(self): def test_sanitize_credentials_multiple_urls_with_credentials_leak(self): """ Test sanitization with multiple URLs containing credentials """ - self.telemetry_writer.write_event("Failed to fetch from https://user1:pass1@host1.com/api and http://user2:pass2@host2.com/data", Constants.TelemetryEventLevel.Error, "Test Task") + message = "Failed to fetch from https://user1:pass1@host1.com/api and http://user2:pass2@host2.com/data" + expected_message = "Failed to fetch from https://user1@host1.com/api and http://user2@host2.com/data" + + self.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") - self._validate_sanitized_event("Failed to fetch from https://user1@host1.com/api and http://user2@host2.com/data", task_name="Test Task", event_index=-1) + self._validate_sanitized_event(expected_message, task_name="Test Task", event_index=-1) def test_sanitize_credentials_with_no_credentials_in_input_with_credentials_leak(self): """ ERROR with 401 status code from jfrog.io """ - self.telemetry_writer.write_event("ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml", Constants.TelemetryEventLevel.Error, "Test Task") + message = "ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml" + expected_message = "ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml" - self._validate_sanitized_event("ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml", task_name="Test Task", event_index=-1) + self.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") + + self._validate_sanitized_event(expected_message, task_name="Test Task", event_index=-1) def test_sanitize_credentials_with_error_and_credentials_leak(self): """ Curl error with buildbot:BuildBotToken credentials """ - self.telemetry_writer.write_event("Curl error (6): Couldn't resolve host 'packages.microsoft.com' Could not " - "retrieve mirrorlist https://buildbot:BuildBotToken@mirror.example.com/repodata/repomd.xml", Constants.TelemetryEventLevel.Error, "Test Task") + message = ("Curl error (6): Couldn't resolve host 'packages.microsoft.com' Could not " + "retrieve mirrorlist https://buildbot:BuildBotToken@mirror.example.com/repodata/repomd.xml") + expected_message = ("Curl error (6): Couldn't resolve host 'packages.microsoft.com' Could not " + "retrieve mirrorlist https://buildbot@mirror.example.com/repodata/repomd.xml") + + self.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") - self._validate_sanitized_event("Curl error (6): Couldn't resolve host 'packages.microsoft.com' Could not " - "retrieve mirrorlist https://buildbot@mirror.example.com/repodata/repomd.xml", task_name="Test Task", event_index=-1) + self._validate_sanitized_event(expected_message, task_name="Test Task", event_index=-1) def test_sanitize_credentials_expired_with_credentials_leak(self): """ ERROR with expired SSL certs and TESTTOKEN123456 """ - self.telemetry_writer.write_event("ERROR: Customer environment error (expired SSL certs):Command=sudo yum update -y --disablerepo='*' Status code: 401 " - "for https://testuser:TESTTOKEN123456@packages-microsoft-com-prod/CENTRAL.rpm", Constants.TelemetryEventLevel.Error, "Test Task") - self._validate_sanitized_event("ERROR: Customer environment error (expired SSL certs):Command=sudo yum update -y --disablerepo='*' Status code: 401 " - "for https://testuser@packages-microsoft-com-prod/CENTRAL.rpm", task_name="Test Task", event_index=-1) + message = ("ERROR: Customer environment error (expired SSL certs): " + "Command=sudo yum update -y --disablerepo='*' " + "--enablerepo='microsoft' !!Code=11 Out- Updating " + "Subscription Management repositories. " + "Unable to read consumer identity This system is not registered " + "with an entitlement server. Status code: 401 " + "for https://testuser:TESTTOKEN123456@packages-microsoft-com-prod/CENTRAL.rpm " + "Error: Failed to download metadata for repo 'packages-microsoft-com-prod': " + "Cannot download repomd.xml: All mirrors were tried") + expected_message = ("ERROR: Customer environment error (expired SSL certs): " + "Command=sudo yum update -y --disablerepo='*' " + "--enablerepo='microsoft' !!Code=11 Out- Updating " + "Subscription Management repositories. " + "Unable to read consumer identity This system is not registered " + "with an entitlement server. Status code: 401 " + "for https://testuser@packages-microsoft-com-prod/CENTRAL.rpm " + "Error: Failed to download metadata for repo 'packages-microsoft-com-prod': " + "Cannot download repomd.xml: All mirrors were tried") + + self.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") + + self._validate_sanitized_event(expected_message, task_name="Test Task", event_index=-1) def test_sanitize_credentials_exception_handling(self): """ Test exception handling: passing None should return the input unchanged """ diff --git a/src/scratch/config/ExtState.json b/src/scratch/config/ExtState.json deleted file mode 100644 index 63a532a4b..000000000 --- a/src/scratch/config/ExtState.json +++ /dev/null @@ -1 +0,0 @@ -{"extensionSequence": {"number": "1", "achieveEnableBy": "2026-05-05T15:33:34.612394Z", "operation": "Installation"}} \ No newline at end of file diff --git a/src/scratch/log/events/1234000.json b/src/scratch/log/events/1234000.json deleted file mode 100644 index 7d3635dd0..000000000 --- a/src/scratch/log/events/1234000.json +++ /dev/null @@ -1 +0,0 @@ -[{"Version": "1.6.64", "Timestamp": "2026-05-05 15:33:34.616416", "TaskName": "Test Task", "EventLevel": "Error", "Message": "testing telemetry write to file [TC=3]", "EventPid": "", "EventTid": "", "OperationId": "2026-05-05 15:33:34.589333"}, {"Version": "1.6.64", "Timestamp": "2026-05-05 15:33:34.619406", "TaskName": "Test Task2", "EventLevel": "Error", "Message": "testing telemetry write to file [TC=4]", "EventPid": "", "EventTid": "", "OperationId": "2026-05-05 15:33:34.589333"}] \ No newline at end of file diff --git a/src/scratch/log/events/1777995214540.json b/src/scratch/log/events/1777995214540.json deleted file mode 100644 index c60e81dbb..000000000 --- a/src/scratch/log/events/1777995214540.json +++ /dev/null @@ -1 +0,0 @@ -[{"Version": "1.6.64", "Timestamp": "2026-05-05 15:33:34.540176", "TaskName": "Core.Startup_15:33:34_32196", "EventLevel": "Informational", "Message": "Started Linux patch core operation. [TC=1]", "EventPid": "", "EventTid": "", "OperationId": "2026-05-05 15:33:34.540176"}] \ No newline at end of file diff --git a/src/scratch/log/events/1777995214585.json b/src/scratch/log/events/1777995214585.json deleted file mode 100644 index 64b0c010f..000000000 --- a/src/scratch/log/events/1777995214585.json +++ /dev/null @@ -1 +0,0 @@ -[{"Version": "1.6.64", "Timestamp": "2026-05-05 15:33:34.585325", "TaskName": "Core.Startup_15:33:34_32196", "EventLevel": "Informational", "Message": "Machine info is: [PlatformName=][PlatformVersion=][MachineCpu=No information found][MachineArch=AMD64][DiskType=Unknown] [TC=2]", "EventPid": "", "EventTid": "", "OperationId": "2026-05-05 15:33:34.540176"}] \ No newline at end of file diff --git a/src/scratch/log/events/1777995214589.json b/src/scratch/log/events/1777995214589.json deleted file mode 100644 index dd5c67a6c..000000000 --- a/src/scratch/log/events/1777995214589.json +++ /dev/null @@ -1 +0,0 @@ -[{"Version": "1.6.64", "Timestamp": "2026-05-05 15:33:34.589333", "TaskName": "Core.Startup_15:33:34_32196", "EventLevel": "Informational", "Message": "Started Linux patch core operation. [TC=1]", "EventPid": "", "EventTid": "", "OperationId": "2026-05-05 15:33:34.589333"}] \ No newline at end of file diff --git a/src/scratch/log/events/1777995214592.json b/src/scratch/log/events/1777995214592.json deleted file mode 100644 index bf4f147bb..000000000 --- a/src/scratch/log/events/1777995214592.json +++ /dev/null @@ -1 +0,0 @@ -[{"Version": "1.6.64", "Timestamp": "2026-05-05 15:33:34.592325", "TaskName": "Core.Startup_15:33:34_32196", "EventLevel": "Informational", "Message": "Machine info is: [PlatformName=Ubuntu][PlatformVersion=16.04][MachineCpu=Intel(R) Core(TM) i7-6700 CPU @ 3.40GHz][MachineArch=x86_64][DiskType=Unknown] [TC=2]", "EventPid": "", "EventTid": "", "OperationId": "2026-05-05 15:33:34.589333"}] \ No newline at end of file diff --git a/src/scratch/status/1.complete.status b/src/scratch/status/1.complete.status deleted file mode 100644 index 782705291..000000000 --- a/src/scratch/status/1.complete.status +++ /dev/null @@ -1 +0,0 @@ -[{"version": 1.0, "timestampUTC": "2026-05-05T15:33:34Z", "status": {"name": "Azure Patch Management", "operation": "Installation", "status": "success", "code": 0, "formattedMessage": {"lang": "en-US", "message": ""}, "substatus": []}}] \ No newline at end of file diff --git a/src/scratch/status/1.status b/src/scratch/status/1.status deleted file mode 100644 index 782705291..000000000 --- a/src/scratch/status/1.status +++ /dev/null @@ -1 +0,0 @@ -[{"version": 1.0, "timestampUTC": "2026-05-05T15:33:34Z", "status": {"name": "Azure Patch Management", "operation": "Installation", "status": "success", "code": 0, "formattedMessage": {"lang": "en-US", "message": ""}, "substatus": []}}] \ No newline at end of file diff --git a/src/scratch/tmp/temp1.list b/src/scratch/tmp/temp1.list deleted file mode 100644 index 7d7e801b2..000000000 --- a/src/scratch/tmp/temp1.list +++ /dev/null @@ -1 +0,0 @@ -test temp file \ No newline at end of file diff --git a/test_real_event.py b/test_real_event.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/test_real_message.py b/test_real_message.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/test_sanitization.py b/test_sanitization.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/test_sanitize.py b/test_sanitize.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tools/sanitization_demo.py b/tools/sanitization_demo.py deleted file mode 100644 index e69de29bb..000000000 From 31746d9b897628748ba6fdb6d1386565aea18416 Mon Sep 17 00:00:00 2001 From: Yashna Parikh Date: Thu, 7 May 2026 11:15:34 -0400 Subject: [PATCH 16/16] Reverting back after extra file commit --- src/extension/tests/Test_TelemetryWriter.py | 98 +++---------------- .../tests/helpers/RuntimeComposer.py | 9 +- 2 files changed, 18 insertions(+), 89 deletions(-) diff --git a/src/extension/tests/Test_TelemetryWriter.py b/src/extension/tests/Test_TelemetryWriter.py index 2c360e84e..3c784cc3f 100644 --- a/src/extension/tests/Test_TelemetryWriter.py +++ b/src/extension/tests/Test_TelemetryWriter.py @@ -16,6 +16,8 @@ def setUp(self): self.runtime = RuntimeComposer() self.telemetry_writer = self.runtime.telemetry_writer self.telemetry_writer.events_folder_path = tempfile.mkdtemp() + self.telemetry_writer._TelemetryWriter__agent_is_compatible = True + Constants.TELEMETRY_ENABLED_AT_EXTENSION = True def tearDown(self): VirtualTerminal().print_lowlight("\n----------------- tear down test runner -----------------") @@ -190,15 +192,6 @@ def _read_event_from_file(self, file_index=None, event_index=-1): raise Exception("No events found in event file") return events[event_index] - def _get_message_without_tc(self, event): - """ - Helper method to extract the message without the TC (telemetry counter) portion. - Args: - event: The event dictionary - Returns: The message from the event - """ - return event["Message"] - def _validate_sanitized_event(self, expected_message, task_name=None, event_index=-1, file_index=None): """ Helper method to validate an event's message and task name against expected values. @@ -210,98 +203,37 @@ def _validate_sanitized_event(self, expected_message, task_name=None, event_inde file_index: Index of the event file (default: None for latest file) """ event = self._read_event_from_file(file_index=file_index, event_index=event_index) - self.assertIsNotNone(event) - message = self._get_message_without_tc(event) - self.assertEqual(expected_message, message) + self.assertEqual(expected_message, event["Message"]) if task_name is not None: self.assertEqual(task_name, event["TaskName"]) - def _load_sanitized_event(self, message): - """ - Helper method to write event to telemetry and load the sanitized message. - The regex sanitization happens automatically in TelemetryWriter. - Args: - message: The message to write to telemetry - Returns: The sanitized message from the event - """ - # Write event to telemetry - actual_message = self.telemetry_writer.write_event(message) - - # Load the event file using helper method (gets first event from latest file) - event = self._read_event_from_file(file_index=None, event_index=0) - return event["Message"] - - def test_load_sanitized_event_full_path(self): - """Test: Helper method executes full path when not on GitHub runner""" - # Force is_github_runner to False to ensure full path coverage on CI - original_is_github_runner = self.runtime.is_github_runner - self.runtime.is_github_runner = False - - message = "https://user:pass@example.com" - result = self._load_sanitized_event(message) - - # On non-GitHub runner, should return the sanitized message - self.assertIsNotNone(result) - self.assertIn("user@example.com", result) - self.assertEqual("https://user@example.com", result) - - # Restore - self.runtime.is_github_runner = original_is_github_runner - def test_sanitize_credentials_multiple_urls_with_credentials_leak(self): """ Test sanitization with multiple URLs containing credentials """ - message = "Failed to fetch from https://user1:pass1@host1.com/api and http://user2:pass2@host2.com/data" - expected_message = "Failed to fetch from https://user1@host1.com/api and http://user2@host2.com/data" - - self.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") + self.telemetry_writer.write_event("Failed to fetch from https://user1:pass1@host1.com/api and http://user2:pass2@host2.com/data", Constants.TelemetryEventLevel.Error, "Test Task") - self._validate_sanitized_event(expected_message, task_name="Test Task", event_index=-1) + self._validate_sanitized_event("Failed to fetch from https://user1@host1.com/api and http://user2@host2.com/data", task_name="Test Task", event_index=-1) def test_sanitize_credentials_with_no_credentials_in_input_with_credentials_leak(self): """ ERROR with 401 status code from jfrog.io """ - message = "ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml" - expected_message = "ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml" - - self.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") + self.telemetry_writer.write_event("ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml", Constants.TelemetryEventLevel.Error, "Test Task") - self._validate_sanitized_event(expected_message, task_name="Test Task", event_index=-1) + self._validate_sanitized_event("ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml", task_name="Test Task", event_index=-1) def test_sanitize_credentials_with_error_and_credentials_leak(self): """ Curl error with buildbot:BuildBotToken credentials """ - message = ("Curl error (6): Couldn't resolve host 'packages.microsoft.com' Could not " - "retrieve mirrorlist https://buildbot:BuildBotToken@mirror.example.com/repodata/repomd.xml") - expected_message = ("Curl error (6): Couldn't resolve host 'packages.microsoft.com' Could not " - "retrieve mirrorlist https://buildbot@mirror.example.com/repodata/repomd.xml") + self.telemetry_writer.write_event("Curl error (6): Couldn't resolve host 'packages.microsoft.com' Could not " + "retrieve mirrorlist https://buildbot:BuildBotToken@mirror.example.com/repodata/repomd.xml", Constants.TelemetryEventLevel.Error, "Test Task") - self.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") - - self._validate_sanitized_event(expected_message, task_name="Test Task", event_index=-1) + self._validate_sanitized_event("Curl error (6): Couldn't resolve host 'packages.microsoft.com' Could not " + "retrieve mirrorlist https://buildbot@mirror.example.com/repodata/repomd.xml", task_name="Test Task", event_index=-1) def test_sanitize_credentials_expired_with_credentials_leak(self): """ ERROR with expired SSL certs and TESTTOKEN123456 """ - message = ("ERROR: Customer environment error (expired SSL certs): " - "Command=sudo yum update -y --disablerepo='*' " - "--enablerepo='microsoft' !!Code=11 Out- Updating " - "Subscription Management repositories. " - "Unable to read consumer identity This system is not registered " - "with an entitlement server. Status code: 401 " - "for https://testuser:TESTTOKEN123456@packages-microsoft-com-prod/CENTRAL.rpm " - "Error: Failed to download metadata for repo 'packages-microsoft-com-prod': " - "Cannot download repomd.xml: All mirrors were tried") - expected_message = ("ERROR: Customer environment error (expired SSL certs): " - "Command=sudo yum update -y --disablerepo='*' " - "--enablerepo='microsoft' !!Code=11 Out- Updating " - "Subscription Management repositories. " - "Unable to read consumer identity This system is not registered " - "with an entitlement server. Status code: 401 " - "for https://testuser@packages-microsoft-com-prod/CENTRAL.rpm " - "Error: Failed to download metadata for repo 'packages-microsoft-com-prod': " - "Cannot download repomd.xml: All mirrors were tried") - - self.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task") - - self._validate_sanitized_event(expected_message, task_name="Test Task", event_index=-1) + self.telemetry_writer.write_event("ERROR: Customer environment error (expired SSL certs):Command=sudo yum update -y --disablerepo='*' Status code: 401 " + "for https://testuser:TESTTOKEN123456@packages-microsoft-com-prod/CENTRAL.rpm", Constants.TelemetryEventLevel.Error, "Test Task") + self._validate_sanitized_event("ERROR: Customer environment error (expired SSL certs):Command=sudo yum update -y --disablerepo='*' Status code: 401 " + "for https://testuser@packages-microsoft-com-prod/CENTRAL.rpm", task_name="Test Task", event_index=-1) def test_sanitize_credentials_exception_handling(self): """ Test exception handling: passing None should return the input unchanged """ diff --git a/src/extension/tests/helpers/RuntimeComposer.py b/src/extension/tests/helpers/RuntimeComposer.py index 590f06cc3..f9fdcf710 100644 --- a/src/extension/tests/helpers/RuntimeComposer.py +++ b/src/extension/tests/helpers/RuntimeComposer.py @@ -30,12 +30,9 @@ def __init__(self): self.is_github_runner = os.getenv('RUNNER_TEMP', None) is not None if self.is_github_runner: - def mkdtemp_runner(suffix=None, prefix=None, dir=None): - base_dir = os.getenv('RUNNER_TEMP') - if dir: - base_dir = dir - temp_path = os.path.join(base_dir, str(uuid.uuid4())) - os.makedirs(temp_path, exist_ok=True) + def mkdtemp_runner(): + temp_path = os.path.join(os.getenv('RUNNER_TEMP'), str(uuid.uuid4())) + os.mkdir(temp_path) return temp_path tempfile.mkdtemp = mkdtemp_runner