Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/core/src/bootstrap/Bootstrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ def __init__(self, argv, capture_stdout=True):
if capture_stdout:
self.stdout_file_mirror = StdOutFileMirror(self.env_layer, self.file_logger)
self.composite_logger = self.container.get('composite_logger')
self.credential_sanitizer = self.container.get("credential_sanitizer")
Comment thread
michellemcdaniel marked this conversation as resolved.
self.telemetry_writer = self.container.get('telemetry_writer')
self.composite_logger.telemetry_writer = self.telemetry_writer # Need to set telemetry_writer within logger to enable sending all logs to telemetry

Expand Down
8 changes: 7 additions & 1 deletion src/core/src/bootstrap/ConfigurationFactory.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
from core.src.package_managers.AzL3TdnfPackageManager import AzL3TdnfPackageManager
from core.src.package_managers.YumPackageManager import YumPackageManager
from core.src.package_managers.ZypperPackageManager import ZypperPackageManager
from core.src.service_interfaces.CredentialSanitizer import CredentialSanitizer

from core.src.service_interfaces.LifecycleManager import LifecycleManager
from core.src.service_interfaces.LifecycleManagerAzure import LifecycleManagerAzure
Expand Down Expand Up @@ -151,9 +152,14 @@ def new_bootstrap_configuration(config_env, log_file_path, events_folder, teleme
'telemetry_writer': None # Has to be initialized without telemetry_writer to avoid running into a circular dependency loop. Telemetry writer within composite logger will be set later after telemetry writer has been initialized
}
},
'credential_sanitizer': {
'component': CredentialSanitizer,
'component_args': ['composite_logger'],
'component_kwargs': {}
},
'telemetry_writer': {
'component': TelemetryWriter,
'component_args': ['env_layer', 'composite_logger'],
'component_args': ['env_layer', 'composite_logger', 'credential_sanitizer'],
'component_kwargs': {
'events_folder_path': events_folder,
'telemetry_supported': telemetry_supported
Expand Down
44 changes: 44 additions & 0 deletions src/core/src/service_interfaces/CredentialSanitizer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Copyright 2026 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.7+
import re


class CredentialSanitizer(object):
    """Service that sanitizes credential-like values from URIs by removing password/token from URI userinfo."""

    # Pattern matches: scheme://user:password@host so it can be rewritten to scheme://user@host
    # Handles credentials containing special characters (except @, /, whitespace)
    # Groups:
    #   (1) scheme: https://, http://, or ftp://
    #   (2) username: one or more non-whitespace, non-slash, non-colon, non-@ characters
    #   (3) password: zero or more non-whitespace, non-slash, non-@ characters
    _URI_CREDENTIAL_PATTERN = re.compile(r'(https?://|ftp://)([^:/@\s]+):([^@/\s]*)@')

    def __init__(self, composite_logger):
        """Stores the logger used to report sanitization activity and failures.
        Args:
            composite_logger: Object exposing log_verbose(message) and log_error(message)
        """
        self.composite_logger = composite_logger

    def sanitize(self, message):
        """Removes password/token from URI credentials in the given message.
        Args:
            message: The message to sanitize
        Returns: The message with credentials removed from URIs. If the substitution itself fails
            (e.g. message is None or not a string), the input is returned unchanged.
        """
        try:
            sanitized_message = self._URI_CREDENTIAL_PATTERN.sub(r'\1\2@', message)
        except Exception as error:
            self.composite_logger.log_error("Error occurred while sanitizing credentials from message: [Error={0}]".format(repr(error)))
            return message

        # Only report when something was actually removed, so unchanged messages do not produce a log line per call
        if sanitized_message != message:
            try:
                # The raw input is deliberately NOT logged: echoing it would write the very secret being removed
                self.composite_logger.log_verbose("Message was sanitized to remove sensitive information. [SanitizedMessage={0}]".format(sanitized_message))
            except Exception as error:
                # A logging failure must not cause the unsanitized message to be handed back to the caller
                self.composite_logger.log_error("Error occurred while logging sanitized message: [Error={0}]".format(repr(error)))

        return sanitized_message
Comment thread
rane-rajasi marked this conversation as resolved.

10 changes: 8 additions & 2 deletions src/core/src/service_interfaces/TelemetryWriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,15 @@ class TelemetryWriter(object):

TELEMETRY_BUFFER_DELIMETER= "\n|\t"

def __init__(self, env_layer, composite_logger, events_folder_path, telemetry_supported):
def __init__(self, env_layer, composite_logger, credential_sanitizer, events_folder_path, telemetry_supported):
self.env_layer = env_layer
self.composite_logger = composite_logger
self.__operation_id = str(datetime.datetime.utcnow())
self.__task_name_watermark = "_" + str(datetime.datetime.utcnow().hour) + ":" + str(datetime.datetime.utcnow().minute) + ":" + str(datetime.datetime.utcnow().second) + "_" + str(os.getpid())
self.__task_name = Constants.TelemetryTaskName.STARTUP + self.__task_name_watermark
self.events_folder_path = None
self.__telemetry_event_counter = 1 # will be added at the end of each event sent to telemetry to assist in tracing and identifying event/message loss in telemetry
self.credential_sanitizer = credential_sanitizer
self.start_time_for_event_count_throttle_check = datetime.datetime.utcnow()
self.event_count = 1

Expand Down Expand Up @@ -127,12 +128,17 @@ def __get_events_folder_path_exists(events_folder_path):
return events_folder_path is not None and os.path.exists(events_folder_path)

def __new_event_json(self, event_level, message, task_name):
# Step 1: Apply message restrictions (formatting, truncation)
restricted_message = self.__ensure_message_restriction_compliance(message)
# Step 2: Sanitize credentials from URIs
sanitized_message = self.credential_sanitizer.sanitize(restricted_message)

return {
"Version": Constants.EXT_VERSION,
"Timestamp": str(datetime.datetime.utcnow()),
"TaskName": task_name,
"EventLevel": event_level,
"Message": self.__ensure_message_restriction_compliance(message),
"Message": sanitized_message,
"EventPid": "",
"EventTid": "",
"OperationId": self.__operation_id # activity id from from config settings
Expand Down
154 changes: 154 additions & 0 deletions src/core/tests/Test_TelemetryWriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,5 +311,159 @@ def test_write_event_with_buffer_true_and_empty_string_and_then_flush_with_non_e
f.close()
self.assertTrue(text_found.string.startswith("Message 1"))

# ==================== Unit Tests for Credential Sanitization ====================
# ==================== Helper functions for Credential Sanitization Tests ====================
def _clear_events_folder(self):
    """
    Helper method that empties the events folder of JSON event files before a sanitization test.
    Files whose names do not end in '.json' are left in place.
    """
    events_dir = self.runtime.telemetry_writer.events_folder_path
    json_entries = [entry for entry in os.listdir(events_dir) if entry.endswith('.json')]
    for entry in json_entries:
        os.remove(os.path.join(events_dir, entry))

def _read_event_from_file(self, file_index=None, event_index=-1):
    """
    Helper method to open and read an event from an event file in the events folder.
    Only files named <digits>.json are considered; they are ordered by their numeric name.
    Args:
        file_index: Index of the event file to read (in numeric name order). If None, uses the file with the highest number
        event_index: Index of the event within the file (default: -1 for last event)
    Returns: The parsed event dictionary from the JSON file
    Raises: Exception if no event file exists or the selected file holds no events
    """
    events_folder_path = self.runtime.telemetry_writer.events_folder_path
    # The dot is escaped so that only a literal ".json" suffix qualifies
    event_files = [name for name in os.listdir(events_folder_path) if re.search(r'^[0-9]+\.json$', name)]
    if not event_files:
        raise Exception("No event files found in events folder")

    # os.listdir returns entries in arbitrary order, so sort numerically to make the indices deterministic.
    # NOTE(review): assumes a larger number in the file name means a newer file - confirm against TelemetryWriter.
    event_files.sort(key=lambda name: int(os.path.splitext(name)[0]))

    selected_file = event_files[-1] if file_index is None else event_files[file_index]
    with open(os.path.join(events_folder_path, selected_file), 'r') as f:
        events = json.load(f)

    if not events:
        raise Exception("No events found in event file")
    return events[event_index]

def _get_message_without_tc(self, event):
    """
    Helper method to extract the message without the TC (telemetry counter) portion.
    Args:
        event: The event dictionary
    Returns: The message portion before the last " [TC=" marker, or the whole message if the marker is absent
    """
    message = event["Message"]
    marker_position = message.rfind(" [TC=")
    # rfind returns -1 when the marker is missing; slicing with -1 would silently drop the last character
    if marker_position == -1:
        return message
    return message[:marker_position]

def _validate_sanitized_event(self, expected_message, task_name=None, event_index=-1, file_index=None):
    """
    Helper method that reads one event back from the events folder and checks its message
    (with the telemetry counter suffix removed) and, when requested, its task name.
    Args:
        expected_message: The expected sanitized message (without TC counter)
        task_name: The expected task name; the task name is not checked when None
        event_index: Index of the event within the file (default: -1 for last event)
        file_index: Index of the event file (default: None for latest file)
    """
    recorded_event = self._read_event_from_file(file_index=file_index, event_index=event_index)
    self.assertIsNotNone(recorded_event)

    self.assertEqual(expected_message, self._get_message_without_tc(recorded_event))

    if task_name is None:
        return
    self.assertEqual(task_name, recorded_event["TaskName"])

# ==================== Credential Sanitization Test Cases ====================
def test_sanitize_credentials_from_uri_https_with_credentials_leak(self):
    """ An HTTPS URI carrying user:token must be written to telemetry with the token removed """
    def count_event_files():
        # Counts the numerically named JSON event files currently in the events folder
        return len([f for f in os.listdir(self.runtime.telemetry_writer.events_folder_path) if re.search('^[0-9]+.json$', f)])

    # Start from an empty events folder so the file count below is meaningful
    self._clear_events_folder()
    self.assertEqual(count_event_files(), 0)

    raw_message = "Error connecting to https://testuser:TESTTOKEN123456@invalid.repo.example/rpm/repodata/repomd.xml"
    sanitized_expectation = "Error connecting to https://testuser@invalid.repo.example/rpm/repodata/repomd.xml"

    self.runtime.telemetry_writer.write_event(raw_message, Constants.TelemetryEventLevel.Error, "Test Task")

    # Exactly one event file should have been produced by the single write
    self.assertEqual(count_event_files(), 1)

    self._validate_sanitized_event(sanitized_expectation, task_name="Test Task")

def test_sanitize_credentials_from_uri_http_with_credentials_leak(self):
    """ A plain HTTP URI carrying user:password must be written to telemetry with the password removed """
    raw_message = "Connection failed to http://user123:password123@example.com/path"
    sanitized_expectation = "Connection failed to http://user123@example.com/path"

    telemetry_writer = self.runtime.telemetry_writer
    telemetry_writer.write_event(raw_message, Constants.TelemetryEventLevel.Error, "Test Task")

    self._validate_sanitized_event(sanitized_expectation, task_name="Test Task")

def test_sanitize_credentials_multiple_urls_with_credentials_leak(self):
    """ Every credential-bearing URI in a single message must be sanitized, not just the first """
    raw_message = "Failed to fetch from https://user1:pass1@host1.com/api and http://user2:pass2@host2.com/data"
    sanitized_expectation = "Failed to fetch from https://user1@host1.com/api and http://user2@host2.com/data"

    telemetry_writer = self.runtime.telemetry_writer
    telemetry_writer.write_event(raw_message, Constants.TelemetryEventLevel.Error, "Test Task")

    self._validate_sanitized_event(sanitized_expectation, task_name="Test Task")

def test_sanitize_credentials_with_error_and_no_credentials(self):
    """ A 401 error message whose URI has no userinfo must reach telemetry unchanged """
    raw_message = "ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml"
    # Nothing to strip here, so the expected output is the input itself
    sanitized_expectation = raw_message

    telemetry_writer = self.runtime.telemetry_writer
    telemetry_writer.write_event(raw_message, Constants.TelemetryEventLevel.Error, "Test Task")

    self._validate_sanitized_event(sanitized_expectation, task_name="Test Task")

def test_sanitize_credentials_with_error_and_credentials_leak(self):
    """ A curl error that embeds buildbot:BuildBotToken in the mirror URI must have the token removed """
    # Text shared by the input and the expected output; only the URI differs
    error_prefix = ("Curl error (6): Couldn't resolve host 'packages.microsoft.com' Could not "
                    "retrieve mirrorlist ")
    raw_message = error_prefix + "https://buildbot:BuildBotToken@mirror.example.com/repodata/repomd.xml"
    sanitized_expectation = error_prefix + "https://buildbot@mirror.example.com/repodata/repomd.xml"

    telemetry_writer = self.runtime.telemetry_writer
    telemetry_writer.write_event(raw_message, Constants.TelemetryEventLevel.Error, "Test Task")

    self._validate_sanitized_event(sanitized_expectation, task_name="Test Task")

def test_sanitize_credentials_with_credentials_leak(self):
    """ A long yum failure message containing testuser:TESTTOKEN123456 must have only the token removed """
    def count_event_files():
        # Counts the numerically named JSON event files currently in the events folder
        return len([f for f in os.listdir(self.runtime.telemetry_writer.events_folder_path) if re.search('^[0-9]+.json$', f)])

    # Start from an empty events folder so the file count below is meaningful
    self._clear_events_folder()
    self.assertEqual(count_event_files(), 0)

    # The input and the expected output share everything except the URI line
    leading_text = ("ERROR: Customer environment error (expired SSL certs): "
                    "Command=sudo yum update -y --disablerepo='*' "
                    "--enablerepo='microsoft' !!Code=11 Out- Updating "
                    "Subscription Management repositories. "
                    "Unable to read consumer identity This system is not registered "
                    "with an entitlement server. Status code: 401 ")
    trailing_text = ("Error: Failed to download metadata for repo 'packages-microsoft-com-prod': "
                     "Cannot download repomd.xml: All mirrors were tried")
    raw_message = leading_text + "for https://testuser:TESTTOKEN123456@packages-microsoft-com-prod/CENTRAL.rpm " + trailing_text
    sanitized_expectation = leading_text + "for https://testuser@packages-microsoft-com-prod/CENTRAL.rpm " + trailing_text

    self.runtime.telemetry_writer.write_event(raw_message, Constants.TelemetryEventLevel.Error, "Test Task")

    # Exactly one event file should have been produced by the single write
    self.assertEqual(count_event_files(), 1)
    self._validate_sanitized_event(sanitized_expectation, task_name="Test Task")

def test_sanitize_credentials_exception_handling(self):
    """ Sanitizing None must hand None back to the caller """
    sanitizer = self.runtime.telemetry_writer.credential_sanitizer
    self.assertIsNone(sanitizer.sanitize(None))


# Run the unittest runner when this file is executed directly as a script
if __name__ == '__main__':
    unittest.main()

3 changes: 2 additions & 1 deletion src/core/tests/library/RuntimeCompositor.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,10 @@ def mkdtemp_runner():
self.container = self.bootstrapper.build_out_container()
self.file_logger = self.bootstrapper.file_logger
self.composite_logger = self.bootstrapper.composite_logger
self.credential_sanitizer = self.bootstrapper.credential_sanitizer

# re-initializing telemetry_writer, outside of Bootstrapper, to correctly set the env_layer configured for tests
self.telemetry_writer = TelemetryWriter(self.env_layer, self.composite_logger, self.bootstrapper.telemetry_writer.events_folder_path, self.bootstrapper.telemetry_supported)
self.telemetry_writer = TelemetryWriter(self.env_layer, self.composite_logger, self.credential_sanitizer, self.bootstrapper.telemetry_writer.events_folder_path, self.bootstrapper.telemetry_supported)
self.bootstrapper.telemetry_writer = self.telemetry_writer
self.bootstrapper.composite_logger.telemetry_writer = self.telemetry_writer

Expand Down
45 changes: 45 additions & 0 deletions src/extension/src/CredentialSanitizer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Copyright 2026 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.7+

import re


class CredentialSanitizer(object):
    """Service that sanitizes credential-like values from URIs by removing password/token from URI userinfo."""

    # Pattern matches: scheme://user:password@host so it can be rewritten to scheme://user@host
    # Handles credentials containing special characters (except @, /, whitespace)
    # Groups:
    #   (1) scheme: https://, http://, or ftp://
    #   (2) username: one or more non-whitespace, non-slash, non-colon, non-@ characters
    #   (3) password: zero or more non-whitespace, non-slash, non-@ characters
    _URI_CREDENTIAL_PATTERN = re.compile(r'(https?://|ftp://)([^:/@\s]+):([^@/\s]*)@')

    def __init__(self, logger):
        """Stores the logger used to report sanitization activity and failures.
        Args:
            logger: Object exposing log_verbose(message) and log_error(message)
        """
        self.logger = logger

    def sanitize(self, message):
        """Removes password/token from URI credentials in the given message.
        Args:
            message: The message to sanitize
        Returns: The message with credentials removed from URIs. If the substitution itself fails
            (e.g. message is None or not a string), the input is returned unchanged.
        """
        try:
            sanitized_message = self._URI_CREDENTIAL_PATTERN.sub(r'\1\2@', message)
        except Exception as error:
            self.logger.log_error("Error occurred while sanitizing credentials from message: [Error={0}]".format(repr(error)))
            return message

        # Only report when something was actually removed, so unchanged messages do not produce a log line per call
        if sanitized_message != message:
            try:
                # The raw input is deliberately NOT logged: echoing it would write the very secret being removed
                self.logger.log_verbose("Message was sanitized to remove sensitive information. [SanitizedMessage={0}]".format(sanitized_message))
            except Exception as error:
                # A logging failure must not cause the unsanitized message to be handed back to the caller
                self.logger.log_error("Error occurred while logging sanitized message: [Error={0}]".format(repr(error)))

        return sanitized_message

Loading
Loading