Skip to content

Commit

Permalink
Scrub sensitive information from the logs (#524)
Browse files Browse the repository at this point in the history
  • Loading branch information
zezha-msft committed Nov 10, 2018
1 parent 5c38cf7 commit 8bb4204
Show file tree
Hide file tree
Showing 14 changed files with 340 additions and 413 deletions.
3 changes: 2 additions & 1 deletion azure-storage-common/ChangeLog.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@

- When unable to sign request, avoid wasting time on retries by failing faster.
- Allow the use of custom domain when creating service object targeting emulators.
- azure-storage-nspkg is not installed anymore on Python 3 (PEP420-based namespace package)
- azure-storage-nspkg is not installed anymore on Python 3 (PEP420-based namespace package).
- Scrub off sensitive information on requests when logging them.


## Version 1.3.0:
Expand Down
22 changes: 12 additions & 10 deletions azure-storage-common/azure/storage/common/_auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,12 @@
DEV_ACCOUNT_NAME,
DEV_ACCOUNT_SECONDARY_NAME
)
import sys
if sys.version_info >= (3,):
from urllib.parse import parse_qsl
else:
from urlparse import parse_qsl


import logging
logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -110,18 +116,14 @@ def __init__(self, sas_token):
# ignore ?-prefix (added by tools such as Azure Portal) on sas tokens
# doing so avoids double question marks when signing
if sas_token[0] == '?':
self.sas_token = sas_token[1:]
else:
self.sas_token = sas_token
sas_token = sas_token[1:]

self.sas_qs = parse_qsl(sas_token)

def sign_request(self, request):
# if 'sig=' is present, then the request has already been signed
# if 'sig' is present, then the request has already been signed
# as is the case when performing retries
if 'sig=' in request.path:
if 'sig' in request.query:
return
if '?' in request.path:
request.path += '&'
else:
request.path += '?'

request.path += self.sas_token
request.query.update(self.sas_qs)
4 changes: 4 additions & 0 deletions azure-storage-common/azure/storage/common/_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,7 @@

# Encryption constants
_ENCRYPTION_PROTOCOL_V1 = '1.0'

_AUTHORIZATION_HEADER_NAME = 'Authorization'
_COPY_SOURCE_HEADER_NAME = 'x-ms-copy-source'
_REDACTED_VALUE = 'REDACTED'
70 changes: 60 additions & 10 deletions azure-storage-common/azure/storage/common/storageclient.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,12 @@
# license information.
# --------------------------------------------------------------------------

import sys
import requests
from abc import ABCMeta
import logging

logger = logging.getLogger(__name__)
from time import sleep
import sys

import requests
from azure.common import (
AzureException,
AzureHttpError,
Expand All @@ -23,6 +21,9 @@
DEFAULT_USER_AGENT_STRING,
USER_AGENT_STRING_PREFIX,
USER_AGENT_STRING_SUFFIX,
_AUTHORIZATION_HEADER_NAME,
_REDACTED_VALUE,
_COPY_SOURCE_HEADER_NAME,
)
from ._error import (
_ERROR_DECRYPTION_FAILURE,
Expand All @@ -43,6 +44,23 @@
)
from .retry import ExponentialRetry
from io import UnsupportedOperation
from .sharedaccesssignature import _QueryStringConstants

if sys.version_info >= (3,):
from urllib.parse import (
urlparse,
parse_qsl,
urlunparse,
urlencode,
)
else:
from urlparse import (
urlparse,
parse_qsl,
urlunparse,
)
from urllib import urlencode
logger = logging.getLogger(__name__)


class StorageClient(object):
Expand Down Expand Up @@ -212,6 +230,36 @@ def extract_date_and_request_id(retry_context):
else:
return ""

@staticmethod
def _scrub_headers(headers):
# make a copy to avoid contaminating the request
clean_headers = headers.copy()

if _AUTHORIZATION_HEADER_NAME in clean_headers:
clean_headers[_AUTHORIZATION_HEADER_NAME] = _REDACTED_VALUE

# in case of copy operations, there could be a SAS signature present in the header value
if _COPY_SOURCE_HEADER_NAME in clean_headers \
and _QueryStringConstants.SIGNED_SIGNATURE + "=" in clean_headers[_COPY_SOURCE_HEADER_NAME]:
# take the url apart and scrub away the signed signature
scheme, netloc, path, params, query, fragment = urlparse(clean_headers[_COPY_SOURCE_HEADER_NAME])
parsed_qs = dict(parse_qsl(query))
parsed_qs[_QueryStringConstants.SIGNED_SIGNATURE] = _REDACTED_VALUE

# the SAS needs to be put back together
clean_headers[_COPY_SOURCE_HEADER_NAME] = urlunparse(
(scheme, netloc, path, params, urlencode(parsed_qs), fragment))
return clean_headers

@staticmethod
def _scrub_query_parameters(query):
# make a copy to avoid contaminating the request
clean_queries = query.copy()

if _QueryStringConstants.SIGNED_SIGNATURE in clean_queries:
clean_queries[_QueryStringConstants.SIGNED_SIGNATURE] = _REDACTED_VALUE
return clean_queries

def _perform_request(self, request, parser=None, parser_args=None, operation_context=None, expected_errors=None):
'''
Sends the request and return response. Catches HTTPError and hands it
Expand Down Expand Up @@ -260,12 +308,14 @@ def _perform_request(self, request, parser=None, parser_args=None, operation_con
retry_context.request = request

# Log the request before it goes out
logger.info("%s Outgoing request: Method=%s, Path=%s, Query=%s, Headers=%s.",
client_request_id_prefix,
request.method,
request.path,
request.query,
str(request.headers).replace('\n', ''))
# Avoid unnecessary scrubbing if the logger is not on
if logger.isEnabledFor(logging.INFO):
logger.info("%s Outgoing request: Method=%s, Path=%s, Query=%s, Headers=%s.",
client_request_id_prefix,
request.method,
request.path,
self._scrub_query_parameters(request.query),
str(self._scrub_headers(request.headers)).replace('\n', ''))

# Perform the request
response = self._httpclient.perform_request(request)
Expand Down
6 changes: 3 additions & 3 deletions tests/blob/test_common_blob.py
Original file line number Diff line number Diff line change
Expand Up @@ -1210,8 +1210,8 @@ def test_sas_signed_identifier(self):
blob_name = self._create_block_blob()

access_policy = AccessPolicy()
access_policy.start = '2011-10-11'
access_policy.expiry = '2018-10-12'
access_policy.start = datetime.utcnow() - timedelta(hours=1)
access_policy.expiry = datetime.utcnow() + timedelta(hours=1)
access_policy.permission = BlobPermissions.READ
identifiers = {'testid': access_policy}

Expand Down Expand Up @@ -1274,7 +1274,7 @@ def test_token_credential(self):
token_credential = TokenCredential(self.generate_oauth_token())

# Action 1: make sure token works
service = BlockBlobService(self.settings.STORAGE_ACCOUNT_NAME, token_credential=token_credential)
service = BlockBlobService(self.settings.OAUTH_STORAGE_ACCOUNT_NAME, token_credential=token_credential)
result = service.exists("test")
self.assertIsNotNone(result)

Expand Down
132 changes: 132 additions & 0 deletions tests/common/test_logging.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for
# license information.
# --------------------------------------------------------------------------
import sys
from datetime import datetime, timedelta

from azure.storage.blob import (
BlockBlobService,
ContainerPermissions,
BlobPermissions,
)
from azure.storage.common._constants import (
_AUTHORIZATION_HEADER_NAME,
)
from azure.storage.common.sharedaccesssignature import (
_QueryStringConstants,
)
from tests.testcase import (
StorageTestCase,
TestMode,
record,
LogCaptured,
)

if sys.version_info >= (3,):
from urllib.parse import parse_qs
else:
from urlparse import parse_qs


class StorageLoggingTest(StorageTestCase):

def setUp(self):
super(StorageLoggingTest, self).setUp()

self.bs = self._create_storage_service(BlockBlobService, self.settings)
self.container_name = self.get_resource_name('utcontainer')

# create source blob to be copied from
self.source_blob_name = self.get_resource_name('srcblob')
self.source_blob_data = self.get_random_bytes(4 * 1024)

if not self.is_playback():
self.bs.create_container(self.container_name)
self.bs.create_blob_from_bytes(self.container_name, self.source_blob_name, self.source_blob_data)

# generate a SAS so that it is accessible with a URL
sas_token = self.bs.generate_blob_shared_access_signature(
self.container_name,
self.source_blob_name,
permission=BlobPermissions.READ,
expiry=datetime.utcnow() + timedelta(hours=1),
)
self.source_blob_url = self.bs.make_blob_url(self.container_name, self.source_blob_name, sas_token=sas_token)

def tearDown(self):
if not self.is_playback():
self.bs.delete_container(self.container_name)

return super(StorageLoggingTest, self).tearDown()

@record
def test_authorization_is_scrubbed_off(self):
# Act
with LogCaptured(self) as log_captured:
self.bs.exists(self.container_name)
log_as_str = log_captured.getvalue()

# Assert
# make sure authorization header is logged, but its value is not
# the keyword SharedKey is present in the authorization header's value
self.assertTrue(_AUTHORIZATION_HEADER_NAME in log_as_str)
self.assertFalse('SharedKey' in log_as_str)

@record
def test_sas_signature_is_scrubbed_off(self):
# SAS URL is calculated from storage key, so this test runs live only
if TestMode.need_recording_file(self.test_mode):
return

# Arrange
token = self.bs.generate_container_shared_access_signature(
self.container_name,
permission=ContainerPermissions.READ,
expiry=datetime.utcnow() + timedelta(hours=1),
)
# parse out the signed signature
token_components = parse_qs(token)
signed_signature = token_components[_QueryStringConstants.SIGNED_SIGNATURE][0]

bs_with_sas = BlockBlobService(account_name=self.settings.STORAGE_ACCOUNT_NAME, sas_token=token,
protocol=self.settings.PROTOCOL)

# Act
with LogCaptured(self) as log_captured:
bs_with_sas.get_blob_account_information(self.container_name)
log_as_str = log_captured.getvalue()

# Assert
# make sure the query parameter 'sig' is logged, but its value is not
self.assertTrue(_QueryStringConstants.SIGNED_SIGNATURE in log_as_str)
self.assertFalse(signed_signature in log_as_str)

@record
def test_copy_source_sas_is_scrubbed_off(self):
# SAS URL is calculated from storage key, so this test runs live only
if TestMode.need_recording_file(self.test_mode):
return

# Arrange
dest_blob_name = self.get_resource_name('destblob')

# parse out the signed signature
token_components = parse_qs(self.source_blob_url)
signed_signature = token_components[_QueryStringConstants.SIGNED_SIGNATURE][0]

# Act
with LogCaptured(self) as log_captured:
self.bs.copy_blob(self.container_name, dest_blob_name, self.source_blob_url, requires_sync=True)
log_as_str = log_captured.getvalue()

# Assert
# make sure the query parameter 'sig' is logged, but its value is not
self.assertTrue(_QueryStringConstants.SIGNED_SIGNATURE in log_as_str)
self.assertFalse(signed_signature in log_as_str)

# make sure authorization header is logged, but its value is not
# the keyword SharedKey is present in the authorization header's value
self.assertTrue(_AUTHORIZATION_HEADER_NAME in log_as_str)
self.assertFalse('SharedKey' in log_as_str)
4 changes: 2 additions & 2 deletions tests/file/test_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -994,8 +994,8 @@ def test_sas_signed_identifier(self):
file_name = self._create_file()

access_policy = AccessPolicy()
access_policy.start = '2011-10-11'
access_policy.expiry = '2018-10-12'
access_policy.start = datetime.utcnow() - timedelta(hours=1)
access_policy.expiry = datetime.utcnow() + timedelta(hours=1)
access_policy.permission = FilePermissions.READ
identifiers = {'testid': access_policy}

Expand Down
6 changes: 3 additions & 3 deletions tests/queues/test_queue.py
Original file line number Diff line number Diff line change
Expand Up @@ -556,7 +556,7 @@ def test_token_credential(self):
token_credential = TokenCredential(self.generate_oauth_token())

# Action 1: make sure token works
service = QueueService(self.settings.STORAGE_ACCOUNT_NAME, token_credential=token_credential)
service = QueueService(self.settings.OAUTH_STORAGE_ACCOUNT_NAME, token_credential=token_credential)
queues = list(service.list_queues())
self.assertIsNotNone(queues)

Expand Down Expand Up @@ -697,8 +697,8 @@ def test_sas_signed_identifier(self):

# Arrange
access_policy = AccessPolicy()
access_policy.start = '2011-10-11'
access_policy.expiry = '2018-10-12'
access_policy.start = datetime.utcnow() - timedelta(hours=1)
access_policy.expiry = datetime.utcnow() + timedelta(hours=1)
access_policy.permission = QueuePermissions.READ

identifiers = {'testid': access_policy}
Expand Down
Loading

0 comments on commit 8bb4204

Please sign in to comment.