Skip to content

Commit

Permalink
Fix `mmh3.hash64` Unicode exception with Python 2 (#10685)
Browse files Browse the repository at this point in the history
* fix `mmh3.hash64` Unicode exception with Python 2 on Windows

Required for #10637

The test suite improvements in that PR, which expand Windows test coverage, are what surfaced this bug.

* add Unicode test case
  • Loading branch information
djova committed Nov 19, 2021
1 parent 1e2abac commit cd2c541
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 1 deletion.
3 changes: 2 additions & 1 deletion datadog_checks_base/datadog_checks/base/utils/db/sql.py
Expand Up @@ -6,6 +6,7 @@

import mmh3

from datadog_checks.base import ensure_bytes
from datadog_checks.base.utils.serialization import json, sort_keys_kwargs

# Unicode character "Arabic Decimal Separator" (U+066B) is a character which looks like an ascii
Expand All @@ -22,7 +23,7 @@ def compute_sql_signature(normalized_query):
return None
# Note: please be cautious when changing this function as some features rely on this
# hash matching the APM resource hash generated on our backend.
return format(mmh3.hash64(normalized_query, signed=False)[0], 'x')
return format(mmh3.hash64(ensure_bytes(normalized_query), signed=False)[0], 'x')


def normalize_query_tag(query):
Expand Down
1 change: 1 addition & 0 deletions datadog_checks_base/tests/base/utils/db/test_db_sql.py
Expand Up @@ -16,6 +16,7 @@ def test_compute_sql_signature(self):
when changes are made to the hashing algorithm. Changes to the hash can have
product impact since the backend expects consistency with the APM resource hash.
"""
assert '6db2e4f3905c3b5b' == compute_sql_signature('select * from dÒgs')
assert '11b755a835280e8e' == compute_sql_signature('select * from dogs')
assert 'd2a193f97126ad67' == compute_sql_signature('update dogs set name = ? where id = ?')

Expand Down

0 comments on commit cd2c541

Please sign in to comment.