def test_canonify(self):
    """Check that ncf_rudder.canonify behaves like CFEngine's canonify (see #7195)."""
    # Plain ASCII: every character outside [a-zA-Z0-9_] becomes '_',
    # while an existing '_' is kept as is.
    result = ncf_rudder.canonify("ascii @&_ string")
    self.assertEqual(result, "ascii_____string")

    # python/ncf reads UTF-8 files and produces u'' strings in python2 and
    # '' strings in python3 (ncf builder use case). The u'' prefix is
    # accepted by python2 and python3.3+, so one literal covers both.
    # Each 'é' is two bytes in UTF-8, hence two '_' per accented character.
    result = ncf_rudder.canonify(u'héhé')
    self.assertEqual(result, "h__h__")
def canonify(string):
    """Return a canonified version of ``string``, a la CFEngine (see #7195).

    To match CFEngine's behaviour, the UTF-8 encoding of the input is
    treated as if it was ASCII: every byte that is not an ASCII letter,
    an ASCII digit or an underscore is replaced by ``_``. A non-ASCII
    character therefore yields one ``_`` per byte of its UTF-8 encoding.

    Args:
        string: The text to canonify. Normally a unicode string (``u''`` in
            python2, ``''`` in python3), which is the case when it is read
            from a file opened with ``encoding="utf-8"``. A byte string is
            also accepted and is assumed to be already UTF-8 encoded.

    Returns:
        The canonified text, containing only ``[a-zA-Z0-9_]`` characters.
    """
    # Only text needs encoding; bytes are taken as the UTF-8 payload already.
    # (In python2 ``bytes`` is ``str``, so plain byte strings take this path
    # instead of failing on an implicit ASCII decode.)
    if isinstance(string, bytes):
        raw = string
    else:
        raw = string.encode("utf-8")

    # Pure ASCII decoding would raise on bytes above 127, but any 8-bit
    # encoding compatible with ASCII will do since everything above 127 is
    # turned into '_' anyway; "iso-8859-1" is an arbitrary such choice.
    byte_per_char = raw.decode("iso-8859-1")

    return re.sub("[^a-zA-Z0-9_]", "_", byte_per_char)