Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes #7195: Canonify in ncf doesn't work like cfengine does #237

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions tests/unit/test_ncf.py 100644 → 100755
@@ -1,4 +1,5 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import unittest
import ncf
Expand Down
21 changes: 21 additions & 0 deletions tests/unit/test_ncf_rudder.py
@@ -1,10 +1,12 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import unittest
import ncf
import ncf_rudder
import re
import os.path
import sys
import xml.etree.cElementTree as XML
from pprint import pprint

Expand Down Expand Up @@ -146,5 +148,24 @@ def test_category_xml_content(self):
# ncf_rudder.write_technique_for_rudder(path, technique_metadata)
# ncf_rudder.write_all_techniques_for_rudder(path)


def test_canonify(self):
    """Check that ncf_rudder.canonify replaces every non [a-zA-Z0-9_] byte with '_'.

    Non-ASCII characters are expected to yield one underscore per UTF-8
    byte, so each 'é' (two bytes in UTF-8) becomes '__'.
    """
    # Plain ASCII: spaces and punctuation are replaced, underscores are kept
    result = ncf_rudder.canonify("ascii @&_ string")
    self.assertEqual(result, "ascii_____string")

    # python/ncf reads UTF-8 files and produces u'' strings in python2 and '' strings in python3
    # python2 tests
    if sys.version_info[0] == 2:
        # unicode in source file -> interpreted as unicode with u'' -> correct iso in python string (ncf builder use case)
        result = ncf_rudder.canonify(u'héhé')
        self.assertEqual(result, 'h__h__')

    # python3 tests
    if sys.version_info[0] == 3:
        # unicode in source file -> correct unicode in python string (ncf builder use case)
        result = ncf_rudder.canonify('héhé')
        self.assertEqual(result, "h__h__")


if __name__ == '__main__':
unittest.main()
19 changes: 15 additions & 4 deletions tools/ncf_rudder.py
Expand Up @@ -71,10 +71,18 @@ def canonify_expected_reports(expected_reports, dest):

# Replace the second field with a canonified version of itself (a la CFEngine)
fields = line.strip().split(";;")
regex = re.compile("[^a-zA-Z0-9_]", flags=re.UNICODE )
fields[1] = regex.sub("_", fields[1])
fields[1] = canonify(fields[1])
dest_file.write(";;".join(fields) + "\n")

def canonify(string):
    """Return a canonified version of *string*, a la CFEngine.

    Every byte of the UTF-8 representation that is not in [a-zA-Z0-9_] is
    replaced by '_'. To match CFEngine behaviour, UTF-8 is treated as if it
    was ASCII (see #7195): a multi-byte character therefore produces one
    underscore per byte (e.g. 'é' becomes '__').

    *string* is normally a unicode string (u'' in python2, '' in python3),
    which is the case when it is read from a file opened with
    encoding="utf-8". A byte string is also accepted and is taken as the
    already encoded bytes.
    """
    # Byte strings are used as is; calling encode() on them would fail
    # (no such method in python3, implicit ASCII decoding in python2).
    if isinstance(string, bytes):
        raw_bytes = string
    else:
        raw_bytes = string.encode("utf-8")
    # Pure ASCII decoding would raise an error on bytes above 127, but any
    # 8 bits encoding that is compatible with ASCII will do since everything
    # above 127 will be transformed to '_', so "iso-8859-1" is chosen
    # arbitrarily.
    byte_per_char = raw_bytes.decode("iso-8859-1")
    return re.sub(r"[^a-zA-Z0-9_]", "_", byte_per_char)


# OTHER FUNCTIONS
#################
Expand Down Expand Up @@ -303,10 +311,13 @@ def generate_rudder_reporting(technique):
generic_method = generic_methods[method_name]

key_value = method_call["args"][generic_method["class_parameter_id"]-1]
regex = re.compile("[^\$\{\}\w](?![^{}]+})|\$(?!{)", flags=re.UNICODE)
# This regex canonifies everything except variable references (${...})
regex = re.compile("[^\$\{\}a-zA-Z0-9_](?![^{}]+})|\$(?!{)")
# to match cfengine behaviour we need to treat utf8 as if it was ascii (see #7195)
# string should be unicode string (ie u'') which is the case if they are read from files opened with encoding="utf-8"
key_value = key_value.encode("utf-8").decode("iso-8859-1")
key_value_canonified = regex.sub("_", key_value)


class_prefix = generic_method["class_prefix"]+"_"+key_value_canonified

# Always add an empty line for readability
Expand Down