Fix %{escape:} and %{unescape:} to work correctly with UTF8 safe characters
FreeRADIUS · Nov 2, 2014 · 463e5c4 · 463e5c4
1 parent 7992742
commit 463e5c4
Show file tree

Hide file tree

Showing 5 changed files with 96 additions and 33 deletions.
diff --git a/raddb/mods-available/expr b/raddb/mods-available/expr
@@ -62,5 +62,5 @@ expr {
 	# Characters that will not be encoded by the %{encode}
 	# xlat function.
 	#
-	safe_characters = "@abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.-_: /"
+	safe_characters = "@abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.-_: /äéöüàâæçèéêëîïôœùûüaÿÄÉÖÜßÀÂÆÇÈÉÊËÎÏÔŒÙÛÜŸ"
 }
diff --git a/src/include/libradius.h b/src/include/libradius.h
@@ -423,6 +423,7 @@ int		fr_check_lib_magic(uint64_t magic);
  *	Printing functions.
  */
 int		fr_utf8_char(uint8_t const *str);
+char		*fr_utf8_strchr(int *chr_len, const char *str, const char *chr);
 size_t		fr_print_string(char const *in, size_t inlen,
 				char *out, size_t outlen, char quote);
 	size_t		fr_print_string_len(char const *in, size_t inlen, char quote);

diff --git a/src/lib/print.c b/src/lib/print.c
@@ -121,6 +121,40 @@ int fr_utf8_char(uint8_t const *str)
 	return 0;
 }
 
+/** Return a pointer to the first occurrence of a UTF8 char in a string.
+ *
+ * @param[out] chr_len Where to write the length of the multibyte char passed in chr (may be NULL).
+ * @param[in] str Haystack, walked one (possibly multibyte) char at a time.
+ * @param[in] chr Multibyte needle, only the first char it points at is compared.
+ * @return The position of chr in str or NULL if not found.
+ */
+char *fr_utf8_strchr(int *chr_len, const char *str, const char *chr)
+{
+	int cchr;
+	char *out;
+
+	cchr = fr_utf8_char((uint8_t const *)chr);
+	if (cchr == 0) cchr = 1;	/* fr_utf8_char() returned 0, treat the needle as a single byte */
+	if (chr_len) *chr_len = cchr;
+
+	while (str[0]) {
+		int schr;
+
+		schr = fr_utf8_char((uint8_t const *)str);	/* Length of the haystack char, not of the needle */
+		if (schr == 0) schr = 1;
+		if (schr != cchr) goto next;
+
+		if (memcmp(str, chr, schr) == 0) {
+			memcpy(&out, &str, sizeof(out));	/* Copies the pointer value, dropping const without a cast */
+			return out;
+		}
+	next:
+		str += schr;
+	}
+
+	return NULL;
+}
+
 /** Escape any non printable or non-UTF8 characters in the input string
  *
  * @param[in] in string to escape.

diff --git a/src/modules/rlm_expr/rlm_expr.c b/src/modules/rlm_expr/rlm_expr.c
@@ -740,40 +740,66 @@ static ssize_t urlunquote_xlat(UNUSED void *instance, UNUSED REQUEST *request,
 	return outlen - freespace;
 }
 
-/** Equivalent to the old safe_characters functionality in rlm_sql
+/** Equivalent to the old safe_characters functionality in rlm_sql but with utf8 support
  *
  * @verbatim Example: "%{escape:<img>foo.jpg</img>}" == "=60img=62foo.jpg=60/img=62" @endverbatim
  */
 static ssize_t escape_xlat(void *instance, UNUSED REQUEST *request,
 			   char const *fmt, char *out, size_t outlen)
 {
 	rlm_expr_t *inst = instance;
-	char const *p;
+	char const *p = fmt;
 	size_t freespace = outlen;
 
-	if (outlen <= 1) return 0;
+	while (p[0]) {
+		int chr_len = 1;
+		int ret;
 
-	p = fmt;
-	while (*p && (--freespace > 0)) {
-		/*
-		 *	Non-printable characters get replaced with their
-		 *	mime-encoded equivalents.
-		 */
-		if ((*p > 31) && strchr(inst->allowed_chars, *p)) {
-			*out++ = *p++;
+		if (fr_utf8_strchr(&chr_len, inst->allowed_chars, p) == NULL) {
+			/*
+			 *	1 + (('=' + [hex]{2}) * chr_len)
+			 */
+			if (freespace <= (size_t)(1 + (chr_len * 3))) break;
+
+			switch (chr_len) {
+			case 4:
+				ret = snprintf(out, freespace, "=%02X=%02X=%02X=%02X",
+					       (uint8_t)p[0], (uint8_t)p[1], (uint8_t)p[2], (uint8_t)p[3]);
+				break;
+
+			case 3:
+				ret = snprintf(out, freespace, "=%02X=%02X=%02X",
+					       (uint8_t)p[0], (uint8_t)p[1], (uint8_t)p[2]);
+				break;
+
+			case 2:
+				ret = snprintf(out, freespace, "=%02X=%02X", (uint8_t)p[0], (uint8_t)p[1]);
+				break;
+
+			case 1:
+				ret = snprintf(out, freespace, "=%02X", (uint8_t)p[0]);
+				break;
+			}
+
+			p += chr_len;
+			out += ret;
+			freespace -= ret;
 			continue;
 		}
 
-		if (freespace < 3)
-			break;
-
-		snprintf(out, 4, "=%02X", (uint8_t)*p++);
+		/*
+		 *	Only one byte left.
+		 */
+		if (freespace <= 1) break;
 
-		/* Already decremented */
-		freespace -= 2;
-		out += 3;
+		/*
+		 *	Allowed character (copy whole mb chars at once)
+		 */
+		memcpy(out, p, chr_len);
+		out += chr_len;
+		p += chr_len;
+		freespace -= chr_len;
 	}
-
 	*out = '\0';
 
 	return outlen - freespace;
@@ -783,10 +809,9 @@ static ssize_t escape_xlat(void *instance, UNUSED REQUEST *request,
  *
  * @verbatim Example: "%{unescape:=60img=62foo.jpg=60/img=62}" == "<img>foo.jpg</img>" @endverbatim
  */
-static ssize_t unescape_xlat(void *instance, UNUSED REQUEST *request,
-			       char const *fmt, char *out, size_t outlen)
+static ssize_t unescape_xlat(UNUSED void *instance, UNUSED REQUEST *request,
+			     char const *fmt, char *out, size_t outlen)
 {
-	rlm_expr_t *inst = instance;
 	char const *p;
 	char *c1, *c2, c3;
 	size_t	freespace = outlen;
@@ -808,12 +833,6 @@ static ssize_t unescape_xlat(void *instance, UNUSED REQUEST *request,
 		    !(c2 = memchr(hextab, tolower(*(p + 2)), 16))) goto next;
 		c3 = ((c1 - hextab) << 4) + (c2 - hextab);
 
-		/*
-		 *	It was just random occurrence which just happens
-		 *	to match the escape sequence for a safe character.
-		 *	Copy it across verbatim.
-		 */
-		if (strchr(inst->allowed_chars, c3)) goto next;
 		*out++ = c3;
 		p += 3;
 	}

diff --git a/src/tests/keywords/escape b/src/tests/keywords/escape
@@ -3,15 +3,18 @@
 #
 update request {
         control:Cleartext-Password := 'hello'
-       reply:Filter-Id := "filter"
+        reply:Filter-Id := "filter"
 
 	Tmp-String-0 := '@abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.-_: /'
 	Tmp-String-1 := '±§#$%^&+={[}];<,>?`|"'
 	Tmp-String-2 := '™œ¥¤'
-	# The extra =5C is because the " is printed as \"
 	Tmp-String-3 := '=C2=B1=C2=A7=23=24=25=5E=26=2B=3D=7B=5B=7D=5D=3B=3C=2C=3E=3F=60=7C=22'
 	Tmp-String-4 := '=E2=84=A2=C5=93=C2=A5=C2=A4'
 	Tmp-String-5 := '=40=61=62=63=64=65=66=67'
+
+	# Mixture of safe and unsafe chars
+	Tmp-String-6 := 'ŒČÿ'
+	Tmp-String-7 := 'Œ=C4=8Cÿ'
 }
 
 if (<string>"%{escape:%{request:Tmp-String-0}}" != &Tmp-String-0) {
@@ -50,9 +53,15 @@ if (<string>"%{unescape:%{request:Tmp-String-4}}" != &Tmp-String-2) {
 	}
 }
 
-# All safe chars, so should not be touched.
-if (<string>"%{unescape:%{request:Tmp-String-5}}" != &Tmp-String-5) {
+if (<string>"%{escape:%{request:Tmp-String-6}}" != &Tmp-String-7) {
 	update reply {
 		Filter-Id += 'Fail 7'
 	}
 }
+
+if (<string>"%{unescape:%{request:Tmp-String-7}}" != &Tmp-String-6) {
+	update reply {
+		Filter-Id += 'Fail 8'
+	}
+}
+