diff --git a/core/string_api.php b/core/string_api.php
index 1621f3b643..9fd81b504f 100644
--- a/core/string_api.php
+++ b/core/string_api.php
@@ -910,6 +910,13 @@ function string_html_entities( $p_string ) {
  * @return string
  */
 function string_html_specialchars( $p_string ) {
+	# Remove any character that is not allowed by the XML 1.0 specification
+	# http://www.w3.org/TR/2008/REC-xml-20081126/#NT-Char
+	# PCRE reads at most two hex digits after a bare \x, so code points above
+	# U+00FF must use the braced \x{...} form; written as \xD7FF the class would
+	# also strip every BMP character beyond U+00FF (Cyrillic, CJK, Hangul, ...).
+	$p_string = preg_replace( '/[^\x9\xA\xD\x20-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]/u', '', $p_string );
+
 	# achumakov: @ added to avoid warning output in unsupported codepages
 	# e.g. 8859-2, windows-1257, Korean, which are treated as 8859-1.
 	# This is VERY important for Eastern European, Baltic and Korean languages