diff --git a/pod/perlguts.pod b/pod/perlguts.pod index ffddc552eec4..4dfea65eeced 100644 --- a/pod/perlguts.pod +++ b/pod/perlguts.pod @@ -3568,15 +3568,18 @@ whether the byte is encoded as a single byte even in UTF-8): char */ STRLEN len; /* Returned length of character in bytes */ - if (!UTF8_IS_INVARIANT(*utf)) + if (!UTF8_IS_INVARIANT(*utf)) { /* Must treat this as UTF-8 */ - uv = utf8_to_uvchr_buf(utf, utf_end, &len); + if (! utf8_to_uv(utf, utf_end, &uv, &len)) { + /* handle error */ + } + } else /* OK to treat this character as a byte */ uv = *utf; -You can also see in that example that we use C<utf8_to_uvchr_buf> to get the -value of the character; the inverse function C<uvchr_to_utf8> is available +You can also see in that example that we use C<utf8_to_uv> to get the +value of the character; the inverse function C<uv_to_utf8> is available for putting a UV into UTF-8: if (!UVCHR_IS_INVARIANT(uv)) @@ -3794,7 +3797,7 @@ the PV to somewhere, pass on the flag too. =item * -If a string is UTF-8, B<always> use C<utf8_to_uvchr_buf> to get at the value, +If a string is UTF-8, B<always> use C<utf8_to_uv> to get at the value, unless C<UTF8_IS_INVARIANT(*s)> in which case you can use C<*s>. =item * diff --git a/utf8.c b/utf8.c index 722195b4e74c..0f8c7700886f 100644 --- a/utf8.c +++ b/utf8.c @@ -123,19 +123,19 @@ const char super_cp_format[] = "Code point 0x%" UVXf " is not Unicode," =for apidoc_item uvchr_to_utf8_flags_msgs These functions are identical. THEY SHOULD BE USED IN ONLY VERY SPECIALIZED -CIRCUMSTANCES. +CIRCUMSTANCES. The C spelling is preferred in new code. -Most code should use C()> rather than call this directly. +Most code should use C()> rather than call these directly. -This function is for code that wants any warning and/or error messages to be +These functions are for code that wants any warning and/or error messages to be returned to the caller rather than be displayed. Any message that would have been displayed if all lexical warnings are enabled will instead be returned. -It is just like C> but it takes an extra parameter -placed after all the others, C<msgs>.
If this parameter is 0, this function -behaves identically to C>. Otherwise, C<msgs> should -be a pointer to an C<HV *> variable, in which this function creates a new HV to -contain any appropriate message. The hash has three key-value pairs, as +They are just like C> but take an extra parameter +placed after all the others, C<msgs>. If this parameter is 0, the functions +behave identically to C>. Otherwise, C<msgs> should +be a pointer to an C<HV *> variable, in which these functions create a new HV +to contain any appropriate message. The hash has three key-value pairs, as follows: =over 4 @@ -169,7 +169,7 @@ The possibilities are: =back It's important to note that specifying this parameter as non-null will cause -any warning this function would otherwise generate to be suppressed, and +any warning the functions would otherwise generate to be suppressed, and instead be placed in C<*msgs>. The caller can check the lexical warnings state (or not) when choosing what to do with the returned message.