Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

utf8: quick fix for the creepy paragraph symbol

  • Loading branch information...
commit d640fb7bde14bfaf10c7c6853ae9fc48d106e5e0 1 parent 18dfcaf
Roman Vasiyarov winex authored
Showing with 6 additions and 1 deletion.
  1. +6 −1 src/utf8.h
7 src/utf8.h
View
@@ -65,6 +65,8 @@ inline void codepointToUTF8(unsigned int cp, t_codepoint * szOut)
/** Extracts one Unicode codepoint from a string in UTF-8 encoding; updates position.
*/
+// WARNING: 20111015 winex: this is hand-written shit and doesn't work,
+// see http://rfc-ref.org/RFC-TEXTS/2640/chapter11.html
inline unsigned int getOneCodepointFromUTF8(const std::string& str, size_t & position)
{
if (position >= str.length()) return -1;
@@ -79,7 +81,7 @@ inline unsigned int getOneCodepointFromUTF8(const std::string& str, size_t & pos
return data[0];
}
- else if ((data[0] & 0xC0) == 0xC0)
+ else if ((data[0] & 0xE0) == 0xC0)
{
// 2 code units
@@ -87,11 +89,14 @@ inline unsigned int getOneCodepointFromUTF8(const std::string& str, size_t & pos
data[1] = str[position++];
+ // what about 0xc2 0xa7, ffs?
+#if 0
if ((data[1] & 0xF0) != 0xF0)
{
position = str.length();
return -1;
}
+#endif
return ((data[0] & 0x1F) << 6) | (data[1] & 0x3F);
}
Please sign in to comment.
Something went wrong with that request. Please try again.