Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add unicode/byte string classes which are supposed to eventually phase
out std::string
- Loading branch information
Showing
2 changed files
with
149 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
#include <sstream> | ||
#include <vector> | ||
#include <locale> | ||
#include <codecvt> | ||
|
||
#include "String.h" | ||
|
||
std::string ByteString::ConversionError::formatError(ByteString::value_type const *at, ByteString::value_type const *upto) | ||
{ | ||
std::stringstream ss; | ||
ss << "Could not convert sequence to UTF-8:"; | ||
for(int i = 0; i < 4 && at + i < upto; i++) | ||
ss << " " << std::hex << (unsigned int)std::make_unsigned<ByteString::value_type>::type(at[i]); | ||
return ss.str(); | ||
} | ||
|
||
|
||
// UTF-8 <-> UTF-32 conversion facet used by the conversion routines in this
// file. It is constructed with refs = 1 because it lives as a standalone
// object rather than being installed into (and destroyed by) a std::locale.
std::codecvt_utf8<char32_t> convert{1};
|
||
// Decodes the bytes of this string as UTF-8 and returns the resulting
// UTF-32 String.
//
// ignoreError: when true, a byte that cannot be decoded (invalid or truncated
//              sequence) is copied to the output as the code point with the
//              same unsigned value, and decoding resumes at the next byte.
//              When false, the first such byte raises ConversionError.
//
// Throws ByteString::ConversionError if ignoreError is false and the input is
// not valid UTF-8.
String ByteString::FromUtf8(bool ignoreError) const
{
    std::vector<String::value_type> destination(size());
    // The conversion state must start out value-initialised; handing an
    // indeterminate state object to codecvt::in is not allowed.
    std::codecvt_utf8<char32_t>::state_type state = std::codecvt_utf8<char32_t>::state_type();

    ByteString::value_type const *const from_end = data() + size();
    ByteString::value_type const *from = data(), *from_next;
    String::value_type *to = destination.data(), *to_next;

    // Enlarges the output buffer and re-bases `to` onto the new storage. The
    // offset is taken before resizing so that no arithmetic is ever done on a
    // pointer into storage that has already been released.
    auto grow = [&destination, &to]()
    {
        auto const used = to - destination.data();
        destination.resize(destination.empty() ? 1 : 2 * destination.size());
        to = destination.data() + used;
    };

    while(true)
    {
        std::codecvt_base::result const result = convert.in(state, from, from_end, from_next, to, destination.data() + destination.size(), to_next);
        from = from_next;
        to = to_next;

        bool const outputFull = (to == destination.data() + destination.size());

        if(result == std::codecvt_base::ok || result == std::codecvt_base::noconv)
        {
            destination.resize(to - destination.data());
            return String(destination.data(), destination.size());
        }
        else if(result == std::codecvt_base::partial && outputFull)
        {
            // Ran out of output space: make room and continue where we stopped.
            grow();
        }
        else
        {
            // Either an invalid sequence, or a partial result that was not
            // caused by a full output buffer (i.e. the input ends mid-sequence).
            if(!ignoreError)
                throw ConversionError(from, from_end);

            if(outputFull)
                grow();
            // Pass the offending byte through as a code point of the same
            // unsigned value and retry from the following byte.
            *(to++) = std::make_unsigned<ByteString::value_type>::type(*(from++));
        }
    }
}
|
||
// Encodes this UTF-32 string as UTF-8 and returns the bytes as a ByteString.
//
// Throws ByteString::ConversionError if the conversion facet reports an error
// for any code point.
ByteString String::ToUtf8() const
{
    // Start with one byte per code point; the buffer is grown on demand when
    // the facet reports that it ran out of room.
    std::vector<ByteString::value_type> destination(size());
    // The conversion state must start out value-initialised; handing an
    // indeterminate state object to codecvt::out is not allowed.
    std::codecvt_utf8<char32_t>::state_type state = std::codecvt_utf8<char32_t>::state_type();

    String::value_type const *const from_end = data() + size();
    String::value_type const *from = data(), *from_next;
    ByteString::value_type *to = destination.data(), *to_next;

    while(true)
    {
        std::codecvt_base::result const result = convert.out(state, from, from_end, from_next, to, destination.data() + destination.size(), to_next);
        from = from_next;
        to = to_next;

        if(result == std::codecvt_base::ok || result == std::codecvt_base::noconv)
        {
            destination.resize(to - destination.data());
            return ByteString(destination.data(), destination.size());
        }
        else if(result == std::codecvt_base::error)
        {
            throw ByteString::ConversionError(true);
        }
        else if(result == std::codecvt_base::partial)
        {
            // Take the write offset before resizing so that no arithmetic is
            // done on a pointer into storage that has already been released,
            // and make sure an empty buffer actually grows.
            auto const used = to - destination.data();
            destination.resize(destination.empty() ? 1 : 2 * destination.size());
            to = destination.data() + used;
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
#ifndef TPT_STRING | ||
#define TPT_STRING | ||
|
||
#include <stdexcept>
#include <string>
#include <utility>
|
||
class String;

// A string of bytes (plain char), layered on std::basic_string<char>.
// Declares conversions to the UTF-32 String class: FromUtf8 and FromAscii.
class ByteString : public std::basic_string<char>
{
public:
    ByteString() = default;
    ByteString(size_type count, value_type ch): std::basic_string<char>(count, ch) {}
    ByteString(value_type const *ch, size_type count): std::basic_string<char>(ch, count) {}
    ByteString(value_type const *ch): std::basic_string<char>(ch) {}
    template<class It> ByteString(It first, It last): std::basic_string<char>(first, last) {}

    // Defaulted so that the move operations inherit the base class's noexcept
    // guarantee; containers such as std::vector only move elements on
    // reallocation when the move constructor cannot throw.
    ByteString(ByteString const &other) = default;
    ByteString(ByteString &&other) = default;
    ByteString &operator=(ByteString const &other) = default;
    ByteString &operator=(ByteString &&other) = default;

    // Thrown when a conversion between ByteString and String fails.
    class ConversionError : public std::runtime_error
    {
        // Builds the message for the (at, upto) constructor below.
        static std::string formatError(value_type const *at, value_type const *upto);
    public:
        // at:   first byte that could not be converted.
        // upto: one past the last byte of the input.
        ConversionError(value_type const *at, value_type const *upto): std::runtime_error(formatError(at, upto)) {}
        // to: true for a failure converting to UTF-8, false for a failure
        //     converting from UTF-8; selects the message text.
        ConversionError(bool to): std::runtime_error(to ? "Could not convert to UTF-8" : "Could not convert from UTF-8") {}
    };

    // Decodes the bytes as UTF-8. With ignoreError == false a ConversionError
    // is thrown on undecodable input.
    String FromUtf8(bool ignoreError = true) const;
    // Widens every byte to one String character.
    inline String FromAscii() const;
};
|
||
class String : public std::basic_string<char32_t> | ||
{ | ||
public: | ||
inline String(): std::basic_string<char32_t>() {} | ||
inline String(size_type count, value_type ch): std::basic_string<char32_t>(count, ch) {} | ||
inline String(value_type const *ch, size_type count): std::basic_string<char32_t>(ch, count) {} | ||
inline String(value_type const *ch): std::basic_string<char32_t>(ch) {} | ||
template<class It> inline String(It first, It last): std::basic_string<char32_t>(first, last) {} | ||
inline String(String const &other): std::basic_string<char32_t>(other) {} | ||
inline String(String &&other): std::basic_string<char32_t>(std::move(other)) {} | ||
|
||
String &operator=(String const &other) { std::basic_string<char32_t>::operator=(other); return *this; } | ||
String &operator=(String &&other) { std::basic_string<char32_t>::operator=(std::move(other)); return *this; } | ||
|
||
template<unsigned N> inline String(ByteString::value_type const (&ch)[N]): std::basic_string<char32_t>(ByteString(ch, N).FromAscii()) {} | ||
|
||
ByteString ToUtf8() const; | ||
}; | ||
|
||
// Widens each byte of this string to one String character, without any
// decoding. Bytes are treated as unsigned, so a byte >= 0x80 maps to the code
// point of the same value (0x80..0xFF) instead of being sign-extended on
// platforms where plain char is signed.
inline String ByteString::FromAscii() const
{
    String destination(size(), String::value_type());
    for(size_type i = 0; i < size(); i++)
        destination[i] = String::value_type(static_cast<unsigned char>((*this)[i]));
    return destination;
}
#endif |