Permalink
Browse files

Add Unicode/byte string classes which are supposed to eventually phase out std::string
  • Loading branch information...
mniip committed Apr 29, 2018
1 parent c0c550e commit 4912674bfe7c168baec0f8ad524ad6799a12aa95
Showing with 149 additions and 0 deletions.
  1. +89 −0 src/common/String.cpp
  2. +60 −0 src/common/String.h
@@ -0,0 +1,89 @@
#include <sstream>
#include <vector>
#include <locale>
#include <codecvt>
#include "String.h"
// Builds the what() text for a failed UTF-8 decode.
//
// at   - first byte of the sequence that could not be decoded.
// upto - one past the last byte of the source buffer; nothing at or beyond
//        this address is read.
//
// Returns the message followed by up to four offending bytes printed as
// lowercase hexadecimal, each preceded by a space.
std::string ByteString::ConversionError::formatError(ByteString::value_type const *at, ByteString::value_type const *upto)
{
    // Widen through the unsigned byte type so that bytes >= 0x80 print as
    // "e2" rather than as a sign-extended "ffffffe2".
    using Byte = std::make_unsigned<ByteString::value_type>::type;

    std::stringstream ss;
    // The input is a byte sequence that was being decoded *from* UTF-8
    // (this overload is fed raw bytes), so the message names that direction.
    ss << "Could not convert sequence from UTF-8:" << std::hex;
    for(int i = 0; i < 4 && at + i < upto; i++)
        ss << " " << static_cast<unsigned int>(static_cast<Byte>(at[i]));
    return ss.str();
}
// UTF-8 <-> UTF-32 conversion facet shared by ByteString::FromUtf8 and
// String::ToUtf8. It is used directly rather than through a std::locale;
// the constructor argument is the facet's `refs` value.
std::codecvt_utf8<char32_t> convert{1};
// Decodes this byte string as UTF-8 into a String of code points.
//
// ignoreError - when false, the first byte sequence that cannot be decoded
//               raises ByteString::ConversionError. When true, the offending
//               byte is copied through as a single code point equal to its
//               unsigned value and decoding resumes at the next byte.
//
// Returns the decoded String.
String ByteString::FromUtf8(bool ignoreError) const
{
    typedef std::codecvt_utf8<char32_t> Codec;

    // Start with one output slot per input byte.
    std::vector<String::value_type> destination(size(), String::value_type());

    // The conversion state must start out zero-initialised; a default-declared
    // mbstate_t holds an indeterminate value.
    Codec::state_type state = Codec::state_type();

    ByteString::value_type const *const from_end = data() + size();
    ByteString::value_type const *from = data(), *from_next = from;
    String::value_type *to = destination.data(), *to_next = to;

    while(true)
    {
        Codec::result result = convert.in(state, from, from_end, from_next, to, destination.data() + destination.size(), to_next);
        from = from_next;
        to = to_next;

        if(result == std::codecvt_base::ok || result == std::codecvt_base::noconv)
        {
            // Construct from iterators so an empty buffer never hands a
            // possibly-null data() pointer to the string constructor.
            return String(destination.begin(), destination.begin() + (to - destination.data()));
        }
        else if(result == std::codecvt_base::partial && to == destination.data() + destination.size())
        {
            // Output buffer is full: grow it. The write offset is captured
            // before resize() because the old storage is invalid afterwards.
            std::size_t const used = to - destination.data();
            destination.resize(destination.empty() ? 1 : 2 * destination.size());
            to = destination.data() + used;
        }
        else
        {
            // Malformed or truncated input at `from`.
            if(!ignoreError)
                throw ConversionError(from, from_end);
            if(to == destination.data() + destination.size())
            {
                std::size_t const used = to - destination.data();
                destination.resize(destination.empty() ? 1 : 2 * destination.size());
                to = destination.data() + used;
            }
            // Pass the raw byte through as an unsigned value and skip it.
            *(to++) = std::make_unsigned<ByteString::value_type>::type(*(from++));
            // Decoding restarts on a fresh byte, so restart the state too.
            state = Codec::state_type();
        }
    }
}
// Encodes this string of code points as UTF-8.
//
// Returns the encoded bytes as a ByteString.
// Throws ByteString::ConversionError(true) when the facet reports that a
// code point cannot be encoded.
ByteString String::ToUtf8() const
{
    typedef std::codecvt_utf8<char32_t> Codec;

    // Start with one byte per code point; the buffer is doubled whenever the
    // facet reports that it ran out of room.
    std::vector<ByteString::value_type> destination(size(), ByteString::value_type());

    // The conversion state must start out zero-initialised; a default-declared
    // mbstate_t holds an indeterminate value.
    Codec::state_type state = Codec::state_type();

    String::value_type const *const from_end = data() + size();
    String::value_type const *from = data(), *from_next = from;
    ByteString::value_type *to = destination.data(), *to_next = to;

    while(true)
    {
        Codec::result result = convert.out(state, from, from_end, from_next, to, destination.data() + destination.size(), to_next);
        from = from_next;
        to = to_next;

        if(result == std::codecvt_base::ok || result == std::codecvt_base::noconv)
        {
            // Construct from iterators so an empty buffer never hands a
            // possibly-null data() pointer to the string constructor.
            return ByteString(destination.begin(), destination.begin() + (to - destination.data()));
        }
        else if(result == std::codecvt_base::error)
        {
            throw ByteString::ConversionError(true);
        }
        else if(result == std::codecvt_base::partial)
        {
            // Capture the write offset before resize(): the old storage is
            // invalid afterwards. The empty() guard guarantees progress.
            std::size_t const used = to - destination.data();
            destination.resize(destination.empty() ? 1 : 2 * destination.size());
            to = destination.data() + used;
        }
    }
}
@@ -0,0 +1,60 @@
#ifndef TPT_STRING
#define TPT_STRING
#include <stdexcept>
#include <string>
#include <type_traits>
#include <utility>
class String;

// A string of bytes (char). Derives from std::basic_string<char>, so the
// whole standard string interface is available, and adds conversions to the
// code-point based String class.
class ByteString : public std::basic_string<char>
{
public:
    inline ByteString(): std::basic_string<char>() {}
    inline ByteString(size_type count, value_type ch): std::basic_string<char>(count, ch) {}
    inline ByteString(value_type const *ch, size_type count): std::basic_string<char>(ch, count) {}
    inline ByteString(value_type const *ch): std::basic_string<char>(ch) {}
    template<class It> inline ByteString(It first, It last): std::basic_string<char>(first, last) {}

    // Defaulted so the move operations inherit the base class's exception
    // specification; hand-written versions without noexcept make containers
    // such as std::vector copy elements instead of moving them when growing.
    ByteString(ByteString const &other) = default;
    ByteString(ByteString &&other) = default;
    ByteString &operator=(ByteString const &other) = default;
    ByteString &operator=(ByteString &&other) = default;

    // Thrown when a conversion between ByteString and String fails.
    class ConversionError : public std::runtime_error
    {
        // Formats a message that lists the bytes starting at `at`
        // (never reading at or past `upto`).
        static std::string formatError(value_type const *at, value_type const *upto);
    public:
        // Error located at a specific position in a byte sequence.
        inline ConversionError(value_type const *at, value_type const *upto): std::runtime_error(formatError(at, upto)) {}
        // Error without position information; `to` selects the direction
        // named in the message (true: to UTF-8, false: from UTF-8).
        inline ConversionError(bool to): std::runtime_error(to ? "Could not convert to UTF-8" : "Could not convert from UTF-8") {}
    };

    // Decodes the bytes as UTF-8. ignoreError selects between throwing
    // ConversionError (false) and tolerating undecodable input (true).
    String FromUtf8(bool ignoreError = true) const;
    // Widens every byte to one code point without any decoding.
    inline String FromAscii() const;
};
// A string of char32_t code points. Derives from std::basic_string<char32_t>,
// so the whole standard string interface is available, and adds conversions
// from and to ByteString.
class String : public std::basic_string<char32_t>
{
public:
    inline String(): std::basic_string<char32_t>() {}
    inline String(size_type count, value_type ch): std::basic_string<char32_t>(count, ch) {}
    inline String(value_type const *ch, size_type count): std::basic_string<char32_t>(ch, count) {}
    inline String(value_type const *ch): std::basic_string<char32_t>(ch) {}
    template<class It> inline String(It first, It last): std::basic_string<char32_t>(first, last) {}

    // Defaulted so the move operations inherit the base class's exception
    // specification; hand-written versions without noexcept make containers
    // such as std::vector copy elements instead of moving them when growing.
    String(String const &other) = default;
    String(String &&other) = default;
    String &operator=(String const &other) = default;
    String &operator=(String &&other) = default;

    // Builds a String from a narrow character array such as a string literal.
    // N is the array length, which for a literal counts the terminating NUL,
    // so only the first N - 1 characters are taken; passing N would append a
    // stray U'\0' to every String built from a literal.
    template<unsigned N> inline String(ByteString::value_type const (&ch)[N]): std::basic_string<char32_t>(ByteString(ch, N - 1).FromAscii()) {}

    // Encodes the code points as UTF-8.
    ByteString ToUtf8() const;
};
// Widens every byte of this string to one code point, without decoding.
//
// Returns a String of the same length whose i-th element is the unsigned
// value of the i-th byte.
inline String ByteString::FromAscii() const
{
    // Go through the unsigned byte type: where char is signed, converting a
    // byte >= 0x80 directly to char32_t would sign-extend it to a value such
    // as 0xFFFFFF80. FromUtf8 widens undecodable bytes the same way.
    typedef std::make_unsigned<ByteString::value_type>::type Byte;

    String destination = String(size(), String::value_type());
    for(size_t i = 0; i < size(); i++)
        destination[i] = String::value_type(Byte(operator[](i)));
    return destination;
}
#endif

0 comments on commit 4912674

Please sign in to comment.