Skip to content

Commit

Permalink
Add unicode/byte string classes which are supposed to eventually phase out std::string
Browse files Browse the repository at this point in the history
  • Loading branch information
mniip committed Apr 29, 2018
1 parent c0c550e commit 4912674
Show file tree
Hide file tree
Showing 2 changed files with 149 additions and 0 deletions.
89 changes: 89 additions & 0 deletions src/common/String.cpp
@@ -0,0 +1,89 @@
#include <sstream>
#include <vector>
#include <locale>
#include <codecvt>

#include "String.h"

/*
 * Builds the what() text for a failed conversion: a fixed prefix followed by
 * the hexadecimal values of up to four bytes taken from [at, upto).
 *
 * at   - first byte of the sequence that could not be converted
 * upto - one past the last readable byte; nothing at or beyond it is read
 */
std::string ByteString::ConversionError::formatError(ByteString::value_type const *at, ByteString::value_type const *upto)
{
	typedef std::make_unsigned<ByteString::value_type>::type unsigned_byte;

	std::stringstream message;
	message << "Could not convert sequence to UTF-8:";

	// Dump at most four bytes, stopping early at the end of the input.
	int remaining = 4;
	for(ByteString::value_type const *cursor = at; remaining > 0 && cursor < upto; ++cursor, --remaining)
	{
		// Go through the unsigned type first so a negative char does not
		// sign-extend when widened to unsigned int.
		message << " " << std::hex << static_cast<unsigned int>(unsigned_byte(*cursor));
	}
	return message.str();
}


// UTF-8 <-> UTF-32 conversion facet shared by ByteString::FromUtf8 and
// String::ToUtf8 below. The constructor argument is the facet's `refs` value;
// passing 1 (rather than the default 0) marks the object as not owned by any
// std::locale, which is what allows it to live as a plain global here.
// NOTE(review): std::codecvt_utf8 is deprecated since C++17 - confirm the
// project's target standard before relying on it long term.
std::codecvt_utf8<char32_t> convert(1);

/*
 * Decodes this byte string as UTF-8 and returns the resulting code points.
 *
 * ignoreError - when false, the first malformed or truncated sequence raises
 *               ConversionError (built from the offending position and the
 *               end of the input). When true, the offending byte is copied
 *               through as a code point equal to its unsigned value and
 *               decoding resumes at the next byte.
 *
 * Returns a String holding the decoded code points.
 */
String ByteString::FromUtf8(bool ignoreError) const
{
	typedef std::codecvt_utf8<char32_t> Codec;

	// Start with one output slot per input byte.
	std::vector<String::value_type> destination(size(), String::value_type());

	// The conversion state must be value-initialised: handing an
	// indeterminate mbstate_t to codecvt::in is undefined behaviour.
	Codec::state_type state = Codec::state_type();

	ByteString::value_type const *const from_end = data() + size();
	ByteString::value_type const *from = data(), *from_next = from;
	String::value_type *to = destination.data(), *to_next = to;

	while(true)
	{
		Codec::result result = convert.in(state, from, from_end, from_next, to, destination.data() + destination.size(), to_next);
		from = from_next;
		to = to_next;
		if(result == std::codecvt_base::ok || result == std::codecvt_base::noconv)
		{
			destination.resize(to - destination.data());
			return String(destination.data(), destination.size());
		}
		else if(result == std::codecvt_base::partial && to == destination.data() + destination.size())
		{
			// Output buffer exhausted: grow it. The write offset is taken
			// BEFORE resize() because resize() may invalidate `to`; the
			// empty() check keeps a zero-sized buffer from staying at zero.
			auto const used = to - destination.data();
			destination.resize(destination.empty() ? 1 : 2 * destination.size());
			to = destination.data() + used;
		}
		else
		{
			// Either a hard error, or `partial` caused by an incomplete
			// sequence at the end of the input.
			if(!ignoreError)
				throw ConversionError(from, from_end);

			if(to == destination.data() + destination.size())
			{
				auto const used = to - destination.data();
				destination.resize(destination.empty() ? 1 : 2 * destination.size());
				to = destination.data() + used;
			}
			// Pass the bad byte through unchanged (as an unsigned value) and
			// restart decoding from a clean shift state on the next byte.
			*(to++) = std::make_unsigned<ByteString::value_type>::type(*(from++));
			state = Codec::state_type();
		}
	}
}

/*
 * Encodes this string's code points as UTF-8.
 *
 * Returns a ByteString holding the encoded bytes.
 * Throws ByteString::ConversionError(true) when the facet reports an error
 * for one of the code points.
 */
ByteString String::ToUtf8() const
{
	typedef std::codecvt_utf8<char32_t> Codec;

	// Start with one byte per code point; the buffer is doubled on demand
	// whenever the facet reports that it ran out of output space.
	std::vector<ByteString::value_type> destination(size(), ByteString::value_type());

	// The conversion state must be value-initialised: handing an
	// indeterminate mbstate_t to codecvt::out is undefined behaviour.
	Codec::state_type state = Codec::state_type();

	String::value_type const *const from_end = data() + size();
	String::value_type const *from = data(), *from_next = from;
	ByteString::value_type *to = destination.data(), *to_next = to;

	while(true)
	{
		Codec::result result = convert.out(state, from, from_end, from_next, to, destination.data() + destination.size(), to_next);
		from = from_next;
		to = to_next;
		if(result == std::codecvt_base::ok || result == std::codecvt_base::noconv)
		{
			destination.resize(to - destination.data());
			return ByteString(destination.data(), destination.size());
		}
		else if(result == std::codecvt_base::error)
		{
			throw ByteString::ConversionError(true);
		}
		else if(result == std::codecvt_base::partial)
		{
			// Take the write offset BEFORE resize(), which may invalidate
			// `to`; the empty() check keeps a zero-sized buffer from staying
			// at zero and looping forever.
			auto const used = to - destination.data();
			destination.resize(destination.empty() ? 1 : 2 * destination.size());
			to = destination.data() + used;
		}
	}
}
60 changes: 60 additions & 0 deletions src/common/String.h
@@ -0,0 +1,60 @@
#ifndef TPT_STRING
#define TPT_STRING

#include <stdexcept>
#include <string>
#include <type_traits>

class String;

/*
 * A string of raw bytes (char). It adds no data members to
 * std::basic_string<char>; it only adds conversions to the code-point based
 * String class and an exception type for failed conversions.
 */
class ByteString : public std::basic_string<char>
{
public:
	ByteString(): std::basic_string<char>() {}
	ByteString(size_type count, value_type ch): std::basic_string<char>(count, ch) {}
	ByteString(value_type const *ch, size_type count): std::basic_string<char>(ch, count) {}
	ByteString(value_type const *ch): std::basic_string<char>(ch) {}
	template<class It> ByteString(It first, It last): std::basic_string<char>(first, last) {}

	// Defaulted rather than hand-written so that the move operations pick up
	// the base class's noexcept guarantee. Without it, std::vector<ByteString>
	// falls back to copying elements when it reallocates.
	ByteString(ByteString const &other) = default;
	ByteString(ByteString &&other) = default;
	ByteString &operator=(ByteString const &other) = default;
	ByteString &operator=(ByteString &&other) = default;

	/*
	 * Thrown when a conversion between ByteString and String fails.
	 */
	class ConversionError : public std::runtime_error
	{
		// Formats the message for the (at, upto) constructor; defined in the
		// implementation file.
		static std::string formatError(value_type const *at, value_type const *upto);
	public:
		// at/upto delimit the bytes starting at the point of failure.
		ConversionError(value_type const *at, value_type const *upto): std::runtime_error(formatError(at, upto)) {}
		// to == true: failure while encoding to UTF-8; false: while decoding from UTF-8.
		ConversionError(bool to): std::runtime_error(to ? "Could not convert to UTF-8" : "Could not convert from UTF-8") {}
	};

	// Decodes the bytes as UTF-8. When ignoreError is false a ConversionError
	// is thrown on malformed input.
	String FromUtf8(bool ignoreError = true) const;
	// Widens every byte to one code point without any decoding.
	inline String FromAscii() const;
};

/*
 * A string of char32_t code points. It adds no data members to
 * std::basic_string<char32_t>; it only adds construction from narrow
 * character arrays and encoding to a UTF-8 ByteString.
 */
class String : public std::basic_string<char32_t>
{
public:
	String(): std::basic_string<char32_t>() {}
	String(size_type count, value_type ch): std::basic_string<char32_t>(count, ch) {}
	String(value_type const *ch, size_type count): std::basic_string<char32_t>(ch, count) {}
	String(value_type const *ch): std::basic_string<char32_t>(ch) {}
	template<class It> String(It first, It last): std::basic_string<char32_t>(first, last) {}

	// Defaulted rather than hand-written so that the move operations pick up
	// the base class's noexcept guarantee. Without it, std::vector<String>
	// falls back to copying elements when it reallocates.
	String(String const &other) = default;
	String(String &&other) = default;
	String &operator=(String const &other) = default;
	String &operator=(String &&other) = default;

	// Builds a String from a narrow character array such as a string literal.
	// N counts the literal's terminating NUL, so that terminator is dropped
	// when present; an array that does not end in NUL is taken whole.
	template<unsigned N> String(ByteString::value_type const (&ch)[N]): std::basic_string<char32_t>(ByteString(ch, ch[N - 1] == '\0' ? N - 1 : N).FromAscii()) {}

	// Encodes the code points as UTF-8.
	ByteString ToUtf8() const;
};

/*
 * Widens every byte of this string to one code point, with no decoding.
 *
 * Each byte is converted through its unsigned type first, so bytes >= 0x80
 * become 0x80..0xFF instead of sign-extending to 0xFFFFFFxx on platforms
 * where char is signed. This matches how FromUtf8 passes bad bytes through.
 *
 * Returns a String of the same length as this byte string.
 */
inline String ByteString::FromAscii() const
{
	typedef std::make_unsigned<value_type>::type unsigned_byte;

	String destination(size(), String::value_type());
	for(size_type i = 0; i < size(); i++)
		destination[i] = String::value_type(unsigned_byte(operator[](i)));
	return destination;
}
#endif

0 comments on commit 4912674

Please sign in to comment.