Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
2016-08-02 Qiang Kou <qkou@umail.iu.edu>

* inst/include/Rcpp/String.h: CE_UTF8 as default encoding
* inst/unitTests/cpp/String.cpp: Update unit test
* inst/unitTests/runit.String.R: Idem

2016-08-01 Nathan Russell <russell.nr2012@gmail.com>

* inst/include/Rcpp/vector/Vector.h: Added decreasing option for Vector
Expand Down
85 changes: 20 additions & 65 deletions inst/include/Rcpp/String.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@
#define RCPP_STRING_DEBUG_3(fmt, M1, M2, M3)
#endif


namespace Rcpp {

/**
Expand All @@ -53,7 +52,7 @@ namespace Rcpp {
typedef internal::const_string_proxy<STRSXP> const_StringProxy;

/**
 * default constructor
 *
 * Starts from an empty R character value, hands it to Rcpp_PreserveObject,
 * and marks both the SEXP (valid) and the empty buffer (buffer_ready) as usable.
 */
String(): data(Rf_mkChar("")), buffer(), valid(true), buffer_ready(true), enc(CE_NATIVE) {
String(): data(Rf_mkCharCE("", CE_UTF8)), buffer(), valid(true), buffer_ready(true), enc(CE_UTF8) {
Rcpp_PreserveObject(data);
RCPP_STRING_DEBUG("String()");
}
Expand All @@ -64,12 +63,6 @@ namespace Rcpp {
RCPP_STRING_DEBUG("String(const String&)");
}

/**
 * copy of another String with an explicitly named encoding
 *
 * Takes other.get_sexp() as data, so the local buffer starts out not ready,
 * then passes the encoding name to set_encoding().
 */
String(const String& other, const std::string& enc)
    : data(other.get_sexp()),
      valid(true),
      buffer_ready(false)
{
    Rcpp_PreserveObject(data);
    set_encoding(enc);
    RCPP_STRING_DEBUG("String(const String&)");
}

/** construct a string from a single CHARSXP SEXP */
String(SEXP charsxp) : data(R_NilValue) {
if (TYPEOF(charsxp) == STRSXP) {
Expand All @@ -88,33 +81,16 @@ namespace Rcpp {
RCPP_STRING_DEBUG("String(SEXP)");
}

/**
 * construct a string from a CHARSXP, or from the first element of a STRSXP,
 * and then apply the encoding named by enc
 *
 * Any other SEXP type leaves data at R_NilValue.
 */
String(SEXP charsxp, const std::string& enc)
    : data(R_NilValue)
{
    switch (TYPEOF(charsxp)) {
    case STRSXP:
        data = STRING_ELT(charsxp, 0);
        break;
    case CHARSXP:
        data = charsxp;
        break;
    default:
        break;
    }

    // The length is only queried when data is reported as a string vector.
    const bool is_string_vector = ::Rf_isString(data);
    if (is_string_vector && ::Rf_length(data) != 1) {
        throw ::Rcpp::not_compatible("expecting a single value");
    }

    valid = true;
    buffer_ready = false;
    Rcpp_PreserveObject(data);
    set_encoding(enc);
    RCPP_STRING_DEBUG("String(SEXP)");
}

/**
 * from string proxy
 *
 * Uses proxy.get() as data and records the encoding that Rf_getCharCE
 * reports for that same value; the buffer starts out not ready.
 */
String(const StringProxy& proxy)
    : data(proxy.get()),
      valid(true),
      buffer_ready(false),
      enc(Rf_getCharCE(proxy.get()))
{
    Rcpp_PreserveObject(data);
    RCPP_STRING_DEBUG("String(const StringProxy&)");
}

/**
 * from string proxy, with an explicit encoding
 *
 * Uses proxy.get() as data (buffer not ready) and forwards enc to set_encoding().
 */
String(const StringProxy& proxy, const std::string& enc): data(proxy.get()), valid(true), buffer_ready(false) {
String(const StringProxy& proxy, cetype_t enc): data(proxy.get()), valid(true), buffer_ready(false) {
Rcpp_PreserveObject(data);
set_encoding(enc);
RCPP_STRING_DEBUG("String(const StringProxy&)");
RCPP_STRING_DEBUG("String(const StringProxy&, cetype_t)");
}

/** from string proxy */
Expand All @@ -123,40 +99,40 @@ namespace Rcpp {
RCPP_STRING_DEBUG("String(const const_StringProxy&)");
}

/**
 * from a const string proxy, with an explicit encoding
 *
 * Uses proxy.get() as data (buffer not ready) and forwards enc to set_encoding().
 */
String(const const_StringProxy& proxy, const std::string& enc): data(proxy.get()), valid(true), buffer_ready(false) {
String(const const_StringProxy& proxy, cetype_t enc): data(proxy.get()), valid(true), buffer_ready(false) {
Rcpp_PreserveObject(data);
set_encoding(enc);
RCPP_STRING_DEBUG("String(const const_StringProxy&)");
RCPP_STRING_DEBUG("String(const const_StringProxy&, cetype_t)");
}

/**
 * from a std::string
 *
 * The text is copied into buffer (buffer_ready is true); no R object is
 * created here, so data is R_NilValue and valid is false.
 */
String(const std::string& s) : buffer(s), valid(false), buffer_ready(true), enc(CE_NATIVE) {
String(const std::string& s, cetype_t enc = CE_UTF8) : buffer(s), valid(false), buffer_ready(true), enc(enc) {
// only the buffer holds the string at this point
data = R_NilValue;
RCPP_STRING_DEBUG("String(const std::string&)");
RCPP_STRING_DEBUG("String(const std::string&, cetype_t)");
}

/**
 * from a std::wstring
 *
 * The wide string is converted with internal::make_charsexp and the result is
 * kept as data (valid is true); the narrow buffer is not filled (buffer_ready is false).
 */
String(const std::wstring& s) : data(internal::make_charsexp(s)), valid(true), buffer_ready(false), enc(CE_NATIVE) {
String(const std::wstring& s, cetype_t enc = CE_UTF8) : data(internal::make_charsexp(s)), valid(true), buffer_ready(false), enc(enc) {
Rcpp_PreserveObject(data);
RCPP_STRING_DEBUG("String(const std::wstring&)");
RCPP_STRING_DEBUG("String(const std::wstring&, cetype_t)");
}

/**
 * from a const char*
 *
 * The text is copied into buffer (buffer_ready is true); no R object is
 * created here, so data is R_NilValue and valid is false.
 */
String(const char* s) : buffer(s), valid(false), buffer_ready(true), enc(CE_NATIVE) {
String(const char* s, cetype_t enc = CE_UTF8) : buffer(s), valid(false), buffer_ready(true), enc(enc) {
// only the buffer holds the string at this point
data = R_NilValue;
RCPP_STRING_DEBUG("String(const char*)");
RCPP_STRING_DEBUG("String(const char*, cetype_t)");
}

/**
 * from a const wchar_t*
 *
 * The wide string is converted with internal::make_charsexp and the result is
 * kept as data (valid is true); the narrow buffer is not filled (buffer_ready is false).
 */
String(const wchar_t* s) : data(internal::make_charsexp(s)), valid(true), buffer_ready(false), enc(CE_NATIVE) {
String(const wchar_t* s, cetype_t enc = CE_UTF8) : data(internal::make_charsexp(s)), valid(true), buffer_ready(false), enc(enc) {
Rcpp_PreserveObject(data);
RCPP_STRING_DEBUG("String(const wchar_t* s)");
RCPP_STRING_DEBUG("String(const wchar_t* s, cetype_t)");
}

/**
 * constructors from R primitives
 *
 * Each one converts its argument with internal::r_coerce<..., STRSXP>, keeps
 * the result as data (valid, buffer not ready) and passes it to
 * Rcpp_PreserveObject.
 */
String(int x) : data(internal::r_coerce<INTSXP,STRSXP>(x)), valid(true), buffer_ready(false), enc(CE_NATIVE) {Rcpp_PreserveObject(data);}
String(double x) : data(internal::r_coerce<REALSXP,STRSXP>(x)), valid(true), buffer_ready(false), enc(CE_NATIVE) {Rcpp_PreserveObject(data);}
String(bool x) : data(internal::r_coerce<LGLSXP,STRSXP>(x)), valid(true) , buffer_ready(false), enc(CE_NATIVE) {Rcpp_PreserveObject(data);}
String(Rcomplex x) : data(internal::r_coerce<CPLXSXP,STRSXP>(x)), valid(true), buffer_ready(false), enc(CE_NATIVE) {Rcpp_PreserveObject(data);}
String(Rbyte x) : data(internal::r_coerce<RAWSXP,STRSXP>(x)), valid(true), buffer_ready(false), enc(CE_NATIVE) {Rcpp_PreserveObject(data);}
String(int x) : data(internal::r_coerce<INTSXP,STRSXP>(x)), valid(true), buffer_ready(false), enc(CE_UTF8) {Rcpp_PreserveObject(data);}
String(double x) : data(internal::r_coerce<REALSXP,STRSXP>(x)), valid(true), buffer_ready(false), enc(CE_UTF8) {Rcpp_PreserveObject(data);}
String(bool x) : data(internal::r_coerce<LGLSXP,STRSXP>(x)), valid(true) , buffer_ready(false), enc(CE_UTF8) {Rcpp_PreserveObject(data);}
String(Rcomplex x) : data(internal::r_coerce<CPLXSXP,STRSXP>(x)), valid(true), buffer_ready(false), enc(CE_UTF8) {Rcpp_PreserveObject(data);}
String(Rbyte x) : data(internal::r_coerce<RAWSXP,STRSXP>(x)), valid(true), buffer_ready(false), enc(CE_UTF8) {Rcpp_PreserveObject(data);}

~String() {
Rcpp_ReleaseObject(data);
Expand Down Expand Up @@ -406,17 +382,8 @@ namespace Rcpp {
return buffer_ready ? buffer.c_str() : CHAR(data);
}

/**
 * name of the stored encoding: "bytes", "latin1" or "UTF-8";
 * every other value of enc is reported as "unknown"
 */
inline const std::string get_encoding() const {
switch (enc) {
case CE_BYTES:
return "bytes";
case CE_LATIN1:
return "latin1";
case CE_UTF8:
return "UTF-8";
default:
return "unknown";
}
/** the stored encoding, returned as the cetype_t value itself */
inline cetype_t get_encoding() const {
return enc;
}

inline void set_encoding(cetype_t encoding) {
Expand All @@ -431,18 +398,6 @@ namespace Rcpp {
}
}

inline void set_encoding(const std::string & encoding) {
if (encoding == "bytes") {
set_encoding(CE_BYTES);
} else if (encoding == "latin1") {
set_encoding(CE_LATIN1);
} else if (encoding == "UTF-8") {
set_encoding(CE_UTF8);
} else {
set_encoding(CE_ANY);
}
}

/** ordering of two Strings: strcmp on their C strings, true when this one sorts first */
bool operator<(const Rcpp::String& other) const {
    const char* lhs = get_cstring();
    const char* rhs = other.get_cstring();
    return strcmp(lhs, rhs) < 0;
}
Expand Down
8 changes: 4 additions & 4 deletions inst/unitTests/cpp/String.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,27 +90,27 @@ String test_push_front(String x) {
}

// Returns whatever String::get_encoding() reports for x.
// [[Rcpp::export]]
String test_String_encoding(String x) {
int test_String_encoding(String x) {
return x.get_encoding();
}

// Sets the encoding of x to UTF-8 via set_encoding() and returns x.
// [[Rcpp::export]]
String test_String_set_encoding(String x) {
x.set_encoding("UTF-8");
x.set_encoding(CE_UTF8);
return x;
}

// Copy-constructs y from x, sets the copy's encoding to UTF-8 and returns the copy.
// [[Rcpp::export]]
String test_String_ctor_encoding(String x) {
String y(x);
y.set_encoding("UTF-8");
y.set_encoding(CE_UTF8);
return y;
}


// Builds a String from a string literal, sets its encoding to UTF-8 and returns it.
// [[Rcpp::export]]
String test_String_ctor_encoding2() {
String y("å");
y.set_encoding("UTF-8");
y.set_encoding(CE_UTF8);
return y;
}
4 changes: 2 additions & 2 deletions inst/unitTests/runit.String.R
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,8 @@ if (.runThisTest) {
a <- b <- "å"
Encoding(a) <- "unknown"
Encoding(b) <- "UTF-8"
checkEquals(test_String_encoding(a), "unknown")
checkEquals(test_String_encoding(b), "UTF-8")
checkEquals(test_String_encoding(a), 0)
checkEquals(test_String_encoding(b), 1)
checkEquals(Encoding(test_String_set_encoding(a)), "UTF-8")
checkEquals(Encoding(test_String_ctor_encoding(a)), "UTF-8")
checkEquals(Encoding(test_String_ctor_encoding2()), "UTF-8")
Expand Down