diff --git a/ChangeLog b/ChangeLog index 8b652e93b..3690ef436 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +2017-04-22 Nathan Russell + + * inst/include/Rcpp/sugar/functions/strings/trimws.h: Added sugar + function trimws with unit tests + * inst/include/Rcpp/sugar/functions/strings/strings.h: Idem + * inst/unitTests/cpp/sugar.cpp: Idem + * inst/unitTests/runit.sugar.R: Idem + 2017-04-20 Dirk Eddelbuettel * DESCRIPTION (Version, Date): Roll minor version diff --git a/inst/NEWS.Rd b/inst/NEWS.Rd index 4e1be6c0b..c63d50cf6 100644 --- a/inst/NEWS.Rd +++ b/inst/NEWS.Rd @@ -25,6 +25,11 @@ (James Balamuta in \ghpr{661} addressing \ghit{628}, \ghit{563}, \ghit{552}, \ghit{460}, \ghit{419}, and \ghit{251}). } + \item Changes in Rcpp Sugar: + \itemize{ + \item Added sugar function \code{trimws} (Nathan Russell in \ghpr{680} + addressing \ghit{679}). + } } } diff --git a/inst/include/Rcpp/sugar/functions/strings/strings.h b/inst/include/Rcpp/sugar/functions/strings/strings.h index 260b3af16..6031e53b5 100644 --- a/inst/include/Rcpp/sugar/functions/strings/strings.h +++ b/inst/include/Rcpp/sugar/functions/strings/strings.h @@ -23,5 +23,6 @@ #define RCPP_SUGAR_FUNCTIONS_STRINGS_H #include +#include #endif diff --git a/inst/include/Rcpp/sugar/functions/strings/trimws.h b/inst/include/Rcpp/sugar/functions/strings/trimws.h new file mode 100644 index 000000000..92ca7a409 --- /dev/null +++ b/inst/include/Rcpp/sugar/functions/strings/trimws.h @@ -0,0 +1,219 @@ +// -*- mode: C++; c-indent-level: 4; c-basic-offset: 4; indent-tabs-mode: nil; -*- +// +// trimws.h: Rcpp R/C++ interface class library -- trimws +// +// Copyright (C) 2017 Nathan Russell +// +// This file is part of Rcpp. +// +// Rcpp is free software: you can redistribute it and/or modify it +// under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 2 of the License, or +// (at your option) any later version. 
+//
+// Rcpp is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Rcpp. If not, see <http://www.gnu.org/licenses/>.
+
+#ifndef Rcpp__sugar__trimws_h
+#define Rcpp__sugar__trimws_h
+// NOTE(review): include targets restored after extraction loss (<cstring> presumed) -- confirm.
+#include <string>
+#include <cstring>
+
+namespace Rcpp {
+namespace sugar {
+namespace detail {
+
+
+/* NB: std::isspace is not used because it also counts
+   '\f' and '\v' as whitespace, whereas base::trimws only
+   checks for ' ', '\t', '\r', and '\n' */
+inline bool isws(const char c) {
+    return c == ' ' || c == '\t' || c == '\n' || c == '\r';
+}
+// Skips leading whitespace; returns a pointer into `str` itself ("" when str is NULL).
+inline const char* trim_left(const char* str) {
+    if (!str) {
+        return "";
+    }
+
+    while (isws(*str)) {
+        ++str;
+    }
+
+    return str;
+}
+// Copies the first `sz` bytes of `str`, minus trailing whitespace, into *buff; returns buff->c_str().
+inline const char* trim_right(const char* str, R_len_t sz, std::string* buff) {
+    if (!str) {
+        return "";
+    }
+
+    buff->clear();
+    const char* end = str + (sz > 0 ? sz : 0);
+    // `end` is exclusive and never moves before `str`, so sz == 0 no longer reads str[-1].
+    for (; end > str && isws(*(end - 1)); --end);
+
+    buff->append(str, static_cast<std::string::size_type>(end - str));
+    return buff->c_str();
+}
+// Like trim_right, but leading whitespace is dropped as well ("" when str is NULL).
+inline const char* trim_both(const char* str, R_len_t sz, std::string* buff) {
+    if (!str) {
+        return "";
+    }
+
+    buff->clear();
+
+    const char* end = str + (sz > 0 ? sz : 0);
+    while (str < end && isws(*str)) {
+        ++str;
+    }
+
+    // Bounded backwards scan: no pointer before `str` is ever formed or dereferenced.
+    for (; end > str && isws(*(end - 1)); --end);
+
+    buff->append(str, static_cast<std::string::size_type>(end - str));
+    return buff->c_str();
+}
+
+
+} // detail
+} // sugar
+// Trims each non-NA element of `x` (NA is copied through); `which` is dispatched on its first character.
+// NOTE(review): the <STRSXP> arguments in this file were missing from the reviewed copy -- confirm.
+inline Vector<STRSXP> trimws(const Vector<STRSXP>& x, const char* which = "both") {
+    R_xlen_t i = 0, sz = x.size();
+    Vector<STRSXP> res = no_init(sz);
+    std::string buffer;
+    // `buffer` is scratch space reused for every element by trim_both / trim_right.
+    if (*which == 'b') {
+        for (; i < sz; i++) {
+            if (traits::is_na<STRSXP>(x[i])) {
+                res[i] = x[i];
+            } else {
+                res[i] = sugar::detail::trim_both(
+                    x[i],
+                    LENGTH(x[i]),
+                    &buffer
+                );
+            }
+        }
+    } else if (*which == 'l') {
+        for (; i < sz; i++) {
+            if (traits::is_na<STRSXP>(x[i])) {
+                res[i] = x[i];
+            } else {
+                res[i] = sugar::detail::trim_left(x[i]);
+            }
+        }
+    } else if (*which == 'r') {
+        for (; i < sz; i++) {
+            if (traits::is_na<STRSXP>(x[i])) {
+                res[i] = x[i];
+            } else {
+                res[i] = sugar::detail::trim_right(
+                    x[i],
+                    LENGTH(x[i]),
+                    &buffer
+                );
+            }
+        }
+    } else {  // first character is none of 'b', 'l', 'r'
+        stop("Invalid `which` argument '%s'!", which);
+        return Vector<STRSXP>::create("Unreachable");
+    }
+
+    return res;
+}
+// Matrix overload: same element-wise trimming; the result is created with x's nrow() and ncol().
+inline Matrix<STRSXP> trimws(const Matrix<STRSXP>& x, const char* which = "both") {
+    R_xlen_t i = 0, nr = x.nrow(), nc = x.ncol(), sz = x.size();
+    Matrix<STRSXP> res = no_init(nr, nc);
+    std::string buffer;
+    // Elements are visited through the flat index 0 .. x.size() - 1.
+    if (*which == 'b') {
+        for (; i < sz; i++) {
+            if (traits::is_na<STRSXP>(x[i])) {
+                res[i] = x[i];
+            } else {
+                res[i] = sugar::detail::trim_both(
+                    x[i],
+                    LENGTH(x[i]),
+                    &buffer
+                );
+            }
+        }
+    } else if (*which == 'l') {
+        for (; i < sz; i++) {
+            if (traits::is_na<STRSXP>(x[i])) {
+                res[i] = x[i];
+            } else {
+                res[i] = sugar::detail::trim_left(x[i]);
+            }
+        }
+    } else if (*which == 'r') {
+        for (; i < sz; i++) {
+            if (traits::is_na<STRSXP>(x[i])) {
+                res[i] = x[i];
+            } else {
+                res[i] = sugar::detail::trim_right(
+                    x[i],
+                    LENGTH(x[i]),
+                    &buffer
+                );
+            }
+        }
+    } else {  // first character is none of 'b', 'l', 'r'
+        stop("Invalid `which` argument '%s'!", which);
+        return Matrix<STRSXP>();
+    }
+
+    return res;
+}
+// Scalar overload: trims one String; an NA input is returned as a String wrapping the same SEXP.
+inline String trimws(const String& str, const char* which = "both") {
+    std::string buffer;
+    // Each branch returns directly; falling through all three means `which` is invalid.
+    if (*which == 'b') {
+        if (traits::is_na<STRSXP>(str.get_sexp())) {
+            return String(str.get_sexp());
+        }
+        return sugar::detail::trim_both(
+            str.get_cstring(),
+            LENGTH(str.get_sexp()),
+            &buffer
+        );
+    }
+
+    if (*which == 'l') {
+        if (traits::is_na<STRSXP>(str.get_sexp())) {
+            return String(str.get_sexp());
+        }
+        return sugar::detail::trim_left(str.get_cstring());
+    }
+
+    if (*which == 'r') {
+        if (traits::is_na<STRSXP>(str.get_sexp())) {
+            return String(str.get_sexp());
+        }
+        return sugar::detail::trim_right(
+            str.get_cstring(),
+            LENGTH(str.get_sexp()),
+            &buffer
+        );
+    }
+
+    stop("Invalid `which` argument '%s'!", which);
+    return String("Unreachable");
+}
+
+
+} // Rcpp
+
+#endif // Rcpp__sugar__trimws_h
diff
--git a/inst/unitTests/cpp/sugar.cpp b/inst/unitTests/cpp/sugar.cpp
index c002bff63..a5e7f26b8 100644
--- a/inst/unitTests/cpp/sugar.cpp
+++ b/inst/unitTests/cpp/sugar.cpp
@@ -1201,3 +1201,21 @@ LogicalMatrix UpperTri(NumericMatrix x, bool diag = false) {
 LogicalMatrix LowerTri(NumericMatrix x, bool diag = false) {
     return lower_tri(x, diag);
 }
+
+
+// 22 April 2017: trimws
+// Exported wrappers so the R unit tests can call trimws on a vector, a matrix and a String.
+// [[Rcpp::export]]
+CharacterVector vtrimws(CharacterVector x, const char* which = "both") {
+    return trimws(x, which);
+}
+
+// [[Rcpp::export]]
+CharacterMatrix mtrimws(CharacterMatrix x, const char* which = "both") {
+    return trimws(x, which);
+}
+
+// [[Rcpp::export]]
+String strimws(String x, const char* which = "both") {
+    return trimws(x, which);
+}
diff --git a/inst/unitTests/runit.sugar.R b/inst/unitTests/runit.sugar.R
index 632af26ee..c8cf3ed2f 100644
--- a/inst/unitTests/runit.sugar.R
+++ b/inst/unitTests/runit.sugar.R
@@ -2048,4 +2048,135 @@ if (.runThisTest) {
 
     }
 
+
+    ## 22 April 2017
+    ## trimws -- vector
+    test.sugar.vtrimws <- function() {
+        ## inputs: space / tab / CR / LF padding on either or both sides, plus NA, "" and all-whitespace
+        x <- c(
+            " a b c", "a b c ", " a b c ",
+            "\t\ta b c", "a b c\t\t", "\t\ta b c\t\t",
+            "\r\ra b c", "a b c\r\r", "\r\ra b c\r\r",
+            "\n\na b c", "a b c\n\n", "\n\na b c\n\n",
+            NA, "", " ", " \t\r\n ", "\n \t \r "
+        )
+        ## each check compares the exported wrapper against R's own trimws on the same input
+        checkEquals(
+            vtrimws(x), trimws(x),
+            "vtrimws / which = 'both'"
+        )
+
+        checkEquals(
+            vtrimws(x, 'l'), trimws(x, 'l'),
+            "vtrimws / which = 'left'"
+        )
+
+        checkEquals(
+            vtrimws(x, 'r'), trimws(x, 'r'),
+            "vtrimws / which = 'right'"
+        )
+        ## a `which` value that should be rejected must raise an error
+        checkException(
+            vtrimws(x, "invalid"),
+            msg = "vtrimws -- bad `which` argument"
+        )
+
+    }
+
+
+    ## trimws -- matrix
+    test.sugar.mtrimws <- function() {
+        ## same 17 inputs as the vector test
+        x <- c(
+            " a b c", "a b c ", " a b c ",
+            "\t\ta b c", "a b c\t\t", "\t\ta b c\t\t",
+            "\r\ra b c", "a b c\r\r", "\r\ra b c\r\r",
+            "\n\na b c", "a b c\n\n", "\n\na b c\n\n",
+            NA, "", " ", " \t\r\n ", "\n \t \r "
+        )
+        x <- matrix(x, nrow = length(x), ncol = 4)  ## the strings are recycled into 4 columns
+
+        checkEquals(
+            mtrimws(x), trimws(x),
+            "mtrimws / which = 'both'"
+        )
+
+        checkEquals(
+            mtrimws(x, 'l'), trimws(x, 'l'),
+            "mtrimws / which = 'left'"
+        )
+
+        checkEquals(
+            mtrimws(x, 'r'), trimws(x, 'r'),
+            "mtrimws / which = 'right'"
+        )
+        ## a `which` value that should be rejected must raise an error
+        checkException(
+            mtrimws(x, "invalid"),
+            msg = "mtrimws -- bad `which` argument"
+        )
+
+    }
+
+
+    ## trimws -- String
+    test.sugar.strimws <- function() {
+        ## same 17 inputs as the vector test
+        x <- c(
+            " a b c", "a b c ", " a b c ",
+            "\t\ta b c", "a b c\t\t", "\t\ta b c\t\t",
+            "\r\ra b c", "a b c\r\r", "\r\ra b c\r\r",
+            "\n\na b c", "a b c\n\n", "\n\na b c\n\n",
+            NA, "", " ", " \t\r\n ", "\n \t \r "
+        )
+        ## strimws takes a single String, so both sides are applied one element at a time via vapply
+        lhs <- vapply(
+            x, strimws, character(1),
+            USE.NAMES = FALSE
+        )
+        rhs <- vapply(
+            x, trimws, character(1),
+            USE.NAMES = FALSE
+        )
+
+        checkEquals(
+            lhs, rhs,
+            "strimws / which = 'both'"
+        )
+
+        lhs <- vapply(
+            x, strimws, character(1),
+            which = 'l', USE.NAMES = FALSE
+        )
+        rhs <- vapply(
+            x, trimws, character(1),
+            which = 'l', USE.NAMES = FALSE
+        )
+
+        checkEquals(
+            lhs, rhs,
+            "strimws / which = 'left'"
+        )
+
+        lhs <- vapply(
+            x, strimws, character(1),
+            which = 'r', USE.NAMES = FALSE
+        )
+        rhs <- vapply(
+            x, trimws, character(1),
+            which = 'r', USE.NAMES = FALSE
+        )
+
+        checkEquals(
+            lhs, rhs,
+            "strimws / which = 'right'"
+        )
+        ## a `which` value that should be rejected must raise an error
+        checkException(
+            strimws(x[1], "invalid"),
+            msg = "strimws -- bad `which` argument"
+        )
+
+    }
+
 }