Skip to content

Commit

Permalink
Merge pull request #680 from nathan-russell/feature/sugar-trimws
Browse files Browse the repository at this point in the history
Sugar function 'trimws' with unit tests (closes #679)
  • Loading branch information
eddelbuettel committed Apr 23, 2017
2 parents 7492cff + c56e54b commit e81493b
Show file tree
Hide file tree
Showing 6 changed files with 382 additions and 0 deletions.
8 changes: 8 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
2017-04-22 Nathan Russell <russell.nr2012@gmail.com>

* inst/include/Rcpp/sugar/functions/strings/trimws.h: Added sugar
function trimws with unit tests
* inst/include/Rcpp/sugar/functions/strings/strings.h: Idem
* inst/unitTests/cpp/sugar.cpp: Idem
* inst/unitTests/runit.sugar.R: Idem

2017-04-20 Dirk Eddelbuettel <edd@debian.org>

* DESCRIPTION (Version, Date): Roll minor version
Expand Down
5 changes: 5 additions & 0 deletions inst/NEWS.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@
(James Balamuta in \ghpr{661} addressing \ghit{628}, \ghit{563},
\ghit{552}, \ghit{460}, \ghit{419}, and \ghit{251}).
}
\item Changes in Rcpp Sugar:
\itemize{
\item Added sugar function \code{trimws} (Nathan Russell in \ghpr{680}
addressing \ghit{679}).
}
}
}

Expand Down
1 change: 1 addition & 0 deletions inst/include/Rcpp/sugar/functions/strings/strings.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,6 @@
#define RCPP_SUGAR_FUNCTIONS_STRINGS_H

#include <Rcpp/sugar/functions/strings/collapse.h>
#include <Rcpp/sugar/functions/strings/trimws.h>

#endif
219 changes: 219 additions & 0 deletions inst/include/Rcpp/sugar/functions/strings/trimws.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
// -*- mode: C++; c-indent-level: 4; c-basic-offset: 4; indent-tabs-mode: nil; -*-
//
// trimws.h: Rcpp R/C++ interface class library -- trimws
//
// Copyright (C) 2017 Nathan Russell
//
// This file is part of Rcpp.
//
// Rcpp is free software: you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 2 of the License, or
// (at your option) any later version.
//
// Rcpp is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Rcpp. If not, see <http://www.gnu.org/licenses/>.

#ifndef Rcpp__sugar__trimws_h
#define Rcpp__sugar__trimws_h

#include <string>
#include <cstring>

namespace Rcpp {
namespace sugar {
namespace detail {


/* Whitespace predicate used by the trimming helpers.
   std::isspace is deliberately avoided: it also classifies
   '\f' and '\v' as whitespace, whereas base::trimws only
   strips ' ', '\t', '\r', and '\n'. */
inline bool isws(const char c) {
    switch (c) {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
        return true;
    default:
        return false;
    }
}

/* Skip leading whitespace of a NUL-terminated string.
   Returns a pointer into the caller's buffer (nothing is copied),
   positioned at the first character for which isws() is false; the
   terminating NUL stops the scan because it is not whitespace.
   A null input yields the empty string literal. */
inline const char* trim_left(const char* str) {
    if (str == NULL) {
        return "";
    }

    const char* cursor = str;
    for (; isws(*cursor); ++cursor) {
        /* advance past one leading whitespace character */
    }

    return cursor;
}

/* Copy `str` into `*buff` without its trailing whitespace.
 *
 *   str   text to trim; a null pointer yields the empty string literal
 *   sz    number of characters in `str` (values <= 0 are treated as empty)
 *   buff  scratch string that receives the trimmed copy; its previous
 *         contents are discarded
 *
 * Returns buff->c_str(), so the result is only usable while `*buff` is
 * alive and unmodified.
 *
 * The scan uses a one-past-the-end cursor and only ever inspects
 * end[-1] while end > str. This guarantees that an empty input never
 * reads the byte in front of the buffer and never produces a negative
 * length for append().
 */
inline const char* trim_right(const char* str, R_len_t sz, std::string* buff) {
    if (!str) {
        return "";
    }

    buff->clear();

    const char* end = str + (sz > 0 ? sz : 0);

    while (end > str && isws(*(end - 1))) {
        --end;
    }

    buff->append(str, static_cast<std::size_t>(end - str));
    return buff->c_str();
}

/* Copy `str` into `*buff` without leading or trailing whitespace.
 *
 *   str   NUL-terminated text to trim; a null pointer yields the empty
 *         string literal
 *   sz    number of characters in `str`
 *   buff  scratch string that receives the trimmed copy; its previous
 *         contents are discarded
 *
 * Returns buff->c_str(), so the result is only usable while `*buff` is
 * alive and unmodified.
 */
inline const char* trim_both(const char* str, R_len_t sz, std::string* buff) {
    if (!str) {
        return "";
    }

    buff->clear();

    /* Left trim: the terminating NUL is not whitespace, so it ends the scan. */
    while (isws(*str)) {
        ++str;
        --sz;
    }

    /* Nothing left (empty or all-whitespace input). Returning here avoids
       forming a pointer in front of `str` and avoids handing a
       non-positive length to append(). */
    if (sz <= 0) {
        return buff->c_str();
    }

    /* Right trim: *str is known to be non-whitespace at this point, so
       `last` cannot move below `str`. */
    const char* last = str + (sz - 1);
    while (last > str && isws(*last)) {
        --last;
    }

    buff->append(str, static_cast<std::size_t>(last - str) + 1);
    return buff->c_str();
}


} // detail
} // sugar


/* Element-wise whitespace trimming of a character vector.
 *
 * Only the first character of `which` is inspected:
 *   'b' -> trim both ends, 'l' -> trim the left end, 'r' -> trim the
 *   right end; any other value raises an error through stop().
 * NA elements are copied to the result unchanged.
 *
 * Returns a new vector with the same number of elements as `x`.
 */
inline Vector<STRSXP> trimws(const Vector<STRSXP>& x, const char* which = "both") {
    const R_xlen_t n = x.size();
    Vector<STRSXP> out = no_init(n);
    std::string scratch;  // reused by the helpers that build a trimmed copy

    switch (*which) {
    case 'b':
        for (R_xlen_t k = 0; k < n; k++) {
            if (traits::is_na<STRSXP>(x[k])) {
                out[k] = x[k];
                continue;
            }
            out[k] = sugar::detail::trim_both(x[k], LENGTH(x[k]), &scratch);
        }
        break;

    case 'l':
        for (R_xlen_t k = 0; k < n; k++) {
            if (traits::is_na<STRSXP>(x[k])) {
                out[k] = x[k];
                continue;
            }
            out[k] = sugar::detail::trim_left(x[k]);
        }
        break;

    case 'r':
        for (R_xlen_t k = 0; k < n; k++) {
            if (traits::is_na<STRSXP>(x[k])) {
                out[k] = x[k];
                continue;
            }
            out[k] = sugar::detail::trim_right(x[k], LENGTH(x[k]), &scratch);
        }
        break;

    default:
        stop("Invalid `which` argument '%s'!", which);
        return Vector<STRSXP>::create("Unreachable");
    }

    return out;
}

/* Element-wise whitespace trimming of a character matrix.
 *
 * The result is allocated with the row and column counts of `x` and is
 * filled by linear index. Only the first character of `which` is
 * inspected:
 *   'b' -> trim both ends, 'l' -> trim the left end, 'r' -> trim the
 *   right end; any other value raises an error through stop().
 * NA elements are copied to the result unchanged.
 */
inline Matrix<STRSXP> trimws(const Matrix<STRSXP>& x, const char* which = "both") {
    R_xlen_t rows = x.nrow();
    R_xlen_t cols = x.ncol();
    const R_xlen_t n = x.size();
    Matrix<STRSXP> out = no_init(rows, cols);
    std::string scratch;  // reused by the helpers that build a trimmed copy

    switch (*which) {
    case 'b':
        for (R_xlen_t k = 0; k < n; k++) {
            if (traits::is_na<STRSXP>(x[k])) {
                out[k] = x[k];
                continue;
            }
            out[k] = sugar::detail::trim_both(x[k], LENGTH(x[k]), &scratch);
        }
        break;

    case 'l':
        for (R_xlen_t k = 0; k < n; k++) {
            if (traits::is_na<STRSXP>(x[k])) {
                out[k] = x[k];
                continue;
            }
            out[k] = sugar::detail::trim_left(x[k]);
        }
        break;

    case 'r':
        for (R_xlen_t k = 0; k < n; k++) {
            if (traits::is_na<STRSXP>(x[k])) {
                out[k] = x[k];
                continue;
            }
            out[k] = sugar::detail::trim_right(x[k], LENGTH(x[k]), &scratch);
        }
        break;

    default:
        stop("Invalid `which` argument '%s'!", which);
        return Matrix<STRSXP>();
    }

    return out;
}

/* Whitespace trimming of a single Rcpp::String.
 *
 * Only the first character of `which` is inspected:
 *   'b' -> trim both ends, 'l' -> trim the left end, 'r' -> trim the
 *   right end; any other value raises an error through stop().
 * The selector is dispatched before the NA test, so an unrecognised
 * `which` is reported even when `str` is NA. For a recognised selector
 * an NA input is returned as a String wrapping the same SEXP.
 */
inline String trimws(const String& str, const char* which = "both") {
    std::string scratch;  // backing storage for the trimmed copy

    switch (*which) {
    case 'b':
        if (traits::is_na<STRSXP>(str.get_sexp())) {
            return String(str.get_sexp());
        }
        return sugar::detail::trim_both(
            str.get_cstring(), LENGTH(str.get_sexp()), &scratch);

    case 'l':
        if (traits::is_na<STRSXP>(str.get_sexp())) {
            return String(str.get_sexp());
        }
        return sugar::detail::trim_left(str.get_cstring());

    case 'r':
        if (traits::is_na<STRSXP>(str.get_sexp())) {
            return String(str.get_sexp());
        }
        return sugar::detail::trim_right(
            str.get_cstring(), LENGTH(str.get_sexp()), &scratch);

    default:
        break;
    }

    stop("Invalid `which` argument '%s'!", which);
    return String("Unreachable");
}


} // Rcpp

#endif // Rcpp__sugar__trimws_h
18 changes: 18 additions & 0 deletions inst/unitTests/cpp/sugar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1201,3 +1201,21 @@ LogicalMatrix UpperTri(NumericMatrix x, bool diag = false) {
LogicalMatrix LowerTri(NumericMatrix x, bool diag = false) {
    // Test shim: forwards both arguments to the sugar function lower_tri()
    // and hands its result back as a LogicalMatrix.
    LogicalMatrix mask = lower_tri(x, diag);
    return mask;
}


// 22 April 2017: trimws

// Test shim exposing the character-vector overload of trimws();
// `which` is passed through unchanged.
// [[Rcpp::export]]
CharacterVector vtrimws(CharacterVector x, const char* which = "both") {
    CharacterVector trimmed = trimws(x, which);
    return trimmed;
}

// Test shim exposing the character-matrix overload of trimws();
// `which` is passed through unchanged.
// [[Rcpp::export]]
CharacterMatrix mtrimws(CharacterMatrix x, const char* which = "both") {
    CharacterMatrix trimmed = trimws(x, which);
    return trimmed;
}

// Test shim exposing the single-String overload of trimws();
// `which` is passed through unchanged.
// [[Rcpp::export]]
String strimws(String x, const char* which = "both") {
    String trimmed = trimws(x, which);
    return trimmed;
}
Loading

0 comments on commit e81493b

Please sign in to comment.