diff --git a/inst/include/Rcpp/sugar/functions/functions.h b/inst/include/Rcpp/sugar/functions/functions.h index d93ad148b..48e76e6e8 100644 --- a/inst/include/Rcpp/sugar/functions/functions.h +++ b/inst/include/Rcpp/sugar/functions/functions.h @@ -80,5 +80,7 @@ #include #include +#include + #endif diff --git a/inst/include/Rcpp/sugar/functions/median.h b/inst/include/Rcpp/sugar/functions/median.h new file mode 100644 index 000000000..8694c6795 --- /dev/null +++ b/inst/include/Rcpp/sugar/functions/median.h @@ -0,0 +1,290 @@ +// -*- mode: C++; c-indent-level: 4; c-basic-offset: 4; indent-tabs-mode: nil; -*- +// +// median.h: Rcpp R/C++ interface class library -- median +// +// Copyright (C) 2016 Dirk Eddelbuettel, Romain Francois, and Nathan Russell +// +// This file is part of Rcpp. +// +// Rcpp is free software: you can redistribute it and/or modify it +// under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 2 of the License, or +// (at your option) any later version. +// +// Rcpp is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with Rcpp. If not, see . + +#ifndef Rcpp__sugar__median_h +#define Rcpp__sugar__median_h + +namespace Rcpp { +namespace sugar { +namespace median_detail { + +// need to return double for integer vectors +// (in case of even-length input vector) +// and Rcpp::String for STRSXP +// also need to return NA_REAL for +// integer vector yielding NA result +template +struct result { + typedef typename Rcpp::traits::storage_type::type type; + enum { rtype = RTYPE }; +}; + +template <> +struct result { + typedef double type; + enum { rtype = REALSXP }; +}; + +template <> +struct result { + typedef Rcpp::String type; + enum { rtype = STRSXP }; +}; + +// std::nth_element and std::max_element don't +// know how to compare Rcomplex values +template +inline bool less(T lhs, T rhs) { + return lhs < rhs; +} + +template<> +inline bool less(Rcomplex lhs, Rcomplex rhs) { + if (lhs.r < rhs.r) return true; + if (lhs.i < rhs.i) return true; + return false; +} + +// compiler does not know how to handle +// Rcomplex numerator / double denominator +// and need explicit cast for INTSXP case +inline double half(double lhs) { + return lhs / 2.0; +} + +inline double half(int lhs) { + return static_cast(lhs) / 2.0; +} + +inline Rcomplex half(Rcomplex lhs) { + lhs.r /= 2.0; + lhs.i /= 2.0; + return lhs; +} + +} // median_detail + +// base case +template +class Median { +public: + typedef typename median_detail::result::type result_type; + typedef typename Rcpp::traits::storage_type::type stored_type; + enum { RESULT_RTYPE = median_detail::result::rtype }; + typedef T VECTOR; + +private: + VECTOR x; + +public: + Median(const VECTOR& xx) + : x(Rcpp::clone(xx)) {} + + operator result_type() { + if (x.size() < 1) { + return Rcpp::traits::get_na(); + } + + if (Rcpp::any(Rcpp::is_na(x))) { + return Rcpp::traits::get_na(); + } + + R_xlen_t n = x.size() / 2; + std::nth_element( + x.begin(), x.begin() + n, x.end(), + median_detail::less); + + if (x.size() % 2) return x[n]; + return median_detail::half( + x[n] + *std::max_element( + x.begin(), x.begin() + n, + median_detail::less)); + } +}; + +// na.rm = TRUE +template +class Median { +public: + typedef typename median_detail::result::type result_type; + typedef typename Rcpp::traits::storage_type::type stored_type; + enum { RESULT_RTYPE = median_detail::result::rtype }; + typedef T VECTOR; + +private: + VECTOR x; + +public: + Median(const VECTOR& xx) + : x(Rcpp::na_omit(Rcpp::clone(xx))) {} + + operator result_type() { + if (!x.size()) { + return Rcpp::traits::get_na(); + } + + R_xlen_t n = x.size() / 2; + std::nth_element( + x.begin(), x.begin() + n, x.end(), + median_detail::less); + + if (x.size() % 2) return x[n]; + return median_detail::half( + x[n] + *std::max_element( + x.begin(), x.begin() + n, + median_detail::less)); + } +}; + +// NA = false +template +class Median { +public: + typedef typename median_detail::result::type result_type; + typedef typename Rcpp::traits::storage_type::type stored_type; + enum { RESULT_RTYPE = median_detail::result::rtype }; + typedef T VECTOR; + +private: + VECTOR x; + +public: + Median(const VECTOR& xx) + : x(Rcpp::clone(xx)) {} + + operator result_type() { + if (x.size() < 1) { + return Rcpp::traits::get_na(); + } + + R_xlen_t n = x.size() / 2; + std::nth_element( + x.begin(), x.begin() + n, x.end(), + median_detail::less); + + if (x.size() % 2) return x[n]; + return median_detail::half( + x[n] + *std::max_element( + x.begin(), x.begin() + n, + median_detail::less)); + } +}; + +// specialize for character vector +// due to string_proxy's incompatibility +// with certain std:: algorithms; +// need to return NA for even-length vectors +template +class Median { +public: + typedef typename median_detail::result::type result_type; + typedef typename Rcpp::traits::storage_type::type stored_type; + typedef T VECTOR; + +private: + VECTOR x; + +public: + Median(const VECTOR& xx) + : x(Rcpp::clone(xx)) {} + + operator result_type() { + if (!(x.size() % 2)) { + return Rcpp::traits::get_na(); + } + + if (Rcpp::any(Rcpp::is_na(x))) { + return Rcpp::traits::get_na(); + } + + R_xlen_t n = x.size() / 2; + x.sort(); + + return x[n]; + } +}; + +// na.rm = TRUE +template +class Median { +public: + typedef typename median_detail::result::type result_type; + typedef typename Rcpp::traits::storage_type::type stored_type; + typedef T VECTOR; + +private: + VECTOR x; + +public: + Median(const VECTOR& xx) + : x(Rcpp::na_omit(Rcpp::clone(xx))) {} + + operator result_type() { + if (!(x.size() % 2)) { + return Rcpp::traits::get_na(); + } + + R_xlen_t n = x.size() / 2; + x.sort(); + + return x[n]; + } +}; + +// NA = false +template +class Median { +public: + typedef typename median_detail::result::type result_type; + typedef typename Rcpp::traits::storage_type::type stored_type; + typedef T VECTOR; + +private: + VECTOR x; + +public: + Median(const VECTOR& xx) + : x(Rcpp::clone(xx)) {} + + operator result_type() { + if (!(x.size() % 2)) { + return Rcpp::traits::get_na(); + } + + R_xlen_t n = x.size() / 2; + x.sort(); + + return x[n]; + } +}; + +} // sugar + +template +inline typename sugar::median_detail::result::type +median(const Rcpp::VectorBase& x, bool na_rm = false) { + if (!na_rm) return sugar::Median(x); + return sugar::Median(x); +} + +} // Rcpp + +#endif // Rcpp__sugar__median_h diff --git a/inst/unitTests/cpp/sugar.cpp b/inst/unitTests/cpp/sugar.cpp index d47126a4f..dd0362aca 100644 --- a/inst/unitTests/cpp/sugar.cpp +++ b/inst/unitTests/cpp/sugar.cpp @@ -716,3 +716,25 @@ IntegerVector runit_cummax_iv(IntegerVector xx){ } +// 18 January 2016: median + +// [[Rcpp::export]] +double median_dbl(Rcpp::NumericVector x, bool na_rm = false) { + return Rcpp::median(x, na_rm); +} + +// [[Rcpp::export]] +double median_int(Rcpp::IntegerVector x, bool na_rm = false) { + return Rcpp::median(x, na_rm); +} + +// [[Rcpp::export]] +Rcomplex median_cx(Rcpp::ComplexVector x, bool na_rm = false) { + return Rcpp::median(x, na_rm); +} + +// [[Rcpp::export]] +Rcpp::String median_ch(Rcpp::CharacterVector x, bool na_rm = false) { + return Rcpp::median(x, na_rm); +} + diff --git a/inst/unitTests/runit.sugar.R b/inst/unitTests/runit.sugar.R index 672c35dd8..ba3500e70 100644 --- a/inst/unitTests/runit.sugar.R +++ b/inst/unitTests/runit.sugar.R @@ -775,6 +775,130 @@ if (.runThisTest) { x[4] <- NA checkEquals(fx(x), cummax(x)) } + + + ## 18 January 2016: median + ## median of integer vector + test.sugar.median_int <- function() { + fx <- median_int + + x <- as.integer(rpois(5, 20)) + checkEquals(fx(x), median(x), + "median_int / odd length / no NA / na.rm = FALSE") + + x[4] <- NA + checkEquals(fx(x), median(x), + "median_int / odd length / with NA / na.rm = FALSE") + + checkEquals(fx(x, TRUE), median(x, TRUE), + "median_int / odd length / with NA / na.rm = TRUE") + + ## + x <- as.integer(rpois(6, 20)) + checkEquals(fx(x), median(x), + "median_int / even length / no NA / na.rm = FALSE") + + x[4] <- NA + checkEquals(fx(x), median(x), + "median_int / even length / with NA / na.rm = FALSE") + + checkEquals(fx(x, TRUE), median(x, TRUE), + "median_int / even length / with NA / na.rm = TRUE") + } + + ## median of numeric vector + test.sugar.median_dbl <- function() { + fx <- median_dbl + + x <- rnorm(5) + checkEquals(fx(x), median(x), + "median_dbl / odd length / no NA / na.rm = FALSE") + + x[4] <- NA + checkEquals(fx(x), median(x), + "median_dbl / odd length / with NA / na.rm = FALSE") + + checkEquals(fx(x, TRUE), median(x, TRUE), + "median_dbl / odd length / with NA / na.rm = TRUE") + + ## + x <- rnorm(6) + checkEquals(fx(x), median(x), + "median_dbl / even length / no NA / na.rm = FALSE") + + x[4] <- NA + checkEquals(fx(x), median(x), + "median_dbl / even length / with NA / na.rm = FALSE") + + checkEquals(fx(x, TRUE), median(x, TRUE), + "median_dbl / even length / with NA / na.rm = TRUE") + } + + ## median of complex vector + test.sugar.median_cx <- function() { + fx <- median_cx + + x <- rnorm(5) + 2i + checkEquals(fx(x), median(x), + "median_cx / odd length / no NA / na.rm = FALSE") + + x[4] <- NA + checkEquals(fx(x), median(x), + "median_cx / odd length / with NA / na.rm = FALSE") + + checkEquals(fx(x, TRUE), median(x, TRUE), + "median_cx / odd length / with NA / na.rm = TRUE") + + ## + x <- rnorm(6) + 2i + checkEquals(fx(x), median(x), + "median_cx / even length / no NA / na.rm = FALSE") + + x[4] <- NA + checkEquals(fx(x), median(x), + "median_cx / even length / with NA / na.rm = FALSE") + + checkEquals(fx(x, TRUE), median(x, TRUE), + "median_cx / even length / with NA / na.rm = TRUE") + } + + ## median of character vector + test.sugar.median_ch <- function() { + fx <- median_ch + + x <- sample(letters, 5) + checkEquals(fx(x), median(x), + "median_ch / odd length / no NA / na.rm = FALSE") + + x[4] <- NA + checkEquals(fx(x), median(x), + "median_ch / odd length / with NA / na.rm = FALSE") + + ## median(x, TRUE) returns NA_real_ for character vector input + ## which results in a warning; i.e. if the vector it passes to + ## `mean.default(sort(x, partial = half + 0L:1L)[half + 0L:1L])` + ## has ((length(x) %% 2) == 0) + + checkEquals(fx(x, TRUE), + as.character(suppressWarnings(median(x, TRUE))), + "median_ch / odd length / with NA / na.rm = TRUE") + + ## + x <- sample(letters, 6) + checkEquals(fx(x), + as.character(suppressWarnings(median(x))), + "median_ch / even length / no NA / na.rm = FALSE") + + x[4] <- NA + checkEquals(fx(x), + as.character(suppressWarnings(median(x))), + "median_ch / even length / with NA / na.rm = FALSE") + + checkEquals(fx(x, TRUE), + as.character(suppressWarnings(median(x, TRUE))), + "median_ch / even length / with NA / na.rm = TRUE") + } + }