diff --git a/ChangeLog b/ChangeLog index 537e17186..a39dc226d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2025-11-01 Iñaki Ucar + + * inst/include/Rcpp/hash/IndexHash.h: Normalize values for all comparisons + * inst/include/Rcpp/hash/SelfHash.h: Idem + * inst/tinytest/test_sugar.R: Add test for signed zeroes + 2025-10-21 Iñaki Ucar * inst/include/Rcpp/exceptions_impl.h: use __has_include to simplify checks diff --git a/inst/include/Rcpp/hash/IndexHash.h b/inst/include/Rcpp/hash/IndexHash.h index de994cbbc..6bc73ed3d 100644 --- a/inst/include/Rcpp/hash/IndexHash.h +++ b/inst/include/Rcpp/hash/IndexHash.h @@ -4,7 +4,8 @@ // // Copyright (C) 2010, 2011 Simon Urbanek // Copyright (C) 2012 - 2013 Dirk Eddelbuettel and Romain Francois -// Copyright (C) 2014 - 2021 Dirk Eddelbuettel, Romain Francois and Kevin Ushey +// Copyright (C) 2014 - 2024 Dirk Eddelbuettel, Romain Francois and Kevin Ushey +// Copyright (C) 2025 Dirk Eddelbuettel, Romain Francois, Kevin Ushey and Iñaki Ucar // // This file is part of Rcpp. // @@ -159,13 +160,15 @@ namespace Rcpp{ #endif } + STORAGE normalize(STORAGE val) const { return val; } + inline bool not_equal(const STORAGE& lhs, const STORAGE& rhs) { - return ! internal::NAEquals()(lhs, rhs); + return ! internal::NAEquals()(normalize(lhs), rhs); } bool add_value(int i){ RCPP_DEBUG_2( "%s::add_value(%d)", DEMANGLE(IndexHash), i ) - STORAGE val = src[i++] ; + STORAGE val = normalize(src[i++]); uint32_t addr = get_addr(val) ; while (data[addr] && not_equal( src[data[addr] - 1], val)) { addr++; @@ -199,6 +202,15 @@ namespace Rcpp{ uint32_t get_addr(STORAGE value) const ; } ; + template <> + inline double IndexHash::normalize(double val) const { + /* double is a bit tricky - we have to normalize 0.0, NA and NaN */ + if (val == 0.0) val = 0.0; + if (internal::Rcpp_IsNA(val)) val = NA_REAL; + else if (internal::Rcpp_IsNaN(val)) val = R_NaN; + return val; + } + template <> inline uint32_t IndexHash::get_addr(int value) const { return RCPP_HASH(value) ; @@ -211,10 +223,6 @@ namespace Rcpp{ uint32_t u[2]; }; union dint_u val_u; - /* double is a bit tricky - we nave to normalize 0.0, NA and NaN */ - if (val == 0.0) val = 0.0; - if (internal::Rcpp_IsNA(val)) val = NA_REAL; - else if (internal::Rcpp_IsNaN(val)) val = R_NaN; val_u.d = val; addr = RCPP_HASH(val_u.u[0] + val_u.u[1]); return addr ; diff --git a/inst/include/Rcpp/hash/SelfHash.h b/inst/include/Rcpp/hash/SelfHash.h index 20607277d..1f566da56 100644 --- a/inst/include/Rcpp/hash/SelfHash.h +++ b/inst/include/Rcpp/hash/SelfHash.h @@ -3,8 +3,9 @@ // hash.h: Rcpp R/C++ interface class library -- hashing utility, inspired // from Simon's fastmatch package // -// Copyright (C) 2010, 2011 Simon Urbanek -// Copyright (C) 2012 Dirk Eddelbuettel and Romain Francois +// Copyright (C) 2010, 2011 Simon Urbanek +// Copyright (C) 2012 - 2024 Dirk Eddelbuettel and Romain Francois +// Copyright (C) 2025 Dirk Eddelbuettel, Romain Francois and Iñaki Ucar // // This file is part of Rcpp. // @@ -60,10 +61,16 @@ namespace sugar{ std::vector indices ; int size_ ; + STORAGE normalize(STORAGE val) const { return val; } + + inline bool not_equal(const STORAGE& lhs, const STORAGE& rhs) { + return ! internal::NAEquals()(normalize(lhs), rhs); + } + int add_value_get_index(int i){ - STORAGE val = src[i++] ; + STORAGE val = normalize(src[i++]); unsigned int addr = get_addr(val) ; - while (data[addr] && src[data[addr] - 1] != val) { + while (data[addr] && not_equal( src[data[addr] - 1], val)) { addr++; if (addr == static_cast(m)) addr = 0; } @@ -90,6 +97,15 @@ namespace sugar{ unsigned int get_addr(STORAGE value) const ; } ; + template <> + inline double SelfHash::normalize(double val) const { + /* double is a bit tricky - we have to normalize 0.0, NA and NaN */ + if (val == 0.0) val = 0.0; + if (internal::Rcpp_IsNA(val)) val = NA_REAL; + else if (internal::Rcpp_IsNaN(val)) val = R_NaN; + return val; + } + template <> inline unsigned int SelfHash::get_addr(int value) const { return RCPP_HASH(value) ; @@ -102,10 +118,6 @@ namespace sugar{ unsigned int u[2]; }; union dint_u val_u; - /* double is a bit tricky - we nave to normalize 0.0, NA and NaN */ - if (val == 0.0) val = 0.0; - if (internal::Rcpp_IsNA(val)) val = NA_REAL; - else if (internal::Rcpp_IsNaN(val)) val = R_NaN; val_u.d = val; addr = RCPP_HASH(val_u.u[0] + val_u.u[1]); return addr ; diff --git a/inst/tinytest/test_sugar.R b/inst/tinytest/test_sugar.R index c92d09cef..4465cedf7 100644 --- a/inst/tinytest/test_sugar.R +++ b/inst/tinytest/test_sugar.R @@ -708,6 +708,8 @@ expect_equal(sort(unique(x)), sort(runit_unique_dbl(x)), info = "unique / numeri x <- c(x, NA, NA) expect_equal(sort(unique(x), na.last = TRUE), sort(runit_unique_dbl(x), na.last = TRUE), info = "unique / numeric / with NA") +x <- c(x, -0.0, +0.0) +expect_equal(sort(unique(x)), sort(runit_unique_dbl(x)), info = "unique / numeric / with signed 0s") # test.sort_unique <- function() { set.seed(123)