From 587b3421c79aba4b8b59df4a919ce691659b9691 Mon Sep 17 00:00:00 2001 From: Walter Somerville Date: Wed, 1 Jul 2020 18:02:49 +1200 Subject: [PATCH 1/7] Implement push_back and push_front for DataFrame. This explicitly defines push_back and push_back methods for DataFrame which copies row.names and class attributes from the original object. This fixes #1094 where these operations converted the object to a list. Tests for this behavior are also implemented. --- ChangeLog | 7 +++++++ inst/include/Rcpp/DataFrame.h | 36 +++++++++++++++++++++++++++++++++ inst/tinytest/cpp/DataFrame.cpp | 36 +++++++++++++++++++++++++++++++++ inst/tinytest/test_dataframe.R | 20 ++++++++++++++++++ 4 files changed, 99 insertions(+) diff --git a/ChangeLog b/ChangeLog index 5edcd79ab..5bb9b251a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -17,6 +17,13 @@ * inst/NEWS.Rd: Idem * vignettes/rmd/Rcpp.bib: Idem * inst/bib/Rcpp.bib: Idem +2020-07-01 Walter Somerville + + * inst/include/Rcpp/DataFrame.h: Implement explict push_back and + push_front for DataFrame, which takes care to set the class and + row.names attributes. + * inst/tinyTest/test_dataframe.R Add in tests for push_back/push_front + * inst/tinyTest/cpp/DataFrame.cpp 2020-06-27 Dirk Eddelbuettel diff --git a/inst/include/Rcpp/DataFrame.h b/inst/include/Rcpp/DataFrame.h index 4e37081f3..081ba7f90 100644 --- a/inst/include/Rcpp/DataFrame.h +++ b/inst/include/Rcpp/DataFrame.h @@ -83,6 +83,42 @@ namespace Rcpp{ return LENGTH(rn); } + template + void push_back( const T& object){ + R_xlen_t NewSize = std::max(object.size(), static_cast(nrow())); + Parent::push_back(object); + IntegerVector RowNames = Range(1, NewSize); + Rf_setAttrib(Parent::get__(), R_RowNamesSymbol, RowNames); + Rf_setAttrib(Parent::get__(), R_ClassSymbol, Rf_mkString("data.frame")); + } + + template + void push_back( const T& object, const std::string& name ){ + R_xlen_t NewSize = std::max(object.size(), static_cast(nrow())); + Parent::push_back(object, name); + IntegerVector RowNames = Range(1, NewSize); + Rf_setAttrib(Parent::get__(), R_RowNamesSymbol, RowNames); + Rf_setAttrib(Parent::get__(), R_ClassSymbol, Rf_mkString("data.frame")); + } + + template + void push_front( const T& object){ + R_xlen_t NewSize = std::max(object.size(), static_cast(nrow())); + Parent::push_front(object); + IntegerVector RowNames = Range(1, NewSize); + Rf_setAttrib(Parent::get__(), R_RowNamesSymbol, RowNames); + Rf_setAttrib(Parent::get__(), R_ClassSymbol, Rf_mkString("data.frame")); + } + + template + void push_front( const T& object, const std::string& name){ + R_xlen_t NewSize = std::max(object.size(), static_cast(nrow())); + Parent::push_front(object, name); + IntegerVector RowNames = Range(1, NewSize); + Rf_setAttrib(Parent::get__(), R_RowNamesSymbol, RowNames); + Rf_setAttrib(Parent::get__(), R_ClassSymbol, Rf_mkString("data.frame")); + } + // Offer multiple variants to accomodate both old interface here and signatures in other classes inline int nrows() const { return DataFrame_Impl::nrow(); } inline int rows() const { return DataFrame_Impl::nrow(); } diff --git a/inst/tinytest/cpp/DataFrame.cpp b/inst/tinytest/cpp/DataFrame.cpp index 9cc6f27f3..47724fd5c 100644 --- a/inst/tinytest/cpp/DataFrame.cpp +++ b/inst/tinytest/cpp/DataFrame.cpp @@ -94,3 +94,39 @@ IntegerVector DataFrame_nrow( DataFrame df){ IntegerVector DataFrame_ncol( DataFrame df){ return IntegerVector::create(df.ncol(), df.cols()); } + +// [[Rcpp::export]] +DataFrame DataFrame_PushBackNamed(){ + NumericVector u(2); + NumericVector v(2); + DataFrame df = DataFrame::create(_["u"] = u); + df.push_back(v, "v"); + return df; +} + +// [[Rcpp::export]] +DataFrame DataFrame_PushBackUnnamed(){ + NumericVector u(2); + NumericVector v(2); + DataFrame df = DataFrame::create(_["u"] = u); + df.push_back(v); + return df; +} + +// [[Rcpp::export]] +DataFrame DataFrame_PushFrontNamed(){ + NumericVector u(2); + NumericVector v(2); + DataFrame df = DataFrame::create(_["u"] = u); + df.push_front(v, "v"); + return df; +} + +// [[Rcpp::export]] +DataFrame DataFrame_PushFrontUnnamed(){ + NumericVector u(2); + NumericVector v(2); + DataFrame df = DataFrame::create(_["u"] = u); + df.push_front(v); + return df; +} diff --git a/inst/tinytest/test_dataframe.R b/inst/tinytest/test_dataframe.R index df9e8e343..53737d091 100644 --- a/inst/tinytest/test_dataframe.R +++ b/inst/tinytest/test_dataframe.R @@ -70,3 +70,23 @@ expect_equal( DataFrame_nrow( df ), rep(nrow(df), 2) ) # test.DataFrame.ncol <- function(){ df <- data.frame( x = 1:10, y = 1:10 ) expect_equal( DataFrame_ncol( df ), rep(ncol(df), 2) ) + +# test.DataFrame.PushBackNamed <- function(){ +df <- data.frame( u = c(0, 0), v = c(0, 0) ) +expect_true( is.data.frame( DataFrame_PushBackNamed() ) ) +expect_equal( DataFrame_PushBackNamed(), df ) + +# test.DataFrame.PushBackUnamed <- function(){ +df <- data.frame( u = c(0, 0), c(0, 0), fix.empty.names = FALSE ) +expect_true( is.data.frame( DataFrame_PushBackUnnamed() ) ) +expect_equal( DataFrame_PushBackUnnamed(), df ) + +# test.DataFrame.PushFrontNamed <- function(){ +df <- data.frame( v = c(0, 0), u = c(0, 0) ) +expect_true( is.data.frame( DataFrame_PushFrontNamed() ) ) +expect_equal( DataFrame_PushFrontNamed(), df ) + +# test.DataFrame.PushFrontUnnamed <- function(){ +df <- data.frame( c(0, 0), u = c(0, 0), fix.empty.names = FALSE ) +expect_true( is.data.frame( DataFrame_PushFrontUnnamed() ) ) +expect_equal( DataFrame_PushFrontUnnamed(), df ) From d37dae3c3ec2d66e04c962896f7f59c183cda198 Mon Sep 17 00:00:00 2001 From: Walter Somerville Date: Wed, 1 Jul 2020 19:43:26 +1200 Subject: [PATCH 2/7] Correct format of ChangeLog entry --- ChangeLog | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 5bb9b251a..ab44fd3a4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -22,8 +22,8 @@ * inst/include/Rcpp/DataFrame.h: Implement explict push_back and push_front for DataFrame, which takes care to set the class and row.names attributes. - * inst/tinyTest/test_dataframe.R Add in tests for push_back/push_front - * inst/tinyTest/cpp/DataFrame.cpp + * inst/tinyTest/test_dataframe.R: Add in tests for push_back/push_front + * inst/tinyTest/cpp/DataFrame.cpp: Idem 2020-06-27 Dirk Eddelbuettel From fab8b387c88f8c2bee154f88085f2453b33aa5c8 Mon Sep 17 00:00:00 2001 From: Walter Somerville Date: Wed, 1 Jul 2020 19:43:43 +1200 Subject: [PATCH 3/7] Modify approach to copying attributes. Make use of the fact set__() already does a conversion, and just use that. This has the benefit that it works with push_back(DataFrame), as well as push_back(Vector), and also handles adding vectors of different lengths properly. --- inst/include/Rcpp/DataFrame.h | 20 ++++---------------- inst/tinytest/cpp/DataFrame.cpp | 27 +++++++++++++++++++++++++++ inst/tinytest/test_dataframe.R | 15 +++++++++++++-- 3 files changed, 44 insertions(+), 18 deletions(-) diff --git a/inst/include/Rcpp/DataFrame.h b/inst/include/Rcpp/DataFrame.h index 081ba7f90..0c8629e15 100644 --- a/inst/include/Rcpp/DataFrame.h +++ b/inst/include/Rcpp/DataFrame.h @@ -85,38 +85,26 @@ namespace Rcpp{ template void push_back( const T& object){ - R_xlen_t NewSize = std::max(object.size(), static_cast(nrow())); Parent::push_back(object); - IntegerVector RowNames = Range(1, NewSize); - Rf_setAttrib(Parent::get__(), R_RowNamesSymbol, RowNames); - Rf_setAttrib(Parent::get__(), R_ClassSymbol, Rf_mkString("data.frame")); + set__(Parent::get__()); } template void push_back( const T& object, const std::string& name ){ - R_xlen_t NewSize = std::max(object.size(), static_cast(nrow())); Parent::push_back(object, name); - IntegerVector RowNames = Range(1, NewSize); - Rf_setAttrib(Parent::get__(), R_RowNamesSymbol, RowNames); - Rf_setAttrib(Parent::get__(), R_ClassSymbol, Rf_mkString("data.frame")); + set__(Parent::get__()); } template void push_front( const T& object){ - R_xlen_t NewSize = std::max(object.size(), static_cast(nrow())); Parent::push_front(object); - IntegerVector RowNames = Range(1, NewSize); - Rf_setAttrib(Parent::get__(), R_RowNamesSymbol, RowNames); - Rf_setAttrib(Parent::get__(), R_ClassSymbol, Rf_mkString("data.frame")); + set__(Parent::get__()); } template void push_front( const T& object, const std::string& name){ - R_xlen_t NewSize = std::max(object.size(), static_cast(nrow())); Parent::push_front(object, name); - IntegerVector RowNames = Range(1, NewSize); - Rf_setAttrib(Parent::get__(), R_RowNamesSymbol, RowNames); - Rf_setAttrib(Parent::get__(), R_ClassSymbol, Rf_mkString("data.frame")); + set__(Parent::get__()); } // Offer multiple variants to accomodate both old interface here and signatures in other classes diff --git a/inst/tinytest/cpp/DataFrame.cpp b/inst/tinytest/cpp/DataFrame.cpp index 47724fd5c..19bcd158d 100644 --- a/inst/tinytest/cpp/DataFrame.cpp +++ b/inst/tinytest/cpp/DataFrame.cpp @@ -130,3 +130,30 @@ DataFrame DataFrame_PushFrontUnnamed(){ df.push_front(v); return df; } + +// [[Rcpp::export]] +DataFrame DataFrame_PushFrontDataFrame(){ + NumericVector u(2); + NumericVector v(2); + NumericVector w(2); + NumericVector x(2); + + DataFrame df1 = DataFrame::create(_["u"] = u, _["v"] = v); + DataFrame df2 = DataFrame::create(_["w"] = w, _["x"] = x); + df1.push_front(df2); + return df1; +} + +// [[Rcpp::export]] +DataFrame DataFrame_PushBackDataFrame(){ + NumericVector u(2); + NumericVector v(2); + NumericVector w(2); + NumericVector x(2); + + DataFrame df1 = DataFrame::create(_["u"] = u, _["v"] = v); + DataFrame df2 = DataFrame::create(_["w"] = w, _["x"] = x); + df1.push_back(df2); + return df1; +} + diff --git a/inst/tinytest/test_dataframe.R b/inst/tinytest/test_dataframe.R index 53737d091..5230d1235 100644 --- a/inst/tinytest/test_dataframe.R +++ b/inst/tinytest/test_dataframe.R @@ -77,7 +77,7 @@ expect_true( is.data.frame( DataFrame_PushBackNamed() ) ) expect_equal( DataFrame_PushBackNamed(), df ) # test.DataFrame.PushBackUnamed <- function(){ -df <- data.frame( u = c(0, 0), c(0, 0), fix.empty.names = FALSE ) +df <- data.frame( u = c(0, 0), c(0, 0) ) expect_true( is.data.frame( DataFrame_PushBackUnnamed() ) ) expect_equal( DataFrame_PushBackUnnamed(), df ) @@ -87,6 +87,17 @@ expect_true( is.data.frame( DataFrame_PushFrontNamed() ) ) expect_equal( DataFrame_PushFrontNamed(), df ) # test.DataFrame.PushFrontUnnamed <- function(){ -df <- data.frame( c(0, 0), u = c(0, 0), fix.empty.names = FALSE ) +df <- data.frame( c(0, 0), u = c(0, 0) ) expect_true( is.data.frame( DataFrame_PushFrontUnnamed() ) ) expect_equal( DataFrame_PushFrontUnnamed(), df ) + + +# test.DataFrame.PushFrontDataFrame <- function(){ +df <- data.frame( w = c(0, 0), x = c(0, 0), u = c(0, 0), v = c(0, 0) ) +expect_true( is.data.frame( DataFrame_PushFrontDataFrame() ) ) +expect_equal( DataFrame_PushFrontDataFrame(), df ) + +# test.DataFrame.PushBackDataFrame <- function(){ +df <- data.frame( u = c(0, 0), v = c(0, 0), w = c(0, 0), x = c(0, 0) ) +expect_true( is.data.frame( DataFrame_PushBackDataFrame() ) ) +expect_equal( DataFrame_PushBackDataFrame(), df ) From d800e2ae7afa1f9b1eac99d1bfb3c1dadd267132 Mon Sep 17 00:00:00 2001 From: Walter Somerville Date: Fri, 10 Jul 2020 00:59:00 +1200 Subject: [PATCH 4/7] Add in warning when DataFrame created will have incorrect rows. With this commit, if after a push_back the row count of the DataFrame would be invalid, a warning is generated. The type is then not coerced to a DataFrame, but left as a list. In future, this could instead `stop()`. --- inst/include/Rcpp/DataFrame.h | 32 ++++++++++++++++++++++++++++---- inst/tinytest/cpp/DataFrame.cpp | 24 ++++++++++++++++++++++++ inst/tinytest/test_dataframe.R | 9 +++++++++ 3 files changed, 61 insertions(+), 4 deletions(-) diff --git a/inst/include/Rcpp/DataFrame.h b/inst/include/Rcpp/DataFrame.h index 0c8629e15..45fbaadf2 100644 --- a/inst/include/Rcpp/DataFrame.h +++ b/inst/include/Rcpp/DataFrame.h @@ -86,25 +86,25 @@ namespace Rcpp{ template void push_back( const T& object){ Parent::push_back(object); - set__(Parent::get__()); + set_type_after_push(); } template void push_back( const T& object, const std::string& name ){ Parent::push_back(object, name); - set__(Parent::get__()); + set_type_after_push(); } template void push_front( const T& object){ Parent::push_front(object); - set__(Parent::get__()); + set_type_after_push(); } template void push_front( const T& object, const std::string& name){ Parent::push_front(object, name); - set__(Parent::get__()); + set_type_after_push(); } // Offer multiple variants to accomodate both old interface here and signatures in other classes @@ -130,6 +130,30 @@ namespace Rcpp{ } } + void set_type_after_push(){ + int max_rows = 0; + bool invalid_column_size = false; + SEXP data = Parent::get__(); + List::iterator it; + // Get the maximum number of rows + for (it = Parent::begin(); it != Parent::end(); ++it) { + if (Rf_xlength(*it) > max_rows) { + max_rows = Rf_xlength(*it); + } + } + for (it = Parent::begin(); it != Parent::end(); ++it) { + if (Rf_xlength(*it) > 1 && max_rows % Rf_xlength(*it) != 0) { + // We have a column that is not an integer fraction of the largest + invalid_column_size = true; + } + } + if (invalid_column_size) { + warning("Column sizes are not equal in DataFrame::push_back, object degrading to List\n"); + } else { + set__(Parent::get__()); + } + } + static DataFrame_Impl from_list( Parent obj ){ bool use_default_strings_as_factors = true ; bool strings_as_factors = true ; diff --git a/inst/tinytest/cpp/DataFrame.cpp b/inst/tinytest/cpp/DataFrame.cpp index 19bcd158d..855551e0e 100644 --- a/inst/tinytest/cpp/DataFrame.cpp +++ b/inst/tinytest/cpp/DataFrame.cpp @@ -157,3 +157,27 @@ DataFrame DataFrame_PushBackDataFrame(){ return df1; } +// [[Rcpp::export]] +DataFrame DataFrame_PushWrongSize(){ + NumericVector u(2); + NumericVector v(3); + + DataFrame df1 = DataFrame::create(_["u"] = u); + df1.push_back(v); + return df1; +} + +// [[Rcpp::export]] +DataFrame DataFrame_PushReplicateLength(){ + NumericVector u(2); + NumericVector v(4); + NumericVector x(1); + + u[0] = 1; + x[0] = 2; + + DataFrame df1 = DataFrame::create(_["u"] = u); + df1.push_back(v); + df1.push_back(x); + return df1; +} diff --git a/inst/tinytest/test_dataframe.R b/inst/tinytest/test_dataframe.R index 5230d1235..8c1930738 100644 --- a/inst/tinytest/test_dataframe.R +++ b/inst/tinytest/test_dataframe.R @@ -101,3 +101,12 @@ expect_equal( DataFrame_PushFrontDataFrame(), df ) df <- data.frame( u = c(0, 0), v = c(0, 0), w = c(0, 0), x = c(0, 0) ) expect_true( is.data.frame( DataFrame_PushBackDataFrame() ) ) expect_equal( DataFrame_PushBackDataFrame(), df ) + +# test.DataFrame.PushWrongSize <- function(){ +df <- data.frame( u = c(0, 0), v = c(0, 0), w = c(0, 0), x = c(0, 0) ) +expect_warning( DataFrame_PushWrongSize() ) + +# test.DataFrame.PushReplicateLength <- function(){ +df <- data.frame( u = c(1, 0), v = c(0, 0, 0, 0), x = c(2) ) +expect_true( is.data.frame( DataFrame_PushReplicateLength() ) ) +expect_equal( DataFrame_PushReplicateLength(), df ) From 2bfd0d1f472bfaf2dd46488a3d7f95ea4b08116d Mon Sep 17 00:00:00 2001 From: Walter Somerville Date: Fri, 10 Jul 2020 01:31:25 +1200 Subject: [PATCH 5/7] Correct behaviour when pushing zero-length object to DataFrame. This triggers the warning when adding a zero-length object to a DataFrame. It is not a valid DataFrame, so calls `warning()`, and doesn't call `as.data.frame()`. --- inst/include/Rcpp/DataFrame.h | 2 +- inst/tinytest/cpp/DataFrame.cpp | 11 +++++++++++ inst/tinytest/test_dataframe.R | 3 +++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/inst/include/Rcpp/DataFrame.h b/inst/include/Rcpp/DataFrame.h index 45fbaadf2..0c8ce6335 100644 --- a/inst/include/Rcpp/DataFrame.h +++ b/inst/include/Rcpp/DataFrame.h @@ -142,7 +142,7 @@ namespace Rcpp{ } } for (it = Parent::begin(); it != Parent::end(); ++it) { - if (Rf_xlength(*it) > 1 && max_rows % Rf_xlength(*it) != 0) { + if (Rf_xlength(*it) == 0 || ( Rf_xlength(*it) > 1 && max_rows % Rf_xlength(*it) != 0 )) { // We have a column that is not an integer fraction of the largest invalid_column_size = true; } diff --git a/inst/tinytest/cpp/DataFrame.cpp b/inst/tinytest/cpp/DataFrame.cpp index 855551e0e..d2a2c8d8e 100644 --- a/inst/tinytest/cpp/DataFrame.cpp +++ b/inst/tinytest/cpp/DataFrame.cpp @@ -181,3 +181,14 @@ DataFrame DataFrame_PushReplicateLength(){ df1.push_back(x); return df1; } + +// [[Rcpp::export]] +DataFrame DataFrame_PushZeroLength(){ + NumericVector u(2); + NumericVector v(0); + + + DataFrame df1 = DataFrame::create(_["u"] = u); + df1.push_back(v); + return df1; +} diff --git a/inst/tinytest/test_dataframe.R b/inst/tinytest/test_dataframe.R index 8c1930738..a2cc420da 100644 --- a/inst/tinytest/test_dataframe.R +++ b/inst/tinytest/test_dataframe.R @@ -110,3 +110,6 @@ expect_warning( DataFrame_PushWrongSize() ) df <- data.frame( u = c(1, 0), v = c(0, 0, 0, 0), x = c(2) ) expect_true( is.data.frame( DataFrame_PushReplicateLength() ) ) expect_equal( DataFrame_PushReplicateLength(), df ) + +# test.DataFrame.PushZeroLength <- function(){ +expect_warning( DataFrame_PushZeroLength()) From 89b69dd38c4dc0ba559e107e06b0a5ca3ba26adb Mon Sep 17 00:00:00 2001 From: Walter Somerville Date: Fri, 10 Jul 2020 01:40:30 +1200 Subject: [PATCH 6/7] Correct failing test. --- inst/tinytest/cpp/DataFrame.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/tinytest/cpp/DataFrame.cpp b/inst/tinytest/cpp/DataFrame.cpp index d2a2c8d8e..ef48e2572 100644 --- a/inst/tinytest/cpp/DataFrame.cpp +++ b/inst/tinytest/cpp/DataFrame.cpp @@ -177,8 +177,8 @@ DataFrame DataFrame_PushReplicateLength(){ x[0] = 2; DataFrame df1 = DataFrame::create(_["u"] = u); - df1.push_back(v); - df1.push_back(x); + df1.push_back(v, "v"); + df1.push_back(x, "x"); return df1; } From ff1416797709e379adfb9e8810a85204954d5d3c Mon Sep 17 00:00:00 2001 From: Dirk Eddelbuettel Date: Thu, 9 Jul 2020 08:39:20 -0500 Subject: [PATCH 7/7] correct ChangeLog --- ChangeLog | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/ChangeLog b/ChangeLog index ab44fd3a4..055133c20 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,17 @@ +2020-07-09 Walter Somerville + + * inst/include/Rcpp/DataFrame.h: Warn when data.frame has varying length + * inst/tinyTest/cpp/DataFrame.cpp: Test added + * inst/tinyTest/test_dataframe.R: Idem + +2020-07-07 Walter Somerville + + * inst/include/Rcpp/DataFrame.h: Implement explict push_back and + push_front for DataFrame, which takes care to set the class and + row.names attributes. + * inst/tinyTest/test_dataframe.R: Add in tests for push_back/push_front + * inst/tinyTest/cpp/DataFrame.cpp: Idem + 2020-07-06 Dirk Eddelbuettel * DESCRIPTION (Version, Date): Roll minor version @@ -17,13 +31,6 @@ * inst/NEWS.Rd: Idem * vignettes/rmd/Rcpp.bib: Idem * inst/bib/Rcpp.bib: Idem -2020-07-01 Walter Somerville - - * inst/include/Rcpp/DataFrame.h: Implement explict push_back and - push_front for DataFrame, which takes care to set the class and - row.names attributes. - * inst/tinyTest/test_dataframe.R: Add in tests for push_back/push_front - * inst/tinyTest/cpp/DataFrame.cpp: Idem 2020-06-27 Dirk Eddelbuettel