diff --git a/R/RcppExports.R b/R/RcppExports.R index 0647e00d..f5e09b7f 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -29,7 +29,7 @@ lm_variance_cr2 <- function(X, Xunweighted, XtX_inv, ei, weight_mean, clusters, .Call(`_estimatr_lm_variance_cr2`, X, Xunweighted, XtX_inv, ei, weight_mean, clusters, J, ci, which_covs) } -naomitwhy <- function(df, isna, recursive_subset) { - .Call(`_estimatr_naomitwhy`, df, isna, recursive_subset) +naomitwhy <- function(df, recursive_subset) { + .Call(`_estimatr_naomitwhy`, df, recursive_subset) } diff --git a/R/helper_na_omit_detailed.R b/R/helper_na_omit_detailed.R index 3bd457b9..b85dcb8a 100644 --- a/R/helper_na_omit_detailed.R +++ b/R/helper_na_omit_detailed.R @@ -10,7 +10,7 @@ #' @seealso \code{\link{na.omit}} na.omit_detailed.data.frame <- function(object){ - naomitwhy(object, is.na(object), function(x, w) x[w, , drop=FALSE]) + naomitwhy(object, function(x, w) x[w, , drop=FALSE]) } diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 505ce564..5462f1a8 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -107,15 +107,14 @@ BEGIN_RCPP END_RCPP } // naomitwhy -DataFrame naomitwhy(DataFrame df, LogicalMatrix isna, Function recursive_subset); -RcppExport SEXP _estimatr_naomitwhy(SEXP dfSEXP, SEXP isnaSEXP, SEXP recursive_subsetSEXP) { +DataFrame naomitwhy(DataFrame df, Function recursive_subset); +RcppExport SEXP _estimatr_naomitwhy(SEXP dfSEXP, SEXP recursive_subsetSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< DataFrame >::type df(dfSEXP); - Rcpp::traits::input_parameter< LogicalMatrix >::type isna(isnaSEXP); Rcpp::traits::input_parameter< Function >::type recursive_subset(recursive_subsetSEXP); - rcpp_result_gen = Rcpp::wrap(naomitwhy(df, isna, recursive_subset)); + rcpp_result_gen = Rcpp::wrap(naomitwhy(df, recursive_subset)); return rcpp_result_gen; END_RCPP } @@ -128,7 +127,7 @@ static const R_CallMethodDef CallEntries[] = { {"_estimatr_lm_solver", (DL_FUNC) &_estimatr_lm_solver, 3}, {"_estimatr_lm_variance", (DL_FUNC) &_estimatr_lm_variance, 8}, {"_estimatr_lm_variance_cr2", (DL_FUNC) &_estimatr_lm_variance_cr2, 9}, - {"_estimatr_naomitwhy", (DL_FUNC) &_estimatr_naomitwhy, 3}, + {"_estimatr_naomitwhy", (DL_FUNC) &_estimatr_naomitwhy, 2}, {NULL, NULL, 0} }; diff --git a/src/naomit.cpp b/src/naomit.cpp index 269f5264..32a1130b 100644 --- a/src/naomit.cpp +++ b/src/naomit.cpp @@ -15,75 +15,54 @@ SEXP generic_logical_subset( SEXP xin , LogicalVector w){ } // [[Rcpp::export]] -DataFrame naomitwhy(DataFrame df, LogicalMatrix isna, Function recursive_subset) { +DataFrame naomitwhy(DataFrame df, Function recursive_subset) { int m = df.nrow(); int n = df.ncol(); - int N = isna.ncol(); + Function isna("is.na"); CharacterVector df_names = df.names(); - IntegerVector na_to_col_map(n); - if(N == n){ - std::fill(na_to_col_map.begin(), na_to_col_map.end(), 1); - } - else { - Function dim("dim"); - - for(int i = 0; i < n; i++){ - SEXP dfi = df[i]; - if(Rf_isVectorAtomic(dfi) && LENGTH(dfi) == m){ - na_to_col_map[i] = 1; - } else { - SEXP nc = dim(dfi); - na_to_col_map[i] = Rf_isNull(nc) ? 1 : INTEGER(nc)[1]; - } - } - } - LogicalVector omit = LogicalVector(m); - int omit_count = 0, omit_f = m, omit_l = 0; + int omit_count = 0; List why_omit(n); why_omit.names() = df_names; LogicalVector why_omit_idx(n); - for (int j = 0, ii = 0; j < n; j++) { + for (int j = 0; j < n; j++) { std::vector why_omit_j; - for (int j_sub = na_to_col_map[j]; j_sub; j_sub--){ - for (int i = 0; i < m; i++, ii++){ + LogicalVector v_isna = isna(df[j]); - if(isna[ii]){ - if(!omit[i]){ - why_omit_j.push_back(i + 1); - } + for(int ii = m; ii < LENGTH(v_isna); ){ + for(int i = 0; i < m; i++, ii++) + v_isna[i] |= v_isna[ii]; + } - omit[i] = true; + for(int i = 0; i < m; i++){ + if(v_isna[i]){ + if(!omit[i]){ + why_omit_j.push_back(i + 1); } - } + omit[i] = true; + }; } if(why_omit_j.size() > 0){ - if(na_to_col_map[j] > 1){ - std::sort(why_omit_j.begin(), why_omit_j.end()); - } why_omit[j] = wrap(why_omit_j); why_omit_idx[j] = true; - omit_f = std::min(omit_f, why_omit_j.front()); - omit_l = std::max(omit_l, why_omit_j.back()); omit_count += why_omit_j.size(); } } - if(omit_count == 0){ return(df); } - // Rcout << "after\n" << omit_count << "\n"; + if(omit_count == 0){ return(df); } IntegerVector omit_idx = IntegerVector(omit_count); - for(int i = omit_f-1, ii=0; i < omit_l; i++){ + for(int i = 0, ii=0; ii < omit_count; i++){ if(omit[i]) omit_idx[ii++] = i+1; } @@ -92,7 +71,6 @@ DataFrame naomitwhy(DataFrame df, LogicalMatrix isna, Function recursive_subset) omit_idx.attr("why_omit") = why_omit[why_omit_idx]; omit_idx.attr("class") = CharacterVector::create("omit", "detailed"); - //omit_idx.attr("tokeep") = !omit; omit = !omit; @@ -100,12 +78,11 @@ DataFrame naomitwhy(DataFrame df, LogicalMatrix isna, Function recursive_subset) for(int i = 0; i < n; i++){ SEXP dfi = df(i); - if(Rf_isVectorAtomic(dfi) && LENGTH(dfi) == m){ + if(LENGTH(dfi) == m){ out[i] = generic_logical_subset(dfi, omit); } else { out[i] = recursive_subset(dfi, omit); } - } out.names() = df_names; @@ -120,12 +97,11 @@ DataFrame naomitwhy(DataFrame df, LogicalMatrix isna, Function recursive_subset) // require(microbenchmark) // df <- expand.grid(x=c(1:100, NA), y=c(1:5, NA), z=c(1:8, NA), q=c(NA,2:5)) // df2 <- na.omit(df) - // microbenchmark(stock=na.omit(df), ours=estimatr:::na.omit_detailed.data.frame(df)) + // microbenchmark(stock=na.omit(df), ours=estimatr:::na.omit_detailed.data.frame(df), unit="ms") // microbenchmark(stock=na.omit(df2), ours=estimatr:::na.omit_detailed.data.frame(df2), unit="ms") - // + // df <- rbind(df, df2, df) // df2 <- rbind(df2, df2, df2) - // // microbenchmark(stock=na.omit(df), ours=estimatr:::na.omit_detailed.data.frame(df), unit="ms") // microbenchmark(stock=na.omit(df2), ours=estimatr:::na.omit_detailed.data.frame(df2), unit="ms")