Skip to content

Commit

Permalink
Merge branch 'ls/gp' of github.com:DeclareDesign/estimatr into ls/gp
Browse files Browse the repository at this point in the history
  • Loading branch information
lukesonnet committed Mar 21, 2018
2 parents bc762ae + 0ea76fb commit e192430
Show file tree
Hide file tree
Showing 4 changed files with 79 additions and 83 deletions.
4 changes: 2 additions & 2 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ lm_variance_cr2 <- function(X, Xunweighted, XtX_inv, ei, weight_mean, clusters,
.Call(`_estimatr_lm_variance_cr2`, X, Xunweighted, XtX_inv, ei, weight_mean, clusters, J, ci, which_covs)
}

naomitwhy <- function(df, is_na_generic) {
.Call(`_estimatr_naomitwhy`, df, is_na_generic)
naomitwhy <- function(df, recursive_subset) {
.Call(`_estimatr_naomitwhy`, df, recursive_subset)
}

11 changes: 4 additions & 7 deletions R/helper_na_omit_detailed.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,9 @@
#' column name, if any were dropped.
#'
#' @seealso \code{\link{na.omit}}
na.omit_detailed.data.frame <- function(object) {
why_omit <- naomitwhy(object, function(x) is.na(x))
na.omit_detailed.data.frame <- function(object){

naomitwhy(object, function(x, w) x[w, , drop=FALSE])

if (length(why_omit)) {
object <- if (length(dim(object))) object[-why_omit, , drop = FALSE] else object[-why_omit]
attr(object, "na.action") <- why_omit
}
object
}

8 changes: 4 additions & 4 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,14 +107,14 @@ BEGIN_RCPP
END_RCPP
}
// naomitwhy
IntegerVector naomitwhy(DataFrame df, Function is_na_generic);
RcppExport SEXP _estimatr_naomitwhy(SEXP dfSEXP, SEXP is_na_genericSEXP) {
DataFrame naomitwhy(DataFrame df, Function recursive_subset);
RcppExport SEXP _estimatr_naomitwhy(SEXP dfSEXP, SEXP recursive_subsetSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< DataFrame >::type df(dfSEXP);
Rcpp::traits::input_parameter< Function >::type is_na_generic(is_na_genericSEXP);
rcpp_result_gen = Rcpp::wrap(naomitwhy(df, is_na_generic));
Rcpp::traits::input_parameter< Function >::type recursive_subset(recursive_subsetSEXP);
rcpp_result_gen = Rcpp::wrap(naomitwhy(df, recursive_subset));
return rcpp_result_gen;
END_RCPP
}
Expand Down
139 changes: 69 additions & 70 deletions src/naomit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,114 +5,113 @@
using namespace Rcpp;


SEXP generic_logical_subset( SEXP xin , LogicalVector w);


template <int RTYPE>
Vector<RTYPE> generic_logical_subset_impl( Vector<RTYPE> xin, LogicalVector w){
return xin[w] ;
}

SEXP generic_logical_subset( SEXP xin , LogicalVector w){
RCPP_RETURN_VECTOR(generic_logical_subset_impl, xin, w) ;
RCPP_RETURN_VECTOR(generic_logical_subset_impl, xin, w) ;
}

// [[Rcpp::export]]
IntegerVector naomitwhy(DataFrame df, Function is_na_generic) {
DataFrame naomitwhy(DataFrame df, Function recursive_subset) {
int m = df.nrow();
int n = df.ncol();
CharacterVector df_names = df.names();

Function isna("is.na");

CharacterVector df_names = df.names();

LogicalVector omit = LogicalVector(m);

int omit_count = 0;

List why_omit(n);
why_omit.names() = df_names;
LogicalVector why_omit_idx(n);

bool anyomit = false;

for (int j =0; j < n; j++) {
std::string nm = as<std::string>(df_names(j));

LogicalVector isna = is_na_generic(df(j));
for (int j = 0; j < n; j++) {

SEXP d = isna.attr("dim");
std::vector<int> why_omit_j;

if(!Rf_isNull(d) && Rf_length(d) == 2 && INTEGER(d)[1] > 1){
int dcols = INTEGER(d)[1];
LogicalVector v_isna = isna(df[j]);


LogicalVector na2(m);

for(int k = 0, ii=0; k < dcols; k++)
for(int i = 0; i < m; i++, ii++){
if(isna[ii]) na2[i] = true;
}

isna = na2;
for(int ii = m; ii < LENGTH(v_isna); ){
for(int i = 0; i < m; i++, ii++)
v_isna[i] |= v_isna[ii];
}

int ii = 0;
IntegerVector why_omit_j(m);

for (int i = 0; i < m; i++){

if(isna[i]){
for(int i = 0; i < m; i++){
if(v_isna[i]){
if(!omit[i]){
why_omit_j[ii++] = i + 1;
why_omit_j.push_back(i + 1);
}

omit[i] = true;
}
};
}

if(ii > 0){
why_omit[j] = why_omit_j[seq(0,ii-1)];
if(why_omit_j.size() > 0){
why_omit[j] = wrap(why_omit_j);
why_omit_idx[j] = true;
anyomit = true;
omit_count += why_omit_j.size();
}
}
if(!anyomit){ return(NULL); }

IntegerVector omit_idx = seq_len(m);
omit_idx = generic_logical_subset(omit_idx, omit);
omit_idx.attr("names") = generic_logical_subset(df.attr("row.names"), omit);
if(omit_count == 0){ return(df); }

IntegerVector omit_idx = IntegerVector(omit_count);
for(int i = 0, ii=0; ii < omit_count; i++){
if(omit[i]) omit_idx[ii++] = i+1;
}

CharacterVector rownames = df.attr("row.names");
omit_idx.attr("names") = rownames[omit];

omit_idx.attr("why_omit") = why_omit[why_omit_idx];
omit_idx.attr("class") = CharacterVector::create("omit", "detailed");

omit = !omit;

List out(n);

for(int i = 0; i < n; i++){
SEXP dfi = df(i);
if(LENGTH(dfi) == m){
out[i] = generic_logical_subset(dfi, omit);
} else {
out[i] = recursive_subset(dfi, omit);
}
}

out.names() = df_names;
out.attr("row.names") = rownames[omit];
out.attr("na.action") = omit_idx;
out.attr("class") = df.attr("class");

return(omit_idx);
return(out);
}

// # NJF 10/18
// # Silly microbenchmark to make sure I didn't make it slower
// # df <- expand.grid(x=c(1:100, NA), y=c(1:5, NA), z=c(1:8, NA), q=c(NA,2:5))
//
// # microbenchmark(stock=na.omit(df), hack1=na.omit_detailed.data.frame(df))
// ## Unit: milliseconds
// ## expr min lq mean median uq max neval
// ## stock 6.114132 6.184318 7.744881 6.232744 6.961491 101.823530 100
// ## hack1 5.360638 5.480531 6.525075 5.694078 7.752104 9.323943 100

// > microbenchmark(
// + out1 = estimatr:::naomitwhy(df),
// + out2 = estimatr:::na.omit_detailed.data.frame(df)
// + )
// Unit: milliseconds
// expr min lq mean median uq max neval
// out1 2.830644 2.9078700 3.88764381 2.9900055 3.655617 67.368491 100
// out2 3.463163 3.7196865 5.70407818 4.5768560 4.748749 70.595884 100
// > df <- na.omit(df)
// > nrow(df)
// [1] 16000
// > microbenchmark(
// + out1 = estimatr:::naomitwhy(df),
// + out2 = estimatr:::na.omit_detailed.data.frame(df)
// + )
// Unit: microseconds
// expr min lq mean median uq max neval
// out1 152.816 159.3180 214.70463 171.3470 185.2210 1023.877 100
// out2 1200.924 1227.9935 2193.33799 1271.2825 2040.2235 64933.756 100
// >

// require(microbenchmark)
// df <- expand.grid(x=c(1:100, NA), y=c(1:5, NA), z=c(1:8, NA), q=c(NA,2:5))
// df2 <- na.omit(df)
// microbenchmark(stock=na.omit(df), ours=estimatr:::na.omit_detailed.data.frame(df), unit="ms")
// microbenchmark(stock=na.omit(df2), ours=estimatr:::na.omit_detailed.data.frame(df2), unit="ms")

// df <- rbind(df, df2, df)
// df2 <- rbind(df2, df2, df2)
// microbenchmark(stock=na.omit(df), ours=estimatr:::na.omit_detailed.data.frame(df), unit="ms")
// microbenchmark(stock=na.omit(df2), ours=estimatr:::na.omit_detailed.data.frame(df2), unit="ms")

// df <- cbind(df, df,df)
// df2 <- cbind(df2, df2, df2)
// microbenchmark(stock=na.omit(df), ours=estimatr:::na.omit_detailed.data.frame(df), unit="ms")
// microbenchmark(stock=na.omit(df2), ours=estimatr:::na.omit_detailed.data.frame(df2), unit="ms")

// sleep[c("sleep", "foo")] = list(sleep, matrix(1:40, 20))
// sleep[cbind(c(1,5,9), c(2,1,3))] <- NA
// sleep$sleep[cbind(1+c(1,5,9), c(2,1,3))] <- NA
// sleep$foo[12,1] <- NA
// attributes(estimatr:::na.omit_detailed.data.frame(sleep))

0 comments on commit e192430

Please sign in to comment.