Skip to content

Commit

Permalink
cleanup
Browse files (browse the repository at this point in the history)
  • Loading branch information
nfultz committed Mar 21, 2018
1 parent 5640e85 commit e6463be
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 52 deletions.
4 changes: 2 additions & 2 deletions R/RcppExports.R
Expand Up @@ -29,7 +29,7 @@ lm_variance_cr2 <- function(X, Xunweighted, XtX_inv, ei, weight_mean, clusters,
.Call(`_estimatr_lm_variance_cr2`, X, Xunweighted, XtX_inv, ei, weight_mean, clusters, J, ci, which_covs)
}

naomitwhy <- function(df, isna, recursive_subset) {
.Call(`_estimatr_naomitwhy`, df, isna, recursive_subset)
naomitwhy <- function(df, recursive_subset) {
.Call(`_estimatr_naomitwhy`, df, recursive_subset)
}

2 changes: 1 addition & 1 deletion R/helper_na_omit_detailed.R
Expand Up @@ -10,7 +10,7 @@
#' @seealso \code{\link{na.omit}}
na.omit_detailed.data.frame <- function(object){

naomitwhy(object, is.na(object), function(x, w) x[w, , drop=FALSE])
naomitwhy(object, function(x, w) x[w, , drop=FALSE])

}

9 changes: 4 additions & 5 deletions src/RcppExports.cpp
Expand Up @@ -107,15 +107,14 @@ BEGIN_RCPP
END_RCPP
}
// naomitwhy
DataFrame naomitwhy(DataFrame df, LogicalMatrix isna, Function recursive_subset);
RcppExport SEXP _estimatr_naomitwhy(SEXP dfSEXP, SEXP isnaSEXP, SEXP recursive_subsetSEXP) {
DataFrame naomitwhy(DataFrame df, Function recursive_subset);
RcppExport SEXP _estimatr_naomitwhy(SEXP dfSEXP, SEXP recursive_subsetSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< DataFrame >::type df(dfSEXP);
Rcpp::traits::input_parameter< LogicalMatrix >::type isna(isnaSEXP);
Rcpp::traits::input_parameter< Function >::type recursive_subset(recursive_subsetSEXP);
rcpp_result_gen = Rcpp::wrap(naomitwhy(df, isna, recursive_subset));
rcpp_result_gen = Rcpp::wrap(naomitwhy(df, recursive_subset));
return rcpp_result_gen;
END_RCPP
}
Expand All @@ -128,7 +127,7 @@ static const R_CallMethodDef CallEntries[] = {
{"_estimatr_lm_solver", (DL_FUNC) &_estimatr_lm_solver, 3},
{"_estimatr_lm_variance", (DL_FUNC) &_estimatr_lm_variance, 8},
{"_estimatr_lm_variance_cr2", (DL_FUNC) &_estimatr_lm_variance_cr2, 9},
{"_estimatr_naomitwhy", (DL_FUNC) &_estimatr_naomitwhy, 3},
{"_estimatr_naomitwhy", (DL_FUNC) &_estimatr_naomitwhy, 2},
{NULL, NULL, 0}
};

Expand Down
64 changes: 20 additions & 44 deletions src/naomit.cpp
Expand Up @@ -15,75 +15,54 @@ SEXP generic_logical_subset( SEXP xin , LogicalVector w){
}

// [[Rcpp::export]]
DataFrame naomitwhy(DataFrame df, LogicalMatrix isna, Function recursive_subset) {
DataFrame naomitwhy(DataFrame df, Function recursive_subset) {
int m = df.nrow();
int n = df.ncol();
int N = isna.ncol();

Function isna("is.na");

CharacterVector df_names = df.names();

IntegerVector na_to_col_map(n);
if(N == n){
std::fill(na_to_col_map.begin(), na_to_col_map.end(), 1);
}
else {
Function dim("dim");

for(int i = 0; i < n; i++){
SEXP dfi = df[i];
if(Rf_isVectorAtomic(dfi) && LENGTH(dfi) == m){
na_to_col_map[i] = 1;
} else {
SEXP nc = dim(dfi);
na_to_col_map[i] = Rf_isNull(nc) ? 1 : INTEGER(nc)[1];
}
}
}

LogicalVector omit = LogicalVector(m);

int omit_count = 0, omit_f = m, omit_l = 0;
int omit_count = 0;

List why_omit(n);
why_omit.names() = df_names;
LogicalVector why_omit_idx(n);


for (int j = 0, ii = 0; j < n; j++) {
for (int j = 0; j < n; j++) {

std::vector<int> why_omit_j;

for (int j_sub = na_to_col_map[j]; j_sub; j_sub--){
for (int i = 0; i < m; i++, ii++){
LogicalVector v_isna = isna(df[j]);

if(isna[ii]){
if(!omit[i]){
why_omit_j.push_back(i + 1);
}
for(int ii = m; ii < LENGTH(v_isna); ){
for(int i = 0; i < m; i++, ii++)
v_isna[i] |= v_isna[ii];
}

omit[i] = true;
for(int i = 0; i < m; i++){
if(v_isna[i]){
if(!omit[i]){
why_omit_j.push_back(i + 1);
}
}
omit[i] = true;
};
}

if(why_omit_j.size() > 0){
if(na_to_col_map[j] > 1){
std::sort(why_omit_j.begin(), why_omit_j.end());
}
why_omit[j] = wrap(why_omit_j);
why_omit_idx[j] = true;
omit_f = std::min(omit_f, why_omit_j.front());
omit_l = std::max(omit_l, why_omit_j.back());
omit_count += why_omit_j.size();
}
}
if(omit_count == 0){ return(df); }

// Rcout << "after\n" << omit_count << "\n";
if(omit_count == 0){ return(df); }

IntegerVector omit_idx = IntegerVector(omit_count);
for(int i = omit_f-1, ii=0; i < omit_l; i++){
for(int i = 0, ii=0; ii < omit_count; i++){
if(omit[i]) omit_idx[ii++] = i+1;
}

Expand All @@ -92,20 +71,18 @@ DataFrame naomitwhy(DataFrame df, LogicalMatrix isna, Function recursive_subset)

omit_idx.attr("why_omit") = why_omit[why_omit_idx];
omit_idx.attr("class") = CharacterVector::create("omit", "detailed");
//omit_idx.attr("tokeep") = !omit;

omit = !omit;

List out(n);

for(int i = 0; i < n; i++){
SEXP dfi = df(i);
if(Rf_isVectorAtomic(dfi) && LENGTH(dfi) == m){
if(LENGTH(dfi) == m){
out[i] = generic_logical_subset(dfi, omit);
} else {
out[i] = recursive_subset(dfi, omit);
}

}

out.names() = df_names;
Expand All @@ -120,12 +97,11 @@ DataFrame naomitwhy(DataFrame df, LogicalMatrix isna, Function recursive_subset)
// require(microbenchmark)
// df <- expand.grid(x=c(1:100, NA), y=c(1:5, NA), z=c(1:8, NA), q=c(NA,2:5))
// df2 <- na.omit(df)
// microbenchmark(stock=na.omit(df), ours=estimatr:::na.omit_detailed.data.frame(df))
// microbenchmark(stock=na.omit(df), ours=estimatr:::na.omit_detailed.data.frame(df), unit="ms")
// microbenchmark(stock=na.omit(df2), ours=estimatr:::na.omit_detailed.data.frame(df2), unit="ms")
//

// df <- rbind(df, df2, df)
// df2 <- rbind(df2, df2, df2)
//
// microbenchmark(stock=na.omit(df), ours=estimatr:::na.omit_detailed.data.frame(df), unit="ms")
// microbenchmark(stock=na.omit(df2), ours=estimatr:::na.omit_detailed.data.frame(df2), unit="ms")

Expand Down

0 comments on commit e6463be

Please sign in to comment.