Skip to content

Commit

Permalink
move breaks_to_interval() input checking to C
Browse files Browse the repository at this point in the history
  • Loading branch information
TimTaylor committed Mar 5, 2024
1 parent 6a58839 commit 05af25b
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 142 deletions.
20 changes: 0 additions & 20 deletions R/breaks_to_interval.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,26 +41,6 @@
#'
#' @export
breaks_to_interval <- function(breaks, max_upper = Inf) {

# check breaks are numeric
.assert_numeric(breaks)

# coerce breaks to integer
breaks <- as.integer(breaks)

# ensure valid
if (anyNA(breaks))
stop("`breaks` must be finite, and, coercible to integer.")

# check strictly increasing breaks
if (is.unsorted(breaks, strictly = TRUE))
stop("`breaks` must be in strictly increasing order.")

# check max_upper
.assert_scalar_numeric_not_na(max_upper)
if (max_upper <= max(breaks))
stop("`max_upper` must be greater than all `breaks`.")

.Call(C_breaks_to_interval, breaks, max_upper)
}

Expand Down
170 changes: 48 additions & 122 deletions src/breaks_to_interval.c
Original file line number Diff line number Diff line change
@@ -1,16 +1,61 @@
#include <R.h>
#include <Rinternals.h>

#define IS_NUMERIC(x) (isReal(x) || isInteger(x))


SEXP breaks_to_interval(SEXP breaks, SEXP max_upper) {

// PROTECT counter
int protected = 0;

// create pointer to breaks and calculate length
// ensure max_upper is numeric scalar
if(!IS_NUMERIC(max_upper) || LENGTH(max_upper) != 1)
error("`max_upper` must be scalar numeric and not NA.");

// Ensure max_upper is not NA/NaN
double max_upper_bound = asReal(max_upper);
if(ISNA(max_upper_bound) || ISNAN(max_upper_bound))
error("`max_upper` must be scalar numeric and not NA.");

// ensure numeric breaks
if (!IS_NUMERIC(breaks) || LENGTH(breaks) == 0)
error("`breaks` must be numeric and of length >= 1.");

// coerce breaks to integer
breaks = PROTECT(coerceVector(breaks, INTSXP)); protected++;

// Ensure breaks are not NA and are in strictly increasing order and less than max_upper
int* p_breaks = INTEGER(breaks);
int brk = p_breaks[0];
if (brk == NA_INTEGER)
error("`breaks` must be non-missing (not NA) and coercible to integer.");

if ((double) brk >= max_upper_bound)
error("`max_upper` must be greater than all `breaks`.");

for (int i = 0; i < LENGTH(breaks) - 1; i++) {

int next_brk = p_breaks[i + 1];

if (next_brk == NA_INTEGER)
error("`breaks` must be non-missing (not NA) and coercible to integer.");

if (next_brk <= brk)
error("`breaks` must be in strictly increasing order.");

brk = next_brk;

if ((double) brk >= max_upper_bound)
error("`max_upper` must be greater than all `breaks`.");
}


// create pointer to breaks and calculate length
int n_breaks = LENGTH(breaks);

// mmake upper bound integerish
double max_upper_bound = round(asReal(max_upper));
// make upper bound integerish
max_upper_bound = round(max_upper_bound);

// create numeric lower bounds
SEXP lower_bound = PROTECT(coerceVector(breaks, REALSXP)); protected++;
Expand Down Expand Up @@ -87,123 +132,4 @@ SEXP breaks_to_interval(SEXP breaks, SEXP max_upper) {
UNPROTECT(protected);
return out;




// // check breaks are numeric
// if (!(isReal(breaks) || isInteger(breaks)))
// error("`breaks` must be numeric.");
//
// // coerce breaks to integer
// // this is mainly for the warning messages
// breaks = PROTECT(coerceVector(breaks, INTSXP));
//
// // loop over breaks to check valid
// int n_breaks = LENGTH(breaks);
// int* p_breaks = INTEGER(breaks);
// if (p_breaks[n_breaks - 1] == NA_INTEGER)
// error("`breaks` must be non-missing, finite, and, coercible to integer.");
//
//
// for (int i = 0; i < n_breaks - 1; ++i) {
// int lower = p_breaks[i];
// if (lower == NA_INTEGER) {
// error("`breaks` must be non-missing, finite, and, coercible to integer.");
// }
// int upper = p_breaks[i + 1];
// if (upper <= lower) {
// error("`breaks` must be in strictly increasing order.");
// }
// }
//
// // check max_upper is numeric scalar and appropriately bounded
// if (!(isReal(max_upper) || isInteger(max_upper)))
// error("`max_upper` must be a numeric scalar.");
// max_upper = PROTECT(coerceVector(max_upper, REALSXP));
// int n_max_upper = LENGTH(max_upper);
// if (n_max_upper != 1)
// error("`max_upper` must be a numeric scalar.");
// double max = asReal(max_upper);
// if (ISNA(max))
// error("`max_upper` must be a numeric scalar.");
//
// // create numeric lower bounds
// SEXP lower_bound = PROTECT(coerceVector(breaks, REALSXP));
// double* p_lower_bound = REAL(lower_bound);
//
// // check breaks < max_upper
// if (max <= p_lower_bound[n_breaks - 1])
// error("`max_upper` must be greater than all `breaks`");
//
// // create numeric upper bounds
// SEXP upper_bound = PROTECT(allocVector(REALSXP, n_breaks));
// double* p_upper_bound = REAL(upper_bound);
// for (int i = 0; i < n_breaks - 1; ++i) {
// p_upper_bound[i] = p_lower_bound[i + 1];
// }
// p_upper_bound[n_breaks - 1] = max;
//
//
// // create underlying integers for interval factors
// SEXP interval = PROTECT(allocVector(INTSXP, n_breaks));
// int* p_interval = INTEGER(interval);
// for (int i = 0; i < n_breaks; i++) {
// p_interval[i] = i + 1;
// }
//
// // create levels
// SEXP lvls = PROTECT(allocVector(STRSXP, n_breaks));
//
// // create all but the last names for the levels, "[%d,%d)"
// for (int i = 0; i < n_breaks - 1; ++i) {
// int bufsz = snprintf(NULL, 0, "[%d, %d)", p_breaks[i], p_breaks[i+1]);
// char* buf = R_Calloc(bufsz + 1, char);
// snprintf(buf, bufsz + 1, "[%d, %d)", p_breaks[i], p_breaks[i+1]);
// SET_STRING_ELT(lvls, i, mkChar(buf));
// R_Free(buf);
// }
//
// // create last level name allowing for "[%d, Inf)"
// if (!R_FINITE(max)) {
// int bufsz = snprintf(NULL, 0, "[%d, Inf)", (int) p_breaks[n_breaks - 1]);
// char* buf = R_Calloc(bufsz + 1, char);
// snprintf(buf, bufsz + 1, "[%d, Inf)", (int) p_breaks[n_breaks - 1]);
// SET_STRING_ELT(lvls, n_breaks - 1, mkChar(buf));
// R_Free(buf);
// } else {
// int bufsz = snprintf(NULL, 0, "[%d, %.f)", (int) p_breaks[n_breaks - 1], max);
// char* buf = R_Calloc(bufsz + 1, char);
// snprintf(buf, bufsz + 1, "[%d, %.f)", (int) p_breaks[n_breaks - 1], max);
// SET_STRING_ELT(lvls, n_breaks - 1, mkChar(buf));
// R_Free(buf);
// }
//
// // add levels and class to intervals
// setAttrib(interval, R_LevelsSymbol, lvls);
// SEXP fclass = PROTECT(allocVector(STRSXP, 2));
// SET_STRING_ELT(fclass, 0, mkChar("ordered"));
// SET_STRING_ELT(fclass, 1, mkChar("factor"));
// classgets(interval, fclass);
//
// // create list with lower and upper bound entries
// const char *names[] = {"interval", "lower_bound", "upper_bound", ""};
// SEXP out = PROTECT(mkNamed(VECSXP, names));
// SET_VECTOR_ELT(out, 0, interval);
// SET_VECTOR_ELT(out, 1, lower_bound);
// SET_VECTOR_ELT(out, 2, upper_bound);
//
// // add the data frame class
// SEXP class = PROTECT(allocVector(STRSXP, 1));
// SET_STRING_ELT(class, 0, mkChar("data.frame"));
// classgets(out, class);
//
// // add row names in short form
// // this format can be seen in the R function .set_row_names()
// SEXP rnms = PROTECT(allocVector(INTSXP, 2));
// INTEGER(rnms)[0] = NA_INTEGER;
// INTEGER(rnms)[1] = -n_breaks;
// setAttrib(out, R_RowNamesSymbol, rnms);
//
// UNPROTECT(10);
// return out;
}

0 comments on commit 05af25b

Please sign in to comment.