Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added skip_absent arguement to colnamesInt() #6068

Merged
merged 29 commits into from
Apr 11, 2024
Merged
Show file tree
Hide file tree
Changes from 26 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
e81f30c
Added skip_absent arguement to colnamesInt()
Apr 10, 2024
3b19900
Update NEWS.md
Nj221102 Apr 10, 2024
d924584
Update NEWS.md
Nj221102 Apr 10, 2024
f423930
Update utils.c
Nj221102 Apr 10, 2024
e8c2f0f
Update utils.c
Nj221102 Apr 10, 2024
5a4e230
Update utils.c
Nj221102 Apr 10, 2024
eab32e6
Update utils.c
Nj221102 Apr 10, 2024
58e335a
Update utils.c
Nj221102 Apr 10, 2024
583ce4d
added test
Apr 10, 2024
d5d3e10
Update src/nafill.c
Nj221102 Apr 11, 2024
fdb30c8
Update src/utils.c
Nj221102 Apr 11, 2024
4d0273d
Update src/utils.c
Nj221102 Apr 11, 2024
ade9f96
Update src/utils.c
Nj221102 Apr 11, 2024
5938bdd
Implemented suggestions
Apr 11, 2024
39b73f1
Merge branch 'master' into colnamesInt
Nj221102 Apr 11, 2024
9fdf048
small fix
Apr 11, 2024
b9a3135
Update utils.c
Nj221102 Apr 11, 2024
c525564
minor issues
MichaelChirico Apr 11, 2024
02d758d
restore comment for now
MichaelChirico Apr 11, 2024
1de9230
Update nafill.Rraw
Nj221102 Apr 11, 2024
6bc4fd0
adjusted any colno. > ncol to 0L
Apr 11, 2024
eae60d5
Added test and changed refrence to deep copy
Apr 11, 2024
d8510b0
Merge branch 'master' into colnamesInt
Nj221102 Apr 11, 2024
53eaf43
annotate test purpose
MichaelChirico Apr 11, 2024
fd4b01d
More careful about when duplicate() is needed
MichaelChirico Apr 11, 2024
701ff2e
refine comment
MichaelChirico Apr 11, 2024
6c422bc
whitespace
MichaelChirico Apr 11, 2024
1344bf3
Add a new test against duplicates for numeric input
MichaelChirico Apr 11, 2024
cc0d661
update last test number
MichaelChirico Apr 11, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion R/wrappers.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ setcoalesce = function(...) .Call(Ccoalesce, list(...), TRUE)
fifelse = function(test, yes, no, na=NA) .Call(CfifelseR, test, yes, no, na)
fcase = function(..., default=NA) .Call(CfcaseR, default, parent.frame(), as.list(substitute(list(...)))[-1L])

colnamesInt = function(x, cols, check_dups=FALSE) .Call(CcolnamesInt, x, cols, check_dups)
colnamesInt = function(x, cols, check_dups=FALSE, skip_absent=FALSE) .Call(CcolnamesInt, x, cols, check_dups, skip_absent)

testMsg = function(status=0L, nx=2L, nk=2L) .Call(CtestMsgR, as.integer(status)[1L], as.integer(nx)[1L], as.integer(nk)[1L])

Expand Down
13 changes: 12 additions & 1 deletion inst/tests/nafill.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -149,8 +149,19 @@ test(4.20, colnamesInt(dt, integer()), integer())
test(4.21, colnamesInt(dt, NULL), seq_along(dt))
test(4.22, colnamesInt("asd", 1), error="must be data.table compatible")
test(4.23, colnamesInt(dt, 1, check_dups="a"), error="check_dups")
test(4.24, colnamesInt(dt, c("a", "e"), skip_absent=TRUE), c(1L,0L))
test(4.25, colnamesInt(dt, c(1L, 4L), skip_absent=TRUE), c(1L,0L))
test(4.26, colnamesInt(dt, c(1, 4), skip_absent=TRUE), c(1L,0L))
test(4.27, colnamesInt(dt, c("a", NA), skip_absent=TRUE), c(1L,0L))
Nj221102 marked this conversation as resolved.
Show resolved Hide resolved
test(4.28, colnamesInt(dt, c(1L, 0L), skip_absent=TRUE), error="received non-existing column*.*0")
test(4.29, colnamesInt(dt, c(1, -5), skip_absent=TRUE), error="received non-existing column*.*-5")
test(4.30, colnamesInt(dt, c(1, 4), skip_absent=NULL), error="skip_absent must be TRUE or FALSE")
test(4.31, colnamesInt(dt, c(1L, 1000L), skip_absent=TRUE), c(1L,0L))
cols=c(1L,100L)
test(4.32, colnamesInt(dt, cols, skip_absent=TRUE), c(1L, 0L))
test(4.33, cols, c(1L, 100L)) # ensure input was not overwritten with output 0
MichaelChirico marked this conversation as resolved.
Show resolved Hide resolved
names(dt) <- NULL
test(4.24, colnamesInt(dt, "a"), error="has no names")
test(4.34, colnamesInt(dt, "a"), error="has no names")
MichaelChirico marked this conversation as resolved.
Show resolved Hide resolved

# verbose
dt = data.table(a=c(1L, 2L, NA_integer_), b=c(1, 2, NA_real_))
Expand Down
2 changes: 1 addition & 1 deletion src/data.table.h
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ bool isRealReallyInt(SEXP x);
SEXP isRealReallyIntR(SEXP x);
SEXP isReallyReal(SEXP x);
bool allNA(SEXP x, bool errorForBadType);
SEXP colnamesInt(SEXP x, SEXP cols, SEXP check_dups);
SEXP colnamesInt(SEXP x, SEXP cols, SEXP check_dups, SEXP skip_absent);
bool INHERITS(SEXP x, SEXP char_);
SEXP copyAsPlain(SEXP x);
void copySharedColumns(SEXP x);
Expand Down
2 changes: 1 addition & 1 deletion src/nafill.c
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ SEXP nafillR(SEXP obj, SEXP type, SEXP fill, SEXP nan_is_na_arg, SEXP inplace, S
obj = PROTECT(allocVector(VECSXP, 1)); protecti++; // wrap into list
SET_VECTOR_ELT(obj, 0, obj1);
}
SEXP ricols = PROTECT(colnamesInt(obj, cols, ScalarLogical(TRUE))); protecti++; // nafill cols=NULL which turns into seq_along(obj)
SEXP ricols = PROTECT(colnamesInt(obj, cols, /* check_dups= */ ScalarLogical(TRUE), /* skip_absent= */ ScalarLogical(FALSE))); protecti++; // nafill cols=NULL which turns into seq_along(obj)
x = PROTECT(allocVector(VECSXP, length(ricols))); protecti++;
int *icols = INTEGER(ricols);
for (int i=0; i<length(ricols); i++) {
Expand Down
27 changes: 19 additions & 8 deletions src/utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -97,16 +97,20 @@ SEXP allNAR(SEXP x) {
* adds validation for:
* correct range [1,ncol], and if type real checks whole integer
* existing columns for character
* optionally check for no duplicates
* optionally (check_dups) check for no duplicates
* optionally (skip_absent) skip (return 0) for numbers outside the range or not naming extant columns
*/
SEXP colnamesInt(SEXP x, SEXP cols, SEXP check_dups) {
SEXP colnamesInt(SEXP x, SEXP cols, SEXP check_dups, SEXP skip_absent) {
if (!isNewList(x))
error(_("'x' argument must be data.table compatible"));
if (!IS_TRUE_OR_FALSE(check_dups))
error(_("%s must be TRUE or FALSE"), "check_dups");
if (!IS_TRUE_OR_FALSE(skip_absent))
error(_("%s must be TRUE or FALSE"), "skip_absent");
int protecti = 0;
R_len_t nx = length(x);
R_len_t nc = length(cols);
bool bskip_absent = LOGICAL(skip_absent)[0];
SEXP ricols = R_NilValue;
if (isNull(cols)) { // seq_along(x)
ricols = PROTECT(allocVector(INTSXP, nx)); protecti++;
Expand All @@ -116,26 +120,33 @@ SEXP colnamesInt(SEXP x, SEXP cols, SEXP check_dups) {
ricols = PROTECT(allocVector(INTSXP, 0)); protecti++;
} else if (isInteger(cols) || isReal(cols)) {
if (isInteger(cols)) {
ricols = cols;
if (bskip_absent) { // we might overwrite values with 0, so make a copy
ricols = PROTECT(duplicate(cols)); protecti++;
} else
ricols = cols;
} else if (isReal(cols)) {
if (!isRealReallyInt(cols))
error(_("argument specifying columns is type 'double' and one or more items in it are not whole integers"));
ricols = PROTECT(coerceVector(cols, INTSXP)); protecti++;
}
int *icols = INTEGER(ricols);
for (int i=0; i<nc; i++) {
if ((icols[i]>nx) || (icols[i]<1))
for (int i=0; i<nc; ++i) {
if ((!bskip_absent && icols[i]>nx) || (icols[i]<1))
error(_("argument specifying columns received non-existing column(s): cols[%d]=%d"), i+1, icols[i]); // handles NAs also
else if(bskip_absent && icols[i]>nx)
icols[i] = 0L;
}
} else if (isString(cols)) {
SEXP xnames = PROTECT(getAttrib(x, R_NamesSymbol)); protecti++;
if (isNull(xnames))
error(_("'x' argument data.table has no names"));
ricols = PROTECT(chmatch(cols, xnames, 0)); protecti++;
int *icols = INTEGER(ricols);
for (int i=0; i<nc; i++) {
if (icols[i]==0)
error(_("argument specifying columns received non-existing column(s): cols[%d]='%s'"), i+1, CHAR(STRING_ELT(cols, i))); // handles NAs also
if (!bskip_absent){
MichaelChirico marked this conversation as resolved.
Show resolved Hide resolved
for (int i=0; i<nc; ++i) {
if (icols[i]==0)
error(_("argument specifying columns received non-existing column(s): cols[%d]='%s'"), i+1, CHAR(STRING_ELT(cols, i))); // handles NAs also
}
}
} else {
error(_("argument specifying columns must be character or numeric"));
Expand Down
Loading