Skip to content

Commit

Permalink
Use set C function in R
Browse files Browse the repository at this point in the history
  • Loading branch information
nicolaasuni committed Oct 9, 2018
1 parent 9c45d55 commit e8a3481
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 43 deletions.
4 changes: 2 additions & 2 deletions c/src/variantkey/set.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ static inline void sort_uint64_t(uint64_t *arr, uint64_t nitems)
uint64_t i;
uint64_t *tmp = (uint64_t *)malloc(nitems * sizeof(uint64_t));
uint64_t v;
// calculate counts (histograms)
// calculate counts
for(i = 0; i < nitems; i++)
{
v = arr[i];
Expand All @@ -70,7 +70,7 @@ static inline void sort_uint64_t(uint64_t *arr, uint64_t nitems)
c1[((v >> 48) & 0xff)]++;
c0[((v >> 56) & 0xff)]++;
}
// convert counts to offsets (indices)
// convert counts to offsets
for(i = 0; i < 256; i++)
{
t7 = (o7 + c7[i]);
Expand Down
58 changes: 17 additions & 41 deletions r/variantkey/R/uint64.R
Original file line number Diff line number Diff line change
Expand Up @@ -306,71 +306,47 @@ as.data.frame.uint64 <- function(x, ...) {
return(ret)
}

# @TODO: implement native uint64 functions to avoid conversion to and from hex

#' Ordering Permutation.
#' @param x uint64 vector
#' @export
order.uint64 <- function(x, ...) {
return(order(as.character(as.hex64(x)), ...))
}

#' Sorting or Ordering uint64 Vectors.
#' Sorts a uint64 vector in ascending order.
#' @param x uint64 vector
#' @useDynLib variantkey R_sort_uint64
#' @export
sort.uint64 <- function(x, ...) {
return(as.uint64(as.hex64(sort(as.character(as.hex64(x)), ...))))
ret <- uint64(length(x))
return(.Call("R_sort_uint64", as.uint64(x), ret))
}

#' Extract Unique Elements.
#' Eliminates all but the first element from every consecutive group of equal values.
#' @param x uint64 vector
#' @useDynLib variantkey R_unique_uint64
#' @export
unique.uint64 <- function(x, ...) {
return(as.uint64(as.hex64(unique(as.character(as.hex64(x)), ...))))
}

#' Performs set union on two vectors.
#' @param x uint64 vector
#' @param y uint64 vector
#' @export
union.uint64 <- function(x, y) {
return(as.uint64(as.hex64(union(as.character(as.hex64(x)), as.character(as.hex64(y))))))
ret <- uint64(length(x))
return(.Call("R_unique_uint64", as.uint64(x), ret))
}

#' Performs set (asymmetric!) difference on two vectors.
#' Returns the intersection of two sorted uint64 vectors.
#' @param x uint64 vector
#' @param y uint64 vector
#' @useDynLib variantkey R_intersect_uint64
#' @export
intersect.uint64 <- function(x, y) {
return(as.uint64(as.hex64(intersect(as.character(as.hex64(x)), as.character(as.hex64(y))))))
ret <- uint64(min(length(x), length(y)))
return(.Call("R_intersect_uint64", as.uint64(x), as.uint64(y), ret))
}

#' Performs set setdiff on two vectors.
#' Returns the union of two sorted uint64 vectors.
#' @param x uint64 vector
#' @param y uint64 vector
#' @useDynLib variantkey R_union_uint64
#' @export
setdiff.uint64 <- function(x, y) {
return(as.uint64(as.hex64(setdiff(as.character(as.hex64(x)), as.character(as.hex64(y))))))
}

#' Performs set equality on two vectors.
#' @param x uint64 vector
#' @param y uint64 vector
#' @export
setequal.uint64 <- function(x, y) {
return(setequal(as.character(as.hex64(x)), as.character(as.hex64(y))))
}

#' Determines which elements of a vector or data frame are duplicates of elements with smaller subscripts, and returns a logical vector indicating which elements (rows) are duplicates.
#' @param x uint64 vector
#' @export
duplicated.uint64 <- function(x, ...) {
return(duplicated(as.character(as.hex64(x)), ...))
}

#' Generalized” more efficient shortcut for any(duplicated(.)).
#' @param x uint64 vector
#' @export
anyDuplicated.uint64 <- function(x, ...) {
return(anyDuplicated(as.character(as.hex64(x)), ...))
union.uint64 <- function(x, y) {
ret <- uint64(length(x) + length(y))
return(.Call("R_union_uint64", as.uint64(x), as.uint64(y), ret))
}
42 changes: 42 additions & 0 deletions r/variantkey/src/uint64.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#include <Rdefines.h>
#include <stdlib.h>
#include "../../../c/src/variantkey/hex.h"
#include "../../../c/src/variantkey/set.h"

#define MAX_UINT64_DEC_CHARS 21

Expand Down Expand Up @@ -140,3 +141,44 @@ SEXP R_integer_to_uint64(SEXP x, SEXP ret)
}
return ret;
}

SEXP R_sort_uint64(SEXP x, SEXP ret)
{
uint64_t n = LENGTH(ret);
uint64_t *res = (uint64_t *)REAL(ret);
memcpy((void *)res, (void *)REAL(x), (n * sizeof(uint64_t)));
sort_uint64_t(res, n);
return ret;
}

SEXP R_unique_uint64(SEXP x, SEXP ret)
{
uint64_t n = LENGTH(ret);
uint64_t *res = (uint64_t *)REAL(ret);
memcpy((void *)res, (void *)REAL(x), (n * sizeof(uint64_t)));
uint64_t *p = unique_uint64_t(res, n);
SETLENGTH(ret, (p - res));
return ret;
}

SEXP R_intersect_uint64(SEXP x, SEXP y, SEXP ret)
{
uint64_t nx = LENGTH(x), ny = LENGTH(y);
uint64_t *res = (uint64_t *)REAL(ret);
uint64_t *px = (uint64_t *)REAL(x);
uint64_t *py = (uint64_t *)REAL(y);
uint64_t *p = intersection_uint64_t(px, nx, py, ny, res);
SETLENGTH(ret, (p - res));
return ret;
}

SEXP R_union_uint64(SEXP x, SEXP y, SEXP ret)
{
uint64_t nx = LENGTH(x), ny = LENGTH(y);
uint64_t *res = (uint64_t *)REAL(ret);
uint64_t *px = (uint64_t *)REAL(x);
uint64_t *py = (uint64_t *)REAL(y);
uint64_t *p = union_uint64_t(px, nx, py, ny, res);
SETLENGTH(ret, (p - res));
return ret;
}

0 comments on commit e8a3481

Please sign in to comment.