Skip to content

Commit

Permalink
Implement subset functions.
Browse files Browse the repository at this point in the history
No unit tests yet.
  • Loading branch information
Ezibenroc committed Sep 17, 2016
1 parent 4d57ecb commit ed1ef84
Show file tree
Hide file tree
Showing 13 changed files with 366 additions and 1 deletion.
1 change: 1 addition & 0 deletions amalgamation.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ $SCRIPTPATH/include/roaring/containers/bitset.h
$SCRIPTPATH/include/roaring/containers/run.h
$SCRIPTPATH/include/roaring/containers/convert.h
$SCRIPTPATH/include/roaring/containers/mixed_equal.h
$SCRIPTPATH/include/roaring/containers/mixed_subset.h
$SCRIPTPATH/include/roaring/containers/mixed_andnot.h
$SCRIPTPATH/include/roaring/containers/mixed_intersection.h
$SCRIPTPATH/include/roaring/containers/mixed_negation.h
Expand Down
6 changes: 6 additions & 0 deletions include/roaring/containers/array.h
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,12 @@ static inline int32_t array_container_size_in_bytes(
bool array_container_equals(array_container_t *container1,
array_container_t *container2);

/**
 * Return true if container1 is a subset of container2, that is, if every
 * value stored in container1 is also stored in container2. The subset does
 * not have to be strict: two containers holding the same values are subsets
 * of each other. Return false otherwise.
 */
bool array_container_is_subset(array_container_t *container1,
array_container_t *container2);

/**
* If the element of given rank is in this container, supposing that the first
* element has rank start_rank, then the function returns true and sets element
Expand Down
6 changes: 6 additions & 0 deletions include/roaring/containers/bitset.h
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,12 @@ static inline int32_t bitset_container_size_in_bytes(
bool bitset_container_equals(bitset_container_t *container1,
bitset_container_t *container2);

/**
 * Return true if container1 is a subset of container2, that is, if every
 * bit set in container1 is also set in container2. The subset does not have
 * to be strict. Return false otherwise.
 */
bool bitset_container_is_subset(bitset_container_t *container1,
bitset_container_t *container2);

/**
* If the element of given rank is in this container, supposing that the first
* element has rank start_rank, then the function returns true and sets element
Expand Down
49 changes: 49 additions & 0 deletions include/roaring/containers/containers.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <roaring/containers/bitset.h>
#include <roaring/containers/convert.h>
#include <roaring/containers/mixed_equal.h>
#include <roaring/containers/mixed_subset.h>
#include <roaring/containers/mixed_intersection.h>
#include <roaring/containers/mixed_negation.h>
#include <roaring/containers/mixed_union.h>
Expand Down Expand Up @@ -541,6 +542,54 @@ static inline bool container_equals(const void *c1, uint8_t type1,
}
}

/**
 * Tests whether every value held by container c1 is also held by container
 * c2. The two containers are allowed to have different types: each
 * (type1, type2) combination is forwarded to the routine specialised for it.
 *
 * Both containers are first passed through container_unwrap_shared, which
 * receives the type codes by address and may therefore update them before
 * the dispatch takes place.
 */
static inline bool container_is_subset(const void *c1, uint8_t type1,
                                       const void *c2, uint8_t type2) {
    c1 = container_unwrap_shared(c1, &type1);
    c2 = container_unwrap_shared(c2, &type2);

    switch (CONTAINER_PAIR(type1, type2)) {
        /* ---- c1 is an array container ---- */
        case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
                            ARRAY_CONTAINER_TYPE_CODE):
            return array_container_is_subset((array_container_t *)c1,
                                             (array_container_t *)c2);
        case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
                            BITSET_CONTAINER_TYPE_CODE):
            return array_container_is_subset_bitset((array_container_t *)c1,
                                                    (bitset_container_t *)c2);
        case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
                            RUN_CONTAINER_TYPE_CODE):
            return array_container_is_subset_run((array_container_t *)c1,
                                                 (run_container_t *)c2);

        /* ---- c1 is a bitset container ---- */
        case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
                            ARRAY_CONTAINER_TYPE_CODE):
            // by construction, size(c1) > size(c2), so c1 cannot fit in c2
            return false;
        case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
                            BITSET_CONTAINER_TYPE_CODE):
            return bitset_container_is_subset((bitset_container_t *)c1,
                                              (bitset_container_t *)c2);
        case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
                            RUN_CONTAINER_TYPE_CODE):
            return bitset_container_is_subset_run((bitset_container_t *)c1,
                                                  (run_container_t *)c2);

        /* ---- c1 is a run container ---- */
        case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE,
                            ARRAY_CONTAINER_TYPE_CODE):
            return run_container_is_subset_array((run_container_t *)c1,
                                                 (array_container_t *)c2);
        case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE,
                            BITSET_CONTAINER_TYPE_CODE):
            return run_container_is_subset_bitset((run_container_t *)c1,
                                                  (bitset_container_t *)c2);
        case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE,
                            RUN_CONTAINER_TYPE_CODE):
            return run_container_is_subset((run_container_t *)c1,
                                           (run_container_t *)c2);

        /* ---- unknown type combination: programming error ---- */
        default:
            assert(false);
            __builtin_unreachable();
            return false;
    }
}

// macro-izations possibilities for generic non-inplace binary-op dispatch

/**
Expand Down
43 changes: 43 additions & 0 deletions include/roaring/containers/mixed_subset.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/*
 * mixed_subset.h
 *
 * Subset tests between two containers of different types (array, bitset and
 * run containers). In every function below, container1 is the candidate
 * subset and container2 is the candidate superset.
 */

#ifndef CONTAINERS_MIXED_SUBSET_H_
#define CONTAINERS_MIXED_SUBSET_H_

#include <roaring/containers/array.h>
#include <roaring/containers/bitset.h>
#include <roaring/containers/run.h>

/**
 * Return true if container1 is a subset of container2, i.e. if every value
 * of the array container1 is also present in the bitset container2.
 */
bool array_container_is_subset_bitset(array_container_t* container1,
bitset_container_t* container2);

/**
 * Return true if container1 is a subset of container2, i.e. if every value
 * covered by the runs of container1 is also present in the array container2.
 */
bool run_container_is_subset_array(run_container_t* container1,
array_container_t* container2);

/**
 * Return true if container1 is a subset of container2, i.e. if every value
 * of the array container1 is covered by a run of container2.
 */
bool array_container_is_subset_run(array_container_t* container1,
run_container_t* container2);

/**
 * Return true if container1 is a subset of container2, i.e. if every value
 * covered by the runs of container1 is also present in the bitset
 * container2.
 */
bool run_container_is_subset_bitset(run_container_t* container1,
bitset_container_t* container2);

/**
 * Return true if container1 is a subset of container2, i.e. if every value
 * of the bitset container1 is covered by a run of container2.
 */
bool bitset_container_is_subset_run(bitset_container_t* container1,
run_container_t* container2);

#endif /* CONTAINERS_MIXED_SUBSET_H_ */
6 changes: 6 additions & 0 deletions include/roaring/containers/run.h
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,12 @@ static inline int32_t run_container_size_in_bytes(
bool run_container_equals(run_container_t *container1,
run_container_t *container2);

/**
 * Return true if container1 is a subset of container2, that is, if every
 * value of container1 is also a value of container2. Return false otherwise.
 */
bool run_container_is_subset(run_container_t *container1,
run_container_t *container2);

/**
* Used in a start-finish scan that appends segments, for XOR and NOT
*/
Expand Down
5 changes: 5 additions & 0 deletions include/roaring/roaring.h
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,11 @@ bool roaring_iterate(const roaring_bitmap_t *ra, roaring_iterator iterator,
*/
bool roaring_bitmap_equals(roaring_bitmap_t *ra1, roaring_bitmap_t *ra2);

/**
 * Return true if all the elements of ra1 are also in ra2, i.e. if ra1 is a
 * subset of ra2. ra1 is the candidate subset, ra2 the candidate superset.
 */
bool roaring_bitmap_is_subset(roaring_bitmap_t *ra1, roaring_bitmap_t *ra2);

/**
* (For expert users who seek high performance.)
*
Expand Down
3 changes: 2 additions & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ set(ROARING_SRC
containers/mixed_intersection.c
containers/mixed_union.c
containers/mixed_equal.c
containers/mixed_subset.c
containers/mixed_negation.c
containers/mixed_xor.c
containers/mixed_andnot.c
Expand All @@ -29,5 +30,5 @@ set(ROARING_SRC

add_library(${ROARING_LIB_NAME} ${ROARING_LIB_TYPE} ${ROARING_SRC})
install(TARGETS ${ROARING_LIB_NAME} DESTINATION lib)
set_target_properties(${ROARING_LIB_NAME} PROPERTIES
set_target_properties(${ROARING_LIB_NAME} PROPERTIES
LIBRARY_OUTPUT_DIRECTORY "..")
26 changes: 26 additions & 0 deletions src/containers/array.c
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,32 @@ bool array_container_equals(array_container_t *container1,
return true;
}

/*
 * Subset test between two array containers: returns true when every value of
 * container1 also occurs in container2.
 *
 * Both arrays are walked in lockstep, which relies on their values being
 * stored in increasing order (presumably a container invariant).
 */
bool array_container_is_subset(array_container_t *container1,
                               array_container_t *container2) {
    const int card1 = container1->cardinality;
    const int card2 = container2->cardinality;

    // A larger set can never be contained in a smaller one.
    if (card1 > card2) {
        return false;
    }

    int pos1 = 0;
    for (int pos2 = 0; pos1 < card1 && pos2 < card2; ++pos2) {
        if (container1->array[pos1] < container2->array[pos2]) {
            // container2 has already moved past this value: it is missing.
            return false;
        }
        if (container1->array[pos1] == container2->array[pos2]) {
            ++pos1;  // matched, look for the next value of container1
        }
        // otherwise container2 is still behind; only pos2 moves forward
    }

    // Subset only if every value of container1 found a match.
    return pos1 == card1;
}

int32_t array_container_read(int32_t cardinality, array_container_t *container,
const char *buf) {
if (container->capacity < cardinality) {
Expand Down
15 changes: 15 additions & 0 deletions src/containers/bitset.c
Original file line number Diff line number Diff line change
Expand Up @@ -484,6 +484,21 @@ bool bitset_container_equals(bitset_container_t *container1, bitset_container_t
return true;
}

/*
 * Subset test between two bitset containers: returns true when every bit set
 * in container1 is also set in container2.
 */
bool bitset_container_is_subset(bitset_container_t *container1,
                                bitset_container_t *container2) {
    // The cardinality shortcut is only usable when both counts are known.
    const bool both_known =
        container1->cardinality != BITSET_UNKNOWN_CARDINALITY &&
        container2->cardinality != BITSET_UNKNOWN_CARDINALITY;
    if (both_known && container1->cardinality > container2->cardinality) {
        return false;
    }

    for (int32_t word = 0; word < BITSET_CONTAINER_SIZE_IN_WORDS; word++) {
        // A bit present in container1 but absent from container2 disproves
        // the inclusion.
        if ((container1->array[word] & ~container2->array[word]) != 0) {
            return false;
        }
    }
    return true;
}

bool bitset_container_select(const bitset_container_t *container, uint32_t *start_rank, uint32_t rank, uint32_t *element) {
int card = bitset_container_cardinality(container);
if(rank >= *start_rank + card) {
Expand Down
133 changes: 133 additions & 0 deletions src/containers/mixed_subset.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
#include <roaring/containers/mixed_subset.h>
#include <roaring/array_util.h>

/*
 * Subset test, array versus bitset: returns true when every value of the
 * array container1 is present in the bitset container2.
 */
bool array_container_is_subset_bitset(array_container_t* container1,
                                      bitset_container_t* container2) {
    // Cheap rejection, available only when the bitset cardinality is known.
    if (container2->cardinality != BITSET_UNKNOWN_CARDINALITY &&
        container2->cardinality < container1->cardinality) {
        return false;
    }

    int idx = 0;
    while (idx < container1->cardinality) {
        if (!bitset_container_contains(container2, container1->array[idx])) {
            return false;
        }
        idx++;
    }
    return true;
}

/*
 * Subset test, run versus array: returns true when every value covered by
 * the runs of container1 is present in the array container2.
 *
 * For each run [start, stop] the positions of start and stop are looked up
 * in the array. The run is fully contained when both endpoints are found
 * and the distance between their positions equals stop - start, so that no
 * value in between can be missing from a sorted array of distinct values.
 *
 * NOTE(review): this assumes advanceUntil(array, pos, length, min) returns
 * the first index strictly greater than pos whose value is >= min, and a
 * value >= length when there is none -- confirm against array_util.h.
 */
bool run_container_is_subset_array(run_container_t* container1,
                                   array_container_t* container2) {
    if (run_container_cardinality(container1) > container2->cardinality)
        return false;
    int32_t start_pos = -1, stop_pos = -1;
    for (int i = 0; i < container1->n_runs; ++i) {
        int32_t start = container1->runs[i].value;
        int32_t stop = start + container1->runs[i].length;
        // Both lookups resume right after the end of the previous run.
        // Searching for stop after start_pos instead would make it
        // impossible for a single-value run (start == stop) to match, since
        // the search is strictly past the given position.
        const int32_t search_from = stop_pos;
        start_pos = advanceUntil(container2->array, search_from,
                                 container2->cardinality, start);
        stop_pos = advanceUntil(container2->array, search_from,
                                container2->cardinality, stop);
        if (start_pos >= container2->cardinality ||
            stop_pos >= container2->cardinality) {
            // at least one endpoint lies beyond the last array value
            return false;
        }
        if (stop_pos - start_pos != stop - start ||
            container2->array[start_pos] != start ||
            container2->array[stop_pos] != stop) {
            // an endpoint or some value inside the run is missing
            return false;
        }
    }
    return true;
}

/*
 * Subset test, array versus run: returns true when every value of the array
 * container1 is covered by one of the runs of container2.
 *
 * The array values and the runs are walked together; this relies on both
 * being stored in increasing order (presumably a container invariant).
 * The function has no side effects: in particular it prints nothing.
 */
bool array_container_is_subset_run(array_container_t* container1,
                                   run_container_t* container2) {
    if (container1->cardinality > run_container_cardinality(container2))
        return false;
    int i_array = 0, i_run = 0;
    while (i_array < container1->cardinality && i_run < container2->n_runs) {
        // current run covers the inclusive interval [start, stop]
        uint32_t start = container2->runs[i_run].value;
        uint32_t stop = start + container2->runs[i_run].length;
        if (container1->array[i_array] < start) {
            // the value falls in a gap before the current run
            return false;
        } else if (container1->array[i_array] > stop) {
            // the value is past this run, try the next one
            i_run++;
        } else {
            // the value of the array is in the run
            i_array++;
        }
    }
    // Subset only if every array value was matched before the runs ran out.
    return i_array == container1->cardinality;
}

/*
 * Subset test, run versus bitset: returns true when every value covered by
 * the runs of container1 is set in the bitset container2.
 */
bool run_container_is_subset_bitset(run_container_t* container1,
                                    bitset_container_t* container2) {
    // todo: this code could be much faster
    // Use the stored cardinality of the bitset when it is known, otherwise
    // compute it on the fly.
    // NOTE(review): unclear whether computing it modifies container2.
    const int32_t bitset_card =
        (container2->cardinality != BITSET_UNKNOWN_CARDINALITY)
            ? container2->cardinality
            : bitset_container_compute_cardinality(container2);
    if (bitset_card < run_container_cardinality(container1)) {
        return false;
    }

    for (int r = 0; r < container1->n_runs; ++r) {
        const uint32_t first = container1->runs[r].value;
        const uint32_t span = container1->runs[r].length;
        // a run covers the inclusive interval [first, first + span]
        for (uint32_t value = first; value <= first + span; ++value) {
            if (!bitset_container_contains(container2, value)) {
                return false;
            }
        }
    }
    return true;
}

/*
 * Subset test, bitset versus run: returns true when every bit set in the
 * bitset container1 is covered by one of the runs of container2.
 *
 * The set bits are visited in increasing order, word by word, while the
 * runs are advanced alongside; this relies on the runs being stored in
 * increasing order (presumably a container invariant).
 */
bool bitset_container_is_subset_run(bitset_container_t* container1,
                                    run_container_t* container2) {
    // todo: this code could be much faster
    if (container1->cardinality != BITSET_UNKNOWN_CARDINALITY) {
        if (container1->cardinality > run_container_cardinality(container2)) {
            return false;
        }
    }
    int32_t i_bitset = 0, i_run = 0;
    while (i_bitset < BITSET_CONTAINER_SIZE_IN_WORDS &&
           i_run < container2->n_runs) {
        uint64_t w = container1->array[i_bitset];
        while (w != 0 && i_run < container2->n_runs) {
            // current run covers the inclusive interval [start, stop]
            uint32_t start = container2->runs[i_run].value;
            uint32_t stop = start + container2->runs[i_run].length;
            uint64_t t = w & -w;  // isolates the lowest set bit of w
            uint16_t r = i_bitset * 64 + __builtin_ctzll(w);
            if (r < start) {
                // the value falls in a gap before the current run
                return false;
            } else if (r > stop) {
                // the value is past this run, try the next one
                i_run++;
                continue;
            } else {
                // the value is covered: clear the bit and go on
                w ^= t;
            }
        }
        if (w == 0) {
            i_bitset++;
        } else {
            // runs exhausted while this word still has uncovered bits
            return false;
        }
    }
    // No run is left (this includes a run container with no runs at all):
    // container1 is still a subset as long as the words not yet visited hold
    // no value.
    for (; i_bitset < BITSET_CONTAINER_SIZE_IN_WORDS; i_bitset++) {
        if (container1->array[i_bitset] != 0) {
            return false;
        }
    }
    return true;
}

0 comments on commit ed1ef84

Please sign in to comment.