Skip to content

Commit

Permalink
1. We no longer fake BIG-ENDIAN support. If you have big-endian hardware... well, we need your help to test things out.
Browse files Browse the repository at this point in the history

2. Merging the two serialization routines. There were a few weird things with the non-portable serialization functions (such as writing pointer addresses to disk). Now, we just add a char that selects either an array list or a bitmap. This will ease future maintenance.

3. To avoid a silly extra malloc, the roaring_bitmap_t struct has the roaring_array_t directly in it, instead of a pointer. This should (very slightly) speed things up, reduce memory usage (very slightly), and accelerate bitmap creation (by a measurable amount).
  • Loading branch information
lemire committed Sep 2, 2016
1 parent 371c44e commit 411ad00
Show file tree
Hide file tree
Showing 12 changed files with 562 additions and 754 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ of the latest hardware. Roaring bitmaps are already available on a variety of pl
- CMake (to contribute to the project, users can rely on amalgamation/unity builds)
- clang-format (optional)

Serialization on big-endian hardware may not be compatible with serialization on little-endian hardware.

# Amalgamation/Unity Build

The CRoaring library can be amalgamated into a single source file that makes it easier
Expand Down
19 changes: 19 additions & 0 deletions include/roaring/containers/containers.h
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,7 @@ static inline const char *get_full_container_name(void *container,
return "unknown";
}
__builtin_unreachable();
return NULL;
}

/**
Expand Down Expand Up @@ -1670,6 +1671,9 @@ static inline bool container_iterate(const void *container, uint8_t typecode,
assert(false);
__builtin_unreachable();
}
assert(false);
__builtin_unreachable();
return false;
}

static inline void *container_not(const void *c, uint8_t typ,
Expand Down Expand Up @@ -1698,6 +1702,9 @@ static inline void *container_not(const void *c, uint8_t typ,
assert(false);
__builtin_unreachable();
}
assert(false);
__builtin_unreachable();
return NULL;
}

static inline void *container_not_range(const void *c, uint8_t typ,
Expand Down Expand Up @@ -1730,6 +1737,9 @@ static inline void *container_not_range(const void *c, uint8_t typ,
assert(false);
__builtin_unreachable();
}
assert(false);
__builtin_unreachable();
return NULL;
}

static inline void *container_inot(void *c, uint8_t typ, uint8_t *result_type) {
Expand Down Expand Up @@ -1759,6 +1769,9 @@ static inline void *container_inot(void *c, uint8_t typ, uint8_t *result_type) {
assert(false);
__builtin_unreachable();
}
assert(false);
__builtin_unreachable();
return NULL;
}

static inline void *container_inot_range(void *c, uint8_t typ,
Expand Down Expand Up @@ -1791,6 +1804,9 @@ static inline void *container_inot_range(void *c, uint8_t typ,
assert(false);
__builtin_unreachable();
}
assert(false);
__builtin_unreachable();
return NULL;
}

/**
Expand Down Expand Up @@ -1832,6 +1848,9 @@ static inline bool container_select(const void *container, uint8_t typecode,
assert(false);
__builtin_unreachable();
}
assert(false);
__builtin_unreachable();
return false;
}

#endif
10 changes: 5 additions & 5 deletions include/roaring/roaring.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ extern "C" {
#include <roaring/roaring_types.h>

typedef struct roaring_bitmap_s {
roaring_array_t *high_low_container;
roaring_array_t high_low_container;
bool copy_on_write; /* copy_on_write: whether you want to use copy-on-write
(saves memory and avoids
copies but needs more care in a threaded context). */
Expand Down Expand Up @@ -74,7 +74,7 @@ roaring_bitmap_t *roaring_bitmap_and(const roaring_bitmap_t *x1,
const roaring_bitmap_t *x2);

/**
* Inplace version modifies x1. TODO: decide whether x1 == x2 allowed
* Inplace version modifies x1, x1 == x2 is allowed
*/
void roaring_bitmap_and_inplace(roaring_bitmap_t *x1,
const roaring_bitmap_t *x2);
Expand Down Expand Up @@ -188,13 +188,13 @@ inline bool roaring_bitmap_contains(const roaring_bitmap_t *r,
* here it is possible to bypass the binary search and the ra_get_index
* call with the following call that might often come true
*/
int32_t i = ra_get_index(r->high_low_container, hb);
int32_t i = ra_get_index(& r->high_low_container, hb);
if (i < 0) return false;

uint8_t typecode;
// next call ought to be cheap
void *container =
ra_get_container_at_index(r->high_low_container, i, &typecode);
ra_get_container_at_index(& r->high_low_container, i, &typecode);
// rest might be a tad expensive
return container_contains(container, val & 0xFFFF, typecode);
}
Expand Down Expand Up @@ -243,7 +243,7 @@ bool roaring_bitmap_run_optimize(roaring_bitmap_t *r);
//
// Returns how many bytes were written which should be
// roaring_bitmap_size_in_bytes(ra).
size_t roaring_bitmap_serialize(roaring_bitmap_t *ra, char *buf);
size_t roaring_bitmap_serialize(const roaring_bitmap_t *ra, char *buf);

// use with roaring_bitmap_serialize
// see roaring_bitmap_portable_deserialize if you want a format that's
Expand Down
99 changes: 55 additions & 44 deletions include/roaring/roaring_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,30 +42,47 @@ typedef struct roaring_array_s {
roaring_array_t *ra_create(void);

/**
* Create a new roaring array with the specified capacity (in number
* Initialize an existing roaring array with the specified capacity (in number
* of containers)
*/
roaring_array_t *ra_create_with_capacity(uint32_t cap);
bool ra_init_with_capacity(roaring_array_t *new_ra, uint32_t cap);

/**
* Copies this roaring array (caller is responsible for memory management)
* Initialize with default capacity
*/
roaring_array_t *ra_copy(roaring_array_t *r, bool copy_on_write);
bool ra_init(roaring_array_t * t) ;

/**
* Copies this roaring array, we assume that dest is not initialized
*/
bool ra_copy(const roaring_array_t *source, roaring_array_t * dest, bool copy_on_write);

/**
* Copies this roaring array, we assume that dest is initialized
*/
bool ra_overwrite(const roaring_array_t *source, roaring_array_t * dest, bool copy_on_write);


/**
* Frees the memory used by a roaring array
*/
void ra_free(roaring_array_t *r);
void ra_clear(roaring_array_t *r);

/**
* Frees the memory used by a roaring array, but does not free the containers
*/
void ra_free_without_containers(roaring_array_t *r);
void ra_clear_without_containers(roaring_array_t *r);


/**
* Frees just the containers
*/
void ra_clear_containers(roaring_array_t *ra);

/**
* Get the index corresponding to a 16-bit key
*/
inline int32_t ra_get_index(roaring_array_t *ra, uint16_t x) {
inline int32_t ra_get_index(const roaring_array_t *ra, uint16_t x) {
if ((ra->size == 0) || ra->keys[ra->size - 1] == x) return ra->size - 1;

return binarySearch(ra->keys, (int32_t)ra->size, x);
Expand All @@ -74,7 +91,7 @@ inline int32_t ra_get_index(roaring_array_t *ra, uint16_t x) {
/**
* Retrieves the container at index i, filling in the typecode
*/
inline void *ra_get_container_at_index(roaring_array_t *ra, uint16_t i,
inline void *ra_get_container_at_index(const roaring_array_t *ra, uint16_t i,
uint8_t *typecode) {
*typecode = ra->typecodes[i];
return ra->containers[i];
Expand All @@ -83,7 +100,7 @@ inline void *ra_get_container_at_index(roaring_array_t *ra, uint16_t i,
/**
* Retrieves the key at index i
*/
uint16_t ra_get_key_at_index(roaring_array_t *ra, uint16_t i);
uint16_t ra_get_key_at_index(const roaring_array_t *ra, uint16_t i);

/**
* Add a new key-value pair at index i
Expand All @@ -100,35 +117,35 @@ void ra_append(roaring_array_t *ra, uint16_t s, void *c, uint8_t typecode);
* Append a new key-value pair to ra, cloning (in COW sense) a value from sa
* at index index
*/
void ra_append_copy(roaring_array_t *ra, roaring_array_t *sa, uint16_t index,
void ra_append_copy(roaring_array_t *ra, const roaring_array_t *sa, uint16_t index,
bool copy_on_write);

/**
* Append new key-value pairs to ra, cloning (in COW sense) values from sa
* at indexes
* [start_index, end_index)
*/
void ra_append_copy_range(roaring_array_t *ra, roaring_array_t *sa,
void ra_append_copy_range(roaring_array_t *ra, const roaring_array_t *sa,
uint16_t start_index, uint16_t end_index,
bool copy_on_write);

/** appends from sa to ra, ending with the greatest key that is
* less than or equal to stopping_key
*/
void ra_append_copies_until(roaring_array_t *ra, roaring_array_t *sa,
void ra_append_copies_until(roaring_array_t *ra, const roaring_array_t *sa,
uint16_t stopping_key, bool copy_on_write);

/** appends from sa to ra, starting with the smallest key that is
* strictly greater than before_start
*/

void ra_append_copies_after(roaring_array_t *ra, roaring_array_t *sa,
void ra_append_copies_after(roaring_array_t *ra, const roaring_array_t *sa,
uint16_t before_start, bool copy_on_write);

/**
* Move the key-value pairs to ra from sa at indexes
* [start_index, end_index), old array should not be freed
* (use ra_free_without_containers)
* (use ra_clear_without_containers)
**/
void ra_append_move_range(roaring_array_t *ra, roaring_array_t *sa,
uint16_t start_index, uint16_t end_index);
Expand All @@ -144,19 +161,24 @@ void ra_append_range(roaring_array_t *ra, roaring_array_t *sa,
* Set the container at the corresponding index using the specified
* typecode.
*/
void ra_set_container_at_index(roaring_array_t *ra, int32_t i, void *c,
uint8_t typecode);
/* Stores container c and its typecode at slot i of ra. */
inline void ra_set_container_at_index(const roaring_array_t *ra, int32_t i, void *c,
uint8_t typecode) {
// only the upper bound is asserted; a negative i is not rejected here
assert(i < ra->size);
// ra itself is const-qualified, but the arrays it points to are written through
ra->containers[i] = c;
ra->typecodes[i] = typecode;
}


/**
* If needed, increase the capacity of the array so that it can fit k values
* (at
* least);
*/
void extend_array(roaring_array_t *ra, uint32_t k);
bool extend_array(roaring_array_t *ra, int32_t k);

inline int32_t ra_get_size(roaring_array_t *ra) { return ra->size; }
/* Returns ra->size (presumably the number of containers currently stored — confirm). */
inline int32_t ra_get_size(const roaring_array_t *ra) { return ra->size; }

static inline int32_t ra_advance_until(roaring_array_t *ra, uint16_t x,
/*
 * Thin wrapper: forwards ra->keys, pos, ra->size and x to advanceUntil and
 * returns its result unchanged.
 * NOTE(review): presumably this yields the first index after pos whose key
 * is >= x — confirm against advanceUntil, which is defined elsewhere.
 */
static inline int32_t ra_advance_until(const roaring_array_t *ra, uint16_t x,
int32_t pos) {
return advanceUntil(ra->keys, pos, ra->size, x);
}
Expand All @@ -165,63 +187,52 @@ int32_t ra_advance_until_freeing(roaring_array_t *ra, uint16_t x, int32_t pos);

void ra_downsize(roaring_array_t *ra, int32_t new_length);

void ra_replace_key_and_container_at_index(roaring_array_t *ra, int32_t i,
inline void ra_replace_key_and_container_at_index(roaring_array_t *ra, int32_t i,
uint16_t key, void *c,
uint8_t typecode);

// write set bits to an array
void ra_to_uint32_array(roaring_array_t *ra, uint32_t *ans);

// see ra_portable_serialize if you want a format that's compatible with
// Java
// and Go implementations
size_t ra_serialize(roaring_array_t *ra, char *buf);
uint8_t typecode) {
assert(i < ra->size);

// see ra_portable_serialize if you want a format that's compatible with
// Java
// and Go implementations
roaring_array_t *ra_deserialize(const void *buf);
ra->keys[i] = key;
ra->containers[i] = c;
ra->typecodes[i] = typecode;
}

/**
* How many bytes are required to serialize this bitmap (NOT
* compatible
* with Java and Go versions)
*/
size_t ra_size_in_bytes(roaring_array_t *ra) ;
// write set bits to an array
void ra_to_uint32_array(const roaring_array_t *ra, uint32_t *ans);

/**
* write a bitmap to a buffer. This is meant to be compatible with
* the
* Java and Go versions. Return the size in bytes of the serialized
* output (which should be ra_portable_size_in_bytes(ra)).
*/
size_t ra_portable_serialize(roaring_array_t *ra, char *buf);
size_t ra_portable_serialize(const roaring_array_t *ra, char *buf);

/**
* read a bitmap from a serialized version. This is meant to be compatible
* with
* the
* Java and Go versions.
*/
roaring_array_t *ra_portable_deserialize(const char *buf);
bool ra_portable_deserialize(roaring_array_t * ra, const char *buf);

/**
* How many bytes are required to serialize this bitmap (meant to be
* compatible
* with Java and Go versions)
*/
size_t ra_portable_size_in_bytes(roaring_array_t *ra);
size_t ra_portable_size_in_bytes(const roaring_array_t *ra);

/**
* return true if it contains at least one run container.
*/
bool ra_has_run_container(roaring_array_t *ra);
bool ra_has_run_container(const roaring_array_t *ra);

/**
* Size of the header when serializing (meant to be compatible
* with Java and Go versions)
*/
uint32_t ra_portable_header_size(roaring_array_t *ra);
uint32_t ra_portable_header_size(const roaring_array_t *ra);

/**
* If the container at index i is shared, unshare it (creating a local
Expand Down
13 changes: 1 addition & 12 deletions src/containers/array.c
Original file line number Diff line number Diff line change
Expand Up @@ -394,17 +394,8 @@ int32_t array_container_serialize(array_container_t *container, char *buf) {
*
*/
int32_t array_container_write(const array_container_t *container, char *buf) {
if (IS_BIG_ENDIAN) {
// forcing little endian (could be faster)
for (int32_t i = 0; i < container->cardinality; i++) {
uint16_t val = container->array[i];
buf[2 * i] = (uint8_t)val;
buf[2 * i + 1] = (uint8_t)(val >> 8);
}
} else {
memcpy(buf, container->array,
memcpy(buf, container->array,
container->cardinality * sizeof(uint16_t));
}
return array_container_size_in_bytes(container);
}

Expand All @@ -426,8 +417,6 @@ int32_t array_container_read(int32_t cardinality, array_container_t *container,
array_container_grow(container, cardinality, DEFAULT_MAX_SIZE, false);
}
container->cardinality = cardinality;
assert(!IS_BIG_ENDIAN); // TODO: Implement

memcpy(container->array, buf, container->cardinality * sizeof(uint16_t));

return array_container_size_in_bytes(container);
Expand Down
11 changes: 0 additions & 11 deletions src/containers/bitset.c
Original file line number Diff line number Diff line change
Expand Up @@ -417,25 +417,14 @@ int32_t bitset_container_serialize(bitset_container_t *container, char *buf) {

int32_t bitset_container_write(const bitset_container_t *container,
char *buf) {
if( IS_BIG_ENDIAN){
// forcing little endian (could be faster)
for(int32_t i = 0 ; i < BITSET_CONTAINER_SIZE_IN_WORDS; i++) {
uint64_t val = container->array[i];
val = __builtin_bswap64(val);
memcpy(buf + i * sizeof(uint64_t), &val, sizeof(uint64_t));
}
} else {
memcpy(buf, container->array, BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t));
}
return bitset_container_size_in_bytes(container);
}


int32_t bitset_container_read(int32_t cardinality, bitset_container_t *container,
const char *buf) {
container->cardinality = cardinality;
assert(!IS_BIG_ENDIAN);// TODO: Implement

memcpy(container->array, buf, BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t));
return bitset_container_size_in_bytes(container);
}
Expand Down
Loading

0 comments on commit 411ad00

Please sign in to comment.