Skip to content

Commit

Permalink
fix a bug in variable length short calculation which can cause corrup…
Browse files Browse the repository at this point in the history
…t keyvi files (#32)

Fixes an off-by-one miscalculation between the length computed by getVarshortLength and the length actually produced by encodeVarshort: the former returned 2 for 0x7fff, while the latter correctly returned 1, since 0x7fff fits in a single 16-bit short (0111 1111 1111 1111). The same bug existed at the next boundary, 0x3fffffff.

As a result of this miscalculation it was possible - although rather unlikely - that a bit in a bitvector was set, marking a cell as written, even though the cell was never written because the required space was 1 instead of 2. An unwritten cell defaults to '\0', and this '\0' caused a mismatch starting with version 0.2, where '\0' became a valid transition. The resulting broken transition caused a segfault due to a broken pointer.
  • Loading branch information
Hendrik Muhs committed Jan 31, 2018
1 parent f25d90c commit 3f8cfcf
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 5 deletions.
6 changes: 3 additions & 3 deletions keyvi/include/keyvi/util/vint.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ void encodeVarint(int_t value, uint8_t* output, size_t* outputSizePtr) {
}

/**
* Encodes an unsigned variable-length integer using the MSB algorithm.
* Encodes an unsigned variable-length short using the MSB algorithm.
* @param value The input value. Any standard integer type is allowed.
* @param output A pointer to a piece of reserved memory. Should have a minimum size dependent on the input size (32 bit
* = 5 bytes, 64 bit = 10 bytes).
Expand Down Expand Up @@ -106,7 +106,7 @@ size_t getVarintLength(int_t value) {
*/
template <typename int_t = uint64_t>
size_t getVarshortLength(int_t value) {
return (value > 0x1fffffffffff) ? 4 : (value < 0x3fffffff) ? (value < 0x7fff) ? 1 : 2 : 3;
return (value > 0x1fffffffffff) ? 4 : (value < 0x40000000) ? (value < 0x8000) ? 1 : 2 : 3;
}

/**
Expand Down Expand Up @@ -149,7 +149,7 @@ int_t decodeVarint(const uint8_t* input) {
}

/**
* Decodes an unsigned variable-length integer using the MSB algorithm.
* Decodes an unsigned variable-length short using the MSB algorithm.
* @param value The input value. Any standard integer type is allowed.
*/
template <typename int_t = uint64_t>
Expand Down
21 changes: 19 additions & 2 deletions keyvi/tests/keyvi/util/vint_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,16 +58,33 @@ BOOST_AUTO_TEST_CASE(VShortLength) {
BOOST_CHECK_EQUAL(77777, decodeVarshort(buffer));

encodeVarshort(32767, buffer, &size);
BOOST_CHECK_EQUAL(util::getVarshortLength(1), size);
BOOST_CHECK_EQUAL(util::getVarshortLength(32767), size);
BOOST_CHECK_EQUAL(1, size);
BOOST_CHECK_EQUAL(32767, decodeVarshort(buffer));

encodeVarshort(32768, buffer, &size);
BOOST_CHECK_EQUAL(util::getVarshortLength(32768), size);
BOOST_CHECK_EQUAL(2, size);
BOOST_CHECK_EQUAL(32768, decodeVarshort(buffer));

encodeVarshort(0x3fffffff, buffer, &size);
BOOST_CHECK_EQUAL(util::getVarshortLength(0x3fffffff), size);
BOOST_CHECK_EQUAL(2, size);
BOOST_CHECK_EQUAL(0x3fffffff, decodeVarshort(buffer));

encodeVarshort(0x40000000, buffer, &size);
BOOST_CHECK_EQUAL(util::getVarshortLength(0x40000000), size);
BOOST_CHECK_EQUAL(3, size);
BOOST_CHECK_EQUAL(0x40000000, decodeVarshort(buffer));

encodeVarshort(0x200000000000, buffer, &size);
encodeVarshort(0x1fffffffffff, buffer, &size);
BOOST_CHECK_EQUAL(util::getVarshortLength(0x1fffffffffff), size);
BOOST_CHECK_EQUAL(3, size);
BOOST_CHECK_EQUAL(0x1fffffffffff, decodeVarshort<uint64_t>(buffer));

encodeVarshort(0x200000000000, buffer, &size);
BOOST_CHECK_EQUAL(util::getVarshortLength(0x200000000000), size);
BOOST_CHECK_EQUAL(4, size);
BOOST_CHECK_EQUAL(0x200000000000, decodeVarshort<uint64_t>(buffer));

uint64_t x = 11687;
Expand Down

0 comments on commit 3f8cfcf

Please sign in to comment.