Skip to content

Commit

Permalink
Merge 999f1d1 into a9367e7
Browse files Browse the repository at this point in the history
  • Loading branch information
Hendrik Muhs committed Aug 1, 2018
2 parents a9367e7 + 999f1d1 commit d510f48
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,6 @@ class MemoryMapManager final {
*/
bool GetAddressQuickTestOk(size_t offset, size_t length) const {
  // number of bytes from `offset` up to the next multiple of chunk_size_
  const size_t bytes_until_chunk_end = chunk_size_ - (offset % chunk_size_);

  // the range passes the quick test when it does not reach beyond that boundary
  return bytes_until_chunk_end >= length;
}

Expand Down Expand Up @@ -192,7 +191,7 @@ class MemoryMapManager final {

while (remaining > 0) {
size_t bytes_in_chunk = std::min(chunk_size_, remaining);
TRACE("write chunk %d, with size: %ld, remaining: %ld", i, bytes_in_chunk, remaining);
TRACE("write chunk %d, with size: %ld, remaining: %ld", chunk, bytes_in_chunk, remaining);

const char* ptr = reinterpret_cast<const char*>(mappings_[chunk].region_->get_address());
stream.write(ptr, bytes_in_chunk);
Expand Down Expand Up @@ -230,6 +229,8 @@ class MemoryMapManager final {
mappings_.clear();
}

/**
 * Read-only accessor for chunk_size_.
 *
 * @return the current value of chunk_size_
 */
size_t GetChunkSize() const {
  return chunk_size_;
}

private:
struct mapping {
boost::interprocess::file_mapping* mapping_;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,8 @@ class SparseArrayPersistence final {
TRACE("Wrote Transitions, stream at %d", stream.tellp());
}

/**
 * Forwards to GetChunkSize() of the external transitions store (transitions_extern_).
 *
 * @return whatever transitions_extern_->GetChunkSize() reports
 */
size_t GetChunkSizeExternalTransitions() const {
  const size_t external_chunk_size = transitions_extern_->GetChunkSize();
  return external_chunk_size;
}

private:
unsigned char* labels_;
MemoryMapManager* labels_extern_;
Expand Down Expand Up @@ -289,19 +291,21 @@ inline uint64_t SparseArrayPersistence<uint16_t>::ResolveTransitionValue(size_t
if (pt & 0x8000) {
// clear the first bit
pt &= 0x7FFF;
const size_t overflow_bucket = (pt >> 4) + offset - 512;
const size_t overflow_bucket = (pt >> 4) + offset - COMPACT_SIZE_WINDOW;

if (overflow_bucket >= in_memory_buffer_offset_) {
resolved_ptr = keyvi::util::decodeVarshort(transitions_ + overflow_bucket - in_memory_buffer_offset_);
} else {
if (transitions_extern_->GetAddressQuickTestOk(overflow_bucket * sizeof(uint16_t), 5)) {
// value needs to be read from external storage, which in 99.9% of cases is a trivial access to the mmap'ed area
// but in rare cases might be spread across 2 chunks; for the chunk border test we assume the worst case of 3 varshorts
// to be read, that is a maximum of 2**45, so together with shifting 2**48 == 256 TB of addressable space
if (transitions_extern_->GetAddressQuickTestOk(overflow_bucket * sizeof(uint16_t), 3 * sizeof(uint16_t))) {
resolved_ptr = keyvi::util::decodeVarshort(
reinterpret_cast<uint16_t*>(transitions_extern_->GetAddress(overflow_bucket * sizeof(uint16_t))));
} else {
// value might be on the chunk border, take a secure approach
uint16_t buffer[10];
transitions_extern_->GetBuffer((offset + FINAL_OFFSET_TRANSITION) * sizeof(uint16_t), buffer,
10 * sizeof(uint16_t));
// value might be on the chunk border, read it into a buffer and read from there
uint16_t buffer[3 * sizeof(uint16_t)];
transitions_extern_->GetBuffer(overflow_bucket * sizeof(uint16_t), buffer, 3 * sizeof(uint16_t));

resolved_ptr = keyvi::util::decodeVarshort(buffer);
}
Expand All @@ -311,11 +315,11 @@ inline uint64_t SparseArrayPersistence<uint16_t>::ResolveTransitionValue(size_t

if (pt & 0x8) {
// relative coding
resolved_ptr = offset - resolved_ptr + 512;
resolved_ptr = offset - resolved_ptr + COMPACT_SIZE_WINDOW;
}

} else {
resolved_ptr = offset - pt + 512;
resolved_ptr = offset - pt + COMPACT_SIZE_WINDOW;
}

return resolved_ptr;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ BOOST_AUTO_TEST_CASE(writeTransitionRelativeOverflowZerobyteGhostState) {
u2.Add(65, 100);
u2.Add(66, 101);
u2.Add(233, 102);
for (int i = 1; i < 255 + 65; ++i) {
for (size_t i = 1; i < 255 + 65; ++i) {
// mark transitions
if (i == 255) {
continue;
Expand Down Expand Up @@ -237,7 +237,7 @@ BOOST_AUTO_TEST_CASE(writeTransitionRelativeOverflowZerobyteEdgecase) {

p.BeginNewState(1000000);

for (int i = 0xff; i > 1; i--) {
for (size_t i = 0xff; i > 1; i--) {
// mark some state beginnings that could lead to zombie states
b.state_start_positions_.Set(1000001 - i);
}
Expand Down Expand Up @@ -282,7 +282,7 @@ BOOST_AUTO_TEST_CASE(writeTransitionRelativeOverflowZerobyteEdgecaseStartPositio

p.BeginNewState(1000000);

for (int i = 0; i < 1000; ++i) {
for (size_t i = 0; i < 1000; ++i) {
// mark some state beginnings that could lead to zombie states
b.state_start_positions_.Set(1000000 + i);

Expand All @@ -298,7 +298,7 @@ BOOST_AUTO_TEST_CASE(writeTransitionRelativeOverflowZerobyteEdgecaseStartPositio
BOOST_CHECK_EQUAL(p.ReadTransitionLabel(1001000), 65);
BOOST_CHECK_EQUAL(p.ResolveTransitionValue(1001000, p.ReadTransitionValue(1001000)), 333336);

for (int i = 0; i < 1000; ++i) {
for (size_t i = 0; i < 1000; ++i) {
BOOST_CHECK_EQUAL(p.ReadTransitionLabel(1000000 + i), 70);
}
}
Expand Down Expand Up @@ -343,6 +343,68 @@ BOOST_AUTO_TEST_CASE(writeTransitionFinalStateTransition) {
BOOST_CHECK_EQUAL(p.ReadTransitionLabel(1000000 + FINAL_OFFSET_TRANSITION + 1), 2);
}

// Round-trip test: writes one transition with the value (offset - 80000), moves the
// persistence far ahead with BeginNewState, then reads the transition back and checks
// that ResolveTransitionValue yields the value that was written.
// NOTE(review): presumably the far-ahead BeginNewState pushes the written region out of
// the in-memory buffer so that the external (memory mapped) read path is used - confirm.
BOOST_AUTO_TEST_CASE(writeTransitionExternalMemory) {
const size_t memory_limit_persistence = 64000;
SparseArrayPersistence<uint16_t> p(memory_limit_persistence, boost::filesystem::temp_directory_path());
const int64_t limit = 1024 * 1024;
SparseArrayBuilder<SparseArrayPersistence<uint16_t>> b(limit, &p, false);

// simulate that sparse array builder got tons of states
b.highest_persisted_state_ = 1024 * 1024;

// chunk size as reported by the persistence for its external transitions store
const size_t chunk_size = p.GetChunkSizeExternalTransitions();
// integer division: 1048576 / 64000 == 16
const size_t factor = (1024 * 1024) / memory_limit_persistence;

// 2 below a multiple of the chunk size
const size_t offset = (factor * chunk_size) - 2;

p.BeginNewState(offset - 100);

// write a transition on the chunk border with an overflowing transition
// NOTE(review): arguments look like (position, label, transition value) - confirm against WriteTransition
b.WriteTransition(offset - 20, 20, offset - 80000);
b.taken_positions_in_sparsearray_.Set(offset - 20);

// force flushing buffers
p.BeginNewState(chunk_size * (factor + 2));

// raw stored value at the position written above
const uint16_t val = p.ReadTransitionValue(offset - 20);

// resolving the raw value must give back the value passed to WriteTransition
BOOST_CHECK_EQUAL(offset - 80000, p.ResolveTransitionValue(offset - 20, val));
}

// Round-trip test close to a multiple of the chunk size: marks the slots in front of
// `offset` as taken, writes one transition with the value (offset - 80000) at offset - 3,
// moves the persistence far ahead with BeginNewState, then checks that
// ResolveTransitionValue yields the value that was written.
// NOTE(review): presumably this exercises the read path for a value that spans two
// chunks of the external storage - confirm.
BOOST_AUTO_TEST_CASE(writeTransitionChunkborder) {
const size_t memory_limit_persistence = 64000;
SparseArrayPersistence<uint16_t> p(memory_limit_persistence, boost::filesystem::temp_directory_path());
const int64_t limit = 1024 * 1024;
SparseArrayBuilder<SparseArrayPersistence<uint16_t>> b(limit, &p, false);

// simulate that sparse array builder got tons of states
b.highest_persisted_state_ = 1024 * 1024;

// find some setting to setup write on a chunk border
const size_t chunk_size = p.GetChunkSizeExternalTransitions();
// integer division: 1048576 / 64000 == 16
const size_t factor = (1024 * 1024) / memory_limit_persistence;

// 2 below a multiple of the chunk size
const size_t offset = (factor * chunk_size) - 2;

// mark slots taken in sparse array to force writing on chunk border
// (covers offset - COMPACT_SIZE_WINDOW - 10 up to and including offset - 1)
for (size_t i = offset - COMPACT_SIZE_WINDOW - 10; i <= offset - 1; ++i) {
b.taken_positions_in_sparsearray_.Set(i);
}

p.BeginNewState(offset - 5);

// write a transition on the chunk border with an overflowing transition
// NOTE(review): arguments look like (position, label, transition value) - confirm against WriteTransition
b.WriteTransition(offset - 3, 5, offset - 80000);
b.taken_positions_in_sparsearray_.Set(offset - 3);

// force flushing buffers
p.BeginNewState(chunk_size * (factor + 2));

// raw stored value at the position written above
const uint16_t val = p.ReadTransitionValue(offset - 3);

// resolving the raw value must give back the value passed to WriteTransition
BOOST_CHECK_EQUAL(offset - 80000, p.ResolveTransitionValue(offset - 3, val));
}

BOOST_AUTO_TEST_SUITE_END()

} /* namespace internal */
Expand Down

0 comments on commit d510f48

Please sign in to comment.