Skip to content

Commit

Permalink
improve memory efficiency of cuckoo reader
Browse files Browse the repository at this point in the history
Summary:
When creating a new iterator, instead of storing a mapping from key to
bucket id for sorting, store only the bucket id and read the key from the
mmapped file based on the id. This reduces memory usage from 20 bytes per
entry to only 4 bytes.

Test Plan: db_bench

Reviewers: igor, yhchiang, sdong

Reviewed By: sdong

Subscribers: leveldb

Differential Revision: https://reviews.facebook.net/D23757
  • Loading branch information
Lei Jin committed Sep 25, 2014
1 parent 581442d commit c627595
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 54 deletions.
2 changes: 2 additions & 0 deletions include/rocksdb/table.h
Expand Up @@ -255,6 +255,8 @@ struct CuckooTablePropertyNames {
static const std::string kIdentityAsFirstHash;
// Indicates whether modulo or bitwise AND is used to calculate the hash value
static const std::string kUseModuleHash;
// Fixed user key length
static const std::string kUserKeyLength;
};

struct CuckooTableOptions {
Expand Down
7 changes: 7 additions & 0 deletions table/cuckoo_table_builder.cc
Expand Up @@ -39,6 +39,8 @@ const std::string CuckooTablePropertyNames::kIdentityAsFirstHash =
"rocksdb.cuckoo.hash.identityfirst";
const std::string CuckooTablePropertyNames::kUseModuleHash =
"rocksdb.cuckoo.hash.usemodule";
const std::string CuckooTablePropertyNames::kUserKeyLength =
"rocksdb.cuckoo.hash.userkeylength";

// Obtained by running echo rocksdb.table.cuckoo | sha1sum
extern const uint64_t kCuckooTableMagicNumber = 0x926789d0c5f17873ull;
Expand Down Expand Up @@ -280,6 +282,11 @@ Status CuckooTableBuilder::Finish() {
CuckooTablePropertyNames::kUseModuleHash].assign(
reinterpret_cast<const char*>(&use_module_hash_),
sizeof(use_module_hash_));
uint32_t user_key_len = static_cast<uint32_t>(smallest_user_key_.size());
properties_.user_collected_properties[
CuckooTablePropertyNames::kUserKeyLength].assign(
reinterpret_cast<const char*>(&user_key_len),
sizeof(user_key_len));

// Write meta blocks.
MetaIndexBuilder meta_index_builder;
Expand Down
126 changes: 72 additions & 54 deletions table/cuckoo_table_reader.cc
Expand Up @@ -16,14 +16,16 @@
#include <utility>
#include <vector>
#include "rocksdb/iterator.h"
#include "rocksdb/table.h"
#include "table/meta_blocks.h"
#include "table/cuckoo_table_factory.h"
#include "util/arena.h"
#include "util/coding.h"

namespace rocksdb {
namespace {
static const uint64_t CACHE_LINE_MASK = ~((uint64_t)CACHE_LINE_SIZE - 1);
const uint64_t CACHE_LINE_MASK = ~((uint64_t)CACHE_LINE_SIZE - 1);
const uint32_t kInvalidIndex = std::numeric_limits<uint32_t>::max();
}

extern const uint64_t kCuckooTableMagicNumber;
Expand Down Expand Up @@ -62,6 +64,14 @@ CuckooTableReader::CuckooTableReader(
unused_key_ = unused_key->second;

key_length_ = props->fixed_key_len;
auto user_key_len = user_props.find(CuckooTablePropertyNames::kUserKeyLength);
if (user_key_len == user_props.end()) {
status_ = Status::Corruption("User key length not found");
return;
}
user_key_length_ = *reinterpret_cast<const uint32_t*>(
user_key_len->second.data());

auto value_length = user_props.find(CuckooTablePropertyNames::kValueLength);
if (value_length == user_props.end()) {
status_ = Status::Corruption("Value length not found");
Expand Down Expand Up @@ -104,7 +114,6 @@ CuckooTableReader::CuckooTableReader(
}
use_module_hash_ = *reinterpret_cast<const bool*>(
use_module_hash->second.data());
fprintf(stderr, "use_module_hash %d\n", use_module_hash_);
auto cuckoo_block_size = user_props.find(
CuckooTablePropertyNames::kCuckooBlockSize);
if (cuckoo_block_size == user_props.end()) {
Expand Down Expand Up @@ -185,30 +194,39 @@ class CuckooTableIterator : public Iterator {
void LoadKeysFromReader();

private:
struct CompareKeys {
CompareKeys(const Comparator* ucomp, const bool last_level)
: ucomp_(ucomp),
is_last_level_(last_level) {}
bool operator()(const std::pair<Slice, uint32_t>& first,
const std::pair<Slice, uint32_t>& second) const {
if (is_last_level_) {
return ucomp_->Compare(first.first, second.first) < 0;
} else {
return ucomp_->Compare(ExtractUserKey(first.first),
ExtractUserKey(second.first)) < 0;
}
struct BucketComparator {
BucketComparator(const Slice file_data, const Comparator* ucomp,
uint32_t bucket_len, uint32_t user_key_len,
const Slice target = Slice())
: file_data_(file_data),
ucomp_(ucomp),
bucket_len_(bucket_len),
user_key_len_(user_key_len),
target_(target) {}
bool operator()(const uint32_t first, const uint32_t second) const {
const char* first_bucket =
(first == kInvalidIndex) ? target_.data() :
&file_data_.data()[first * bucket_len_];
const char* second_bucket =
(second == kInvalidIndex) ? target_.data() :
&file_data_.data()[second * bucket_len_];
return ucomp_->Compare(Slice(first_bucket, user_key_len_),
Slice(second_bucket, user_key_len_)) < 0;
}

private:
const Slice file_data_;
const Comparator* ucomp_;
const bool is_last_level_;
const uint32_t bucket_len_;
const uint32_t user_key_len_;
const Slice target_;
};
const CompareKeys comparator_;

const BucketComparator bucket_comparator_;
void PrepareKVAtCurrIdx();
CuckooTableReader* reader_;
Status status_;
// Contains the ids of the occupied buckets, sorted in user key order.
std::vector<std::pair<Slice, uint32_t>> key_to_bucket_id_;
std::vector<uint32_t> sorted_bucket_ids_;
// We assume that the number of items can be stored in uint32 (4 Billion).
uint32_t curr_key_idx_;
Slice curr_value_;
Expand All @@ -219,29 +237,31 @@ class CuckooTableIterator : public Iterator {
};

CuckooTableIterator::CuckooTableIterator(CuckooTableReader* reader)
: comparator_(reader->ucomp_, reader->is_last_level_),
: bucket_comparator_(reader->file_data_, reader->ucomp_,
reader->bucket_length_, reader->user_key_length_),
reader_(reader),
curr_key_idx_(std::numeric_limits<int32_t>::max()) {
key_to_bucket_id_.clear();
curr_key_idx_(kInvalidIndex) {
sorted_bucket_ids_.clear();
curr_value_.clear();
curr_key_.Clear();
}

void CuckooTableIterator::LoadKeysFromReader() {
key_to_bucket_id_.reserve(reader_->GetTableProperties()->num_entries);
sorted_bucket_ids_.reserve(reader_->GetTableProperties()->num_entries);
uint64_t num_buckets = reader_->table_size_ + reader_->cuckoo_block_size_ - 1;
for (uint32_t bucket_id = 0; bucket_id < num_buckets; bucket_id++) {
Slice read_key;
status_ = reader_->file_->Read(bucket_id * reader_->bucket_length_,
reader_->key_length_, &read_key, nullptr);
if (read_key != Slice(reader_->unused_key_)) {
key_to_bucket_id_.push_back(std::make_pair(read_key, bucket_id));
assert(num_buckets < kInvalidIndex);
const char* bucket = reader_->file_data_.data();
for (uint32_t bucket_id = 0; bucket_id < num_buckets; ++bucket_id) {
if (Slice(bucket, reader_->key_length_) != Slice(reader_->unused_key_)) {
sorted_bucket_ids_.push_back(bucket_id);
}
bucket += reader_->bucket_length_;
}
assert(key_to_bucket_id_.size() ==
assert(sorted_bucket_ids_.size() ==
reader_->GetTableProperties()->num_entries);
std::sort(key_to_bucket_id_.begin(), key_to_bucket_id_.end(), comparator_);
curr_key_idx_ = key_to_bucket_id_.size();
std::sort(sorted_bucket_ids_.begin(), sorted_bucket_ids_.end(),
bucket_comparator_);
curr_key_idx_ = kInvalidIndex;
}

void CuckooTableIterator::SeekToFirst() {
Expand All @@ -250,25 +270,25 @@ void CuckooTableIterator::SeekToFirst() {
}

void CuckooTableIterator::SeekToLast() {
curr_key_idx_ = key_to_bucket_id_.size() - 1;
curr_key_idx_ = sorted_bucket_ids_.size() - 1;
PrepareKVAtCurrIdx();
}

void CuckooTableIterator::Seek(const Slice& target) {
// We assume that the target is an internal key. If this is last level file,
// we need to take only the user key part to seek.
Slice target_to_search = reader_->is_last_level_ ?
ExtractUserKey(target) : target;
auto seek_it = std::lower_bound(key_to_bucket_id_.begin(),
key_to_bucket_id_.end(),
std::make_pair(target_to_search, 0),
comparator_);
curr_key_idx_ = std::distance(key_to_bucket_id_.begin(), seek_it);
const BucketComparator seek_comparator(
reader_->file_data_, reader_->ucomp_,
reader_->bucket_length_, reader_->user_key_length_,
ExtractUserKey(target));
auto seek_it = std::lower_bound(sorted_bucket_ids_.begin(),
sorted_bucket_ids_.end(),
kInvalidIndex,
seek_comparator);
curr_key_idx_ = std::distance(sorted_bucket_ids_.begin(), seek_it);
PrepareKVAtCurrIdx();
}

bool CuckooTableIterator::Valid() const {
return curr_key_idx_ < key_to_bucket_id_.size();
return curr_key_idx_ < sorted_bucket_ids_.size();
}

void CuckooTableIterator::PrepareKVAtCurrIdx() {
Expand All @@ -277,15 +297,17 @@ void CuckooTableIterator::PrepareKVAtCurrIdx() {
curr_key_.Clear();
return;
}
uint64_t offset = ((uint64_t) key_to_bucket_id_[curr_key_idx_].second
* reader_->bucket_length_) + reader_->key_length_;
status_ = reader_->file_->Read(offset, reader_->value_length_,
&curr_value_, nullptr);
uint32_t id = sorted_bucket_ids_[curr_key_idx_];
const char* offset = reader_->file_data_.data() +
id * reader_->bucket_length_;
if (reader_->is_last_level_) {
// Always return internal key.
curr_key_.SetInternalKey(
key_to_bucket_id_[curr_key_idx_].first, 0, kTypeValue);
curr_key_.SetInternalKey(Slice(offset, reader_->user_key_length_),
0, kTypeValue);
} else {
curr_key_.SetKey(Slice(offset, reader_->key_length_));
}
curr_value_ = Slice(offset + reader_->key_length_, reader_->value_length_);
}

void CuckooTableIterator::Next() {
Expand All @@ -300,7 +322,7 @@ void CuckooTableIterator::Next() {

void CuckooTableIterator::Prev() {
if (curr_key_idx_ == 0) {
curr_key_idx_ = key_to_bucket_id_.size();
curr_key_idx_ = sorted_bucket_ids_.size();
}
if (!Valid()) {
curr_value_.clear();
Expand All @@ -313,11 +335,7 @@ void CuckooTableIterator::Prev() {

Slice CuckooTableIterator::key() const {
assert(Valid());
if (reader_->is_last_level_) {
return curr_key_.GetKey();
} else {
return key_to_bucket_id_[curr_key_idx_].first;
}
return curr_key_.GetKey();
}

Slice CuckooTableIterator::value() const {
Expand Down
1 change: 1 addition & 0 deletions table/cuckoo_table_reader.h
Expand Up @@ -71,6 +71,7 @@ class CuckooTableReader: public TableReader {
uint32_t num_hash_func_;
std::string unused_key_;
uint32_t key_length_;
uint32_t user_key_length_;
uint32_t value_length_;
uint32_t bucket_length_;
uint32_t cuckoo_block_size_;
Expand Down

0 comments on commit c627595

Please sign in to comment.