Permalink
Browse files

Refactor dictionary properties (#109)

Refactor the file properties (the header parts of a keyvi file that store entry points, type information, etc.) into two property classes (one for the FSA part and one for the value part), each used for both reading and writing.

JSON parsing/writing now uses RapidJSON instead of Boost property trees.

Fixes #80
  • Loading branch information...
hendrikmuhs committed Jan 14, 2019
1 parent 388bc34 commit f1f2bbf1e7c92b9fd77ce068c73e866268af0b6e
Showing with 988 additions and 523 deletions.
  1. +4 −0 CMakeLists.txt
  2. +10 −14 keyvi/bin/keyvicompiler/keyvicompiler.cpp
  3. +1 −0 keyvi/flags.cmake
  4. +1 −1 keyvi/include/keyvi/dictionary/dictionary.h
  5. +306 −0 keyvi/include/keyvi/dictionary/dictionary_properties.h
  6. +24 −69 keyvi/include/keyvi/dictionary/fsa/automata.h
  7. +7 −18 keyvi/include/keyvi/dictionary/fsa/generator.h
  8. +4 −0 keyvi/include/keyvi/dictionary/fsa/internal/constants.h
  9. +3 −8 keyvi/include/keyvi/dictionary/fsa/internal/ivalue_store.h
  10. +27 −51 keyvi/include/keyvi/dictionary/fsa/internal/json_value_store.h
  11. +9 −16 keyvi/include/keyvi/dictionary/fsa/internal/sparse_array_persistence.h
  12. +21 −41 keyvi/include/keyvi/dictionary/fsa/internal/string_value_store.h
  13. +9 −7 keyvi/include/keyvi/dictionary/fsa/internal/value_store_factory.h
  14. +174 −0 keyvi/include/keyvi/dictionary/fsa/internal/value_store_properties.h
  15. +0 −97 keyvi/include/keyvi/dictionary/keyvi_file.h
  16. +1 −1 keyvi/include/keyvi/dictionary/util/endian.h
  17. +15 −14 keyvi/include/keyvi/index/internal/index_reader_worker.h
  18. +30 −16 keyvi/include/keyvi/index/internal/index_writer_worker.h
  19. +38 −48 keyvi/include/keyvi/util/serialization_utils.h
  20. +66 −18 keyvi/include/keyvi/vector/vector_file.h
  21. +69 −0 keyvi/tests/keyvi/dictionary/dictionary_properties_test.cpp
  22. +1 −1 keyvi/tests/keyvi/dictionary/fsa/generator_test.cpp
  23. +4 −1 keyvi/tests/keyvi/dictionary/fsa/internal/json_value_store_test.cpp
  24. +3 −2 keyvi/tests/keyvi/dictionary/fsa/internal/null_value_store_test.cpp
  25. +35 −0 keyvi/tests/keyvi/dictionary/fsa/internal/value_store_properties_test.cpp
  26. +81 −0 keyvi/tests/keyvi/util/serialization_utils_test.cpp
  27. +1 −1 python/setup.py
  28. +0 −5 python/src/addons/CompletionDictionaryCompiler.pyx
  29. +0 −5 python/src/addons/CompletionDictionaryMerger.pyx
  30. +1 −11 python/src/addons/Dictionary.pyx
  31. +0 −4 python/src/addons/IntDictionaryCompiler.pyx
  32. +0 −5 python/src/addons/IntDictionaryMerger.pyx
  33. +0 −4 python/src/addons/JsonDictionaryCompiler.pyx
  34. +0 −4 python/src/addons/JsonDictionaryCompilerSmallData.pyx
  35. +0 −5 python/src/addons/JsonDictionaryMerger.pyx
  36. +0 −4 python/src/addons/KeyOnlyDictionaryCompiler.pyx
  37. +0 −5 python/src/addons/KeyOnlyDictionaryMerger.pyx
  38. +0 −4 python/src/addons/StringDictionaryCompiler.pyx
  39. +0 −5 python/src/addons/StringDictionaryMerger.pyx
  40. +2 −1 python/src/pxds/dictionary.pxd
  41. +6 −6 python/src/pxds/dictionary_compiler.pxd
  42. +5 −5 python/src/pxds/dictionary_merger.pxd
  43. +16 −12 python/tests/dictionary/loading_test.py
  44. +3 −2 python/tests/int/int_dictionary_test.py
  45. +0 −2 python/tests/json/json_dictionary_test.py
  46. +11 −10 python/tests/statistics_test.py
@@ -9,6 +9,8 @@ else()
endif()

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2 -Wall")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DRAPIDJSON_HAS_STDSTRING")


set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -ggdb3")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -DNDEBUG")
@@ -83,3 +85,5 @@ add_custom_target(bindings
COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:tiny-process-library> ${CMAKE_BINARY_DIR}
DEPENDS tpie tiny-process-library
)

configure_file(keyvi/flags.cmake keyvi/flags)
@@ -92,22 +92,18 @@ void compile_multiple(CompilerType* compiler, std::function<std::pair<std::strin
}

template <typename CompilerType>
void finalize_compile(CompilerType* compiler, const std::string& output, const std::string& manifest = "") {
void finalize_compile(CompilerType* compiler, const std::string& output, const std::string& manifest = {}) {
std::ofstream out_stream(output, std::ios::binary);
compiler->Compile(callback);
try {
compiler->SetManifest(manifest);
} catch (boost::property_tree::json_parser::json_parser_error const& error) {
std::cout << "Failed to set manifest: " << manifest << std::endl;
std::cout << error.what() << std::endl;
}
compiler->SetManifest(manifest);

compiler->Write(out_stream);
out_stream.close();
}

template <class BucketT = uint32_t>
void compile_completion(const std::vector<std::string>& input, const std::string& output,
const std::string& manifest = "",
const std::string& manifest = {},
const keyvi::util::parameters_t& value_store_params = keyvi::util::parameters_t()) {
keyvi::dictionary::CompletionDictionaryCompiler compiler(value_store_params);

@@ -133,7 +129,7 @@ void compile_completion(const std::vector<std::string>& input, const std::string
finalize_compile(&compiler, output, manifest);
}

void compile_integer(const std::vector<std::string>& input, const std::string& output, const std::string& manifest = "",
void compile_integer(const std::vector<std::string>& input, const std::string& output, const std::string& manifest = {},
const keyvi::util::parameters_t& value_store_params = keyvi::util::parameters_t()) {
keyvi::dictionary::IntDictionaryCompiler compiler(value_store_params);

@@ -161,7 +157,7 @@ void compile_integer(const std::vector<std::string>& input, const std::string& o

template <class Compiler>
void compile_strings_inner(Compiler* compiler, const std::vector<std::string>& input, const std::string& output,
const std::string& manifest = "") {
const std::string& manifest = {}) {
std::function<std::pair<std::string, std::string>(std::string)> parser = [](std::string line) {
size_t tab = line.find('\t');
if (tab == std::string::npos) return std::pair<std::string, std::string>();
@@ -176,14 +172,14 @@ void compile_strings_inner(Compiler* compiler, const std::vector<std::string>& i
finalize_compile(compiler, output, manifest);
}

void compile_strings(const std::vector<std::string>& input, const std::string& output, const std::string& manifest = "",
void compile_strings(const std::vector<std::string>& input, const std::string& output, const std::string& manifest = {},
const keyvi::util::parameters_t& value_store_params = keyvi::util::parameters_t()) {
keyvi::dictionary::StringDictionaryCompiler compiler(value_store_params);
compile_strings_inner(&compiler, input, output, manifest);
}

void compile_key_only(const std::vector<std::string>& input, const std::string& output,
const std::string& manifest = "",
const std::string& manifest = {},
const keyvi::util::parameters_t& value_store_params = keyvi::util::parameters_t()) {
keyvi::dictionary::KeyOnlyDictionaryCompiler compiler(value_store_params);

@@ -203,7 +199,7 @@ void compile_key_only(const std::vector<std::string>& input, const std::string&
finalize_compile(&compiler, output, manifest);
}

void compile_json(const std::vector<std::string>& input, const std::string& output, const std::string& manifest = "",
void compile_json(const std::vector<std::string>& input, const std::string& output, const std::string& manifest = {},
const keyvi::util::parameters_t& value_store_params = keyvi::util::parameters_t()) {
keyvi::dictionary::JsonDictionaryCompiler compiler(value_store_params);
compile_strings_inner(&compiler, input, output, manifest);
@@ -244,7 +240,7 @@ int main(int argc, char** argv) {
->composing(),
"An option; format is -p xxx=yyy");

description.add_options()("manifest", boost::program_options::value<std::string>()->default_value(""),
description.add_options()("manifest", boost::program_options::value<std::string>()->default_value({}),
"manifest to be embedded");

// Declare which options are positional
@@ -0,0 +1 @@
KEYVI_CXX_FLAGS=@CMAKE_CXX_FLAGS@
@@ -388,7 +388,7 @@ class Dictionary final {
return MatchIterator::MakeIteratorPair(func, data->FirstMatch());
}

std::string GetManifestAsString() const { return fsa_->GetManifestAsString(); }
std::string GetManifest() const { return fsa_->GetManifest(); }

private:
fsa::automata_t fsa_;
Oops, something went wrong.

0 comments on commit f1f2bbf

Please sign in to comment.