diff --git a/keyvi/bin/keyvicompiler/keyvicompiler.cpp b/keyvi/bin/keyvicompiler/keyvicompiler.cpp index 3d5599b11..bd64360b6 100644 --- a/keyvi/bin/keyvicompiler/keyvicompiler.cpp +++ b/keyvi/bin/keyvicompiler/keyvicompiler.cpp @@ -23,42 +23,44 @@ * Author: hendrik */ -#include -#include -#include -#include -#include -#include +#include + #include #include +#include +#include +#include +#include +#include +#include #include #include + #include "dictionary/dictionary_compiler.h" -#include "dictionary/fsa/internal/sparse_array_persistence.h" -#include "dictionary/fsa/internal/int_value_store.h" #include "dictionary/fsa/internal/int_inner_weights_value_store.h" -#include "dictionary/fsa/internal/string_value_store.h" +#include "dictionary/fsa/internal/int_value_store.h" #include "dictionary/fsa/internal/json_value_store.h" +#include "dictionary/fsa/internal/sparse_array_persistence.h" +#include "dictionary/fsa/internal/string_value_store.h" typedef keyvi::dictionary::fsa::internal::IValueStoreWriter::vs_param_t vs_param_t; -void callback (size_t added, size_t overall, void*) { - std::cout << "Processed " << added << "/" << overall << "(" << ((100 * added) / overall) << "%)." << std::endl; +void callback(size_t added, size_t overall, void*) { + std::cout << "Processed " << added << "/" << overall << "(" << ((100 * added) / overall) << "%)." << std::endl; } -template -void compile_multiple(CompilerType& compiler, std::function(std::string)> parser, - std::vector& inputs) -{ +template +void compile_multiple(CompilerType* compiler, std::function(std::string)> parser, + const std::vector& inputs) { boost::iostreams::filtering_istream input_stream; std::string line; for (auto input_as_string : inputs) { auto input = boost::filesystem::path(input_as_string); - if(boost::filesystem::is_directory(input)) { + if (boost::filesystem::is_directory(input)) { int files_added = 0; - for(auto& entry : boost::make_iterator_range(boost::filesystem::directory_iterator(input), {})) { + for (auto& entry : boost::make_iterator_range(boost::filesystem::directory_iterator(input), {})) { if (entry.path().extension() == ".gz") { input_stream.push(boost::iostreams::gzip_decompressor()); } @@ -67,19 +69,18 @@ void compile_multiple(CompilerType& compiler, std::functionAdd(parse_result.first, parse_result.second); } input_stream.reset(); } } else { - if (input.extension() == ".gz"){ + if (input.extension() == ".gz") { input_stream.push(boost::iostreams::gzip_decompressor()); } @@ -88,41 +89,38 @@ void compile_multiple(CompilerType& compiler, std::functionAdd(parse_result.first, parse_result.second); } input_stream.reset(); } } } -template -void finalize_compile(CompilerType& compiler, std::string& output, const std::string& manifest = "") { +template +void finalize_compile(CompilerType* compiler, const std::string& output, const std::string& manifest = "") { std::ofstream out_stream(output, std::ios::binary); - compiler.Compile(callback); + compiler->Compile(callback); try { - compiler.SetManifestFromString(manifest); - } catch(boost::property_tree::json_parser::json_parser_error const& error) { + compiler->SetManifestFromString(manifest); + } catch (boost::property_tree::json_parser::json_parser_error const& error) { std::cout << "Failed to set manifest: " << manifest << std::endl; std::cout << error.what() << std::endl; } - compiler.Write(out_stream); + compiler->Write(out_stream); out_stream.close(); } -template -void compile_completion(std::vector& input, std::string& output, - const std::string& manifest = "", - const vs_param_t& value_store_params = vs_param_t()) { - keyvi::dictionary::DictionaryCompiler< - keyvi::dictionary::fsa::internal::SparseArrayPersistence, - keyvi::dictionary::fsa::internal::IntInnerWeightsValueStore> compiler( - value_store_params); +template +void compile_completion(const std::vector& input, const std::string& output, + const std::string& manifest = "", const vs_param_t& value_store_params = vs_param_t()) { + keyvi::dictionary::DictionaryCompiler, + keyvi::dictionary::fsa::internal::IntInnerWeightsValueStore> + compiler(value_store_params); - std::function(std::string)> parser = [] (std::string line) { + std::function(std::string)> parser = [](std::string line) { size_t tab = line.find('\t'); - if (tab == std::string::npos) - return std::pair(); + if (tab == std::string::npos) return std::pair(); std::string key = line.substr(0, tab); std::string value_as_string = line.substr(tab + 1); @@ -134,28 +132,24 @@ void compile_completion(std::vector& input, std::string& output, std::cout << "Error: value was not valid: " << line << std::endl; return std::pair(); } - return std::pair(key, value); + return std::pair(key, value); }; - compile_multiple(compiler, parser, input); + compile_multiple(&compiler, parser, input); - finalize_compile(compiler, output, manifest); + finalize_compile(&compiler, output, manifest); } - -template -void compile_integer(std::vector& input, std::string& output, - const std::string& manifest = "", +template +void compile_integer(const std::vector& input, const std::string& output, const std::string& manifest = "", const vs_param_t& value_store_params = vs_param_t()) { - keyvi::dictionary::DictionaryCompiler< - keyvi::dictionary::fsa::internal::SparseArrayPersistence, - keyvi::dictionary::fsa::internal::IntValueStore> compiler( - value_store_params); + keyvi::dictionary::DictionaryCompiler, + keyvi::dictionary::fsa::internal::IntValueStore> + compiler(value_store_params); - std::function(std::string)> parser = [] (std::string line) { + std::function(std::string)> parser = [](std::string line) { size_t tab = line.find('\t'); - if (tab == std::string::npos) - return std::pair(); + if (tab == std::string::npos) return std::pair(); std::string key = line.substr(0, tab); std::string value_as_string = line.substr(tab + 1); @@ -167,22 +161,19 @@ void compile_integer(std::vector& input, std::string& output, std::cout << "Error: value was not valid: " << line << std::endl; return std::pair(); } - return std::pair(key, value); + return std::pair(key, value); }; - compile_multiple(compiler, parser, input); + compile_multiple(&compiler, parser, input); - finalize_compile(compiler, output, manifest); + finalize_compile(&compiler, output, manifest); } - -template -void compile_strings_inner(Compiler& compiler, - std::vector& input, std::string& output, +template +void compile_strings_inner(Compiler* compiler, const std::vector& input, const std::string& output, const std::string& manifest = "") { - std::function(std::string)> parser = [] (std::string line) { + std::function(std::string)> parser = [](std::string line) { size_t tab = line.find('\t'); - if (tab == std::string::npos) - return std::pair(); + if (tab == std::string::npos) return std::pair(); std::string key = line.substr(0, tab); std::string value = line.substr(tab + 1); @@ -194,27 +185,22 @@ void compile_strings_inner(Compiler& compiler, finalize_compile(compiler, output, manifest); } -template -void compile_strings(std::vector& input, std::string& output, - const std::string& manifest = "", +template +void compile_strings(const std::vector& input, const std::string& output, const std::string& manifest = "", const vs_param_t& value_store_params = vs_param_t()) { - keyvi::dictionary::DictionaryCompiler< - keyvi::dictionary::fsa::internal::SparseArrayPersistence, - keyvi::dictionary::fsa::internal::StringValueStore> compiler( - value_store_params); - compile_strings_inner(compiler, input, output, manifest); + keyvi::dictionary::DictionaryCompiler, + keyvi::dictionary::fsa::internal::StringValueStore> + compiler(value_store_params); + compile_strings_inner(&compiler, input, output, manifest); } -template -void compile_key_only(std::vector& input, std::string& output, - const std::string& manifest = "", - const vs_param_t& value_store_params = vs_param_t()) { - keyvi::dictionary::DictionaryCompiler< - keyvi::dictionary::fsa::internal::SparseArrayPersistence> compiler( +template +void compile_key_only(const std::vector& input, const std::string& output, + const std::string& manifest = "", const vs_param_t& value_store_params = vs_param_t()) { + keyvi::dictionary::DictionaryCompiler> compiler( value_store_params); - std::function(std::string)> parser = [] (std::string line) { - + std::function(std::string)> parser = [](std::string line) { std::string key = line; size_t tab = line.find('\t'); @@ -225,27 +211,24 @@ void compile_key_only(std::vector& input, std::string& output, return std::pair(key, 0); }; - compile_multiple(compiler, parser, input); + compile_multiple(&compiler, parser, input); - finalize_compile(compiler, output, manifest); + finalize_compile(&compiler, output, manifest); } -template -void compile_json(std::vector& input, std::string& output, - const std::string& manifest = "", +template +void compile_json(const std::vector& input, const std::string& output, const std::string& manifest = "", const vs_param_t& value_store_params = vs_param_t()) { - keyvi::dictionary::DictionaryCompiler< - keyvi::dictionary::fsa::internal::SparseArrayPersistence, - keyvi::dictionary::fsa::internal::JsonValueStore> compiler( - value_store_params); - compile_strings_inner(compiler, input, output, manifest); + keyvi::dictionary::DictionaryCompiler, + keyvi::dictionary::fsa::internal::JsonValueStore> + compiler(value_store_params); + compile_strings_inner(&compiler, input, output, manifest); } -/** Extracts the value store parameters. */ -vs_param_t extract_value_store_parameters( - const boost::program_options::variables_map& vm) { +/** Extracts the parameters. */ +vs_param_t extract_parameters(const boost::program_options::variables_map& vm) { vs_param_t ret; - for (auto& v : vm["value-store-parameter"].as >()) { + for (auto& v : vm["parameter"].as>()) { std::vector key_value; boost::split(key_value, v, std::bind1st(std::equal_to(), '=')); if (key_value.size() == 2) { @@ -261,34 +244,22 @@ int main(int argc, char** argv) { std::vector input_files; std::string output_file; - boost::program_options::options_description description( - "keyvi compiler options:"); - - description.add_options()("help,h", "Display this help message")( - "version,v", "Display the version number"); - - description.add_options()("input-file,i", - boost::program_options::value>(), - "input file"); - description.add_options()("output-file,o", - boost::program_options::value(), - "output file"); - description.add_options()("memory-limit,m", - boost::program_options::value(), - "amount of main memory to use"); - description.add_options()( - "dictionary-type,d", - boost::program_options::value()->default_value("integer"), - "type of dictionary (integer (default), string, key-only, json, completion)"); - description.add_options()( - "value-store-parameter,V", - boost::program_options::value< std::vector >()->default_value(std::vector(), "EMPTY")->composing(), - "A value store option; format is -V xxx=yyy"); - - description.add_options()( - "manifest", - boost::program_options::value()->default_value(""), - "manifest to be embedded"); + boost::program_options::options_description description("keyvi compiler options:"); + + description.add_options()("help,h", "Display this help message")("version,v", "Display the version number"); + + description.add_options()("input-file,i", boost::program_options::value>(), "input file"); + description.add_options()("output-file,o", boost::program_options::value(), "output file"); + description.add_options()("memory-limit,m", boost::program_options::value(), "amount of main memory to use"); + description.add_options()("dictionary-type,d", boost::program_options::value()->default_value("integer"), + "type of dictionary (integer (default), string, key-only, json, completion)"); + description.add_options()("parameter,p", boost::program_options::value>() + ->default_value(std::vector(), "EMPTY") + ->composing(), + "An option; format is -p xxx=yyy"); + + description.add_options()("manifest", boost::program_options::value()->default_value(""), + "manifest to be embedded"); // Declare which options are positional boost::program_options::positional_options_description p; @@ -297,18 +268,14 @@ int main(int argc, char** argv) { boost::program_options::variables_map vm; try { - boost::program_options::store( - boost::program_options::command_line_parser(argc, argv).options( - description).run(), - vm); + boost::program_options::store(boost::program_options::command_line_parser(argc, argv).options(description).run(), + vm); boost::program_options::notify(vm); // parse positional options boost::program_options::store( - boost::program_options::command_line_parser(argc, argv).options( - description).positional(p).run(), - vm); + boost::program_options::command_line_parser(argc, argv).options(description).positional(p).run(), vm); boost::program_options::notify(vm); if (vm.count("help")) { @@ -320,32 +287,27 @@ int main(int argc, char** argv) { std::cout << manifest << std::endl; std::string dictionary_type = vm["dictionary-type"].as(); - vs_param_t value_store_params = extract_value_store_parameters(vm); + vs_param_t value_store_params = extract_parameters(vm); if (vm.count("memory-limit")) { - value_store_params[MEMORY_LIMIT_KEY] = vm["memory-limit"].as(); - } + value_store_params[MEMORY_LIMIT_KEY] = vm["memory-limit"].as(); + } if (vm.count("input-file") && vm.count("output-file")) { input_files = vm["input-file"].as>(); output_file = vm["output-file"].as(); if (dictionary_type == "integer") { - compile_integer(input_files, output_file, - manifest, value_store_params); + compile_integer(input_files, output_file, manifest, value_store_params); } else if (dictionary_type == "string") { - compile_strings(input_files, output_file, - manifest, value_store_params); + compile_strings(input_files, output_file, manifest, value_store_params); } else if (dictionary_type == "key-only") { - compile_key_only(input_files, output_file, - manifest, value_store_params); + compile_key_only(input_files, output_file, manifest, value_store_params); } else if (dictionary_type == "json") { - compile_json(input_files, output_file, - manifest, value_store_params); + compile_json(input_files, output_file, manifest, value_store_params); } else if (dictionary_type == "completion") { - compile_integer(input_files, output_file, - manifest, value_store_params); + compile_integer(input_files, output_file, manifest, value_store_params); } else { std::cout << "ERROR: unknown dictionary type." << std::endl << std::endl; std::cout << description; @@ -356,14 +318,13 @@ int main(int argc, char** argv) { std::cout << description; return 1; } + } catch (std::exception& e) { + std::cout << "ERROR: arguments wrong or missing." << std::endl << std::endl; - } catch(std::exception& e) { - std::cout << "ERROR: arguments wrong or missing." << std::endl << std::endl; + std::cout << e.what() << std::endl << std::endl; + std::cout << description; - std::cout << e.what() << std::endl << std::endl; - std::cout << description; - - return 1; + return 1; } return 0; } diff --git a/keyvi/bin/keyviinspector/keyviinspector.cpp b/keyvi/bin/keyviinspector/keyviinspector.cpp index 2389854d5..663aa417b 100644 --- a/keyvi/bin/keyviinspector/keyviinspector.cpp +++ b/keyvi/bin/keyviinspector/keyviinspector.cpp @@ -22,26 +22,26 @@ * Created on: May 13, 2014 * Author: hendrik */ - #include -#include + #include +#include + #include "dictionary/fsa/automata.h" #include "dictionary/fsa/entry_iterator.h" -void dump(std::string& input, std::string& output, bool keys_only = false) { - - keyvi::dictionary::fsa::automata_t automata (new keyvi::dictionary::fsa::Automata(input.c_str())); +void dump(const std::string& input, const std::string& output, bool keys_only = false) { + keyvi::dictionary::fsa::automata_t automata(new keyvi::dictionary::fsa::Automata(input.c_str())); keyvi::dictionary::fsa::EntryIterator it(automata); keyvi::dictionary::fsa::EntryIterator end_it = keyvi::dictionary::fsa::EntryIterator(); std::ofstream out_stream(output); - while (it != end_it){ + while (it != end_it) { it.WriteKey(out_stream); if (!keys_only) { - std::string value = it.GetValueAsString(); + std::string value = it.GetValueAsString(); if (value.size()) { out_stream << "\t"; out_stream << value; @@ -53,15 +53,14 @@ void dump(std::string& input, std::string& output, bool keys_only = false) { out_stream.close(); } -void dump_with_attributes(std::string& input, std::string& output) { - - keyvi::dictionary::fsa::automata_t automata (new keyvi::dictionary::fsa::Automata(input.c_str())); +void dump_with_attributes(const std::string& input, const std::string& output) { + keyvi::dictionary::fsa::automata_t automata(new keyvi::dictionary::fsa::Automata(input.c_str())); keyvi::dictionary::fsa::EntryIterator it(automata); keyvi::dictionary::fsa::EntryIterator end_it = keyvi::dictionary::fsa::EntryIterator(); std::ofstream out_stream(output); - while (it != end_it){ + while (it != end_it) { it.WriteKey(out_stream); out_stream << "\t"; @@ -73,8 +72,8 @@ void dump_with_attributes(std::string& input, std::string& output) { out_stream.close(); } -void print_statistics(std::string& input) { - keyvi::dictionary::fsa::automata_t automata (new keyvi::dictionary::fsa::Automata(input.c_str())); +void print_statistics(const std::string& input) { + keyvi::dictionary::fsa::automata_t automata(new keyvi::dictionary::fsa::Automata(input.c_str())); std::cout << automata->GetStatistics() << std::endl; } @@ -82,34 +81,24 @@ int main(int argc, char** argv) { std::string input_file; std::string output_file; - boost::program_options::options_description description( - "keyvi inspector options:"); + boost::program_options::options_description description("keyvi inspector options:"); - description.add_options()("help,h", "Display this help message") - ("version,v", "Display the version number") - ("input-file,i", boost::program_options::value(), - "input file") - ("output-file,o", boost::program_options::value(), - "output file") - ("keys-only,k", "dump only the keys") - ("statistics,s", "Show statistics of the file"); + description.add_options()("help,h", "Display this help message")("version,v", "Display the version number")( + "input-file,i", boost::program_options::value(), "input file")( + "output-file,o", boost::program_options::value(), "output file")( + "keys-only,k", "dump only the keys")("statistics,s", "Show statistics of the file"); // Declare which options are positional boost::program_options::positional_options_description p; p.add("input-file", -1); boost::program_options::variables_map vm; - boost::program_options::store( - boost::program_options::command_line_parser(argc, argv).options( - description).run(), - vm); + boost::program_options::store(boost::program_options::command_line_parser(argc, argv).options(description).run(), vm); boost::program_options::notify(vm); // parse positional options boost::program_options::store( - boost::program_options::command_line_parser(argc, argv).options( - description).positional(p).run(), - vm); + boost::program_options::command_line_parser(argc, argv).options(description).positional(p).run(), vm); boost::program_options::notify(vm); if (vm.count("help")) { std::cout << description; @@ -125,8 +114,8 @@ int main(int argc, char** argv) { input_file = vm["input-file"].as(); output_file = vm["output-file"].as(); - dump (input_file, output_file, key_only); - //dump_with_attributes (input_file, output_file); + dump(input_file, output_file, key_only); + // dump_with_attributes (input_file, output_file); return 0; } @@ -140,6 +129,3 @@ int main(int argc, char** argv) { std::cout << description; return 1; } - - - diff --git a/keyvi/bin/keyvimerger/keyvimerger.cpp b/keyvi/bin/keyvimerger/keyvimerger.cpp index d8ea449be..d19df6f94 100644 --- a/keyvi/bin/keyvimerger/keyvimerger.cpp +++ b/keyvi/bin/keyvimerger/keyvimerger.cpp @@ -33,7 +33,22 @@ typedef keyvi::dictionary::fsa::internal::IValueStoreWriter::vs_param_t vs_param_t; -int main(int argc, char **argv) { +/** Extracts the parameters. */ +vs_param_t extract_parameters(const boost::program_options::variables_map& vm) { + vs_param_t ret; + for (auto& v : vm["parameter"].as>()) { + std::vector key_value; + boost::split(key_value, v, std::bind1st(std::equal_to(), '=')); + if (key_value.size() == 2) { + ret[key_value[0]] = key_value[1]; + } else { + throw std::invalid_argument("Invalid value store parameter format: " + v); + } + } + return ret; +} + +int main(int argc, char** argv) { std::vector input_files; std::string output_file; @@ -43,6 +58,11 @@ int main(int argc, char **argv) { description.add_options()("input-file,i", boost::program_options::value>(), "input file"); description.add_options()("output-file,o", boost::program_options::value(), "output file"); + description.add_options()("memory-limit,m", boost::program_options::value(), "amount of main memory to use"); + description.add_options()("parameter,p", boost::program_options::value>() + ->default_value(std::vector(), "EMPTY") + ->composing(), + "An option; format is -p xxx=yyy"); // Declare which options are positional boost::program_options::positional_options_description p; @@ -61,8 +81,6 @@ int main(int argc, char **argv) { return 0; } - size_t memory_limit = 1073741824; - if (vm.count("input-file") && vm.count("output-file")) { input_files = vm["input-file"].as>(); output_file = vm["output-file"].as(); @@ -72,7 +90,7 @@ int main(int argc, char **argv) { for (auto f : input_files) { if (boost::filesystem::is_directory(f)) { int files_added = 0; - for (auto &entry : boost::make_iterator_range(boost::filesystem::directory_iterator(f), {})) { + for (auto& entry : boost::make_iterator_range(boost::filesystem::directory_iterator(f), {})) { if (entry.path().extension() == ".kv") { inputs.push_back(entry.path().string()); } @@ -84,9 +102,10 @@ int main(int argc, char **argv) { typedef keyvi::dictionary::fsa::internal::IValueStoreWriter::vs_param_t vs_param_t; - vs_param_t params; - params["merge_mode"] = "append"; - params["memory-limit"] = std::to_string(memory_limit); + vs_param_t params = extract_parameters(vm); + if (vm.count("memory-limit")) { + params[MEMORY_LIMIT_KEY] = vm["memory-limit"].as(); + } keyvi::dictionary::JsonDictionaryMerger jsonDictionaryMerger(params); for (auto f : inputs) {