Skip to content

Commit

Permalink
framework: improve command line option parsing
Browse files Browse the repository at this point in the history
Allow multiple map values to contain commas. Only use the last comma
before a key value as a delimiter. Also, strip any extra delimiters in
values.

Additionally, add an explicit option type for parsing command line
options. This allows proper parsing of options for DPDK and resolves
issue Spirent#248.
  • Loading branch information
DerangedMonkeyNinja committed Oct 16, 2020
1 parent cfe085b commit 2ca46bb
Show file tree
Hide file tree
Showing 3 changed files with 167 additions and 42 deletions.
195 changes: 157 additions & 38 deletions src/framework/config/op_config_file.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ namespace openperf::config::file {
using namespace std;

using path_iterator = std::vector<std::string>::const_iterator;
using string_pair = std::pair<std::string, std::string>;

static string config_file_name;
static unordered_map<int, std::string> cli_options;
Expand All @@ -24,6 +25,18 @@ constexpr static std::string_view pair_delimiter("=");

std::string_view op_config_get_file_name() { return (config_file_name); }

static size_t find_last_of_before(std::string_view input,
std::string_view item_delimiters,
std::string_view kv_delimiters)
{
auto cursor = input.find_first_of(kv_delimiters);
if (cursor != std::string::npos) {
cursor = input.find_last_of(item_delimiters, cursor + 1);
}

return (cursor);
}

static std::vector<std::string> split_string(std::string_view input,
std::string_view delimiters)
{
Expand All @@ -45,20 +58,122 @@ static bool starts_with(std::string_view val, std::string_view prefix)
&& (val.compare(0, prefix.size(), prefix) == 0));
}

static void set_map_data_node(YAML::Node& node, std::string_view opt_data)
static std::string trim_left(std::string&& s, std::string_view chars)
{
auto data_pairs = split_string(opt_data, list_delimiters);
s.erase(std::begin(s),
std::find_if(std::begin(s), std::end(s), [&](auto c) {
return (chars.find(c) == std::string::npos);
}));
return (std::move(s));
}

// XXX: yaml-cpp does not have type conversion for unordered_map.
std::map<std::string, std::string> output;
for (auto& data_pair : data_pairs) {
size_t pos = data_pair.find_first_of(pair_delimiter);
if (pos == std::string::npos) { continue; }
static std::string trim_right(std::string&& s, std::string_view chars)
{
s.erase(std::find_if(
std::rbegin(s),
std::rend(s),
[&](auto c) { return (chars.find(c) == std::string::npos); })
.base(),
std::end(s));
return (std::move(s));
}

static std::vector<string_pair> split_map_string(std::string_view input,
std::string_view delimiters)
{
std::vector<string_pair> output;
size_t cursor = 0;
while (cursor < input.length()) {
auto key_end = input.find_first_of(pair_delimiter, cursor);

/* XXX: value_end is relative to key_end + 1 */
auto value_end = find_last_of_before(
input.substr(key_end + 1), delimiters, pair_delimiter);

output.emplace_back(
trim_left(std::string(input.substr(cursor, key_end - cursor)),
delimiters),
trim_right(std::string(input.substr(key_end + 1, value_end)),
delimiters));

cursor = (value_end == std::string::npos ? std::string::npos
: key_end + value_end + 2);
}

return (output);
}

template <typename StringType>
std::string concatenate(std::vector<StringType>& tokens, std::string_view join)
{
return (std::accumulate(std::begin(tokens),
std::end(tokens),
std::string{},
[&](auto& lhs, const auto& rhs) {
return (lhs +=
((lhs.empty() ? "" : std::string(join))
+ std::string(rhs)));
}));
}

static std::vector<std::string>
split_options_string(std::string_view input, std::string_view delimiters)
{
std::vector<std::string_view> tokens;
size_t cursor = 0, end = 0;
while ((cursor = input.find_first_not_of(delimiters, end))
!= std::string::npos) {
end = input.find_first_of(delimiters, cursor + 1);

output[data_pair.substr(0, pos)] =
data_pair.substr(pos + 1, data_pair.length());
tokens.emplace_back(input.substr(cursor, end - cursor));
}

/*
* Now turn the tokens into a vector of strings, making sure
* we keep group option arguments together as a single string.
*/
auto output = std::vector<std::string>{};
auto opt = std::optional<std::string_view>{};
auto values = std::vector<std::string_view>{};

for (auto&& t : tokens) {
if (starts_with(t, "-")) {
if (opt) {
output.emplace_back(*opt);
if (!values.empty()) {
output.emplace_back(concatenate(values, ","));
values.clear();
}
}

opt = t;
} else {
values.emplace_back(t);
}
}

/* Don't forget the last option! */
if (opt) {
output.emplace_back(*opt);
if (!values.empty()) { output.emplace_back(concatenate(values, ",")); }
}

return (output);
}

static void set_map_data_node(YAML::Node& node, std::string_view opt_data)
{
auto pairs = split_map_string(opt_data, list_delimiters);

// XXX: yaml-cpp does not have type conversion for unordered_map.
auto output = std::map<std::string, std::string>{};
std::transform(std::begin(pairs),
std::end(pairs),
std::inserter(output, std::end(output)),
[](auto&& item) -> string_pair {
return {item.first, item.second};
});

node = output;
}

Expand All @@ -67,6 +182,9 @@ static void set_data_node_value(YAML::Node& node,
enum op_option_type opt_type)
{
switch (opt_type) {
case OP_OPTION_TYPE_NONE:
node = true;
break;
case OP_OPTION_TYPE_STRING:
node = std::string(opt_data);
break;
Expand All @@ -76,17 +194,17 @@ static void set_data_node_value(YAML::Node& node,
case OP_OPTION_TYPE_LONG:
node = strtol(opt_data.data(), nullptr, 10);
break;
case OP_OPTION_TYPE_DOUBLE:
node = strtod(opt_data.data(), nullptr);
break;
case OP_OPTION_TYPE_MAP:
set_map_data_node(node, opt_data);
break;
case OP_OPTION_TYPE_LIST:
node = split_string(opt_data, list_delimiters);
break;
case OP_OPTION_TYPE_DOUBLE:
node = strtod(opt_data.data(), nullptr);
break;
case OP_OPTION_TYPE_NONE:
node = true;
case OP_OPTION_TYPE_OPTIONS:
node = split_options_string(opt_data, list_delimiters);
break;
}

Expand Down Expand Up @@ -120,10 +238,10 @@ static YAML::Node create_param_by_path(path_iterator pos,
}

/*
* Recursive function to traverse an existing YAML tree path by the given
* path component strings of the range [pos, end). If the entire path exists
* the base case will assign the requested data value. Else, function will
* switch over to creating a new path.
* Recursive function to traverse an existing YAML tree path by the
* given path component strings of the range [pos, end). If the entire
* path exists the base case will assign the requested data value. Else,
* function will switch over to creating a new path.
*/
static void update_param_by_path(YAML::Node& parent_node,
path_iterator pos,
Expand All @@ -140,8 +258,8 @@ static void update_param_by_path(YAML::Node& parent_node,
YAML::Node child_node = parent_node[*pos];
update_param_by_path(child_node, ++pos, end, opt_data, opt_type);
} else {
// Make a copy, else the ++pos operation on the right side will be
// reflected on the left side.
// Make a copy, else the ++pos operation on the right side will
// be reflected on the left side.
auto key = pos;
parent_node[*key] =
create_param_by_path(++pos, end, opt_data, opt_type);
Expand Down Expand Up @@ -243,8 +361,8 @@ int op_config_file_find(int argc, char* const argv[])
return (errno);
}

// This will do an initial parse. yaml-cpp throws exceptions when the parser
// runs into invalid YAML.
// This will do an initial parse. yaml-cpp throws exceptions when
// the parser runs into invalid YAML.
YAML::Node root_node;
try {
root_node = YAML::LoadFile(config_file_name);
Expand All @@ -254,31 +372,32 @@ int op_config_file_find(int argc, char* const argv[])
return (EINVAL);
}

// XXX: Putting this at the top can lead to the message being lost on its
// way to the logging thread. Definitely a workaround, but not a critical
// message either.
// XXX: Putting this at the top can lead to the message being lost
// on its way to the logging thread. Definitely a workaround, but
// not a critical message either.
OP_LOG(OP_LOG_DEBUG, "Reading from configuration file %s", file_name);

// We currently support three top level nodes: `core`, `modules`, and
// `resources`. Nodes are generally optional, however the `resources`
// node depends on the `modules` node. Hence, we return an error
// if 'resources' exists without `modules`.
// We currently support three top level nodes: `core`, `modules`,
// and `resources`. Nodes are generally optional, however the
// `resources` node depends on the `modules` node. Hence, we return
// an error if 'resources' exists without `modules`.
if (root_node["resources"] && !root_node["modules"]) {
std::cerr
<< "Configuration file " << file_name << " contains \"resources\""
<< " but not \"modules\". The \"modules\" section is required."
<< std::endl;
std::cerr << "Configuration file " << file_name
<< " contains \"resources\""
<< " but not \"modules\". The \"modules\" section "
"is required."
<< std::endl;
return (EINVAL);
}

// We also generate a warning if the config file contains unrecognized
// nodes.
// We also generate a warning if the config file contains
// unrecognized nodes.
auto top_level_nodes =
std::initializer_list<std::string_view>{"core", "modules", "resources"};

// Clearly, set_difference would be a better choice here, but unfortunately,
// YAML::Node only appears to allow you to retrieve the key value from an
// iterator and not from the actual node!
// Clearly, set_difference would be a better choice here, but
// unfortunately, YAML::Node only appears to allow you to retrieve
// the key value from an iterator and not from the actual node!
std::vector<std::string> unknown_nodes;
for (const auto& node : root_node) {
auto key = node.first.as<std::string>();
Expand Down
6 changes: 5 additions & 1 deletion src/framework/config/op_config_file.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,11 @@ template <> struct op_option_type_maps<op_option_type::OP_OPTION_TYPE_DOUBLE>
};
template <> struct op_option_type_maps<op_option_type::OP_OPTION_TYPE_MAP>
{
typedef std::map<std::string, std::string> type;
using type = std::map<std::string, std::string>;
};
template <> struct op_option_type_maps<op_option_type::OP_OPTION_TYPE_OPTIONS>
{
using type = std::vector<std::string>;
};
template <> struct op_option_type_maps<op_option_type::OP_OPTION_TYPE_LIST>
{
Expand Down
8 changes: 5 additions & 3 deletions src/framework/core/op_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,14 @@ typedef int(op_option_callback_fn)(int opt, const char* optarg);
/**
* Enum denoting the type of an option's data.
* Exact types are mapped as:
* NONE -> bool
* STRING -> std::string
* HEX -> long
* LONG -> long
* DOUBLE -> double
* MAP -> std::map<std::string, std::string>>
* LIST -> std::vector<std::string>
*
* NONE -> bool
* OPTIONS -> std::vector<std::string>
* The NONE -> bool thing means that if the option is present
* framework will return true when queried for it, false otherwise.
*/
Expand All @@ -45,7 +46,8 @@ typedef enum op_option_type {
OP_OPTION_TYPE_LONG,
OP_OPTION_TYPE_DOUBLE,
OP_OPTION_TYPE_MAP,
OP_OPTION_TYPE_LIST
OP_OPTION_TYPE_LIST,
OP_OPTION_TYPE_OPTIONS,
} op_option_type_t;

/**
Expand Down

0 comments on commit 2ca46bb

Please sign in to comment.